diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-01-30 12:50:58 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-01-30 22:46:59 +0100 |
| commit | 8d1ce390e21ce0a5812a4211c893ec359906d6f1 (patch) | |
| tree | 5dbe1ad96af8ce8f9f01887340fe06025462e959 /src/parse | |
| parent | d7072f378fef733ae994fd9a1e767df4e4dd878e (diff) | |
Rework strong and emph
- Star and underscore are not parsed as strong/emph inside of words
- Stars/underscores must be balanced and cannot span a paragraph break
- New `strong` and `emph` classes
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/incremental.rs | 8 | ||||
| -rw-r--r-- | src/parse/mod.rs | 39 | ||||
| -rw-r--r-- | src/parse/parser.rs | 84 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 17 |
4 files changed, 100 insertions, 48 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 9dd5bec1..fb927c24 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -435,10 +435,12 @@ impl NodeKind { | Self::LeftParen | Self::RightParen => SuccessionRule::Unsafe, + // These work similar to parentheses. + Self::Star | Self::Underscore => SuccessionRule::Unsafe, + // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. - Self::Star - | Self::Comma + // operation which makes it unsafe. + Self::Comma | Self::Semicolon | Self::Colon | Self::Plus diff --git a/src/parse/mod.rs b/src/parse/mod.rs index a9839ed6..b8ef3066 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -21,7 +21,7 @@ use crate::util::EcoString; /// Parse a source file. pub fn parse(src: &str) -> Rc<GreenNode> { let mut p = Parser::new(src, TokenMode::Markup); - markup(&mut p); + markup(&mut p, true); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, _ => unreachable!(), @@ -61,7 +61,7 @@ pub fn parse_markup( ) -> Option<(Vec<Green>, bool)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); if min_column == 0 { - markup(&mut p); + markup(&mut p, true); } else { markup_indented(&mut p, min_column); } @@ -128,8 +128,8 @@ pub fn parse_comment( } /// Parse markup. -fn markup(p: &mut Parser) { - markup_while(p, true, 0, &mut |_| true) +fn markup(p: &mut Parser, at_start: bool) { + markup_while(p, at_start, 0, &mut |_| true) } /// Parse markup that stays right of the given column. @@ -191,8 +191,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::EnDash | NodeKind::EmDash | NodeKind::NonBreakingSpace - | NodeKind::Emph - | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) | NodeKind::Math(_) @@ -200,6 +198,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } + // Grouping markup. 
+ NodeKind::Star => strong(p), + NodeKind::Underscore => emph(p), NodeKind::Eq => heading(p, *at_start), NodeKind::Minus => list_node(p, *at_start), NodeKind::EnumNumbering(_) => enum_node(p, *at_start), @@ -227,6 +228,24 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { *at_start = false; } +/// Parse strong content. +fn strong(p: &mut Parser) { + p.perform(NodeKind::Strong, |p| { + p.start_group(Group::Strong); + markup(p, false); + p.end_group(); + }) +} + +/// Parse emphasized content. +fn emph(p: &mut Parser) { + p.perform(NodeKind::Emph, |p| { + p.start_group(Group::Emph); + markup(p, false); + p.end_group(); + }) +} + /// Parse a heading. fn heading(p: &mut Parser, at_start: bool) { let marker = p.marker(); @@ -234,7 +253,7 @@ fn heading(p: &mut Parser, at_start: bool) { p.eat_assert(&NodeKind::Eq); while p.eat_if(&NodeKind::Eq) {} - if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_space()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::Heading); @@ -250,7 +269,7 @@ fn list_node(p: &mut Parser, at_start: bool) { let text: EcoString = p.peek_src().into(); p.eat_assert(&NodeKind::Minus); - if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_space()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::List); @@ -265,7 +284,7 @@ fn enum_node(p: &mut Parser, at_start: bool) { let text: EcoString = p.peek_src().into(); p.eat(); - if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_space()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::Enum); @@ -620,7 +639,7 @@ fn params(p: &mut Parser, marker: Marker) { fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { p.start_group(Group::Bracket); - markup(p); + 
markup(p, true); p.end_group(); }); } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 0184c198..db003e72 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -239,17 +239,18 @@ impl<'s> Parser<'s> { pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); self.tokens.set_mode(match kind { - Group::Bracket => TokenMode::Markup, - _ => TokenMode::Code, + Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup, + Group::Paren | Group::Brace | Group::Expr | Group::Imports => TokenMode::Code, }); - self.repeek(); match kind { Group::Paren => self.eat_assert(&NodeKind::LeftParen), Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), Group::Brace => self.eat_assert(&NodeKind::LeftBrace), - Group::Expr => {} - Group::Imports => {} + Group::Strong => self.eat_assert(&NodeKind::Star), + Group::Emph => self.eat_assert(&NodeKind::Underscore), + Group::Expr => self.repeek(), + Group::Imports => self.repeek(), } } @@ -273,6 +274,8 @@ impl<'s> Parser<'s> { Group::Paren => Some((NodeKind::RightParen, true)), Group::Bracket => Some((NodeKind::RightBracket, true)), Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Strong => Some((NodeKind::Star, true)), + Group::Emph => Some((NodeKind::Underscore, true)), Group::Expr => Some((NodeKind::Semicolon, false)), Group::Imports => None, } { @@ -322,9 +325,11 @@ impl<'s> Parser<'s> { Some(NodeKind::RightParen) => self.inside(Group::Paren), Some(NodeKind::RightBracket) => self.inside(Group::Bracket), Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Star) => self.inside(Group::Strong), + Some(NodeKind::Underscore) => self.inside(Group::Emph), Some(NodeKind::Semicolon) => self.inside(Group::Expr), Some(NodeKind::From) => self.inside(Group::Imports), - Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(NodeKind::Space(n)) => self.space_ends_group(*n), Some(_) => false, None => true, }; @@ -332,31 
+337,34 @@ impl<'s> Parser<'s> { /// Returns whether the given type can be skipped over. fn is_trivia(&self, token: &NodeKind) -> bool { - Self::is_trivia_ext(token, self.stop_at_newline()) - } - - /// Returns whether the given type can be skipped over given the current - /// newline mode. - fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { - NodeKind::Space(n) => *n == 0 || !stop_at_newline, + NodeKind::Space(n) => !self.space_ends_group(*n), NodeKind::LineComment => true, NodeKind::BlockComment => true, _ => false, } } - /// Whether the active group must end at a newline. - fn stop_at_newline(&self) -> bool { - matches!( - self.groups.last().map(|group| group.kind), - Some(Group::Expr | Group::Imports) - ) + /// Whether a space with the given number of newlines ends the current group. + fn space_ends_group(&self, n: usize) -> bool { + if n == 0 { + return false; + } + + match self.groups.last().map(|group| group.kind) { + Some(Group::Strong | Group::Emph) => n >= 2, + Some(Group::Expr | Group::Imports) => n >= 1, + _ => false, + } } - /// Whether we are inside the given group. + /// Whether we are inside the given group (can be nested). fn inside(&self, kind: Group) -> bool { - self.groups.iter().any(|g| g.kind == kind) + self.groups + .iter() + .rev() + .take_while(|g| !kind.is_weak() || g.kind.is_weak()) + .any(|g| g.kind == kind) } } @@ -431,15 +439,20 @@ impl Marker { F: Fn(&Green) -> Result<(), &'static str>, { for child in &mut p.children[self.0 ..] { - if (p.tokens.mode() == TokenMode::Markup - || !Parser::is_trivia_ext(child.kind(), false)) - && !child.kind().is_error() - { - if let Err(msg) = f(child) { - let error = NodeKind::Error(ErrorPos::Full, msg.into()); - let inner = mem::take(child); - *child = GreenNode::with_child(error, inner).into(); - } + // Don't expose errors. + if child.kind().is_error() { + continue; + } + + // Don't expose trivia in code. 
+ if p.tokens.mode() == TokenMode::Code && child.kind().is_trivia() { + continue; + } + + if let Err(msg) = f(child) { + let error = NodeKind::Error(ErrorPos::Full, msg.into()); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); } } } @@ -485,12 +498,23 @@ pub enum Group { Brace, /// A parenthesized group: `(...)`. Paren, + /// A group surrounded with stars: `*...*`. + Strong, + /// A group surrounded with underscore: `_..._`. + Emph, /// A group ended by a semicolon or a line break: `;`, `\n`. Expr, /// A group for import items, ended by a semicolon, line break or `from`. Imports, } +impl Group { + /// Whether the group can only force other weak groups to end. + fn is_weak(self) -> bool { + matches!(self, Group::Strong | Group::Emph) + } +} + /// Allows parser methods to use the try operator. Never returned top-level /// because the parser recovers from all errors. pub type ParseResult<T = ()> = Result<T, ParseError>; diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index eef7a72d..d741dea1 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -123,8 +123,8 @@ impl<'s> Tokens<'s> { // Markup. '~' => NodeKind::NonBreakingSpace, - '*' => NodeKind::Strong, - '_' => NodeKind::Emph, + '*' if !self.in_word() => NodeKind::Star, + '_' if !self.in_word() => NodeKind::Underscore, '`' => self.raw(), '$' => self.math(), '-' => self.hyph(), @@ -527,6 +527,13 @@ impl<'s> Tokens<'s> { NodeKind::BlockComment } + fn in_word(&self) -> bool { + let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric()); + let prev = self.s.get(.. 
self.s.last_index()).chars().next_back(); + let next = self.s.peek(); + alphanumeric(prev) && alphanumeric(next) + } + fn maybe_in_url(&self) -> bool { self.mode == TokenMode::Markup && self.s.eaten().ends_with(":/") } @@ -651,7 +658,7 @@ mod tests { ('/', None, "[", LeftBracket), ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), - ('/', Some(Markup), "*", Strong), + ('/', Some(Markup), "*", Star), ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Escape('\\')), ('/', Some(Markup), "#let", Let), @@ -790,8 +797,8 @@ mod tests { #[test] fn test_tokenize_markup_symbols() { // Test markup tokens. - t!(Markup[" a1"]: "*" => Strong); - t!(Markup: "_" => Emph); + t!(Markup[" a1"]: "*" => Star); + t!(Markup: "_" => Underscore); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); t!(Markup: "~" => NonBreakingSpace); |
