From ec884ec1d85f6e1d7868db3e82d572579cc5d345 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 5 Oct 2022 12:49:39 +0200 Subject: Refactor syntax module --- src/parse/incremental.rs | 37 ++++++++++++++++++++++++++++++++---- src/parse/mod.rs | 23 ++++++----------------- src/parse/parser.rs | 10 +++++----- src/parse/tokens.rs | 49 ++++++++++++++++++------------------------------ 4 files changed, 62 insertions(+), 57 deletions(-) (limited to 'src/parse') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 06096a75..e0be9b6d 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -96,11 +96,10 @@ fn try_reparse( && (ahead.is_none() || change.replaced.start > child_span.end) && !ahead.map_or(false, Ahead::is_compulsory) { - ahead = - Some(Ahead::new(pos, at_start, child.kind().is_bounded())); + ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind()))); } - at_start = child.kind().is_at_start(at_start); + at_start = next_at_start(child.kind(), at_start); } } SearchState::Inside(start) => { @@ -137,7 +136,7 @@ fn try_reparse( if let SearchState::Contained(pos) = search { // Do not allow replacement of elements inside of constructs whose // opening and closing brackets look the same. - let safe_inside = node.kind().is_bounded(); + let safe_inside = is_bounded(node.kind()); let child = &mut node.children_mut()[pos.idx]; let prev_len = child.len(); let prev_descendants = child.descendants(); @@ -384,6 +383,36 @@ enum ReparseMode { MarkupElements { at_start: bool, min_indent: usize }, } +/// Whether changes _inside_ this node are safely encapsulated, so that only +/// this node must be reparsed. +fn is_bounded(kind: &NodeKind) -> bool { + match kind { + NodeKind::CodeBlock + | NodeKind::ContentBlock + | NodeKind::Backslash + | NodeKind::Tilde + | NodeKind::HyphQuest + | NodeKind::Hyph2 + | NodeKind::Hyph3 + | NodeKind::Dot3 + | NodeKind::Quote { .. } + | NodeKind::BlockComment + | NodeKind::Space { .. 
} + | NodeKind::Escape(_) => true, + _ => false, + } +} + +/// Whether `at_start` would still be true after this node given the +/// previous value of the property. +fn next_at_start(kind: &NodeKind, prev: bool) -> bool { + match kind { + NodeKind::Space { newlines: (1 ..) } => true, + NodeKind::Space { .. } | NodeKind::LineComment | NodeKind::BlockComment => prev, + _ => false, + } +} + #[cfg(test)] #[rustfmt::skip] mod tests { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 7eb7343b..832c297e 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -22,17 +22,6 @@ pub fn parse(text: &str) -> SyntaxNode { p.finish().into_iter().next().unwrap() } -/// Parse math directly, only used for syntax highlighting. -pub fn parse_math(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, TokenMode::Math); - p.perform(NodeKind::Math, |p| { - while !p.eof() { - math_node(p); - } - }); - p.finish().into_iter().next().unwrap() -} - /// Parse code directly, only used for syntax highlighting. pub fn parse_code(text: &str) -> SyntaxNode { let mut p = Parser::new(text, TokenMode::Code); @@ -250,7 +239,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Text and markup. NodeKind::Text(_) - | NodeKind::Linebreak { .. 
} + | NodeKind::Backslash | NodeKind::Tilde | NodeKind::HyphQuest | NodeKind::Hyph2 @@ -353,7 +342,7 @@ fn list_node(p: &mut Parser, at_start: bool) { let min_indent = p.column(p.prev_end()); if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { markup_indented(p, min_indent); - marker.end(p, NodeKind::List); + marker.end(p, NodeKind::ListItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -368,7 +357,7 @@ fn enum_node(p: &mut Parser, at_start: bool) { let min_indent = p.column(p.prev_end()); if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { markup_indented(p, min_indent); - marker.end(p, NodeKind::Enum); + marker.end(p, NodeKind::EnumItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -385,7 +374,7 @@ fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult { markup_line(p, |node| matches!(node, NodeKind::Colon)); p.expect(NodeKind::Colon)?; markup_indented(p, min_indent); - marker.end(p, NodeKind::Desc); + marker.end(p, NodeKind::DescItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -485,7 +474,7 @@ fn math_primary(p: &mut Parser) { match token { // Spaces, atoms and expressions. NodeKind::Space { .. 
} - | NodeKind::Linebreak + | NodeKind::Backslash | NodeKind::Escape(_) | NodeKind::Atom(_) | NodeKind::Ident(_) => p.eat(), @@ -820,7 +809,7 @@ fn item(p: &mut Parser, keyed: bool) -> ParseResult { } if let Some(kind) = kind { msg.push_str(", found "); - msg.push_str(kind.as_str()); + msg.push_str(kind.name()); } let error = NodeKind::Error(SpanPos::Full, msg); marker.end(p, error); diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 12dd324b..4b73c2b9 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -159,7 +159,7 @@ impl<'s> Parser<'s> { self.eat(); Ok(()) } else { - self.expected(kind.as_str()); + self.expected(kind.name()); Err(ParseError) } } @@ -293,7 +293,7 @@ impl<'s> Parser<'s> { self.stray_terminator = s; rescan = false; } else if required { - self.expected(end.as_str()); + self.expected(end.name()); self.unterminated_group = true; } } @@ -397,7 +397,7 @@ impl Parser<'_> { /// Eat the current token and add an error that it is unexpected. pub fn unexpected(&mut self) { if let Some(found) = self.peek() { - let msg = format_eco!("unexpected {}", found); + let msg = format_eco!("unexpected {}", found.name()); let error = NodeKind::Error(SpanPos::Full, msg); self.perform(error, Self::eat); } @@ -421,7 +421,7 @@ impl Parser<'_> { pub fn expected_found(&mut self, thing: &str) { match self.peek() { Some(found) => { - let msg = format_eco!("expected {}, found {}", thing, found); + let msg = format_eco!("expected {}, found {}", thing, found.name()); let error = NodeKind::Error(SpanPos::Full, msg); self.perform(error, Self::eat); } @@ -492,7 +492,7 @@ impl Marker { let mut msg = EcoString::from(msg); if msg.starts_with("expected") { msg.push_str(", found "); - msg.push_str(child.kind().as_str()); + msg.push_str(child.kind().name()); } let error = NodeKind::Error(SpanPos::Full, msg); let inner = mem::take(child); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index d495afa0..d3c497f3 100644 --- a/src/parse/tokens.rs +++ 
b/src/parse/tokens.rs @@ -108,7 +108,9 @@ impl<'s> Iterator for Tokens<'s> { // Trivia. '/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('*') => self.block_comment(), - '*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()), + '*' if self.s.eat_if('/') => { + NodeKind::Error(SpanPos::Full, "unexpected end of block comment".into()) + } c if c.is_whitespace() => self.whitespace(c), // Other things. @@ -288,8 +290,8 @@ impl<'s> Tokens<'s> { } // Linebreaks. - Some(c) if c.is_whitespace() => NodeKind::Linebreak, - None => NodeKind::Linebreak, + Some(c) if c.is_whitespace() => NodeKind::Backslash, + None => NodeKind::Backslash, // Escapes. Some(c) => { @@ -517,7 +519,7 @@ impl<'s> Tokens<'s> { '"' => self.string(), // Invalid token. - _ => NodeKind::Unknown(self.s.from(start).into()), + _ => NodeKind::Error(SpanPos::Full, "not valid here".into()), } } @@ -556,7 +558,6 @@ impl<'s> Tokens<'s> { let number = self.s.get(start .. suffix_start); let suffix = self.s.from(suffix_start); - let all = self.s.from(start); // Find out whether it is a simple number. if suffix.is_empty() { @@ -577,10 +578,10 @@ impl<'s> Tokens<'s> { "em" => NodeKind::Numeric(f, Unit::Em), "fr" => NodeKind::Numeric(f, Unit::Fr), "%" => NodeKind::Numeric(f, Unit::Percent), - _ => NodeKind::Unknown(all.into()), + _ => NodeKind::Error(SpanPos::Full, "invalid number suffix".into()), } } else { - NodeKind::Unknown(all.into()) + NodeKind::Error(SpanPos::Full, "invalid number".into()) } } @@ -745,10 +746,6 @@ mod tests { NodeKind::Error(pos, message.into()) } - fn Invalid(invalid: &str) -> NodeKind { - NodeKind::Unknown(invalid.into()) - } - /// Building blocks for suffix testing. 
/// /// We extend each test case with a collection of different suffixes to make @@ -926,7 +923,7 @@ mod tests { t!(Markup: "_" => Underscore); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); - t!(Markup[" "]: r"\" => Linebreak); + t!(Markup[" "]: r"\" => Backslash); t!(Markup: "~" => Tilde); t!(Markup["a1/"]: "-?" => HyphQuest); t!(Markup["a "]: r"a--" => Text("a"), Hyph2); @@ -972,6 +969,9 @@ mod tests { t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); t!(Code[" /"]: "a-b" => Ident("a-b")); + + // Test invalid. + t!(Code: r"\" => Error(Full, "not valid here")); } #[test] @@ -1107,6 +1107,9 @@ mod tests { t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2)); t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3)); t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3)); + + // Test invalid. + t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix")); } #[test] @@ -1161,25 +1164,9 @@ mod tests { t!(Both[""]: "/*/*" => BlockComment); t!(Both[""]: "/**/" => BlockComment); t!(Both[""]: "/***" => BlockComment); - } - #[test] - fn test_tokenize_invalid() { - // Test invalidly closed block comments. - t!(Both: "*/" => Invalid("*/")); - t!(Both: "/**/*/" => BlockComment, Invalid("*/")); - - // Test invalid expressions. - t!(Code: r"\" => Invalid(r"\")); - t!(Code: "🌓" => Invalid("🌓")); - t!(Code: r"\:" => Invalid(r"\"), Colon); - t!(Code: "meal⌚" => Ident("meal"), Invalid("⌚")); - t!(Code[" /"]: r"\a" => Invalid(r"\"), Ident("a")); - t!(Code[" /"]: "#" => Invalid("#")); - - // Test invalid number suffixes. - t!(Code[" /"]: "1foo" => Invalid("1foo")); - t!(Code: "1p%" => Invalid("1p"), Invalid("%")); - t!(Code: "1%%" => Numeric(1.0, Unit::Percent), Invalid("%")); + // Test unexpected terminator. + t!(Both: "/*Hi*/*/" => BlockComment, + Error(Full, "unexpected end of block comment")); } } -- cgit v1.2.3