From ec884ec1d85f6e1d7868db3e82d572579cc5d345 Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Wed, 5 Oct 2022 12:49:39 +0200
Subject: Refactor syntax module

---
 src/parse/incremental.rs | 37 ++++++++++++++++++++++++++++++++----
 src/parse/mod.rs         | 23 ++++++-----------------
 src/parse/parser.rs      | 10 +++++-----
 src/parse/tokens.rs      | 49 ++++++++++++++++++------------------------------
 4 files changed, 62 insertions(+), 57 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 06096a75..e0be9b6d 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -96,11 +96,10 @@ fn try_reparse(
                         && (ahead.is_none() || change.replaced.start > child_span.end)
                         && !ahead.map_or(false, Ahead::is_compulsory)
                     {
-                        ahead =
-                            Some(Ahead::new(pos, at_start, child.kind().is_bounded()));
+                        ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind())));
                     }
 
-                    at_start = child.kind().is_at_start(at_start);
+                    at_start = next_at_start(child.kind(), at_start);
                 }
             }
             SearchState::Inside(start) => {
@@ -137,7 +136,7 @@ fn try_reparse(
     if let SearchState::Contained(pos) = search {
         // Do not allow replacement of elements inside of constructs whose
         // opening and closing brackets look the same.
-        let safe_inside = node.kind().is_bounded();
+        let safe_inside = is_bounded(node.kind());
         let child = &mut node.children_mut()[pos.idx];
         let prev_len = child.len();
         let prev_descendants = child.descendants();
@@ -384,6 +383,36 @@ enum ReparseMode {
     MarkupElements { at_start: bool, min_indent: usize },
 }
 
+/// Reports whether edits _inside_ a node of this kind stay contained in it,
+/// meaning a reparse of just that node is enough.
+fn is_bounded(kind: &NodeKind) -> bool {
+    matches!(
+        kind,
+        NodeKind::CodeBlock
+            | NodeKind::ContentBlock
+            | NodeKind::Backslash
+            | NodeKind::Tilde
+            | NodeKind::HyphQuest
+            | NodeKind::Hyph2
+            | NodeKind::Hyph3
+            | NodeKind::Dot3
+            | NodeKind::Quote { .. }
+            | NodeKind::BlockComment
+            | NodeKind::Space { .. }
+            | NodeKind::Escape(_)
+    )
+}
+
+/// Computes the `at_start` flag that holds after a node of this kind, where
+/// `prev` is the flag's value before the node.
+fn next_at_start(kind: &NodeKind, prev: bool) -> bool {
+    // A space with at least one newline sets the flag; other spaces keep it.
+    if let NodeKind::Space { newlines } = kind {
+        return *newlines >= 1 || prev;
+    }
+    matches!(kind, NodeKind::LineComment | NodeKind::BlockComment) && prev
+}
+
 #[cfg(test)]
 #[rustfmt::skip]
 mod tests {
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 7eb7343b..832c297e 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -22,17 +22,6 @@ pub fn parse(text: &str) -> SyntaxNode {
     p.finish().into_iter().next().unwrap()
 }
 
-/// Parse math directly, only used for syntax highlighting.
-pub fn parse_math(text: &str) -> SyntaxNode {
-    let mut p = Parser::new(text, TokenMode::Math);
-    p.perform(NodeKind::Math, |p| {
-        while !p.eof() {
-            math_node(p);
-        }
-    });
-    p.finish().into_iter().next().unwrap()
-}
-
 /// Parse code directly, only used for syntax highlighting.
 pub fn parse_code(text: &str) -> SyntaxNode {
     let mut p = Parser::new(text, TokenMode::Code);
@@ -250,7 +239,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
 
         // Text and markup.
         NodeKind::Text(_)
-        | NodeKind::Linebreak { .. }
+        | NodeKind::Backslash
         | NodeKind::Tilde
         | NodeKind::HyphQuest
         | NodeKind::Hyph2
@@ -353,7 +342,7 @@ fn list_node(p: &mut Parser, at_start: bool) {
     let min_indent = p.column(p.prev_end());
     if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
         markup_indented(p, min_indent);
-        marker.end(p, NodeKind::List);
+        marker.end(p, NodeKind::ListItem);
     } else {
         marker.convert(p, NodeKind::Text(text));
     }
@@ -368,7 +357,7 @@ fn enum_node(p: &mut Parser, at_start: bool) {
     let min_indent = p.column(p.prev_end());
     if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
         markup_indented(p, min_indent);
-        marker.end(p, NodeKind::Enum);
+        marker.end(p, NodeKind::EnumItem);
     } else {
         marker.convert(p, NodeKind::Text(text));
     }
@@ -385,7 +374,7 @@ fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult {
         markup_line(p, |node| matches!(node, NodeKind::Colon));
         p.expect(NodeKind::Colon)?;
         markup_indented(p, min_indent);
-        marker.end(p, NodeKind::Desc);
+        marker.end(p, NodeKind::DescItem);
     } else {
         marker.convert(p, NodeKind::Text(text));
     }
@@ -485,7 +474,7 @@ fn math_primary(p: &mut Parser) {
     match token {
         // Spaces, atoms and expressions.
         NodeKind::Space { .. }
-        | NodeKind::Linebreak
+        | NodeKind::Backslash
         | NodeKind::Escape(_)
         | NodeKind::Atom(_)
         | NodeKind::Ident(_) => p.eat(),
@@ -820,7 +809,7 @@ fn item(p: &mut Parser, keyed: bool) -> ParseResult<NodeKind> {
                 }
                 if let Some(kind) = kind {
                     msg.push_str(", found ");
-                    msg.push_str(kind.as_str());
+                    msg.push_str(kind.name());
                 }
                 let error = NodeKind::Error(SpanPos::Full, msg);
                 marker.end(p, error);
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 12dd324b..4b73c2b9 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -159,7 +159,7 @@ impl<'s> Parser<'s> {
             self.eat();
             Ok(())
         } else {
-            self.expected(kind.as_str());
+            self.expected(kind.name());
             Err(ParseError)
         }
     }
@@ -293,7 +293,7 @@ impl<'s> Parser<'s> {
                 self.stray_terminator = s;
                 rescan = false;
             } else if required {
-                self.expected(end.as_str());
+                self.expected(end.name());
                 self.unterminated_group = true;
             }
         }
@@ -397,7 +397,7 @@ impl Parser<'_> {
     /// Eat the current token and add an error that it is unexpected.
     pub fn unexpected(&mut self) {
         if let Some(found) = self.peek() {
-            let msg = format_eco!("unexpected {}", found);
+            let msg = format_eco!("unexpected {}", found.name());
             let error = NodeKind::Error(SpanPos::Full, msg);
             self.perform(error, Self::eat);
         }
@@ -421,7 +421,7 @@ impl Parser<'_> {
     pub fn expected_found(&mut self, thing: &str) {
         match self.peek() {
             Some(found) => {
-                let msg = format_eco!("expected {}, found {}", thing, found);
+                let msg = format_eco!("expected {}, found {}", thing, found.name());
                 let error = NodeKind::Error(SpanPos::Full, msg);
                 self.perform(error, Self::eat);
             }
@@ -492,7 +492,7 @@ impl Marker {
                 let mut msg = EcoString::from(msg);
                 if msg.starts_with("expected") {
                     msg.push_str(", found ");
-                    msg.push_str(child.kind().as_str());
+                    msg.push_str(child.kind().name());
                 }
                 let error = NodeKind::Error(SpanPos::Full, msg);
                 let inner = mem::take(child);
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index d495afa0..d3c497f3 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -108,7 +108,9 @@ impl<'s> Iterator for Tokens<'s> {
             // Trivia.
             '/' if self.s.eat_if('/') => self.line_comment(),
             '/' if self.s.eat_if('*') => self.block_comment(),
-            '*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()),
+            '*' if self.s.eat_if('/') => {
+                NodeKind::Error(SpanPos::Full, "unexpected end of block comment".into())
+            }
             c if c.is_whitespace() => self.whitespace(c),
 
             // Other things.
@@ -288,8 +290,8 @@ impl<'s> Tokens<'s> {
             }
 
             // Linebreaks.
-            Some(c) if c.is_whitespace() => NodeKind::Linebreak,
-            None => NodeKind::Linebreak,
+            Some(c) if c.is_whitespace() => NodeKind::Backslash,
+            None => NodeKind::Backslash,
 
             // Escapes.
             Some(c) => {
@@ -517,7 +519,7 @@ impl<'s> Tokens<'s> {
             '"' => self.string(),
 
             // Invalid token.
-            _ => NodeKind::Unknown(self.s.from(start).into()),
+            _ => NodeKind::Error(SpanPos::Full, "not valid here".into()),
         }
     }
 
@@ -556,7 +558,6 @@ impl<'s> Tokens<'s> {
 
         let number = self.s.get(start .. suffix_start);
         let suffix = self.s.from(suffix_start);
-        let all = self.s.from(start);
 
         // Find out whether it is a simple number.
         if suffix.is_empty() {
@@ -577,10 +578,10 @@ impl<'s> Tokens<'s> {
                 "em" => NodeKind::Numeric(f, Unit::Em),
                 "fr" => NodeKind::Numeric(f, Unit::Fr),
                 "%" => NodeKind::Numeric(f, Unit::Percent),
-                _ => NodeKind::Unknown(all.into()),
+                _ => NodeKind::Error(SpanPos::Full, "invalid number suffix".into()),
             }
         } else {
-            NodeKind::Unknown(all.into())
+            NodeKind::Error(SpanPos::Full, "invalid number".into())
         }
     }
 
@@ -745,10 +746,6 @@ mod tests {
         NodeKind::Error(pos, message.into())
     }
 
-    fn Invalid(invalid: &str) -> NodeKind {
-        NodeKind::Unknown(invalid.into())
-    }
-
     /// Building blocks for suffix testing.
     ///
     /// We extend each test case with a collection of different suffixes to make
@@ -926,7 +923,7 @@ mod tests {
         t!(Markup: "_"          => Underscore);
         t!(Markup[""]: "==="    => Eq, Eq, Eq);
         t!(Markup["a1/"]: "= "  => Eq, Space(0));
-        t!(Markup[" "]: r"\"    => Linebreak);
+        t!(Markup[" "]: r"\"    => Backslash);
         t!(Markup: "~"          => Tilde);
         t!(Markup["a1/"]: "-?"  => HyphQuest);
         t!(Markup["a "]: r"a--" => Text("a"), Hyph2);
@@ -972,6 +969,9 @@ mod tests {
         t!(Code[" /"]: "--1"  => Minus, Minus, Int(1));
         t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
         t!(Code[" /"]: "a-b"  => Ident("a-b"));
+
+        // Test invalid.
+        t!(Code: r"\" => Error(Full, "not valid here"));
     }
 
     #[test]
@@ -1107,6 +1107,9 @@ mod tests {
         t!(Code[" /"]: "1..2"   => Int(1), Dots, Int(2));
         t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
         t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
+
+        // Test invalid.
+        t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
     }
 
     #[test]
@@ -1161,25 +1164,9 @@ mod tests {
         t!(Both[""]: "/*/*" => BlockComment);
         t!(Both[""]: "/**/" => BlockComment);
         t!(Both[""]: "/***" => BlockComment);
-    }
 
-    #[test]
-    fn test_tokenize_invalid() {
-        // Test invalidly closed block comments.
-        t!(Both: "*/"     => Invalid("*/"));
-        t!(Both: "/**/*/" => BlockComment, Invalid("*/"));
-
-        // Test invalid expressions.
-        t!(Code: r"\"        => Invalid(r"\"));
-        t!(Code: "🌓"        => Invalid("🌓"));
-        t!(Code: r"\:"       => Invalid(r"\"), Colon);
-        t!(Code: "meal⌚"    => Ident("meal"), Invalid("⌚"));
-        t!(Code[" /"]: r"\a" => Invalid(r"\"), Ident("a"));
-        t!(Code[" /"]: "#"   => Invalid("#"));
-
-        // Test invalid number suffixes.
-        t!(Code[" /"]: "1foo" => Invalid("1foo"));
-        t!(Code: "1p%"        => Invalid("1p"), Invalid("%"));
-        t!(Code: "1%%"        => Numeric(1.0, Unit::Percent), Invalid("%"));
+        // Test unexpected terminator.
+        t!(Both: "/*Hi*/*/" => BlockComment,
+           Error(Full, "unexpected end of block comment"));
     }
 }
-- 
cgit v1.2.3