Added a test framework for incremental parsing

Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse
author: Martin Haug <mhaug@live.de> 2022-01-02 00:46:19 +0100
committer: Martin Haug <mhaug@live.de> 2022-01-02 00:46:19 +0100
commit: 5f114e18eb76a1937941b2ea64842b908c9ad89e (patch)
tree: 0541aa560b19e5805603fc06b3440f40db3d5fd1
parent: 289122e83c085668e56e52225c2dcfd9417d6262 (diff)
10 files changed, 322 insertions, 62 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 0e2d196c..1ee37a51 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -47,6 +47,10 @@ pub enum Precondition {
     /// safe left neighbors has to check this invariant. Otherwise, this node is
     /// safe.
     NotAtStart,
+    /// These nodes could end up somewhere else up the tree if the parse was
+    /// happening from scratch. The parse result has to be checked for such
+    /// nodes. They are safe to add if followed up by other nodes.
+    NotAtEnd,
     /// No additional requirements.
     None,
 }
@@ -88,6 +92,12 @@ impl Reparser<'_> {
         let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
         let child_count = green.children().len();
 
+        // Save the current indent if this is a markup node.
+        let indent = match green.kind() {
+            NodeKind::Markup(n) => *n,
+            _ => 0,
+        };
+
         let mut first = None;
         let mut at_start = true;
 
@@ -170,12 +180,29 @@ impl Reparser<'_> {
         }
 
         // We now have a child that we can replace and a function to do so.
-        let func = last_kind.reparsing_func(child_mode)?;
+        let func = last_kind.reparsing_func(child_mode, indent)?;
         let post = last_kind.post();
 
+        let mut column = if mode == TokenMode::Markup {
+            // In this case, we want to pass the indentation to the function.
+            Scanner::new(self.src).column(children_span.start)
+        } else {
+            0
+        };
+
+        // If the only child to be reparsed is a markup node, we pass its saved
+        // indent instead of the computed column.
+        if children_range.len() == 1 {
+            let child = &mut green.children_mut()[children_range.start];
+            if let NodeKind::Markup(n) = child.kind() {
+                column = *n;
+            }
+        }
+
         // The span of the to-be-reparsed children in the new source.
         let replace_span = children_span.start
-            .. children_span.end + self.replace_len - self.replace_range.len();
+            ..
+            children_span.end + self.replace_len - self.replace_range.len();
 
         // For atomic primaries we need to pass in the whole remaining string to
         // check whether the parser would eat more stuff illicitly.
@@ -186,7 +213,7 @@ impl Reparser<'_> {
         };
 
         // Do the reparsing!
-        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?;
+        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?;
 
         // Make sure that atomic primaries ate only what they were supposed to.
         if post == Postcondition::AtomicPrimary {
@@ -311,6 +338,14 @@ fn validate(
         at_start = child.kind().is_at_start(at_start);
     }
 
+    // Verify that the last of the newborns is not `NotAtEnd`.
+    if newborns
+        .last()
+        .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd)
+    {
+        return false;
+    }
+
     // We have to check whether the last non-trivia newborn is `AtStart` and
     // verify the indent of its right neighbors in order to make sure its
     // indentation requirements are fulfilled.
@@ -351,21 +386,22 @@ impl NodeKind {
     fn reparsing_func(
         &self,
         parent_mode: TokenMode,
-    ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
+        indent: usize,
+    ) -> Option<fn(&str, bool, usize) -> Option<(Vec<Green>, bool)>> {
         let mode = self.mode().unwrap_or(parent_mode);
         match self.post() {
             Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
             Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
             Postcondition::AtomicPrimary => Some(parse_atomic_markup),
             Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+                NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
                 NodeKind::Block => Some(parse_block),
                 NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
                 _ => None,
             },
             _ => match mode {
-                TokenMode::Markup if self == &Self::Markup => Some(parse_markup),
-                TokenMode::Markup => Some(parse_markup_elements),
+                TokenMode::Markup if indent == 0 => Some(parse_markup_elements),
                 _ => return None,
             },
         }
@@ -452,8 +488,9 @@ impl NodeKind {
                 Postcondition::UnsafeLayer
             }
 
-            // Only markup is expected at the points where it does occur.
-            Self::Markup => Postcondition::SameKind(None),
+            // Only markup is expected at the points where it does occur. The
+            // indentation must be preserved as well, also for the children.
+            Self::Markup(_) => Postcondition::SameKind(None),
 
             // These can appear everywhere and must not change to other stuff
             // because that could change the outer expression.
@@ -493,6 +530,10 @@ impl NodeKind {
             | Self::ImportExpr
             | Self::IncludeExpr => Postcondition::AtomicPrimary,
 
+            // This element always has to remain in the same column so better
+            // reparse the whole parent.
+            Self::Raw(_) => Postcondition::Unsafe,
+
             // These are all replaceable by other tokens.
             Self::Parbreak
             | Self::Linebreak
@@ -507,7 +548,6 @@ impl NodeKind {
             | Self::Heading
             | Self::Enum
             | Self::List
-            | Self::Raw(_)
             | Self::Math(_) => Postcondition::Safe,
         }
     }
@@ -517,6 +557,7 @@ impl NodeKind {
         match self {
             Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
             Self::TextInLine(_) => Precondition::NotAtStart,
+            Self::Error(_, _) => Precondition::NotAtEnd,
             _ => Precondition::None,
         }
     }
@@ -557,12 +598,12 @@ mod tests {
         test("a d e", 1 .. 3, " b c d", 0 .. 8);
         test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
         test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
-        test("= A heading", 3 .. 3, "n evocative", 2 .. 15);
+        test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
         test("your thing", 5 .. 5, "a", 4 .. 11);
         test("a your thing a", 6 .. 7, "a", 2 .. 12);
         test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
         test("#call() abc", 7 .. 7, "[]", 0 .. 10);
-        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 2 .. 35);
+        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 3 .. 34);
         test("hi\n- item\nno item\n    - item 3", 10 .. 10, "- ", 0 .. 32);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
@@ -571,7 +612,7 @@ mod tests {
         test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 10 .. 32);
         test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
         test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
-        test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
+        test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
         test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
         test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
 
@@ -596,7 +637,7 @@ mod tests {
         test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
         test("a #while x {\n g(x) \n}  b", 11 .. 11, "//", 0 .. 26);
         test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
-        test("a b c", 1 .. 1, "{[}", 0 .. 5);
+        test("a b c", 1 .. 1, "{[}", 0 .. 8);
 
         // Test unclosed things.
         test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
@@ -610,6 +651,6 @@ mod tests {
 
         // Test raw tokens.
         test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
-        test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
     }
 }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 2c5afb6b..f4826730 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -16,6 +16,7 @@ use std::rc::Rc;
 
 use crate::syntax::ast::{Associativity, BinOp, UnOp};
 use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
+use crate::util::EcoString;
 
 /// Parse a source file.
 pub fn parse(src: &str) -> Rc<GreenNode> {
@@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Code);
     primary(&mut p, true).ok()?;
     p.eject_partial()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
     markup_expr(&mut p);
     p.eject_partial()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
-    markup(&mut p);
+    if column == 0 {
+        markup(&mut p);
+    } else {
+        markup_indented(&mut p, column);
+    }
     p.eject()
 }
 
@@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 pub fn parse_markup_elements(
     src: &str,
     mut at_start: bool,
+    column: usize,
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
+    p.offset(column);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
     }
@@ -62,7 +69,7 @@ pub fn parse_markup_elements(
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !p.at(&NodeKind::LeftBracket) {
         return None;
@@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 }
 
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !p.at(&NodeKind::LeftBrace) {
         return None;
@@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 }
 
 /// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     comment(&mut p).ok()?;
     p.eject()
@@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 
 /// Parse markup.
 fn markup(p: &mut Parser) {
-    markup_while(p, true, &mut |_| true)
+    markup_while(p, true, 0, &mut |_| true)
 }
 
 /// Parse markup that stays right of the given column.
@@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) {
         _ => false,
     });
 
-    markup_while(p, false, &mut |p| match p.peek() {
-        Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
+    markup_while(p, false, column, &mut |p| match p.peek() {
+        Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column,
         _ => true,
     })
 }
@@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) {
 ///
 /// If `at_start` is true, things like headings that may only appear at the
 /// beginning of a line or template are allowed.
-fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
+fn markup_while<F>(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F)
 where
     F: FnMut(&mut Parser) -> bool,
 {
-    p.perform(NodeKind::Markup, |p| {
+    p.perform(NodeKind::Markup(column), |p| {
         while !p.eof() && f(p) {
             markup_node(p, &mut at_start);
         }
@@ -205,20 +212,32 @@ fn heading(p: &mut Parser) {
 
 /// Parse a single list item.
 fn list_node(p: &mut Parser) {
-    p.perform(NodeKind::List, |p| {
-        p.eat_assert(&NodeKind::Minus);
+    let marker = p.marker();
+    let src: EcoString = p.peek_src().into();
+    p.eat_assert(&NodeKind::Minus);
+
+    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::List);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(src));
+    }
 }
 
 /// Parse a single enum item.
 fn enum_node(p: &mut Parser) {
-    p.perform(NodeKind::Enum, |p| {
-        p.eat();
+    let marker = p.marker();
+    let src: EcoString = p.peek_src().into();
+    p.eat();
+
+    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Enum);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(src));
+    }
 }
 
 /// Parse an expression within markup mode.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index ade9b5df..b31f69d3 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,8 +21,12 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
-    /// Whether the last group was terminated.
-    last_terminated: bool,
+    /// Is `Some` if there is an unterminated group at the last position where
+    /// groups were terminated.
+    last_unterminated: Option<usize>,
+    /// Offset the indentation. This can be used if the parser is processing a
+    /// subslice of the source and there was leading indent.
+    column_offset: usize,
 }
 
 impl<'s> Parser<'s> {
@@ -38,7 +42,8 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_terminated: true,
+            last_unterminated: None,
+            column_offset: 0,
         }
     }
 
@@ -102,6 +107,11 @@ impl<'s> Parser<'s> {
             .then(|| (self.children, self.tokens.was_terminated()))
     }
 
+    /// Set an indentation offset.
+    pub fn offset(&mut self, columns: usize) {
+        self.column_offset = columns;
+    }
+
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -206,6 +216,12 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
+        self.tokens.scanner().column(index) + self.column_offset
+    }
+
+    /// Determine the column index for the given byte index while ignoring the
+    /// offset.
+    pub fn clean_column(&self, index: usize) -> usize {
         self.tokens.scanner().column(index)
     }
 
@@ -244,7 +260,11 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        self.last_terminated = true;
+        if let Some(n) = self.last_unterminated {
+            if n != self.prev_end() {
+                self.last_unterminated = None;
+            }
+        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -262,8 +282,14 @@ impl<'s> Parser<'s> {
                 self.eat();
                 rescan = false;
             } else if required {
+                // FIXME The error has to be inserted before any space rolls
+                // around because the rescan will set the cursor back in front
+                // of the space and reconsume it. Suppressing the rescan is not
+                // an option since additional rescans (e.g. for statements) can
+                // be triggered directly afterwards, without processing any
+                // other token.
                 self.push_error(format_eco!("expected {}", end));
-                self.last_terminated = false;
+                self.last_unterminated = Some(self.prev_end());
             }
         }
 
@@ -283,13 +309,21 @@ impl<'s> Parser<'s> {
 
     /// Check if the group processing was successfully terminated.
     pub fn group_success(&self) -> bool {
-        self.last_terminated && self.groups.is_empty()
+        self.last_unterminated.is_none() && self.groups.is_empty()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
+        if match kind {
+            NodeKind::Space(n) if n > 0 => true,
+            NodeKind::Parbreak => true,
+            _ => false,
+        } {
+            self.column_offset = 0;
+        }
+
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
@@ -346,6 +380,13 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
+        for i in (0 .. self.children.len()).rev() {
+            if Self::is_trivia_ext(self.children[i].kind(), false) {
+                self.children.remove(i);
+            } else {
+                break;
+            }
+        }
         self.children.push(GreenData::new(error, 0).into());
     }
 
@@ -445,6 +486,7 @@ impl Marker {
 }
 
 /// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
 struct GroupEntry {
     /// The kind of group this is. This decides which tokens will end the group.
     /// For example, a [`Group::Paren`] will be ended by
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 836e8cf1..3a0ad1ad 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -293,10 +293,8 @@ impl<'s> Tokens<'s> {
             } else {
                 NodeKind::EnDash
             }
-        } else if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::Minus
         } else {
-            NodeKind::Text('-'.into())
+            NodeKind::Minus
         }
     }
 
@@ -312,11 +310,7 @@ impl<'s> Tokens<'s> {
             None
         };
 
-        if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::EnumNumbering(number)
-        } else {
-            NodeKind::Text(self.s.eaten_from(start).into())
-        }
+        NodeKind::EnumNumbering(number)
     }
 
     fn raw(&mut self) -> NodeKind {
@@ -742,12 +736,12 @@ mod tests {
     fn test_tokenize_text() {
         // Test basic text.
         t!(Markup[" /"]: "hello"       => Text("hello"));
-        t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
+        t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world"));
 
         // Test code symbols in text.
         t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
         t!(Markup[" /"]: ";:,|/+"  => Text(";:,|"), Text("/+"));
-        t!(Markup[" /"]: "=-a"     => Text("="), Text("-"), Text("a"));
+        t!(Markup[" /"]: "=-a"     => Text("="), Minus, Text("a"));
         t!(Markup[" "]: "#123"     => Text("#"), Text("123"));
 
         // Test text ends.
@@ -804,7 +798,7 @@ mod tests {
         t!(Markup["a1/"]: "- "  => Minus, Space(0));
         t!(Markup[" "]: "."     => EnumNumbering(None));
         t!(Markup[" "]: "1."    => EnumNumbering(Some(1)));
-        t!(Markup[" "]: "1.a"   => Text("1."), Text("a"));
+        t!(Markup[" "]: "1.a"   => EnumNumbering(Some(1)), Text("a"));
         t!(Markup[" /"]: "a1."  => Text("a1."));
     }
 
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index ed74dfe5..bea4ef00 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -53,7 +53,7 @@ macro_rules! node {
 
 node! {
     /// The syntactical root capable of representing a full parsed document.
-    Markup
+    Markup: NodeKind::Markup(_)
 }
 
 impl Markup {
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs
index 21af060f..9f7365a8 100644
--- a/src/syntax/highlight.rs
+++ b/src/syntax/highlight.rs
@@ -154,7 +154,7 @@ impl Category {
             NodeKind::Str(_) => Some(Category::String),
             NodeKind::Error(_, _) => Some(Category::Invalid),
             NodeKind::Unknown(_) => Some(Category::Invalid),
-            NodeKind::Markup => None,
+            NodeKind::Markup(_) => None,
             NodeKind::Space(_) => None,
             NodeKind::Parbreak => None,
             NodeKind::Text(_) => None,
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index b72e5843..388d0bb0 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -64,6 +64,14 @@ impl Green {
         }
     }
 
+    /// Whether the node is a leaf node in the green tree.
+    pub fn is_leaf(&self) -> bool {
+        match self {
+            Green::Node(n) => n.children().is_empty(),
+            Green::Token(_) => true,
+        }
+    }
+
     /// Change the type of the node.
     pub fn convert(&mut self, kind: NodeKind) {
         match self {
@@ -361,6 +369,11 @@ impl<'a> RedRef<'a> {
         Span::new(self.id, self.offset, self.offset + self.green.len())
     }
 
+    /// Whether the node is a leaf node.
+    pub fn is_leaf(self) -> bool {
+        self.green.is_leaf()
+    }
+
     /// The error messages for this node and its descendants.
     pub fn errors(self) -> Vec<Error> {
         if !self.green.erroneous() {
@@ -385,6 +398,14 @@ impl<'a> RedRef<'a> {
         }
     }
 
+    /// Collect this node and all of its descendants in depth-first order.
+    pub fn all_children(&self) -> Vec<Self> {
+        let mut res = vec![self.clone()];
+        res.extend(self.children().flat_map(|child| child.all_children().into_iter()));
+
+        res
+    }
+
     /// Convert the node to a typed AST node.
     pub fn cast<T>(self) -> Option<T>
     where
@@ -562,8 +583,8 @@ pub enum NodeKind {
     Include,
     /// The `from` keyword.
     From,
-    /// Template markup.
-    Markup,
+    /// Template markup of which all lines must start in some column.
+    Markup(usize),
     /// One or more whitespace characters.
     Space(usize),
     /// A forced line break: `\`.
@@ -738,7 +759,7 @@ impl NodeKind {
     /// Whether this token appears in Markup.
     pub fn mode(&self) -> Option<TokenMode> {
         match self {
-            Self::Markup
+            Self::Markup(_)
             | Self::Linebreak
             | Self::Parbreak
             | Self::Text(_)
@@ -823,7 +844,7 @@ impl NodeKind {
             Self::Import => "keyword `import`",
             Self::Include => "keyword `include`",
             Self::From => "keyword `from`",
-            Self::Markup => "markup",
+            Self::Markup(_) => "markup",
             Self::Space(_) => "space",
             Self::Linebreak => "forced linebreak",
             Self::Parbreak => "paragraph break",
diff --git a/tests/typ/code/block.typ b/tests/typ/code/block.typ
index 45ee9204..5939ba9c 100644
--- a/tests/typ/code/block.typ
+++ b/tests/typ/code/block.typ
@@ -129,7 +129,7 @@
 }
 
 ---
-// Error: 2:1 expected closing brace
+// Error: 2 expected closing brace
 {
 
 ---
diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ
index 7fd6e0da..a95d651a 100644
--- a/tests/typ/code/let.typ
+++ b/tests/typ/code/let.typ
@@ -57,7 +57,7 @@ Three
 
 // Terminated by semicolon even though we are in a paren group.
 // Error: 18 expected expression
-// Error: 19 expected closing paren
+// Error: 18 expected closing paren
 #let v5 = (1, 2 + ; Five
 
 ---
diff --git a/tests/typeset.rs b/tests/typeset.rs
index 164ccc91..f23de5cd 100644
--- a/tests/typeset.rs
+++ b/tests/typeset.rs
@@ -19,8 +19,8 @@ use typst::image::{Image, RasterImage, Svg};
 use typst::library::{PageNode, TextNode};
 use typst::loading::FsLoader;
 use typst::parse::Scanner;
-use typst::source::SourceFile;
-use typst::syntax::Span;
+use typst::source::{SourceFile, SourceId};
+use typst::syntax::{RedNode, Span};
 use typst::Context;
 
 #[cfg(feature = "layout-cache")]
@@ -186,6 +186,7 @@ fn test(
     let mut line = 0;
     let mut compare_ref = true;
     let mut compare_ever = false;
+    let mut rng = LinearShift::new();
 
     let parts: Vec<_> = src.split("\n---").collect();
     for (i, &part) in parts.iter().enumerate() {
@@ -202,8 +203,16 @@ fn test(
                 }
             }
         } else {
-            let (part_ok, compare_here, part_frames) =
-                test_part(ctx, src_path, part.into(), i, compare_ref, line, debug);
+            let (part_ok, compare_here, part_frames) = test_part(
+                ctx,
+                src_path,
+                part.into(),
+                i,
+                compare_ref,
+                line,
+                debug,
+                &mut rng,
+            );
             ok &= part_ok;
             compare_ever |= compare_here;
             frames.extend(part_frames);
@@ -252,14 +261,16 @@ fn test_part(
     compare_ref: bool,
     line: usize,
     debug: bool,
+    rng: &mut LinearShift,
 ) -> (bool, bool, Vec<Rc<Frame>>) {
+    let mut ok = test_reparse(&src, i, rng);
+
     let id = ctx.sources.provide(src_path, src);
     let source = ctx.sources.get(id);
 
     let (local_compare_ref, mut ref_errors) = parse_metadata(&source);
     let compare_ref = local_compare_ref.unwrap_or(compare_ref);
 
-    let mut ok = true;
     let (frames, mut errors) = match ctx.evaluate(id) {
         Ok(module) => {
             let tree = module.into_root();
@@ -366,6 +377,108 @@ fn test_incremental(
     ok
 }
 
+/// Pseudorandomly edit the source file and test whether a reparse produces the
+/// same result as a clean parse.
+///
+/// The method will first replace one pseudorandom range per 400 source
+/// characters (rounded up) with a randomly chosen string and then select one
+/// leaf node boundary to inject another randomly chosen string from the list.
+fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
+    let supplements = [
+        "[",
+        ")",
+        "#rect()",
+        "a word",
+        ", a: 1",
+        "10.0",
+        ":",
+        "if i == 0 {true}",
+        "for",
+        "* hello *",
+        "//",
+        "/*",
+        "\\u{12e4}",
+        "```typst",
+        " ",
+        "trees",
+        "\\",
+        "$ a $",
+        "2.",
+        "-",
+        "5",
+    ];
+
+    let mut ok = true;
+
+    let apply = |replace: std::ops::Range<usize>, with| {
+        let mut incr_source = SourceFile::detached(src);
+
+        incr_source.edit(replace.clone(), with);
+        let edited_src = incr_source.src();
+
+        let ref_source = SourceFile::detached(edited_src);
+        let incr_root = incr_source.root();
+        let ref_root = ref_source.root();
+        if incr_root != ref_root {
+            println!(
+                "    Subtest {} reparse differs from clean parse when inserting '{}' at {}-{} ❌",
+                i, with, replace.start, replace.end,
+            );
+            println!(
+                "\n    Expected reference tree:\n{:#?}\n\n    Found incremental tree:\n{:#?}",
+                ref_root, incr_root
+            );
+            println!("Full source ({}):\n\"{}\"", edited_src.len(), edited_src);
+            false
+        } else {
+            true
+        }
+    };
+
+    let mut in_range = |range: std::ops::Range<usize>| {
+        let full = rng.next().unwrap() as f64 / u64::MAX as f64;
+        (range.start as f64 + full * (range.end as f64 - range.start as f64)).floor()
+            as usize
+    };
+
+    let insertions = (src.len() as f64 / 400.0).ceil() as usize;
+
+    for _ in 0 .. insertions {
+        let supplement = supplements[in_range(0 .. supplements.len())];
+        let start = in_range(0 .. src.len());
+        let end = in_range(start .. src.len());
+
+        if !src.is_char_boundary(start) || !src.is_char_boundary(end) {
+            continue;
+        }
+
+        if !apply(start .. end, supplement) {
+            println!("original tree: {:#?}", SourceFile::detached(src).root());
+
+            ok = false;
+        }
+    }
+
+    let red = RedNode::from_root(
+        SourceFile::detached(src).root().clone(),
+        SourceId::from_raw(0),
+    );
+
+    let leafs: Vec<_> = red
+        .as_ref()
+        .all_children()
+        .into_iter()
+        .filter(|red| red.is_leaf())
+        .collect();
+
+    let leaf_start = leafs[in_range(0 .. leafs.len())].span().start;
+    let supplement = supplements[in_range(0 .. supplements.len())];
+
+    ok &= apply(leaf_start .. leaf_start, supplement);
+
+    ok
+}
+
 fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
     let mut compare_ref = None;
     let mut errors = vec![];
@@ -823,3 +936,33 @@ where
     FileDescriptor::redirect_stdio(&stdout, Stdout).unwrap();
     result
 }
+
+/// This is a linear-feedback shift register using XOR as its shifting
+/// function. It can be used as a PRNG.
+struct LinearShift(u64);
+
+impl LinearShift {
+    /// Initialize the shift register with a pre-set seed.
+    pub fn new() -> Self {
+        Self(0xACE5)
+    }
+}
+
+impl Iterator for LinearShift {
+    type Item = u64;
+
+    /// Apply the shift.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0 ^= self.0 >> 3;
+        self.0 ^= self.0 << 14;
+        self.0 ^= self.0 >> 28;
+        self.0 ^= self.0 << 36;
+        self.0 ^= self.0 >> 52;
+        Some(self.0)
+    }
+
+    /// The iterator is endless but will repeat eventually.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (usize::MAX, None)
+    }
+}
author	Martin Haug <mhaug@live.de>	2022-01-02 00:46:19 +0100
committer	Martin Haug <mhaug@live.de>	2022-01-02 00:46:19 +0100
commit	5f114e18eb76a1937941b2ea64842b908c9ad89e (patch)
tree	0541aa560b19e5805603fc06b3440f40db3d5fd1
parent	289122e83c085668e56e52225c2dcfd9417d6262 (diff)