From 1e4cab393e55df8875c6303ebb7bde8f09f911c9 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Tue, 2 Nov 2021 12:06:22 +0100
Subject: Introduce incremental parsing

---
 src/parse/mod.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/parse')
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 10aaad23..1ab2fb15 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -25,6 +25,20 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
     }
 }
 
+/// Parse a block. Returns `Some` if there was only one block.
+pub fn parse_block(source: &str) -> Option<Rc<GreenNode>> {
+    let mut p = Parser::new(source);
+    block(&mut p);
+    if p.eof() {
+        match p.finish().into_iter().next() {
+            Some(Green::Node(node)) => Some(node),
+            _ => unreachable!(),
+        }
+    } else {
+        None
+    }
+}
+
 /// Parse markup.
 fn markup(p: &mut Parser) {
     markup_while(p, true, &mut |_| true)
-- 
cgit v1.2.3


From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sat, 6 Nov 2021 16:07:21 +0100
Subject: Incremental-safety based approach

---
 src/parse/mod.rs    | 71 +++++++++++++++++++++++++++++++++++------------------
 src/parse/parser.rs | 24 ++++++++++++++++++
 2 files changed, 71 insertions(+), 24 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 1ab2fb15..5d845a55 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -25,18 +25,36 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
     }
 }
 
-/// Parse a block. Returns `Some` if there was only one block.
-pub fn parse_block(source: &str) -> Option<Rc<GreenNode>> {
+/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
+pub fn parse_atomic(source: &str, _: bool) -> Option<Vec<Green>> {
     let mut p = Parser::new(source);
-    block(&mut p);
-    if p.eof() {
-        match p.finish().into_iter().next() {
-            Some(Green::Node(node)) => Some(node),
-            _ => unreachable!(),
-        }
-    } else {
-        None
+    primary(&mut p, true).ok()?;
+    p.eject()
+}
+
+/// Parse some markup. Returns `Some` if all of the input was consumed.
+pub fn parse_markup(source: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source);
+    markup(&mut p);
+    p.eject()
+}
+
+/// Parse some markup without the topmost node. Returns `Some` if all of the
+/// input was consumed.
+pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source);
+    while !p.eof() {
+        markup_node(&mut p, &mut at_start);
     }
+    p.eject()
+}
+
+/// Parse some code. Returns `Some` if all of the input was consumed.
+pub fn parse_code(source: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source);
+    p.set_mode(TokenMode::Code);
+    expr_list(&mut p);
+    p.eject()
 }
 
 /// Parse markup.
@@ -118,7 +136,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
 
         // Line-based markup that is not currently at the start of the line.
         NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
-            p.convert(NodeKind::Text(p.peek_src().into()));
+            p.convert(NodeKind::TextInLine(p.peek_src().into()))
         }
 
         // Hashtag + keyword / identifier.
@@ -196,7 +214,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
     let marker = p.marker();
 
     // Start the unary expression.
-    match p.peek().and_then(UnOp::from_token) {
+    match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() {
         Some(op) => {
             p.eat();
             let prec = op.precedence();
@@ -268,7 +286,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
         }
 
         // Structures.
-        Some(NodeKind::LeftParen) => parenthesized(p),
+        Some(NodeKind::LeftParen) => parenthesized(p, atomic),
         Some(NodeKind::LeftBracket) => {
             template(p);
             Ok(())
@@ -329,7 +347,7 @@ fn literal(p: &mut Parser) -> bool {
 /// - Dictionary literal
 /// - Parenthesized expression
 /// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser) -> ParseResult {
+fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
     let marker = p.marker();
 
     p.start_group(Group::Paren);
@@ -344,7 +362,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
     }
 
     // Arrow means this is a closure's parameter list.
-    if p.at(&NodeKind::Arrow) {
+    if !atomic && p.at(&NodeKind::Arrow) {
         params(p, marker);
         p.eat_assert(&NodeKind::Arrow);
         return marker.perform(p, NodeKind::Closure, expr);
@@ -507,18 +525,23 @@ fn template(p: &mut Parser) {
 fn block(p: &mut Parser) {
     p.perform(NodeKind::Block, |p| {
         p.start_group(Group::Brace);
-        while !p.eof() {
-            p.start_group(Group::Stmt);
-            if expr(p).is_ok() && !p.eof() {
-                p.expected_at("semicolon or line break");
-            }
-            p.end_group();
+        expr_list(p);
+        p.end_group();
+    });
+}
 
-            // Forcefully skip over newlines since the group's contents can't.
-            p.eat_while(|t| matches!(t, NodeKind::Space(_)));
+/// Parse a number of code expressions.
+fn expr_list(p: &mut Parser) {
+    while !p.eof() {
+        p.start_group(Group::Stmt);
+        if expr(p).is_ok() && !p.eof() {
+            p.expected_at("semicolon or line break");
         }
         p.end_group();
-    });
+
+        // Forcefully skip over newlines since the group's contents can't.
+        p.eat_while(|t| matches!(t, NodeKind::Space(_)));
+    }
 }
 
 /// Parse a function call.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index af8a7c5c..f391c473 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,6 +21,8 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
+    /// Whether the last group was terminated.
+    last_group_terminated: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -36,6 +38,7 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
+            last_group_terminated: true,
         }
     }
 
@@ -44,6 +47,15 @@ impl<'s> Parser<'s> {
         self.children
     }
 
+    /// End the parsing process and return multiple children.
+    pub fn eject(self) -> Option<Vec<Green>> {
+        if self.eof() && self.group_success() {
+            Some(self.children)
+        } else {
+            None
+        }
+    }
+
     /// Create a new marker.
     pub fn marker(&mut self) -> Marker {
         Marker(self.children.len())
@@ -190,6 +202,11 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().column(index)
     }
 
+    /// Set the tokenizer's mode.
+    pub fn set_mode(&mut self, mode: TokenMode) {
+        self.tokens.set_mode(mode);
+    }
+
     /// Continue parsing in a group.
     ///
     /// When the end delimiter of the group is reached, all subsequent calls to
@@ -225,6 +242,7 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
+        self.last_group_terminated = true;
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -243,6 +261,7 @@ impl<'s> Parser<'s> {
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
+                self.last_group_terminated = false;
             }
         }
 
@@ -260,6 +279,11 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Check if the group processing was successfully terminated.
+    pub fn group_success(&self) -> bool {
+        self.last_group_terminated && self.groups.is_empty()
+    }
+
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
-- 
cgit v1.2.3


From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 7 Nov 2021 19:43:01 +0100
Subject: Tests

- length updates
- dealing with keywords and comments
---
 src/parse/mod.rs    | 19 +++++++++----------
 src/parse/parser.rs |  9 ++-------
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 5d845a55..02777350 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
 
 /// Parse a source file.
 pub fn parse(src: &str) -> Rc<GreenNode> {
-    let mut p = Parser::new(src);
+    let mut p = Parser::new(src, TokenMode::Markup);
     markup(&mut p);
     match p.finish().into_iter().next() {
         Some(Green::Node(node)) => node,
@@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(source: &str, _: bool) -> Option<Vec<Green>> {
-    let mut p = Parser::new(source);
+pub fn parse_atomic(src: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(src, TokenMode::Code);
     primary(&mut p, true).ok()?;
     p.eject()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(source: &str, _: bool) -> Option<Vec<Green>> {
-    let mut p = Parser::new(source);
+pub fn parse_markup(src: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(src, TokenMode::Markup);
     markup(&mut p);
     p.eject()
 }
 
 /// Parse some markup without the topmost node. Returns `Some` if all of the
 /// input was consumed.
-pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Green>> {
-    let mut p = Parser::new(source);
+pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(src, TokenMode::Markup);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
     }
@@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Gre
 }
 
 /// Parse some code. Returns `Some` if all of the input was consumed.
-pub fn parse_code(source: &str, _: bool) -> Option<Vec<Green>> {
-    let mut p = Parser::new(source);
-    p.set_mode(TokenMode::Code);
+pub fn parse_code(src: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(src, TokenMode::Code);
     expr_list(&mut p);
     p.eject()
 }
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f391c473..451e18f1 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -27,8 +27,8 @@ pub struct Parser<'s> {
 
 impl<'s> Parser<'s> {
     /// Create a new parser for the source string.
-    pub fn new(src: &'s str) -> Self {
-        let mut tokens = Tokens::new(src, TokenMode::Markup);
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
+        let mut tokens = Tokens::new(src, mode);
         let current = tokens.next();
         Self {
             tokens,
@@ -202,11 +202,6 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().column(index)
     }
 
-    /// Set the tokenizer's mode.
-    pub fn set_mode(&mut self, mode: TokenMode) {
-        self.tokens.set_mode(mode);
-    }
-
     /// Continue parsing in a group.
     ///
     /// When the end delimiter of the group is reached, all subsequent calls to
-- 
cgit v1.2.3


From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Mon, 8 Nov 2021 12:01:35 +0100
Subject: Deal with the effects of keywords

---
 src/parse/mod.rs    | 43 ++++++++++++++++++++++++++++++++++++++-----
 src/parse/parser.rs |  6 ++++++
 2 files changed, 44 insertions(+), 5 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 02777350..afeb34f1 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -29,7 +29,7 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 pub fn parse_atomic(src: &str, _: bool) -> Option<Vec<Green>> {
     let mut p = Parser::new(src, TokenMode::Code);
     primary(&mut p, true).ok()?;
-    p.eject()
+    p.eject_partial()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
@@ -49,10 +49,32 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>
     p.eject()
 }
 
-/// Parse some code. Returns `Some` if all of the input was consumed.
-pub fn parse_code(src: &str, _: bool) -> Option<Vec<Green>> {
-    let mut p = Parser::new(src, TokenMode::Code);
-    expr_list(&mut p);
+/// Parse a template literal. Returns `Some` if all of the input was consumed.
+pub fn parse_template(source: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source, TokenMode::Code);
+    if !matches!(p.peek(), Some(NodeKind::LeftBracket)) {
+        return None;
+    }
+
+    template(&mut p);
+    p.eject()
+}
+
+/// Parse a code block. Returns `Some` if all of the input was consumed.
+pub fn parse_block(source: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source, TokenMode::Code);
+    if !matches!(p.peek(), Some(NodeKind::LeftBrace)) {
+        return None;
+    }
+
+    block(&mut p);
+    p.eject()
+}
+
+/// Parse a comment. Returns `Some` if all of the input was consumed.
+pub fn parse_comment(source: &str, _: bool) -> Option<Vec<Green>> {
+    let mut p = Parser::new(source, TokenMode::Code);
+    comment(&mut p).ok()?;
     p.eject()
 }
 
@@ -742,3 +764,14 @@ fn body(p: &mut Parser) -> ParseResult {
     }
     Ok(())
 }
+
+/// Parse a comment.
+fn comment(p: &mut Parser) -> ParseResult {
+    match p.peek() {
+        Some(NodeKind::LineComment | NodeKind::BlockComment) => {
+            p.eat();
+            Ok(())
+        }
+        _ => Err(()),
+    }
+}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 451e18f1..31c918a8 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -95,6 +95,12 @@ impl<'s> Parser<'s> {
         output
     }
 
+    /// End the parsing process and return multiple children, even if there
+    /// remains stuff in the string.
+    pub fn eject_partial(self) -> Option<Vec<Green>> {
+        self.group_success().then(|| self.children)
+    }
+
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
-- 
cgit v1.2.3


From 7a631d8b09bbffa8c7d90a1038d986876370ea7a Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Tue, 9 Nov 2021 13:07:55 +0100
Subject: Simplify node mode management

---
 src/parse/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index afeb34f1..1f1ac266 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -772,6 +772,6 @@ fn comment(p: &mut Parser) -> ParseResult {
             p.eat();
             Ok(())
         }
-        _ => Err(()),
+        _ => Err(ParseError),
     }
 }
-- 
cgit v1.2.3


From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Wed, 10 Nov 2021 20:41:10 +0100
Subject: Comments and neighbors

---
 src/parse/mod.rs    | 15 +++++++++------
 src/parse/parser.rs |  9 +++++----
 src/parse/tokens.rs | 26 +++++++++++++++++++++++++-
 3 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 1f1ac266..f2fae5f2 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -26,14 +26,14 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool) -> Option<Vec<Green>> {
+pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Code);
     primary(&mut p, true).ok()?;
     p.eject_partial()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool) -> Option<Vec<Green>> {
+pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
     markup(&mut p);
     p.eject()
@@ -41,7 +41,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<Vec<Green>> {
 
 /// Parse some markup without the topmost node. Returns `Some` if all of the
 /// input was consumed.
-pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>> {
+pub fn parse_markup_elements(
+    src: &str,
+    mut at_start: bool,
+) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
@@ -50,7 +53,7 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool) -> Option<Vec<Green>> {
+pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !matches!(p.peek(), Some(NodeKind::LeftBracket)) {
         return None;
@@ -61,7 +64,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<Vec<Green>> {
 }
 
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool) -> Option<Vec<Green>> {
+pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !matches!(p.peek(), Some(NodeKind::LeftBrace)) {
         return None;
@@ -72,7 +75,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<Vec<Green>> {
 }
 
 /// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool) -> Option<Vec<Green>> {
+pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     comment(&mut p).ok()?;
     p.eject()
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 31c918a8..a37cb9c6 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -48,9 +48,9 @@ impl<'s> Parser<'s> {
     }
 
     /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<Vec<Green>> {
+    pub fn eject(self) -> Option<(Vec<Green>, bool)>{
         if self.eof() && self.group_success() {
-            Some(self.children)
+            Some((self.children, self.tokens.was_unterminated()))
         } else {
             None
         }
@@ -97,8 +97,9 @@ impl<'s> Parser<'s> {
 
     /// End the parsing process and return multiple children, even if there
     /// remains stuff in the string.
-    pub fn eject_partial(self) -> Option<Vec<Green>> {
-        self.group_success().then(|| self.children)
+    pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
+        self.group_success()
+            .then(|| (self.children, self.tokens.was_unterminated()))
     }
 
     /// Whether the end of the source string or group is reached.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 27ec046d..7be31fe1 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -13,6 +13,7 @@ use crate::util::EcoString;
 pub struct Tokens<'s> {
     s: Scanner<'s>,
     mode: TokenMode,
+    has_unterminated: bool,
 }
 
 /// What kind of tokens to emit.
@@ -28,7 +29,11 @@ impl<'s> Tokens<'s> {
     /// Create a new token iterator with the given mode.
     #[inline]
     pub fn new(src: &'s str, mode: TokenMode) -> Self {
-        Self { s: Scanner::new(src), mode }
+        Self {
+            s: Scanner::new(src),
+            mode,
+            has_unterminated: false,
+        }
     }
 
     /// Get the current token mode.
@@ -63,6 +68,12 @@ impl<'s> Tokens<'s> {
     pub fn scanner(&self) -> Scanner<'s> {
         self.s
     }
+
+    /// Whether the last token was unterminated.
+    #[inline]
+    pub fn was_unterminated(&self) -> bool {
+        self.has_unterminated
+    }
 }
 
 impl<'s> Iterator for Tokens<'s> {
@@ -248,6 +259,7 @@ impl<'s> Tokens<'s> {
                             )
                         }
                     } else {
+                        self.has_unterminated = true;
                         NodeKind::Error(
                             ErrorPos::End,
                             "expected closing brace".into(),
@@ -346,6 +358,7 @@ impl<'s> Tokens<'s> {
             let remaining = backticks - found;
             let noun = if remaining == 1 { "backtick" } else { "backticks" };
 
+            self.has_unterminated = true;
             NodeKind::Error(
                 ErrorPos::End,
                 if found == 0 {
@@ -393,6 +406,7 @@ impl<'s> Tokens<'s> {
                 display,
             }))
         } else {
+            self.has_unterminated = true;
             NodeKind::Error(
                 ErrorPos::End,
                 if !display || (!escaped && dollar) {
@@ -481,18 +495,23 @@ impl<'s> Tokens<'s> {
         if self.s.eat_if('"') {
             NodeKind::Str(string)
         } else {
+            self.has_unterminated = true;
             NodeKind::Error(ErrorPos::End, "expected quote".into())
         }
     }
 
     fn line_comment(&mut self) -> NodeKind {
         self.s.eat_until(is_newline);
+        if self.s.peek().is_none() {
+            self.has_unterminated = true;
+        }
         NodeKind::LineComment
     }
 
     fn block_comment(&mut self) -> NodeKind {
         let mut state = '_';
         let mut depth = 1;
+        let mut terminated = false;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
         while let Some(c) = self.s.eat() {
@@ -500,6 +519,7 @@ impl<'s> Tokens<'s> {
                 ('*', '/') => {
                     depth -= 1;
                     if depth == 0 {
+                        terminated = true;
                         break;
                     }
                     '_'
@@ -512,6 +532,10 @@ impl<'s> Tokens<'s> {
             }
         }
 
+        if !terminated {
+            self.has_unterminated = true;
+        }
+
         NodeKind::BlockComment
     }
 
-- 
cgit v1.2.3


From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Thu, 18 Nov 2021 16:21:45 +0100
Subject: Refactoring and bugfixes

---
 src/parse/mod.rs    | 34 +++++++++++++++++++++++-----------
 src/parse/parser.rs |  2 +-
 2 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f2fae5f2..f1f1e8b6 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -32,6 +32,13 @@ pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     p.eject_partial()
 }
 
+/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
+pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::new(src, TokenMode::Markup);
+    markup_expr(&mut p);
+    p.eject_partial()
+}
+
 /// Parse some markup. Returns `Some` if all of the input was consumed.
 pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
@@ -171,17 +178,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
         | NodeKind::While
         | NodeKind::For
         | NodeKind::Import
-        | NodeKind::Include => {
-            let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import);
-            let group = if stmt { Group::Stmt } else { Group::Expr };
-
-            p.start_group(group);
-            let res = expr_prec(p, true, 0);
-            if stmt && res.is_ok() && !p.eof() {
-                p.expected_at("semicolon or line break");
-            }
-            p.end_group();
-        }
+        | NodeKind::Include => markup_expr(p),
 
         // Block and template.
         NodeKind::LeftBrace => block(p),
@@ -222,6 +219,21 @@ fn enum_node(p: &mut Parser) {
     });
 }
 
+/// Parse an expression within markup mode.
+fn markup_expr(p: &mut Parser) {
+    if let Some(token) = p.peek() {
+        let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import);
+        let group = if stmt { Group::Stmt } else { Group::Expr };
+
+        p.start_group(group);
+        let res = expr_prec(p, true, 0);
+        if stmt && res.is_ok() && !p.eof() {
+            p.expected_at("semicolon or line break");
+        }
+        p.end_group();
+    }
+}
+
 /// Parse an expression.
 fn expr(p: &mut Parser) -> ParseResult {
     expr_prec(p, false, 0)
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index a37cb9c6..06cb1578 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -48,7 +48,7 @@ impl<'s> Parser<'s> {
     }
 
     /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<(Vec<Green>, bool)>{
+    pub fn eject(self) -> Option<(Vec<Green>, bool)> {
         if self.eof() && self.group_success() {
             Some((self.children, self.tokens.was_unterminated()))
         } else {
-- 
cgit v1.2.3


From edc686d7384470068858e16f2926cf50f31b2c90 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sat, 27 Nov 2021 16:10:22 +0100
Subject: Make incremental parsing simpler and move it somewhere else

---
 src/parse/incremental.rs | 661 +++++++++++++++++++++++++++++++++++++++++++++++
 src/parse/mod.rs         |   2 +
 2 files changed, 663 insertions(+)
 create mode 100644 src/parse/incremental.rs

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
new file mode 100644
index 00000000..9c912aae
--- /dev/null
+++ b/src/parse/incremental.rs
@@ -0,0 +1,661 @@
+use std::ops::Range;
+use std::rc::Rc;
+
+use crate::syntax::{Green, GreenNode, NodeKind, Span};
+
+use super::{
+    parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
+    parse_markup_elements, parse_template, TokenMode,
+};
+
+/// Allows partial refreshs of the [`Green`] node tree.
+///
+/// This struct holds a description of a change. Its methods can be used to try
+/// and apply the change to a green tree.
+pub struct Reparser<'a> {
+    /// The new source code, with the change applied.
+    src: &'a str,
+    /// Which range in the old source file was changed.
+    replace_range: Span,
+    /// How many characters replaced the text in `replacement_range`.
+    replace_len: usize,
+}
+
+impl<'a> Reparser<'a> {
+    /// Create a new reparser.
+    pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self {
+        Self { src, replace_range, replace_len }
+    }
+}
+
+impl Reparser<'_> {
+    /// Find the innermost child that is incremental safe.
+    pub fn reparse(&self, green: &mut GreenNode) -> Result<Range<usize>, ()> {
+        self.reparse_step(green, 0, TokenMode::Markup, true)
+    }
+
+    fn reparse_step(
+        &self,
+        green: &mut GreenNode,
+        mut offset: usize,
+        parent_mode: TokenMode,
+        outermost: bool,
+    ) -> Result<Range<usize>, ()> {
+        let kind = green.kind().clone();
+        let mode = kind.mode().unwrap_or(parent_mode);
+
+        let mut loop_result = None;
+        let mut child_at_start = true;
+        let last = green.children().len() - 1;
+        let mut start = None;
+
+        for (i, child) in green.children_mut().iter_mut().enumerate() {
+            let child_span =
+                Span::new(self.replace_range.source, offset, offset + child.len());
+
+            // We look for the start in the element but we only take a position
+            // at the right border if this is markup or the last element.
+            //
+            // This is because in Markup mode, we want to examine all nodes
+            // touching a replacement but in code we want to atomically replace.
+            if child_span.contains(self.replace_range.start)
+                && (mode == TokenMode::Markup
+                    || self.replace_range.start != child_span.end
+                    || self.replace_range.len() == 0
+                    || i == last)
+            {
+                start = Some((i, offset));
+                break;
+            }
+
+            offset += child.len();
+            child_at_start = child.kind().is_at_start(child_at_start);
+        }
+
+        let (start_idx, start_offset) = start.ok_or(())?;
+
+        for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() {
+            let i = i + start_idx;
+            let child_span =
+                Span::new(self.replace_range.source, offset, offset + child.len());
+
+            // Similarly to above, the end of the edit must be in the node but
+            // if it is at the edge and we are in markup node, we also want its
+            // neighbor!
+            if child_span.contains(self.replace_range.end)
+                && (mode != TokenMode::Markup
+                    || self.replace_range.end != child_span.end
+                    || i == last)
+            {
+                loop_result = Some((
+                    start_idx .. i + 1,
+                    Span::new(
+                        self.replace_range.source,
+                        start_offset,
+                        offset + child.len(),
+                    ),
+                    i == last && outermost,
+                    child.kind().clone(),
+                ));
+                break;
+            } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() {
+                break;
+            }
+
+            offset += child.len();
+        }
+
+        let (child_idx_range, child_span, child_outermost, child_kind) =
+            loop_result.ok_or(())?;
+
+        if child_idx_range.len() == 1 {
+            let idx = child_idx_range.start;
+            let child = &mut green.children_mut()[idx];
+
+            let old_len = child.len();
+            // First, we try if the child has another, more specific applicable child.
+            if !child_kind.post().unsafe_interior() {
+                if let Ok(range) = match child {
+                    Green::Node(n) => self.reparse_step(
+                        Rc::make_mut(n),
+                        start_offset,
+                        kind.mode().unwrap_or(TokenMode::Code),
+                        child_outermost,
+                    ),
+                    Green::Token(_) => Err(()),
+                } {
+                    let new_len = child.len();
+                    green.update_child_len(new_len, old_len);
+                    return Ok(range);
+                }
+            }
+        }
+
+        debug_assert_ne!(child_idx_range.len(), 0);
+
+        if mode == TokenMode::Code && child_idx_range.len() > 1 {
+            return Err(());
+        }
+
+        // We now have a child that we can replace and a function to do so.
+        let (func, policy) =
+            child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code));
+        let func = func?;
+
+        let src_span = inserted_span(child_span, self.replace_range, self.replace_len);
+        let recompile_range = if policy == Postcondition::AtomicPrimary {
+            src_span.start .. self.src.len()
+        } else {
+            src_span.to_range()
+        };
+
+        let (mut new_children, unterminated) =
+            func(&self.src[recompile_range], child_at_start).ok_or(())?;
+
+        // Do not accept unclosed nodes if the old node did not use to be at the
+        // right edge of the tree.
+        if !child_outermost && unterminated {
+            return Err(());
+        }
+
+        let insertion = match check_invariants(
+            &new_children,
+            green.children(),
+            child_idx_range.clone(),
+            child_at_start,
+            mode,
+            src_span,
+            policy,
+        ) {
+            InvariantResult::Ok => Ok(new_children),
+            InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]),
+            InvariantResult::Error => Err(()),
+        }?;
+
+        green.replace_child_range(child_idx_range, insertion);
+
+        Ok(src_span.to_range())
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum InvariantResult {
+    Ok,
+    UseFirst,
+    Error,
+}
+
+fn check_invariants(
+    use_children: &[Green],
+    old_children: &[Green],
+    child_idx_range: Range<usize>,
+    child_at_start: bool,
+    mode: TokenMode,
+    src_span: Span,
+    policy: Postcondition,
+) -> InvariantResult {
+    let (new_children, ok) = if policy == Postcondition::AtomicPrimary {
+        if use_children.iter().map(Green::len).sum::<usize>() == src_span.len() {
+            (use_children, InvariantResult::Ok)
+        } else if use_children.len() == 1 && use_children[0].len() == src_span.len() {
+            (&use_children[0 .. 1], InvariantResult::UseFirst)
+        } else {
+            return InvariantResult::Error;
+        }
+    } else {
+        (use_children, InvariantResult::Ok)
+    };
+
+    let child_mode = old_children[child_idx_range.start]
+        .kind()
+        .mode()
+        .unwrap_or(TokenMode::Code);
+
+    // Check if the children / child has the right type.
+    let same_kind = match policy {
+        Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode),
+        _ => false,
+    };
+
+    if same_kind || policy == Postcondition::AtomicPrimary {
+        if new_children.len() != 1 {
+            return InvariantResult::Error;
+        }
+
+        if same_kind {
+            if old_children[child_idx_range.start].kind() != new_children[0].kind() {
+                return InvariantResult::Error;
+            }
+        }
+    }
+
+    // Check if the neighbor invariants are still true.
+    if mode == TokenMode::Markup {
+        if child_idx_range.start > 0 {
+            if old_children[child_idx_range.start - 1].kind().pre()
+                == Precondition::RightWhitespace
+                && !new_children[0].kind().is_whitespace()
+            {
+                return InvariantResult::Error;
+            }
+        }
+
+        if new_children.last().map(|x| x.kind().pre())
+            == Some(Precondition::RightWhitespace)
+            && old_children.len() > child_idx_range.end
+        {
+            if !old_children[child_idx_range.end].kind().is_whitespace() {
+                return InvariantResult::Error;
+            }
+        }
+
+        let mut new_at_start = child_at_start;
+        for child in new_children {
+            new_at_start = child.kind().is_at_start(new_at_start);
+        }
+
+        for child in &old_children[child_idx_range.end ..] {
+            if child.kind().is_trivia() {
+                new_at_start = child.kind().is_at_start(new_at_start);
+                continue;
+            }
+
+            match child.kind().pre() {
+                Precondition::AtStart if !new_at_start => {
+                    return InvariantResult::Error;
+                }
+                Precondition::NotAtStart if new_at_start => {
+                    return InvariantResult::Error;
+                }
+                _ => {}
+            }
+            break;
+        }
+    }
+
+    ok
+}
+
+/// Create a new span by specifying a span in which a modification happened
+/// and how many characters are now in that span.
+fn inserted_span(mut source: Span, other: Span, n: usize) -> Span {
+    if !source.surrounds(other) {
+        panic!();
+    }
+
+    let len_change = n as i64 - other.len() as i64;
+    source.end = (source.end as i64 + len_change) as usize;
+    source
+}
+
+impl NodeKind {
+    /// Return the correct reparsing function given the postconditions for the
+    /// type.
+    fn reparsing_function(
+        &self,
+        parent_mode: TokenMode,
+    ) -> (
+        Result<fn(&str, bool) -> Option<(Vec<Green>, bool)>, ()>,
+        Postcondition,
+    ) {
+        let policy = self.post();
+        let mode = self.mode().unwrap_or(parent_mode);
+
+        match policy {
+            Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy),
+            Postcondition::AtomicPrimary if mode == TokenMode::Code => {
+                (Ok(parse_atomic), policy)
+            }
+            Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy),
+            Postcondition::SameKind(x) if x == None || x == Some(mode) => {
+                let parser: fn(&str, bool) -> _ = match self {
+                    NodeKind::Template => parse_template,
+                    NodeKind::Block => parse_block,
+                    NodeKind::LineComment | NodeKind::BlockComment => parse_comment,
+                    _ => return (Err(()), policy),
+                };
+
+                (Ok(parser), policy)
+            }
+            _ => {
+                let parser: fn(&str, bool) -> _ = match mode {
+                    TokenMode::Markup if self == &Self::Markup => parse_markup,
+                    TokenMode::Markup => parse_markup_elements,
+                    _ => return (Err(()), policy),
+                };
+
+                (Ok(parser), policy)
+            }
+        }
+    }
+
+    /// Whether it is safe to do incremental parsing on this node. Never allow
+    /// non-termination errors if this is not already the last leaf node.
+    pub fn post(&self) -> Postcondition {
+        match self {
+            // Replacing parenthesis changes if the expression is balanced and
+            // is therefore not safe.
+            Self::LeftBracket
+            | Self::RightBracket
+            | Self::LeftBrace
+            | Self::RightBrace
+            | Self::LeftParen
+            | Self::RightParen => Postcondition::Unsafe,
+
+            // Replacing an operator can change whether the parent is an
+            // operation which makes it unsafe. The star can appear in markup.
+            Self::Star
+            | Self::Comma
+            | Self::Semicolon
+            | Self::Colon
+            | Self::Plus
+            | Self::Minus
+            | Self::Slash
+            | Self::Eq
+            | Self::EqEq
+            | Self::ExclEq
+            | Self::Lt
+            | Self::LtEq
+            | Self::Gt
+            | Self::GtEq
+            | Self::PlusEq
+            | Self::HyphEq
+            | Self::StarEq
+            | Self::SlashEq
+            | Self::Not
+            | Self::And
+            | Self::Or
+            | Self::With
+            | Self::Dots
+            | Self::Arrow => Postcondition::Unsafe,
+
+            // These keywords are literals and can be safely be substituted with
+            // other expressions.
+            Self::None | Self::Auto => Postcondition::AtomicPrimary,
+
+            // These keywords change what kind of expression the parent is and
+            // how far the expression would go.
+            Self::Let
+            | Self::Set
+            | Self::If
+            | Self::Else
+            | Self::For
+            | Self::In
+            | Self::While
+            | Self::Break
+            | Self::Continue
+            | Self::Return
+            | Self::Import
+            | Self::Include
+            | Self::From => Postcondition::Unsafe,
+
+            Self::Markup => Postcondition::SameKind(None),
+
+            Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
+
+            // These are all replaceable by other tokens.
+            Self::Parbreak
+            | Self::Linebreak
+            | Self::Text(_)
+            | Self::TextInLine(_)
+            | Self::NonBreakingSpace
+            | Self::EnDash
+            | Self::EmDash
+            | Self::Escape(_)
+            | Self::Strong
+            | Self::Emph
+            | Self::Heading
+            | Self::Enum
+            | Self::List
+            | Self::Raw(_)
+            | Self::Math(_) => Postcondition::Safe,
+
+            // Changing the heading level, enum numbering, or list bullet
+            // changes the next layer.
+            Self::EnumNumbering(_) => Postcondition::Unsafe,
+
+            // These are expressions that can be replaced by other expressions.
+            Self::Ident(_)
+            | Self::Bool(_)
+            | Self::Int(_)
+            | Self::Float(_)
+            | Self::Length(_, _)
+            | Self::Angle(_, _)
+            | Self::Percentage(_)
+            | Self::Str(_)
+            | Self::Fraction(_)
+            | Self::Array
+            | Self::Dict
+            | Self::Group => Postcondition::AtomicPrimary,
+
+            Self::Call
+            | Self::Unary
+            | Self::Binary
+            | Self::CallArgs
+            | Self::Named
+            | Self::Spread => Postcondition::UnsafeLayer,
+
+            // The closure is a bit magic with the let expression, and also it
+            // is not atomic.
+            Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer,
+
+            // These can appear as bodies and would trigger an error if they
+            // became something else.
+            Self::Template => Postcondition::SameKind(None),
+            Self::Block => Postcondition::SameKind(Some(TokenMode::Code)),
+
+            Self::ForExpr
+            | Self::WhileExpr
+            | Self::IfExpr
+            | Self::LetExpr
+            | Self::SetExpr
+            | Self::ImportExpr
+            | Self::IncludeExpr => Postcondition::AtomicPrimary,
+
+            Self::WithExpr | Self::ForPattern | Self::ImportItems => {
+                Postcondition::UnsafeLayer
+            }
+
+            // These can appear everywhere and must not change to other stuff
+            // because that could change the outer expression.
+            Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
+
+            Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
+        }
+    }
+
+    /// The appropriate precondition for the type.
+    pub fn pre(&self) -> Precondition {
+        match self {
+            Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
+            Self::TextInLine(_) => Precondition::NotAtStart,
+            Self::Linebreak => Precondition::RightWhitespace,
+            _ => Precondition::None,
+        }
+    }
+}
+
+/// The conditions that a node has to fulfill in order to be replaced.
+///
+/// This can dictate if a node can be replaced at all and if yes, what can take
+/// its place.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Postcondition {
+    /// Changing this node can never have an influence on the other nodes.
+    Safe,
+    /// This node has to be replaced with a single token of the same kind.
+    SameKind(Option<TokenMode>),
+    /// Changing this node into a single atomic expression is allowed if it
+    /// appears in code mode, otherwise it is safe.
+    AtomicPrimary,
+    /// Changing an unsafe layer node changes what the parents or the
+    /// surrounding nodes would be and is therefore disallowed. Change the
+    /// parents or children instead. If it appears in Markup, however, it is
+    /// safe to change.
+    UnsafeLayer,
+    /// Changing an unsafe node or any of its children will trigger undefined
+    /// behavior. Change the parents instead.
+    Unsafe,
+}
+
+/// The conditions under which a node can be inserted or remain in a tree.
+///
+/// These conditions all search the neighbors of the node and see if its
+/// existence is plausible with them present. This can be used to encode some
+/// context-free language components for incremental parsing.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Precondition {
+    /// These nodes depend on being at the start of a line. Reparsing of safe
+    /// left neighbors has to check this invariant. Otherwise, this node is
+    /// safe.
+    AtStart,
+    /// These nodes depend on not being at the start of a line. Reparsing of
+    /// safe left neighbors has to check this invariant. Otherwise, this node is
+    /// safe.
+    NotAtStart,
+    /// These nodes must be followed by whitespace.
+    RightWhitespace,
+    /// No additional requirements.
+    None,
+}
+
+impl Postcondition {
+    pub fn unsafe_interior(&self) -> bool {
+        match self {
+            Self::Unsafe => true,
+            _ => false,
+        }
+    }
+
+    pub fn markup_safe(&self) -> bool {
+        match self {
+            Self::Safe | Self::UnsafeLayer => true,
+            Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup),
+            _ => false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::parse::parse;
+    use crate::source::SourceFile;
+
+    use super::*;
+
+    #[test]
+    fn test_incremental_parse() {
+        #[track_caller]
+        fn test(prev: &str, range: Range<usize>, with: &str, incr: Range<usize>) {
+            let mut source = SourceFile::detached(prev);
+            let range = source.edit(range, with);
+            assert_eq!(range, incr);
+
+            let incr_tree = source.root();
+            assert_eq!(parse(source.src()), incr_tree);
+        }
+
+        // Test simple replacements.
+        test("hello world", 6 .. 11, "wankers", 5 .. 13);
+        test("a d e", 1 .. 3, " b c d", 0 .. 8);
+        test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
+        test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
+        test("= A heading", 3 .. 3, "n evocative", 2 .. 15);
+        test("your thing", 5 .. 5, "a", 4 .. 11);
+        test("a your thing a", 6 .. 7, "a", 2 .. 12);
+        test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
+        test("#call() abc", 7 .. 7, "[]", 0 .. 13);
+        // test(
+        //     "hi\n- item\n- item 2\n    - item 3",
+        //     10 .. 10,
+        //     "  ",
+        //     9 .. 33,
+        // );
+        test(
+            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
+            16 .. 20,
+            "none",
+            16 .. 20,
+        );
+        test(
+            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
+            33 .. 42,
+            "[_gronk_]",
+            33 .. 42,
+        );
+        test(
+            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
+            34 .. 41,
+            "_bar_",
+            34 .. 39,
+        );
+        test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9);
+        test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 13 .. 15);
+        test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
+        test(
+            "this is -- in my opinion -- spectacular",
+            8 .. 10,
+            "---",
+            7 .. 12,
+        );
+        test(
+            "understanding `code` is complicated",
+            15 .. 15,
+            "C ",
+            14 .. 22,
+        );
+        test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
+        test(
+            "a #let rect with (fill: eastern)\nb",
+            16 .. 31,
+            " (stroke: conifer",
+            2 .. 34,
+        );
+
+        // Test the whitespace invariants.
+        test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
+        test("hello \\ world", 7 .. 8, " a", 6 .. 14);
+        test("x = y", 1 .. 1, " + y", 0 .. 6);
+        test("x = y", 1 .. 1, " + y\n", 0 .. 10);
+        test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
+        test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
+        test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
+        test("hey  #myfriend", 4 .. 4, "\\", 3 .. 6);
+
+        // Test type invariants.
+        test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
+        test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
+        test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
+        test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17);
+        test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
+        test("a // b c #f()", 3 .. 4, "", 0 .. 12);
+        test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
+        test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
+        test("a #while x {\n g(x) \n}  b", 11 .. 11, "//", 0 .. 26);
+        test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
+        test("a b c", 1 .. 1, "{[}", 0 .. 5);
+
+        // Test unclosed things.
+        test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
+        test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
+        test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
+        test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
+        test("a b c", 1 .. 1, " /* letters */", 0 .. 16);
+        test("a b c", 1 .. 1, " /* letters", 0 .. 16);
+        test(
+            "{if i==1 {a} else [b]; b()}",
+            12 .. 12,
+            " /* letters */",
+            1 .. 35,
+        );
+        test(
+            "{if i==1 {a} else [b]; b()}",
+            12 .. 12,
+            " /* letters",
+            0 .. 38,
+        );
+
+        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
+    }
+}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f1f1e8b6..2c421374 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,10 +1,12 @@
 //! Parsing and tokenization.
 
+mod incremental;
 mod parser;
 mod resolve;
 mod scanner;
 mod tokens;
 
+pub use incremental::*;
 pub use parser::*;
 pub use resolve::*;
 pub use scanner::*;
-- 
cgit v1.2.3


From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 28 Nov 2021 18:18:45 +0100
Subject: Code Review: Parser, I can't let you do this

---
 src/parse/incremental.rs | 449 ++++++++++++++++++++---------------------------
 src/parse/mod.rs         |  10 +-
 src/parse/parser.rs      |  14 +-
 src/parse/tokens.rs      |  28 ++-
 4 files changed, 210 insertions(+), 291 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 9c912aae..8e52c143 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -1,13 +1,57 @@
 use std::ops::Range;
 use std::rc::Rc;
 
-use crate::syntax::{Green, GreenNode, NodeKind, Span};
+use crate::syntax::{Green, GreenNode, NodeKind};
 
 use super::{
     parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
     parse_markup_elements, parse_template, TokenMode,
 };
 
+/// The conditions that a node has to fulfill in order to be replaced.
+///
+/// This can dictate if a node can be replaced at all and if yes, what can take
+/// its place.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Postcondition {
+    /// Changing this node can never have an influence on the other nodes.
+    Safe,
+    /// This node has to be replaced with a single token of the same kind.
+    SameKind(Option<TokenMode>),
+    /// Changing this node into a single atomic expression is allowed if it
+    /// appears in code mode, otherwise it is safe.
+    AtomicPrimary,
+    /// Changing an unsafe layer node changes what the parents or the
+    /// surrounding nodes would be and is therefore disallowed. Change the
+    /// parents or children instead. If it appears in Markup, however, it is
+    /// safe to change.
+    UnsafeLayer,
+    /// Changing an unsafe node or any of its children will trigger undefined
+    /// behavior. Change the parents instead.
+    Unsafe,
+}
+
+/// The conditions under which a node can be inserted or remain in a tree.
+///
+/// These conditions all search the neighbors of the node and see if its
+/// existence is plausible with them present. This can be used to encode some
+/// context-free language components for incremental parsing.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Precondition {
+    /// These nodes depend on being at the start of a line. Reparsing of safe
+    /// left neighbors has to check this invariant. Otherwise, this node is
+    /// safe.
+    AtStart,
+    /// These nodes depend on not being at the start of a line. Reparsing of
+    /// safe left neighbors has to check this invariant. Otherwise, this node is
+    /// safe.
+    NotAtStart,
+    /// These nodes must be followed by whitespace.
+    RightWhitespace,
+    /// No additional requirements.
+    None,
+}
+
 /// Allows partial refreshs of the [`Green`] node tree.
 ///
 /// This struct holds a description of a change. Its methods can be used to try
@@ -16,21 +60,21 @@ pub struct Reparser<'a> {
     /// The new source code, with the change applied.
     src: &'a str,
     /// Which range in the old source file was changed.
-    replace_range: Span,
-    /// How many characters replaced the text in `replacement_range`.
+    replace_range: Range<usize>,
+    /// How many characters replaced the text in `replace_range`.
     replace_len: usize,
 }
 
 impl<'a> Reparser<'a> {
     /// Create a new reparser.
-    pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self {
+    pub fn new(src: &'a str, replace_range: Range<usize>, replace_len: usize) -> Self {
         Self { src, replace_range, replace_len }
     }
 }
 
 impl Reparser<'_> {
     /// Find the innermost child that is incremental safe.
-    pub fn reparse(&self, green: &mut GreenNode) -> Result<Range<usize>, ()> {
+    pub fn reparse(&self, green: &mut GreenNode) -> Option<Range<usize>> {
         self.reparse_step(green, 0, TokenMode::Markup, true)
     }
 
@@ -39,30 +83,26 @@ impl Reparser<'_> {
         green: &mut GreenNode,
         mut offset: usize,
         parent_mode: TokenMode,
-        outermost: bool,
-    ) -> Result<Range<usize>, ()> {
+        mut outermost: bool,
+    ) -> Option<Range<usize>> {
         let kind = green.kind().clone();
         let mode = kind.mode().unwrap_or(parent_mode);
 
-        let mut loop_result = None;
         let mut child_at_start = true;
-        let last = green.children().len() - 1;
+        let last = green.children().len().saturating_sub(1);
         let mut start = None;
 
         for (i, child) in green.children_mut().iter_mut().enumerate() {
-            let child_span =
-                Span::new(self.replace_range.source, offset, offset + child.len());
+            let child_span = offset .. offset + child.len();
 
             // We look for the start in the element but we only take a position
             // at the right border if this is markup or the last element.
             //
             // This is because in Markup mode, we want to examine all nodes
             // touching a replacement but in code we want to atomically replace.
-            if child_span.contains(self.replace_range.start)
-                && (mode == TokenMode::Markup
-                    || self.replace_range.start != child_span.end
-                    || self.replace_range.len() == 0
-                    || i == last)
+            if child_span.contains(&self.replace_range.start)
+                || (mode == TokenMode::Markup
+                    && self.replace_range.start == child_span.end)
             {
                 start = Some((i, offset));
                 break;
@@ -72,31 +112,21 @@ impl Reparser<'_> {
             child_at_start = child.kind().is_at_start(child_at_start);
         }
 
-        let (start_idx, start_offset) = start.ok_or(())?;
+        let (start_idx, start_offset) = start?;
+        let mut end = None;
 
-        for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() {
-            let i = i + start_idx;
-            let child_span =
-                Span::new(self.replace_range.source, offset, offset + child.len());
+        for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) {
+            let child_span = offset .. offset + child.len();
 
             // Similarly to above, the end of the edit must be in the node but
             // if it is at the edge and we are in markup node, we also want its
             // neighbor!
-            if child_span.contains(self.replace_range.end)
-                && (mode != TokenMode::Markup
-                    || self.replace_range.end != child_span.end
-                    || i == last)
+            if child_span.contains(&self.replace_range.end)
+                || self.replace_range.end == child_span.end
+                    && (mode != TokenMode::Markup || i == last)
             {
-                loop_result = Some((
-                    start_idx .. i + 1,
-                    Span::new(
-                        self.replace_range.source,
-                        start_offset,
-                        offset + child.len(),
-                    ),
-                    i == last && outermost,
-                    child.kind().clone(),
-                ));
+                outermost &= i == last;
+                end = Some(i);
                 break;
             } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() {
                 break;
@@ -105,28 +135,30 @@ impl Reparser<'_> {
             offset += child.len();
         }
 
-        let (child_idx_range, child_span, child_outermost, child_kind) =
-            loop_result.ok_or(())?;
+        let end = end?;
+        let child_idx_range = start_idx .. end + 1;
+        let child_span = start_offset .. offset + green.children()[end].len();
+        let child_kind = green.children()[end].kind().clone();
 
         if child_idx_range.len() == 1 {
             let idx = child_idx_range.start;
             let child = &mut green.children_mut()[idx];
+            let prev_len = child.len();
 
-            let old_len = child.len();
             // First, we try if the child has another, more specific applicable child.
             if !child_kind.post().unsafe_interior() {
-                if let Ok(range) = match child {
+                if let Some(range) = match child {
                     Green::Node(n) => self.reparse_step(
                         Rc::make_mut(n),
                         start_offset,
                         kind.mode().unwrap_or(TokenMode::Code),
-                        child_outermost,
+                        outermost,
                     ),
-                    Green::Token(_) => Err(()),
+                    Green::Token(_) => None,
                 } {
                     let new_len = child.len();
-                    green.update_child_len(new_len, old_len);
-                    return Ok(range);
+                    green.update_child_len(new_len, prev_len);
+                    return Some(range);
                 }
             }
         }
@@ -134,28 +166,31 @@ impl Reparser<'_> {
         debug_assert_ne!(child_idx_range.len(), 0);
 
         if mode == TokenMode::Code && child_idx_range.len() > 1 {
-            return Err(());
+            return None;
         }
 
         // We now have a child that we can replace and a function to do so.
-        let (func, policy) =
-            child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code));
-        let func = func?;
+        let func =
+            child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?;
+        let policy = child_kind.post();
+
+        let len_change = self.replace_len as isize - self.replace_range.len() as isize;
+        let mut src_span = child_span;
+        src_span.end = (src_span.end as isize + len_change) as usize;
 
-        let src_span = inserted_span(child_span, self.replace_range, self.replace_len);
         let recompile_range = if policy == Postcondition::AtomicPrimary {
             src_span.start .. self.src.len()
         } else {
-            src_span.to_range()
+            src_span.clone()
         };
 
-        let (mut new_children, unterminated) =
-            func(&self.src[recompile_range], child_at_start).ok_or(())?;
+        let (mut new_children, terminated) =
+            func(&self.src[recompile_range], child_at_start)?;
 
         // Do not accept unclosed nodes if the old node did not use to be at the
         // right edge of the tree.
-        if !child_outermost && unterminated {
-            return Err(());
+        if !outermost && !terminated {
+            return None;
         }
 
         let insertion = match check_invariants(
@@ -164,17 +199,17 @@ impl Reparser<'_> {
             child_idx_range.clone(),
             child_at_start,
             mode,
-            src_span,
+            src_span.clone(),
             policy,
         ) {
-            InvariantResult::Ok => Ok(new_children),
-            InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]),
-            InvariantResult::Error => Err(()),
+            InvariantResult::Ok => Some(new_children),
+            InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]),
+            InvariantResult::Error => None,
         }?;
 
         green.replace_child_range(child_idx_range, insertion);
 
-        Ok(src_span.to_range())
+        Some(src_span)
     }
 }
 
@@ -191,7 +226,7 @@ fn check_invariants(
     child_idx_range: Range<usize>,
     child_at_start: bool,
     mode: TokenMode,
-    src_span: Span,
+    src_span: Range<usize>,
     policy: Postcondition,
 ) -> InvariantResult {
     let (new_children, ok) = if policy == Postcondition::AtomicPrimary {
@@ -206,10 +241,7 @@ fn check_invariants(
         (use_children, InvariantResult::Ok)
     };
 
-    let child_mode = old_children[child_idx_range.start]
-        .kind()
-        .mode()
-        .unwrap_or(TokenMode::Code);
+    let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode);
 
     // Check if the children / child has the right type.
     let same_kind = match policy {
@@ -249,25 +281,22 @@ fn check_invariants(
             }
         }
 
-        let mut new_at_start = child_at_start;
+        let mut post_at_start = child_at_start;
         for child in new_children {
-            new_at_start = child.kind().is_at_start(new_at_start);
+            post_at_start = child.kind().is_at_start(post_at_start);
         }
 
         for child in &old_children[child_idx_range.end ..] {
             if child.kind().is_trivia() {
-                new_at_start = child.kind().is_at_start(new_at_start);
+                post_at_start = child.kind().is_at_start(post_at_start);
                 continue;
             }
 
-            match child.kind().pre() {
-                Precondition::AtStart if !new_at_start => {
-                    return InvariantResult::Error;
-                }
-                Precondition::NotAtStart if new_at_start => {
-                    return InvariantResult::Error;
-                }
-                _ => {}
+            let pre = child.kind().pre();
+            if pre == Precondition::AtStart && !post_at_start
+                || pre == Precondition::NotAtStart && post_at_start
+            {
+                return InvariantResult::Error;
             }
             break;
         }
@@ -276,56 +305,31 @@ fn check_invariants(
     ok
 }
 
-/// Create a new span by specifying a span in which a modification happened
-/// and how many characters are now in that span.
-fn inserted_span(mut source: Span, other: Span, n: usize) -> Span {
-    if !source.surrounds(other) {
-        panic!();
-    }
-
-    let len_change = n as i64 - other.len() as i64;
-    source.end = (source.end as i64 + len_change) as usize;
-    source
-}
-
 impl NodeKind {
     /// Return the correct reparsing function given the postconditions for the
     /// type.
     fn reparsing_function(
         &self,
         parent_mode: TokenMode,
-    ) -> (
-        Result<fn(&str, bool) -> Option<(Vec<Green>, bool)>, ()>,
-        Postcondition,
-    ) {
+    ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
         let policy = self.post();
         let mode = self.mode().unwrap_or(parent_mode);
 
         match policy {
-            Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy),
-            Postcondition::AtomicPrimary if mode == TokenMode::Code => {
-                (Ok(parse_atomic), policy)
-            }
-            Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy),
-            Postcondition::SameKind(x) if x == None || x == Some(mode) => {
-                let parser: fn(&str, bool) -> _ = match self {
-                    NodeKind::Template => parse_template,
-                    NodeKind::Block => parse_block,
-                    NodeKind::LineComment | NodeKind::BlockComment => parse_comment,
-                    _ => return (Err(()), policy),
-                };
-
-                (Ok(parser), policy)
-            }
-            _ => {
-                let parser: fn(&str, bool) -> _ = match mode {
-                    TokenMode::Markup if self == &Self::Markup => parse_markup,
-                    TokenMode::Markup => parse_markup_elements,
-                    _ => return (Err(()), policy),
-                };
-
-                (Ok(parser), policy)
-            }
+            Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
+            Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
+            Postcondition::AtomicPrimary => Some(parse_atomic_markup),
+            Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+                NodeKind::Template => Some(parse_template),
+                NodeKind::Block => Some(parse_block),
+                NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
+                _ => None,
+            },
+            _ => match mode {
+                TokenMode::Markup if self == &Self::Markup => Some(parse_markup),
+                TokenMode::Markup => Some(parse_markup_elements),
+                _ => return None,
+            },
         }
     }
 
@@ -369,10 +373,6 @@ impl NodeKind {
             | Self::Dots
             | Self::Arrow => Postcondition::Unsafe,
 
-            // These keywords are literals and can be safely be substituted with
-            // other expressions.
-            Self::None | Self::Auto => Postcondition::AtomicPrimary,
-
             // These keywords change what kind of expression the parent is and
             // how far the expression would go.
             Self::Let
@@ -389,45 +389,14 @@ impl NodeKind {
             | Self::Include
             | Self::From => Postcondition::Unsafe,
 
-            Self::Markup => Postcondition::SameKind(None),
-
-            Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
-
-            // These are all replaceable by other tokens.
-            Self::Parbreak
-            | Self::Linebreak
-            | Self::Text(_)
-            | Self::TextInLine(_)
-            | Self::NonBreakingSpace
-            | Self::EnDash
-            | Self::EmDash
-            | Self::Escape(_)
-            | Self::Strong
-            | Self::Emph
-            | Self::Heading
-            | Self::Enum
-            | Self::List
-            | Self::Raw(_)
-            | Self::Math(_) => Postcondition::Safe,
-
             // Changing the heading level, enum numbering, or list bullet
             // changes the next layer.
             Self::EnumNumbering(_) => Postcondition::Unsafe,
 
-            // These are expressions that can be replaced by other expressions.
-            Self::Ident(_)
-            | Self::Bool(_)
-            | Self::Int(_)
-            | Self::Float(_)
-            | Self::Length(_, _)
-            | Self::Angle(_, _)
-            | Self::Percentage(_)
-            | Self::Str(_)
-            | Self::Fraction(_)
-            | Self::Array
-            | Self::Dict
-            | Self::Group => Postcondition::AtomicPrimary,
+            Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
 
+            // These are complex expressions which may screw with their
+            // environments.
             Self::Call
             | Self::Unary
             | Self::Binary
@@ -439,11 +408,44 @@ impl NodeKind {
             // is not atomic.
             Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer,
 
+            // Missing these creates errors for the parents.
+            Self::WithExpr | Self::ForPattern | Self::ImportItems => {
+                Postcondition::UnsafeLayer
+            }
+
+            // Only markup is expected at the points where it does occur.
+            Self::Markup => Postcondition::SameKind(None),
+
+            // These can appear everywhere and must not change to other stuff
+            // because that could change the outer expression.
+            Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
+
             // These can appear as bodies and would trigger an error if they
             // became something else.
             Self::Template => Postcondition::SameKind(None),
             Self::Block => Postcondition::SameKind(Some(TokenMode::Code)),
 
+            // Whitespace in code mode has to remain whitespace or else the type
+            // of things would change.
+            Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
+
+            // These are expressions that can be replaced by other expressions.
+            Self::Ident(_)
+            | Self::Bool(_)
+            | Self::Int(_)
+            | Self::Float(_)
+            | Self::Length(_, _)
+            | Self::Angle(_, _)
+            | Self::Percentage(_)
+            | Self::Str(_)
+            | Self::Fraction(_)
+            | Self::Array
+            | Self::Dict
+            | Self::Group
+            | Self::None
+            | Self::Auto => Postcondition::AtomicPrimary,
+
+            // More complex, but still an expression.
             Self::ForExpr
             | Self::WhileExpr
             | Self::IfExpr
@@ -452,15 +454,22 @@ impl NodeKind {
             | Self::ImportExpr
             | Self::IncludeExpr => Postcondition::AtomicPrimary,
 
-            Self::WithExpr | Self::ForPattern | Self::ImportItems => {
-                Postcondition::UnsafeLayer
-            }
-
-            // These can appear everywhere and must not change to other stuff
-            // because that could change the outer expression.
-            Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
-
-            Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
+            // These are all replaceable by other tokens.
+            Self::Parbreak
+            | Self::Linebreak
+            | Self::Text(_)
+            | Self::TextInLine(_)
+            | Self::NonBreakingSpace
+            | Self::EnDash
+            | Self::EmDash
+            | Self::Escape(_)
+            | Self::Strong
+            | Self::Emph
+            | Self::Heading
+            | Self::Enum
+            | Self::List
+            | Self::Raw(_)
+            | Self::Math(_) => Postcondition::Safe,
         }
     }
 
@@ -475,50 +484,6 @@ impl NodeKind {
     }
 }
 
-/// The conditions that a node has to fulfill in order to be replaced.
-///
-/// This can dictate if a node can be replaced at all and if yes, what can take
-/// its place.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Postcondition {
-    /// Changing this node can never have an influence on the other nodes.
-    Safe,
-    /// This node has to be replaced with a single token of the same kind.
-    SameKind(Option<TokenMode>),
-    /// Changing this node into a single atomic expression is allowed if it
-    /// appears in code mode, otherwise it is safe.
-    AtomicPrimary,
-    /// Changing an unsafe layer node changes what the parents or the
-    /// surrounding nodes would be and is therefore disallowed. Change the
-    /// parents or children instead. If it appears in Markup, however, it is
-    /// safe to change.
-    UnsafeLayer,
-    /// Changing an unsafe node or any of its children will trigger undefined
-    /// behavior. Change the parents instead.
-    Unsafe,
-}
-
-/// The conditions under which a node can be inserted or remain in a tree.
-///
-/// These conditions all search the neighbors of the node and see if its
-/// existence is plausible with them present. This can be used to encode some
-/// context-free language components for incremental parsing.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Precondition {
-    /// These nodes depend on being at the start of a line. Reparsing of safe
-    /// left neighbors has to check this invariant. Otherwise, this node is
-    /// safe.
-    AtStart,
-    /// These nodes depend on not being at the start of a line. Reparsing of
-    /// safe left neighbors has to check this invariant. Otherwise, this node is
-    /// safe.
-    NotAtStart,
-    /// These nodes must be followed by whitespace.
-    RightWhitespace,
-    /// No additional requirements.
-    None,
-}
-
 impl Postcondition {
     pub fn unsafe_interior(&self) -> bool {
         match self {
@@ -544,6 +509,7 @@ mod tests {
     use super::*;
 
     #[test]
+    #[rustfmt::skip]
     fn test_incremental_parse() {
         #[track_caller]
         fn test(prev: &str, range: Range<usize>, with: &str, incr: Range<usize>) {
@@ -551,12 +517,14 @@ mod tests {
             let range = source.edit(range, with);
             assert_eq!(range, incr);
 
-            let incr_tree = source.root();
+            let incr_tree = source.root().clone();
             assert_eq!(parse(source.src()), incr_tree);
         }
 
         // Test simple replacements.
-        test("hello world", 6 .. 11, "wankers", 5 .. 13);
+        test("hello world", 6 .. 11, "walkers", 5 .. 13);
+        test("some content", 0..12, "", 0..0);
+        test("", 0..0, "do it", 0..5);
         test("a d e", 1 .. 3, " b c d", 0 .. 8);
         test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
         test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
@@ -564,53 +532,18 @@ mod tests {
         test("your thing", 5 .. 5, "a", 4 .. 11);
         test("a your thing a", 6 .. 7, "a", 2 .. 12);
         test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
-        test("#call() abc", 7 .. 7, "[]", 0 .. 13);
-        // test(
-        //     "hi\n- item\n- item 2\n    - item 3",
-        //     10 .. 10,
-        //     "  ",
-        //     9 .. 33,
-        // );
-        test(
-            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
-            16 .. 20,
-            "none",
-            16 .. 20,
-        );
-        test(
-            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
-            33 .. 42,
-            "[_gronk_]",
-            33 .. 42,
-        );
-        test(
-            "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
-            34 .. 41,
-            "_bar_",
-            34 .. 39,
-        );
+        test("#call() abc", 7 .. 7, "[]", 0 .. 10);
+        // test("hi\n- item\n- item 2\n    - item 3", 10 .. 10, "  ", 9 .. 33);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39);
         test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9);
-        test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 13 .. 15);
+        test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 10 .. 32);
         test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
-        test(
-            "this is -- in my opinion -- spectacular",
-            8 .. 10,
-            "---",
-            7 .. 12,
-        );
-        test(
-            "understanding `code` is complicated",
-            15 .. 15,
-            "C ",
-            14 .. 22,
-        );
+        test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
+        test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
         test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
-        test(
-            "a #let rect with (fill: eastern)\nb",
-            16 .. 31,
-            " (stroke: conifer",
-            2 .. 34,
-        );
+        test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
 
         // Test the whitespace invariants.
         test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
@@ -642,18 +575,8 @@ mod tests {
         test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
         test("a b c", 1 .. 1, " /* letters */", 0 .. 16);
         test("a b c", 1 .. 1, " /* letters", 0 .. 16);
-        test(
-            "{if i==1 {a} else [b]; b()}",
-            12 .. 12,
-            " /* letters */",
-            1 .. 35,
-        );
-        test(
-            "{if i==1 {a} else [b]; b()}",
-            12 .. 12,
-            " /* letters",
-            0 .. 38,
-        );
+        test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
+        test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
 
         test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
         test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 2c421374..2c5afb6b 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -64,7 +64,7 @@ pub fn parse_markup_elements(
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
 pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
-    if !matches!(p.peek(), Some(NodeKind::LeftBracket)) {
+    if !p.at(&NodeKind::LeftBracket) {
         return None;
     }
 
@@ -75,7 +75,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 /// Parse a code block. Returns `Some` if all of the input was consumed.
 pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
-    if !matches!(p.peek(), Some(NodeKind::LeftBrace)) {
+    if !p.at(&NodeKind::LeftBrace) {
         return None;
     }
 
@@ -252,14 +252,14 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
     let marker = p.marker();
 
     // Start the unary expression.
-    match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() {
-        Some(op) => {
+    match p.peek().and_then(UnOp::from_token) {
+        Some(op) if !atomic => {
             p.eat();
             let prec = op.precedence();
             expr_prec(p, atomic, prec)?;
             marker.end(p, NodeKind::Unary);
         }
-        None => primary(p, atomic)?,
+        _ => primary(p, atomic)?,
     };
 
     loop {
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 06cb1578..ade9b5df 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -22,7 +22,7 @@ pub struct Parser<'s> {
     /// The children of the currently built node.
     children: Vec<Green>,
     /// Whether the last group was terminated.
-    last_group_terminated: bool,
+    last_terminated: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -38,7 +38,7 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_group_terminated: true,
+            last_terminated: true,
         }
     }
 
@@ -50,7 +50,7 @@ impl<'s> Parser<'s> {
     /// End the parsing process and return multiple children.
     pub fn eject(self) -> Option<(Vec<Green>, bool)> {
         if self.eof() && self.group_success() {
-            Some((self.children, self.tokens.was_unterminated()))
+            Some((self.children, self.tokens.was_terminated()))
         } else {
             None
         }
@@ -99,7 +99,7 @@ impl<'s> Parser<'s> {
     /// remains stuff in the string.
     pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
         self.group_success()
-            .then(|| (self.children, self.tokens.was_unterminated()))
+            .then(|| (self.children, self.tokens.was_terminated()))
     }
 
     /// Whether the end of the source string or group is reached.
@@ -244,7 +244,7 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        self.last_group_terminated = true;
+        self.last_terminated = true;
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -263,7 +263,7 @@ impl<'s> Parser<'s> {
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
-                self.last_group_terminated = false;
+                self.last_terminated = false;
             }
         }
 
@@ -283,7 +283,7 @@ impl<'s> Parser<'s> {
 
     /// Check if the group processing was successfully terminated.
     pub fn group_success(&self) -> bool {
-        self.last_group_terminated && self.groups.is_empty()
+        self.last_terminated && self.groups.is_empty()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 7be31fe1..836e8cf1 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -13,7 +13,7 @@ use crate::util::EcoString;
 pub struct Tokens<'s> {
     s: Scanner<'s>,
     mode: TokenMode,
-    has_unterminated: bool,
+    was_terminated: bool,
 }
 
 /// What kind of tokens to emit.
@@ -32,7 +32,7 @@ impl<'s> Tokens<'s> {
         Self {
             s: Scanner::new(src),
             mode,
-            has_unterminated: false,
+            was_terminated: true,
         }
     }
 
@@ -69,10 +69,10 @@ impl<'s> Tokens<'s> {
         self.s
     }
 
-    /// Whether the last token was unterminated.
+    /// Whether the last token was terminated.
     #[inline]
-    pub fn was_unterminated(&self) -> bool {
-        self.has_unterminated
+    pub fn was_terminated(&self) -> bool {
+        self.was_terminated
     }
 }
 
@@ -259,7 +259,7 @@ impl<'s> Tokens<'s> {
                             )
                         }
                     } else {
-                        self.has_unterminated = true;
+                        self.was_terminated = false;
                         NodeKind::Error(
                             ErrorPos::End,
                             "expected closing brace".into(),
@@ -358,7 +358,7 @@ impl<'s> Tokens<'s> {
             let remaining = backticks - found;
             let noun = if remaining == 1 { "backtick" } else { "backticks" };
 
-            self.has_unterminated = true;
+            self.was_terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if found == 0 {
@@ -406,7 +406,7 @@ impl<'s> Tokens<'s> {
                 display,
             }))
         } else {
-            self.has_unterminated = true;
+            self.was_terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if !display || (!escaped && dollar) {
@@ -495,7 +495,7 @@ impl<'s> Tokens<'s> {
         if self.s.eat_if('"') {
             NodeKind::Str(string)
         } else {
-            self.has_unterminated = true;
+            self.was_terminated = false;
             NodeKind::Error(ErrorPos::End, "expected quote".into())
         }
     }
@@ -503,7 +503,7 @@ impl<'s> Tokens<'s> {
     fn line_comment(&mut self) -> NodeKind {
         self.s.eat_until(is_newline);
         if self.s.peek().is_none() {
-            self.has_unterminated = true;
+            self.was_terminated = false;
         }
         NodeKind::LineComment
     }
@@ -511,7 +511,7 @@ impl<'s> Tokens<'s> {
     fn block_comment(&mut self) -> NodeKind {
         let mut state = '_';
         let mut depth = 1;
-        let mut terminated = false;
+        self.was_terminated = false;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
         while let Some(c) = self.s.eat() {
@@ -519,7 +519,7 @@ impl<'s> Tokens<'s> {
                 ('*', '/') => {
                     depth -= 1;
                     if depth == 0 {
-                        terminated = true;
+                        self.was_terminated = true;
                         break;
                     }
                     '_'
@@ -532,10 +532,6 @@ impl<'s> Tokens<'s> {
             }
         }
 
-        if !terminated {
-            self.has_unterminated = true;
-        }
-
         NodeKind::BlockComment
     }
 
-- 
cgit v1.2.3


From 12f7335ac365759a8c6bc942ef6830c23a4176fc Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Sun, 28 Nov 2021 22:32:20 +0100
Subject: Clarity and bugfix

Fixes a bug where validation would wrongly reject an atomic primary reparse due to trailing whitespace.

Co-Authored-By: Martin <mhaug@live.de>
---
 src/parse/incremental.rs | 287 ++++++++++++++++++++++-------------------------
 1 file changed, 135 insertions(+), 152 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 8e52c143..cc100a4c 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -85,13 +85,14 @@ impl Reparser<'_> {
         parent_mode: TokenMode,
         mut outermost: bool,
     ) -> Option<Range<usize>> {
-        let kind = green.kind().clone();
-        let mode = kind.mode().unwrap_or(parent_mode);
+        let mode = green.kind().mode().unwrap_or(parent_mode);
+        let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
+        let child_count = green.children().len();
 
-        let mut child_at_start = true;
-        let last = green.children().len().saturating_sub(1);
-        let mut start = None;
+        let mut first = None;
+        let mut at_start = true;
 
+        // Find the the first child in the range of children to reparse.
         for (i, child) in green.children_mut().iter_mut().enumerate() {
             let child_span = offset .. offset + child.len();
 
@@ -104,18 +105,19 @@ impl Reparser<'_> {
                 || (mode == TokenMode::Markup
                     && self.replace_range.start == child_span.end)
             {
-                start = Some((i, offset));
+                first = Some((i, offset));
                 break;
             }
 
             offset += child.len();
-            child_at_start = child.kind().is_at_start(child_at_start);
+            at_start = child.kind().is_at_start(at_start);
         }
 
-        let (start_idx, start_offset) = start?;
-        let mut end = None;
+        let (first_idx, first_start) = first?;
+        let mut last = None;
 
-        for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) {
+        // Find the the last child in the range of children to reparse.
+        for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) {
             let child_span = offset .. offset + child.len();
 
             // Similarly to above, the end of the edit must be in the node but
@@ -123,35 +125,35 @@ impl Reparser<'_> {
             // neighbor!
             if child_span.contains(&self.replace_range.end)
                 || self.replace_range.end == child_span.end
-                    && (mode != TokenMode::Markup || i == last)
+                    && (mode != TokenMode::Markup || i + 1 == child_count)
             {
-                outermost &= i == last;
-                end = Some(i);
+                outermost &= i + 1 == child_count;
+                last = Some((i, offset + child.len()));
                 break;
-            } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() {
+            } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() {
                 break;
             }
 
             offset += child.len();
         }
 
-        let end = end?;
-        let child_idx_range = start_idx .. end + 1;
-        let child_span = start_offset .. offset + green.children()[end].len();
-        let child_kind = green.children()[end].kind().clone();
+        let (last_idx, last_end) = last?;
+        let children_range = first_idx .. last_idx + 1;
+        let children_span = first_start .. last_end;
+        let last_kind = green.children()[last_idx].kind().clone();
 
-        if child_idx_range.len() == 1 {
-            let idx = child_idx_range.start;
-            let child = &mut green.children_mut()[idx];
+        // First, we try if the child itself has another, more specific
+        // applicable child.
+        if children_range.len() == 1 {
+            let child = &mut green.children_mut()[children_range.start];
             let prev_len = child.len();
 
-            // First, we try if the child has another, more specific applicable child.
-            if !child_kind.post().unsafe_interior() {
+            if last_kind.post() != Postcondition::Unsafe {
                 if let Some(range) = match child {
-                    Green::Node(n) => self.reparse_step(
-                        Rc::make_mut(n),
-                        start_offset,
-                        kind.mode().unwrap_or(TokenMode::Code),
+                    Green::Node(node) => self.reparse_step(
+                        Rc::make_mut(node),
+                        first_start,
+                        child_mode,
                         outermost,
                     ),
                     Green::Token(_) => None,
@@ -163,159 +165,147 @@ impl Reparser<'_> {
             }
         }
 
-        debug_assert_ne!(child_idx_range.len(), 0);
-
-        if mode == TokenMode::Code && child_idx_range.len() > 1 {
+        // We only replace multiple children in markup mode.
+        if children_range.len() > 1 && mode == TokenMode::Code {
             return None;
         }
 
         // We now have a child that we can replace and a function to do so.
-        let func =
-            child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?;
-        let policy = child_kind.post();
+        let func = last_kind.reparsing_func(child_mode)?;
+        let post = last_kind.post();
 
-        let len_change = self.replace_len as isize - self.replace_range.len() as isize;
-        let mut src_span = child_span;
-        src_span.end = (src_span.end as isize + len_change) as usize;
+        // The span of the to-be-reparsed children in the new source.
+        let replace_span = children_span.start
+            .. children_span.end + self.replace_len - self.replace_range.len();
 
-        let recompile_range = if policy == Postcondition::AtomicPrimary {
-            src_span.start .. self.src.len()
+        // For atomic primaries we need to pass in the whole remaining string to
+        // check whether the parser would eat more stuff illicitly.
+        let reparse_span = if post == Postcondition::AtomicPrimary {
+            replace_span.start .. self.src.len()
         } else {
-            src_span.clone()
+            replace_span.clone()
         };
 
-        let (mut new_children, terminated) =
-            func(&self.src[recompile_range], child_at_start)?;
+        // Do the reparsing!
+        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?;
+
+        // Make sure that atomic primaries ate only what they were supposed to.
+        if post == Postcondition::AtomicPrimary {
+            let len = replace_span.len();
+            if newborns.len() > 1 && newborns[0].len() == len {
+                newborns.truncate(1);
+            } else if newborns.iter().map(Green::len).sum::<usize>() != len {
+                return None;
+            }
+        }
 
-        // Do not accept unclosed nodes if the old node did not use to be at the
-        // right edge of the tree.
+        // Do not accept unclosed nodes if the old node wasn't at the right edge
+        // of the tree.
         if !outermost && !terminated {
             return None;
         }
 
-        let insertion = match check_invariants(
-            &new_children,
+        // If all post- and preconditions match, we are good to go!
+        if validate(
             green.children(),
-            child_idx_range.clone(),
-            child_at_start,
+            children_range.clone(),
+            at_start,
+            &newborns,
             mode,
-            src_span.clone(),
-            policy,
+            post,
         ) {
-            InvariantResult::Ok => Some(new_children),
-            InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]),
-            InvariantResult::Error => None,
-        }?;
-
-        green.replace_child_range(child_idx_range, insertion);
-
-        Some(src_span)
+            green.replace_child_range(children_range, newborns);
+            Some(replace_span)
+        } else {
+            None
+        }
     }
 }
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-enum InvariantResult {
-    Ok,
-    UseFirst,
-    Error,
-}
-
-fn check_invariants(
-    use_children: &[Green],
-    old_children: &[Green],
-    child_idx_range: Range<usize>,
-    child_at_start: bool,
+/// Validate that a node replacement is allowed by post- and preconditions.
+fn validate(
+    prev_children: &[Green],
+    children_range: Range<usize>,
+    mut at_start: bool,
+    newborns: &[Green],
     mode: TokenMode,
-    src_span: Range<usize>,
-    policy: Postcondition,
-) -> InvariantResult {
-    let (new_children, ok) = if policy == Postcondition::AtomicPrimary {
-        if use_children.iter().map(Green::len).sum::<usize>() == src_span.len() {
-            (use_children, InvariantResult::Ok)
-        } else if use_children.len() == 1 && use_children[0].len() == src_span.len() {
-            (&use_children[0 .. 1], InvariantResult::UseFirst)
-        } else {
-            return InvariantResult::Error;
-        }
-    } else {
-        (use_children, InvariantResult::Ok)
-    };
-
-    let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode);
-
-    // Check if the children / child has the right type.
-    let same_kind = match policy {
-        Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode),
-        _ => false,
-    };
-
-    if same_kind || policy == Postcondition::AtomicPrimary {
-        if new_children.len() != 1 {
-            return InvariantResult::Error;
-        }
+    post: Postcondition,
+) -> bool {
+    // Atomic primaries must only generate one new child.
+    if post == Postcondition::AtomicPrimary && newborns.len() != 1 {
+        return false;
+    }
 
-        if same_kind {
-            if old_children[child_idx_range.start].kind() != new_children[0].kind() {
-                return InvariantResult::Error;
-            }
+    // Same kind in mode `inside` must generate only one child and that child
+    // must be of the same kind as previously.
+    if let Postcondition::SameKind(inside) = post {
+        let prev_kind = prev_children[children_range.start].kind();
+        let prev_mode = prev_kind.mode().unwrap_or(mode);
+        if inside.map_or(true, |m| m == prev_mode)
+            && (newborns.len() != 1 || prev_kind != newborns[0].kind())
+        {
+            return false;
         }
     }
 
-    // Check if the neighbor invariants are still true.
-    if mode == TokenMode::Markup {
-        if child_idx_range.start > 0 {
-            if old_children[child_idx_range.start - 1].kind().pre()
-                == Precondition::RightWhitespace
-                && !new_children[0].kind().is_whitespace()
-            {
-                return InvariantResult::Error;
-            }
-        }
+    // Neighbor invariants are only relevant in markup mode.
+    if mode == TokenMode::Code {
+        return true;
+    }
 
-        if new_children.last().map(|x| x.kind().pre())
-            == Some(Precondition::RightWhitespace)
-            && old_children.len() > child_idx_range.end
-        {
-            if !old_children[child_idx_range.end].kind().is_whitespace() {
-                return InvariantResult::Error;
-            }
-        }
+    // Ensure that a possible right-whitespace precondition of a node before the
+    // replacement range is satisfied.
+    if children_range.start > 0
+        && prev_children[children_range.start - 1].kind().pre()
+            == Precondition::RightWhitespace
+        && !newborns[0].kind().is_whitespace()
+    {
+        return false;
+    }
 
-        let mut post_at_start = child_at_start;
-        for child in new_children {
-            post_at_start = child.kind().is_at_start(post_at_start);
-        }
+    // Ensure that a possible right-whitespace precondition of a new node at the
+    // end of the replacement range is satisfied.
+    if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace)
+        && children_range.end < prev_children.len()
+        && !prev_children[children_range.end].kind().is_whitespace()
+    {
+        return false;
+    }
 
-        for child in &old_children[child_idx_range.end ..] {
-            if child.kind().is_trivia() {
-                post_at_start = child.kind().is_at_start(post_at_start);
-                continue;
-            }
+    // Compute the at_start state behind the new children.
+    for child in newborns {
+        at_start = child.kind().is_at_start(at_start);
+    }
 
+    // Ensure that a possible at-start or not-at-start precondition of
+    // a node after the replacement range is satisfied.
+    for child in &prev_children[children_range.end ..] {
+        if !child.kind().is_trivia() {
             let pre = child.kind().pre();
-            if pre == Precondition::AtStart && !post_at_start
-                || pre == Precondition::NotAtStart && post_at_start
+            if (pre == Precondition::AtStart && !at_start)
+                || (pre == Precondition::NotAtStart && at_start)
             {
-                return InvariantResult::Error;
+                return false;
             }
+
             break;
         }
+
+        at_start = child.kind().is_at_start(at_start);
     }
 
-    ok
+    true
 }
 
 impl NodeKind {
     /// Return the correct reparsing function given the postconditions for the
     /// type.
-    fn reparsing_function(
+    fn reparsing_func(
         &self,
         parent_mode: TokenMode,
     ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
-        let policy = self.post();
         let mode = self.mode().unwrap_or(parent_mode);
-
-        match policy {
+        match self.post() {
             Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
             Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
             Postcondition::AtomicPrimary => Some(parse_atomic_markup),
@@ -393,6 +383,7 @@ impl NodeKind {
             // changes the next layer.
             Self::EnumNumbering(_) => Postcondition::Unsafe,
 
+            // This can be anything, so we don't make any promises.
             Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
 
             // These are complex expressions which may screw with their
@@ -485,17 +476,11 @@ impl NodeKind {
 }
 
 impl Postcondition {
-    pub fn unsafe_interior(&self) -> bool {
-        match self {
-            Self::Unsafe => true,
-            _ => false,
-        }
-    }
-
-    pub fn markup_safe(&self) -> bool {
+    /// Whether a node with this condition can be reparsed in markup mode.
+    pub fn safe_in_markup(&self) -> bool {
         match self {
             Self::Safe | Self::UnsafeLayer => true,
-            Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup),
+            Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup),
             _ => false,
         }
     }
@@ -503,22 +488,19 @@ impl Postcondition {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::parse::parse;
     use crate::source::SourceFile;
 
-    use super::*;
-
     #[test]
     #[rustfmt::skip]
     fn test_incremental_parse() {
         #[track_caller]
-        fn test(prev: &str, range: Range<usize>, with: &str, incr: Range<usize>) {
+        fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
             let mut source = SourceFile::detached(prev);
             let range = source.edit(range, with);
-            assert_eq!(range, incr);
-
-            let incr_tree = source.root().clone();
-            assert_eq!(parse(source.src()), incr_tree);
+            assert_eq!(range, goal);
+            assert_eq!(parse(source.src()), *source.root());
         }
 
         // Test simple replacements.
@@ -542,7 +524,7 @@ mod tests {
         test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
         test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
         test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
-        test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
+        test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
         test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
 
         // Test the whitespace invariants.
@@ -578,6 +560,7 @@ mod tests {
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
 
+        // Test raw tokens.
         test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
         test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
     }
-- 
cgit v1.2.3


From 289122e83c085668e56e52225c2dcfd9417d6262 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Mon, 29 Nov 2021 12:06:41 +0100
Subject: Deal with offside rule and remove RightWhitespace

---
 src/parse/incremental.rs | 92 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 70 insertions(+), 22 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index cc100a4c..0e2d196c 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -5,7 +5,7 @@ use crate::syntax::{Green, GreenNode, NodeKind};
 
 use super::{
     parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
-    parse_markup_elements, parse_template, TokenMode,
+    parse_markup_elements, parse_template, Scanner, TokenMode,
 };
 
 /// The conditions that a node has to fulfill in order to be replaced.
@@ -40,14 +40,13 @@ pub enum Postcondition {
 pub enum Precondition {
     /// These nodes depend on being at the start of a line. Reparsing of safe
     /// left neighbors has to check this invariant. Otherwise, this node is
-    /// safe.
+    /// safe. Additionally, the indentation of the first right non-trivia,
+    /// non-whitespace sibling must not be greater than the current indentation.
     AtStart,
     /// These nodes depend on not being at the start of a line. Reparsing of
     /// safe left neighbors has to check this invariant. Otherwise, this node is
     /// safe.
     NotAtStart,
-    /// These nodes must be followed by whitespace.
-    RightWhitespace,
     /// No additional requirements.
     None,
 }
@@ -213,6 +212,8 @@ impl Reparser<'_> {
             &newborns,
             mode,
             post,
+            replace_span.clone(),
+            self.src,
         ) {
             green.replace_child_range(children_range, newborns);
             Some(replace_span)
@@ -230,6 +231,8 @@ fn validate(
     newborns: &[Green],
     mode: TokenMode,
     post: Postcondition,
+    replace_span: Range<usize>,
+    src: &str,
 ) -> bool {
     // Atomic primaries must only generate one new child.
     if post == Postcondition::AtomicPrimary && newborns.len() != 1 {
@@ -253,23 +256,37 @@ fn validate(
         return true;
     }
 
-    // Ensure that a possible right-whitespace precondition of a node before the
-    // replacement range is satisfied.
-    if children_range.start > 0
-        && prev_children[children_range.start - 1].kind().pre()
-            == Precondition::RightWhitespace
-        && !newborns[0].kind().is_whitespace()
-    {
-        return false;
-    }
+    // Check if there are any `AtStart` predecessors which require a certain
+    // indentation.
+    let s = Scanner::new(src);
+    let mut prev_pos = replace_span.start;
+    for child in (&prev_children[.. children_range.start]).iter().rev() {
+        prev_pos -= child.len();
+        if !child.kind().is_trivia() {
+            if child.kind().pre() == Precondition::AtStart {
+                let left_col = s.column(prev_pos);
+
+                // Search for the first non-trivia newborn.
+                let mut new_pos = replace_span.start;
+                let mut child_col = None;
+                for child in newborns {
+                    if !child.kind().is_trivia() {
+                        child_col = Some(s.column(new_pos));
+                        break;
+                    }
+
+                    new_pos += child.len();
+                }
 
-    // Ensure that a possible right-whitespace precondition of a new node at the
-    // end of the replacement range is satisfied.
-    if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace)
-        && children_range.end < prev_children.len()
-        && !prev_children[children_range.end].kind().is_whitespace()
-    {
-        return false;
+                if let Some(child_col) = child_col {
+                    if child_col > left_col {
+                        return false;
+                    }
+                }
+            }
+
+            break;
+        }
     }
 
     // Compute the at_start state behind the new children.
@@ -294,6 +311,37 @@ fn validate(
         at_start = child.kind().is_at_start(at_start);
     }
 
+    // We have to check whether the last non-trivia newborn is `AtStart` and
+    // verify the indent of its right neighbors in order to make sure its
+    // indentation requirements are fulfilled.
+    let mut child_pos = replace_span.end;
+    let mut child_col = None;
+    for child in newborns.iter().rev() {
+        child_pos -= child.len();
+
+        if !child.kind().is_trivia() {
+            if child.kind().pre() == Precondition::AtStart {
+                child_col = Some(s.column(child_pos));
+            }
+            break;
+        }
+    }
+
+    if let Some(child_col) = child_col {
+        let mut right_pos = replace_span.end;
+        for child in &prev_children[children_range.end ..] {
+            if !child.kind().is_trivia() {
+                if s.column(right_pos) > child_col {
+                    return false;
+                }
+
+                break;
+            }
+
+            right_pos += child.len();
+        }
+    }
+
     true
 }
 
@@ -469,7 +517,6 @@ impl NodeKind {
         match self {
             Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
             Self::TextInLine(_) => Precondition::NotAtStart,
-            Self::Linebreak => Precondition::RightWhitespace,
             _ => Precondition::None,
         }
     }
@@ -515,7 +562,8 @@ mod tests {
         test("a your thing a", 6 .. 7, "a", 2 .. 12);
         test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
         test("#call() abc", 7 .. 7, "[]", 0 .. 10);
-        // test("hi\n- item\n- item 2\n    - item 3", 10 .. 10, "  ", 9 .. 33);
+        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 2 .. 35);
+        test("hi\n- item\nno item\n    - item 3", 10 .. 10, "- ", 0 .. 32);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39);
-- 
cgit v1.2.3


From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 2 Jan 2022 00:46:19 +0100
Subject: Added a test framework for incremental parsing

Fix several errors:

- Indented markup is now reparsed right
- All end group errors will now fail a reparse
- Rightmost errors will always fail a reparse
---
 src/parse/incremental.rs | 69 ++++++++++++++++++++++++++++++++++++++----------
 src/parse/mod.rs         | 55 +++++++++++++++++++++++++-------------
 src/parse/parser.rs      | 54 ++++++++++++++++++++++++++++++++-----
 src/parse/tokens.rs      | 16 ++++-------
 4 files changed, 145 insertions(+), 49 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 0e2d196c..1ee37a51 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -47,6 +47,10 @@ pub enum Precondition {
     /// safe left neighbors has to check this invariant. Otherwise, this node is
     /// safe.
     NotAtStart,
+    /// These nodes could end up somewhere else up the tree if the parse was
+    /// happening from scratch. The parse result has to be checked for such
+    /// nodes. They are safe to add if followed up by other nodes.
+    NotAtEnd,
     /// No additional requirements.
     None,
 }
@@ -88,6 +92,12 @@ impl Reparser<'_> {
         let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
         let child_count = green.children().len();
 
+        // Save the current indent if this is a markup node.
+        let indent = match green.kind() {
+            NodeKind::Markup(n) => *n,
+            _ => 0,
+        };
+
         let mut first = None;
         let mut at_start = true;
 
@@ -170,12 +180,29 @@ impl Reparser<'_> {
         }
 
         // We now have a child that we can replace and a function to do so.
-        let func = last_kind.reparsing_func(child_mode)?;
+        let func = last_kind.reparsing_func(child_mode, indent)?;
         let post = last_kind.post();
 
+        let mut column = if mode == TokenMode::Markup {
+            // In this case, we want to pass the indentation to the function.
+            Scanner::new(self.src).column(children_span.start)
+        } else {
+            0
+        };
+
+        // If this is a markup node, we want to save its indent instead to pass
+        // the right indent argument.
+        if children_range.len() == 1 {
+            let child = &mut green.children_mut()[children_range.start];
+            if let NodeKind::Markup(n) = child.kind() {
+                column = *n;
+            }
+        }
+
         // The span of the to-be-reparsed children in the new source.
         let replace_span = children_span.start
-            .. children_span.end + self.replace_len - self.replace_range.len();
+            ..
+            children_span.end + self.replace_len - self.replace_range.len();
 
         // For atomic primaries we need to pass in the whole remaining string to
         // check whether the parser would eat more stuff illicitly.
@@ -186,7 +213,7 @@ impl Reparser<'_> {
         };
 
         // Do the reparsing!
-        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?;
+        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?;
 
         // Make sure that atomic primaries ate only what they were supposed to.
         if post == Postcondition::AtomicPrimary {
@@ -311,6 +338,14 @@ fn validate(
         at_start = child.kind().is_at_start(at_start);
     }
 
+    // Verify that the last of the newborns is not `NotAtEnd`.
+    if newborns
+        .last()
+        .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd)
+    {
+        return false;
+    }
+
     // We have to check whether the last non-trivia newborn is `AtStart` and
     // verify the indent of its right neighbors in order to make sure its
     // indentation requirements are fulfilled.
@@ -351,21 +386,22 @@ impl NodeKind {
     fn reparsing_func(
         &self,
         parent_mode: TokenMode,
-    ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
+        indent: usize,
+    ) -> Option<fn(&str, bool, usize) -> Option<(Vec<Green>, bool)>> {
         let mode = self.mode().unwrap_or(parent_mode);
         match self.post() {
             Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
             Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
             Postcondition::AtomicPrimary => Some(parse_atomic_markup),
             Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+                NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
                 NodeKind::Block => Some(parse_block),
                 NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
                 _ => None,
             },
             _ => match mode {
-                TokenMode::Markup if self == &Self::Markup => Some(parse_markup),
-                TokenMode::Markup => Some(parse_markup_elements),
+                TokenMode::Markup if indent == 0 => Some(parse_markup_elements),
                 _ => return None,
             },
         }
@@ -452,8 +488,9 @@ impl NodeKind {
                 Postcondition::UnsafeLayer
             }
 
-            // Only markup is expected at the points where it does occur.
-            Self::Markup => Postcondition::SameKind(None),
+            // Only markup is expected at the points where it does occur. The
+            // indentation must be preserved as well, also for the children.
+            Self::Markup(_) => Postcondition::SameKind(None),
 
             // These can appear everywhere and must not change to other stuff
             // because that could change the outer expression.
@@ -493,6 +530,10 @@ impl NodeKind {
             | Self::ImportExpr
             | Self::IncludeExpr => Postcondition::AtomicPrimary,
 
+            // This element always has to remain in the same column so better
+            // reparse the whole parent.
+            Self::Raw(_) => Postcondition::Unsafe,
+
             // These are all replaceable by other tokens.
             Self::Parbreak
             | Self::Linebreak
@@ -507,7 +548,6 @@ impl NodeKind {
             | Self::Heading
             | Self::Enum
             | Self::List
-            | Self::Raw(_)
             | Self::Math(_) => Postcondition::Safe,
         }
     }
@@ -517,6 +557,7 @@ impl NodeKind {
         match self {
             Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
             Self::TextInLine(_) => Precondition::NotAtStart,
+            Self::Error(_, _) => Precondition::NotAtEnd,
             _ => Precondition::None,
         }
     }
@@ -557,12 +598,12 @@ mod tests {
         test("a d e", 1 .. 3, " b c d", 0 .. 8);
         test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
         test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
-        test("= A heading", 3 .. 3, "n evocative", 2 .. 15);
+        test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
         test("your thing", 5 .. 5, "a", 4 .. 11);
         test("a your thing a", 6 .. 7, "a", 2 .. 12);
         test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
         test("#call() abc", 7 .. 7, "[]", 0 .. 10);
-        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 2 .. 35);
+        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 3 .. 34);
         test("hi\n- item\nno item\n    - item 3", 10 .. 10, "- ", 0 .. 32);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
         test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
@@ -571,7 +612,7 @@ mod tests {
         test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 10 .. 32);
         test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
         test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
-        test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
+        test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
         test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
         test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
 
@@ -596,7 +637,7 @@ mod tests {
         test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
         test("a #while x {\n g(x) \n}  b", 11 .. 11, "//", 0 .. 26);
         test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
-        test("a b c", 1 .. 1, "{[}", 0 .. 5);
+        test("a b c", 1 .. 1, "{[}", 0 .. 8);
 
         // Test unclosed things.
         test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
@@ -610,6 +651,6 @@ mod tests {
 
         // Test raw tokens.
         test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
-        test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
     }
 }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 2c5afb6b..f4826730 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -16,6 +16,7 @@ use std::rc::Rc;
 
 use crate::syntax::ast::{Associativity, BinOp, UnOp};
 use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
+use crate::util::EcoString;
 
 /// Parse a source file.
 pub fn parse(src: &str) -> Rc<GreenNode> {
@@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Code);
     primary(&mut p, true).ok()?;
     p.eject_partial()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
     markup_expr(&mut p);
     p.eject_partial()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
-    markup(&mut p);
+    if column == 0 {
+        markup(&mut p);
+    } else {
+        markup_indented(&mut p, column);
+    }
     p.eject()
 }
 
@@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 pub fn parse_markup_elements(
     src: &str,
     mut at_start: bool,
+    column: usize,
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(src, TokenMode::Markup);
+    p.offset(column);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
     }
@@ -62,7 +69,7 @@ pub fn parse_markup_elements(
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !p.at(&NodeKind::LeftBracket) {
         return None;
@@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 }
 
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     if !p.at(&NodeKind::LeftBrace) {
         return None;
@@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 }
 
 /// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::new(source, TokenMode::Code);
     comment(&mut p).ok()?;
     p.eject()
@@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
 
 /// Parse markup.
 fn markup(p: &mut Parser) {
-    markup_while(p, true, &mut |_| true)
+    markup_while(p, true, 0, &mut |_| true)
 }
 
 /// Parse markup that stays right of the given column.
@@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) {
         _ => false,
     });
 
-    markup_while(p, false, &mut |p| match p.peek() {
-        Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
+    markup_while(p, false, column, &mut |p| match p.peek() {
+        Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column,
         _ => true,
     })
 }
@@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) {
 ///
 /// If `at_start` is true, things like headings that may only appear at the
 /// beginning of a line or template are allowed.
-fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
+fn markup_while<F>(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F)
 where
     F: FnMut(&mut Parser) -> bool,
 {
-    p.perform(NodeKind::Markup, |p| {
+    p.perform(NodeKind::Markup(column), |p| {
         while !p.eof() && f(p) {
             markup_node(p, &mut at_start);
         }
@@ -205,20 +212,32 @@ fn heading(p: &mut Parser) {
 
 /// Parse a single list item.
 fn list_node(p: &mut Parser) {
-    p.perform(NodeKind::List, |p| {
-        p.eat_assert(&NodeKind::Minus);
+    let marker = p.marker();
+    let src: EcoString = p.peek_src().into();
+    p.eat_assert(&NodeKind::Minus);
+
+    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::List);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(src));
+    }
 }
 
 /// Parse a single enum item.
 fn enum_node(p: &mut Parser) {
-    p.perform(NodeKind::Enum, |p| {
-        p.eat();
+    let marker = p.marker();
+    let src: EcoString = p.peek_src().into();
+    p.eat();
+
+    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Enum);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(src));
+    }
 }
 
 /// Parse an expression within markup mode.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index ade9b5df..b31f69d3 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,8 +21,12 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
-    /// Whether the last group was terminated.
-    last_terminated: bool,
+    /// Is `Some` if there is an unterminated group at the last position where
+    /// groups were terminated.
+    last_unterminated: Option<usize>,
+    /// Offset the indentation. This can be used if the parser is processing a
+    /// subslice of the source and there was leading indent.
+    column_offset: usize,
 }
 
 impl<'s> Parser<'s> {
@@ -38,7 +42,8 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_terminated: true,
+            last_unterminated: None,
+            column_offset: 0,
         }
     }
 
@@ -102,6 +107,11 @@ impl<'s> Parser<'s> {
             .then(|| (self.children, self.tokens.was_terminated()))
     }
 
+    /// Set an indentation offset.
+    pub fn offset(&mut self, columns: usize) {
+        self.column_offset = columns;
+    }
+
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -206,6 +216,12 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
+        self.tokens.scanner().column(index) + self.column_offset
+    }
+
+    /// Determine the column index for the given byte index while ignoring the
+    /// offset.
+    pub fn clean_column(&self, index: usize) -> usize {
         self.tokens.scanner().column(index)
     }
 
@@ -244,7 +260,11 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        self.last_terminated = true;
+        if let Some(n) = self.last_unterminated {
+            if n != self.prev_end() {
+                self.last_unterminated = None;
+            }
+        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -262,8 +282,14 @@ impl<'s> Parser<'s> {
                 self.eat();
                 rescan = false;
             } else if required {
+                // FIXME The error has to be inserted before any space rolls
+                // around because the rescan will set the cursor back in front
+                // of the space and reconsume it. Supressing the rescan is not
+                // an option since additional rescans (e.g. for statements) can
+                // be triggered directly afterwards, without processing any
+                // other token.
                 self.push_error(format_eco!("expected {}", end));
-                self.last_terminated = false;
+                self.last_unterminated = Some(self.prev_end());
             }
         }
 
@@ -283,13 +309,21 @@ impl<'s> Parser<'s> {
 
     /// Check if the group processing was successfully terminated.
     pub fn group_success(&self) -> bool {
-        self.last_terminated && self.groups.is_empty()
+        self.last_unterminated.is_none() && self.groups.is_empty()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
+        if match kind {
+            NodeKind::Space(n) if n > 0 => true,
+            NodeKind::Parbreak => true,
+            _ => false,
+        } {
+            self.column_offset = 0;
+        }
+
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
@@ -346,6 +380,13 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
+        for i in (0 .. self.children.len()).rev() {
+            if Self::is_trivia_ext(self.children[i].kind(), false) {
+                self.children.remove(i);
+            } else {
+                break;
+            }
+        }
         self.children.push(GreenData::new(error, 0).into());
     }
 
@@ -445,6 +486,7 @@ impl Marker {
 }
 
 /// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
 struct GroupEntry {
     /// The kind of group this is. This decides which tokens will end the group.
     /// For example, a [`Group::Paren`] will be ended by
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 836e8cf1..3a0ad1ad 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -293,10 +293,8 @@ impl<'s> Tokens<'s> {
             } else {
                 NodeKind::EnDash
             }
-        } else if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::Minus
         } else {
-            NodeKind::Text('-'.into())
+            NodeKind::Minus
         }
     }
 
@@ -312,11 +310,7 @@ impl<'s> Tokens<'s> {
             None
         };
 
-        if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::EnumNumbering(number)
-        } else {
-            NodeKind::Text(self.s.eaten_from(start).into())
-        }
+        NodeKind::EnumNumbering(number)
     }
 
     fn raw(&mut self) -> NodeKind {
@@ -742,12 +736,12 @@ mod tests {
     fn test_tokenize_text() {
         // Test basic text.
         t!(Markup[" /"]: "hello"       => Text("hello"));
-        t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
+        t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world"));
 
         // Test code symbols in text.
         t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
         t!(Markup[" /"]: ";:,|/+"  => Text(";:,|"), Text("/+"));
-        t!(Markup[" /"]: "=-a"     => Text("="), Text("-"), Text("a"));
+        t!(Markup[" /"]: "=-a"     => Text("="), Minus, Text("a"));
         t!(Markup[" "]: "#123"     => Text("#"), Text("123"));
 
         // Test text ends.
@@ -804,7 +798,7 @@ mod tests {
         t!(Markup["a1/"]: "- "  => Minus, Space(0));
         t!(Markup[" "]: "."     => EnumNumbering(None));
         t!(Markup[" "]: "1."    => EnumNumbering(Some(1)));
-        t!(Markup[" "]: "1.a"   => Text("1."), Text("a"));
+        t!(Markup[" "]: "1.a"   => EnumNumbering(Some(1)), Text("a"));
         t!(Markup[" /"]: "a1."  => Text("a1."));
     }
 
-- 
cgit v1.2.3


From 98c96ba1cb8a46e327de313118e4ce1a84795ae9 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 2 Jan 2022 14:46:08 +0100
Subject: Fix parser / space / error bug

---
 src/parse/incremental.rs |  1 +
 src/parse/parser.rs      | 16 ++--------------
 src/parse/tokens.rs      |  1 +
 3 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 1ee37a51..5cb016d2 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -623,6 +623,7 @@ mod tests {
         test("x = y", 1 .. 1, " + y\n", 0 .. 10);
         test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
         test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
+        test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
         test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
         test("hey  #myfriend", 4 .. 4, "\\", 3 .. 6);
 
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index b31f69d3..f36155d5 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -282,12 +282,6 @@ impl<'s> Parser<'s> {
                 self.eat();
                 rescan = false;
             } else if required {
-                // FIXME The error has to be inserted before any space rolls
-                // around because the rescan will set the cursor back in front
-                // of the space and reconsume it. Supressing the rescan is not
-                // an option since additional rescans (e.g. for statements) can
-                // be triggered directly afterwards, without processing any
-                // other token.
                 self.push_error(format_eco!("expected {}", end));
                 self.last_unterminated = Some(self.prev_end());
             }
@@ -380,14 +374,8 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
-        for i in (0 .. self.children.len()).rev() {
-            if Self::is_trivia_ext(self.children[i].kind(), false) {
-                self.children.remove(i);
-            } else {
-                break;
-            }
-        }
-        self.children.push(GreenData::new(error, 0).into());
+        let idx = self.trivia_start();
+        self.children.insert(idx.0, GreenData::new(error, 0).into());
     }
 
     /// Eat the current token and add an error that it is unexpected.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 3a0ad1ad..7dfca2bf 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -727,6 +727,7 @@ mod tests {
         t!(Both["a1/"]: "  \n"         => Space(1));
         t!(Both["a1/"]: "  \n   "      => Space(1));
         t!(Both["a1/"]: "\r\n"         => Space(1));
+        t!(Both["a1/"]: "\r\n\r"       => Space(2));
         t!(Both["a1/"]: "  \n\t \n  "  => Space(2));
         t!(Both["a1/"]: "\n\r"         => Space(2));
         t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
-- 
cgit v1.2.3


From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Mon, 3 Jan 2022 23:18:21 +0100
Subject: Code Review: Your parsers were so preoccupied with whether they could

---
 src/parse/incremental.rs | 255 +++++++++++++++++++++++++----------------------
 src/parse/mod.rs         | 147 ++++++++++++++++-----------
 src/parse/parser.rs      |  81 +++++++--------
 src/parse/scanner.rs     |  23 ++++-
 src/parse/tokens.rs      |  28 +++---
 5 files changed, 291 insertions(+), 243 deletions(-)

(limited to 'src/parse')

diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 5cb016d2..4c82f158 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -4,8 +4,8 @@ use std::rc::Rc;
 use crate::syntax::{Green, GreenNode, NodeKind};
 
 use super::{
-    parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
-    parse_markup_elements, parse_template, Scanner, TokenMode,
+    is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment,
+    parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode,
 };
 
 /// The conditions that a node has to fulfill in order to be replaced.
@@ -13,21 +13,21 @@ use super::{
 /// This can dictate if a node can be replaced at all and if yes, what can take
 /// its place.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Postcondition {
+pub enum SuccessionRule {
     /// Changing this node can never have an influence on the other nodes.
     Safe,
     /// This node has to be replaced with a single token of the same kind.
     SameKind(Option<TokenMode>),
-    /// Changing this node into a single atomic expression is allowed if it
-    /// appears in code mode, otherwise it is safe.
+    /// In code mode, this node can only be changed into a single atomic
+    /// expression, otherwise it is safe.
     AtomicPrimary,
-    /// Changing an unsafe layer node changes what the parents or the
-    /// surrounding nodes would be and is therefore disallowed. Change the
+    /// Changing an unsafe layer node in code mode changes what the parents or
+    /// the surrounding nodes would be and is therefore disallowed. Change the
     /// parents or children instead. If it appears in Markup, however, it is
     /// safe to change.
     UnsafeLayer,
-    /// Changing an unsafe node or any of its children will trigger undefined
-    /// behavior. Change the parents instead.
+    /// Changing an unsafe node or any of its children is not allowed. Change
+    /// the parents instead.
     Unsafe,
 }
 
@@ -37,11 +37,12 @@ pub enum Postcondition {
 /// existence is plausible with them present. This can be used to encode some
 /// context-free language components for incremental parsing.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Precondition {
+pub enum NeighbourRule {
     /// These nodes depend on being at the start of a line. Reparsing of safe
-    /// left neighbors has to check this invariant. Otherwise, this node is
-    /// safe. Additionally, the indentation of the first right non-trivia,
-    /// non-whitespace sibling must not be greater than the current indentation.
+    /// left neighbors has to check this invariant. Additionally, when
+    /// exchanging the right sibling or inserting such a node the indentation of
+    /// the first right non-trivia, non-whitespace sibling must not be greater
+    /// than the current indentation.
     AtStart,
     /// These nodes depend on not being at the start of a line. Reparsing of
     /// safe left neighbors has to check this invariant. Otherwise, this node is
@@ -77,8 +78,12 @@ impl<'a> Reparser<'a> {
 
 impl Reparser<'_> {
     /// Find the innermost child that is incremental safe.
-    pub fn reparse(&self, green: &mut GreenNode) -> Option<Range<usize>> {
-        self.reparse_step(green, 0, TokenMode::Markup, true)
+    pub fn reparse(&self, green: &mut Rc<GreenNode>) -> Range<usize> {
+        self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true)
+            .unwrap_or_else(|| {
+                *green = parse(self.src);
+                0 .. self.src.len()
+            })
     }
 
     fn reparse_step(
@@ -90,7 +95,7 @@ impl Reparser<'_> {
     ) -> Option<Range<usize>> {
         let mode = green.kind().mode().unwrap_or(parent_mode);
         let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
-        let child_count = green.children().len();
+        let original_count = green.children().len();
 
         // Save the current indent if this is a markup node.
         let indent = match green.kind() {
@@ -134,12 +139,14 @@ impl Reparser<'_> {
             // neighbor!
             if child_span.contains(&self.replace_range.end)
                 || self.replace_range.end == child_span.end
-                    && (mode != TokenMode::Markup || i + 1 == child_count)
+                    && (mode != TokenMode::Markup || i + 1 == original_count)
             {
-                outermost &= i + 1 == child_count;
+                outermost &= i + 1 == original_count;
                 last = Some((i, offset + child.len()));
                 break;
-            } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() {
+            } else if mode != TokenMode::Markup
+                || !child.kind().succession_rule().safe_in_markup()
+            {
                 break;
             }
 
@@ -147,17 +154,17 @@ impl Reparser<'_> {
         }
 
         let (last_idx, last_end) = last?;
-        let children_range = first_idx .. last_idx + 1;
-        let children_span = first_start .. last_end;
+        let superseded_range = first_idx .. last_idx + 1;
+        let superseded_span = first_start .. last_end;
         let last_kind = green.children()[last_idx].kind().clone();
 
         // First, we try if the child itself has another, more specific
         // applicable child.
-        if children_range.len() == 1 {
-            let child = &mut green.children_mut()[children_range.start];
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
             let prev_len = child.len();
 
-            if last_kind.post() != Postcondition::Unsafe {
+            if last_kind.succession_rule() != SuccessionRule::Unsafe {
                 if let Some(range) = match child {
                     Green::Node(node) => self.reparse_step(
                         Rc::make_mut(node),
@@ -168,56 +175,64 @@ impl Reparser<'_> {
                     Green::Token(_) => None,
                 } {
                     let new_len = child.len();
-                    green.update_child_len(new_len, prev_len);
+                    green.update_parent(new_len, prev_len);
                     return Some(range);
                 }
             }
         }
 
         // We only replace multiple children in markup mode.
-        if children_range.len() > 1 && mode == TokenMode::Code {
+        if superseded_range.len() > 1 && mode == TokenMode::Code {
             return None;
         }
 
         // We now have a child that we can replace and a function to do so.
         let func = last_kind.reparsing_func(child_mode, indent)?;
-        let post = last_kind.post();
+        let succession = last_kind.succession_rule();
 
-        let mut column = if mode == TokenMode::Markup {
-            // In this case, we want to pass the indentation to the function.
-            Scanner::new(self.src).column(children_span.start)
-        } else {
-            0
-        };
+        let mut markup_min_column = 0;
 
         // If this is a markup node, we want to save its indent instead to pass
         // the right indent argument.
-        if children_range.len() == 1 {
-            let child = &mut green.children_mut()[children_range.start];
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
             if let NodeKind::Markup(n) = child.kind() {
-                column = *n;
+                markup_min_column = *n;
             }
         }
 
         // The span of the to-be-reparsed children in the new source.
-        let replace_span = children_span.start
+        let newborn_span = superseded_span.start
             ..
-            children_span.end + self.replace_len - self.replace_range.len();
+            superseded_span.end + self.replace_len - self.replace_range.len();
 
         // For atomic primaries we need to pass in the whole remaining string to
         // check whether the parser would eat more stuff illicitly.
-        let reparse_span = if post == Postcondition::AtomicPrimary {
-            replace_span.start .. self.src.len()
+        let reparse_span = if succession == SuccessionRule::AtomicPrimary {
+            newborn_span.start .. self.src.len()
         } else {
-            replace_span.clone()
+            newborn_span.clone()
         };
 
+        let mut prefix = "";
+        for (i, c) in self.src[.. reparse_span.start].char_indices().rev() {
+            if is_newline(c) {
+                break;
+            }
+            prefix = &self.src[i .. reparse_span.start];
+        }
+
         // Do the reparsing!
-        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?;
+        let (mut newborns, terminated) = func(
+            &prefix,
+            &self.src[reparse_span.clone()],
+            at_start,
+            markup_min_column,
+        )?;
 
         // Make sure that atomic primaries ate only what they were supposed to.
-        if post == Postcondition::AtomicPrimary {
-            let len = replace_span.len();
+        if succession == SuccessionRule::AtomicPrimary {
+            let len = newborn_span.len();
             if newborns.len() > 1 && newborns[0].len() == len {
                 newborns.truncate(1);
             } else if newborns.iter().map(Green::len).sum::<usize>() != len {
@@ -234,16 +249,16 @@ impl Reparser<'_> {
         // If all post- and preconditions match, we are good to go!
         if validate(
             green.children(),
-            children_range.clone(),
+            superseded_range.clone(),
             at_start,
             &newborns,
             mode,
-            post,
-            replace_span.clone(),
+            succession,
+            newborn_span.clone(),
             self.src,
         ) {
-            green.replace_child_range(children_range, newborns);
-            Some(replace_span)
+            green.replace_children(superseded_range, newborns);
+            Some(newborn_span)
         } else {
             None
         }
@@ -252,27 +267,27 @@ impl Reparser<'_> {
 
 /// Validate that a node replacement is allowed by post- and preconditions.
 fn validate(
-    prev_children: &[Green],
-    children_range: Range<usize>,
+    superseded: &[Green],
+    superseded_range: Range<usize>,
     mut at_start: bool,
     newborns: &[Green],
     mode: TokenMode,
-    post: Postcondition,
-    replace_span: Range<usize>,
+    post: SuccessionRule,
+    newborn_span: Range<usize>,
     src: &str,
 ) -> bool {
     // Atomic primaries must only generate one new child.
-    if post == Postcondition::AtomicPrimary && newborns.len() != 1 {
+    if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 {
         return false;
     }
 
     // Same kind in mode `inside` must generate only one child and that child
     // must be of the same kind as previously.
-    if let Postcondition::SameKind(inside) = post {
-        let prev_kind = prev_children[children_range.start].kind();
-        let prev_mode = prev_kind.mode().unwrap_or(mode);
-        if inside.map_or(true, |m| m == prev_mode)
-            && (newborns.len() != 1 || prev_kind != newborns[0].kind())
+    if let SuccessionRule::SameKind(inside) = post {
+        let superseded_kind = superseded[superseded_range.start].kind();
+        let superseded_mode = superseded_kind.mode().unwrap_or(mode);
+        if inside.map_or(true, |m| m == superseded_mode)
+            && (newborns.len() != 1 || superseded_kind != newborns[0].kind())
         {
             return false;
         }
@@ -286,15 +301,15 @@ fn validate(
     // Check if there are any `AtStart` predecessors which require a certain
     // indentation.
     let s = Scanner::new(src);
-    let mut prev_pos = replace_span.start;
-    for child in (&prev_children[.. children_range.start]).iter().rev() {
+    let mut prev_pos = newborn_span.start;
+    for child in (&superseded[.. superseded_range.start]).iter().rev() {
         prev_pos -= child.len();
         if !child.kind().is_trivia() {
-            if child.kind().pre() == Precondition::AtStart {
+            if child.kind().neighbour_rule() == NeighbourRule::AtStart {
                 let left_col = s.column(prev_pos);
 
                 // Search for the first non-trivia newborn.
-                let mut new_pos = replace_span.start;
+                let mut new_pos = newborn_span.start;
                 let mut child_col = None;
                 for child in newborns {
                     if !child.kind().is_trivia() {
@@ -323,15 +338,15 @@ fn validate(
 
     // Ensure that a possible at-start or not-at-start precondition of
     // a node after the replacement range is satisfied.
-    for child in &prev_children[children_range.end ..] {
-        if !child.kind().is_trivia() {
-            let pre = child.kind().pre();
-            if (pre == Precondition::AtStart && !at_start)
-                || (pre == Precondition::NotAtStart && at_start)
-            {
-                return false;
-            }
+    for child in &superseded[superseded_range.end ..] {
+        let neighbour_rule = child.kind().neighbour_rule();
+        if (neighbour_rule == NeighbourRule::AtStart && !at_start)
+            || (neighbour_rule == NeighbourRule::NotAtStart && at_start)
+        {
+            return false;
+        }
 
+        if !child.kind().is_trivia() {
             break;
         }
 
@@ -339,42 +354,40 @@ fn validate(
     }
 
     // Verify that the last of the newborns is not `NotAtEnd`.
-    if newborns
-        .last()
-        .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd)
-    {
+    if newborns.last().map_or(false, |child| {
+        child.kind().neighbour_rule() == NeighbourRule::NotAtEnd
+    }) {
         return false;
     }
 
     // We have to check whether the last non-trivia newborn is `AtStart` and
     // verify the indent of its right neighbors in order to make sure its
     // indentation requirements are fulfilled.
-    let mut child_pos = replace_span.end;
-    let mut child_col = None;
+    let mut child_pos = newborn_span.end;
     for child in newborns.iter().rev() {
         child_pos -= child.len();
 
-        if !child.kind().is_trivia() {
-            if child.kind().pre() == Precondition::AtStart {
-                child_col = Some(s.column(child_pos));
-            }
-            break;
+        if child.kind().is_trivia() {
+            continue;
         }
-    }
 
-    if let Some(child_col) = child_col {
-        let mut right_pos = replace_span.end;
-        for child in &prev_children[children_range.end ..] {
-            if !child.kind().is_trivia() {
+        if child.kind().neighbour_rule() == NeighbourRule::AtStart {
+            let child_col = s.column(child_pos);
+
+            let mut right_pos = newborn_span.end;
+            for child in &superseded[superseded_range.end ..] {
+                if child.kind().is_trivia() {
+                    right_pos += child.len();
+                    continue;
+                }
+
                 if s.column(right_pos) > child_col {
                     return false;
                 }
-
                 break;
             }
-
-            right_pos += child.len();
         }
+        break;
     }
 
     true
@@ -387,13 +400,15 @@ impl NodeKind {
         &self,
         parent_mode: TokenMode,
         indent: usize,
-    ) -> Option<fn(&str, bool, usize) -> Option<(Vec<Green>, bool)>> {
+    ) -> Option<fn(&str, &str, bool, usize) -> Option<(Vec<Green>, bool)>> {
         let mode = self.mode().unwrap_or(parent_mode);
-        match self.post() {
-            Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
-            Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
-            Postcondition::AtomicPrimary => Some(parse_atomic_markup),
-            Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+        match self.succession_rule() {
+            SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
+            SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
+                Some(parse_atomic)
+            }
+            SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
+            SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
                 NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
                 NodeKind::Block => Some(parse_block),
@@ -409,7 +424,7 @@ impl NodeKind {
 
     /// Whether it is safe to do incremental parsing on this node. Never allow
     /// non-termination errors if this is not already the last leaf node.
-    pub fn post(&self) -> Postcondition {
+    pub fn succession_rule(&self) -> SuccessionRule {
         match self {
             // Replacing parenthesis changes if the expression is balanced and
             // is therefore not safe.
@@ -418,7 +433,7 @@ impl NodeKind {
             | Self::LeftBrace
             | Self::RightBrace
             | Self::LeftParen
-            | Self::RightParen => Postcondition::Unsafe,
+            | Self::RightParen => SuccessionRule::Unsafe,
 
             // Replacing an operator can change whether the parent is an
             // operation which makes it unsafe. The star can appear in markup.
@@ -445,7 +460,7 @@ impl NodeKind {
             | Self::Or
             | Self::With
             | Self::Dots
-            | Self::Arrow => Postcondition::Unsafe,
+            | Self::Arrow => SuccessionRule::Unsafe,
 
             // These keywords change what kind of expression the parent is and
             // how far the expression would go.
@@ -461,14 +476,14 @@ impl NodeKind {
             | Self::Return
             | Self::Import
             | Self::Include
-            | Self::From => Postcondition::Unsafe,
+            | Self::From => SuccessionRule::Unsafe,
 
             // Changing the heading level, enum numbering, or list bullet
             // changes the next layer.
-            Self::EnumNumbering(_) => Postcondition::Unsafe,
+            Self::EnumNumbering(_) => SuccessionRule::Unsafe,
 
             // This can be anything, so we don't make any promises.
-            Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
+            Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe,
 
             // These are complex expressions which may screw with their
             // environments.
@@ -477,33 +492,33 @@ impl NodeKind {
             | Self::Binary
             | Self::CallArgs
             | Self::Named
-            | Self::Spread => Postcondition::UnsafeLayer,
+            | Self::Spread => SuccessionRule::UnsafeLayer,
 
             // The closure is a bit magic with the let expression, and also it
             // is not atomic.
-            Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer,
+            Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer,
 
             // Missing these creates errors for the parents.
             Self::WithExpr | Self::ForPattern | Self::ImportItems => {
-                Postcondition::UnsafeLayer
+                SuccessionRule::UnsafeLayer
             }
 
             // Only markup is expected at the points where it does occur. The
             // indentation must be preserved as well, also for the children.
-            Self::Markup(_) => Postcondition::SameKind(None),
+            Self::Markup(_) => SuccessionRule::SameKind(None),
 
             // These can appear everywhere and must not change to other stuff
             // because that could change the outer expression.
-            Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
+            Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None),
 
             // These can appear as bodies and would trigger an error if they
             // became something else.
-            Self::Template => Postcondition::SameKind(None),
-            Self::Block => Postcondition::SameKind(Some(TokenMode::Code)),
+            Self::Template => SuccessionRule::SameKind(None),
+            Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)),
 
             // Whitespace in code mode has to remain whitespace or else the type
             // of things would change.
-            Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
+            Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)),
 
             // These are expressions that can be replaced by other expressions.
             Self::Ident(_)
@@ -519,7 +534,7 @@ impl NodeKind {
             | Self::Dict
             | Self::Group
             | Self::None
-            | Self::Auto => Postcondition::AtomicPrimary,
+            | Self::Auto => SuccessionRule::AtomicPrimary,
 
             // More complex, but still an expression.
             Self::ForExpr
@@ -528,11 +543,11 @@ impl NodeKind {
             | Self::LetExpr
             | Self::SetExpr
             | Self::ImportExpr
-            | Self::IncludeExpr => Postcondition::AtomicPrimary,
+            | Self::IncludeExpr => SuccessionRule::AtomicPrimary,
 
             // This element always has to remain in the same column so better
             // reparse the whole parent.
-            Self::Raw(_) => Postcondition::Unsafe,
+            Self::Raw(_) => SuccessionRule::Unsafe,
 
             // These are all replaceable by other tokens.
             Self::Parbreak
@@ -548,22 +563,22 @@ impl NodeKind {
             | Self::Heading
             | Self::Enum
             | Self::List
-            | Self::Math(_) => Postcondition::Safe,
+            | Self::Math(_) => SuccessionRule::Safe,
         }
     }
 
     /// The appropriate precondition for the type.
-    pub fn pre(&self) -> Precondition {
+    pub fn neighbour_rule(&self) -> NeighbourRule {
         match self {
-            Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
-            Self::TextInLine(_) => Precondition::NotAtStart,
-            Self::Error(_, _) => Precondition::NotAtEnd,
-            _ => Precondition::None,
+            Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart,
+            Self::TextInLine(_) => NeighbourRule::NotAtStart,
+            Self::Error(_, _) => NeighbourRule::NotAtEnd,
+            _ => NeighbourRule::None,
         }
     }
 }
 
-impl Postcondition {
+impl SuccessionRule {
     /// Whether a node with this condition can be reparsed in markup mode.
     pub fn safe_in_markup(&self) -> bool {
         match self {
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f4826730..a9752645 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -29,72 +29,102 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Code);
+pub fn parse_atomic(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     primary(&mut p, true).ok()?;
-    p.eject_partial()
+    p.consume_unterminated()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
+pub fn parse_atomic_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     markup_expr(&mut p);
-    p.eject_partial()
+    p.consume_unterminated()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
-    if column == 0 {
+pub fn parse_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    min_column: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+    if min_column == 0 {
         markup(&mut p);
     } else {
-        markup_indented(&mut p, column);
+        markup_indented(&mut p, min_column);
     }
-    p.eject()
+    p.consume()
 }
 
 /// Parse some markup without the topmost node. Returns `Some` if all of the
 /// input was consumed.
 pub fn parse_markup_elements(
+    prefix: &str,
     src: &str,
     mut at_start: bool,
-    column: usize,
+    _: usize,
 ) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
-    p.offset(column);
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
     }
-    p.eject()
+    p.consume()
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_template(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     if !p.at(&NodeKind::LeftBracket) {
         return None;
     }
 
     template(&mut p);
-    p.eject()
+    p.consume()
 }
 
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_block(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     if !p.at(&NodeKind::LeftBrace) {
         return None;
     }
 
     block(&mut p);
-    p.eject()
+    p.consume()
 }
 
 /// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_comment(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     comment(&mut p).ok()?;
-    p.eject()
+    p.consume()
 }
 
 /// Parse markup.
@@ -111,7 +141,7 @@ fn markup_indented(p: &mut Parser, column: usize) {
     });
 
     markup_while(p, false, column, &mut |p| match p.peek() {
-        Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column,
+        Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
         _ => true,
     })
 }
@@ -170,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
             p.eat();
         }
 
-        NodeKind::Eq if *at_start => heading(p),
-        NodeKind::Minus if *at_start => list_node(p),
-        NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
-
-        // Line-based markup that is not currently at the start of the line.
-        NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
-            p.convert(NodeKind::TextInLine(p.peek_src().into()))
-        }
+        NodeKind::Eq => heading(p, *at_start),
+        NodeKind::Minus => list_node(p, *at_start),
+        NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
 
         // Hashtag + keyword / identifier.
         NodeKind::Ident(_)
@@ -201,42 +226,49 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
 }
 
 /// Parse a heading.
-fn heading(p: &mut Parser) {
-    p.perform(NodeKind::Heading, |p| {
-        p.eat_assert(&NodeKind::Eq);
-        while p.eat_if(&NodeKind::Eq) {}
+fn heading(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let current_start = p.current_start();
+    p.eat_assert(&NodeKind::Eq);
+    while p.eat_if(&NodeKind::Eq) {}
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Heading);
+    } else {
+        let text = p.get(current_start .. p.prev_end()).into();
+        marker.convert(p, NodeKind::TextInLine(text));
+    }
 }
 
 /// Parse a single list item.
-fn list_node(p: &mut Parser) {
+fn list_node(p: &mut Parser, at_start: bool) {
     let marker = p.marker();
-    let src: EcoString = p.peek_src().into();
+    let text: EcoString = p.peek_src().into();
     p.eat_assert(&NodeKind::Minus);
 
-    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
         marker.end(p, NodeKind::List);
     } else {
-        marker.convert(p, NodeKind::TextInLine(src));
+        marker.convert(p, NodeKind::TextInLine(text));
     }
 }
 
 /// Parse a single enum item.
-fn enum_node(p: &mut Parser) {
+fn enum_node(p: &mut Parser, at_start: bool) {
     let marker = p.marker();
-    let src: EcoString = p.peek_src().into();
+    let text: EcoString = p.peek_src().into();
     p.eat();
 
-    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
         marker.end(p, NodeKind::Enum);
     } else {
-        marker.convert(p, NodeKind::TextInLine(src));
+        marker.convert(p, NodeKind::TextInLine(text));
     }
 }
 
@@ -582,23 +614,18 @@ fn template(p: &mut Parser) {
 fn block(p: &mut Parser) {
     p.perform(NodeKind::Block, |p| {
         p.start_group(Group::Brace);
-        expr_list(p);
-        p.end_group();
-    });
-}
+        while !p.eof() {
+            p.start_group(Group::Stmt);
+            if expr(p).is_ok() && !p.eof() {
+                p.expected_at("semicolon or line break");
+            }
+            p.end_group();
 
-/// Parse a number of code expressions.
-fn expr_list(p: &mut Parser) {
-    while !p.eof() {
-        p.start_group(Group::Stmt);
-        if expr(p).is_ok() && !p.eof() {
-            p.expected_at("semicolon or line break");
+            // Forcefully skip over newlines since the group's contents can't.
+            p.eat_while(|t| matches!(t, NodeKind::Space(_)));
         }
         p.end_group();
-
-        // Forcefully skip over newlines since the group's contents can't.
-        p.eat_while(|t| matches!(t, NodeKind::Space(_)));
-    }
+    });
 }
 
 /// Parse a function call.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f36155d5..4e5b277d 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,8 @@
+use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
 
-use super::{TokenMode, Tokens};
+use super::{Scanner, TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
@@ -24,8 +25,7 @@ pub struct Parser<'s> {
     /// Is `Some` if there is an unterminated group at the last position where
     /// groups were terminated.
     last_unterminated: Option<usize>,
-    /// Offset the indentation. This can be used if the parser is processing a
-    /// subslice of the source and there was leading indent.
+    /// Offsets the indentation on the first line of the source.
     column_offset: usize,
 }
 
@@ -47,18 +47,31 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Create a new parser for the source string that is prefixed by some text
+    /// that does not need to be parsed but taken into account for column
+    /// calculation.
+    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
+        let mut p = Self::new(src, mode);
+        p.column_offset = Scanner::new(prefix).column(prefix.len());
+        p
+    }
+
     /// End the parsing process and return the last child.
     pub fn finish(self) -> Vec<Green> {
         self.children
     }
 
-    /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<(Vec<Green>, bool)> {
-        if self.eof() && self.group_success() {
-            Some((self.children, self.tokens.was_terminated()))
-        } else {
-            None
-        }
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated.
+    pub fn consume(self) -> Option<(Vec<Green>, bool)> {
+        (self.eof() && self.terminated())
+            .then(|| (self.children, self.tokens.terminated()))
+    }
+
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated, even if there remains stuff in the string.
+    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+        self.terminated().then(|| (self.children, self.tokens.terminated()))
     }
 
     /// Create a new marker.
@@ -100,18 +113,6 @@ impl<'s> Parser<'s> {
         output
     }
 
-    /// End the parsing process and return multiple children, even if there
-    /// remains stuff in the string.
-    pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
-        self.group_success()
-            .then(|| (self.children, self.tokens.was_terminated()))
-    }
-
-    /// Set an indentation offset.
-    pub fn offset(&mut self, columns: usize) {
-        self.column_offset = columns;
-    }
-
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -199,6 +200,14 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().get(self.current_start() .. self.current_end())
     }
 
+    /// Obtain a range of the source code.
+    pub fn get<I>(&self, index: I) -> &'s str
+    where
+        I: SliceIndex<str, Output = str>,
+    {
+        self.tokens.scanner().get(index)
+    }
+
     /// The byte index at which the last non-trivia token ended.
     pub fn prev_end(&self) -> usize {
         self.prev_end
@@ -216,13 +225,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index) + self.column_offset
-    }
-
-    /// Determine the column index for the given byte index while ignoring the
-    /// offset.
-    pub fn clean_column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index)
+        self.tokens.scanner().column_offset(index, self.column_offset)
     }
 
     /// Continue parsing in a group.
@@ -260,10 +263,8 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        if let Some(n) = self.last_unterminated {
-            if n != self.prev_end() {
-                self.last_unterminated = None;
-            }
+        if self.last_unterminated != Some(self.prev_end()) {
+            self.last_unterminated = None;
         }
 
         let mut rescan = self.tokens.mode() != group_mode;
@@ -301,23 +302,15 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// Check if the group processing was successfully terminated.
-    pub fn group_success(&self) -> bool {
-        self.last_unterminated.is_none() && self.groups.is_empty()
+    /// Checks if all groups were correctly terminated.
+    pub fn terminated(&self) -> bool {
+        self.groups.is_empty() && self.last_unterminated.is_none()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
-        if match kind {
-            NodeKind::Space(n) if n > 0 => true,
-            NodeKind::Parbreak => true,
-            _ => false,
-        } {
-            self.column_offset = 0;
-        }
-
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index c735be40..6db89132 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -162,11 +162,26 @@ impl<'s> Scanner<'s> {
     /// The column index of a given index in the source string.
     #[inline]
     pub fn column(&self, index: usize) -> usize {
-        self.src[.. index]
-            .chars()
+        self.column_offset(index, 0)
+    }
+
+    /// The column index of a given index in the source string when an offset is
+    /// applied to the first line of the string.
+    #[inline]
+    pub fn column_offset(&self, index: usize, offset: usize) -> usize {
+        let mut apply_offset = false;
+        let res = self.src[.. index]
+            .char_indices()
             .rev()
-            .take_while(|&c| !is_newline(c))
-            .count()
+            .take_while(|&(_, c)| !is_newline(c))
+            .inspect(|&(i, _)| {
+                if i == 0 {
+                    apply_offset = true
+                }
+            })
+            .count();
+
+        if apply_offset { res + offset } else { res }
     }
 }
 
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 7dfca2bf..69c4d2de 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -13,7 +13,7 @@ use crate::util::EcoString;
 pub struct Tokens<'s> {
     s: Scanner<'s>,
     mode: TokenMode,
-    was_terminated: bool,
+    terminated: bool,
 }
 
 /// What kind of tokens to emit.
@@ -32,7 +32,7 @@ impl<'s> Tokens<'s> {
         Self {
             s: Scanner::new(src),
             mode,
-            was_terminated: true,
+            terminated: true,
         }
     }
 
@@ -71,8 +71,8 @@ impl<'s> Tokens<'s> {
 
     /// Whether the last token was terminated.
     #[inline]
-    pub fn was_terminated(&self) -> bool {
-        self.was_terminated
+    pub fn terminated(&self) -> bool {
+        self.terminated
     }
 }
 
@@ -128,9 +128,7 @@ impl<'s> Tokens<'s> {
             '`' => self.raw(),
             '$' => self.math(),
             '-' => self.hyph(),
-            '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => {
-                NodeKind::Eq
-            }
+            '=' => NodeKind::Eq,
             c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
 
             // Plain text.
@@ -259,7 +257,7 @@ impl<'s> Tokens<'s> {
                             )
                         }
                     } else {
-                        self.was_terminated = false;
+                        self.terminated = false;
                         NodeKind::Error(
                             ErrorPos::End,
                             "expected closing brace".into(),
@@ -352,7 +350,7 @@ impl<'s> Tokens<'s> {
             let remaining = backticks - found;
             let noun = if remaining == 1 { "backtick" } else { "backticks" };
 
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if found == 0 {
@@ -400,7 +398,7 @@ impl<'s> Tokens<'s> {
                 display,
             }))
         } else {
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if !display || (!escaped && dollar) {
@@ -489,7 +487,7 @@ impl<'s> Tokens<'s> {
         if self.s.eat_if('"') {
             NodeKind::Str(string)
         } else {
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(ErrorPos::End, "expected quote".into())
         }
     }
@@ -497,7 +495,7 @@ impl<'s> Tokens<'s> {
     fn line_comment(&mut self) -> NodeKind {
         self.s.eat_until(is_newline);
         if self.s.peek().is_none() {
-            self.was_terminated = false;
+            self.terminated = false;
         }
         NodeKind::LineComment
     }
@@ -505,7 +503,7 @@ impl<'s> Tokens<'s> {
     fn block_comment(&mut self) -> NodeKind {
         let mut state = '_';
         let mut depth = 1;
-        self.was_terminated = false;
+        self.terminated = false;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
         while let Some(c) = self.s.eat() {
@@ -513,7 +511,7 @@ impl<'s> Tokens<'s> {
                 ('*', '/') => {
                     depth -= 1;
                     if depth == 0 {
-                        self.was_terminated = true;
+                        self.terminated = true;
                         break;
                     }
                     '_'
@@ -742,7 +740,7 @@ mod tests {
         // Test code symbols in text.
         t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
         t!(Markup[" /"]: ";:,|/+"  => Text(";:,|"), Text("/+"));
-        t!(Markup[" /"]: "=-a"     => Text("="), Minus, Text("a"));
+        t!(Markup[" /"]: "=-a"     => Eq, Minus, Text("a"));
         t!(Markup[" "]: "#123"     => Text("#"), Text("123"));
 
         // Test text ends.
-- 
cgit v1.2.3