From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sat, 6 Nov 2021 16:07:21 +0100
Subject: Incremental-safety based approach

---
 src/parse/parser.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'src/parse/parser.rs')
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index af8a7c5c..f391c473 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,6 +21,8 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
+    /// Whether the last group was terminated.
+    last_group_terminated: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -36,6 +38,7 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
+            last_group_terminated: true,
         }
     }
 
@@ -44,6 +47,15 @@ impl<'s> Parser<'s> {
         self.children
     }
 
+    /// End the parsing process and return multiple children.
+    pub fn eject(self) -> Option<Vec<Green>> {
+        if self.eof() && self.group_success() {
+            Some(self.children)
+        } else {
+            None
+        }
+    }
+
     /// Create a new marker.
     pub fn marker(&mut self) -> Marker {
         Marker(self.children.len())
@@ -190,6 +202,11 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().column(index)
     }
 
+    /// Set the tokenizer's mode.
+    pub fn set_mode(&mut self, mode: TokenMode) {
+        self.tokens.set_mode(mode);
+    }
+
     /// Continue parsing in a group.
     ///
     /// When the end delimiter of the group is reached, all subsequent calls to
@@ -225,6 +242,7 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
+        self.last_group_terminated = true;
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -243,6 +261,7 @@ impl<'s> Parser<'s> {
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
+                self.last_group_terminated = false;
             }
         }
 
@@ -260,6 +279,11 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Check if the group processing was successfully terminated.
+    pub fn group_success(&self) -> bool {
+        self.last_group_terminated && self.groups.is_empty()
+    }
+
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
-- 
cgit v1.2.3


From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 7 Nov 2021 19:43:01 +0100
Subject: Tests

- length updates
- dealing with keywords and comments
---
 src/parse/parser.rs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f391c473..451e18f1 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -27,8 +27,8 @@ pub struct Parser<'s> {
 
 impl<'s> Parser<'s> {
     /// Create a new parser for the source string.
-    pub fn new(src: &'s str) -> Self {
-        let mut tokens = Tokens::new(src, TokenMode::Markup);
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
+        let mut tokens = Tokens::new(src, mode);
         let current = tokens.next();
         Self {
             tokens,
@@ -202,11 +202,6 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().column(index)
     }
 
-    /// Set the tokenizer's mode.
-    pub fn set_mode(&mut self, mode: TokenMode) {
-        self.tokens.set_mode(mode);
-    }
-
     /// Continue parsing in a group.
     ///
     /// When the end delimiter of the group is reached, all subsequent calls to
-- 
cgit v1.2.3


From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Mon, 8 Nov 2021 12:01:35 +0100
Subject: Deal with the effects of keywords

---
 src/parse/parser.rs | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 451e18f1..31c918a8 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -95,6 +95,12 @@ impl<'s> Parser<'s> {
         output
     }
 
+    /// End the parsing process and return multiple children, even if there
+    /// remains stuff in the string.
+    pub fn eject_partial(self) -> Option<Vec<Green>> {
+        self.group_success().then(|| self.children)
+    }
+
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
-- 
cgit v1.2.3


From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Wed, 10 Nov 2021 20:41:10 +0100
Subject: Comments and neighbors

---
 src/parse/parser.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 31c918a8..a37cb9c6 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -48,9 +48,9 @@ impl<'s> Parser<'s> {
     }
 
     /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<Vec<Green>> {
+    pub fn eject(self) -> Option<(Vec<Green>, bool)>{
         if self.eof() && self.group_success() {
-            Some(self.children)
+            Some((self.children, self.tokens.was_unterminated()))
         } else {
             None
         }
@@ -97,8 +97,9 @@ impl<'s> Parser<'s> {
 
     /// End the parsing process and return multiple children, even if there
     /// remains stuff in the string.
-    pub fn eject_partial(self) -> Option<Vec<Green>> {
-        self.group_success().then(|| self.children)
+    pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
+        self.group_success()
+            .then(|| (self.children, self.tokens.was_unterminated()))
     }
 
     /// Whether the end of the source string or group is reached.
-- 
cgit v1.2.3


From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Thu, 18 Nov 2021 16:21:45 +0100
Subject: Refactoring and bugfixes

---
 src/parse/parser.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index a37cb9c6..06cb1578 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -48,7 +48,7 @@ impl<'s> Parser<'s> {
     }
 
     /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<(Vec<Green>, bool)>{
+    pub fn eject(self) -> Option<(Vec<Green>, bool)> {
         if self.eof() && self.group_success() {
             Some((self.children, self.tokens.was_unterminated()))
         } else {
-- 
cgit v1.2.3


From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 28 Nov 2021 18:18:45 +0100
Subject: Code Review: Parser, I can't let you do this

---
 src/parse/parser.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 06cb1578..ade9b5df 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -22,7 +22,7 @@ pub struct Parser<'s> {
     /// The children of the currently built node.
     children: Vec<Green>,
     /// Whether the last group was terminated.
-    last_group_terminated: bool,
+    last_terminated: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -38,7 +38,7 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_group_terminated: true,
+            last_terminated: true,
         }
     }
 
@@ -50,7 +50,7 @@ impl<'s> Parser<'s> {
     /// End the parsing process and return multiple children.
     pub fn eject(self) -> Option<(Vec<Green>, bool)> {
         if self.eof() && self.group_success() {
-            Some((self.children, self.tokens.was_unterminated()))
+            Some((self.children, self.tokens.was_terminated()))
         } else {
             None
         }
@@ -99,7 +99,7 @@ impl<'s> Parser<'s> {
     /// remains stuff in the string.
     pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
         self.group_success()
-            .then(|| (self.children, self.tokens.was_unterminated()))
+            .then(|| (self.children, self.tokens.was_terminated()))
     }
 
     /// Whether the end of the source string or group is reached.
@@ -244,7 +244,7 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        self.last_group_terminated = true;
+        self.last_terminated = true;
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -263,7 +263,7 @@ impl<'s> Parser<'s> {
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
-                self.last_group_terminated = false;
+                self.last_terminated = false;
             }
         }
 
@@ -283,7 +283,7 @@ impl<'s> Parser<'s> {
 
     /// Check if the group processing was successfully terminated.
     pub fn group_success(&self) -> bool {
-        self.last_group_terminated && self.groups.is_empty()
+        self.last_terminated && self.groups.is_empty()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
-- 
cgit v1.2.3


From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 2 Jan 2022 00:46:19 +0100
Subject: Added a test framework for incremental parsing

Fix several errors:

- Indented markup is now reparsed correctly
- All end group errors will now fail a reparse
- Rightmost errors will always fail a reparse
---
 src/parse/parser.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 6 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index ade9b5df..b31f69d3 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,8 +21,12 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
-    /// Whether the last group was terminated.
-    last_terminated: bool,
+    /// Is `Some` if there is an unterminated group at the last position where
+    /// groups were terminated.
+    last_unterminated: Option<usize>,
+    /// Offset the indentation. This can be used if the parser is processing a
+    /// subslice of the source and there was leading indent.
+    column_offset: usize,
 }
 
 impl<'s> Parser<'s> {
@@ -38,7 +42,8 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_terminated: true,
+            last_unterminated: None,
+            column_offset: 0,
         }
     }
 
@@ -102,6 +107,11 @@ impl<'s> Parser<'s> {
             .then(|| (self.children, self.tokens.was_terminated()))
     }
 
+    /// Set an indentation offset.
+    pub fn offset(&mut self, columns: usize) {
+        self.column_offset = columns;
+    }
+
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -206,6 +216,12 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
+        self.tokens.scanner().column(index) + self.column_offset
+    }
+
+    /// Determine the column index for the given byte index while ignoring the
+    /// offset.
+    pub fn clean_column(&self, index: usize) -> usize {
         self.tokens.scanner().column(index)
     }
 
@@ -244,7 +260,11 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        self.last_terminated = true;
+        if let Some(n) = self.last_unterminated {
+            if n != self.prev_end() {
+                self.last_unterminated = None;
+            }
+        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -262,8 +282,14 @@ impl<'s> Parser<'s> {
                 self.eat();
                 rescan = false;
             } else if required {
+                // FIXME The error has to be inserted before any space rolls
+                // around because the rescan will set the cursor back in front
+                // of the space and reconsume it. Supressing the rescan is not
+                // an option since additional rescans (e.g. for statements) can
+                // be triggered directly afterwards, without processing any
+                // other token.
                 self.push_error(format_eco!("expected {}", end));
-                self.last_terminated = false;
+                self.last_unterminated = Some(self.prev_end());
             }
         }
 
@@ -283,13 +309,21 @@ impl<'s> Parser<'s> {
 
     /// Check if the group processing was successfully terminated.
     pub fn group_success(&self) -> bool {
-        self.last_terminated && self.groups.is_empty()
+        self.last_unterminated.is_none() && self.groups.is_empty()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
+        if match kind {
+            NodeKind::Space(n) if n > 0 => true,
+            NodeKind::Parbreak => true,
+            _ => false,
+        } {
+            self.column_offset = 0;
+        }
+
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
@@ -346,6 +380,13 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
+        for i in (0 .. self.children.len()).rev() {
+            if Self::is_trivia_ext(self.children[i].kind(), false) {
+                self.children.remove(i);
+            } else {
+                break;
+            }
+        }
         self.children.push(GreenData::new(error, 0).into());
     }
 
@@ -445,6 +486,7 @@ impl Marker {
 }
 
 /// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
 struct GroupEntry {
     /// The kind of group this is. This decides which tokens will end the group.
     /// For example, a [`Group::Paren`] will be ended by
-- 
cgit v1.2.3


From 98c96ba1cb8a46e327de313118e4ce1a84795ae9 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Sun, 2 Jan 2022 14:46:08 +0100
Subject: Fix parser / space / error bug

---
 src/parse/parser.rs | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index b31f69d3..f36155d5 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -282,12 +282,6 @@ impl<'s> Parser<'s> {
                 self.eat();
                 rescan = false;
             } else if required {
-                // FIXME The error has to be inserted before any space rolls
-                // around because the rescan will set the cursor back in front
-                // of the space and reconsume it. Supressing the rescan is not
-                // an option since additional rescans (e.g. for statements) can
-                // be triggered directly afterwards, without processing any
-                // other token.
                 self.push_error(format_eco!("expected {}", end));
                 self.last_unterminated = Some(self.prev_end());
             }
@@ -380,14 +374,8 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
-        for i in (0 .. self.children.len()).rev() {
-            if Self::is_trivia_ext(self.children[i].kind(), false) {
-                self.children.remove(i);
-            } else {
-                break;
-            }
-        }
-        self.children.push(GreenData::new(error, 0).into());
+        let idx = self.trivia_start();
+        self.children.insert(idx.0, GreenData::new(error, 0).into());
     }
 
     /// Eat the current token and add an error that it is unexpected.
-- 
cgit v1.2.3


From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001
From: Martin Haug <mhaug@live.de>
Date: Mon, 3 Jan 2022 23:18:21 +0100
Subject: Code Review: Your parsers were so preoccupied with whether they could

---
 src/parse/parser.rs | 81 ++++++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 44 deletions(-)

(limited to 'src/parse/parser.rs')

diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f36155d5..4e5b277d 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,8 @@
+use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
 
-use super::{TokenMode, Tokens};
+use super::{Scanner, TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
@@ -24,8 +25,7 @@ pub struct Parser<'s> {
     /// Is `Some` if there is an unterminated group at the last position where
     /// groups were terminated.
     last_unterminated: Option<usize>,
-    /// Offset the indentation. This can be used if the parser is processing a
-    /// subslice of the source and there was leading indent.
+    /// Offsets the indentation on the first line of the source.
     column_offset: usize,
 }
 
@@ -47,18 +47,31 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Create a new parser for the source string that is prefixed by some text
+    /// that does not need to be parsed but taken into account for column
+    /// calculation.
+    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
+        let mut p = Self::new(src, mode);
+        p.column_offset = Scanner::new(prefix).column(prefix.len());
+        p
+    }
+
     /// End the parsing process and return the last child.
     pub fn finish(self) -> Vec<Green> {
         self.children
     }
 
-    /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<(Vec<Green>, bool)> {
-        if self.eof() && self.group_success() {
-            Some((self.children, self.tokens.was_terminated()))
-        } else {
-            None
-        }
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated.
+    pub fn consume(self) -> Option<(Vec<Green>, bool)> {
+        (self.eof() && self.terminated())
+            .then(|| (self.children, self.tokens.terminated()))
+    }
+
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated, even if there remains stuff in the string.
+    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+        self.terminated().then(|| (self.children, self.tokens.terminated()))
     }
 
     /// Create a new marker.
@@ -100,18 +113,6 @@ impl<'s> Parser<'s> {
         output
     }
 
-    /// End the parsing process and return multiple children, even if there
-    /// remains stuff in the string.
-    pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
-        self.group_success()
-            .then(|| (self.children, self.tokens.was_terminated()))
-    }
-
-    /// Set an indentation offset.
-    pub fn offset(&mut self, columns: usize) {
-        self.column_offset = columns;
-    }
-
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -199,6 +200,14 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().get(self.current_start() .. self.current_end())
     }
 
+    /// Obtain a range of the source code.
+    pub fn get<I>(&self, index: I) -> &'s str
+    where
+        I: SliceIndex<str, Output = str>,
+    {
+        self.tokens.scanner().get(index)
+    }
+
     /// The byte index at which the last non-trivia token ended.
     pub fn prev_end(&self) -> usize {
         self.prev_end
@@ -216,13 +225,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index) + self.column_offset
-    }
-
-    /// Determine the column index for the given byte index while ignoring the
-    /// offset.
-    pub fn clean_column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index)
+        self.tokens.scanner().column_offset(index, self.column_offset)
     }
 
     /// Continue parsing in a group.
@@ -260,10 +263,8 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        if let Some(n) = self.last_unterminated {
-            if n != self.prev_end() {
-                self.last_unterminated = None;
-            }
+        if self.last_unterminated != Some(self.prev_end()) {
+            self.last_unterminated = None;
         }
 
         let mut rescan = self.tokens.mode() != group_mode;
@@ -301,23 +302,15 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// Check if the group processing was successfully terminated.
-    pub fn group_success(&self) -> bool {
-        self.last_unterminated.is_none() && self.groups.is_empty()
+    /// Checks if all groups were correctly terminated.
+    pub fn terminated(&self) -> bool {
+        self.groups.is_empty() && self.last_unterminated.is_none()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
-        if match kind {
-            NodeKind::Space(n) if n > 0 => true,
-            NodeKind::Parbreak => true,
-            _ => false,
-        } {
-            self.column_offset = 0;
-        }
-
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
-- 
cgit v1.2.3