From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 6 Nov 2021 16:07:21 +0100 Subject: Incremental-safety based approach --- src/parse/parser.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index af8a7c5c..f391c473 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -21,6 +21,8 @@ pub struct Parser<'s> { groups: Vec<GroupEntry>, /// The children of the currently built node. children: Vec<Green>, + /// Whether the last group was terminated. + last_group_terminated: bool, } impl<'s> Parser<'s> { @@ -36,6 +38,7 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], + last_group_terminated: true, } } @@ -44,6 +47,15 @@ impl<'s> Parser<'s> { self.children } + /// End the parsing process and return multiple children. + pub fn eject(self) -> Option<Vec<Green>> { + if self.eof() && self.group_success() { + Some(self.children) + } else { + None + } + } + /// Create a new marker. pub fn marker(&mut self) -> Marker { Marker(self.children.len()) @@ -190,6 +202,11 @@ impl<'s> Parser<'s> { self.tokens.scanner().column(index) } + /// Set the tokenizer's mode. + pub fn set_mode(&mut self, mode: TokenMode) { + self.tokens.set_mode(mode); + } + /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to @@ -225,6 +242,7 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); + self.last_group_terminated = true; let mut rescan = self.tokens.mode() != group_mode; @@ -243,6 +261,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); + self.last_group_terminated = false; } } @@ -260,6 +279,11 @@ impl<'s> Parser<'s> { } } + /// Check if the group processing was successfully terminated.
+ pub fn group_success(&self) -> bool { + self.last_group_terminated && self.groups.is_empty() + } + /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { -- cgit v1.2.3 From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 7 Nov 2021 19:43:01 +0100 Subject: Tests - length updates - dealing with keywords and comments --- src/parse/parser.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f391c473..451e18f1 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -27,8 +27,8 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); + pub fn new(src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::new(src, mode); let current = tokens.next(); Self { tokens, @@ -202,11 +202,6 @@ impl<'s> Parser<'s> { self.tokens.scanner().column(index) } - /// Set the tokenizer's mode. - pub fn set_mode(&mut self, mode: TokenMode) { - self.tokens.set_mode(mode); - } - /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to -- cgit v1.2.3 From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 8 Nov 2021 12:01:35 +0100 Subject: Deal with the effects of keywords --- src/parse/parser.rs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 451e18f1..31c918a8 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -95,6 +95,12 @@ impl<'s> Parser<'s> { output } + /// End the parsing process and return multiple children, even if there + /// remains stuff in the string. 
+ pub fn eject_partial(self) -> Option<Vec<Green>> { + self.group_success().then(|| self.children) + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof -- cgit v1.2.3 From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 10 Nov 2021 20:41:10 +0100 Subject: Comments and neighbors --- src/parse/parser.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 31c918a8..a37cb9c6 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -48,9 +48,9 @@ impl<'s> Parser<'s> { } /// End the parsing process and return multiple children. - pub fn eject(self) -> Option<Vec<Green>> { + pub fn eject(self) -> Option<(Vec<Green>, bool)>{ if self.eof() && self.group_success() { - Some(self.children) + Some((self.children, self.tokens.was_unterminated())) } else { None } @@ -97,8 +97,9 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children, even if there /// remains stuff in the string. - pub fn eject_partial(self) -> Option<Vec<Green>> { - self.group_success().then(|| self.children) + pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> { + self.group_success() + .then(|| (self.children, self.tokens.was_unterminated())) } /// Whether the end of the source string or group is reached. -- cgit v1.2.3 From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 18 Nov 2021 16:21:45 +0100 Subject: Refactoring and bugfixes --- src/parse/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a37cb9c6..06cb1578 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -48,7 +48,7 @@ impl<'s> Parser<'s> { } /// End the parsing process and return multiple children.
- pub fn eject(self) -> Option<(Vec<Green>, bool)>{ + pub fn eject(self) -> Option<(Vec<Green>, bool)> { if self.eof() && self.group_success() { Some((self.children, self.tokens.was_unterminated())) } else { -- cgit v1.2.3 From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: Code Review: Parser, I can't let you do this --- src/parse/parser.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 06cb1578..ade9b5df 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -22,7 +22,7 @@ pub struct Parser<'s> { /// The children of the currently built node. children: Vec<Green>, /// Whether the last group was terminated. - last_group_terminated: bool, + last_terminated: bool, } impl<'s> Parser<'s> { @@ -38,7 +38,7 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_group_terminated: true, + last_terminated: true, } } @@ -50,7 +50,7 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children. pub fn eject(self) -> Option<(Vec<Green>, bool)> { if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_unterminated())) + Some((self.children, self.tokens.was_terminated())) } else { None } @@ -99,7 +99,7 @@ impl<'s> Parser<'s> { /// remains stuff in the string. pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> { self.group_success() - .then(|| (self.children, self.tokens.was_unterminated())) + .then(|| (self.children, self.tokens.was_terminated())) } /// Whether the end of the source string or group is reached.
@@ -244,7 +244,7 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_group_terminated = true; + self.last_terminated = true; let mut rescan = self.tokens.mode() != group_mode; @@ -263,7 +263,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); - self.last_group_terminated = false; + self.last_terminated = false; } } @@ -283,7 +283,7 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. pub fn group_success(&self) -> bool { - self.last_group_terminated && self.groups.is_empty() + self.last_terminated && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia -- cgit v1.2.3 From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/parse/parser.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index ade9b5df..b31f69d3 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -21,8 +21,12 @@ pub struct Parser<'s> { groups: Vec<GroupEntry>, /// The children of the currently built node. children: Vec<Green>, - /// Whether the last group was terminated. - last_terminated: bool, + /// Is `Some` if there is an unterminated group at the last position where + /// groups were terminated. + last_unterminated: Option<usize>, + /// Offset the indentation. This can be used if the parser is processing a + /// subslice of the source and there was leading indent.
+ column_offset: usize, } impl<'s> Parser<'s> { @@ -38,7 +42,8 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_terminated: true, + last_unterminated: None, + column_offset: 0, } } @@ -102,6 +107,11 @@ impl<'s> Parser<'s> { .then(|| (self.children, self.tokens.was_terminated())) } + /// Set an indentation offset. + pub fn offset(&mut self, columns: usize) { + self.column_offset = columns; + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -206,6 +216,12 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { + self.tokens.scanner().column(index) + self.column_offset + } + + /// Determine the column index for the given byte index while ignoring the + /// offset. + pub fn clean_column(&self, index: usize) -> usize { self.tokens.scanner().column(index) } @@ -244,7 +260,11 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_terminated = true; + if let Some(n) = self.last_unterminated { + if n != self.prev_end() { + self.last_unterminated = None; + } + } let mut rescan = self.tokens.mode() != group_mode; @@ -262,8 +282,14 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { + // FIXME The error has to be inserted before any space rolls + // around because the rescan will set the cursor back in front + // of the space and reconsume it. Supressing the rescan is not + // an option since additional rescans (e.g. for statements) can + // be triggered directly afterwards, without processing any + // other token. self.push_error(format_eco!("expected {}", end)); - self.last_terminated = false; + self.last_unterminated = Some(self.prev_end()); } } @@ -283,13 +309,21 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. 
pub fn group_success(&self) -> bool { - self.last_terminated && self.groups.is_empty() + self.last_unterminated.is_none() && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { let kind = self.current.take().unwrap(); + if match kind { + NodeKind::Space(n) if n > 0 => true, + NodeKind::Parbreak => true, + _ => false, + } { + self.column_offset = 0; + } + let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); @@ -346,6 +380,13 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into<EcoString>) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); + for i in (0 .. self.children.len()).rev() { + if Self::is_trivia_ext(self.children[i].kind(), false) { + self.children.remove(i); + } else { + break; + } + } self.children.push(GreenData::new(error, 0).into()); } @@ -445,6 +486,7 @@ impl Marker { } /// A logical group of tokens, e.g. `[...]`. +#[derive(Debug)] struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. /// For example, a [`Group::Paren`] will be ended by -- cgit v1.2.3 From 98c96ba1cb8a46e327de313118e4ce1a84795ae9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 14:46:08 +0100 Subject: Fix parser / space / error bug --- src/parse/parser.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index b31f69d3..f36155d5 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -282,12 +282,6 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { - // FIXME The error has to be inserted before any space rolls - // around because the rescan will set the cursor back in front - // of the space and reconsume it.
Supressing the rescan is not - // an option since additional rescans (e.g. for statements) can - // be triggered directly afterwards, without processing any - // other token. self.push_error(format_eco!("expected {}", end)); self.last_unterminated = Some(self.prev_end()); } @@ -380,14 +374,8 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into<EcoString>) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); - for i in (0 .. self.children.len()).rev() { - if Self::is_trivia_ext(self.children[i].kind(), false) { - self.children.remove(i); - } else { - break; - } - } - self.children.push(GreenData::new(error, 0).into()); + let idx = self.trivia_start(); + self.children.insert(idx.0, GreenData::new(error, 0).into()); } /// Eat the current token and add an error that it is unexpected. -- cgit v1.2.3 From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 3 Jan 2022 23:18:21 +0100 Subject: Code Review: Your parsers were so preoccupied with whether they could --- src/parse/parser.rs | 81 ++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 44 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f36155d5..4e5b277d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,8 @@ +use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{TokenMode, Tokens}; +use super::{Scanner, TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -24,8 +25,7 @@ pub struct Parser<'s> { /// Is `Some` if there is an unterminated group at the last position where /// groups were terminated. last_unterminated: Option<usize>, - /// Offset the indentation. This can be used if the parser is processing a - /// subslice of the source and there was leading indent.
+ /// Offsets the indentation on the first line of the source. column_offset: usize, } @@ -47,18 +47,31 @@ impl<'s> Parser<'s> { } } + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut p = Self::new(src, mode); + p.column_offset = Scanner::new(prefix).column(prefix.len()); + p + } + /// End the parsing process and return the last child. pub fn finish(self) -> Vec<Green> { self.children } - /// End the parsing process and return multiple children. - pub fn eject(self) -> Option<(Vec<Green>, bool)> { - if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_terminated())) - } else { - None - } + /// End the parsing process and return multiple children and whether the + /// last token was terminated. + pub fn consume(self) -> Option<(Vec<Green>, bool)> { + (self.eof() && self.terminated()) + .then(|| (self.children, self.tokens.terminated())) + } + + /// End the parsing process and return multiple children and whether the + /// last token was terminated, even if there remains stuff in the string. + pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> { + self.terminated().then(|| (self.children, self.tokens.terminated())) } /// Create a new marker. @@ -100,18 +113,6 @@ impl<'s> Parser<'s> { output } - /// End the parsing process and return multiple children, even if there - /// remains stuff in the string. - pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> { - self.group_success() - .then(|| (self.children, self.tokens.was_terminated())) - } - - /// Set an indentation offset. - pub fn offset(&mut self, columns: usize) { - self.column_offset = columns; - } - /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -199,6 +200,14 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() ..
self.current_end()) } + /// Obtain a range of the source code. + pub fn get<I>(&self, index: I) -> &'s str + where + I: SliceIndex<str, Output = str>, + { + self.tokens.scanner().get(index) + } + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end @@ -216,13 +225,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.column_offset - } - - /// Determine the column index for the given byte index while ignoring the - /// offset. - pub fn clean_column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.scanner().column_offset(index, self.column_offset) } /// Continue parsing in a group. @@ -260,10 +263,8 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - if let Some(n) = self.last_unterminated { - if n != self.prev_end() { - self.last_unterminated = None; - } + if self.last_unterminated != Some(self.prev_end()) { + self.last_unterminated = None; } let mut rescan = self.tokens.mode() != group_mode; @@ -301,23 +302,15 @@ impl<'s> Parser<'s> { } } - /// Check if the group processing was successfully terminated. - pub fn group_success(&self) -> bool { - self.last_unterminated.is_none() && self.groups.is_empty() + /// Checks if all groups were correctly terminated. + pub fn terminated(&self) -> bool { + self.groups.is_empty() && self.last_unterminated.is_none() } /// Low-level bump that consumes exactly one token without special trivia /// handling.
fn bump(&mut self) { let kind = self.current.take().unwrap(); - if match kind { - NodeKind::Space(n) if n > 0 => true, - NodeKind::Parbreak => true, - _ => false, - } { - self.column_offset = 0; - } - let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); -- cgit v1.2.3