From edc686d7384470068858e16f2926cf50f31b2c90 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 27 Nov 2021 16:10:22 +0100 Subject: Make incremental parsing simpler and move it somewhere else --- src/parse/incremental.rs | 661 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 661 insertions(+) create mode 100644 src/parse/incremental.rs (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs new file mode 100644 index 00000000..9c912aae --- /dev/null +++ b/src/parse/incremental.rs @@ -0,0 +1,661 @@ +use std::ops::Range; +use std::rc::Rc; + +use crate::syntax::{Green, GreenNode, NodeKind, Span}; + +use super::{ + parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, + parse_markup_elements, parse_template, TokenMode, +}; + +/// Allows partial refreshs of the [`Green`] node tree. +/// +/// This struct holds a description of a change. Its methods can be used to try +/// and apply the change to a green tree. +pub struct Reparser<'a> { + /// The new source code, with the change applied. + src: &'a str, + /// Which range in the old source file was changed. + replace_range: Span, + /// How many characters replaced the text in `replacement_range`. + replace_len: usize, +} + +impl<'a> Reparser<'a> { + /// Create a new reparser. + pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn reparse(&self, green: &mut GreenNode) -> Result, ()> { + self.reparse_step(green, 0, TokenMode::Markup, true) + } + + fn reparse_step( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> Result, ()> { + let kind = green.kind().clone(); + let mode = kind.mode().unwrap_or(parent_mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = green.children().len() - 1; + let mut start = None; + + for (i, child) in green.children_mut().iter_mut().enumerate() { + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); + + // We look for the start in the element but we only take a position + // at the right border if this is markup or the last element. + // + // This is because in Markup mode, we want to examine all nodes + // touching a replacement but in code we want to atomically replace. + if child_span.contains(self.replace_range.start) + && (mode == TokenMode::Markup + || self.replace_range.start != child_span.end + || self.replace_range.len() == 0 + || i == last) + { + start = Some((i, offset)); + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + let (start_idx, start_offset) = start.ok_or(())?; + + for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { + let i = i + start_idx; + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); + + // Similarly to above, the end of the edit must be in the node but + // if it is at the edge and we are in markup node, we also want its + // neighbor! + if child_span.contains(self.replace_range.end) + && (mode != TokenMode::Markup + || self.replace_range.end != child_span.end + || i == last) + { + loop_result = Some(( + start_idx .. i + 1, + Span::new( + self.replace_range.source, + start_offset, + offset + child.len(), + ), + i == last && outermost, + child.kind().clone(), + )); + break; + } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + break; + } + + offset += child.len(); + } + + let (child_idx_range, child_span, child_outermost, child_kind) = + loop_result.ok_or(())?; + + if child_idx_range.len() == 1 { + let idx = child_idx_range.start; + let child = &mut green.children_mut()[idx]; + + let old_len = child.len(); + // First, we try if the child has another, more specific applicable child. + if !child_kind.post().unsafe_interior() { + if let Ok(range) = match child { + Green::Node(n) => self.reparse_step( + Rc::make_mut(n), + start_offset, + kind.mode().unwrap_or(TokenMode::Code), + child_outermost, + ), + Green::Token(_) => Err(()), + } { + let new_len = child.len(); + green.update_child_len(new_len, old_len); + return Ok(range); + } + } + } + + debug_assert_ne!(child_idx_range.len(), 0); + + if mode == TokenMode::Code && child_idx_range.len() > 1 { + return Err(()); + } + + // We now have a child that we can replace and a function to do so. + let (func, policy) = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); + let func = func?; + + let src_span = inserted_span(child_span, self.replace_range, self.replace_len); + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. self.src.len() + } else { + src_span.to_range() + }; + + let (mut new_children, unterminated) = + func(&self.src[recompile_range], child_at_start).ok_or(())?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !child_outermost && unterminated { + return Err(()); + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; + + green.replace_child_range(child_idx_range, insertion); + + Ok(src_span.to_range()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start] + .kind() + .mode() + .unwrap_or(TokenMode::Code); + + // Check if the children / child has the right type. + let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().pre() { + Precondition::AtStart if !new_at_start => { + return InvariantResult::Error; + } + Precondition::NotAtStart if new_at_start => { + return InvariantResult::Error; + } + _ => {} + } + break; + } + } + + ok +} + +/// Create a new span by specifying a span in which a modification happened +/// and how many characters are now in that span. +fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { + if !source.surrounds(other) { + panic!(); + } + + let len_change = n as i64 - other.len() as i64; + source.end = (source.end as i64 + len_change) as usize; + source +} + +impl NodeKind { + /// Return the correct reparsing function given the postconditions for the + /// type. + fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> ( + Result Option<(Vec, bool)>, ()>, + Postcondition, + ) { + let policy = self.post(); + let mode = self.mode().unwrap_or(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), + Postcondition::AtomicPrimary if mode == TokenMode::Code => { + (Ok(parse_atomic), policy) + } + Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), + Postcondition::SameKind(x) if x == None || x == Some(mode) => { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + _ => { + let parser: fn(&str, bool) -> _ = match mode { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords are literals and can be safely be substituted with + // other expressions. + Self::None | Self::Auto => Postcondition::AtomicPrimary, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. + Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + Self::Markup => Postcondition::SameKind(None), + + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => Postcondition::AtomicPrimary, + + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + +/// The conditions that a node has to fulfill in order to be replaced. +/// +/// This can dictate if a node can be replaced at all and if yes, what can take +/// its place. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +/// The conditions under which a node can be inserted or remain in a tree. +/// +/// These conditions all search the neighbors of the node and see if its +/// existence is plausible with them present. This can be used to encode some +/// context-free language components for incremental parsing. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + +impl Postcondition { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe => true, + _ => false, + } + } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe | Self::UnsafeLayer => true, + Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + _ => false, + } + } +} + +#[cfg(test)] +mod tests { + use crate::parse::parse; + use crate::source::SourceFile; + + use super::*; + + #[test] + fn test_incremental_parse() { + #[track_caller] + fn test(prev: &str, range: Range, with: &str, incr: Range) { + let mut source = SourceFile::detached(prev); + let range = source.edit(range, with); + assert_eq!(range, incr); + + let incr_tree = source.root(); + assert_eq!(parse(source.src()), incr_tree); + } + + // Test simple replacements. + test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("a d e", 1 .. 3, " b c d", 0 .. 8); + test("a #f() e", 1 .. 6, " b c d", 0 .. 8); + test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); + test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("your thing", 5 .. 5, "a", 4 .. 11); + test("a your thing a", 6 .. 7, "a", 2 .. 12); + test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); + test("#call() abc", 7 .. 7, "[]", 0 .. 13); + // test( + // "hi\n- item\n- item 2\n - item 3", + // 10 .. 10, + // " ", + // 9 .. 33, + // ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 16 .. 20, + "none", + 16 .. 20, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 33 .. 42, + "[_gronk_]", + 33 .. 42, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 34 .. 41, + "_bar_", + 34 .. 39, + ); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("hello {x}", 6 .. 9, "#f()", 5 .. 10); + test( + "this is -- in my opinion -- spectacular", + 8 .. 10, + "---", + 7 .. 12, + ); + test( + "understanding `code` is complicated", + 15 .. 15, + "C ", + 14 .. 22, + ); + test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); + test( + "a #let rect with (fill: eastern)\nb", + 16 .. 31, + " (stroke: conifer", + 2 .. 34, + ); + + // Test the whitespace invariants. + test("hello \\ world", 7 .. 8, "a ", 6 .. 14); + test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("x = y", 1 .. 1, " + y", 0 .. 6); + test("x = y", 1 .. 1, " + y\n", 0 .. 10); + test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); + test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); + test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); + + // Test type invariants. + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); + test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); + test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); + test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); + test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); + test("a // b c #f()", 3 .. 4, "", 0 .. 12); + test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); + test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); + test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); + test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); + test("a b c", 1 .. 1, "{[}", 0 .. 5); + + // Test unclosed things. + test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); + test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12); + test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); + test("a b c", 1 .. 1, " /* letters */", 0 .. 16); + test("a b c", 1 .. 1, " /* letters", 0 .. 16); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters */", + 1 .. 35, + ); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters", + 0 .. 38, + ); + + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); + } +} -- cgit v1.2.3 From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: Code Review: Parser, I can't let you do this --- src/parse/incremental.rs | 449 ++++++++++++++++++++--------------------------- 1 file changed, 186 insertions(+), 263 deletions(-) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 9c912aae..8e52c143 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -1,13 +1,57 @@ use std::ops::Range; use std::rc::Rc; -use crate::syntax::{Green, GreenNode, NodeKind, Span}; +use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, parse_markup_elements, parse_template, TokenMode, }; +/// The conditions that a node has to fulfill in order to be replaced. +/// +/// This can dictate if a node can be replaced at all and if yes, what can take +/// its place. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +/// The conditions under which a node can be inserted or remain in a tree. +/// +/// These conditions all search the neighbors of the node and see if its +/// existence is plausible with them present. This can be used to encode some +/// context-free language components for incremental parsing. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + /// Allows partial refreshs of the [`Green`] node tree. /// /// This struct holds a description of a change. Its methods can be used to try @@ -16,21 +60,21 @@ pub struct Reparser<'a> { /// The new source code, with the change applied. src: &'a str, /// Which range in the old source file was changed. - replace_range: Span, - /// How many characters replaced the text in `replacement_range`. + replace_range: Range, + /// How many characters replaced the text in `replace_range`. replace_len: usize, } impl<'a> Reparser<'a> { /// Create a new reparser. - pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + pub fn new(src: &'a str, replace_range: Range, replace_len: usize) -> Self { Self { src, replace_range, replace_len } } } impl Reparser<'_> { /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Result, ()> { + pub fn reparse(&self, green: &mut GreenNode) -> Option> { self.reparse_step(green, 0, TokenMode::Markup, true) } @@ -39,30 +83,26 @@ impl Reparser<'_> { green: &mut GreenNode, mut offset: usize, parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { + mut outermost: bool, + ) -> Option> { let kind = green.kind().clone(); let mode = kind.mode().unwrap_or(parent_mode); - let mut loop_result = None; let mut child_at_start = true; - let last = green.children().len() - 1; + let last = green.children().len().saturating_sub(1); let mut start = None; for (i, child) in green.children_mut().iter_mut().enumerate() { - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); + let child_span = offset .. offset + child.len(); // We look for the start in the element but we only take a position // at the right border if this is markup or the last element. // // This is because in Markup mode, we want to examine all nodes // touching a replacement but in code we want to atomically replace. - if child_span.contains(self.replace_range.start) - && (mode == TokenMode::Markup - || self.replace_range.start != child_span.end - || self.replace_range.len() == 0 - || i == last) + if child_span.contains(&self.replace_range.start) + || (mode == TokenMode::Markup + && self.replace_range.start == child_span.end) { start = Some((i, offset)); break; @@ -72,31 +112,21 @@ impl Reparser<'_> { child_at_start = child.kind().is_at_start(child_at_start); } - let (start_idx, start_offset) = start.ok_or(())?; + let (start_idx, start_offset) = start?; + let mut end = None; - for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { - let i = i + start_idx; - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); + for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + let child_span = offset .. offset + child.len(); // Similarly to above, the end of the edit must be in the node but // if it is at the edge and we are in markup node, we also want its // neighbor! - if child_span.contains(self.replace_range.end) - && (mode != TokenMode::Markup - || self.replace_range.end != child_span.end - || i == last) + if child_span.contains(&self.replace_range.end) + || self.replace_range.end == child_span.end + && (mode != TokenMode::Markup || i == last) { - loop_result = Some(( - start_idx .. i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind().clone(), - )); + outermost &= i == last; + end = Some(i); break; } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { break; @@ -105,28 +135,30 @@ impl Reparser<'_> { offset += child.len(); } - let (child_idx_range, child_span, child_outermost, child_kind) = - loop_result.ok_or(())?; + let end = end?; + let child_idx_range = start_idx .. end + 1; + let child_span = start_offset .. offset + green.children()[end].len(); + let child_kind = green.children()[end].kind().clone(); if child_idx_range.len() == 1 { let idx = child_idx_range.start; let child = &mut green.children_mut()[idx]; + let prev_len = child.len(); - let old_len = child.len(); // First, we try if the child has another, more specific applicable child. if !child_kind.post().unsafe_interior() { - if let Ok(range) = match child { + if let Some(range) = match child { Green::Node(n) => self.reparse_step( Rc::make_mut(n), start_offset, kind.mode().unwrap_or(TokenMode::Code), - child_outermost, + outermost, ), - Green::Token(_) => Err(()), + Green::Token(_) => None, } { let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); + green.update_child_len(new_len, prev_len); + return Some(range); } } } @@ -134,28 +166,31 @@ impl Reparser<'_> { debug_assert_ne!(child_idx_range.len(), 0); if mode == TokenMode::Code && child_idx_range.len() > 1 { - return Err(()); + return None; } // We now have a child that we can replace and a function to do so. - let (func, policy) = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); - let func = func?; + let func = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; + let policy = child_kind.post(); + + let len_change = self.replace_len as isize - self.replace_range.len() as isize; + let mut src_span = child_span; + src_span.end = (src_span.end as isize + len_change) as usize; - let src_span = inserted_span(child_span, self.replace_range, self.replace_len); let recompile_range = if policy == Postcondition::AtomicPrimary { src_span.start .. self.src.len() } else { - src_span.to_range() + src_span.clone() }; - let (mut new_children, unterminated) = - func(&self.src[recompile_range], child_at_start).ok_or(())?; + let (mut new_children, terminated) = + func(&self.src[recompile_range], child_at_start)?; // Do not accept unclosed nodes if the old node did not use to be at the // right edge of the tree. - if !child_outermost && unterminated { - return Err(()); + if !outermost && !terminated { + return None; } let insertion = match check_invariants( @@ -164,17 +199,17 @@ impl Reparser<'_> { child_idx_range.clone(), child_at_start, mode, - src_span, + src_span.clone(), policy, ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), + InvariantResult::Ok => Some(new_children), + InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => None, }?; green.replace_child_range(child_idx_range, insertion); - Ok(src_span.to_range()) + Some(src_span) } } @@ -191,7 +226,7 @@ fn check_invariants( child_idx_range: Range, child_at_start: bool, mode: TokenMode, - src_span: Span, + src_span: Range, policy: Postcondition, ) -> InvariantResult { let (new_children, ok) = if policy == Postcondition::AtomicPrimary { @@ -206,10 +241,7 @@ fn check_invariants( (use_children, InvariantResult::Ok) }; - let child_mode = old_children[child_idx_range.start] - .kind() - .mode() - .unwrap_or(TokenMode::Code); + let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); // Check if the children / child has the right type. let same_kind = match policy { @@ -249,25 +281,22 @@ fn check_invariants( } } - let mut new_at_start = child_at_start; + let mut post_at_start = child_at_start; for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); + post_at_start = child.kind().is_at_start(post_at_start); } for child in &old_children[child_idx_range.end ..] { if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); + post_at_start = child.kind().is_at_start(post_at_start); continue; } - match child.kind().pre() { - Precondition::AtStart if !new_at_start => { - return InvariantResult::Error; - } - Precondition::NotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} + let pre = child.kind().pre(); + if pre == Precondition::AtStart && !post_at_start + || pre == Precondition::NotAtStart && post_at_start + { + return InvariantResult::Error; } break; } @@ -276,56 +305,31 @@ fn check_invariants( ok } -/// Create a new span by specifying a span in which a modification happened -/// and how many characters are now in that span. -fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { - if !source.surrounds(other) { - panic!(); - } - - let len_change = n as i64 - other.len() as i64; - source.end = (source.end as i64 + len_change) as usize; - source -} - impl NodeKind { /// Return the correct reparsing function given the postconditions for the /// type. fn reparsing_function( &self, parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - Postcondition, - ) { + ) -> Option Option<(Vec, bool)>> { let policy = self.post(); let mode = self.mode().unwrap_or(parent_mode); match policy { - Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), - Postcondition::AtomicPrimary if mode == TokenMode::Code => { - (Ok(parse_atomic), policy) - } - Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), - Postcondition::SameKind(x) if x == None || x == Some(mode) => { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - _ => { - let parser: fn(&str, bool) -> _ = match mode { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } + Postcondition::Unsafe | Postcondition::UnsafeLayer => None, + Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), + Postcondition::AtomicPrimary => Some(parse_atomic_markup), + Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), + _ => None, + }, + _ => match mode { + TokenMode::Markup if self == &Self::Markup => Some(parse_markup), + TokenMode::Markup => Some(parse_markup_elements), + _ => return None, + }, } } @@ -369,10 +373,6 @@ impl NodeKind { | Self::Dots | Self::Arrow => Postcondition::Unsafe, - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => Postcondition::AtomicPrimary, - // These keywords change what kind of expression the parent is and // how far the expression would go. Self::Let @@ -389,45 +389,14 @@ impl NodeKind { | Self::Include | Self::From => Postcondition::Unsafe, - Self::Markup => Postcondition::SameKind(None), - - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Raw(_) - | Self::Math(_) => Postcondition::Safe, - // Changing the heading level, enum numbering, or list bullet // changes the next layer. Self::EnumNumbering(_) => Postcondition::Unsafe, - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => Postcondition::AtomicPrimary, + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + // These are complex expressions which may screw with their + // environments. Self::Call | Self::Unary | Self::Binary @@ -439,11 +408,44 @@ impl NodeKind { // is not atomic. Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + // Missing these creates errors for the parents. + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // Only markup is expected at the points where it does occur. + Self::Markup => Postcondition::SameKind(None), + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + // These can appear as bodies and would trigger an error if they // became something else. Self::Template => Postcondition::SameKind(None), Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + // Whitespace in code mode has to remain whitespace or else the type + // of things would change. + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::None + | Self::Auto => Postcondition::AtomicPrimary, + + // More complex, but still an expression. Self::ForExpr | Self::WhileExpr | Self::IfExpr @@ -452,15 +454,22 @@ impl NodeKind { | Self::ImportExpr | Self::IncludeExpr => Postcondition::AtomicPrimary, - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), - - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, } } @@ -475,50 +484,6 @@ impl NodeKind { } } -/// The conditions that a node has to fulfill in order to be replaced. -/// -/// This can dictate if a node can be replaced at all and if yes, what can take -/// its place. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind(Option), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. - Unsafe, -} - -/// The conditions under which a node can be inserted or remain in a tree. -/// -/// These conditions all search the neighbors of the node and see if its -/// existence is plausible with them present. This can be used to encode some -/// context-free language components for incremental parsing. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. - AtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - NotAtStart, - /// These nodes must be followed by whitespace. - RightWhitespace, - /// No additional requirements. - None, -} - impl Postcondition { pub fn unsafe_interior(&self) -> bool { match self { @@ -544,6 +509,7 @@ mod tests { use super::*; #[test] + #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] fn test(prev: &str, range: Range, with: &str, incr: Range) { @@ -551,12 +517,14 @@ mod tests { let range = source.edit(range, with); assert_eq!(range, incr); - let incr_tree = source.root(); + let incr_tree = source.root().clone(); assert_eq!(parse(source.src()), incr_tree); } // Test simple replacements. - test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("hello world", 6 .. 11, "walkers", 5 .. 13); + test("some content", 0..12, "", 0..0); + test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); @@ -564,53 +532,18 @@ mod tests { test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - // test( - // "hi\n- item\n- item 2\n - item 3", - // 10 .. 10, - // " ", - // 9 .. 33, - // ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 16 .. 20, - "none", - 16 .. 20, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 33 .. 42, - "[_gronk_]", - 33 .. 42, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 34 .. 41, - "_bar_", - 34 .. 39, - ); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); - test( - "this is -- in my opinion -- spectacular", - 8 .. 10, - "---", - 7 .. 12, - ); - test( - "understanding `code` is complicated", - 15 .. 15, - "C ", - 14 .. 22, - ); + test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); + test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test( - "a #let rect with (fill: eastern)\nb", - 16 .. 31, - " (stroke: conifer", - 2 .. 34, - ); + test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. test("hello \\ world", 7 .. 8, "a ", 6 .. 14); @@ -642,18 +575,8 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); test("a b c", 1 .. 1, " /* letters */", 0 .. 16); test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters */", - 1 .. 35, - ); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters", - 0 .. 38, - ); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); -- cgit v1.2.3 From 12f7335ac365759a8c6bc942ef6830c23a4176fc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 28 Nov 2021 22:32:20 +0100 Subject: Clarity and bugfix Fixes a bug where validation would wrongly reject an atomic primary reparse due to trailing whitespace. Co-Authored-By: Martin --- src/parse/incremental.rs | 287 ++++++++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 152 deletions(-) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 8e52c143..cc100a4c 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -85,13 +85,14 @@ impl Reparser<'_> { parent_mode: TokenMode, mut outermost: bool, ) -> Option> { - let kind = green.kind().clone(); - let mode = kind.mode().unwrap_or(parent_mode); + let mode = green.kind().mode().unwrap_or(parent_mode); + let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); + let child_count = green.children().len(); - let mut child_at_start = true; - let last = green.children().len().saturating_sub(1); - let mut start = None; + let mut first = None; + let mut at_start = true; + // Find the the first child in the range of children to reparse. for (i, child) in green.children_mut().iter_mut().enumerate() { let child_span = offset .. offset + child.len(); @@ -104,18 +105,19 @@ impl Reparser<'_> { || (mode == TokenMode::Markup && self.replace_range.start == child_span.end) { - start = Some((i, offset)); + first = Some((i, offset)); break; } offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); + at_start = child.kind().is_at_start(at_start); } - let (start_idx, start_offset) = start?; - let mut end = None; + let (first_idx, first_start) = first?; + let mut last = None; - for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + // Find the the last child in the range of children to reparse. + for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) { let child_span = offset .. offset + child.len(); // Similarly to above, the end of the edit must be in the node but @@ -123,35 +125,35 @@ impl Reparser<'_> { // neighbor! if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i == last) + && (mode != TokenMode::Markup || i + 1 == child_count) { - outermost &= i == last; - end = Some(i); + outermost &= i + 1 == child_count; + last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() { break; } offset += child.len(); } - let end = end?; - let child_idx_range = start_idx .. end + 1; - let child_span = start_offset .. offset + green.children()[end].len(); - let child_kind = green.children()[end].kind().clone(); + let (last_idx, last_end) = last?; + let children_range = first_idx .. last_idx + 1; + let children_span = first_start .. last_end; + let last_kind = green.children()[last_idx].kind().clone(); - if child_idx_range.len() == 1 { - let idx = child_idx_range.start; - let child = &mut green.children_mut()[idx]; + // First, we try if the child itself has another, more specific + // applicable child. + if children_range.len() == 1 { + let child = &mut green.children_mut()[children_range.start]; let prev_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !child_kind.post().unsafe_interior() { + if last_kind.post() != Postcondition::Unsafe { if let Some(range) = match child { - Green::Node(n) => self.reparse_step( - Rc::make_mut(n), - start_offset, - kind.mode().unwrap_or(TokenMode::Code), + Green::Node(node) => self.reparse_step( + Rc::make_mut(node), + first_start, + child_mode, outermost, ), Green::Token(_) => None, @@ -163,159 +165,147 @@ impl Reparser<'_> { } } - debug_assert_ne!(child_idx_range.len(), 0); - - if mode == TokenMode::Code && child_idx_range.len() > 1 { + // We only replace multiple children in markup mode. + if children_range.len() > 1 && mode == TokenMode::Code { return None; } // We now have a child that we can replace and a function to do so. - let func = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; - let policy = child_kind.post(); + let func = last_kind.reparsing_func(child_mode)?; + let post = last_kind.post(); - let len_change = self.replace_len as isize - self.replace_range.len() as isize; - let mut src_span = child_span; - src_span.end = (src_span.end as isize + len_change) as usize; + // The span of the to-be-reparsed children in the new source. + let replace_span = children_span.start + .. children_span.end + self.replace_len - self.replace_range.len(); - let recompile_range = if policy == Postcondition::AtomicPrimary { - src_span.start .. self.src.len() + // For atomic primaries we need to pass in the whole remaining string to + // check whether the parser would eat more stuff illicitly. + let reparse_span = if post == Postcondition::AtomicPrimary { + replace_span.start .. self.src.len() } else { - src_span.clone() + replace_span.clone() }; - let (mut new_children, terminated) = - func(&self.src[recompile_range], child_at_start)?; + // Do the reparsing! + let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?; + + // Make sure that atomic primaries ate only what they were supposed to. + if post == Postcondition::AtomicPrimary { + let len = replace_span.len(); + if newborns.len() > 1 && newborns[0].len() == len { + newborns.truncate(1); + } else if newborns.iter().map(Green::len).sum::() != len { + return None; + } + } - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. + // Do not accept unclosed nodes if the old node wasn't at the right edge + // of the tree. if !outermost && !terminated { return None; } - let insertion = match check_invariants( - &new_children, + // If all post- and preconditions match, we are good to go! + if validate( green.children(), - child_idx_range.clone(), - child_at_start, + children_range.clone(), + at_start, + &newborns, mode, - src_span.clone(), - policy, + post, ) { - InvariantResult::Ok => Some(new_children), - InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => None, - }?; - - green.replace_child_range(child_idx_range, insertion); - - Some(src_span) + green.replace_child_range(children_range, newborns); + Some(replace_span) + } else { + None + } } } -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - child_idx_range: Range, - child_at_start: bool, +/// Validate that a node replacement is allowed by post- and preconditions. +fn validate( + prev_children: &[Green], + children_range: Range, + mut at_start: bool, + newborns: &[Green], mode: TokenMode, - src_span: Range, - policy: Postcondition, -) -> InvariantResult { - let (new_children, ok) = if policy == Postcondition::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); - - // Check if the children / child has the right type. - let same_kind = match policy { - Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), - _ => false, - }; - - if same_kind || policy == Postcondition::AtomicPrimary { - if new_children.len() != 1 { - return InvariantResult::Error; - } + post: Postcondition, +) -> bool { + // Atomic primaries must only generate one new child. + if post == Postcondition::AtomicPrimary && newborns.len() != 1 { + return false; + } - if same_kind { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } + // Same kind in mode `inside` must generate only one child and that child + // must be of the same kind as previously. + if let Postcondition::SameKind(inside) = post { + let prev_kind = prev_children[children_range.start].kind(); + let prev_mode = prev_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == prev_mode) + && (newborns.len() != 1 || prev_kind != newborns[0].kind()) + { + return false; } } - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } + // Neighbor invariants are only relevant in markup mode. + if mode == TokenMode::Code { + return true; + } - if new_children.last().map(|x| x.kind().pre()) - == Some(Precondition::RightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } + // Ensure that a possible right-whitespace precondition of a node before the + // replacement range is satisfied. + if children_range.start > 0 + && prev_children[children_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !newborns[0].kind().is_whitespace() + { + return false; + } - let mut post_at_start = child_at_start; - for child in new_children { - post_at_start = child.kind().is_at_start(post_at_start); - } + // Ensure that a possible right-whitespace precondition of a new node at the + // end of the replacement range is satisfied. + if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace) + && children_range.end < prev_children.len() + && !prev_children[children_range.end].kind().is_whitespace() + { + return false; + } - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - post_at_start = child.kind().is_at_start(post_at_start); - continue; - } + // Compute the at_start state behind the new children. + for child in newborns { + at_start = child.kind().is_at_start(at_start); + } + // Ensure that a possible at-start or not-at-start precondition of + // a node after the replacement range is satisfied. + for child in &prev_children[children_range.end ..] { + if !child.kind().is_trivia() { let pre = child.kind().pre(); - if pre == Precondition::AtStart && !post_at_start - || pre == Precondition::NotAtStart && post_at_start + if (pre == Precondition::AtStart && !at_start) + || (pre == Precondition::NotAtStart && at_start) { - return InvariantResult::Error; + return false; } + break; } + + at_start = child.kind().is_at_start(at_start); } - ok + true } impl NodeKind { /// Return the correct reparsing function given the postconditions for the /// type. - fn reparsing_function( + fn reparsing_func( &self, parent_mode: TokenMode, ) -> Option Option<(Vec, bool)>> { - let policy = self.post(); let mode = self.mode().unwrap_or(parent_mode); - - match policy { + match self.post() { Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary => Some(parse_atomic_markup), @@ -393,6 +383,7 @@ impl NodeKind { // changes the next layer. Self::EnumNumbering(_) => Postcondition::Unsafe, + // This can be anything, so we don't make any promises. Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, // These are complex expressions which may screw with their @@ -485,17 +476,11 @@ impl NodeKind { } impl Postcondition { - pub fn unsafe_interior(&self) -> bool { - match self { - Self::Unsafe => true, - _ => false, - } - } - - pub fn markup_safe(&self) -> bool { + /// Whether a node with this condition can be reparsed in markup mode. + pub fn safe_in_markup(&self) -> bool { match self { Self::Safe | Self::UnsafeLayer => true, - Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup), _ => false, } } @@ -503,22 +488,19 @@ impl Postcondition { #[cfg(test)] mod tests { + use super::*; use crate::parse::parse; use crate::source::SourceFile; - use super::*; - #[test] #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] - fn test(prev: &str, range: Range, with: &str, incr: Range) { + fn test(prev: &str, range: Range, with: &str, goal: Range) { let mut source = SourceFile::detached(prev); let range = source.edit(range, with); - assert_eq!(range, incr); - - let incr_tree = source.root().clone(); - assert_eq!(parse(source.src()), incr_tree); + assert_eq!(range, goal); + assert_eq!(parse(source.src()), *source.root()); } // Test simple replacements. @@ -542,7 +524,7 @@ mod tests { test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); - test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); + test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. @@ -578,6 +560,7 @@ mod tests { test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); + // Test raw tokens. test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); } -- cgit v1.2.3 From 289122e83c085668e56e52225c2dcfd9417d6262 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 29 Nov 2021 12:06:41 +0100 Subject: Deal with offside rule and remove RightWhitespace --- src/parse/incremental.rs | 92 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 22 deletions(-) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index cc100a4c..0e2d196c 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -5,7 +5,7 @@ use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, - parse_markup_elements, parse_template, TokenMode, + parse_markup_elements, parse_template, Scanner, TokenMode, }; /// The conditions that a node has to fulfill in order to be replaced. @@ -40,14 +40,13 @@ pub enum Postcondition { pub enum Precondition { /// These nodes depend on being at the start of a line. Reparsing of safe /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. + /// safe. Additionally, the indentation of the first right non-trivia, + /// non-whitespace sibling must not be greater than the current indentation. AtStart, /// These nodes depend on not being at the start of a line. Reparsing of /// safe left neighbors has to check this invariant. Otherwise, this node is /// safe. NotAtStart, - /// These nodes must be followed by whitespace. - RightWhitespace, /// No additional requirements. None, } @@ -213,6 +212,8 @@ impl Reparser<'_> { &newborns, mode, post, + replace_span.clone(), + self.src, ) { green.replace_child_range(children_range, newborns); Some(replace_span) @@ -230,6 +231,8 @@ fn validate( newborns: &[Green], mode: TokenMode, post: Postcondition, + replace_span: Range, + src: &str, ) -> bool { // Atomic primaries must only generate one new child. if post == Postcondition::AtomicPrimary && newborns.len() != 1 { @@ -253,23 +256,37 @@ fn validate( return true; } - // Ensure that a possible right-whitespace precondition of a node before the - // replacement range is satisfied. - if children_range.start > 0 - && prev_children[children_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !newborns[0].kind().is_whitespace() - { - return false; - } + // Check if there are any `AtStart` predecessors which require a certain + // indentation. + let s = Scanner::new(src); + let mut prev_pos = replace_span.start; + for child in (&prev_children[.. children_range.start]).iter().rev() { + prev_pos -= child.len(); + if !child.kind().is_trivia() { + if child.kind().pre() == Precondition::AtStart { + let left_col = s.column(prev_pos); + + // Search for the first non-trivia newborn. + let mut new_pos = replace_span.start; + let mut child_col = None; + for child in newborns { + if !child.kind().is_trivia() { + child_col = Some(s.column(new_pos)); + break; + } + + new_pos += child.len(); + } - // Ensure that a possible right-whitespace precondition of a new node at the - // end of the replacement range is satisfied. - if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace) - && children_range.end < prev_children.len() - && !prev_children[children_range.end].kind().is_whitespace() - { - return false; + if let Some(child_col) = child_col { + if child_col > left_col { + return false; + } + } + } + + break; + } } // Compute the at_start state behind the new children. @@ -294,6 +311,37 @@ fn validate( at_start = child.kind().is_at_start(at_start); } + // We have to check whether the last non-trivia newborn is `AtStart` and + // verify the indent of its right neighbors in order to make sure its + // indentation requirements are fulfilled. + let mut child_pos = replace_span.end; + let mut child_col = None; + for child in newborns.iter().rev() { + child_pos -= child.len(); + + if !child.kind().is_trivia() { + if child.kind().pre() == Precondition::AtStart { + child_col = Some(s.column(child_pos)); + } + break; + } + } + + if let Some(child_col) = child_col { + let mut right_pos = replace_span.end; + for child in &prev_children[children_range.end ..] { + if !child.kind().is_trivia() { + if s.column(right_pos) > child_col { + return false; + } + + break; + } + + right_pos += child.len(); + } + } + true } @@ -469,7 +517,6 @@ impl NodeKind { match self { Self::Heading | Self::Enum | Self::List => Precondition::AtStart, Self::TextInLine(_) => Precondition::NotAtStart, - Self::Linebreak => Precondition::RightWhitespace, _ => Precondition::None, } } @@ -515,7 +562,8 @@ mod tests { test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); -- cgit v1.2.3 From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/parse/incremental.rs | 69 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 14 deletions(-) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 0e2d196c..1ee37a51 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -47,6 +47,10 @@ pub enum Precondition { /// safe left neighbors has to check this invariant. Otherwise, this node is /// safe. NotAtStart, + /// These nodes could end up somewhere else up the tree if the parse was + /// happening from scratch. The parse result has to be checked for such + /// nodes. They are safe to add if followed up by other nodes. + NotAtEnd, /// No additional requirements. None, } @@ -88,6 +92,12 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let child_count = green.children().len(); + // Save the current indent if this is a markup node. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => 0, + }; + let mut first = None; let mut at_start = true; @@ -170,12 +180,29 @@ impl Reparser<'_> { } // We now have a child that we can replace and a function to do so. - let func = last_kind.reparsing_func(child_mode)?; + let func = last_kind.reparsing_func(child_mode, indent)?; let post = last_kind.post(); + let mut column = if mode == TokenMode::Markup { + // In this case, we want to pass the indentation to the function. + Scanner::new(self.src).column(children_span.start) + } else { + 0 + }; + + // If this is a markup node, we want to save its indent instead to pass + // the right indent argument. + if children_range.len() == 1 { + let child = &mut green.children_mut()[children_range.start]; + if let NodeKind::Markup(n) = child.kind() { + column = *n; + } + } + // The span of the to-be-reparsed children in the new source. let replace_span = children_span.start - .. children_span.end + self.replace_len - self.replace_range.len(); + .. + children_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. @@ -186,7 +213,7 @@ impl Reparser<'_> { }; // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?; + let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; // Make sure that atomic primaries ate only what they were supposed to. if post == Postcondition::AtomicPrimary { @@ -311,6 +338,14 @@ fn validate( at_start = child.kind().is_at_start(at_start); } + // Verify that the last of the newborns is not `NotAtEnd`. + if newborns + .last() + .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) + { + return false; + } + // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. @@ -351,21 +386,22 @@ impl NodeKind { fn reparsing_func( &self, parent_mode: TokenMode, - ) -> Option Option<(Vec, bool)>> { + indent: usize, + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); match self.post() { Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary => Some(parse_atomic_markup), Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), _ => None, }, _ => match mode { - TokenMode::Markup if self == &Self::Markup => Some(parse_markup), - TokenMode::Markup => Some(parse_markup_elements), + TokenMode::Markup if indent == 0 => Some(parse_markup_elements), _ => return None, }, } @@ -452,8 +488,9 @@ impl NodeKind { Postcondition::UnsafeLayer } - // Only markup is expected at the points where it does occur. - Self::Markup => Postcondition::SameKind(None), + // Only markup is expected at the points where it does occur. The + // indentation must be preserved as well, also for the children. + Self::Markup(_) => Postcondition::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. @@ -493,6 +530,10 @@ impl NodeKind { | Self::ImportExpr | Self::IncludeExpr => Postcondition::AtomicPrimary, + // This element always has to remain in the same column so better + // reparse the whole parent. + Self::Raw(_) => Postcondition::Unsafe, + // These are all replaceable by other tokens. Self::Parbreak | Self::Linebreak @@ -507,7 +548,6 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Raw(_) | Self::Math(_) => Postcondition::Safe, } } @@ -517,6 +557,7 @@ impl NodeKind { match self { Self::Heading | Self::Enum | Self::List => Precondition::AtStart, Self::TextInLine(_) => Precondition::NotAtStart, + Self::Error(_, _) => Precondition::NotAtEnd, _ => Precondition::None, } } @@ -557,12 +598,12 @@ mod tests { test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); - test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("= A heading", 3 .. 3, "n evocative", 2 .. 22); test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 3 .. 34); test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); @@ -571,7 +612,7 @@ mod tests { test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); - test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); + test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); @@ -596,7 +637,7 @@ mod tests { test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 5); + test("a b c", 1 .. 1, "{[}", 0 .. 8); // Test unclosed things. test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); @@ -610,6 +651,6 @@ mod tests { // Test raw tokens. test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); + test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); } } -- cgit v1.2.3 From 98c96ba1cb8a46e327de313118e4ce1a84795ae9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 14:46:08 +0100 Subject: Fix parser / space / error bug --- src/parse/incremental.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 1ee37a51..5cb016d2 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -623,6 +623,7 @@ mod tests { test("x = y", 1 .. 1, " + y\n", 0 .. 10); test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); -- cgit v1.2.3 From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 3 Jan 2022 23:18:21 +0100 Subject: Code Review: Your parsers were so preoccupied with whether they could --- src/parse/incremental.rs | 255 +++++++++++++++++++++++++---------------------- 1 file changed, 135 insertions(+), 120 deletions(-) (limited to 'src/parse/incremental.rs') diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 5cb016d2..4c82f158 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,8 +4,8 @@ use std::rc::Rc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, - parse_markup_elements, parse_template, Scanner, TokenMode, + is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment, + parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode, }; /// The conditions that a node has to fulfill in order to be replaced. @@ -13,21 +13,21 @@ use super::{ /// This can dictate if a node can be replaced at all and if yes, what can take /// its place. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { +pub enum SuccessionRule { /// Changing this node can never have an influence on the other nodes. Safe, /// This node has to be replaced with a single token of the same kind. SameKind(Option), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. + /// In code mode, this node can only be changed into a single atomic + /// expression, otherwise it is safe. AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the + /// Changing an unsafe layer node in code mode changes what the parents or + /// the surrounding nodes would be and is therefore disallowed. Change the /// parents or children instead. If it appears in Markup, however, it is /// safe to change. UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. + /// Changing an unsafe node or any of its children is not allowed. Change + /// the parents instead. Unsafe, } @@ -37,11 +37,12 @@ pub enum Postcondition { /// existence is plausible with them present. This can be used to encode some /// context-free language components for incremental parsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { +pub enum NeighbourRule { /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. Additionally, the indentation of the first right non-trivia, - /// non-whitespace sibling must not be greater than the current indentation. + /// left neighbors has to check this invariant. Additionally, when + /// exchanging the right sibling or inserting such a node the indentation of + /// the first right non-trivia, non-whitespace sibling must not be greater + /// than the current indentation. AtStart, /// These nodes depend on not being at the start of a line. Reparsing of /// safe left neighbors has to check this invariant. Otherwise, this node is @@ -77,8 +78,12 @@ impl<'a> Reparser<'a> { impl Reparser<'_> { /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Option> { - self.reparse_step(green, 0, TokenMode::Markup, true) + pub fn reparse(&self, green: &mut Rc) -> Range { + self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true) + .unwrap_or_else(|| { + *green = parse(self.src); + 0 .. self.src.len() + }) } fn reparse_step( @@ -90,7 +95,7 @@ impl Reparser<'_> { ) -> Option> { let mode = green.kind().mode().unwrap_or(parent_mode); let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); - let child_count = green.children().len(); + let original_count = green.children().len(); // Save the current indent if this is a markup node. let indent = match green.kind() { @@ -134,12 +139,14 @@ impl Reparser<'_> { // neighbor! if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i + 1 == child_count) + && (mode != TokenMode::Markup || i + 1 == original_count) { - outermost &= i + 1 == child_count; + outermost &= i + 1 == original_count; last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() { + } else if mode != TokenMode::Markup + || !child.kind().succession_rule().safe_in_markup() + { break; } @@ -147,17 +154,17 @@ impl Reparser<'_> { } let (last_idx, last_end) = last?; - let children_range = first_idx .. last_idx + 1; - let children_span = first_start .. last_end; + let superseded_range = first_idx .. last_idx + 1; + let superseded_span = first_start .. last_end; let last_kind = green.children()[last_idx].kind().clone(); // First, we try if the child itself has another, more specific // applicable child. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; let prev_len = child.len(); - if last_kind.post() != Postcondition::Unsafe { + if last_kind.succession_rule() != SuccessionRule::Unsafe { if let Some(range) = match child { Green::Node(node) => self.reparse_step( Rc::make_mut(node), @@ -168,56 +175,64 @@ impl Reparser<'_> { Green::Token(_) => None, } { let new_len = child.len(); - green.update_child_len(new_len, prev_len); + green.update_parent(new_len, prev_len); return Some(range); } } } // We only replace multiple children in markup mode. - if children_range.len() > 1 && mode == TokenMode::Code { + if superseded_range.len() > 1 && mode == TokenMode::Code { return None; } // We now have a child that we can replace and a function to do so. let func = last_kind.reparsing_func(child_mode, indent)?; - let post = last_kind.post(); + let succession = last_kind.succession_rule(); - let mut column = if mode == TokenMode::Markup { - // In this case, we want to pass the indentation to the function. - Scanner::new(self.src).column(children_span.start) - } else { - 0 - }; + let mut markup_min_column = 0; // If this is a markup node, we want to save its indent instead to pass // the right indent argument. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; if let NodeKind::Markup(n) = child.kind() { - column = *n; + markup_min_column = *n; } } // The span of the to-be-reparsed children in the new source. - let replace_span = children_span.start + let newborn_span = superseded_span.start .. - children_span.end + self.replace_len - self.replace_range.len(); + superseded_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. - let reparse_span = if post == Postcondition::AtomicPrimary { - replace_span.start .. self.src.len() + let reparse_span = if succession == SuccessionRule::AtomicPrimary { + newborn_span.start .. self.src.len() } else { - replace_span.clone() + newborn_span.clone() }; + let mut prefix = ""; + for (i, c) in self.src[.. reparse_span.start].char_indices().rev() { + if is_newline(c) { + break; + } + prefix = &self.src[i .. reparse_span.start]; + } + // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; + let (mut newborns, terminated) = func( + &prefix, + &self.src[reparse_span.clone()], + at_start, + markup_min_column, + )?; // Make sure that atomic primaries ate only what they were supposed to. - if post == Postcondition::AtomicPrimary { - let len = replace_span.len(); + if succession == SuccessionRule::AtomicPrimary { + let len = newborn_span.len(); if newborns.len() > 1 && newborns[0].len() == len { newborns.truncate(1); } else if newborns.iter().map(Green::len).sum::() != len { @@ -234,16 +249,16 @@ impl Reparser<'_> { // If all post- and preconditions match, we are good to go! if validate( green.children(), - children_range.clone(), + superseded_range.clone(), at_start, &newborns, mode, - post, - replace_span.clone(), + succession, + newborn_span.clone(), self.src, ) { - green.replace_child_range(children_range, newborns); - Some(replace_span) + green.replace_children(superseded_range, newborns); + Some(newborn_span) } else { None } @@ -252,27 +267,27 @@ impl Reparser<'_> { /// Validate that a node replacement is allowed by post- and preconditions. fn validate( - prev_children: &[Green], - children_range: Range, + superseded: &[Green], + superseded_range: Range, mut at_start: bool, newborns: &[Green], mode: TokenMode, - post: Postcondition, - replace_span: Range, + post: SuccessionRule, + newborn_span: Range, src: &str, ) -> bool { // Atomic primaries must only generate one new child. - if post == Postcondition::AtomicPrimary && newborns.len() != 1 { + if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 { return false; } // Same kind in mode `inside` must generate only one child and that child // must be of the same kind as previously. - if let Postcondition::SameKind(inside) = post { - let prev_kind = prev_children[children_range.start].kind(); - let prev_mode = prev_kind.mode().unwrap_or(mode); - if inside.map_or(true, |m| m == prev_mode) - && (newborns.len() != 1 || prev_kind != newborns[0].kind()) + if let SuccessionRule::SameKind(inside) = post { + let superseded_kind = superseded[superseded_range.start].kind(); + let superseded_mode = superseded_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == superseded_mode) + && (newborns.len() != 1 || superseded_kind != newborns[0].kind()) { return false; } @@ -286,15 +301,15 @@ fn validate( // Check if there are any `AtStart` predecessors which require a certain // indentation. let s = Scanner::new(src); - let mut prev_pos = replace_span.start; - for child in (&prev_children[.. children_range.start]).iter().rev() { + let mut prev_pos = newborn_span.start; + for child in (&superseded[.. superseded_range.start]).iter().rev() { prev_pos -= child.len(); if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { let left_col = s.column(prev_pos); // Search for the first non-trivia newborn. - let mut new_pos = replace_span.start; + let mut new_pos = newborn_span.start; let mut child_col = None; for child in newborns { if !child.kind().is_trivia() { @@ -323,15 +338,15 @@ fn validate( // Ensure that a possible at-start or not-at-start precondition of // a node after the replacement range is satisfied. - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { - let pre = child.kind().pre(); - if (pre == Precondition::AtStart && !at_start) - || (pre == Precondition::NotAtStart && at_start) - { - return false; - } + for child in &superseded[superseded_range.end ..] { + let neighbour_rule = child.kind().neighbour_rule(); + if (neighbour_rule == NeighbourRule::AtStart && !at_start) + || (neighbour_rule == NeighbourRule::NotAtStart && at_start) + { + return false; + } + if !child.kind().is_trivia() { break; } @@ -339,42 +354,40 @@ fn validate( } // Verify that the last of the newborns is not `NotAtEnd`. - if newborns - .last() - .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) - { + if newborns.last().map_or(false, |child| { + child.kind().neighbour_rule() == NeighbourRule::NotAtEnd + }) { return false; } // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. - let mut child_pos = replace_span.end; - let mut child_col = None; + let mut child_pos = newborn_span.end; for child in newborns.iter().rev() { child_pos -= child.len(); - if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { - child_col = Some(s.column(child_pos)); - } - break; + if child.kind().is_trivia() { + continue; } - } - if let Some(child_col) = child_col { - let mut right_pos = replace_span.end; - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { + let child_col = s.column(child_pos); + + let mut right_pos = newborn_span.end; + for child in &superseded[superseded_range.end ..] { + if child.kind().is_trivia() { + right_pos += child.len(); + continue; + } + if s.column(right_pos) > child_col { return false; } - break; } - - right_pos += child.len(); } + break; } true @@ -387,13 +400,15 @@ impl NodeKind { &self, parent_mode: TokenMode, indent: usize, - ) -> Option Option<(Vec, bool)>> { + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); - match self.post() { - Postcondition::Unsafe | Postcondition::UnsafeLayer => None, - Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), - Postcondition::AtomicPrimary => Some(parse_atomic_markup), - Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + match self.succession_rule() { + SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, + SuccessionRule::AtomicPrimary if mode == TokenMode::Code => { + Some(parse_atomic) + } + SuccessionRule::AtomicPrimary => Some(parse_atomic_markup), + SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), @@ -409,7 +424,7 @@ impl NodeKind { /// Whether it is safe to do incremental parsing on this node. Never allow /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { + pub fn succession_rule(&self) -> SuccessionRule { match self { // Replacing parenthesis changes if the expression is balanced and // is therefore not safe. @@ -418,7 +433,7 @@ impl NodeKind { | Self::LeftBrace | Self::RightBrace | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, + | Self::RightParen => SuccessionRule::Unsafe, // Replacing an operator can change whether the parent is an // operation which makes it unsafe. The star can appear in markup. @@ -445,7 +460,7 @@ impl NodeKind { | Self::Or | Self::With | Self::Dots - | Self::Arrow => Postcondition::Unsafe, + | Self::Arrow => SuccessionRule::Unsafe, // These keywords change what kind of expression the parent is and // how far the expression would go. @@ -461,14 +476,14 @@ impl NodeKind { | Self::Return | Self::Import | Self::Include - | Self::From => Postcondition::Unsafe, + | Self::From => SuccessionRule::Unsafe, // Changing the heading level, enum numbering, or list bullet // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, + Self::EnumNumbering(_) => SuccessionRule::Unsafe, // This can be anything, so we don't make any promises. - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe, // These are complex expressions which may screw with their // environments. @@ -477,33 +492,33 @@ impl NodeKind { | Self::Binary | Self::CallArgs | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, + | Self::Spread => SuccessionRule::UnsafeLayer, // The closure is a bit magic with the let expression, and also it // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer, // Missing these creates errors for the parents. Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer + SuccessionRule::UnsafeLayer } // Only markup is expected at the points where it does occur. The // indentation must be preserved as well, also for the children. - Self::Markup(_) => Postcondition::SameKind(None), + Self::Markup(_) => SuccessionRule::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None), // These can appear as bodies and would trigger an error if they // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Template => SuccessionRule::SameKind(None), + Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)), // Whitespace in code mode has to remain whitespace or else the type // of things would change. - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)), // These are expressions that can be replaced by other expressions. Self::Ident(_) @@ -519,7 +534,7 @@ impl NodeKind { | Self::Dict | Self::Group | Self::None - | Self::Auto => Postcondition::AtomicPrimary, + | Self::Auto => SuccessionRule::AtomicPrimary, // More complex, but still an expression. Self::ForExpr @@ -528,11 +543,11 @@ impl NodeKind { | Self::LetExpr | Self::SetExpr | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, + | Self::IncludeExpr => SuccessionRule::AtomicPrimary, // This element always has to remain in the same column so better // reparse the whole parent. - Self::Raw(_) => Postcondition::Unsafe, + Self::Raw(_) => SuccessionRule::Unsafe, // These are all replaceable by other tokens. Self::Parbreak @@ -548,22 +563,22 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Math(_) => Postcondition::Safe, + | Self::Math(_) => SuccessionRule::Safe, } } /// The appropriate precondition for the type. - pub fn pre(&self) -> Precondition { + pub fn neighbour_rule(&self) -> NeighbourRule { match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Error(_, _) => Precondition::NotAtEnd, - _ => Precondition::None, + Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart, + Self::TextInLine(_) => NeighbourRule::NotAtStart, + Self::Error(_, _) => NeighbourRule::NotAtEnd, + _ => NeighbourRule::None, } } } -impl Postcondition { +impl SuccessionRule { /// Whether a node with this condition can be reparsed in markup mode. pub fn safe_in_markup(&self) -> bool { match self { -- cgit v1.2.3