From 1e4cab393e55df8875c6303ebb7bde8f09f911c9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 2 Nov 2021 12:06:22 +0100 Subject: Introduce incremental parsing --- src/syntax/mod.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/syntax/span.rs | 11 +++++++ 2 files changed, 105 insertions(+) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d9ad42a8..b0911c63 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -127,6 +127,92 @@ impl GreenNode { pub fn children(&self) -> &[Green] { &self.children } + + /// The node's children, mutably. + pub fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + + /// The node's metadata. + pub fn data(&self) -> &GreenData { + &self.data + } + + /// The node's type. + pub fn kind(&self) -> &NodeKind { + self.data().kind() + } + + /// The node's length. + pub fn len(&self) -> usize { + self.data().len() + } + + /// Find the parent of the deepest incremental-safe node and the index of + /// the found child. + pub fn incremental_parent( + &mut self, + span: Span, + ) -> Option<(usize, &mut GreenNode, usize)> { + self.incremental_parent_internal(span, 0) + } + + fn incremental_parent_internal( + &mut self, + span: Span, + mut offset: usize, + ) -> Option<(usize, &mut GreenNode, usize)> { + let x = unsafe { &mut *(self as *mut _) }; + + for (i, child) in self.children.iter_mut().enumerate() { + match child { + Green::Token(n) => { + if offset < span.start { + // the token is strictly before the span + offset += n.len(); + } else { + // the token is within or after the span; tokens are + // never safe, so we return. + return None; + } + } + Green::Node(n) => { + let end = n.len() + offset; + if offset < span.start && end < span.start { + // the node is strictly before the span + offset += n.len(); + } else if span.start >= offset + && span.start < end + && span.end <= end + && span.end > offset + { + // the node is within the span. + if n.kind().is_incremental_safe() { + let res = + Rc::make_mut(n).incremental_parent_internal(span, offset); + if res.is_none() { + return Some((i, x, offset)); + } + } else { + return Rc::make_mut(n) + .incremental_parent_internal(span, offset); + } + } else { + // the node is overlapping or after after the span; nodes are + // never safe, so we return. + return None; + } + } + } + } + + return None; + } + + /// Replace one of the node's children. + pub fn replace_child(&mut self, index: usize, child: impl Into) { + self.children[index] = child.into(); + } } impl From for Green { @@ -653,6 +739,14 @@ impl NodeKind { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } + /// Whether it is safe to do incremental parsing on this node. + pub fn is_incremental_safe(&self) -> bool { + match self { + Self::Block | Self::Markup => true, + _ => false, + } + } + /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 4d5b8819..a707d3d9 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,6 +125,17 @@ impl Span { *self = self.join(other) } + /// Create a new span with n characters inserted inside of this span. + pub fn inserted(mut self, other: Self, n: usize) -> Self { + if !self.contains(other.start) || !self.contains(other.end) { + panic!(); + } + + let len_change = (n as isize - other.len() as isize) as usize; + self.end += len_change; + self + } + /// Test whether a position is within the span. pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos -- cgit v1.2.3 From 7016ab0d123ba06d0bbc6ed5001fa02fbd261bfa Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 3 Nov 2021 11:03:00 +0100 Subject: Make stuff more elegant --- src/syntax/mod.rs | 71 +++++++++++++++++++++++++----------------------------- src/syntax/span.rs | 6 ++--- 2 files changed, 36 insertions(+), 41 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index b0911c63..5da690ab 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -98,7 +98,7 @@ pub struct GreenNode { /// This node's children, losslessly make up this node. children: Vec, /// Whether this node or any of its children are erroneous. - erroneous: bool, + pub erroneous: bool, } impl GreenNode { @@ -148,12 +148,9 @@ impl GreenNode { self.data().len() } - /// Find the parent of the deepest incremental-safe node and the index of - /// the found child. - pub fn incremental_parent( - &mut self, - span: Span, - ) -> Option<(usize, &mut GreenNode, usize)> { + /// Find the deepest incremental-safe node and its offset in the source + /// code. + pub fn incremental_parent(&mut self, span: Span) -> Option<(&mut GreenNode, usize)> { self.incremental_parent_internal(span, 0) } @@ -161,10 +158,8 @@ impl GreenNode { &mut self, span: Span, mut offset: usize, - ) -> Option<(usize, &mut GreenNode, usize)> { - let x = unsafe { &mut *(self as *mut _) }; - - for (i, child) in self.children.iter_mut().enumerate() { + ) -> Option<(&mut GreenNode, usize)> { + for child in self.children.iter_mut() { match child { Green::Token(n) => { if offset < span.start { @@ -187,15 +182,15 @@ impl GreenNode { && span.end > offset { // the node is within the span. - if n.kind().is_incremental_safe() { - let res = - Rc::make_mut(n).incremental_parent_internal(span, offset); + let safe = n.kind().is_incremental_safe(); + let mut_n = Rc::make_mut(n); + if safe { + let res = mut_n.incremental_parent_internal(span, offset); if res.is_none() { - return Some((i, x, offset)); + return Some((mut_n, offset)); } } else { - return Rc::make_mut(n) - .incremental_parent_internal(span, offset); + return mut_n.incremental_parent_internal(span, offset); } } else { // the node is overlapping or after after the span; nodes are @@ -208,11 +203,6 @@ impl GreenNode { return None; } - - /// Replace one of the node's children. - pub fn replace_child(&mut self, index: usize, child: impl Into) { - self.children[index] = child.into(); - } } impl From for Green { @@ -352,7 +342,7 @@ impl Debug for RedNode { } } -/// A borrowed wrapper for a green node with span information. +/// A borrowed wrapper for a [`GreenNode`] with span information. /// /// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. #[derive(Copy, Clone, PartialEq)] @@ -387,6 +377,26 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node or its children contain an error. + pub fn erroneous(self) -> bool { + self.green.erroneous() + } + + /// The node's children. + pub fn children(self) -> Children<'a> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + Children { + id: self.id, + iter: children.iter(), + front: self.offset, + back: self.offset + self.len(), + } + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -419,21 +429,6 @@ impl<'a> RedRef<'a> { T::from_red(self) } - /// The node's children. - pub fn children(self) -> Children<'a> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - Children { - id: self.id, - iter: children.iter(), - front: self.offset, - back: self.offset + self.len(), - } - } - /// Get the first child that can cast to some AST type. pub fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) diff --git a/src/syntax/span.rs b/src/syntax/span.rs index a707d3d9..430d5f1d 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -127,12 +127,12 @@ impl Span { /// Create a new span with n characters inserted inside of this span. pub fn inserted(mut self, other: Self, n: usize) -> Self { - if !self.contains(other.start) || !self.contains(other.end) { + if !self.surrounds(other) { panic!(); } - let len_change = (n as isize - other.len() as isize) as usize; - self.end += len_change; + let len_change = n as isize - other.len() as isize; + self.end += len_change as usize; self } -- cgit v1.2.3 From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 6 Nov 2021 16:07:21 +0100 Subject: Incremental-safety based approach --- src/syntax/ast.rs | 4 +- src/syntax/highlight.rs | 1 + src/syntax/mod.rs | 574 ++++++++++++++++++++++++++++++++++++++++++++---- src/syntax/span.rs | 3 +- 4 files changed, 532 insertions(+), 50 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ae8ecdc9..ed74dfe5 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -65,7 +65,9 @@ impl Markup { NodeKind::Parbreak => Some(MarkupNode::Parbreak), NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::Text(s) | NodeKind::TextInLine(s) => { + Some(MarkupNode::Text(s.clone())) + } NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 85fbef12..21af060f 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -158,6 +158,7 @@ impl Category { NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, + NodeKind::TextInLine(_) => None, NodeKind::List => None, NodeKind::Enum => None, NodeKind::Array => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 5da690ab..0879ab7f 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -15,6 +15,9 @@ pub use span::*; use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::{ + parse_atomic, parse_code, parse_markup, parse_markup_elements, TokenMode, +}; use crate::source::SourceId; use crate::util::EcoString; @@ -73,6 +76,43 @@ impl Green { Self::Token(data) => data.kind = kind, } } + + /// Find the innermost child that is incremental safe. + pub fn incremental_int( + &mut self, + edit: &str, + replace: Span, + replacement_len: usize, + offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> bool { + match self { + Green::Node(n) => Rc::make_mut(n).incremental_int( + edit, + replace, + replacement_len, + offset, + parent_mode, + outermost, + ), + Green::Token(_) => false, + } + } + + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + match self { + Green::Node(n) => n.errors(), + Green::Token(t) => { + if t.kind().is_error() { + vec![t.kind().clone()] + } else { + vec![] + } + } + } + } } impl Default for Green { @@ -148,60 +188,181 @@ impl GreenNode { self.data().len() } - /// Find the deepest incremental-safe node and its offset in the source - /// code. - pub fn incremental_parent(&mut self, span: Span) -> Option<(&mut GreenNode, usize)> { - self.incremental_parent_internal(span, 0) + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + let mut res = self.children.iter().flat_map(|c| c.errors()).collect::>(); + + if self.kind().is_error() { + res.push(self.kind().clone()); + } + + res + } + + /// Find the innermost child that is incremental safe. + pub fn incremental( + &mut self, + edit: &str, + replace: Span, + replacement_len: usize, + ) -> bool { + self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) } - fn incremental_parent_internal( + fn incremental_int( &mut self, - span: Span, + src: &str, + replace: Span, + replacement_len: usize, mut offset: usize, - ) -> Option<(&mut GreenNode, usize)> { - for child in self.children.iter_mut() { - match child { - Green::Token(n) => { - if offset < span.start { - // the token is strictly before the span - offset += n.len(); - } else { - // the token is within or after the span; tokens are - // never safe, so we return. - return None; - } + parent_mode: TokenMode, + outermost: bool, + ) -> bool { + let kind = self.kind().clone(); + let mode = kind.mode().apply(parent_mode); + eprintln!("in {:?} (mode {:?})", kind, mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = self.children.len() - 1; + for (i, child) in self.children.iter_mut().enumerate() { + let child_span = Span::new(replace.source, offset, offset + child.len()); + if child_span.surrounds(replace) { + eprintln!("found correct child"); + + // First, we try if the child has another, more specific applicable child. + if kind.incremental_safety() != IncrementalSafety::Unsafe + && child.incremental_int( + src, + replace, + replacement_len, + offset, + mode, + i == last && outermost, + ) + { + eprintln!("child was successful"); + return true; } - Green::Node(n) => { - let end = n.len() + offset; - if offset < span.start && end < span.start { - // the node is strictly before the span - offset += n.len(); - } else if span.start >= offset - && span.start < end - && span.end <= end - && span.end > offset - { - // the node is within the span. - let safe = n.kind().is_incremental_safe(); - let mut_n = Rc::make_mut(n); - if safe { - let res = mut_n.incremental_parent_internal(span, offset); - if res.is_none() { - return Some((mut_n, offset)); - } - } else { - return mut_n.incremental_parent_internal(span, offset); - } + + // This didn't work, so we try to replace the child at this + // level. + let (function, policy) = + if let Some(p) = child.kind().reparsing_function(mode) { + p } else { - // the node is overlapping or after after the span; nodes are - // never safe, so we return. - return None; - } + return false; + }; + loop_result = Some((i, child_span, function, policy)); + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + + // We now have a child that we can replace and a function to do so if + // the loop found any results at all. + let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result + { + loop_result + } else { + // No child fully contains the replacement. + eprintln!("no child match"); + return false; + }; + + eprintln!("aquired function, policy {:?}", policy); + + let src_span = child_span.inserted(replace, replacement_len); + + let new_children = + if let Some(new_children) = func(&src[src_span.to_range()], child_at_start) { + new_children + } else { + eprintln!("function failed"); + return false; + }; + let child_mode = self.children[child_idx].kind().mode().apply(mode); + eprintln!("child mode {:?}", child_mode); + + // Check if the children / child has the right type. + let require_single = match policy { + IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, + _ => false, + }; + + if require_single { + eprintln!("must be a single replacement"); + if new_children.len() != 1 { + eprintln!("not a single replacement"); + return false; + } + + if match policy { + IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => { + true + } + _ => false, + } { + if self.children[child_idx].kind() != new_children[0].kind() { + eprintln!("not the same kind"); + return false; } } } - return None; + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost + && new_children + .iter() + .flat_map(|x| x.errors()) + .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) + { + eprintln!("unclosed node"); + return false; + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx > 0 { + if self.children[child_idx - 1].kind().incremental_safety() + == IncrementalSafety::EnsureRightWhitespace + && !new_children[0].kind().is_whitespace() + { + eprintln!("left whitespace missing"); + return false; + } + } + + let mut new_at_start = child_at_start; + for child in &new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &self.children[child_idx + 1 ..] { + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().incremental_safety() { + IncrementalSafety::EnsureAtStart if !new_at_start => return false, + IncrementalSafety::EnsureNotAtStart if new_at_start => return false, + _ => {} + } + break; + } + } + + eprintln!("... replacing"); + + self.children.splice(child_idx .. child_idx + 1, new_children); + true } } @@ -397,6 +558,7 @@ impl<'a> RedRef<'a> { } } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -593,6 +755,8 @@ pub enum NodeKind { Parbreak, /// A consecutive non-markup string. Text(EcoString), + /// A text node that cannot appear at the beginning of a source line. + TextInLine(EcoString), /// A non-breaking space: `~`. NonBreakingSpace, /// An en-dash: `--`. @@ -729,19 +893,249 @@ impl NodeKind { matches!(self, Self::LeftParen | Self::RightParen) } + /// Whether this is whitespace. + pub fn is_whitespace(&self) -> bool { + match self { + Self::Space(_) | Self::Parbreak => true, + _ => false, + } + } + + /// Whether this is trivia. + pub fn is_trivia(&self) -> bool { + match self { + _ if self.is_whitespace() => true, + Self::LineComment | Self::BlockComment => true, + _ => false, + } + } + /// Whether this is some kind of error. pub fn is_error(&self) -> bool { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } - /// Whether it is safe to do incremental parsing on this node. - pub fn is_incremental_safe(&self) -> bool { + /// Whether this node is `at_start` given the previous value of the property. + pub fn is_at_start(&self, prev: bool) -> bool { match self { - Self::Block | Self::Markup => true, + Self::Space(n) if *n > 0 => true, + Self::Parbreak => true, + Self::LineComment | Self::BlockComment => prev, _ => false, } } + /// Whether this token appears in Markup. + pub fn mode(&self) -> NodeMode { + match self { + Self::Markup + | Self::Space(_) + | Self::Parbreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Math(_) => NodeMode::Markup, + Self::Template + | Self::Block + | Self::None + | Self::Auto + | Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::Call + | Self::LineComment + | Self::BlockComment + | Self::Error(_, _) + | Self::Minus + | Self::Eq => NodeMode::Universal, + _ => NodeMode::Code, + } + } + + pub fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> Option<(fn(&str, bool) -> Option>, IncrementalSafety)> { + let policy = self.incremental_safety(); + if policy == IncrementalSafety::Unsafe { + return None; + } + + let mode = self.mode(); + if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { + return None; + } + + if mode != NodeMode::Markup + && parent_mode == TokenMode::Code + && policy == IncrementalSafety::AtomicPrimary + { + return Some((parse_atomic, policy)); + } + + let parser: fn(&str, bool) -> _ = match mode { + NodeMode::Code => parse_code, + NodeMode::Markup if self == &Self::Markup => parse_markup, + NodeMode::Markup => parse_markup_elements, + NodeMode::Universal if parent_mode == TokenMode::Code => parse_code, + NodeMode::Universal => parse_markup_elements, + }; + + Some((parser, policy)) + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn incremental_safety(&self) -> IncrementalSafety { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => IncrementalSafety::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => IncrementalSafety::Unsafe, + + // These keywords are literals and can be safely be substituted with + // other expressions. + Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, + + // These keywords change what kind of expression the parent is. + Self::Let + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Set + | Self::Import + | Self::Include + | Self::From => IncrementalSafety::Unsafe, + + // This is a backslash followed by a space. But changing it to + // anything else is fair game. + Self::Linebreak => IncrementalSafety::EnsureRightWhitespace, + + Self::Markup => IncrementalSafety::SameKind, + + Self::Space(_) => IncrementalSafety::SameKindInCode, + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Text(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph => IncrementalSafety::Safe, + + // This is text that needs to be not `at_start`, otherwise it would + // start one of the below items. + Self::TextInLine(_) => IncrementalSafety::EnsureNotAtStart, + + // These have to be `at_start` so they must be preceeded with a + // Space(n) with n > 0 or a Parbreak. + Self::Heading | Self::Enum | Self::List => IncrementalSafety::EnsureAtStart, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => IncrementalSafety::Unsafe, + + Self::Raw(_) | Self::Math(_) => IncrementalSafety::Safe, + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => IncrementalSafety::AtomicPrimary, + + Self::Call | Self::Unary | Self::Binary | Self::SetExpr => { + IncrementalSafety::UnsafeLayer + } + + Self::CallArgs | Self::Named | Self::Spread => IncrementalSafety::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => IncrementalSafety::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template | Self::Block => IncrementalSafety::SameKindInCode, + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::ImportExpr + | Self::IncludeExpr => IncrementalSafety::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + IncrementalSafety::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => IncrementalSafety::SameKind, + + Self::Error(_, _) | Self::Unknown(_) => IncrementalSafety::Unsafe, + } + } + /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { @@ -794,7 +1188,7 @@ impl NodeKind { Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", - Self::Text(_) => "text", + Self::Text(_) | Self::TextInLine(_) => "text", Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", Self::EmDash => "em dash", @@ -855,3 +1249,87 @@ impl Display for NodeKind { f.pad(self.as_str()) } } + +/// This enum describes what conditions a node has for being replaced by a new +/// parse result. +/// +/// Safe nodes are replaced by the new parse result from the respective mode. +/// They can be replaced by multiple tokens. If a token is inserted in Markup +/// mode and the next token would not be `at_start` there needs to be a forward +/// check for a `EnsureAtStart` node. If this fails, the parent has to be +/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is +/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last +/// tokens to be inserted, the edit is invalidated if there is no following +/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The +/// unsafe layers cannot be used but allow children access, the unsafe nodes do +/// neither. +/// +/// *Procedure:* +/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return +/// None. +/// 2. Reparse with appropriate node kind and `at_start`. +/// 3. Check whether the topmost group is terminated and the range was +/// completely consumed, otherwise return None. +/// 4. Check if the type criteria are met. +/// 5. If the node is not at the end of the tree, check if Strings etc. are +/// terminated. +/// 6. If this is markup, check the following things: +/// - The `at_start` conditions of the next non-comment and non-space(0) node +/// are met. +/// - The first node is whitespace or the previous siblings are not +/// `EnsureRightWhitespace`. +/// - If any of those fails, return None. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum IncrementalSafety { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind, + /// This node has to be replaced with a single token of the same kind if in + /// code mode. + SameKindInCode, + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + EnsureAtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + EnsureNotAtStart, + /// These nodes must be followed by whitespace. + EnsureRightWhitespace, + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +/// This enum describes which mode a token of [`NodeKind`] can appear in. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum NodeMode { + /// The token can only appear in markup mode. + Markup, + /// The token can only appear in code mode. + Code, + /// The token can appear in either mode. Look at the parent node to decide + /// which mode it is in. + Universal, +} + +impl NodeMode { + /// Returns the new [`TokenMode`] given the old one. + pub fn apply(&self, old: TokenMode) -> TokenMode { + match self { + Self::Markup => TokenMode::Markup, + Self::Code => TokenMode::Code, + Self::Universal => old, + } + } +} diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 430d5f1d..2691acc7 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,7 +125,8 @@ impl Span { *self = self.join(other) } - /// Create a new span with n characters inserted inside of this span. + /// Create a new span by specifying a span in which a modification happened + /// and how many characters are now in that span. pub fn inserted(mut self, other: Self, n: usize) -> Self { if !self.surrounds(other) { panic!(); -- cgit v1.2.3 From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 7 Nov 2021 19:43:01 +0100 Subject: Tests - length updates - dealing with keywords and comments --- src/syntax/mod.rs | 200 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 133 insertions(+), 67 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0879ab7f..cb811266 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -49,6 +49,15 @@ impl Green { self.data().len() } + /// Set the length of the node. + pub fn set_len(&mut self, len: usize) { + let data = match self { + Self::Node(node) => &mut Rc::make_mut(node).data, + Self::Token(data) => data, + }; + data.set_len(len); + } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -78,15 +87,15 @@ impl Green { } /// Find the innermost child that is incremental safe. - pub fn incremental_int( + fn incremental_int( &mut self, edit: &str, replace: Span, replacement_len: usize, offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -96,7 +105,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => false, + Green::Token(_) => Err(false), } } @@ -202,11 +211,17 @@ impl GreenNode { /// Find the innermost child that is incremental safe. pub fn incremental( &mut self, - edit: &str, + src: &str, replace: Span, replacement_len: usize, ) -> bool { - self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) + let edit = &src[replace.inserted(replace, replacement_len).to_range()]; + if edit.contains("//") || edit.contains("/*") || edit.contains("*/") { + return false; + } + + self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) + .is_ok() } fn incremental_int( @@ -215,9 +230,9 @@ impl GreenNode { replace: Span, replacement_len: usize, mut offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { let kind = self.kind().clone(); let mode = kind.mode().apply(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); @@ -230,30 +245,41 @@ impl GreenNode { if child_span.surrounds(replace) { eprintln!("found correct child"); + let old_len = child.len(); // First, we try if the child has another, more specific applicable child. - if kind.incremental_safety() != IncrementalSafety::Unsafe - && child.incremental_int( + if !kind.incremental_safety().unsafe_interior() { + match child.incremental_int( src, replace, replacement_len, offset, - mode, + kind.mode().child_mode(), i == last && outermost, - ) - { - eprintln!("child was successful"); - return true; + ) { + Ok(_) => { + eprintln!("child success"); + let new_len = child.len(); + self.data.set_len(self.data.len() + new_len - old_len); + return Ok(()); + } + Err(b) if b => return Err(false), + _ => {} + } } // This didn't work, so we try to replace the child at this // level. - let (function, policy) = - if let Some(p) = child.kind().reparsing_function(mode) { - p - } else { - return false; - }; - loop_result = Some((i, child_span, function, policy)); + let (function, policy) = match child + .kind() + .reparsing_function(mode.child_mode().as_token_mode()) + { + Ok(p) => p, + Err(policy) => { + return Err(policy == IncrementalSafety::VeryUnsafe); + } + }; + loop_result = + Some((i, child_span, i == last && outermost, function, policy)); break; } @@ -264,14 +290,14 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. - let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result - { - loop_result - } else { - // No child fully contains the replacement. - eprintln!("no child match"); - return false; - }; + let (child_idx, child_span, child_outermost, func, policy) = + if let Some(loop_result) = loop_result { + loop_result + } else { + // No child fully contains the replacement. + eprintln!("no child match"); + return Err(false); + }; eprintln!("aquired function, policy {:?}", policy); @@ -282,9 +308,10 @@ impl GreenNode { new_children } else { eprintln!("function failed"); - return false; + return Err(false); }; - let child_mode = self.children[child_idx].kind().mode().apply(mode); + let child_mode = + self.children[child_idx].kind().mode().child_mode().as_token_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -298,7 +325,7 @@ impl GreenNode { eprintln!("must be a single replacement"); if new_children.len() != 1 { eprintln!("not a single replacement"); - return false; + return Err(false); } if match policy { @@ -310,32 +337,32 @@ impl GreenNode { } { if self.children[child_idx].kind() != new_children[0].kind() { eprintln!("not the same kind"); - return false; + return Err(false); } } } // Do not accept unclosed nodes if the old node did not use to be at the // right edge of the tree. - if !outermost + if !child_outermost && new_children .iter() .flat_map(|x| x.errors()) .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) { eprintln!("unclosed node"); - return false; + return Err(false); } // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { + if mode.as_token_mode() == TokenMode::Markup { if child_idx > 0 { if self.children[child_idx - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace && !new_children[0].kind().is_whitespace() { eprintln!("left whitespace missing"); - return false; + return Err(false); } } @@ -351,8 +378,12 @@ impl GreenNode { } match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => return false, - IncrementalSafety::EnsureNotAtStart if new_at_start => return false, + IncrementalSafety::EnsureAtStart if !new_at_start => { + return Err(false); + } + IncrementalSafety::EnsureNotAtStart if new_at_start => { + return Err(false); + } _ => {} } break; @@ -361,8 +392,12 @@ impl GreenNode { eprintln!("... replacing"); + let old_len = self.children[child_idx].len(); + let new_len: usize = new_children.iter().map(Green::len).sum(); + self.children.splice(child_idx .. child_idx + 1, new_children); - true + self.data.set_len(self.data.len + new_len - old_len); + Ok(()) } } @@ -414,6 +449,11 @@ impl GreenData { pub fn len(&self) -> usize { self.len } + + /// Set the length of the node. + pub fn set_len(&mut self, len: usize) { + self.len = len; + } } impl From for Green { @@ -939,24 +979,18 @@ impl NodeKind { | Self::Escape(_) | Self::Strong | Self::Emph + | Self::Raw(_) | Self::Math(_) => NodeMode::Markup, Self::Template | Self::Block - | Self::None - | Self::Auto | Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group + | Self::LetExpr + | Self::IfExpr + | Self::WhileExpr + | Self::ForExpr + | Self::ImportExpr | Self::Call + | Self::IncludeExpr | Self::LineComment | Self::BlockComment | Self::Error(_, _) @@ -969,22 +1003,25 @@ impl NodeKind { pub fn reparsing_function( &self, parent_mode: TokenMode, - ) -> Option<(fn(&str, bool) -> Option>, IncrementalSafety)> { + ) -> Result< + (fn(&str, bool) -> Option>, IncrementalSafety), + IncrementalSafety, + > { let policy = self.incremental_safety(); - if policy == IncrementalSafety::Unsafe { - return None; + if policy.unsafe_interior() { + return Err(policy); } let mode = self.mode(); if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { - return None; + return Err(policy); } if mode != NodeMode::Markup && parent_mode == TokenMode::Code && policy == IncrementalSafety::AtomicPrimary { - return Some((parse_atomic, policy)); + return Ok((parse_atomic, policy)); } let parser: fn(&str, bool) -> _ = match mode { @@ -995,7 +1032,7 @@ impl NodeKind { NodeMode::Universal => parse_markup_elements, }; - Some((parser, policy)) + Ok((parser, policy)) } /// Whether it is safe to do incremental parsing on this node. Never allow @@ -1042,7 +1079,8 @@ impl NodeKind { // other expressions. Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, - // These keywords change what kind of expression the parent is. + // These keywords change what kind of expression the parent is and + // how far the expression would go. Self::Let | Self::If | Self::Else @@ -1055,7 +1093,7 @@ impl NodeKind { | Self::Set | Self::Import | Self::Include - | Self::From => IncrementalSafety::Unsafe, + | Self::From => IncrementalSafety::VeryUnsafe, // This is a backslash followed by a space. But changing it to // anything else is fair game. @@ -1309,6 +1347,17 @@ pub enum IncrementalSafety { /// Changing an unsafe node or any of its children will trigger undefined /// behavior. Change the parents instead. Unsafe, + /// Its unsafe for two! + VeryUnsafe, +} + +impl IncrementalSafety { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe | Self::VeryUnsafe => true, + _ => false, + } + } } /// This enum describes which mode a token of [`NodeKind`] can appear in. @@ -1319,17 +1368,34 @@ pub enum NodeMode { /// The token can only appear in code mode. Code, /// The token can appear in either mode. Look at the parent node to decide - /// which mode it is in. + /// which mode it is in. After an apply, this is equivalent to Markup. Universal, } impl NodeMode { - /// Returns the new [`TokenMode`] given the old one. - pub fn apply(&self, old: TokenMode) -> TokenMode { + /// Returns a new mode considering the parent node. + pub fn apply(&self, old: Self) -> Self { match self { - Self::Markup => TokenMode::Markup, + Self::Markup => Self::Markup, + Self::Code => Self::Code, + Self::Universal if old != Self::Markup => Self::Code, + Self::Universal => Self::Universal, + } + } + + /// Return the corresponding token mode. + pub fn as_token_mode(&self) -> TokenMode { + match self { + Self::Markup | Self::Universal => TokenMode::Markup, Self::Code => TokenMode::Code, - Self::Universal => old, + } + } + + /// The mode of the children of this node. + pub fn child_mode(&self) -> Self { + match self { + Self::Markup => Self::Markup, + Self::Code | Self::Universal => Self::Code, } } } -- cgit v1.2.3 From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 8 Nov 2021 12:01:35 +0100 Subject: Deal with the effects of keywords --- src/syntax/mod.rs | 111 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 41 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index cb811266..c1d7b8d3 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -16,7 +16,8 @@ use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::parse::{ - parse_atomic, parse_code, parse_markup, parse_markup_elements, TokenMode, + parse_atomic, parse_block, parse_comment, parse_markup, parse_markup_elements, + parse_template, TokenMode, }; use crate::source::SourceId; use crate::util::EcoString; @@ -95,7 +96,7 @@ impl Green { offset: usize, parent_mode: NodeMode, outermost: bool, - ) -> Result<(), bool> { + ) -> bool { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -105,7 +106,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => Err(false), + Green::Token(_) => false, } } @@ -221,7 +222,6 @@ impl GreenNode { } self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) - .is_ok() } fn incremental_int( @@ -232,7 +232,7 @@ impl GreenNode { mut offset: usize, parent_mode: NodeMode, outermost: bool, - ) -> Result<(), bool> { + ) -> bool { let kind = self.kind().clone(); let mode = kind.mode().apply(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); @@ -248,7 +248,7 @@ impl GreenNode { let old_len = child.len(); // First, we try if the child has another, more specific applicable child. if !kind.incremental_safety().unsafe_interior() { - match child.incremental_int( + if child.incremental_int( src, replace, replacement_len, @@ -256,14 +256,11 @@ impl GreenNode { kind.mode().child_mode(), i == last && outermost, ) { - Ok(_) => { - eprintln!("child success"); - let new_len = child.len(); - self.data.set_len(self.data.len() + new_len - old_len); - return Ok(()); - } - Err(b) if b => return Err(false), - _ => {} + eprintln!("child success"); + let new_len = child.len(); + self.data.set_len(self.data.len() + new_len - old_len); + self.erroneous = self.children.iter().any(|x| x.erroneous()); + return true; } } @@ -274,8 +271,8 @@ impl GreenNode { .reparsing_function(mode.child_mode().as_token_mode()) { Ok(p) => p, - Err(policy) => { - return Err(policy == IncrementalSafety::VeryUnsafe); + _ => { + return false; } }; loop_result = @@ -296,20 +293,33 @@ impl GreenNode { } else { // No child fully contains the replacement. eprintln!("no child match"); - return Err(false); + return false; }; eprintln!("aquired function, policy {:?}", policy); let src_span = child_span.inserted(replace, replacement_len); + let recompile_range = if policy == IncrementalSafety::AtomicPrimary { + src_span.start .. src.len() + } else { + src_span.to_range() + }; - let new_children = - if let Some(new_children) = func(&src[src_span.to_range()], child_at_start) { + let new_children = if let Some(new_children) = + func(&src[recompile_range], child_at_start) + { + if policy != IncrementalSafety::AtomicPrimary + || new_children.iter().map(Green::len).sum::() == src_span.len() + { new_children } else { - eprintln!("function failed"); - return Err(false); - }; + eprintln!("wrong atomic len"); + return false; + } + } else { + eprintln!("function failed"); + return false; + }; let child_mode = self.children[child_idx].kind().mode().child_mode().as_token_mode(); eprintln!("child mode {:?}", child_mode); @@ -325,7 +335,7 @@ impl GreenNode { eprintln!("must be a single replacement"); if new_children.len() != 1 { eprintln!("not a single replacement"); - return Err(false); + return false; } if match policy { @@ -337,7 +347,7 @@ impl GreenNode { } { if self.children[child_idx].kind() != new_children[0].kind() { eprintln!("not the same kind"); - return Err(false); + return false; } } } @@ -351,7 +361,7 @@ impl GreenNode { .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) { eprintln!("unclosed node"); - return Err(false); + return false; } // Check if the neighbor invariants are still true. @@ -362,7 +372,7 @@ impl GreenNode { && !new_children[0].kind().is_whitespace() { eprintln!("left whitespace missing"); - return Err(false); + return false; } } @@ -379,10 +389,10 @@ impl GreenNode { match child.kind().incremental_safety() { IncrementalSafety::EnsureAtStart if !new_at_start => { - return Err(false); + return false; } IncrementalSafety::EnsureNotAtStart if new_at_start => { - return Err(false); + return false; } _ => {} } @@ -396,8 +406,9 @@ impl GreenNode { let new_len: usize = new_children.iter().map(Green::len).sum(); self.children.splice(child_idx .. child_idx + 1, new_children); + self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); - Ok(()) + true } } @@ -1008,28 +1019,41 @@ impl NodeKind { IncrementalSafety, > { let policy = self.incremental_safety(); - if policy.unsafe_interior() { + if policy.is_unsafe() { return Err(policy); } let mode = self.mode(); + let is_code = mode == NodeMode::Universal && parent_mode == TokenMode::Code + || mode == NodeMode::Code; if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { return Err(policy); } - if mode != NodeMode::Markup - && parent_mode == TokenMode::Code - && policy == IncrementalSafety::AtomicPrimary - { + if is_code && policy == IncrementalSafety::AtomicPrimary { return Ok((parse_atomic, policy)); } + if policy == IncrementalSafety::SameKind + || (policy == IncrementalSafety::SameKindInCode && is_code) + { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return Err(policy), + }; + + return Ok((parser, policy)); + } + let parser: fn(&str, bool) -> _ = match mode { - NodeMode::Code => parse_code, NodeMode::Markup if self == &Self::Markup => parse_markup, NodeMode::Markup => parse_markup_elements, - NodeMode::Universal if parent_mode == TokenMode::Code => parse_code, - NodeMode::Universal => parse_markup_elements, + NodeMode::Universal if parent_mode == TokenMode::Markup => { + parse_markup_elements + } + _ => return Err(policy), }; Ok((parser, policy)) @@ -1093,7 +1117,7 @@ impl NodeKind { | Self::Set | Self::Import | Self::Include - | Self::From => IncrementalSafety::VeryUnsafe, + | Self::From => IncrementalSafety::Unsafe, // This is a backslash followed by a space. But changing it to // anything else is fair game. @@ -1347,14 +1371,19 @@ pub enum IncrementalSafety { /// Changing an unsafe node or any of its children will trigger undefined /// behavior. Change the parents instead. Unsafe, - /// Its unsafe for two! - VeryUnsafe, } impl IncrementalSafety { pub fn unsafe_interior(&self) -> bool { match self { - Self::Unsafe | Self::VeryUnsafe => true, + Self::Unsafe => true, + _ => false, + } + } + + pub fn is_unsafe(&self) -> bool { + match self { + Self::UnsafeLayer | Self::Unsafe => true, _ => false, } } -- cgit v1.2.3 From 7a631d8b09bbffa8c7d90a1038d986876370ea7a Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 9 Nov 2021 13:07:55 +0100 Subject: Simplify node mode management --- src/syntax/mod.rs | 66 +++++++++++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index c1d7b8d3..d6658fd3 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -94,7 +94,7 @@ impl Green { replace: Span, replacement_len: usize, offset: usize, - parent_mode: NodeMode, + parent_mode: TokenMode, outermost: bool, ) -> bool { match self { @@ -221,7 +221,7 @@ impl GreenNode { return false; } - self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) + self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) } fn incremental_int( @@ -230,11 +230,11 @@ impl GreenNode { replace: Span, replacement_len: usize, mut offset: usize, - parent_mode: NodeMode, + parent_mode: TokenMode, outermost: bool, ) -> bool { let kind = self.kind().clone(); - let mode = kind.mode().apply(parent_mode); + let mode = kind.mode().contextualize(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); let mut loop_result = None; @@ -266,15 +266,11 @@ impl GreenNode { // This didn't work, so we try to replace the child at this // level. - let (function, policy) = match child - .kind() - .reparsing_function(mode.child_mode().as_token_mode()) - { - Ok(p) => p, - _ => { - return false; - } - }; + let (function, policy) = + match child.kind().reparsing_function(kind.mode().child_mode()) { + Ok(p) => p, + _ => return false, + }; loop_result = Some((i, child_span, i == last && outermost, function, policy)); break; @@ -320,8 +316,7 @@ impl GreenNode { eprintln!("function failed"); return false; }; - let child_mode = - self.children[child_idx].kind().mode().child_mode().as_token_mode(); + let child_mode = self.children[child_idx].kind().mode().child_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -365,7 +360,7 @@ impl GreenNode { } // Check if the neighbor invariants are still true. - if mode.as_token_mode() == TokenMode::Markup { + if mode == TokenMode::Markup { if child_idx > 0 { if self.children[child_idx - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace @@ -1023,10 +1018,10 @@ impl NodeKind { return Err(policy); } - let mode = self.mode(); - let is_code = mode == NodeMode::Universal && parent_mode == TokenMode::Code - || mode == NodeMode::Code; - if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { + let contextualized = self.mode().contextualize(parent_mode); + let is_code = contextualized == TokenMode::Code; + + if is_code && policy == IncrementalSafety::UnsafeLayer { return Err(policy); } @@ -1047,12 +1042,9 @@ impl NodeKind { return Ok((parser, policy)); } - let parser: fn(&str, bool) -> _ = match mode { - NodeMode::Markup if self == &Self::Markup => parse_markup, - NodeMode::Markup => parse_markup_elements, - NodeMode::Universal if parent_mode == TokenMode::Markup => { - parse_markup_elements - } + let parser: fn(&str, bool) -> _ = match contextualized { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, _ => return Err(policy), }; @@ -1403,28 +1395,20 @@ pub enum NodeMode { impl NodeMode { /// Returns a new mode considering the parent node. - pub fn apply(&self, old: Self) -> Self { - match self { - Self::Markup => Self::Markup, - Self::Code => Self::Code, - Self::Universal if old != Self::Markup => Self::Code, - Self::Universal => Self::Universal, - } - } - - /// Return the corresponding token mode. - pub fn as_token_mode(&self) -> TokenMode { + pub fn contextualize(&self, old: TokenMode) -> TokenMode { match self { - Self::Markup | Self::Universal => TokenMode::Markup, + Self::Markup => TokenMode::Markup, Self::Code => TokenMode::Code, + Self::Universal if old != TokenMode::Markup => TokenMode::Code, + Self::Universal => TokenMode::Markup, } } /// The mode of the children of this node. - pub fn child_mode(&self) -> Self { + pub fn child_mode(&self) -> TokenMode { match self { - Self::Markup => Self::Markup, - Self::Code | Self::Universal => Self::Code, + Self::Markup => TokenMode::Markup, + Self::Code | Self::Universal => TokenMode::Code, } } } -- cgit v1.2.3 From 91f2f97572c64d7eb25c88ad0ebb18192cf8eddf Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 9 Nov 2021 13:34:23 +0100 Subject: Multiple replacements, escapes --- src/syntax/mod.rs | 79 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d6658fd3..d1ca3674 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -240,6 +240,7 @@ impl GreenNode { let mut loop_result = None; let mut child_at_start = true; let last = self.children.len() - 1; + let mut start = None; for (i, child) in self.children.iter_mut().enumerate() { let child_span = Span::new(replace.source, offset, offset + child.len()); if child_span.surrounds(replace) { @@ -271,8 +272,45 @@ impl GreenNode { Ok(p) => p, _ => return false, }; - loop_result = - Some((i, child_span, i == last && outermost, function, policy)); + loop_result = Some(( + i .. i + 1, + child_span, + i == last && outermost, + function, + policy, + )); + break; + } else if child_span.contains(replace.start) + && mode == TokenMode::Markup + && child.kind().incremental_safety().markup_safe() + { + eprintln!("found safe start"); + start = Some((i, offset)); + } else if child_span.contains(replace.end) + && mode == TokenMode::Markup + && child.kind().incremental_safety().markup_safe() + { + eprintln!("found safe end"); + if let Some((start, start_offset)) = start { + let (function, policy) = + match child.kind().reparsing_function(kind.mode().child_mode()) { + Ok(p) => p, + _ => return false, + }; + loop_result = Some(( + start .. i + 1, + Span::new(replace.source, start_offset, offset + child.len()), + i == last && outermost, + function, + policy, + )); + } + break; + } else if start.is_some() + && (mode != TokenMode::Markup + || !child.kind().incremental_safety().markup_safe()) + { + eprintln!("unsafe inbetweeen {:?}", child.kind()); break; } @@ -283,7 +321,7 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. - let (child_idx, child_span, child_outermost, func, policy) = + let (child_idx_range, child_span, child_outermost, func, policy) = if let Some(loop_result) = loop_result { loop_result } else { @@ -316,7 +354,7 @@ impl GreenNode { eprintln!("function failed"); return false; }; - let child_mode = self.children[child_idx].kind().mode().child_mode(); + let child_mode = self.children[child_idx_range.start].kind().mode().child_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -340,7 +378,7 @@ impl GreenNode { } _ => false, } { - if self.children[child_idx].kind() != new_children[0].kind() { + if self.children[child_idx_range.start].kind() != new_children[0].kind() { eprintln!("not the same kind"); return false; } @@ -361,8 +399,8 @@ impl GreenNode { // Check if the neighbor invariants are still true. if mode == TokenMode::Markup { - if child_idx > 0 { - if self.children[child_idx - 1].kind().incremental_safety() + if child_idx_range.start > 0 { + if self.children[child_idx_range.start - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace && !new_children[0].kind().is_whitespace() { @@ -376,7 +414,7 @@ impl GreenNode { new_at_start = child.kind().is_at_start(new_at_start); } - for child in &self.children[child_idx + 1 ..] { + for child in &self.children[child_idx_range.end ..] { if child.kind().is_trivia() { new_at_start = child.kind().is_at_start(new_at_start); continue; @@ -393,14 +431,25 @@ impl GreenNode { } break; } + + if new_children.last().map(|x| x.kind().incremental_safety()) + == Some(IncrementalSafety::EnsureRightWhitespace) + && self.children.len() > child_idx_range.end + { + if !self.children[child_idx_range.end].kind().is_whitespace() { + eprintln!("right whitespace missing"); + return false; + } + } } eprintln!("... replacing"); - let old_len = self.children[child_idx].len(); + let old_len: usize = + self.children[child_idx_range.clone()].iter().map(Green::len).sum(); let new_len: usize = new_children.iter().map(Green::len).sum(); - self.children.splice(child_idx .. child_idx + 1, new_children); + self.children.splice(child_idx_range, new_children); self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); true @@ -1379,6 +1428,16 @@ impl IncrementalSafety { _ => false, } } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe + | Self::SameKindInCode + | Self::EnsureAtStart + | Self::UnsafeLayer => true, + _ => false, + } + } } /// This enum describes which mode a token of [`NodeKind`] can appear in. -- cgit v1.2.3 From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 10 Nov 2021 20:41:10 +0100 Subject: Comments and neighbors --- src/syntax/mod.rs | 335 +++++++++++++++++++++++++++--------------------------- 1 file changed, 167 insertions(+), 168 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d1ca3674..cfb44376 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,6 +6,7 @@ mod pretty; mod span; use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Range; use std::rc::Rc; pub use highlight::*; @@ -88,7 +89,7 @@ impl Green { } /// Find the innermost child that is incremental safe. - fn incremental_int( + fn incremental( &mut self, edit: &str, replace: Span, @@ -96,7 +97,7 @@ impl Green { offset: usize, parent_mode: TokenMode, outermost: bool, - ) -> bool { + ) -> Result, ()> { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -106,21 +107,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => false, - } - } - - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { - match self { - Green::Node(n) => n.errors(), - Green::Token(t) => { - if t.kind().is_error() { - vec![t.kind().clone()] - } else { - vec![] - } - } + Green::Token(_) => Err(()), } } } @@ -198,15 +185,23 @@ impl GreenNode { self.data().len() } - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { - let mut res = self.children.iter().flat_map(|c| c.errors()).collect::>(); + pub fn replace_child_range( + &mut self, + child_idx_range: Range, + replacement: Vec, + ) { + let old_len: usize = + self.children[child_idx_range.clone()].iter().map(Green::len).sum(); + let new_len: usize = replacement.iter().map(Green::len).sum(); - if self.kind().is_error() { - res.push(self.kind().clone()); - } + self.children.splice(child_idx_range, replacement); + self.erroneous = self.children.iter().any(|x| x.erroneous()); + self.data.set_len(self.data.len + new_len - old_len); + } - res + pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { + self.data.len = self.data.len() + new_len - old_len; + self.erroneous = self.children.iter().any(|x| x.erroneous()); } /// Find the innermost child that is incremental safe. @@ -215,12 +210,7 @@ impl GreenNode { src: &str, replace: Span, replacement_len: usize, - ) -> bool { - let edit = &src[replace.inserted(replace, replacement_len).to_range()]; - if edit.contains("//") || edit.contains("/*") || edit.contains("*/") { - return false; - } - + ) -> Result, ()> { self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) } @@ -232,10 +222,9 @@ impl GreenNode { mut offset: usize, parent_mode: TokenMode, outermost: bool, - ) -> bool { + ) -> Result, ()> { let kind = self.kind().clone(); let mode = kind.mode().contextualize(parent_mode); - eprintln!("in {:?} (mode {:?})", kind, mode); let mut loop_result = None; let mut child_at_start = true; @@ -243,13 +232,16 @@ impl GreenNode { let mut start = None; for (i, child) in self.children.iter_mut().enumerate() { let child_span = Span::new(replace.source, offset, offset + child.len()); - if child_span.surrounds(replace) { - eprintln!("found correct child"); - + if child_span.surrounds(replace) + && start.is_none() + && ((replace.start != child_span.end && replace.end != child_span.start) + || mode == TokenMode::Code + || i == last) + { let old_len = child.len(); // First, we try if the child has another, more specific applicable child. if !kind.incremental_safety().unsafe_interior() { - if child.incremental_int( + if let Ok(range) = child.incremental( src, replace, replacement_len, @@ -257,21 +249,17 @@ impl GreenNode { kind.mode().child_mode(), i == last && outermost, ) { - eprintln!("child success"); let new_len = child.len(); - self.data.set_len(self.data.len() + new_len - old_len); - self.erroneous = self.children.iter().any(|x| x.erroneous()); - return true; + self.update_child_len(new_len, old_len); + return Ok(range); } } // This didn't work, so we try to replace the child at this // level. let (function, policy) = - match child.kind().reparsing_function(kind.mode().child_mode()) { - Ok(p) => p, - _ => return false, - }; + child.kind().reparsing_function(kind.mode().child_mode()); + let function = function?; loop_result = Some(( i .. i + 1, child_span, @@ -280,23 +268,21 @@ impl GreenNode { policy, )); break; - } else if child_span.contains(replace.start) + } else if start.is_none() + && child_span.contains(replace.start) && mode == TokenMode::Markup && child.kind().incremental_safety().markup_safe() { - eprintln!("found safe start"); start = Some((i, offset)); } else if child_span.contains(replace.end) + && (replace.end != child_span.end || i == last) && mode == TokenMode::Markup && child.kind().incremental_safety().markup_safe() { - eprintln!("found safe end"); if let Some((start, start_offset)) = start { let (function, policy) = - match child.kind().reparsing_function(kind.mode().child_mode()) { - Ok(p) => p, - _ => return false, - }; + child.kind().reparsing_function(kind.mode().child_mode()); + let function = function?; loop_result = Some(( start .. i + 1, Span::new(replace.source, start_offset, offset + child.len()), @@ -310,7 +296,6 @@ impl GreenNode { && (mode != TokenMode::Markup || !child.kind().incremental_safety().markup_safe()) { - eprintln!("unsafe inbetweeen {:?}", child.kind()); break; } @@ -322,15 +307,7 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. let (child_idx_range, child_span, child_outermost, func, policy) = - if let Some(loop_result) = loop_result { - loop_result - } else { - // No child fully contains the replacement. - eprintln!("no child match"); - return false; - }; - - eprintln!("aquired function, policy {:?}", policy); + loop_result.ok_or(())?; let src_span = child_span.inserted(replace, replacement_len); let recompile_range = if policy == IncrementalSafety::AtomicPrimary { @@ -339,121 +316,137 @@ impl GreenNode { src_span.to_range() }; - let new_children = if let Some(new_children) = - func(&src[recompile_range], child_at_start) - { - if policy != IncrementalSafety::AtomicPrimary - || new_children.iter().map(Green::len).sum::() == src_span.len() - { - new_children - } else { - eprintln!("wrong atomic len"); - return false; - } + let (mut new_children, unterminated) = + func(&src[recompile_range], child_at_start).ok_or(())?; + + let insertion = match check_invariants( + &new_children, + self.children(), + unterminated, + child_idx_range.clone(), + child_outermost, + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; + + self.replace_child_range(child_idx_range, insertion); + + Ok(src_span.to_range()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + unterminated: bool, + child_idx_range: Range, + outermost: bool, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: IncrementalSafety, +) -> InvariantResult { + let (new_children, ok) = if policy == IncrementalSafety::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) } else { - eprintln!("function failed"); - return false; - }; - let child_mode = self.children[child_idx_range.start].kind().mode().child_mode(); - eprintln!("child mode {:?}", child_mode); + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + + // Check if the children / child has the right type. + let require_single = match policy { + IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, + _ => false, + }; + + if require_single { + if new_children.len() != 1 { + return InvariantResult::Error; + } - // Check if the children / child has the right type. - let require_single = match policy { - IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, + if match policy { + IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode => child_mode == TokenMode::Code, _ => false, - }; - - if require_single { - eprintln!("must be a single replacement"); - if new_children.len() != 1 { - eprintln!("not a single replacement"); - return false; + } { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; } + } + } - if match policy { - IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => { - true - } - _ => false, - } { - if self.children[child_idx_range.start].kind() != new_children[0].kind() { - eprintln!("not the same kind"); - return false; - } + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost && unterminated { + return InvariantResult::Error; + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().incremental_safety() + == IncrementalSafety::EnsureRightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; } } - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. - if !child_outermost - && new_children - .iter() - .flat_map(|x| x.errors()) - .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) - { - eprintln!("unclosed node"); - return false; + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); } - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if self.children[child_idx_range.start - 1].kind().incremental_safety() - == IncrementalSafety::EnsureRightWhitespace - && !new_children[0].kind().is_whitespace() - { - eprintln!("left whitespace missing"); - return false; - } - } - - let mut new_at_start = child_at_start; - for child in &new_children { + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { new_at_start = child.kind().is_at_start(new_at_start); + continue; } - for child in &self.children[child_idx_range.end ..] { - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; + match child.kind().incremental_safety() { + IncrementalSafety::EnsureAtStart if !new_at_start => { + return InvariantResult::Error; } - - match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => { - return false; - } - IncrementalSafety::EnsureNotAtStart if new_at_start => { - return false; - } - _ => {} + IncrementalSafety::EnsureNotAtStart if new_at_start => { + return InvariantResult::Error; } - break; + _ => {} } + break; + } - if new_children.last().map(|x| x.kind().incremental_safety()) - == Some(IncrementalSafety::EnsureRightWhitespace) - && self.children.len() > child_idx_range.end - { - if !self.children[child_idx_range.end].kind().is_whitespace() { - eprintln!("right whitespace missing"); - return false; - } + if new_children.last().map(|x| x.kind().incremental_safety()) + == Some(IncrementalSafety::EnsureRightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; } } - - eprintln!("... replacing"); - - let old_len: usize = - self.children[child_idx_range.clone()].iter().map(Green::len).sum(); - let new_len: usize = new_children.iter().map(Green::len).sum(); - - self.children.splice(child_idx_range, new_children); - self.erroneous = self.children.iter().any(|x| x.erroneous()); - self.data.set_len(self.data.len + new_len - old_len); - true } + + ok } impl From for Green { @@ -1025,6 +1018,7 @@ impl NodeKind { match self { Self::Markup | Self::Space(_) + | Self::Linebreak | Self::Parbreak | Self::Text(_) | Self::TextInLine(_) @@ -1034,6 +1028,10 @@ impl NodeKind { | Self::Escape(_) | Self::Strong | Self::Emph + | Self::Heading + | Self::Enum + | Self::EnumNumbering(_) + | Self::List | Self::Raw(_) | Self::Math(_) => NodeMode::Markup, Self::Template @@ -1058,24 +1056,24 @@ impl NodeKind { pub fn reparsing_function( &self, parent_mode: TokenMode, - ) -> Result< - (fn(&str, bool) -> Option>, IncrementalSafety), + ) -> ( + Result Option<(Vec, bool)>, ()>, IncrementalSafety, - > { + ) { let policy = self.incremental_safety(); if policy.is_unsafe() { - return Err(policy); + return (Err(()), policy); } let contextualized = self.mode().contextualize(parent_mode); let is_code = contextualized == TokenMode::Code; if is_code && policy == IncrementalSafety::UnsafeLayer { - return Err(policy); + return (Err(()), policy); } if is_code && policy == IncrementalSafety::AtomicPrimary { - return Ok((parse_atomic, policy)); + return (Ok(parse_atomic), policy); } if policy == IncrementalSafety::SameKind @@ -1085,19 +1083,19 @@ impl NodeKind { NodeKind::Template => parse_template, NodeKind::Block => parse_block, NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return Err(policy), + _ => return (Err(()), policy), }; - return Ok((parser, policy)); + return (Ok(parser), policy); } let parser: fn(&str, bool) -> _ = match contextualized { TokenMode::Markup if self == &Self::Markup => parse_markup, TokenMode::Markup => parse_markup_elements, - _ => return Err(policy), + _ => return (Err(()), policy), }; - Ok((parser, policy)) + (Ok(parser), policy) } /// Whether it is safe to do incremental parsing on this node. Never allow @@ -1434,6 +1432,8 @@ impl IncrementalSafety { Self::Safe | Self::SameKindInCode | Self::EnsureAtStart + | Self::EnsureNotAtStart + | Self::EnsureRightWhitespace | Self::UnsafeLayer => true, _ => false, } @@ -1458,8 +1458,7 @@ impl NodeMode { match self { Self::Markup => TokenMode::Markup, Self::Code => TokenMode::Code, - Self::Universal if old != TokenMode::Markup => TokenMode::Code, - Self::Universal => TokenMode::Markup, + Self::Universal => old, } } -- cgit v1.2.3 From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 18 Nov 2021 16:21:45 +0100 Subject: Refactoring and bugfixes --- src/syntax/incremental.rs | 515 ++++++++++++++++++++++++++++++++++++++++++ src/syntax/mod.rs | 563 ++-------------------------------------------- 2 files changed, 533 insertions(+), 545 deletions(-) create mode 100644 src/syntax/incremental.rs (limited to 'src/syntax') diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs new file mode 100644 index 00000000..d7b5ca3c --- /dev/null +++ b/src/syntax/incremental.rs @@ -0,0 +1,515 @@ +use std::ops::Range; +use std::rc::Rc; + +use super::{Green, GreenNode, NodeKind, Span}; + +use crate::parse::{ + parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, + parse_markup_elements, parse_template, TokenMode, +}; + +pub struct Reparser<'a> { + src: &'a str, + replace_range: Span, + replace_len: usize, +} + +impl<'a> Reparser<'a> { + pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn incremental(&self, green: &mut GreenNode) -> Result, ()> { + self.incremental_int(green, 0, TokenMode::Markup, true) + } + + fn incremental_int( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> Result, ()> { + let kind = green.kind().clone(); + let mode = kind.mode().contextualize(parent_mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = green.children.len() - 1; + let mut start = None; + for (i, child) in green.children.iter_mut().enumerate() { + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); + if child_span.surrounds(self.replace_range) + && start.is_none() + && ((self.replace_range.start != child_span.end + && self.replace_range.end != child_span.start) + || mode == TokenMode::Code + || i == last) + { + let old_len = child.len(); + // First, we try if the child has another, more specific applicable child. + if !kind.post().unsafe_interior() { + if let Ok(range) = match child { + Green::Node(n) => self.incremental_int( + Rc::make_mut(n), + offset, + kind.mode().child_mode(), + i == last && outermost, + ), + Green::Token(_) => Err(()), + } { + let new_len = child.len(); + green.update_child_len(new_len, old_len); + return Ok(range); + } + } + + // This didn't work, so we try to self.replace_range the child at this + // level. + loop_result = + Some((i .. i + 1, child_span, i == last && outermost, child.kind())); + break; + } else if start.is_none() + && child_span.contains(self.replace_range.start) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + start = Some((i, offset)); + } else if child_span.contains(self.replace_range.end) + && (self.replace_range.end != child_span.end || i == last) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + if let Some((start, start_offset)) = start { + loop_result = Some(( + start .. i + 1, + Span::new( + self.replace_range.source, + start_offset, + offset + child.len(), + ), + i == last && outermost, + child.kind(), + )); + } + break; + } else if start.is_some() + && (mode != TokenMode::Markup || !child.kind().post().markup_safe()) + { + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + + // We now have a child that we can self.replace_range and a function to do so if + // the loop found any results at all. + let (child_idx_range, child_span, child_outermost, func, policy) = + loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| { + let (func, policy) = + child_kind.reparsing_function(kind.mode().child_mode()); + Ok((a, b, c, func?, policy)) + })?; + + let src_span = child_span.inserted(self.replace_range, self.replace_len); + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. self.src.len() + } else { + src_span.to_range() + }; + + let (mut new_children, unterminated) = + func(&self.src[recompile_range], child_at_start).ok_or(())?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !child_outermost && unterminated { + return Err(()); + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; + + green.replace_child_range(child_idx_range, insertion); + + Ok(src_span.to_range()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + + // Check if the children / child has the right type. + let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().pre() { + Precondition::AtStart if !new_at_start => { + return InvariantResult::Error; + } + Precondition::NotAtStart if new_at_start => { + return InvariantResult::Error; + } + _ => {} + } + break; + } + } + + ok +} + +impl NodeKind { + pub fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> ( + Result Option<(Vec, bool)>, ()>, + Postcondition, + ) { + let policy = self.post(); + let mode = self.mode().contextualize(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), + Postcondition::AtomicPrimary if mode == TokenMode::Code => { + (Ok(parse_atomic), policy) + } + Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), + Postcondition::SameKind(x) if x == None || x == Some(mode) => { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + _ => { + let parser: fn(&str, bool) -> _ = match mode { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords are literals and can be safely be substituted with + // other expressions. + Self::None | Self::Auto => Postcondition::AtomicPrimary, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. + Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + Self::Markup => Postcondition::SameKind(None), + + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => Postcondition::AtomicPrimary, + + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + +/// This enum describes what conditions a node has for being replaced by a new +/// parse result. +/// +/// Safe nodes are replaced by the new parse result from the respective mode. +/// They can be replaced by multiple tokens. If a token is inserted in Markup +/// mode and the next token would not be `at_start` there needs to be a forward +/// check for a `EnsureAtStart` node. If this fails, the parent has to be +/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is +/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last +/// tokens to be inserted, the edit is invalidated if there is no following +/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The +/// unsafe layers cannot be used but allow children access, the unsafe nodes do +/// neither. +/// +/// *Procedure:* +/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return +/// None. +/// 2. Reparse with appropriate node kind and `at_start`. +/// 3. Check whether the topmost group is terminated and the range was +/// completely consumed, otherwise return None. +/// 4. Check if the type criteria are met. +/// 5. If the node is not at the end of the tree, check if Strings etc. are +/// terminated. +/// 6. If this is markup, check the following things: +/// - The `at_start` conditions of the next non-comment and non-space(0) node +/// are met. +/// - The first node is whitespace or the previous siblings are not +/// `EnsureRightWhitespace`. +/// - If any of those fails, return None. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + +impl Postcondition { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe => true, + _ => false, + } + } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe | Self::UnsafeLayer => true, + Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + _ => false, + } + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index cfb44376..4d0ca026 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,6 +2,7 @@ pub mod ast; mod highlight; +mod incremental; mod pretty; mod span; @@ -10,16 +11,14 @@ use std::ops::Range; use std::rc::Rc; pub use highlight::*; +pub use incremental::*; pub use pretty::*; pub use span::*; use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::{ - parse_atomic, parse_block, parse_comment, parse_markup, parse_markup_elements, - parse_template, TokenMode, -}; +use crate::parse::TokenMode; use crate::source::SourceId; use crate::util::EcoString; @@ -87,29 +86,6 @@ impl Green { Self::Token(data) => data.kind = kind, } } - - /// Find the innermost child that is incremental safe. - fn incremental( - &mut self, - edit: &str, - replace: Span, - replacement_len: usize, - offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - match self { - Green::Node(n) => Rc::make_mut(n).incremental_int( - edit, - replace, - replacement_len, - offset, - parent_mode, - outermost, - ), - Green::Token(_) => Err(()), - } - } } impl Default for Green { @@ -194,8 +170,22 @@ impl GreenNode { self.children[child_idx_range.clone()].iter().map(Green::len).sum(); let new_len: usize = replacement.iter().map(Green::len).sum(); + if self.erroneous { + if self.children[child_idx_range.clone()].iter().any(Green::erroneous) { + // the old range was erroneous but we do not know if anywhere + // else was so we have to iterate over the whole thing. + self.erroneous = self.children[.. child_idx_range.start] + .iter() + .any(Green::erroneous) + || self.children[child_idx_range.end ..].iter().any(Green::erroneous); + } + // in this case nothing changes so we do not have to bother. + } + + // the or assignment operator is not lazy. + self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); + self.children.splice(child_idx_range, replacement); - self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); } @@ -203,250 +193,6 @@ impl GreenNode { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); } - - /// Find the innermost child that is incremental safe. - pub fn incremental( - &mut self, - src: &str, - replace: Span, - replacement_len: usize, - ) -> Result, ()> { - self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) - } - - fn incremental_int( - &mut self, - src: &str, - replace: Span, - replacement_len: usize, - mut offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - let kind = self.kind().clone(); - let mode = kind.mode().contextualize(parent_mode); - - let mut loop_result = None; - let mut child_at_start = true; - let last = self.children.len() - 1; - let mut start = None; - for (i, child) in self.children.iter_mut().enumerate() { - let child_span = Span::new(replace.source, offset, offset + child.len()); - if child_span.surrounds(replace) - && start.is_none() - && ((replace.start != child_span.end && replace.end != child_span.start) - || mode == TokenMode::Code - || i == last) - { - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !kind.incremental_safety().unsafe_interior() { - if let Ok(range) = child.incremental( - src, - replace, - replacement_len, - offset, - kind.mode().child_mode(), - i == last && outermost, - ) { - let new_len = child.len(); - self.update_child_len(new_len, old_len); - return Ok(range); - } - } - - // This didn't work, so we try to replace the child at this - // level. - let (function, policy) = - child.kind().reparsing_function(kind.mode().child_mode()); - let function = function?; - loop_result = Some(( - i .. i + 1, - child_span, - i == last && outermost, - function, - policy, - )); - break; - } else if start.is_none() - && child_span.contains(replace.start) - && mode == TokenMode::Markup - && child.kind().incremental_safety().markup_safe() - { - start = Some((i, offset)); - } else if child_span.contains(replace.end) - && (replace.end != child_span.end || i == last) - && mode == TokenMode::Markup - && child.kind().incremental_safety().markup_safe() - { - if let Some((start, start_offset)) = start { - let (function, policy) = - child.kind().reparsing_function(kind.mode().child_mode()); - let function = function?; - loop_result = Some(( - start .. i + 1, - Span::new(replace.source, start_offset, offset + child.len()), - i == last && outermost, - function, - policy, - )); - } - break; - } else if start.is_some() - && (mode != TokenMode::Markup - || !child.kind().incremental_safety().markup_safe()) - { - break; - } - - offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); - } - - - // We now have a child that we can replace and a function to do so if - // the loop found any results at all. - let (child_idx_range, child_span, child_outermost, func, policy) = - loop_result.ok_or(())?; - - let src_span = child_span.inserted(replace, replacement_len); - let recompile_range = if policy == IncrementalSafety::AtomicPrimary { - src_span.start .. src.len() - } else { - src_span.to_range() - }; - - let (mut new_children, unterminated) = - func(&src[recompile_range], child_at_start).ok_or(())?; - - let insertion = match check_invariants( - &new_children, - self.children(), - unterminated, - child_idx_range.clone(), - child_outermost, - child_at_start, - mode, - src_span, - policy, - ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), - }?; - - self.replace_child_range(child_idx_range, insertion); - - Ok(src_span.to_range()) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - unterminated: bool, - child_idx_range: Range, - outermost: bool, - child_at_start: bool, - mode: TokenMode, - src_span: Span, - policy: IncrementalSafety, -) -> InvariantResult { - let (new_children, ok) = if policy == IncrementalSafety::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); - - // Check if the children / child has the right type. - let require_single = match policy { - IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, - _ => false, - }; - - if require_single { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if match policy { - IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode => child_mode == TokenMode::Code, - _ => false, - } { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } - } - } - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. - if !outermost && unterminated { - return InvariantResult::Error; - } - - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().incremental_safety() - == IncrementalSafety::EnsureRightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } - - let mut new_at_start = child_at_start; - for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => { - return InvariantResult::Error; - } - IncrementalSafety::EnsureNotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} - } - break; - } - - if new_children.last().map(|x| x.kind().incremental_safety()) - == Some(IncrementalSafety::EnsureRightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } - } - - ok } impl From for Green { @@ -1053,190 +799,6 @@ impl NodeKind { } } - pub fn reparsing_function( - &self, - parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - IncrementalSafety, - ) { - let policy = self.incremental_safety(); - if policy.is_unsafe() { - return (Err(()), policy); - } - - let contextualized = self.mode().contextualize(parent_mode); - let is_code = contextualized == TokenMode::Code; - - if is_code && policy == IncrementalSafety::UnsafeLayer { - return (Err(()), policy); - } - - if is_code && policy == IncrementalSafety::AtomicPrimary { - return (Ok(parse_atomic), policy); - } - - if policy == IncrementalSafety::SameKind - || (policy == IncrementalSafety::SameKindInCode && is_code) - { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - return (Ok(parser), policy); - } - - let parser: fn(&str, bool) -> _ = match contextualized { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - - /// Whether it is safe to do incremental parsing on this node. Never allow - /// non-termination errors if this is not already the last leaf node. - pub fn incremental_safety(&self) -> IncrementalSafety { - match self { - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => IncrementalSafety::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. - Self::Star - | Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => IncrementalSafety::Unsafe, - - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Set - | Self::Import - | Self::Include - | Self::From => IncrementalSafety::Unsafe, - - // This is a backslash followed by a space. But changing it to - // anything else is fair game. - Self::Linebreak => IncrementalSafety::EnsureRightWhitespace, - - Self::Markup => IncrementalSafety::SameKind, - - Self::Space(_) => IncrementalSafety::SameKindInCode, - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Text(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph => IncrementalSafety::Safe, - - // This is text that needs to be not `at_start`, otherwise it would - // start one of the below items. - Self::TextInLine(_) => IncrementalSafety::EnsureNotAtStart, - - // These have to be `at_start` so they must be preceeded with a - // Space(n) with n > 0 or a Parbreak. - Self::Heading | Self::Enum | Self::List => IncrementalSafety::EnsureAtStart, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => IncrementalSafety::Unsafe, - - Self::Raw(_) | Self::Math(_) => IncrementalSafety::Safe, - - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => IncrementalSafety::AtomicPrimary, - - Self::Call | Self::Unary | Self::Binary | Self::SetExpr => { - IncrementalSafety::UnsafeLayer - } - - Self::CallArgs | Self::Named | Self::Spread => IncrementalSafety::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => IncrementalSafety::UnsafeLayer, - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template | Self::Block => IncrementalSafety::SameKindInCode, - - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::ImportExpr - | Self::IncludeExpr => IncrementalSafety::AtomicPrimary, - - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - IncrementalSafety::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => IncrementalSafety::SameKind, - - Self::Error(_, _) | Self::Unknown(_) => IncrementalSafety::Unsafe, - } - } - /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { @@ -1351,95 +913,6 @@ impl Display for NodeKind { } } -/// This enum describes what conditions a node has for being replaced by a new -/// parse result. -/// -/// Safe nodes are replaced by the new parse result from the respective mode. -/// They can be replaced by multiple tokens. If a token is inserted in Markup -/// mode and the next token would not be `at_start` there needs to be a forward -/// check for a `EnsureAtStart` node. If this fails, the parent has to be -/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is -/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last -/// tokens to be inserted, the edit is invalidated if there is no following -/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The -/// unsafe layers cannot be used but allow children access, the unsafe nodes do -/// neither. -/// -/// *Procedure:* -/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return -/// None. -/// 2. Reparse with appropriate node kind and `at_start`. -/// 3. Check whether the topmost group is terminated and the range was -/// completely consumed, otherwise return None. -/// 4. Check if the type criteria are met. -/// 5. If the node is not at the end of the tree, check if Strings etc. are -/// terminated. -/// 6. If this is markup, check the following things: -/// - The `at_start` conditions of the next non-comment and non-space(0) node -/// are met. -/// - The first node is whitespace or the previous siblings are not -/// `EnsureRightWhitespace`. -/// - If any of those fails, return None. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum IncrementalSafety { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind, - /// This node has to be replaced with a single token of the same kind if in - /// code mode. - SameKindInCode, - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. - EnsureAtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - EnsureNotAtStart, - /// These nodes must be followed by whitespace. - EnsureRightWhitespace, - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. - Unsafe, -} - -impl IncrementalSafety { - pub fn unsafe_interior(&self) -> bool { - match self { - Self::Unsafe => true, - _ => false, - } - } - - pub fn is_unsafe(&self) -> bool { - match self { - Self::UnsafeLayer | Self::Unsafe => true, - _ => false, - } - } - - pub fn markup_safe(&self) -> bool { - match self { - Self::Safe - | Self::SameKindInCode - | Self::EnsureAtStart - | Self::EnsureNotAtStart - | Self::EnsureRightWhitespace - | Self::UnsafeLayer => true, - _ => false, - } - } -} - /// This enum describes which mode a token of [`NodeKind`] can appear in. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum NodeMode { -- cgit v1.2.3 From edc686d7384470068858e16f2926cf50f31b2c90 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 27 Nov 2021 16:10:22 +0100 Subject: Make incremental parsing simpler and move it somewhere else --- src/syntax/incremental.rs | 515 ---------------------------------------------- src/syntax/mod.rs | 77 +++---- src/syntax/span.rs | 12 -- 3 files changed, 24 insertions(+), 580 deletions(-) delete mode 100644 src/syntax/incremental.rs (limited to 'src/syntax') diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs deleted file mode 100644 index d7b5ca3c..00000000 --- a/src/syntax/incremental.rs +++ /dev/null @@ -1,515 +0,0 @@ -use std::ops::Range; -use std::rc::Rc; - -use super::{Green, GreenNode, NodeKind, Span}; - -use crate::parse::{ - parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, - parse_markup_elements, parse_template, TokenMode, -}; - -pub struct Reparser<'a> { - src: &'a str, - replace_range: Span, - replace_len: usize, -} - -impl<'a> Reparser<'a> { - pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { - Self { src, replace_range, replace_len } - } -} - -impl Reparser<'_> { - /// Find the innermost child that is incremental safe. - pub fn incremental(&self, green: &mut GreenNode) -> Result, ()> { - self.incremental_int(green, 0, TokenMode::Markup, true) - } - - fn incremental_int( - &self, - green: &mut GreenNode, - mut offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - let kind = green.kind().clone(); - let mode = kind.mode().contextualize(parent_mode); - - let mut loop_result = None; - let mut child_at_start = true; - let last = green.children.len() - 1; - let mut start = None; - for (i, child) in green.children.iter_mut().enumerate() { - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); - if child_span.surrounds(self.replace_range) - && start.is_none() - && ((self.replace_range.start != child_span.end - && self.replace_range.end != child_span.start) - || mode == TokenMode::Code - || i == last) - { - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !kind.post().unsafe_interior() { - if let Ok(range) = match child { - Green::Node(n) => self.incremental_int( - Rc::make_mut(n), - offset, - kind.mode().child_mode(), - i == last && outermost, - ), - Green::Token(_) => Err(()), - } { - let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); - } - } - - // This didn't work, so we try to self.replace_range the child at this - // level. - loop_result = - Some((i .. i + 1, child_span, i == last && outermost, child.kind())); - break; - } else if start.is_none() - && child_span.contains(self.replace_range.start) - && mode == TokenMode::Markup - && child.kind().post().markup_safe() - { - start = Some((i, offset)); - } else if child_span.contains(self.replace_range.end) - && (self.replace_range.end != child_span.end || i == last) - && mode == TokenMode::Markup - && child.kind().post().markup_safe() - { - if let Some((start, start_offset)) = start { - loop_result = Some(( - start .. i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind(), - )); - } - break; - } else if start.is_some() - && (mode != TokenMode::Markup || !child.kind().post().markup_safe()) - { - break; - } - - offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); - } - - - // We now have a child that we can self.replace_range and a function to do so if - // the loop found any results at all. - let (child_idx_range, child_span, child_outermost, func, policy) = - loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| { - let (func, policy) = - child_kind.reparsing_function(kind.mode().child_mode()); - Ok((a, b, c, func?, policy)) - })?; - - let src_span = child_span.inserted(self.replace_range, self.replace_len); - let recompile_range = if policy == Postcondition::AtomicPrimary { - src_span.start .. self.src.len() - } else { - src_span.to_range() - }; - - let (mut new_children, unterminated) = - func(&self.src[recompile_range], child_at_start).ok_or(())?; - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. - if !child_outermost && unterminated { - return Err(()); - } - - let insertion = match check_invariants( - &new_children, - green.children(), - child_idx_range.clone(), - child_at_start, - mode, - src_span, - policy, - ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), - }?; - - green.replace_child_range(child_idx_range, insertion); - - Ok(src_span.to_range()) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - child_idx_range: Range, - child_at_start: bool, - mode: TokenMode, - src_span: Span, - policy: Postcondition, -) -> InvariantResult { - let (new_children, ok) = if policy == Postcondition::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); - - // Check if the children / child has the right type. - let same_kind = match policy { - Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), - _ => false, - }; - - if same_kind || policy == Postcondition::AtomicPrimary { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if same_kind { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } - } - } - - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } - - if new_children.last().map(|x| x.kind().pre()) - == Some(Precondition::RightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } - - let mut new_at_start = child_at_start; - for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().pre() { - Precondition::AtStart if !new_at_start => { - return InvariantResult::Error; - } - Precondition::NotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} - } - break; - } - } - - ok -} - -impl NodeKind { - pub fn reparsing_function( - &self, - parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - Postcondition, - ) { - let policy = self.post(); - let mode = self.mode().contextualize(parent_mode); - - match policy { - Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), - Postcondition::AtomicPrimary if mode == TokenMode::Code => { - (Ok(parse_atomic), policy) - } - Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), - Postcondition::SameKind(x) if x == None || x == Some(mode) => { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - _ => { - let parser: fn(&str, bool) -> _ = match mode { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - } - } - - /// Whether it is safe to do incremental parsing on this node. Never allow - /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { - match self { - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. - Self::Star - | Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => Postcondition::Unsafe, - - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => Postcondition::AtomicPrimary, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::Set - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Import - | Self::Include - | Self::From => Postcondition::Unsafe, - - Self::Markup => Postcondition::SameKind(None), - - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Raw(_) - | Self::Math(_) => Postcondition::Safe, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, - - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => Postcondition::AtomicPrimary, - - Self::Call - | Self::Unary - | Self::Binary - | Self::CallArgs - | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), - - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::SetExpr - | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, - - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), - - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, - } - } - - /// The appropriate precondition for the type. - pub fn pre(&self) -> Precondition { - match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Linebreak => Precondition::RightWhitespace, - _ => Precondition::None, - } - } -} - -/// This enum describes what conditions a node has for being replaced by a new -/// parse result. -/// -/// Safe nodes are replaced by the new parse result from the respective mode. -/// They can be replaced by multiple tokens. If a token is inserted in Markup -/// mode and the next token would not be `at_start` there needs to be a forward -/// check for a `EnsureAtStart` node. If this fails, the parent has to be -/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is -/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last -/// tokens to be inserted, the edit is invalidated if there is no following -/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The -/// unsafe layers cannot be used but allow children access, the unsafe nodes do -/// neither. -/// -/// *Procedure:* -/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return -/// None. -/// 2. Reparse with appropriate node kind and `at_start`. -/// 3. Check whether the topmost group is terminated and the range was -/// completely consumed, otherwise return None. -/// 4. Check if the type criteria are met. -/// 5. If the node is not at the end of the tree, check if Strings etc. are -/// terminated. -/// 6. If this is markup, check the following things: -/// - The `at_start` conditions of the next non-comment and non-space(0) node -/// are met. -/// - The first node is whitespace or the previous siblings are not -/// `EnsureRightWhitespace`. -/// - If any of those fails, return None. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind(Option), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. - Unsafe, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. - AtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - NotAtStart, - /// These nodes must be followed by whitespace. - RightWhitespace, - /// No additional requirements. - None, -} - -impl Postcondition { - pub fn unsafe_interior(&self) -> bool { - match self { - Self::Unsafe => true, - _ => false, - } - } - - pub fn markup_safe(&self) -> bool { - match self { - Self::Safe | Self::UnsafeLayer => true, - Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), - _ => false, - } - } -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 4d0ca026..9ab530d8 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,7 +2,6 @@ pub mod ast; mod highlight; -mod incremental; mod pretty; mod span; @@ -11,7 +10,6 @@ use std::ops::Range; use std::rc::Rc; pub use highlight::*; -pub use incremental::*; pub use pretty::*; pub use span::*; @@ -161,6 +159,9 @@ impl GreenNode { self.data().len() } + /// Replaces a range of children with some replacement. + /// + /// This method updates the `erroneous` and `data.len` fields. pub fn replace_child_range( &mut self, child_idx_range: Range, @@ -189,6 +190,8 @@ impl GreenNode { self.data.set_len(self.data.len + new_len - old_len); } + /// Update the length of this node given the old and new length of a + /// replaced child. pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); @@ -377,22 +380,6 @@ impl<'a> RedRef<'a> { self.green.erroneous() } - /// The node's children. - pub fn children(self) -> Children<'a> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - Children { - id: self.id, - iter: children.iter(), - front: self.offset, - back: self.offset + self.len(), - } - } - - /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -425,6 +412,21 @@ impl<'a> RedRef<'a> { T::from_red(self) } + /// The node's children. + pub fn children(self) -> Children<'a> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + Children { + id: self.id, + iter: children.iter(), + front: self.offset, + back: self.offset + self.len(), + } + } + /// Get the first child that can cast to some AST type. pub fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) @@ -760,7 +762,7 @@ impl NodeKind { } /// Whether this token appears in Markup. - pub fn mode(&self) -> NodeMode { + pub fn mode(&self) -> Option { match self { Self::Markup | Self::Space(_) @@ -779,7 +781,7 @@ impl NodeKind { | Self::EnumNumbering(_) | Self::List | Self::Raw(_) - | Self::Math(_) => NodeMode::Markup, + | Self::Math(_) => Some(TokenMode::Markup), Self::Template | Self::Block | Self::Ident(_) @@ -794,8 +796,8 @@ impl NodeKind { | Self::BlockComment | Self::Error(_, _) | Self::Minus - | Self::Eq => NodeMode::Universal, - _ => NodeMode::Code, + | Self::Eq => None, + _ => Some(TokenMode::Code), } } @@ -912,34 +914,3 @@ impl Display for NodeKind { f.pad(self.as_str()) } } - -/// This enum describes which mode a token of [`NodeKind`] can appear in. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum NodeMode { - /// The token can only appear in markup mode. - Markup, - /// The token can only appear in code mode. - Code, - /// The token can appear in either mode. Look at the parent node to decide - /// which mode it is in. After an apply, this is equivalent to Markup. - Universal, -} - -impl NodeMode { - /// Returns a new mode considering the parent node. - pub fn contextualize(&self, old: TokenMode) -> TokenMode { - match self { - Self::Markup => TokenMode::Markup, - Self::Code => TokenMode::Code, - Self::Universal => old, - } - } - - /// The mode of the children of this node. - pub fn child_mode(&self) -> TokenMode { - match self { - Self::Markup => TokenMode::Markup, - Self::Code | Self::Universal => TokenMode::Code, - } - } -} diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 2691acc7..4d5b8819 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,18 +125,6 @@ impl Span { *self = self.join(other) } - /// Create a new span by specifying a span in which a modification happened - /// and how many characters are now in that span. - pub fn inserted(mut self, other: Self, n: usize) -> Self { - if !self.surrounds(other) { - panic!(); - } - - let len_change = n as isize - other.len() as isize; - self.end += len_change as usize; - self - } - /// Test whether a position is within the span. pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos -- cgit v1.2.3 From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: Code Review: Parser, I can't let you do this --- src/syntax/mod.rs | 50 ++++++++++++-------------------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9ab530d8..b72e5843 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -48,15 +48,6 @@ impl Green { self.data().len() } - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_len(len); - } - /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -139,11 +130,6 @@ impl GreenNode { &self.children } - /// The node's children, mutably. - pub fn children_mut(&mut self) -> &mut [Green] { - &mut self.children - } - /// The node's metadata. pub fn data(&self) -> &GreenData { &self.data @@ -159,10 +145,15 @@ impl GreenNode { self.data().len() } + /// The node's children, mutably. + pub(crate) fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + /// Replaces a range of children with some replacement. /// /// This method updates the `erroneous` and `data.len` fields. - pub fn replace_child_range( + pub(crate) fn replace_child_range( &mut self, child_idx_range: Range, replacement: Vec, @@ -187,12 +178,12 @@ impl GreenNode { self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); self.children.splice(child_idx_range, replacement); - self.data.set_len(self.data.len + new_len - old_len); + self.data.len = self.data.len + new_len - old_len; } /// Update the length of this node given the old and new length of a /// replaced child. - pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { + pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); } @@ -246,11 +237,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - self.len = len; - } } impl From for Green { @@ -261,7 +247,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len) + write!(f, "{:?}: {}", &self.kind, self.len) } } @@ -375,11 +361,6 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } - /// Whether the node or its children contain an error. - pub fn erroneous(self) -> bool { - self.green.erroneous() - } - /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -731,19 +712,12 @@ impl NodeKind { /// Whether this is whitespace. pub fn is_whitespace(&self) -> bool { - match self { - Self::Space(_) | Self::Parbreak => true, - _ => false, - } + matches!(self, Self::Space(_) | Self::Parbreak) } /// Whether this is trivia. pub fn is_trivia(&self) -> bool { - match self { - _ if self.is_whitespace() => true, - Self::LineComment | Self::BlockComment => true, - _ => false, - } + self.is_whitespace() || matches!(self, Self::LineComment | Self::BlockComment) } /// Whether this is some kind of error. @@ -765,7 +739,6 @@ impl NodeKind { pub fn mode(&self) -> Option { match self { Self::Markup - | Self::Space(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -783,6 +756,7 @@ impl NodeKind { | Self::Raw(_) | Self::Math(_) => Some(TokenMode::Markup), Self::Template + | Self::Space(_) | Self::Block | Self::Ident(_) | Self::LetExpr -- cgit v1.2.3 From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/syntax/ast.rs | 2 +- src/syntax/highlight.rs | 2 +- src/syntax/mod.rs | 29 +++++++++++++++++++++++++---- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ed74dfe5..bea4ef00 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -53,7 +53,7 @@ macro_rules! node { node! { /// The syntactical root capable of representing a full parsed document. - Markup + Markup: NodeKind::Markup(_) } impl Markup { diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 21af060f..9f7365a8 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -154,7 +154,7 @@ impl Category { NodeKind::Str(_) => Some(Category::String), NodeKind::Error(_, _) => Some(Category::Invalid), NodeKind::Unknown(_) => Some(Category::Invalid), - NodeKind::Markup => None, + NodeKind::Markup(_) => None, NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index b72e5843..388d0bb0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -64,6 +64,14 @@ impl Green { } } + /// Whether the node is a leaf node in the green tree. + pub fn is_leaf(&self) -> bool { + match self { + Green::Node(n) => n.children().is_empty(), + Green::Token(_) => true, + } + } + /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { @@ -361,6 +369,11 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node is a leaf node. + pub fn is_leaf(self) -> bool { + self.green.is_leaf() + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -385,6 +398,14 @@ impl<'a> RedRef<'a> { } } + /// Perform a depth-first search starting at this node. + pub fn all_children(&self) -> Vec { + let mut res = vec![self.clone()]; + res.extend(self.children().flat_map(|child| child.all_children().into_iter())); + + res + } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where @@ -562,8 +583,8 @@ pub enum NodeKind { Include, /// The `from` keyword. From, - /// Template markup. - Markup, + /// Template markup of which all lines must start in some column. + Markup(usize), /// One or more whitespace characters. Space(usize), /// A forced line break: `\`. @@ -738,7 +759,7 @@ impl NodeKind { /// Whether this token appears in Markup. pub fn mode(&self) -> Option { match self { - Self::Markup + Self::Markup(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -823,7 +844,7 @@ impl NodeKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::From => "keyword `from`", - Self::Markup => "markup", + Self::Markup(_) => "markup", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", -- cgit v1.2.3 From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 3 Jan 2022 23:18:21 +0100 Subject: Code Review: Your parsers were so preoccupied with whether they could --- src/syntax/mod.rs | 61 +++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 388d0bb0..3a0f3a5e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -108,7 +108,7 @@ pub struct GreenNode { /// This node's children, losslessly make up this node. children: Vec, /// Whether this node or any of its children are erroneous. - pub erroneous: bool, + erroneous: bool, } impl GreenNode { @@ -139,7 +139,7 @@ impl GreenNode { } /// The node's metadata. - pub fn data(&self) -> &GreenData { + fn data(&self) -> &GreenData { &self.data } @@ -159,41 +159,29 @@ impl GreenNode { } /// Replaces a range of children with some replacement. - /// - /// This method updates the `erroneous` and `data.len` fields. - pub(crate) fn replace_child_range( + pub(crate) fn replace_children( &mut self, - child_idx_range: Range, + range: Range, replacement: Vec, ) { - let old_len: usize = - self.children[child_idx_range.clone()].iter().map(Green::len).sum(); - let new_len: usize = replacement.iter().map(Green::len).sum(); - - if self.erroneous { - if self.children[child_idx_range.clone()].iter().any(Green::erroneous) { - // the old range was erroneous but we do not know if anywhere - // else was so we have to iterate over the whole thing. - self.erroneous = self.children[.. child_idx_range.start] - .iter() - .any(Green::erroneous) - || self.children[child_idx_range.end ..].iter().any(Green::erroneous); - } - // in this case nothing changes so we do not have to bother. - } + let superseded = &self.children[range.clone()]; + let superseded_len: usize = superseded.iter().map(Green::len).sum(); + let replacement_len: usize = replacement.iter().map(Green::len).sum(); - // the or assignment operator is not lazy. - self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); + // If we're erroneous, but not due to the superseded range, then we will + // still be erroneous after the replacement. + let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous); - self.children.splice(child_idx_range, replacement); - self.data.len = self.data.len + new_len - old_len; + self.children.splice(range, replacement); + self.data.len = self.data.len + replacement_len - superseded_len; + self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous); } - /// Update the length of this node given the old and new length of a - /// replaced child. - pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { + /// Update the length of this node given the old and new length of + /// replaced children. + pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; - self.erroneous = self.children.iter().any(|x| x.erroneous()); + self.erroneous = self.children.iter().any(Green::erroneous); } } @@ -255,7 +243,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", &self.kind, self.len) + write!(f, "{:?}: {}", self.kind, self.len) } } @@ -398,12 +386,13 @@ impl<'a> RedRef<'a> { } } - /// Perform a depth-first search starting at this node. - pub fn all_children(&self) -> Vec { - let mut res = vec![self.clone()]; - res.extend(self.children().flat_map(|child| child.all_children().into_iter())); - - res + /// Returns all leaf descendants of this node (may include itself). + pub fn leafs(self) -> Vec { + if self.is_leaf() { + vec![self] + } else { + self.children().flat_map(Self::leafs).collect() + } } /// Convert the node to a typed AST node. -- cgit v1.2.3