diff options
Diffstat (limited to 'src/syntax/incremental.rs')
| -rw-r--r-- | src/syntax/incremental.rs | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs new file mode 100644 index 00000000..d7b5ca3c --- /dev/null +++ b/src/syntax/incremental.rs @@ -0,0 +1,515 @@ +use std::ops::Range; +use std::rc::Rc; + +use super::{Green, GreenNode, NodeKind, Span}; + +use crate::parse::{ + parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, + parse_markup_elements, parse_template, TokenMode, +}; + +pub struct Reparser<'a> { + src: &'a str, + replace_range: Span, + replace_len: usize, +} + +impl<'a> Reparser<'a> { + pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn incremental(&self, green: &mut GreenNode) -> Result<Range<usize>, ()> { + self.incremental_int(green, 0, TokenMode::Markup, true) + } + + fn incremental_int( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> Result<Range<usize>, ()> { + let kind = green.kind().clone(); + let mode = kind.mode().contextualize(parent_mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = green.children.len() - 1; + let mut start = None; + for (i, child) in green.children.iter_mut().enumerate() { + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); + if child_span.surrounds(self.replace_range) + && start.is_none() + && ((self.replace_range.start != child_span.end + && self.replace_range.end != child_span.start) + || mode == TokenMode::Code + || i == last) + { + let old_len = child.len(); + // First, we try if the child has another, more specific applicable child. + if !kind.post().unsafe_interior() { + if let Ok(range) = match child { + Green::Node(n) => self.incremental_int( + Rc::make_mut(n), + offset, + kind.mode().child_mode(), + i == last && outermost, + ), + Green::Token(_) => Err(()), + } { + let new_len = child.len(); + green.update_child_len(new_len, old_len); + return Ok(range); + } + } + + // This didn't work, so we try to self.replace_range the child at this + // level. + loop_result = + Some((i .. i + 1, child_span, i == last && outermost, child.kind())); + break; + } else if start.is_none() + && child_span.contains(self.replace_range.start) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + start = Some((i, offset)); + } else if child_span.contains(self.replace_range.end) + && (self.replace_range.end != child_span.end || i == last) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + if let Some((start, start_offset)) = start { + loop_result = Some(( + start .. i + 1, + Span::new( + self.replace_range.source, + start_offset, + offset + child.len(), + ), + i == last && outermost, + child.kind(), + )); + } + break; + } else if start.is_some() + && (mode != TokenMode::Markup || !child.kind().post().markup_safe()) + { + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + + // We now have a child that we can self.replace_range and a function to do so if + // the loop found any results at all. + let (child_idx_range, child_span, child_outermost, func, policy) = + loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| { + let (func, policy) = + child_kind.reparsing_function(kind.mode().child_mode()); + Ok((a, b, c, func?, policy)) + })?; + + let src_span = child_span.inserted(self.replace_range, self.replace_len); + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. self.src.len() + } else { + src_span.to_range() + }; + + let (mut new_children, unterminated) = + func(&self.src[recompile_range], child_at_start).ok_or(())?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !child_outermost && unterminated { + return Err(()); + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; + + green.replace_child_range(child_idx_range, insertion); + + Ok(src_span.to_range()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range<usize>, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::<usize>() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + + // Check if the children / child has the right type. + let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().pre() { + Precondition::AtStart if !new_at_start => { + return InvariantResult::Error; + } + Precondition::NotAtStart if new_at_start => { + return InvariantResult::Error; + } + _ => {} + } + break; + } + } + + ok +} + +impl NodeKind { + pub fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> ( + Result<fn(&str, bool) -> Option<(Vec<Green>, bool)>, ()>, + Postcondition, + ) { + let policy = self.post(); + let mode = self.mode().contextualize(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), + Postcondition::AtomicPrimary if mode == TokenMode::Code => { + (Ok(parse_atomic), policy) + } + Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), + Postcondition::SameKind(x) if x == None || x == Some(mode) => { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + _ => { + let parser: fn(&str, bool) -> _ = match mode { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords are literals and can be safely be substituted with + // other expressions. + Self::None | Self::Auto => Postcondition::AtomicPrimary, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. + Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + Self::Markup => Postcondition::SameKind(None), + + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => Postcondition::AtomicPrimary, + + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + +/// This enum describes what conditions a node has for being replaced by a new +/// parse result. +/// +/// Safe nodes are replaced by the new parse result from the respective mode. +/// They can be replaced by multiple tokens. If a token is inserted in Markup +/// mode and the next token would not be `at_start` there needs to be a forward +/// check for a `EnsureAtStart` node. If this fails, the parent has to be +/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is +/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last +/// tokens to be inserted, the edit is invalidated if there is no following +/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The +/// unsafe layers cannot be used but allow children access, the unsafe nodes do +/// neither. +/// +/// *Procedure:* +/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return +/// None. +/// 2. Reparse with appropriate node kind and `at_start`. +/// 3. Check whether the topmost group is terminated and the range was +/// completely consumed, otherwise return None. +/// 4. Check if the type criteria are met. +/// 5. If the node is not at the end of the tree, check if Strings etc. are +/// terminated. +/// 6. If this is markup, check the following things: +/// - The `at_start` conditions of the next non-comment and non-space(0) node +/// are met. +/// - The first node is whitespace or the previous siblings are not +/// `EnsureRightWhitespace`. +/// - If any of those fails, return None. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option<TokenMode>), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + +impl Postcondition { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe => true, + _ => false, + } + } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe | Self::UnsafeLayer => true, + Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + _ => false, + } + } +} |
