summaryrefslogtreecommitdiff
path: root/src/syntax/incremental.rs
diff options
context:
space:
mode:
authorMartin Haug <mhaug@live.de>2021-11-27 16:10:22 +0100
committerMartin Haug <mhaug@live.de>2021-11-27 16:10:22 +0100
commitedc686d7384470068858e16f2926cf50f31b2c90 (patch)
tree00648e8e6e1873ebb7241352cb88881ce323d067 /src/syntax/incremental.rs
parentfdb9d0743d73c278136b9254286fdc4be71c42a5 (diff)
Make incremental parsing simpler and move it somewhere else
Diffstat (limited to 'src/syntax/incremental.rs')
-rw-r--r--src/syntax/incremental.rs515
1 files changed, 0 insertions, 515 deletions
diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs
deleted file mode 100644
index d7b5ca3c..00000000
--- a/src/syntax/incremental.rs
+++ /dev/null
@@ -1,515 +0,0 @@
-use std::ops::Range;
-use std::rc::Rc;
-
-use super::{Green, GreenNode, NodeKind, Span};
-
-use crate::parse::{
- parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
- parse_markup_elements, parse_template, TokenMode,
-};
-
-pub struct Reparser<'a> {
- src: &'a str,
- replace_range: Span,
- replace_len: usize,
-}
-
-impl<'a> Reparser<'a> {
- pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self {
- Self { src, replace_range, replace_len }
- }
-}
-
-impl Reparser<'_> {
- /// Find the innermost child that is incremental safe.
- pub fn incremental(&self, green: &mut GreenNode) -> Result<Range<usize>, ()> {
- self.incremental_int(green, 0, TokenMode::Markup, true)
- }
-
- fn incremental_int(
- &self,
- green: &mut GreenNode,
- mut offset: usize,
- parent_mode: TokenMode,
- outermost: bool,
- ) -> Result<Range<usize>, ()> {
- let kind = green.kind().clone();
- let mode = kind.mode().contextualize(parent_mode);
-
- let mut loop_result = None;
- let mut child_at_start = true;
- let last = green.children.len() - 1;
- let mut start = None;
- for (i, child) in green.children.iter_mut().enumerate() {
- let child_span =
- Span::new(self.replace_range.source, offset, offset + child.len());
- if child_span.surrounds(self.replace_range)
- && start.is_none()
- && ((self.replace_range.start != child_span.end
- && self.replace_range.end != child_span.start)
- || mode == TokenMode::Code
- || i == last)
- {
- let old_len = child.len();
- // First, we try if the child has another, more specific applicable child.
- if !kind.post().unsafe_interior() {
- if let Ok(range) = match child {
- Green::Node(n) => self.incremental_int(
- Rc::make_mut(n),
- offset,
- kind.mode().child_mode(),
- i == last && outermost,
- ),
- Green::Token(_) => Err(()),
- } {
- let new_len = child.len();
- green.update_child_len(new_len, old_len);
- return Ok(range);
- }
- }
-
- // This didn't work, so we try to self.replace_range the child at this
- // level.
- loop_result =
- Some((i .. i + 1, child_span, i == last && outermost, child.kind()));
- break;
- } else if start.is_none()
- && child_span.contains(self.replace_range.start)
- && mode == TokenMode::Markup
- && child.kind().post().markup_safe()
- {
- start = Some((i, offset));
- } else if child_span.contains(self.replace_range.end)
- && (self.replace_range.end != child_span.end || i == last)
- && mode == TokenMode::Markup
- && child.kind().post().markup_safe()
- {
- if let Some((start, start_offset)) = start {
- loop_result = Some((
- start .. i + 1,
- Span::new(
- self.replace_range.source,
- start_offset,
- offset + child.len(),
- ),
- i == last && outermost,
- child.kind(),
- ));
- }
- break;
- } else if start.is_some()
- && (mode != TokenMode::Markup || !child.kind().post().markup_safe())
- {
- break;
- }
-
- offset += child.len();
- child_at_start = child.kind().is_at_start(child_at_start);
- }
-
-
- // We now have a child that we can self.replace_range and a function to do so if
- // the loop found any results at all.
- let (child_idx_range, child_span, child_outermost, func, policy) =
- loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| {
- let (func, policy) =
- child_kind.reparsing_function(kind.mode().child_mode());
- Ok((a, b, c, func?, policy))
- })?;
-
- let src_span = child_span.inserted(self.replace_range, self.replace_len);
- let recompile_range = if policy == Postcondition::AtomicPrimary {
- src_span.start .. self.src.len()
- } else {
- src_span.to_range()
- };
-
- let (mut new_children, unterminated) =
- func(&self.src[recompile_range], child_at_start).ok_or(())?;
-
- // Do not accept unclosed nodes if the old node did not use to be at the
- // right edge of the tree.
- if !child_outermost && unterminated {
- return Err(());
- }
-
- let insertion = match check_invariants(
- &new_children,
- green.children(),
- child_idx_range.clone(),
- child_at_start,
- mode,
- src_span,
- policy,
- ) {
- InvariantResult::Ok => Ok(new_children),
- InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]),
- InvariantResult::Error => Err(()),
- }?;
-
- green.replace_child_range(child_idx_range, insertion);
-
- Ok(src_span.to_range())
- }
-}
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-enum InvariantResult {
- Ok,
- UseFirst,
- Error,
-}
-
-fn check_invariants(
- use_children: &[Green],
- old_children: &[Green],
- child_idx_range: Range<usize>,
- child_at_start: bool,
- mode: TokenMode,
- src_span: Span,
- policy: Postcondition,
-) -> InvariantResult {
- let (new_children, ok) = if policy == Postcondition::AtomicPrimary {
- if use_children.iter().map(Green::len).sum::<usize>() == src_span.len() {
- (use_children, InvariantResult::Ok)
- } else if use_children.len() == 1 && use_children[0].len() == src_span.len() {
- (&use_children[0 .. 1], InvariantResult::UseFirst)
- } else {
- return InvariantResult::Error;
- }
- } else {
- (use_children, InvariantResult::Ok)
- };
-
- let child_mode = old_children[child_idx_range.start].kind().mode().child_mode();
-
- // Check if the children / child has the right type.
- let same_kind = match policy {
- Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode),
- _ => false,
- };
-
- if same_kind || policy == Postcondition::AtomicPrimary {
- if new_children.len() != 1 {
- return InvariantResult::Error;
- }
-
- if same_kind {
- if old_children[child_idx_range.start].kind() != new_children[0].kind() {
- return InvariantResult::Error;
- }
- }
- }
-
- // Check if the neighbor invariants are still true.
- if mode == TokenMode::Markup {
- if child_idx_range.start > 0 {
- if old_children[child_idx_range.start - 1].kind().pre()
- == Precondition::RightWhitespace
- && !new_children[0].kind().is_whitespace()
- {
- return InvariantResult::Error;
- }
- }
-
- if new_children.last().map(|x| x.kind().pre())
- == Some(Precondition::RightWhitespace)
- && old_children.len() > child_idx_range.end
- {
- if !old_children[child_idx_range.end].kind().is_whitespace() {
- return InvariantResult::Error;
- }
- }
-
- let mut new_at_start = child_at_start;
- for child in new_children {
- new_at_start = child.kind().is_at_start(new_at_start);
- }
-
- for child in &old_children[child_idx_range.end ..] {
- if child.kind().is_trivia() {
- new_at_start = child.kind().is_at_start(new_at_start);
- continue;
- }
-
- match child.kind().pre() {
- Precondition::AtStart if !new_at_start => {
- return InvariantResult::Error;
- }
- Precondition::NotAtStart if new_at_start => {
- return InvariantResult::Error;
- }
- _ => {}
- }
- break;
- }
- }
-
- ok
-}
-
-impl NodeKind {
- pub fn reparsing_function(
- &self,
- parent_mode: TokenMode,
- ) -> (
- Result<fn(&str, bool) -> Option<(Vec<Green>, bool)>, ()>,
- Postcondition,
- ) {
- let policy = self.post();
- let mode = self.mode().contextualize(parent_mode);
-
- match policy {
- Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy),
- Postcondition::AtomicPrimary if mode == TokenMode::Code => {
- (Ok(parse_atomic), policy)
- }
- Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy),
- Postcondition::SameKind(x) if x == None || x == Some(mode) => {
- let parser: fn(&str, bool) -> _ = match self {
- NodeKind::Template => parse_template,
- NodeKind::Block => parse_block,
- NodeKind::LineComment | NodeKind::BlockComment => parse_comment,
- _ => return (Err(()), policy),
- };
-
- (Ok(parser), policy)
- }
- _ => {
- let parser: fn(&str, bool) -> _ = match mode {
- TokenMode::Markup if self == &Self::Markup => parse_markup,
- TokenMode::Markup => parse_markup_elements,
- _ => return (Err(()), policy),
- };
-
- (Ok(parser), policy)
- }
- }
- }
-
- /// Whether it is safe to do incremental parsing on this node. Never allow
- /// non-termination errors if this is not already the last leaf node.
- pub fn post(&self) -> Postcondition {
- match self {
- // Replacing parenthesis changes if the expression is balanced and
- // is therefore not safe.
- Self::LeftBracket
- | Self::RightBracket
- | Self::LeftBrace
- | Self::RightBrace
- | Self::LeftParen
- | Self::RightParen => Postcondition::Unsafe,
-
- // Replacing an operator can change whether the parent is an
- // operation which makes it unsafe. The star can appear in markup.
- Self::Star
- | Self::Comma
- | Self::Semicolon
- | Self::Colon
- | Self::Plus
- | Self::Minus
- | Self::Slash
- | Self::Eq
- | Self::EqEq
- | Self::ExclEq
- | Self::Lt
- | Self::LtEq
- | Self::Gt
- | Self::GtEq
- | Self::PlusEq
- | Self::HyphEq
- | Self::StarEq
- | Self::SlashEq
- | Self::Not
- | Self::And
- | Self::Or
- | Self::With
- | Self::Dots
- | Self::Arrow => Postcondition::Unsafe,
-
- // These keywords are literals and can be safely be substituted with
- // other expressions.
- Self::None | Self::Auto => Postcondition::AtomicPrimary,
-
- // These keywords change what kind of expression the parent is and
- // how far the expression would go.
- Self::Let
- | Self::Set
- | Self::If
- | Self::Else
- | Self::For
- | Self::In
- | Self::While
- | Self::Break
- | Self::Continue
- | Self::Return
- | Self::Import
- | Self::Include
- | Self::From => Postcondition::Unsafe,
-
- Self::Markup => Postcondition::SameKind(None),
-
- Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
-
- // These are all replaceable by other tokens.
- Self::Parbreak
- | Self::Linebreak
- | Self::Text(_)
- | Self::TextInLine(_)
- | Self::NonBreakingSpace
- | Self::EnDash
- | Self::EmDash
- | Self::Escape(_)
- | Self::Strong
- | Self::Emph
- | Self::Heading
- | Self::Enum
- | Self::List
- | Self::Raw(_)
- | Self::Math(_) => Postcondition::Safe,
-
- // Changing the heading level, enum numbering, or list bullet
- // changes the next layer.
- Self::EnumNumbering(_) => Postcondition::Unsafe,
-
- // These are expressions that can be replaced by other expressions.
- Self::Ident(_)
- | Self::Bool(_)
- | Self::Int(_)
- | Self::Float(_)
- | Self::Length(_, _)
- | Self::Angle(_, _)
- | Self::Percentage(_)
- | Self::Str(_)
- | Self::Fraction(_)
- | Self::Array
- | Self::Dict
- | Self::Group => Postcondition::AtomicPrimary,
-
- Self::Call
- | Self::Unary
- | Self::Binary
- | Self::CallArgs
- | Self::Named
- | Self::Spread => Postcondition::UnsafeLayer,
-
- // The closure is a bit magic with the let expression, and also it
- // is not atomic.
- Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer,
-
- // These can appear as bodies and would trigger an error if they
- // became something else.
- Self::Template => Postcondition::SameKind(None),
- Self::Block => Postcondition::SameKind(Some(TokenMode::Code)),
-
- Self::ForExpr
- | Self::WhileExpr
- | Self::IfExpr
- | Self::LetExpr
- | Self::SetExpr
- | Self::ImportExpr
- | Self::IncludeExpr => Postcondition::AtomicPrimary,
-
- Self::WithExpr | Self::ForPattern | Self::ImportItems => {
- Postcondition::UnsafeLayer
- }
-
- // These can appear everywhere and must not change to other stuff
- // because that could change the outer expression.
- Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
-
- Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
- }
- }
-
- /// The appropriate precondition for the type.
- pub fn pre(&self) -> Precondition {
- match self {
- Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
- Self::TextInLine(_) => Precondition::NotAtStart,
- Self::Linebreak => Precondition::RightWhitespace,
- _ => Precondition::None,
- }
- }
-}
-
-/// This enum describes what conditions a node has for being replaced by a new
-/// parse result.
-///
-/// Safe nodes are replaced by the new parse result from the respective mode.
-/// They can be replaced by multiple tokens. If a token is inserted in Markup
-/// mode and the next token would not be `at_start` there needs to be a forward
-/// check for a `EnsureAtStart` node. If this fails, the parent has to be
-/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is
-/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last
-/// tokens to be inserted, the edit is invalidated if there is no following
-/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The
-/// unsafe layers cannot be used but allow children access, the unsafe nodes do
-/// neither.
-///
-/// *Procedure:*
-/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return
-/// None.
-/// 2. Reparse with appropriate node kind and `at_start`.
-/// 3. Check whether the topmost group is terminated and the range was
-/// completely consumed, otherwise return None.
-/// 4. Check if the type criteria are met.
-/// 5. If the node is not at the end of the tree, check if Strings etc. are
-/// terminated.
-/// 6. If this is markup, check the following things:
-/// - The `at_start` conditions of the next non-comment and non-space(0) node
-/// are met.
-/// - The first node is whitespace or the previous siblings are not
-/// `EnsureRightWhitespace`.
-/// - If any of those fails, return None.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Postcondition {
- /// Changing this node can never have an influence on the other nodes.
- Safe,
- /// This node has to be replaced with a single token of the same kind.
- SameKind(Option<TokenMode>),
- /// Changing this node into a single atomic expression is allowed if it
- /// appears in code mode, otherwise it is safe.
- AtomicPrimary,
- /// Changing an unsafe layer node changes what the parents or the
- /// surrounding nodes would be and is therefore disallowed. Change the
- /// parents or children instead. If it appears in Markup, however, it is
- /// safe to change.
- UnsafeLayer,
- /// Changing an unsafe node or any of its children will trigger undefined
- /// behavior. Change the parents instead.
- Unsafe,
-}
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Precondition {
- /// These nodes depend on being at the start of a line. Reparsing of safe
- /// left neighbors has to check this invariant. Otherwise, this node is
- /// safe.
- AtStart,
- /// These nodes depend on not being at the start of a line. Reparsing of
- /// safe left neighbors has to check this invariant. Otherwise, this node is
- /// safe.
- NotAtStart,
- /// These nodes must be followed by whitespace.
- RightWhitespace,
- /// No additional requirements.
- None,
-}
-
-impl Postcondition {
- pub fn unsafe_interior(&self) -> bool {
- match self {
- Self::Unsafe => true,
- _ => false,
- }
- }
-
- pub fn markup_safe(&self) -> bool {
- match self {
- Self::Safe | Self::UnsafeLayer => true,
- Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup),
- _ => false,
- }
- }
-}