summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
authorMartin Haug <mhaug@live.de>2022-02-21 22:49:50 +0100
committerMartin Haug <mhaug@live.de>2022-02-23 13:58:56 +0100
commit20ac96f27a2e06b985abc1c95049c32c2b88ef5d (patch)
treeb145fa807c4088626c2f0bf1eeb32cff0fd163e1 /src/parse
parentaac3afcba8ee9b3692f784c78626aa0596aaf612 (diff)
New incremental parsing paradigm
Also move column offset into scanner. This fixes #62
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/incremental.rs768
-rw-r--r--src/parse/mod.rs148
-rw-r--r--src/parse/parser.rs17
-rw-r--r--src/parse/scanner.rs25
-rw-r--r--src/parse/tokens.rs6
5 files changed, 322 insertions, 642 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index bb5288dc..7850fe4f 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -4,57 +4,11 @@ use std::sync::Arc;
use crate::syntax::{Green, GreenNode, NodeKind};
use super::{
- is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment,
- parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode,
+ is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode,
};
-/// The conditions that a node has to fulfill in order to be replaced.
-///
-/// This can dictate if a node can be replaced at all and if yes, what can take
-/// its place.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum SuccessionRule {
- /// Changing this node can never have an influence on the other nodes.
- Safe,
- /// This node has to be replaced with a single token of the same kind.
- SameKind(Option<TokenMode>),
- /// In code mode, this node can only be changed into a single atomic
- /// expression, otherwise it is safe.
- AtomicPrimary,
- /// Changing an unsafe layer node in code mode changes what the parents or
- /// the surrounding nodes would be and is therefore disallowed. Change the
- /// parents or children instead. If it appears in Markup, however, it is
- /// safe to change.
- UnsafeLayer,
- /// Changing an unsafe node or any of its children is not allowed. Change
- /// the parents instead.
- Unsafe,
-}
-
-/// The conditions under which a node can be inserted or remain in a tree.
-///
-/// These conditions all search the neighbors of the node and see if its
-/// existence is plausible with them present. This can be used to encode some
-/// context-free language components for incremental parsing.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum NeighbourRule {
- /// These nodes depend on being at the start of a line. Reparsing of safe
- /// left neighbors has to check this invariant. Additionally, when
- /// exchanging the right sibling or inserting such a node the indentation of
- /// the first right non-trivia, non-whitespace sibling must not be greater
- /// than the current indentation.
- AtStart,
- /// These nodes depend on not being at the start of a line. Reparsing of
- /// safe left neighbors has to check this invariant. Otherwise, this node is
- /// safe.
- NotAtStart,
- /// These nodes could end up somewhere else up the tree if the parse was
- /// happening from scratch. The parse result has to be checked for such
- /// nodes. They are safe to add if followed up by other nodes.
- NotAtEnd,
- /// No additional requirements.
- None,
-}
+type ReparseFunc =
+ fn(&str, &str, usize, isize, &[Green], bool) -> Option<(Vec<Green>, bool, usize)>;
/// Allows partial refreshs of the [`Green`] node tree.
///
@@ -79,180 +33,185 @@ impl<'a> Reparser<'a> {
impl Reparser<'_> {
/// Find the innermost child that is incremental safe.
pub fn reparse(&self, green: &mut Arc<GreenNode>) -> Range<usize> {
- self.reparse_step(Arc::make_mut(green), 0, TokenMode::Markup, true)
- .unwrap_or_else(|| {
- *green = parse(self.src);
- 0 .. self.src.len()
- })
+ self.reparse_step(Arc::make_mut(green), 0, true).unwrap_or_else(|| {
+ *green = parse(self.src);
+ 0 .. self.src.len()
+ })
}
fn reparse_step(
&self,
green: &mut GreenNode,
mut offset: usize,
- parent_mode: TokenMode,
- mut outermost: bool,
+ outermost: bool,
) -> Option<Range<usize>> {
- let mode = green.kind().mode().unwrap_or(parent_mode);
let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
let original_count = green.children().len();
- // Save the current indent if this is a markup node.
- let indent = match green.kind() {
- NodeKind::Markup(n) => *n,
- _ => 0,
- };
-
- let mut first = None;
+ let mut search = SearchState::default();
+ let mut ahead_nontrivia = None;
+ // Whether the first node that should be replaced is at start.
let mut at_start = true;
+ let mut end_outermost = false;
// Find the the first child in the range of children to reparse.
- for (i, child) in green.children_mut().iter_mut().enumerate() {
+ for (i, child) in green.children().iter().enumerate() {
+ let pos = GreenPos { idx: i, offset };
let child_span = offset .. offset + child.len();
- // We look for the start in the element but we only take a position
- // at the right border if this is markup or the last element.
- //
- // This is because in Markup mode, we want to examine all nodes next
- // to a replacement but in code we want to atomically replace. At
- // least one character on either side of the replacement must be
- // reparsed with it to keep the Space / Text node coalescing intact.
- if (child_mode == TokenMode::Markup
- && child_span.end + 1 >= self.replace_range.start)
- || child_span.contains(&self.replace_range.start)
- {
- first = Some((i, offset));
- break;
+ match search {
+ SearchState::NoneFound => {
+ // The edit is contained within the span of the current element.
+ if child_span.contains(&self.replace_range.start)
+ && child_span.end >= self.replace_range.end
+ {
+ // In Markup mode, we want to consider a non-whitespace
+ // neighbor if the edit is on the node boundary.
+ search = if child_span.end == self.replace_range.end
+ && child_mode == TokenMode::Markup
+ {
+ SearchState::RequireNonWS(pos)
+ } else {
+ // println!("found containing block {:?}", green.kind());
+ SearchState::Contained(pos)
+ };
+ } else if child_span.contains(&self.replace_range.start) {
+ search = SearchState::Inside(pos);
+ } else {
+ if !child.kind().is_space()
+ && child.kind() != &NodeKind::Semicolon
+ {
+ ahead_nontrivia = Some((pos, at_start));
+ }
+ at_start = child.kind().is_at_start(at_start);
+ }
+ }
+ SearchState::Inside(start) => {
+ if child_span.end == self.replace_range.end {
+ search = SearchState::RequireNonWS(start);
+ } else if child_span.end > self.replace_range.end {
+ search = SearchState::SpanFound(start, pos);
+ }
+ }
+ SearchState::RequireNonWS(start) => {
+ if !child.kind().is_trivia() {
+ search = SearchState::SpanFound(start, pos);
+ }
+ }
+ _ => unreachable!(),
}
offset += child.len();
- at_start = child.kind().is_at_start(at_start);
- }
-
- let (first_idx, first_start) = first?;
- let mut last = None;
-
- // Find the the last child in the range of children to reparse.
- for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) {
- let child_span = offset .. offset + child.len();
-
- // Similarly to above, the end of the edit must be in the
- // reconsidered range. However, in markup mode, we need to extend
- // the reconsidered range by up to two nodes so that spacing etc.
- // results in the same tree.
- //
- // Therefore, there are two cases:
- // 1. We are at the end of the string or in code mode and the
- // current node perfectly matches the end of the replacement
- // 2. The end is contained within this node, and, in Markup mode,
- // is not the first thing in it.
- let ignore_overhang =
- i + 1 == original_count || child_mode != TokenMode::Markup;
-
- if (self.replace_range.end == child_span.end && ignore_overhang)
- || (child_span.end > self.replace_range.end
- && (self.replace_range.end != child_span.start || ignore_overhang))
- {
- outermost &= i + 1 == original_count;
- last = Some((i, offset + child.len()));
- break;
- } else if child_mode != TokenMode::Markup
- || !child.kind().succession_rule().safe_in_markup()
- {
+ end_outermost = outermost && i + 1 == original_count;
+ if search.end().is_some() {
break;
}
-
- offset += child.len();
}
- let (last_idx, last_end) = last?;
- let superseded_range = first_idx .. last_idx + 1;
- let superseded_span = first_start .. last_end;
- let last_kind = green.children()[last_idx].kind().clone();
-
- // First, we try if the child itself has another, more specific
- // applicable child.
- if superseded_range.len() == 1 {
- let child = &mut green.children_mut()[superseded_range.start];
+ if let SearchState::Contained(pos) = search {
+ let child = &mut green.children_mut()[pos.idx];
let prev_len = child.len();
- if last_kind.succession_rule() != SuccessionRule::Unsafe
- && !matches!(last_kind, NodeKind::Strong | NodeKind::Emph)
- {
- if let Some(range) = match child {
- Green::Node(node) => self.reparse_step(
- Arc::make_mut(node),
- first_start,
- child_mode,
- outermost,
- ),
- Green::Token(_) => None,
- } {
- let new_len = child.len();
- green.update_parent(new_len, prev_len);
- return Some(range);
+ if let Some(range) = match child {
+ Green::Node(node) => {
+ self.reparse_step(Arc::make_mut(node), pos.offset, end_outermost)
+ }
+ Green::Token(_) => None,
+ } {
+ let new_len = child.len();
+ green.update_parent(new_len, prev_len);
+ return Some(range);
+ }
+
+ let superseded_span = pos.offset .. pos.offset + prev_len;
+ let func: Option<ReparseFunc> = match child.kind() {
+ NodeKind::Template => Some(parse_template),
+ NodeKind::Block => Some(parse_block),
+ _ => None,
+ };
+
+ if let Some(func) = func {
+ if let Some(result) = self.replace(
+ green,
+ func,
+ pos.idx .. pos.idx + 1,
+ superseded_span,
+ at_start,
+ end_outermost,
+ outermost,
+ ) {
+ return Some(result);
}
}
}
- // We only replace multiple children in markup mode.
- if superseded_range.len() > 1 && mode == TokenMode::Code {
+ if !matches!(green.kind(), NodeKind::Markup(_)) {
return None;
}
- // We now have a child that we can replace and a function to do so.
- let func = last_kind.reparsing_func(child_mode, indent)?;
- let succession = last_kind.succession_rule();
+ let (mut start, end) = search.end()?;
+ if let Some((ahead, ahead_at_start)) = ahead_nontrivia {
+ let ahead_kind = green.children()[ahead.idx].kind();
- let mut markup_min_column = 0;
-
- // If this is a markup node, we want to save its indent instead to pass
- // the right indent argument.
- if superseded_range.len() == 1 {
- let child = &mut green.children_mut()[superseded_range.start];
- if let NodeKind::Markup(n) = child.kind() {
- markup_min_column = *n;
+ if start.offset == self.replace_range.start
+ || ahead_kind.only_at_start()
+ || ahead_kind == &NodeKind::LineComment
+ {
+ start = ahead;
+ at_start = ahead_at_start;
}
}
- // The span of the to-be-reparsed children in the new source.
+ let superseded_span =
+ start.offset .. end.offset + green.children()[end.idx].len();
+ self.replace(
+ green,
+ parse_markup_elements,
+ start.idx .. end.idx + 1,
+ superseded_span,
+ at_start,
+ end_outermost,
+ outermost,
+ )
+ }
+
+ fn replace(
+ &self,
+ green: &mut GreenNode,
+ func: ReparseFunc,
+ superseded_idx: Range<usize>,
+ superseded_span: Range<usize>,
+ at_start: bool,
+ outermost: bool,
+ parent_outermost: bool,
+ ) -> Option<Range<usize>> {
+ let differential: isize =
+ self.replace_len as isize - self.replace_range.len() as isize;
let newborn_span = superseded_span.start
..
- superseded_span.end + self.replace_len - self.replace_range.len();
-
- // For atomic primaries we need to pass in the whole remaining string to
- // check whether the parser would eat more stuff illicitly.
- let reparse_span = if succession == SuccessionRule::AtomicPrimary {
- newborn_span.start .. self.src.len()
- } else {
- newborn_span.clone()
- };
+ (superseded_span.end as isize + differential) as usize;
+ let superseded_start = superseded_idx.start;
let mut prefix = "";
- for (i, c) in self.src[.. reparse_span.start].char_indices().rev() {
+ for (i, c) in self.src[.. newborn_span.start].char_indices().rev() {
if is_newline(c) {
break;
}
- prefix = &self.src[i .. reparse_span.start];
+ prefix = &self.src[i .. newborn_span.start];
}
- // Do the reparsing!
- let (mut newborns, terminated) = func(
+ // // println!("reparsing...");
+
+ let (newborns, terminated, amount) = func(
&prefix,
- &self.src[reparse_span.clone()],
+ &self.src[newborn_span.start ..],
+ newborn_span.len(),
+ differential,
+ &green.children()[superseded_start ..],
at_start,
- markup_min_column,
)?;
- // Make sure that atomic primaries ate only what they were supposed to.
- if succession == SuccessionRule::AtomicPrimary {
- let len = newborn_span.len();
- if newborns.len() > 1 && newborns[0].len() == len {
- newborns.truncate(1);
- } else if newborns.iter().map(Green::len).sum::<usize>() != len {
- return None;
- }
- }
+ // // println!("Reparse success");
// Do not accept unclosed nodes if the old node wasn't at the right edge
// of the tree.
@@ -260,362 +219,64 @@ impl Reparser<'_> {
return None;
}
- // If all post- and preconditions match, we are good to go!
- if validate(
- green.children(),
- superseded_range.clone(),
- at_start,
- &newborns,
- mode,
- succession,
- newborn_span.clone(),
- self.src,
- ) {
- green.replace_children(superseded_range, newborns);
- Some(newborn_span)
- } else {
- None
- }
- }
-}
-
-/// Validate that a node replacement is allowed by post- and preconditions.
-fn validate(
- superseded: &[Green],
- superseded_range: Range<usize>,
- mut at_start: bool,
- newborns: &[Green],
- mode: TokenMode,
- post: SuccessionRule,
- newborn_span: Range<usize>,
- src: &str,
-) -> bool {
- // Atomic primaries must only generate one new child.
- if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 {
- return false;
- }
-
- // Same kind in mode `inside` must generate only one child and that child
- // must be of the same kind as previously.
- if let SuccessionRule::SameKind(inside) = post {
- let superseded_kind = superseded[superseded_range.start].kind();
- let superseded_mode = superseded_kind.mode().unwrap_or(mode);
- if inside.map_or(true, |m| m == superseded_mode)
- && (newborns.len() != 1 || superseded_kind != newborns[0].kind())
- {
- return false;
- }
- }
-
- // Neighbor invariants are only relevant in markup mode.
- if mode == TokenMode::Code {
- return true;
- }
-
- // Check if there are any `AtStart` predecessors which require a certain
- // indentation.
- let s = Scanner::new(src);
- let mut prev_pos = newborn_span.start;
- for child in (&superseded[.. superseded_range.start]).iter().rev() {
- prev_pos -= child.len();
- if !child.kind().is_trivia() {
- if child.kind().neighbour_rule() == NeighbourRule::AtStart {
- let left_col = s.column(prev_pos);
-
- // Search for the first non-trivia newborn.
- let mut new_pos = newborn_span.start;
- let mut child_col = None;
- for child in newborns {
- if !child.kind().is_trivia() {
- child_col = Some(s.column(new_pos));
- break;
- }
-
- new_pos += child.len();
- }
-
- if let Some(child_col) = child_col {
- if child_col > left_col {
- return false;
- }
- }
- }
-
- break;
- }
- }
-
- // Compute the at_start state behind the new children.
- for child in newborns {
- at_start = child.kind().is_at_start(at_start);
- }
-
- // Ensure that a possible at-start or not-at-start precondition of
- // a node after the replacement range is satisfied.
- for child in &superseded[superseded_range.end ..] {
- let neighbour_rule = child.kind().neighbour_rule();
- if (neighbour_rule == NeighbourRule::AtStart && !at_start)
- || (neighbour_rule == NeighbourRule::NotAtStart && at_start)
- {
- return false;
- }
-
- if !child.kind().is_trivia() {
- break;
- }
-
- at_start = child.kind().is_at_start(at_start);
- }
-
- // Verify that the last of the newborns is not `NotAtEnd`.
- if newborns.last().map_or(false, |child| {
- child.kind().neighbour_rule() == NeighbourRule::NotAtEnd
- }) {
- return false;
- }
-
- // We have to check whether the last non-trivia newborn is `AtStart` and
- // verify the indent of its right neighbors in order to make sure its
- // indentation requirements are fulfilled.
- let mut child_pos = newborn_span.end;
- for child in newborns.iter().rev() {
- child_pos -= child.len();
-
- if child.kind().is_trivia() {
- continue;
+ if !parent_outermost && green.children()[superseded_start ..].len() == amount {
+ return None;
}
- if child.kind().neighbour_rule() == NeighbourRule::AtStart {
- let child_col = s.column(child_pos)
- + match child.kind() {
- NodeKind::Heading => child
- .children()
- .iter()
- .filter(|n| n.kind() == &NodeKind::Eq)
- .count(),
- NodeKind::List => 1,
- NodeKind::Enum => child.children().first().unwrap().len(),
- _ => 0,
- };
-
- let mut right_pos = newborn_span.end;
- for child in &superseded[superseded_range.end ..] {
- if child.kind().is_trivia() {
- right_pos += child.len();
- continue;
- }
-
- if s.column(right_pos) > child_col {
- return false;
- }
- break;
- }
- }
- break;
+ green.replace_children(superseded_start .. superseded_start + amount, newborns);
+ Some(newborn_span)
}
-
- true
}
-impl NodeKind {
- /// Return the correct reparsing function given the postconditions for the
- /// type.
- fn reparsing_func(
- &self,
- parent_mode: TokenMode,
- indent: usize,
- ) -> Option<fn(&str, &str, bool, usize) -> Option<(Vec<Green>, bool)>> {
- let mode = self.mode().unwrap_or(parent_mode);
- match self.succession_rule() {
- SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
- SuccessionRule::AtomicPrimary => match mode {
- TokenMode::Code => Some(parse_atomic),
- TokenMode::Markup => Some(parse_atomic_markup),
- },
- SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
- NodeKind::Markup(_) => Some(parse_markup),
- NodeKind::Template => Some(parse_template),
- NodeKind::Block => Some(parse_block),
- NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
- _ => None,
- },
- _ => match mode {
- TokenMode::Markup if indent == 0 => Some(parse_markup_elements),
- _ => return None,
- },
- }
- }
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct GreenPos {
+ idx: usize,
+ offset: usize,
+}
- /// Whether it is safe to do incremental parsing on this node.
- pub fn succession_rule(&self) -> SuccessionRule {
- match self {
- // These are all replaceable by other tokens.
- Self::Linebreak
- | Self::Text(_)
- | Self::TextInLine(_)
- | Self::NonBreakingSpace
- | Self::EnDash
- | Self::EmDash
- | Self::Escape(_)
- | Self::Strong
- | Self::Emph
- | Self::Heading
- | Self::Enum
- | Self::List
- | Self::Math(_) => SuccessionRule::Safe,
-
- // Only markup is expected at the points where it does occur. The
- // indentation must be preserved as well, also for the children.
- Self::Markup(_) => SuccessionRule::SameKind(None),
-
- // These can appear everywhere and must not change to other stuff
- // because that could change the outer expression.
- Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None),
-
- // These can appear as bodies and would trigger an error if they
- // became something else.
- Self::Template => SuccessionRule::SameKind(None),
- Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)),
-
- // Whitespace in code mode has to remain whitespace or else the type
- // of things would change.
- Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)),
-
- // These are expressions that can be replaced by other expressions.
- Self::Ident(_)
- | Self::Bool(_)
- | Self::Int(_)
- | Self::Float(_)
- | Self::Length(_, _)
- | Self::Angle(_, _)
- | Self::Percentage(_)
- | Self::Str(_)
- | Self::Fraction(_)
- | Self::Array
- | Self::Dict
- | Self::Group
- | Self::None
- | Self::Auto => SuccessionRule::AtomicPrimary,
-
- // More complex, but still an expression.
- Self::ForExpr
- | Self::WhileExpr
- | Self::IfExpr
- | Self::LetExpr
- | Self::SetExpr
- | Self::ShowExpr
- | Self::WrapExpr
- | Self::ImportExpr
- | Self::IncludeExpr
- | Self::BreakExpr
- | Self::ContinueExpr
- | Self::ReturnExpr => SuccessionRule::AtomicPrimary,
-
- // These are complex expressions which may screw with their
- // environments.
- Self::Call
- | Self::Unary
- | Self::Binary
- | Self::CallArgs
- | Self::Named
- | Self::Spread => SuccessionRule::UnsafeLayer,
-
- // The closure is a bit magic with the let expression, and also it
- // is not atomic.
- Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer,
-
- // Missing these creates errors for the parents.
- Self::WithExpr | Self::ForPattern | Self::ImportItems => {
- SuccessionRule::UnsafeLayer
- }
+/// Encodes the state machine of the search for the node which is pending for
+/// replacement.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum SearchState {
+ /// Neither an end nor a start have been found as of now.
+ /// The last non-whitespace child is continually saved.
+ NoneFound,
+ /// The search has concluded by finding a node that fully contains the
+ /// modifications.
+ Contained(GreenPos),
+ /// The search has found the start of the modified nodes.
+ Inside(GreenPos),
+ /// The search has found the end of the modified nodes but the change
+ /// touched its boundries so another non-trivia node is needed.
+ RequireNonWS(GreenPos),
+ /// The search has concluded by finding a start and an end index for nodes
+ /// with a pending reparse.
+ SpanFound(GreenPos, GreenPos),
+}
- // Replacing parenthesis changes if the expression is balanced and
- // is therefore not safe.
- Self::LeftBracket
- | Self::RightBracket
- | Self::LeftBrace
- | Self::RightBrace
- | Self::LeftParen
- | Self::RightParen => SuccessionRule::Unsafe,
-
- // These work similar to parentheses.
- Self::Star | Self::Underscore => SuccessionRule::Unsafe,
-
- // Replacing an operator can change whether the parent is an
- // operation which makes it unsafe.
- Self::Comma
- | Self::Semicolon
- | Self::Colon
- | Self::Plus
- | Self::Minus
- | Self::Slash
- | Self::Eq
- | Self::EqEq
- | Self::ExclEq
- | Self::Lt
- | Self::LtEq
- | Self::Gt
- | Self::GtEq
- | Self::PlusEq
- | Self::HyphEq
- | Self::StarEq
- | Self::SlashEq
- | Self::Not
- | Self::And
- | Self::Or
- | Self::With
- | Self::Dots
- | Self::Arrow => SuccessionRule::Unsafe,
-
- // These keywords change what kind of expression the parent is and
- // how far the expression would go.
- Self::Let
- | Self::Set
- | Self::Show
- | Self::Wrap
- | Self::If
- | Self::Else
- | Self::For
- | Self::In
- | Self::As
- | Self::While
- | Self::Break
- | Self::Continue
- | Self::Return
- | Self::Import
- | Self::Include
- | Self::From => SuccessionRule::Unsafe,
-
- // This element always has to remain in the same column so better
- // reparse the whole parent.
- Self::Raw(_) => SuccessionRule::Unsafe,
-
- // Changing the heading level, enum numbering, or list bullet
- // changes the next layer.
- Self::EnumNumbering(_) => SuccessionRule::Unsafe,
-
- // This can be anything, so we don't make any promises.
- Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe,
- }
+impl Default for SearchState {
+ fn default() -> Self {
+ Self::NoneFound
}
+}
- /// Whether it is safe to insert this node next to some nodes or vice versa.
- pub fn neighbour_rule(&self) -> NeighbourRule {
+impl SearchState {
+ fn end(&self) -> Option<(GreenPos, GreenPos)> {
match self {
- Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart,
- Self::TextInLine(_) => NeighbourRule::NotAtStart,
- Self::Error(_, _) => NeighbourRule::NotAtEnd,
- _ => NeighbourRule::None,
+ Self::NoneFound => None,
+ Self::Contained(s) => Some((*s, *s)),
+ Self::Inside(_) => None,
+ Self::RequireNonWS(_) => None,
+ Self::SpanFound(s, e) => Some((*s, *e)),
}
}
}
-impl SuccessionRule {
- /// Whether a node with this condition can be reparsed in markup mode.
- pub fn safe_in_markup(&self) -> bool {
+impl NodeKind {
+ /// Whether this node has to appear at the start of a line.
+ pub fn only_at_start(&self) -> bool {
match self {
- Self::Safe | Self::UnsafeLayer => true,
- Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup),
+ Self::Heading | Self::Enum | Self::List => true,
_ => false,
}
}
@@ -639,59 +300,62 @@ mod tests {
#[test]
fn test_parse_incremental_simple_replacements() {
- test("hello world", 7 .. 12, "walkers", 5 .. 14);
+ test("hello world", 7 .. 12, "walkers", 0 .. 14);
test("some content", 0..12, "", 0..0);
test("", 0..0, "do it", 0..5);
test("a d e", 1 .. 3, " b c d", 0 .. 9);
test("a #f() e", 1 .. 6, " b c d", 0 .. 9);
- test("a\nb\nc\nd\ne\n", 5..5, "c", 3..8);
- test("a\n\nb\n\nc\n\nd\n\ne\n", 7..7, "c", 4..11);
+ test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 4 .. 7);
+ test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 6 .. 10);
test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 6..20);
- test("{a}", 1 .. 2, "b", 1 .. 2);
- test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
- test("\n= A heading", 3 .. 3, "n evocative", 1 .. 23);
- test("for~your~thing", 9 .. 9, "a", 4 .. 15);
+ test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6);
+ test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14);
+ test("\n= A heading", 3 .. 3, "n evocative", 3 .. 23);
+ test("for~your~thing", 9 .. 9, "a", 8 .. 15);
test("a your thing a", 6 .. 7, "a", 0 .. 14);
test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
- test("#call() abc", 7 .. 7, "[]", 0 .. 13);
- test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 4 .. 34);
- test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39);
- test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9);
- test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32);
- test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11);
+ test("#call() abc", 7 .. 7, "[]", 0 .. 10);
+ // Investigate
+ test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35);
+ test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 0..99);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33..42);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
+ test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
+ test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33);
+ // Investigate
+ test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25);
- test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
- test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
+ test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
+ test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
- test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+ test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
test("#for", 4 .. 4, "//", 0 .. 6);
}
#[test]
fn test_parse_incremental_whitespace_invariants() {
- test("hello \\ world", 7 .. 8, "a ", 5 .. 14);
- test("hello \\ world", 7 .. 8, " a", 5 .. 14);
- test("x = y", 1 .. 1, " + y", 0 .. 7);
- test("x = y", 1 .. 1, " + y\n", 0 .. 10);
- test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 22);
- test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 20);
- test("#let x = (1, 2 + ;~ Five\r\n\r", 20..23, "2.", 18..23);
+ test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
+ test("hello \\ world", 7 .. 8, " a", 6 .. 14);
+ test("x = y", 1 .. 1, " + y", 0 .. 6);
+ test("x = y", 1 .. 1, " + y\n", 0 .. 7);
+ test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
+ test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
+ test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23);
test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
- test("hey #myfriend", 4 .. 4, "\\", 0 .. 6);
+ test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
+ test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11)
}
#[test]
fn test_parse_incremental_type_invariants() {
- test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
- test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
- test("a {let x = 1 {5}} b", 3 .. 6, "if", 1 .. 16);
- test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17);
+ test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
+ test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11);
+ test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
+ test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
- test("a // b c #f()", 3 .. 4, "", 0 .. 12);
+ test("a // b c #f()", 3 .. 4, "", 2 .. 12);
test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
@@ -707,12 +371,12 @@ mod tests {
test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
test("a b c", 1 .. 1, " /* letters */", 0 .. 19);
test("a b c", 1 .. 1, " /* letters", 0 .. 16);
- test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
+ test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41);
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
- test("~~~~", 2 .. 2, "[]", 0 .. 6);
- test("a[]b", 2 .. 2, "{", 0 .. 5);
+ test("~~~~", 2 .. 2, "[]", 1 .. 5);
+ test("a[]b", 2 .. 2, "{", 1 .. 4);
test("[hello]", 2 .. 3, "]", 0 .. 7);
- test("{a}", 1 .. 2, "b", 1 .. 2);
+ test("{a}", 1 .. 2, "b", 0 .. 3);
test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
test("#a()\n~", 3..4, "{}", 0..7);
test("[]\n~", 1..2, "#if i==0 {true}", 0..18);
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index bd217c1c..7c0a0932 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -28,103 +28,124 @@ pub fn parse(src: &str) -> Arc<GreenNode> {
}
}
-/// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(
- prefix: &str,
- src: &str,
- _: bool,
- min_column: usize,
-) -> Option<(Vec<Green>, bool)> {
- let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
- if min_column == 0 {
- markup(&mut p, true);
- } else {
- markup_indented(&mut p, min_column);
- }
- p.consume()
-}
-
/// Parse some markup without the topmost node. Returns `Some` if all of the
/// input was consumed.
pub fn parse_markup_elements(
prefix: &str,
src: &str,
+ end_pos: usize,
+ differential: isize,
+ reference: &[Green],
mut at_start: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
+) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+
+ let mut node: Option<&Green> = None;
+ let mut iter = reference.iter();
+ let mut offset = 0;
+ let mut replaced = 0;
+ let mut stopped = false;
+
while !p.eof() {
markup_node(&mut p, &mut at_start);
+
+ if p.prev_end() >= end_pos {
+ let recent = p.children.last().unwrap();
+ let recent_start = p.prev_end() - recent.len();
+
+ while offset <= recent_start {
+ if let Some(node) = node {
+ // The nodes are equal, at the same position and have the
+ // same content. The parsing trees have converged again, so
+ // the reparse may stop here.
+ if (offset as isize + differential) as usize == recent_start
+ && node == recent
+ {
+ replaced -= 1;
+ stopped = true;
+ break;
+ }
+ }
+
+ let result = iter.next();
+ if let Some(node) = node {
+ offset += node.len();
+ }
+ node = result;
+ if node.is_none() {
+ break;
+ } else {
+ replaced += 1;
+ }
+ }
+
+ if stopped {
+ break;
+ }
+ }
}
- p.consume()
-}
-/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(
- prefix: &str,
- src: &str,
- _: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
- let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
- primary(&mut p, true).ok()?;
- p.consume_open_ended()
-}
+ if p.eof() && !stopped {
+ replaced = reference.len();
+ }
-/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(
- prefix: &str,
- src: &str,
- _: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
- let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
- markup_expr(&mut p);
- p.consume_open_ended()
+ let (mut res, terminated) = p.consume_open_ended()?;
+ if stopped {
+ res.pop().unwrap();
+ }
+
+ Some((res, terminated, replaced))
}
/// Parse a template literal. Returns `Some` if all of the input was consumed.
pub fn parse_template(
prefix: &str,
src: &str,
+ end_pos: usize,
+ _: isize,
+ reference: &[Green],
_: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
+) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBracket) {
return None;
}
template(&mut p);
- p.consume()
+
+ let (mut green, terminated) = p.consume_open_ended()?;
+ let first = green.remove(0);
+ if first.len() != end_pos {
+ return None;
+ }
+
+ Some((vec![first], terminated, 1))
}
/// Parse a code block. Returns `Some` if all of the input was consumed.
pub fn parse_block(
prefix: &str,
src: &str,
+ end_pos: usize,
+ _: isize,
+ reference: &[Green],
_: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
+) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBrace) {
return None;
}
block(&mut p);
- p.consume()
-}
-/// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(
- prefix: &str,
- src: &str,
- _: bool,
- _: usize,
-) -> Option<(Vec<Green>, bool)> {
- let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
- comment(&mut p).ok()?;
- p.consume()
+ let (mut green, terminated) = p.consume_open_ended()?;
+ let first = green.remove(0);
+
+ if first.len() != end_pos {
+ return None;
+ }
+
+ Some((vec![first], terminated, 1))
}
/// Parse markup.
@@ -916,17 +937,6 @@ fn body(p: &mut Parser) -> ParseResult {
Ok(())
}
-/// Parse a comment.
-fn comment(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(NodeKind::LineComment | NodeKind::BlockComment) => {
- p.eat();
- Ok(())
- }
- _ => Err(ParseError),
- }
-}
-
#[cfg(test)]
mod tests {
use std::fmt::Debug;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 545f6fd4..d9cc0e31 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -7,8 +7,6 @@ use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
/// A convenient token-based parser.
pub struct Parser<'s> {
- /// Offsets the indentation on the first line of the source.
- column_offset: usize,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// Whether we are at the end of the file or of a group.
@@ -22,7 +20,7 @@ pub struct Parser<'s> {
/// The stack of open groups.
groups: Vec<GroupEntry>,
/// The children of the currently built node.
- children: Vec<Green>,
+ pub children: Vec<Green>,
/// Whether the last group was not correctly terminated.
unterminated_group: bool,
/// Whether a group terminator was found, that did not close a group.
@@ -32,10 +30,13 @@ pub struct Parser<'s> {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str, mode: TokenMode) -> Self {
- let mut tokens = Tokens::new(src, mode);
+ Self::with_offset(src, mode, 0)
+ }
+
+ fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self {
+ let mut tokens = Tokens::new(src, mode, offset);
let current = tokens.next();
Self {
- column_offset: 0,
tokens,
eof: current.is_none(),
current,
@@ -52,9 +53,7 @@ impl<'s> Parser<'s> {
/// that does not need to be parsed but taken into account for column
/// calculation.
pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
- let mut p = Self::new(src, mode);
- p.column_offset = Scanner::new(prefix).column(prefix.len());
- p
+ Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len()))
}
/// End the parsing process and return the last child.
@@ -226,7 +225,7 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.tokens.scanner().column_offset(index, self.column_offset)
+ self.tokens.scanner().column(index)
}
/// Continue parsing in a group.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 685503c3..15060c7b 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -10,13 +10,21 @@ pub struct Scanner<'s> {
/// The index at which the peekable character starts. Must be in bounds and
/// at a codepoint boundary to guarantee safety.
index: usize,
+ /// Offsets the indentation on the first line of the source.
+ column_offset: usize,
}
impl<'s> Scanner<'s> {
/// Create a new char scanner.
#[inline]
pub fn new(src: &'s str) -> Self {
- Self { src, index: 0 }
+ Self { src, index: 0, column_offset: 0 }
+ }
+
+ /// Create a new char scanner with an offset for the first line indent.
+ #[inline]
+ pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self {
+ Self { src, index: 0, column_offset }
}
/// Whether the end of the string is reached.
@@ -173,13 +181,6 @@ impl<'s> Scanner<'s> {
/// The column index of a given index in the source string.
#[inline]
pub fn column(&self, index: usize) -> usize {
- self.column_offset(index, 0)
- }
-
- /// The column index of a given index in the source string when an offset is
- /// applied to the first line of the string.
- #[inline]
- pub fn column_offset(&self, index: usize, offset: usize) -> usize {
let mut apply_offset = false;
let res = self.src[.. index]
.char_indices()
@@ -192,7 +193,13 @@ impl<'s> Scanner<'s> {
})
.count();
- if apply_offset { res + offset } else { res }
+ // The loop is never executed if the slice is empty, but we are of
+ // course still at the start of the first line.
+ if self.src[.. index].len() == 0 {
+ apply_offset = true;
+ }
+
+ if apply_offset { res + self.column_offset } else { res }
}
}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index e88b49f9..4a13694a 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -28,9 +28,9 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(src: &'s str, mode: TokenMode) -> Self {
+ pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self {
Self {
- s: Scanner::new(src),
+ s: Scanner::with_indent_offset(src, offset),
mode,
terminated: true,
}
@@ -689,7 +689,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
- let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
+ let found = Tokens::new(&src, $mode, 0).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};