diff options
Diffstat (limited to 'src/parse/incremental.rs')
| -rw-r--r-- | src/parse/incremental.rs | 449 |
1 files changed, 186 insertions, 263 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 9c912aae..8e52c143 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -1,13 +1,57 @@ use std::ops::Range; use std::rc::Rc; -use crate::syntax::{Green, GreenNode, NodeKind, Span}; +use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, parse_markup_elements, parse_template, TokenMode, }; +/// The conditions that a node has to fulfill in order to be replaced. +/// +/// This can dictate if a node can be replaced at all and if yes, what can take +/// its place. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option<TokenMode>), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +/// The conditions under which a node can be inserted or remain in a tree. +/// +/// These conditions all search the neighbors of the node and see if its +/// existence is plausible with them present. This can be used to encode some +/// context-free language components for incremental parsing. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + /// Allows partial refreshs of the [`Green`] node tree. /// /// This struct holds a description of a change. Its methods can be used to try @@ -16,21 +60,21 @@ pub struct Reparser<'a> { /// The new source code, with the change applied. src: &'a str, /// Which range in the old source file was changed. - replace_range: Span, - /// How many characters replaced the text in `replacement_range`. + replace_range: Range<usize>, + /// How many characters replaced the text in `replace_range`. replace_len: usize, } impl<'a> Reparser<'a> { /// Create a new reparser. - pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + pub fn new(src: &'a str, replace_range: Range<usize>, replace_len: usize) -> Self { Self { src, replace_range, replace_len } } } impl Reparser<'_> { /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Result<Range<usize>, ()> { + pub fn reparse(&self, green: &mut GreenNode) -> Option<Range<usize>> { self.reparse_step(green, 0, TokenMode::Markup, true) } @@ -39,30 +83,26 @@ impl Reparser<'_> { green: &mut GreenNode, mut offset: usize, parent_mode: TokenMode, - outermost: bool, - ) -> Result<Range<usize>, ()> { + mut outermost: bool, + ) -> Option<Range<usize>> { let kind = green.kind().clone(); let mode = kind.mode().unwrap_or(parent_mode); - let mut loop_result = None; let mut child_at_start = true; - let last = green.children().len() - 1; + let last = green.children().len().saturating_sub(1); let mut start = None; for (i, child) in green.children_mut().iter_mut().enumerate() { - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); + let child_span = offset .. offset + child.len(); // We look for the start in the element but we only take a position // at the right border if this is markup or the last element. // // This is because in Markup mode, we want to examine all nodes // touching a replacement but in code we want to atomically replace. - if child_span.contains(self.replace_range.start) - && (mode == TokenMode::Markup - || self.replace_range.start != child_span.end - || self.replace_range.len() == 0 - || i == last) + if child_span.contains(&self.replace_range.start) + || (mode == TokenMode::Markup + && self.replace_range.start == child_span.end) { start = Some((i, offset)); break; @@ -72,31 +112,21 @@ impl Reparser<'_> { child_at_start = child.kind().is_at_start(child_at_start); } - let (start_idx, start_offset) = start.ok_or(())?; + let (start_idx, start_offset) = start?; + let mut end = None; - for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { - let i = i + start_idx; - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); + for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + let child_span = offset .. offset + child.len(); // Similarly to above, the end of the edit must be in the node but // if it is at the edge and we are in markup node, we also want its // neighbor! - if child_span.contains(self.replace_range.end) - && (mode != TokenMode::Markup - || self.replace_range.end != child_span.end - || i == last) + if child_span.contains(&self.replace_range.end) + || self.replace_range.end == child_span.end + && (mode != TokenMode::Markup || i == last) { - loop_result = Some(( - start_idx .. i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind().clone(), - )); + outermost &= i == last; + end = Some(i); break; } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { break; @@ -105,28 +135,30 @@ impl Reparser<'_> { offset += child.len(); } - let (child_idx_range, child_span, child_outermost, child_kind) = - loop_result.ok_or(())?; + let end = end?; + let child_idx_range = start_idx .. end + 1; + let child_span = start_offset .. offset + green.children()[end].len(); + let child_kind = green.children()[end].kind().clone(); if child_idx_range.len() == 1 { let idx = child_idx_range.start; let child = &mut green.children_mut()[idx]; + let prev_len = child.len(); - let old_len = child.len(); // First, we try if the child has another, more specific applicable child. if !child_kind.post().unsafe_interior() { - if let Ok(range) = match child { + if let Some(range) = match child { Green::Node(n) => self.reparse_step( Rc::make_mut(n), start_offset, kind.mode().unwrap_or(TokenMode::Code), - child_outermost, + outermost, ), - Green::Token(_) => Err(()), + Green::Token(_) => None, } { let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); + green.update_child_len(new_len, prev_len); + return Some(range); } } } @@ -134,28 +166,31 @@ impl Reparser<'_> { debug_assert_ne!(child_idx_range.len(), 0); if mode == TokenMode::Code && child_idx_range.len() > 1 { - return Err(()); + return None; } // We now have a child that we can replace and a function to do so. - let (func, policy) = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); - let func = func?; + let func = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; + let policy = child_kind.post(); + + let len_change = self.replace_len as isize - self.replace_range.len() as isize; + let mut src_span = child_span; + src_span.end = (src_span.end as isize + len_change) as usize; - let src_span = inserted_span(child_span, self.replace_range, self.replace_len); let recompile_range = if policy == Postcondition::AtomicPrimary { src_span.start .. self.src.len() } else { - src_span.to_range() + src_span.clone() }; - let (mut new_children, unterminated) = - func(&self.src[recompile_range], child_at_start).ok_or(())?; + let (mut new_children, terminated) = + func(&self.src[recompile_range], child_at_start)?; // Do not accept unclosed nodes if the old node did not use to be at the // right edge of the tree. - if !child_outermost && unterminated { - return Err(()); + if !outermost && !terminated { + return None; } let insertion = match check_invariants( @@ -164,17 +199,17 @@ impl Reparser<'_> { child_idx_range.clone(), child_at_start, mode, - src_span, + src_span.clone(), policy, ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), + InvariantResult::Ok => Some(new_children), + InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => None, }?; green.replace_child_range(child_idx_range, insertion); - Ok(src_span.to_range()) + Some(src_span) } } @@ -191,7 +226,7 @@ fn check_invariants( child_idx_range: Range<usize>, child_at_start: bool, mode: TokenMode, - src_span: Span, + src_span: Range<usize>, policy: Postcondition, ) -> InvariantResult { let (new_children, ok) = if policy == Postcondition::AtomicPrimary { @@ -206,10 +241,7 @@ fn check_invariants( (use_children, InvariantResult::Ok) }; - let child_mode = old_children[child_idx_range.start] - .kind() - .mode() - .unwrap_or(TokenMode::Code); + let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); // Check if the children / child has the right type. let same_kind = match policy { @@ -249,25 +281,22 @@ fn check_invariants( } } - let mut new_at_start = child_at_start; + let mut post_at_start = child_at_start; for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); + post_at_start = child.kind().is_at_start(post_at_start); } for child in &old_children[child_idx_range.end ..] { if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); + post_at_start = child.kind().is_at_start(post_at_start); continue; } - match child.kind().pre() { - Precondition::AtStart if !new_at_start => { - return InvariantResult::Error; - } - Precondition::NotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} + let pre = child.kind().pre(); + if pre == Precondition::AtStart && !post_at_start + || pre == Precondition::NotAtStart && post_at_start + { + return InvariantResult::Error; } break; } @@ -276,56 +305,31 @@ fn check_invariants( ok } -/// Create a new span by specifying a span in which a modification happened -/// and how many characters are now in that span. -fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { - if !source.surrounds(other) { - panic!(); - } - - let len_change = n as i64 - other.len() as i64; - source.end = (source.end as i64 + len_change) as usize; - source -} - impl NodeKind { /// Return the correct reparsing function given the postconditions for the /// type. fn reparsing_function( &self, parent_mode: TokenMode, - ) -> ( - Result<fn(&str, bool) -> Option<(Vec<Green>, bool)>, ()>, - Postcondition, - ) { + ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> { let policy = self.post(); let mode = self.mode().unwrap_or(parent_mode); match policy { - Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), - Postcondition::AtomicPrimary if mode == TokenMode::Code => { - (Ok(parse_atomic), policy) - } - Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), - Postcondition::SameKind(x) if x == None || x == Some(mode) => { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - _ => { - let parser: fn(&str, bool) -> _ = match mode { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } + Postcondition::Unsafe | Postcondition::UnsafeLayer => None, + Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), + Postcondition::AtomicPrimary => Some(parse_atomic_markup), + Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), + _ => None, + }, + _ => match mode { + TokenMode::Markup if self == &Self::Markup => Some(parse_markup), + TokenMode::Markup => Some(parse_markup_elements), + _ => return None, + }, } } @@ -369,10 +373,6 @@ impl NodeKind { | Self::Dots | Self::Arrow => Postcondition::Unsafe, - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => Postcondition::AtomicPrimary, - // These keywords change what kind of expression the parent is and // how far the expression would go. Self::Let @@ -389,45 +389,14 @@ impl NodeKind { | Self::Include | Self::From => Postcondition::Unsafe, - Self::Markup => Postcondition::SameKind(None), - - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Raw(_) - | Self::Math(_) => Postcondition::Safe, - // Changing the heading level, enum numbering, or list bullet // changes the next layer. Self::EnumNumbering(_) => Postcondition::Unsafe, - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => Postcondition::AtomicPrimary, + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + // These are complex expressions which may screw with their + // environments. Self::Call | Self::Unary | Self::Binary @@ -439,11 +408,44 @@ impl NodeKind { // is not atomic. Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + // Missing these creates errors for the parents. + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // Only markup is expected at the points where it does occur. + Self::Markup => Postcondition::SameKind(None), + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + // These can appear as bodies and would trigger an error if they // became something else. Self::Template => Postcondition::SameKind(None), Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + // Whitespace in code mode has to remain whitespace or else the type + // of things would change. + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::None + | Self::Auto => Postcondition::AtomicPrimary, + + // More complex, but still an expression. Self::ForExpr | Self::WhileExpr | Self::IfExpr @@ -452,15 +454,22 @@ impl NodeKind { | Self::ImportExpr | Self::IncludeExpr => Postcondition::AtomicPrimary, - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), - - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, } } @@ -475,50 +484,6 @@ impl NodeKind { } } -/// The conditions that a node has to fulfill in order to be replaced. -/// -/// This can dictate if a node can be replaced at all and if yes, what can take -/// its place. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind(Option<TokenMode>), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. - Unsafe, -} - -/// The conditions under which a node can be inserted or remain in a tree. -/// -/// These conditions all search the neighbors of the node and see if its -/// existence is plausible with them present. This can be used to encode some -/// context-free language components for incremental parsing. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. - AtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - NotAtStart, - /// These nodes must be followed by whitespace. - RightWhitespace, - /// No additional requirements. - None, -} - impl Postcondition { pub fn unsafe_interior(&self) -> bool { match self { @@ -544,6 +509,7 @@ mod tests { use super::*; #[test] + #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] fn test(prev: &str, range: Range<usize>, with: &str, incr: Range<usize>) { @@ -551,12 +517,14 @@ mod tests { let range = source.edit(range, with); assert_eq!(range, incr); - let incr_tree = source.root(); + let incr_tree = source.root().clone(); assert_eq!(parse(source.src()), incr_tree); } // Test simple replacements. - test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("hello world", 6 .. 11, "walkers", 5 .. 13); + test("some content", 0..12, "", 0..0); + test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); @@ -564,53 +532,18 @@ mod tests { test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - // test( - // "hi\n- item\n- item 2\n - item 3", - // 10 .. 10, - // " ", - // 9 .. 33, - // ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 16 .. 20, - "none", - 16 .. 20, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 33 .. 42, - "[_gronk_]", - 33 .. 42, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 34 .. 41, - "_bar_", - 34 .. 39, - ); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); - test( - "this is -- in my opinion -- spectacular", - 8 .. 10, - "---", - 7 .. 12, - ); - test( - "understanding `code` is complicated", - 15 .. 15, - "C ", - 14 .. 22, - ); + test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); + test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test( - "a #let rect with (fill: eastern)\nb", - 16 .. 31, - " (stroke: conifer", - 2 .. 34, - ); + test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. test("hello \\ world", 7 .. 8, "a ", 6 .. 14); @@ -642,18 +575,8 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); test("a b c", 1 .. 1, " /* letters */", 0 .. 16); test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters */", - 1 .. 35, - ); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters", - 0 .. 38, - ); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); |
