Merge pull request #48 from typst/parser-incr

Incremental parsing
author: Laurenz <laurmaedje@gmail.com> 2022-01-04 00:27:05 +0100
committer: GitHub <noreply@github.com> 2022-01-04 00:27:05 +0100
commit: 4c81a5d43eabd959dbb500a8076f99f21bd037bd (patch)
tree: 03349230f74786c7128876889c07a31a4932f108
parent: 52761a3baa901865b1fc42366017740cfa7eb566 (diff)
parent: c994cfa7d814e3909682b19322867ed5c676c453 (diff)
13 files changed, 1244 insertions, 86 deletions
diff --git a/benches/oneshot.rs b/benches/oneshot.rs
index d3e2ff8e..c088a93c 100644
--- a/benches/oneshot.rs
+++ b/benches/oneshot.rs
@@ -49,6 +49,11 @@ fn bench_parse(iai: &mut Iai) {
     iai.run(|| parse(SRC));
 }
 
+fn bench_edit(iai: &mut Iai) {
+    let (mut ctx, id) = context();
+    iai.run(|| black_box(ctx.sources.edit(id, 1168 .. 1171, "_Uhr_")));
+}
+
 fn bench_eval(iai: &mut Iai) {
     let (mut ctx, id) = context();
     iai.run(|| ctx.evaluate(id).unwrap());
@@ -66,6 +71,7 @@ main!(
     bench_scan,
     bench_tokenize,
     bench_parse,
+    bench_edit,
     bench_eval,
     bench_layout
 );
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
new file mode 100644
index 00000000..4c82f158
--- /dev/null
+++ b/src/parse/incremental.rs
@@ -0,0 +1,672 @@
+use std::ops::Range;
+use std::rc::Rc;
+
+use crate::syntax::{Green, GreenNode, NodeKind};
+
+use super::{
+    is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment,
+    parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode,
+};
+
+/// The conditions that a node has to fulfill in order to be replaced.
+///
+/// This can dictate if a node can be replaced at all and if yes, what can take
+/// its place.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum SuccessionRule {
+    /// Changing this node can never have an influence on the other nodes.
+    Safe,
+    /// This node has to be replaced with a single token of the same kind.
+    SameKind(Option<TokenMode>),
+    /// In code mode, this node can only be changed into a single atomic
+    /// expression, otherwise it is safe.
+    AtomicPrimary,
+    /// Changing an unsafe layer node in code mode changes what the parents or
+    /// the surrounding nodes would be and is therefore disallowed. Change the
+    /// parents or children instead. If it appears in Markup, however, it is
+    /// safe to change.
+    UnsafeLayer,
+    /// Changing an unsafe node or any of its children is not allowed. Change
+    /// the parents instead.
+    Unsafe,
+}
+
+/// The conditions under which a node can be inserted or remain in a tree.
+///
+/// These conditions all search the neighbors of the node and see if its
+/// existence is plausible with them present. This can be used to encode some
+/// context-free language components for incremental parsing.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum NeighbourRule {
+    /// These nodes depend on being at the start of a line. Reparsing of safe
+    /// left neighbors has to check this invariant. Additionally, when
+    /// exchanging the right sibling or inserting such a node the indentation of
+    /// the first right non-trivia, non-whitespace sibling must not be greater
+    /// than the current indentation.
+    AtStart,
+    /// These nodes depend on not being at the start of a line. Reparsing of
+    /// safe left neighbors has to check this invariant. Otherwise, this node is
+    /// safe.
+    NotAtStart,
+    /// These nodes could end up somewhere else up the tree if the parse was
+    /// happening from scratch. The parse result has to be checked for such
+    /// nodes. They are safe to add if followed up by other nodes.
+    NotAtEnd,
+    /// No additional requirements.
+    None,
+}
+
+/// Allows partial refreshes of the [`Green`] node tree.
+///
+/// This struct holds a description of a change. Its methods can be used to try
+/// and apply the change to a green tree.
+pub struct Reparser<'a> {
+    /// The new source code, with the change applied.
+    src: &'a str,
+    /// Which range in the old source file was changed.
+    replace_range: Range<usize>,
+    /// How many bytes replaced the text in `replace_range`.
+    replace_len: usize,
+}
+
+impl<'a> Reparser<'a> {
+    /// Create a new reparser.
+    pub fn new(src: &'a str, replace_range: Range<usize>, replace_len: usize) -> Self {
+        Self { src, replace_range, replace_len }
+    }
+}
+
+impl Reparser<'_> {
+    /// Reparse the innermost incremental-safe child, or else the whole source.
+    pub fn reparse(&self, green: &mut Rc<GreenNode>) -> Range<usize> {
+        self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true)
+            .unwrap_or_else(|| {
+                *green = parse(self.src);
+                0 .. self.src.len()
+            })
+    }
+
+    fn reparse_step(
+        &self,
+        green: &mut GreenNode,
+        mut offset: usize,
+        parent_mode: TokenMode,
+        mut outermost: bool,
+    ) -> Option<Range<usize>> {
+        let mode = green.kind().mode().unwrap_or(parent_mode);
+        let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
+        let original_count = green.children().len();
+
+        // Save the current indent if this is a markup node.
+        let indent = match green.kind() {
+            NodeKind::Markup(n) => *n,
+            _ => 0,
+        };
+
+        let mut first = None;
+        let mut at_start = true;
+
+        // Find the first child in the range of children to reparse.
+        for (i, child) in green.children_mut().iter_mut().enumerate() {
+            let child_span = offset .. offset + child.len();
+
+            // We look for the start in the element but we only take a position
+            // at the right border if this is markup or the last element.
+            //
+            // This is because in Markup mode, we want to examine all nodes
+            // touching a replacement but in code we want to atomically replace.
+            if child_span.contains(&self.replace_range.start)
+                || (mode == TokenMode::Markup
+                    && self.replace_range.start == child_span.end)
+            {
+                first = Some((i, offset));
+                break;
+            }
+
+            offset += child.len();
+            at_start = child.kind().is_at_start(at_start);
+        }
+
+        let (first_idx, first_start) = first?;
+        let mut last = None;
+
+        // Find the last child in the range of children to reparse.
+        for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) {
+            let child_span = offset .. offset + child.len();
+
+            // Similarly to above, the end of the edit must be in the node but
+            // if it is at the edge and we are in a markup node, we also want
+            // its neighbor!
+            if child_span.contains(&self.replace_range.end)
+                || self.replace_range.end == child_span.end
+                    && (mode != TokenMode::Markup || i + 1 == original_count)
+            {
+                outermost &= i + 1 == original_count;
+                last = Some((i, offset + child.len()));
+                break;
+            } else if mode != TokenMode::Markup
+                || !child.kind().succession_rule().safe_in_markup()
+            {
+                break;
+            }
+
+            offset += child.len();
+        }
+
+        let (last_idx, last_end) = last?;
+        let superseded_range = first_idx .. last_idx + 1;
+        let superseded_span = first_start .. last_end;
+        let last_kind = green.children()[last_idx].kind().clone();
+
+        // First, we check whether the child itself has another, more specific
+        // applicable child.
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
+            let prev_len = child.len();
+
+            if last_kind.succession_rule() != SuccessionRule::Unsafe {
+                if let Some(range) = match child {
+                    Green::Node(node) => self.reparse_step(
+                        Rc::make_mut(node),
+                        first_start,
+                        child_mode,
+                        outermost,
+                    ),
+                    Green::Token(_) => None,
+                } {
+                    let new_len = child.len();
+                    green.update_parent(new_len, prev_len);
+                    return Some(range);
+                }
+            }
+        }
+
+        // We only replace multiple children in markup mode.
+        if superseded_range.len() > 1 && mode == TokenMode::Code {
+            return None;
+        }
+
+        // We now have a child that we can replace and a function to do so.
+        let func = last_kind.reparsing_func(child_mode, indent)?;
+        let succession = last_kind.succession_rule();
+
+        let mut markup_min_column = 0;
+
+        // If this is a markup node, we want to save its indent instead to pass
+        // the right indent argument.
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
+            if let NodeKind::Markup(n) = child.kind() {
+                markup_min_column = *n;
+            }
+        }
+
+        // The span of the to-be-reparsed children in the new source.
+        let newborn_span = superseded_span.start
+            ..
+            superseded_span.end + self.replace_len - self.replace_range.len();
+
+        // For atomic primaries we need to pass in the whole remaining string to
+        // check whether the parser would eat more stuff illicitly.
+        let reparse_span = if succession == SuccessionRule::AtomicPrimary {
+            newborn_span.start .. self.src.len()
+        } else {
+            newborn_span.clone()
+        };
+
+        let mut prefix = "";
+        for (i, c) in self.src[.. reparse_span.start].char_indices().rev() {
+            if is_newline(c) {
+                break;
+            }
+            prefix = &self.src[i .. reparse_span.start];
+        }
+
+        // Do the reparsing!
+        let (mut newborns, terminated) = func(
+            &prefix,
+            &self.src[reparse_span.clone()],
+            at_start,
+            markup_min_column,
+        )?;
+
+        // Make sure that atomic primaries ate only what they were supposed to.
+        if succession == SuccessionRule::AtomicPrimary {
+            let len = newborn_span.len();
+            if newborns.len() > 1 && newborns[0].len() == len {
+                newborns.truncate(1);
+            } else if newborns.iter().map(Green::len).sum::<usize>() != len {
+                return None;
+            }
+        }
+
+        // Do not accept unclosed nodes if the old node wasn't at the right edge
+        // of the tree.
+        if !outermost && !terminated {
+            return None;
+        }
+
+        // If all post- and preconditions match, we are good to go!
+        if validate(
+            green.children(),
+            superseded_range.clone(),
+            at_start,
+            &newborns,
+            mode,
+            succession,
+            newborn_span.clone(),
+            self.src,
+        ) {
+            green.replace_children(superseded_range, newborns);
+            Some(newborn_span)
+        } else {
+            None
+        }
+    }
+}
+
+/// Validate that a node replacement is allowed by post- and preconditions.
+fn validate(
+    superseded: &[Green],
+    superseded_range: Range<usize>,
+    mut at_start: bool,
+    newborns: &[Green],
+    mode: TokenMode,
+    post: SuccessionRule,
+    newborn_span: Range<usize>,
+    src: &str,
+) -> bool {
+    // Atomic primaries must only generate one new child.
+    if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 {
+        return false;
+    }
+
+    // Same kind in mode `inside` must generate only one child and that child
+    // must be of the same kind as previously.
+    if let SuccessionRule::SameKind(inside) = post {
+        let superseded_kind = superseded[superseded_range.start].kind();
+        let superseded_mode = superseded_kind.mode().unwrap_or(mode);
+        if inside.map_or(true, |m| m == superseded_mode)
+            && (newborns.len() != 1 || superseded_kind != newborns[0].kind())
+        {
+            return false;
+        }
+    }
+
+    // Neighbor invariants are only relevant in markup mode.
+    if mode == TokenMode::Code {
+        return true;
+    }
+
+    // Check if there are any `AtStart` predecessors which require a certain
+    // indentation.
+    let s = Scanner::new(src);
+    let mut prev_pos = newborn_span.start;
+    for child in (&superseded[.. superseded_range.start]).iter().rev() {
+        prev_pos -= child.len();
+        if !child.kind().is_trivia() {
+            if child.kind().neighbour_rule() == NeighbourRule::AtStart {
+                let left_col = s.column(prev_pos);
+
+                // Search for the first non-trivia newborn.
+                let mut new_pos = newborn_span.start;
+                let mut child_col = None;
+                for child in newborns {
+                    if !child.kind().is_trivia() {
+                        child_col = Some(s.column(new_pos));
+                        break;
+                    }
+
+                    new_pos += child.len();
+                }
+
+                if let Some(child_col) = child_col {
+                    if child_col > left_col {
+                        return false;
+                    }
+                }
+            }
+
+            break;
+        }
+    }
+
+    // Compute the at_start state behind the new children.
+    for child in newborns {
+        at_start = child.kind().is_at_start(at_start);
+    }
+
+    // Ensure that a possible at-start or not-at-start precondition of
+    // a node after the replacement range is satisfied.
+    for child in &superseded[superseded_range.end ..] {
+        let neighbour_rule = child.kind().neighbour_rule();
+        if (neighbour_rule == NeighbourRule::AtStart && !at_start)
+            || (neighbour_rule == NeighbourRule::NotAtStart && at_start)
+        {
+            return false;
+        }
+
+        if !child.kind().is_trivia() {
+            break;
+        }
+
+        at_start = child.kind().is_at_start(at_start);
+    }
+
+    // Verify that the last of the newborns is not `NotAtEnd`.
+    if newborns.last().map_or(false, |child| {
+        child.kind().neighbour_rule() == NeighbourRule::NotAtEnd
+    }) {
+        return false;
+    }
+
+    // We have to check whether the last non-trivia newborn is `AtStart` and
+    // verify the indent of its right neighbors in order to make sure its
+    // indentation requirements are fulfilled.
+    let mut child_pos = newborn_span.end;
+    for child in newborns.iter().rev() {
+        child_pos -= child.len();
+
+        if child.kind().is_trivia() {
+            continue;
+        }
+
+        if child.kind().neighbour_rule() == NeighbourRule::AtStart {
+            let child_col = s.column(child_pos);
+
+            let mut right_pos = newborn_span.end;
+            for child in &superseded[superseded_range.end ..] {
+                if child.kind().is_trivia() {
+                    right_pos += child.len();
+                    continue;
+                }
+
+                if s.column(right_pos) > child_col {
+                    return false;
+                }
+                break;
+            }
+        }
+        break;
+    }
+
+    true
+}
+
+impl NodeKind {
+    /// Return the correct reparsing function given the postconditions for the
+    /// type.
+    fn reparsing_func(
+        &self,
+        parent_mode: TokenMode,
+        indent: usize,
+    ) -> Option<fn(&str, &str, bool, usize) -> Option<(Vec<Green>, bool)>> {
+        let mode = self.mode().unwrap_or(parent_mode);
+        match self.succession_rule() {
+            SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
+            SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
+                Some(parse_atomic)
+            }
+            SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
+            SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
+                NodeKind::Markup(_) => Some(parse_markup),
+                NodeKind::Template => Some(parse_template),
+                NodeKind::Block => Some(parse_block),
+                NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
+                _ => None,
+            },
+            _ => match mode {
+                TokenMode::Markup if indent == 0 => Some(parse_markup_elements),
+                _ => return None,
+            },
+        }
+    }
+
+    /// Whether it is safe to do incremental parsing on this node. Never allow
+    /// non-termination errors if this is not already the last leaf node.
+    pub fn succession_rule(&self) -> SuccessionRule {
+        match self {
+            // Replacing a bracket, brace, or parenthesis changes whether the
+            // expression is balanced and is therefore not safe.
+            Self::LeftBracket
+            | Self::RightBracket
+            | Self::LeftBrace
+            | Self::RightBrace
+            | Self::LeftParen
+            | Self::RightParen => SuccessionRule::Unsafe,
+
+            // Replacing an operator can change whether the parent is an
+            // operation which makes it unsafe. The star can appear in markup.
+            Self::Star
+            | Self::Comma
+            | Self::Semicolon
+            | Self::Colon
+            | Self::Plus
+            | Self::Minus
+            | Self::Slash
+            | Self::Eq
+            | Self::EqEq
+            | Self::ExclEq
+            | Self::Lt
+            | Self::LtEq
+            | Self::Gt
+            | Self::GtEq
+            | Self::PlusEq
+            | Self::HyphEq
+            | Self::StarEq
+            | Self::SlashEq
+            | Self::Not
+            | Self::And
+            | Self::Or
+            | Self::With
+            | Self::Dots
+            | Self::Arrow => SuccessionRule::Unsafe,
+
+            // These keywords change what kind of expression the parent is and
+            // how far the expression would go.
+            Self::Let
+            | Self::Set
+            | Self::If
+            | Self::Else
+            | Self::For
+            | Self::In
+            | Self::While
+            | Self::Break
+            | Self::Continue
+            | Self::Return
+            | Self::Import
+            | Self::Include
+            | Self::From => SuccessionRule::Unsafe,
+
+            // Changing the heading level, enum numbering, or list bullet
+            // changes the next layer.
+            Self::EnumNumbering(_) => SuccessionRule::Unsafe,
+
+            // This can be anything, so we don't make any promises.
+            Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe,
+
+            // These are complex expressions which may screw with their
+            // environments.
+            Self::Call
+            | Self::Unary
+            | Self::Binary
+            | Self::CallArgs
+            | Self::Named
+            | Self::Spread => SuccessionRule::UnsafeLayer,
+
+            // The closure is a bit magic with the let expression, and also it
+            // is not atomic.
+            Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer,
+
+            // Missing these creates errors for the parents.
+            Self::WithExpr | Self::ForPattern | Self::ImportItems => {
+                SuccessionRule::UnsafeLayer
+            }
+
+            // Only markup is expected at the points where it does occur. The
+            // indentation must be preserved as well, also for the children.
+            Self::Markup(_) => SuccessionRule::SameKind(None),
+
+            // These can appear everywhere and must not change to other stuff
+            // because that could change the outer expression.
+            Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None),
+
+            // These can appear as bodies and would trigger an error if they
+            // became something else.
+            Self::Template => SuccessionRule::SameKind(None),
+            Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)),
+
+            // Whitespace in code mode has to remain whitespace or else the type
+            // of things would change.
+            Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)),
+
+            // These are expressions that can be replaced by other expressions.
+            Self::Ident(_)
+            | Self::Bool(_)
+            | Self::Int(_)
+            | Self::Float(_)
+            | Self::Length(_, _)
+            | Self::Angle(_, _)
+            | Self::Percentage(_)
+            | Self::Str(_)
+            | Self::Fraction(_)
+            | Self::Array
+            | Self::Dict
+            | Self::Group
+            | Self::None
+            | Self::Auto => SuccessionRule::AtomicPrimary,
+
+            // More complex, but still an expression.
+            Self::ForExpr
+            | Self::WhileExpr
+            | Self::IfExpr
+            | Self::LetExpr
+            | Self::SetExpr
+            | Self::ImportExpr
+            | Self::IncludeExpr => SuccessionRule::AtomicPrimary,
+
+            // This element always has to remain in the same column so better
+            // reparse the whole parent.
+            Self::Raw(_) => SuccessionRule::Unsafe,
+
+            // These are all replaceable by other tokens.
+            Self::Parbreak
+            | Self::Linebreak
+            | Self::Text(_)
+            | Self::TextInLine(_)
+            | Self::NonBreakingSpace
+            | Self::EnDash
+            | Self::EmDash
+            | Self::Escape(_)
+            | Self::Strong
+            | Self::Emph
+            | Self::Heading
+            | Self::Enum
+            | Self::List
+            | Self::Math(_) => SuccessionRule::Safe,
+        }
+    }
+
+    /// The appropriate precondition for the type.
+    pub fn neighbour_rule(&self) -> NeighbourRule {
+        match self {
+            Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart,
+            Self::TextInLine(_) => NeighbourRule::NotAtStart,
+            Self::Error(_, _) => NeighbourRule::NotAtEnd,
+            _ => NeighbourRule::None,
+        }
+    }
+}
+
+impl SuccessionRule {
+    /// Whether a node with this condition can be reparsed in markup mode.
+    pub fn safe_in_markup(&self) -> bool {
+        match self {
+            Self::Safe | Self::UnsafeLayer => true,
+            Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup),
+            _ => false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parse::parse;
+    use crate::source::SourceFile;
+
+    #[test]
+    #[rustfmt::skip]
+    fn test_incremental_parse() {
+        #[track_caller]
+        fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
+            let mut source = SourceFile::detached(prev);
+            let range = source.edit(range, with);
+            assert_eq!(range, goal);
+            assert_eq!(parse(source.src()), *source.root());
+        }
+
+        // Test simple replacements.
+        test("hello world", 6 .. 11, "walkers", 5 .. 13);
+        test("some content", 0..12, "", 0..0);
+        test("", 0..0, "do it", 0..5);
+        test("a d e", 1 .. 3, " b c d", 0 .. 8);
+        test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
+        test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
+        test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
+        test("your thing", 5 .. 5, "a", 4 .. 11);
+        test("a your thing a", 6 .. 7, "a", 2 .. 12);
+        test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
+        test("#call() abc", 7 .. 7, "[]", 0 .. 10);
+        test("hi[\n- item\n- item 2\n    - item 3]", 11 .. 11, "  ", 3 .. 34);
+        test("hi\n- item\nno item\n    - item 3", 10 .. 10, "- ", 0 .. 32);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
+        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39);
+        test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9);
+        test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 10 .. 32);
+        test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
+        test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
+        test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
+        test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
+        test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
+
+        // Test the whitespace invariants.
+        test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
+        test("hello \\ world", 7 .. 8, " a", 6 .. 14);
+        test("x = y", 1 .. 1, " + y", 0 .. 6);
+        test("x = y", 1 .. 1, " + y\n", 0 .. 10);
+        test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
+        test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
+        test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
+        test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
+        test("hey  #myfriend", 4 .. 4, "\\", 3 .. 6);
+
+        // Test type invariants.
+        test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
+        test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
+        test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
+        test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17);
+        test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
+        test("a // b c #f()", 3 .. 4, "", 0 .. 12);
+        test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
+        test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
+        test("a #while x {\n g(x) \n}  b", 11 .. 11, "//", 0 .. 26);
+        test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
+        test("a b c", 1 .. 1, "{[}", 0 .. 8);
+
+        // Test unclosed things.
+        test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
+        test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
+        test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
+        test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
+        test("a b c", 1 .. 1, " /* letters */", 0 .. 16);
+        test("a b c", 1 .. 1, " /* letters", 0 .. 16);
+        test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
+        test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
+
+        // Test raw tokens.
+        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+    }
+}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 10aaad23..a9752645 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,10 +1,12 @@
 //! Parsing and tokenization.
 
+mod incremental;
 mod parser;
 mod resolve;
 mod scanner;
 mod tokens;
 
+pub use incremental::*;
 pub use parser::*;
 pub use resolve::*;
 pub use scanner::*;
@@ -14,10 +16,11 @@ use std::rc::Rc;
 
 use crate::syntax::ast::{Associativity, BinOp, UnOp};
 use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
+use crate::util::EcoString;
 
 /// Parse a source file.
 pub fn parse(src: &str) -> Rc<GreenNode> {
-    let mut p = Parser::new(src);
+    let mut p = Parser::new(src, TokenMode::Markup);
     markup(&mut p);
     match p.finish().into_iter().next() {
         Some(Green::Node(node)) => node,
@@ -25,9 +28,108 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
     }
 }
 
+/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
+pub fn parse_atomic(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
+    primary(&mut p, true).ok()?;
+    p.consume_unterminated()
+}
+
+/// Parse a markup expression. Returns `Some` if all of the input was consumed.
+pub fn parse_atomic_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+    markup_expr(&mut p);
+    p.consume_unterminated()
+}
+
+/// Parse some markup. Returns `Some` if all of the input was consumed.
+pub fn parse_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    min_column: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+    if min_column == 0 {
+        markup(&mut p);
+    } else {
+        markup_indented(&mut p, min_column);
+    }
+    p.consume()
+}
+
+/// Parse some markup without the topmost node. Returns `Some` if all of the
+/// input was consumed.
+pub fn parse_markup_elements(
+    prefix: &str,
+    src: &str,
+    mut at_start: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+    while !p.eof() {
+        markup_node(&mut p, &mut at_start);
+    }
+    p.consume()
+}
+
+/// Parse a template literal. Returns `Some` if all of the input was consumed.
+pub fn parse_template(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
+    if !p.at(&NodeKind::LeftBracket) {
+        return None;
+    }
+
+    template(&mut p);
+    p.consume()
+}
+
+/// Parse a code block. Returns `Some` if all of the input was consumed.
+pub fn parse_block(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
+    if !p.at(&NodeKind::LeftBrace) {
+        return None;
+    }
+
+    block(&mut p);
+    p.consume()
+}
+
+/// Parse a comment. Returns `Some` if all of the input was consumed.
+pub fn parse_comment(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
+    comment(&mut p).ok()?;
+    p.consume()
+}
+
 /// Parse markup.
 fn markup(p: &mut Parser) {
-    markup_while(p, true, &mut |_| true)
+    markup_while(p, true, 0, &mut |_| true)
 }
 
 /// Parse markup that stays right of the given column.
@@ -38,7 +140,7 @@ fn markup_indented(p: &mut Parser, column: usize) {
         _ => false,
     });
 
-    markup_while(p, false, &mut |p| match p.peek() {
+    markup_while(p, false, column, &mut |p| match p.peek() {
         Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
         _ => true,
     })
@@ -48,11 +150,11 @@ fn markup_indented(p: &mut Parser, column: usize) {
 ///
 /// If `at_start` is true, things like headings that may only appear at the
 /// beginning of a line or template are allowed.
-fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
+fn markup_while<F>(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F)
 where
     F: FnMut(&mut Parser) -> bool,
 {
-    p.perform(NodeKind::Markup, |p| {
+    p.perform(NodeKind::Markup(column), |p| {
         while !p.eof() && f(p) {
             markup_node(p, &mut at_start);
         }
@@ -98,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
             p.eat();
         }
 
-        NodeKind::Eq if *at_start => heading(p),
-        NodeKind::Minus if *at_start => list_node(p),
-        NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
-
-        // Line-based markup that is not currently at the start of the line.
-        NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
-            p.convert(NodeKind::Text(p.peek_src().into()));
-        }
+        NodeKind::Eq => heading(p, *at_start),
+        NodeKind::Minus => list_node(p, *at_start),
+        NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
 
         // Hashtag + keyword / identifier.
         NodeKind::Ident(_)
@@ -115,17 +212,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
         | NodeKind::While
         | NodeKind::For
         | NodeKind::Import
-        | NodeKind::Include => {
-            let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import);
-            let group = if stmt { Group::Stmt } else { Group::Expr };
-
-            p.start_group(group);
-            let res = expr_prec(p, true, 0);
-            if stmt && res.is_ok() && !p.eof() {
-                p.expected_at("semicolon or line break");
-            }
-            p.end_group();
-        }
+        | NodeKind::Include => markup_expr(p),
 
         // Block and template.
         NodeKind::LeftBrace => block(p),
@@ -139,31 +226,65 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
 }
 
 /// Parse a heading.
-fn heading(p: &mut Parser) {
-    p.perform(NodeKind::Heading, |p| {
-        p.eat_assert(&NodeKind::Eq);
-        while p.eat_if(&NodeKind::Eq) {}
+fn heading(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let current_start = p.current_start();
+    p.eat_assert(&NodeKind::Eq);
+    while p.eat_if(&NodeKind::Eq) {}
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Heading);
+    } else {
+        let text = p.get(current_start .. p.prev_end()).into();
+        marker.convert(p, NodeKind::TextInLine(text));
+    }
 }
 
 /// Parse a single list item.
-fn list_node(p: &mut Parser) {
-    p.perform(NodeKind::List, |p| {
-        p.eat_assert(&NodeKind::Minus);
+fn list_node(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let text: EcoString = p.peek_src().into();
+    p.eat_assert(&NodeKind::Minus);
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::List);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(text));
+    }
 }
 
 /// Parse a single enum item.
-fn enum_node(p: &mut Parser) {
-    p.perform(NodeKind::Enum, |p| {
-        p.eat();
+fn enum_node(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let text: EcoString = p.peek_src().into();
+    p.eat();
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Enum);
+    } else {
+        marker.convert(p, NodeKind::TextInLine(text));
+    }
+}
+
+/// Parse an expression within markup mode.
+fn markup_expr(p: &mut Parser) {
+    if let Some(token) = p.peek() {
+        let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import);
+        let group = if stmt { Group::Stmt } else { Group::Expr };
+
+        p.start_group(group);
+        let res = expr_prec(p, true, 0);
+        if stmt && res.is_ok() && !p.eof() {
+            p.expected_at("semicolon or line break");
+        }
+        p.end_group();
+    }
 }
 
 /// Parse an expression.
@@ -183,13 +304,13 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
 
     // Start the unary expression.
     match p.peek().and_then(UnOp::from_token) {
-        Some(op) => {
+        Some(op) if !atomic => {
             p.eat();
             let prec = op.precedence();
             expr_prec(p, atomic, prec)?;
             marker.end(p, NodeKind::Unary);
         }
-        None => primary(p, atomic)?,
+        _ => primary(p, atomic)?,
     };
 
     loop {
@@ -254,7 +375,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
         }
 
         // Structures.
-        Some(NodeKind::LeftParen) => parenthesized(p),
+        Some(NodeKind::LeftParen) => parenthesized(p, atomic),
         Some(NodeKind::LeftBracket) => {
             template(p);
             Ok(())
@@ -315,7 +436,7 @@ fn literal(p: &mut Parser) -> bool {
 /// - Dictionary literal
 /// - Parenthesized expression
 /// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser) -> ParseResult {
+fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
     let marker = p.marker();
 
     p.start_group(Group::Paren);
@@ -330,7 +451,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
     }
 
     // Arrow means this is a closure's parameter list.
-    if p.at(&NodeKind::Arrow) {
+    if !atomic && p.at(&NodeKind::Arrow) {
         params(p, marker);
         p.eat_assert(&NodeKind::Arrow);
         return marker.perform(p, NodeKind::Closure, expr);
@@ -706,3 +827,14 @@ fn body(p: &mut Parser) -> ParseResult {
     }
     Ok(())
 }
+
+/// Parse a comment.
+fn comment(p: &mut Parser) -> ParseResult {
+    match p.peek() {
+        Some(NodeKind::LineComment | NodeKind::BlockComment) => {
+            p.eat();
+            Ok(())
+        }
+        _ => Err(ParseError),
+    }
+}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index af8a7c5c..4e5b277d 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,8 @@
+use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
 
-use super::{TokenMode, Tokens};
+use super::{Scanner, TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
@@ -21,12 +22,17 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
+    /// Is `Some` if there is an unterminated group at the last position where
+    /// groups were terminated.
+    last_unterminated: Option<usize>,
+    /// Offsets the indentation on the first line of the source.
+    column_offset: usize,
 }
 
 impl<'s> Parser<'s> {
     /// Create a new parser for the source string.
-    pub fn new(src: &'s str) -> Self {
-        let mut tokens = Tokens::new(src, TokenMode::Markup);
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
+        let mut tokens = Tokens::new(src, mode);
         let current = tokens.next();
         Self {
             tokens,
@@ -36,14 +42,38 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
+            last_unterminated: None,
+            column_offset: 0,
         }
     }
 
+    /// Create a new parser for the source string that is prefixed by some text
+    /// that does not need to be parsed but is taken into account for column
+    /// calculation.
+    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
+        let mut p = Self::new(src, mode);
+        p.column_offset = Scanner::new(prefix).column(prefix.len());
+        p
+    }
+
     /// End the parsing process and return the last child.
     pub fn finish(self) -> Vec<Green> {
         self.children
     }
 
+    /// End parsing and return the children and whether the last token was
+    /// terminated, or `None` if input is left or a group was not terminated.
+    pub fn consume(self) -> Option<(Vec<Green>, bool)> {
+        (self.eof() && self.terminated())
+            .then(|| (self.children, self.tokens.terminated()))
+    }
+
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated, even if there remains stuff in the string.
+    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+        self.terminated().then(|| (self.children, self.tokens.terminated()))
+    }
+
     /// Create a new marker.
     pub fn marker(&mut self) -> Marker {
         Marker(self.children.len())
@@ -170,6 +200,14 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().get(self.current_start() .. self.current_end())
     }
 
+    /// Obtain a range of the source code.
+    pub fn get<I>(&self, index: I) -> &'s str
+    where
+        I: SliceIndex<str, Output = str>,
+    {
+        self.tokens.scanner().get(index)
+    }
+
     /// The byte index at which the last non-trivia token ended.
     pub fn prev_end(&self) -> usize {
         self.prev_end
@@ -187,7 +225,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index)
+        self.tokens.scanner().column_offset(index, self.column_offset)
     }
 
     /// Continue parsing in a group.
@@ -225,6 +263,9 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
+        if self.last_unterminated != Some(self.prev_end()) {
+            self.last_unterminated = None;
+        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -243,6 +284,7 @@ impl<'s> Parser<'s> {
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
+                self.last_unterminated = Some(self.prev_end());
             }
         }
 
@@ -260,6 +302,11 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Checks if all groups were correctly terminated.
+    pub fn terminated(&self) -> bool {
+        self.groups.is_empty() && self.last_unterminated.is_none()
+    }
+
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
@@ -320,7 +367,8 @@ impl Parser<'_> {
     /// Push an error into the children list.
     pub fn push_error(&mut self, msg: impl Into<EcoString>) {
         let error = NodeKind::Error(ErrorPos::Full, msg.into());
-        self.children.push(GreenData::new(error, 0).into());
+        let idx = self.trivia_start();
+        self.children.insert(idx.0, GreenData::new(error, 0).into());
     }
 
     /// Eat the current token and add an error that it is unexpected.
@@ -419,6 +467,7 @@ impl Marker {
 }
 
 /// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
 struct GroupEntry {
     /// The kind of group this is. This decides which tokens will end the group.
     /// For example, a [`Group::Paren`] will be ended by
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index c735be40..6db89132 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -162,11 +162,26 @@ impl<'s> Scanner<'s> {
     /// The column index of a given index in the source string.
     #[inline]
     pub fn column(&self, index: usize) -> usize {
-        self.src[.. index]
-            .chars()
+        self.column_offset(index, 0)
+    }
+
+    /// The column index of a given index in the source string when an offset is
+    /// applied to the first line of the string, including at index zero.
+    #[inline]
+    pub fn column_offset(&self, index: usize, offset: usize) -> usize {
+        let mut apply_offset = index == 0;
+        let res = self.src[.. index]
+            .char_indices()
             .rev()
-            .take_while(|&c| !is_newline(c))
-            .count()
+            .take_while(|&(_, c)| !is_newline(c))
+            .inspect(|&(i, _)| {
+                if i == 0 {
+                    apply_offset = true
+                }
+            })
+            .count();
+
+        if apply_offset { res + offset } else { res }
     }
 }
 
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 27ec046d..69c4d2de 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -13,6 +13,7 @@ use crate::util::EcoString;
 pub struct Tokens<'s> {
     s: Scanner<'s>,
     mode: TokenMode,
+    terminated: bool,
 }
 
 /// What kind of tokens to emit.
@@ -28,7 +29,11 @@ impl<'s> Tokens<'s> {
     /// Create a new token iterator with the given mode.
     #[inline]
     pub fn new(src: &'s str, mode: TokenMode) -> Self {
-        Self { s: Scanner::new(src), mode }
+        Self {
+            s: Scanner::new(src),
+            mode,
+            terminated: true,
+        }
     }
 
     /// Get the current token mode.
@@ -63,6 +68,12 @@ impl<'s> Tokens<'s> {
     pub fn scanner(&self) -> Scanner<'s> {
         self.s
     }
+
+    /// Whether the last token was terminated.
+    #[inline]
+    pub fn terminated(&self) -> bool {
+        self.terminated
+    }
 }
 
 impl<'s> Iterator for Tokens<'s> {
@@ -117,9 +128,7 @@ impl<'s> Tokens<'s> {
             '`' => self.raw(),
             '$' => self.math(),
             '-' => self.hyph(),
-            '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => {
-                NodeKind::Eq
-            }
+            '=' => NodeKind::Eq,
             c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
 
             // Plain text.
@@ -248,6 +257,7 @@ impl<'s> Tokens<'s> {
                             )
                         }
                     } else {
+                        self.terminated = false;
                         NodeKind::Error(
                             ErrorPos::End,
                             "expected closing brace".into(),
@@ -281,10 +291,8 @@ impl<'s> Tokens<'s> {
             } else {
                 NodeKind::EnDash
             }
-        } else if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::Minus
         } else {
-            NodeKind::Text('-'.into())
+            NodeKind::Minus
         }
     }
 
@@ -300,11 +308,7 @@ impl<'s> Tokens<'s> {
             None
         };
 
-        if self.s.check_or(true, char::is_whitespace) {
-            NodeKind::EnumNumbering(number)
-        } else {
-            NodeKind::Text(self.s.eaten_from(start).into())
-        }
+        NodeKind::EnumNumbering(number)
     }
 
     fn raw(&mut self) -> NodeKind {
@@ -346,6 +350,7 @@ impl<'s> Tokens<'s> {
             let remaining = backticks - found;
             let noun = if remaining == 1 { "backtick" } else { "backticks" };
 
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if found == 0 {
@@ -393,6 +398,7 @@ impl<'s> Tokens<'s> {
                 display,
             }))
         } else {
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if !display || (!escaped && dollar) {
@@ -481,18 +487,23 @@ impl<'s> Tokens<'s> {
         if self.s.eat_if('"') {
             NodeKind::Str(string)
         } else {
+            self.terminated = false;
             NodeKind::Error(ErrorPos::End, "expected quote".into())
         }
     }
 
     fn line_comment(&mut self) -> NodeKind {
         self.s.eat_until(is_newline);
+        if self.s.peek().is_none() {
+            self.terminated = false;
+        }
         NodeKind::LineComment
     }
 
     fn block_comment(&mut self) -> NodeKind {
         let mut state = '_';
         let mut depth = 1;
+        self.terminated = false;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
         while let Some(c) = self.s.eat() {
@@ -500,6 +511,7 @@ impl<'s> Tokens<'s> {
                 ('*', '/') => {
                     depth -= 1;
                     if depth == 0 {
+                        self.terminated = true;
                         break;
                     }
                     '_'
@@ -713,6 +725,7 @@ mod tests {
         t!(Both["a1/"]: "  \n"         => Space(1));
         t!(Both["a1/"]: "  \n   "      => Space(1));
         t!(Both["a1/"]: "\r\n"         => Space(1));
+        t!(Both["a1/"]: "\r\n\r"       => Space(2));
         t!(Both["a1/"]: "  \n\t \n  "  => Space(2));
         t!(Both["a1/"]: "\n\r"         => Space(2));
         t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
@@ -722,12 +735,12 @@ mod tests {
     fn test_tokenize_text() {
         // Test basic text.
         t!(Markup[" /"]: "hello"       => Text("hello"));
-        t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
+        t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world"));
 
         // Test code symbols in text.
         t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
         t!(Markup[" /"]: ";:,|/+"  => Text(";:,|"), Text("/+"));
-        t!(Markup[" /"]: "=-a"     => Text("="), Text("-"), Text("a"));
+        t!(Markup[" /"]: "=-a"     => Eq, Minus, Text("a"));
         t!(Markup[" "]: "#123"     => Text("#"), Text("123"));
 
         // Test text ends.
@@ -784,7 +797,7 @@ mod tests {
         t!(Markup["a1/"]: "- "  => Minus, Space(0));
         t!(Markup[" "]: "."     => EnumNumbering(None));
         t!(Markup[" "]: "1."    => EnumNumbering(Some(1)));
-        t!(Markup[" "]: "1.a"   => Text("1."), Text("a"));
+        t!(Markup[" "]: "1.a"   => EnumNumbering(Some(1)), Text("a"));
         t!(Markup[" /"]: "a1."  => Text("a1."));
     }
 
diff --git a/src/source.rs b/src/source.rs
index 432688a0..7afeaa8a 100644
--- a/src/source.rs
+++ b/src/source.rs
@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::diag::TypResult;
 use crate::loading::{FileHash, Loader};
-use crate::parse::{is_newline, parse, Scanner};
+use crate::parse::{is_newline, parse, Reparser, Scanner};
 use crate::syntax::ast::Markup;
 use crate::syntax::{self, Category, GreenNode, RedNode};
 use crate::util::PathExt;
@@ -154,9 +154,14 @@ impl SourceFile {
         &self.root
     }
 
+    /// The root red node of the file's untyped red tree.
+    pub fn red(&self) -> RedNode {
+        RedNode::from_root(self.root.clone(), self.id)
+    }
+
     /// The root node of the file's typed abstract syntax tree.
     pub fn ast(&self) -> TypResult<Markup> {
-        let red = RedNode::from_root(self.root.clone(), self.id);
+        let red = self.red();
         let errors = red.errors();
         if errors.is_empty() {
             Ok(red.cast().unwrap())
@@ -265,10 +270,11 @@ impl SourceFile {
 
     /// Edit the source file by replacing the given range.
     ///
-    /// This panics if the `replace` range is out of bounds.
-    pub fn edit(&mut self, replace: Range<usize>, with: &str) {
+    /// Returns the range of the section in the new source that was ultimately
+    /// reparsed. The method panics if the `replace` range is out of bounds.
+    pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
         let start = replace.start;
-        self.src.replace_range(replace, with);
+        self.src.replace_range(replace.clone(), with);
 
         // Remove invalidated line starts.
         let line = self.byte_to_line(start).unwrap();
@@ -283,8 +289,8 @@ impl SourceFile {
         self.line_starts
             .extend(newlines(&self.src[start ..]).map(|idx| start + idx));
 
-        // Reparse.
-        self.root = parse(&self.src);
+        // Incrementally reparse the replaced range.
+        Reparser::new(&self.src, replace, with.len()).reparse(&mut self.root)
     }
 
     /// Provide highlighting categories for the given range of the source file.
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index ae8ecdc9..bea4ef00 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -53,7 +53,7 @@ macro_rules! node {
 
 node! {
     /// The syntactical root capable of representing a full parsed document.
-    Markup
+    Markup: NodeKind::Markup(_)
 }
 
 impl Markup {
@@ -65,7 +65,9 @@ impl Markup {
             NodeKind::Parbreak => Some(MarkupNode::Parbreak),
             NodeKind::Strong => Some(MarkupNode::Strong),
             NodeKind::Emph => Some(MarkupNode::Emph),
-            NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
+            NodeKind::Text(s) | NodeKind::TextInLine(s) => {
+                Some(MarkupNode::Text(s.clone()))
+            }
             NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())),
             NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
             NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs
index 85fbef12..9f7365a8 100644
--- a/src/syntax/highlight.rs
+++ b/src/syntax/highlight.rs
@@ -154,10 +154,11 @@ impl Category {
             NodeKind::Str(_) => Some(Category::String),
             NodeKind::Error(_, _) => Some(Category::Invalid),
             NodeKind::Unknown(_) => Some(Category::Invalid),
-            NodeKind::Markup => None,
+            NodeKind::Markup(_) => None,
             NodeKind::Space(_) => None,
             NodeKind::Parbreak => None,
             NodeKind::Text(_) => None,
+            NodeKind::TextInLine(_) => None,
             NodeKind::List => None,
             NodeKind::Enum => None,
             NodeKind::Array => None,
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index d9ad42a8..3a0f3a5e 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -6,6 +6,7 @@ mod pretty;
 mod span;
 
 use std::fmt::{self, Debug, Display, Formatter};
+use std::ops::Range;
 use std::rc::Rc;
 
 pub use highlight::*;
@@ -15,6 +16,7 @@ pub use span::*;
 use self::ast::{MathNode, RawNode, TypedNode};
 use crate::diag::Error;
 use crate::geom::{AngularUnit, LengthUnit};
+use crate::parse::TokenMode;
 use crate::source::SourceId;
 use crate::util::EcoString;
 
@@ -62,6 +64,14 @@ impl Green {
         }
     }
 
+    /// Whether the node is a leaf node in the green tree.
+    pub fn is_leaf(&self) -> bool {
+        match self {
+            Green::Node(n) => n.children().is_empty(),
+            Green::Token(_) => true,
+        }
+    }
+
     /// Change the type of the node.
     pub fn convert(&mut self, kind: NodeKind) {
         match self {
@@ -127,6 +137,52 @@ impl GreenNode {
     pub fn children(&self) -> &[Green] {
         &self.children
     }
+
+    /// The node's metadata.
+    fn data(&self) -> &GreenData {
+        &self.data
+    }
+
+    /// The node's type.
+    pub fn kind(&self) -> &NodeKind {
+        self.data().kind()
+    }
+
+    /// The node's length.
+    pub fn len(&self) -> usize {
+        self.data().len()
+    }
+
+    /// The node's children, mutably.
+    pub(crate) fn children_mut(&mut self) -> &mut [Green] {
+        &mut self.children
+    }
+
+    /// Replaces a range of children with some replacement.
+    pub(crate) fn replace_children(
+        &mut self,
+        range: Range<usize>,
+        replacement: Vec<Green>,
+    ) {
+        let superseded = &self.children[range.clone()];
+        let superseded_len: usize = superseded.iter().map(Green::len).sum();
+        let replacement_len: usize = replacement.iter().map(Green::len).sum();
+
+        // If we're erroneous, but not due to the superseded range, then we will
+        // still be erroneous after the replacement.
+        let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous);
+
+        self.children.splice(range, replacement);
+        self.data.len = self.data.len + replacement_len - superseded_len;
+        self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous);
+    }
+
+    /// Update the length of this node given the old and new length of
+    /// replaced children.
+    pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) {
+        self.data.len = self.data.len() + new_len - old_len;
+        self.erroneous = self.children.iter().any(Green::erroneous);
+    }
 }
 
 impl From<GreenNode> for Green {
@@ -266,7 +322,7 @@ impl Debug for RedNode {
     }
 }
 
-/// A borrowed wrapper for a green node with span information.
+/// A borrowed wrapper for a [`GreenNode`] with span information.
 ///
 /// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node.
 #[derive(Copy, Clone, PartialEq)]
@@ -301,6 +357,11 @@ impl<'a> RedRef<'a> {
         Span::new(self.id, self.offset, self.offset + self.green.len())
     }
 
+    /// Whether the node is a leaf node.
+    pub fn is_leaf(self) -> bool {
+        self.green.is_leaf()
+    }
+
     /// The error messages for this node and its descendants.
     pub fn errors(self) -> Vec<Error> {
         if !self.green.erroneous() {
@@ -325,6 +386,15 @@ impl<'a> RedRef<'a> {
         }
     }
 
+    /// Returns all leaf descendants of this node (may include itself).
+    pub fn leafs(self) -> Vec<Self> {
+        if self.is_leaf() {
+            vec![self]
+        } else {
+            self.children().flat_map(Self::leafs).collect()
+        }
+    }
+
     /// Convert the node to a typed AST node.
     pub fn cast<T>(self) -> Option<T>
     where
@@ -502,8 +572,8 @@ pub enum NodeKind {
     Include,
     /// The `from` keyword.
     From,
-    /// Template markup.
-    Markup,
+    /// Template markup of which all lines must start in some column.
+    Markup(usize),
     /// One or more whitespace characters.
     Space(usize),
     /// A forced line break: `\`.
@@ -512,6 +582,8 @@ pub enum NodeKind {
     Parbreak,
     /// A consecutive non-markup string.
     Text(EcoString),
+    /// A text node that cannot appear at the beginning of a source line.
+    TextInLine(EcoString),
     /// A non-breaking space: `~`.
     NonBreakingSpace,
     /// An en-dash: `--`.
@@ -648,11 +720,71 @@ impl NodeKind {
         matches!(self, Self::LeftParen | Self::RightParen)
     }
 
+    /// Whether this is whitespace.
+    pub fn is_whitespace(&self) -> bool {
+        matches!(self, Self::Space(_) | Self::Parbreak)
+    }
+
+    /// Whether this is trivia.
+    pub fn is_trivia(&self) -> bool {
+        self.is_whitespace() || matches!(self, Self::LineComment | Self::BlockComment)
+    }
+
     /// Whether this is some kind of error.
     pub fn is_error(&self) -> bool {
         matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_))
     }
 
+    /// Whether this node is `at_start` given the previous value of the property.
+    pub fn is_at_start(&self, prev: bool) -> bool {
+        match self {
+            Self::Space(n) if *n > 0 => true,
+            Self::Parbreak => true,
+            Self::LineComment | Self::BlockComment => prev,
+            _ => false,
+        }
+    }
+
+    /// The token mode this kind is specific to, or `None` if it has none.
+    pub fn mode(&self) -> Option<TokenMode> {
+        match self {
+            Self::Markup(_)
+            | Self::Linebreak
+            | Self::Parbreak
+            | Self::Text(_)
+            | Self::TextInLine(_)
+            | Self::NonBreakingSpace
+            | Self::EnDash
+            | Self::EmDash
+            | Self::Escape(_)
+            | Self::Strong
+            | Self::Emph
+            | Self::Heading
+            | Self::Enum
+            | Self::EnumNumbering(_)
+            | Self::List
+            | Self::Raw(_)
+            | Self::Math(_) => Some(TokenMode::Markup),
+            Self::Template
+            | Self::Space(_)
+            | Self::Block
+            | Self::Ident(_)
+            | Self::LetExpr
+            | Self::IfExpr
+            | Self::WhileExpr
+            | Self::ForExpr
+            | Self::ImportExpr
+            | Self::Call
+            | Self::IncludeExpr
+            | Self::LineComment
+            | Self::BlockComment
+            | Self::Error(_, _)
+            | Self::Minus
+            | Self::Eq => None,
+            _ => Some(TokenMode::Code),
+        }
+    }
+
     /// A human-readable name for the kind.
     pub fn as_str(&self) -> &'static str {
         match self {
@@ -701,11 +833,11 @@ impl NodeKind {
             Self::Import => "keyword `import`",
             Self::Include => "keyword `include`",
             Self::From => "keyword `from`",
-            Self::Markup => "markup",
+            Self::Markup(_) => "markup",
             Self::Space(_) => "space",
             Self::Linebreak => "forced linebreak",
             Self::Parbreak => "paragraph break",
-            Self::Text(_) => "text",
+            Self::Text(_) | Self::TextInLine(_) => "text",
             Self::NonBreakingSpace => "non-breaking space",
             Self::EnDash => "en dash",
             Self::EmDash => "em dash",
diff --git a/tests/typ/code/block.typ b/tests/typ/code/block.typ
index 45ee9204..5939ba9c 100644
--- a/tests/typ/code/block.typ
+++ b/tests/typ/code/block.typ
@@ -129,7 +129,7 @@
 }
 
 ---
-// Error: 2:1 expected closing brace
+// Error: 2 expected closing brace
 {
 
 ---
diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ
index 7fd6e0da..a95d651a 100644
--- a/tests/typ/code/let.typ
+++ b/tests/typ/code/let.typ
@@ -57,7 +57,7 @@ Three
 
 // Terminated by semicolon even though we are in a paren group.
 // Error: 18 expected expression
-// Error: 19 expected closing paren
+// Error: 18 expected closing paren
 #let v5 = (1, 2 + ; Five
 
 ---
diff --git a/tests/typeset.rs b/tests/typeset.rs
index 164ccc91..b1296886 100644
--- a/tests/typeset.rs
+++ b/tests/typeset.rs
@@ -1,6 +1,7 @@
 use std::env;
 use std::ffi::OsStr;
 use std::fs;
+use std::ops::Range;
 use std::path::Path;
 use std::rc::Rc;
 
@@ -186,6 +187,7 @@ fn test(
     let mut line = 0;
     let mut compare_ref = true;
     let mut compare_ever = false;
+    let mut rng = LinearShift::new();
 
     let parts: Vec<_> = src.split("\n---").collect();
     for (i, &part) in parts.iter().enumerate() {
@@ -202,8 +204,16 @@ fn test(
                 }
             }
         } else {
-            let (part_ok, compare_here, part_frames) =
-                test_part(ctx, src_path, part.into(), i, compare_ref, line, debug);
+            let (part_ok, compare_here, part_frames) = test_part(
+                ctx,
+                src_path,
+                part.into(),
+                i,
+                compare_ref,
+                line,
+                debug,
+                &mut rng,
+            );
             ok &= part_ok;
             compare_ever |= compare_here;
             frames.extend(part_frames);
@@ -252,14 +262,15 @@ fn test_part(
     compare_ref: bool,
     line: usize,
     debug: bool,
+    rng: &mut LinearShift,
 ) -> (bool, bool, Vec<Rc<Frame>>) {
     let id = ctx.sources.provide(src_path, src);
     let source = ctx.sources.get(id);
 
     let (local_compare_ref, mut ref_errors) = parse_metadata(&source);
     let compare_ref = local_compare_ref.unwrap_or(compare_ref);
+    let mut ok = test_reparse(ctx.sources.get(id).src(), i, rng);
 
-    let mut ok = true;
     let (frames, mut errors) = match ctx.evaluate(id) {
         Ok(module) => {
             let tree = module.into_root();
@@ -366,6 +377,104 @@ fn test_incremental(
     ok
 }
 
+/// Pseudorandomly edit the source file and test whether a reparse produces the
+/// same result as a clean parse.
+///
+/// For every started 400 bytes of source, one pseudo-random range is replaced
+/// with a supplement (skipped if the range is not on char boundaries). Then a
+/// supplement is inserted at the start of one pseudo-randomly chosen leaf node.
+///
+/// Returns `true` only if all applied edits reparsed to the clean-parse tree.
+fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
+    // Strings that get spliced into the source by the edits below.
+    let supplements = [
+        "[", ")", "#rect()",
+        "a word", ", a: 1", "10.0",
+        ":", "if i == 0 {true}", "for",
+        "* hello *", "//", "/*",
+        "\\u{12e4}", "```typst", " ",
+        "trees", "\\", "$ a $",
+        "2.", "-", "5",
+    ];
+
+    let mut ok = true;
+
+    // Apply a single edit to a fresh parse of `src` and check that the
+    // incrementally updated tree equals a clean parse of the edited text.
+    let apply = |replace: Range<usize>, with| {
+        let mut incr_source = SourceFile::detached(src);
+        // Sanity check: the unedited tree has to span the whole source.
+        if incr_source.root().len() != src.len() {
+            println!(
+                "    Subtest {} tree length {} does not match string length {} ❌",
+                i,
+                incr_source.root().len(),
+                src.len(),
+            );
+            return false;
+        }
+
+        incr_source.edit(replace.clone(), with);
+        let edited_src = incr_source.src();
+
+        // The reference tree comes from parsing the edited text from scratch.
+        let ref_source = SourceFile::detached(edited_src);
+        let incr_root = incr_source.root();
+        let ref_root = ref_source.root();
+        if incr_root != ref_root {
+            println!(
+                "    Subtest {} reparse differs from clean parse when inserting '{}' at {}-{} ❌",
+                i, with, replace.start, replace.end,
+            );
+            println!(
+                "\n    Expected reference tree:\n{:#?}\n\n    Found incremental tree:\n{:#?}",
+                ref_root, incr_root
+            );
+            println!("Full source ({}):\n\"{:?}\"", edited_src.len(), edited_src);
+            false
+        } else {
+            true
+        }
+    };
+
+    // Map the next random ratio onto `range`. The result is clamped below
+    // `range.end` (for non-empty ranges) because float rounding may otherwise
+    // land exactly on the end, which is out of bounds when used as an index.
+    let mut pick = |range: Range<usize>| {
+        let ratio = rng.next();
+        let span = (range.end - range.start) as f64;
+        let pos = (range.start as f64 + ratio * span).floor() as usize;
+        pos.min(range.end.max(range.start + 1) - 1)
+    };
+
+    // One range replacement per started 400 bytes of source.
+    let insertions = (src.len() as f64 / 400.0).ceil() as usize;
+
+    for _ in 0 .. insertions {
+        let supplement = supplements[pick(0 .. supplements.len())];
+        let start = pick(0 .. src.len());
+        let end = pick(start .. src.len());
+
+        // Edits must not split a UTF-8 sequence; such picks are dropped.
+        if !src.is_char_boundary(start) || !src.is_char_boundary(end) {
+            continue;
+        }
+
+        ok &= apply(start .. end, supplement);
+    }
+
+    let red = SourceFile::detached(src).red();
+    let leafs = red.as_ref().leafs();
+    // Checked lookup so that a tree without leaves cannot cause a panic.
+    if let Some(leaf) = leafs.get(pick(0 .. leafs.len())) {
+        let leaf_start = leaf.span().start;
+        let supplement = supplements[pick(0 .. supplements.len())];
+        ok &= apply(leaf_start .. leaf_start, supplement);
+    }
+
+    ok
+}
+
 fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
     let mut compare_ref = None;
     let mut errors = vec![];
@@ -823,3 +932,24 @@ where
     FileDescriptor::redirect_stdio(&stdout, Stdout).unwrap();
     result
 }
+
+/// A linear-feedback shift register using XOR-shifts, usable as a PRNG.
+struct LinearShift(u64);
+
+impl LinearShift {
+    /// Initialize the shift register with a pre-set seed.
+    pub fn new() -> Self {
+        Self(0xACE5)
+    }
+
+    /// Advance the register and return a pseudo-random number in `[0.0, 1.0)`.
+    pub fn next(&mut self) -> f64 {
+        self.0 ^= self.0 >> 3;
+        self.0 ^= self.0 << 14;
+        self.0 ^= self.0 >> 28;
+        self.0 ^= self.0 << 36;
+        self.0 ^= self.0 >> 52;
+        // Use just the top 53 bits: they fit an `f64` exactly, so the result is < 1.0.
+        (self.0 >> 11) as f64 / (1u64 << 53) as f64
+    }
+}
author	Laurenz <laurmaedje@gmail.com>	2022-01-04 00:27:05 +0100
committer	GitHub <noreply@github.com>	2022-01-04 00:27:05 +0100
commit	4c81a5d43eabd959dbb500a8076f99f21bd037bd (patch)
tree	03349230f74786c7128876889c07a31a4932f108
parent	52761a3baa901865b1fc42366017740cfa7eb566 (diff)
parent	c994cfa7d814e3909682b19322867ed5c676c453 (diff)