summaryrefslogtreecommitdiff
path: root/src/syntax
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-10-17 19:26:24 +0200
committerLaurenz <laurmaedje@gmail.com>2022-10-17 20:04:22 +0200
commite21822665591dc19766275da1e185215a6b945ef (patch)
tree7788e211c3c33c8b5a8ad7d5eb7574e33631eb16 /src/syntax
parent4fd031a256b2ecfe524859d5599fafb386395572 (diff)
Merge some modules
Diffstat (limited to 'src/syntax')
-rw-r--r--src/syntax/highlight.rs6
-rw-r--r--src/syntax/incremental.rs518
-rw-r--r--src/syntax/mod.rs567
-rw-r--r--src/syntax/node.rs548
-rw-r--r--src/syntax/parser.rs558
-rw-r--r--src/syntax/parsing.rs1140
-rw-r--r--src/syntax/resolve.rs237
-rw-r--r--src/syntax/source.rs430
-rw-r--r--src/syntax/span.rs6
-rw-r--r--src/syntax/tokens.rs1176
10 files changed, 4638 insertions, 548 deletions
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs
index bfb36078..325b7274 100644
--- a/src/syntax/highlight.rs
+++ b/src/syntax/highlight.rs
@@ -6,7 +6,7 @@ use std::ops::Range;
use syntect::highlighting::{Color, FontStyle, Highlighter, Style, Theme};
use syntect::parsing::Scope;
-use super::{NodeKind, SyntaxNode};
+use super::{parse, NodeKind, SyntaxNode};
/// Highlight source text into a standalone HTML document.
pub fn highlight_html(text: &str, theme: &Theme) -> String {
@@ -28,7 +28,7 @@ pub fn highlight_pre(text: &str, theme: &Theme) -> String {
let mut buf = String::new();
buf.push_str("<pre>\n");
- let root = crate::parse::parse(text);
+ let root = parse(text);
highlight_themed(&root, theme, |range, style| {
let styled = style != Style::default();
if styled {
@@ -401,8 +401,8 @@ impl Category {
#[cfg(test)]
mod tests {
+ use super::super::Source;
use super::*;
- use crate::source::Source;
#[test]
fn test_highlighting() {
diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs
new file mode 100644
index 00000000..529defd7
--- /dev/null
+++ b/src/syntax/incremental.rs
@@ -0,0 +1,518 @@
+use std::ops::Range;
+use std::sync::Arc;
+
+use super::{
+ is_newline, parse, reparse_code_block, reparse_content_block,
+ reparse_markup_elements, InnerNode, NodeKind, Span, SyntaxNode,
+};
+
+/// Refresh the given syntax node with as little parsing as possible.
+///
+/// Takes the new source, the range in the old source that was replaced and the
+/// length of the replacement.
+///
+/// Returns the range in the new source that was ultimately reparsed.
+pub fn reparse(
+ root: &mut SyntaxNode,
+ text: &str,
+ replaced: Range<usize>,
+ replacement_len: usize,
+) -> Range<usize> {
+ if let SyntaxNode::Inner(inner) = root {
+ let change = Change { text, replaced, replacement_len };
+ if let Some(range) = try_reparse(&change, Arc::make_mut(inner), 0, true, true) {
+ return range;
+ }
+ }
+
+ let id = root.span().source();
+ *root = parse(text);
+ root.numberize(id, Span::FULL).unwrap();
+ 0 .. text.len()
+}
+
+/// Try to reparse inside the given node.
+fn try_reparse(
+ change: &Change,
+ node: &mut InnerNode,
+ mut offset: usize,
+ outermost: bool,
+ safe_to_replace: bool,
+) -> Option<Range<usize>> {
+ let is_markup = matches!(node.kind(), NodeKind::Markup { .. });
+ let original_count = node.children().len();
+ let original_offset = offset;
+
+ let mut search = SearchState::default();
+ let mut ahead: Option<Ahead> = None;
+
+ // Whether the first node that should be replaced is at start.
+ let mut at_start = true;
+
+ // Whether the last searched child is the outermost child.
+ let mut child_outermost = false;
+
+ // Find the first child in the range of children to reparse.
+ for (i, child) in node.children().enumerate() {
+ let pos = NodePos { idx: i, offset };
+ let child_span = offset .. offset + child.len();
+ child_outermost = outermost && i + 1 == original_count;
+
+ match search {
+ SearchState::NoneFound => {
+ // The edit is contained within the span of the current element.
+ if child_span.contains(&change.replaced.start)
+ && child_span.end >= change.replaced.end
+ {
+ // In Markup mode, we want to consider a non-whitespace
+ // neighbor if the edit is on the node boundary.
+ search = if is_markup && child_span.end == change.replaced.end {
+ SearchState::RequireNonTrivia(pos)
+ } else {
+ SearchState::Contained(pos)
+ };
+ } else if child_span.contains(&change.replaced.start) {
+ search = SearchState::Inside(pos);
+ } else if child_span.end == change.replaced.start
+ && change.replaced.start == change.replaced.end
+ && child_outermost
+ {
+ search = SearchState::SpanFound(pos, pos);
+ } else {
+ // Update compulsory state of `ahead_nontrivia`.
+ if let Some(ahead_nontrivia) = ahead.as_mut() {
+ if let NodeKind::Space { newlines: (1 ..) } = child.kind() {
+ ahead_nontrivia.newline();
+ }
+ }
+
+ // We look only for non spaces, non-semicolon and also
+ // reject text that points to the special case for URL
+ // evasion and line comments.
+ if !child.kind().is_space()
+ && child.kind() != &NodeKind::Semicolon
+ && child.kind() != &NodeKind::Text('/'.into())
+ && (ahead.is_none() || change.replaced.start > child_span.end)
+ && !ahead.map_or(false, Ahead::is_compulsory)
+ {
+ ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind())));
+ }
+
+ at_start = next_at_start(child.kind(), at_start);
+ }
+ }
+ SearchState::Inside(start) => {
+ if child_span.end == change.replaced.end {
+ search = SearchState::RequireNonTrivia(start);
+ } else if child_span.end > change.replaced.end {
+ search = SearchState::SpanFound(start, pos);
+ }
+ }
+ SearchState::RequireNonTrivia(start) => {
+ if !child.kind().is_trivia() {
+ search = SearchState::SpanFound(start, pos);
+ }
+ }
+ _ => unreachable!(),
+ }
+
+ offset += child.len();
+
+ if search.done().is_some() {
+ break;
+ }
+ }
+
+ // If we were looking for a non-whitespace element and hit the end of
+ // the file here, we instead use EOF as the end of the span.
+ if let SearchState::RequireNonTrivia(start) = search {
+ search = SearchState::SpanFound(start, NodePos {
+ idx: node.children().len() - 1,
+ offset: offset - node.children().last().unwrap().len(),
+ })
+ }
+
+ if let SearchState::Contained(pos) = search {
+ // Do not allow replacement of elements inside of constructs whose
+ // opening and closing brackets look the same.
+ let safe_inside = is_bounded(node.kind());
+ let child = &mut node.children_mut()[pos.idx];
+ let prev_len = child.len();
+ let prev_descendants = child.descendants();
+
+ if let Some(range) = match child {
+ SyntaxNode::Inner(node) => try_reparse(
+ change,
+ Arc::make_mut(node),
+ pos.offset,
+ child_outermost,
+ safe_inside,
+ ),
+ SyntaxNode::Leaf(_) => None,
+ } {
+ let new_len = child.len();
+ let new_descendants = child.descendants();
+ node.update_parent(prev_len, new_len, prev_descendants, new_descendants);
+ return Some(range);
+ }
+
+ let superseded_span = pos.offset .. pos.offset + prev_len;
+ let func: Option<ReparseMode> = match child.kind() {
+ NodeKind::CodeBlock => Some(ReparseMode::Code),
+ NodeKind::ContentBlock => Some(ReparseMode::Content),
+ _ => None,
+ };
+
+ // Return if the element was reparsable on its own, otherwise try to
+ // treat it as a markup element.
+ if let Some(func) = func {
+ if let Some(result) = replace(
+ change,
+ node,
+ func,
+ pos.idx .. pos.idx + 1,
+ superseded_span,
+ outermost,
+ ) {
+ return Some(result);
+ }
+ }
+ }
+
+ // Make sure this is a markup node and that we may replace. If so, save
+ // the current indent.
+ let min_indent = match node.kind() {
+ NodeKind::Markup { min_indent } if safe_to_replace => *min_indent,
+ _ => return None,
+ };
+
+ let (mut start, end) = search.done()?;
+ if let Some(ahead) = ahead {
+ if start.offset == change.replaced.start || ahead.is_compulsory() {
+ start = ahead.pos;
+ at_start = ahead.at_start;
+ }
+ } else {
+ start = NodePos { idx: 0, offset: original_offset };
+ }
+
+ let superseded_span =
+ start.offset .. end.offset + node.children().as_slice()[end.idx].len();
+
+ replace(
+ change,
+ node,
+ ReparseMode::MarkupElements { at_start, min_indent },
+ start.idx .. end.idx + 1,
+ superseded_span,
+ outermost,
+ )
+}
+
+/// Reparse the superseded nodes and replace them.
+fn replace(
+ change: &Change,
+ node: &mut InnerNode,
+ mode: ReparseMode,
+ superseded_idx: Range<usize>,
+ superseded_span: Range<usize>,
+ outermost: bool,
+) -> Option<Range<usize>> {
+ let superseded_start = superseded_idx.start;
+
+ let differential: isize =
+ change.replacement_len as isize - change.replaced.len() as isize;
+ let newborn_end = (superseded_span.end as isize + differential) as usize;
+ let newborn_span = superseded_span.start .. newborn_end;
+
+ let mut prefix = "";
+ for (i, c) in change.text[.. newborn_span.start].char_indices().rev() {
+ if is_newline(c) {
+ break;
+ }
+ prefix = &change.text[i .. newborn_span.start];
+ }
+
+ let (newborns, terminated, amount) = match mode {
+ ReparseMode::Code => reparse_code_block(
+ &prefix,
+ &change.text[newborn_span.start ..],
+ newborn_span.len(),
+ ),
+ ReparseMode::Content => reparse_content_block(
+ &prefix,
+ &change.text[newborn_span.start ..],
+ newborn_span.len(),
+ ),
+ ReparseMode::MarkupElements { at_start, min_indent } => reparse_markup_elements(
+ &prefix,
+ &change.text[newborn_span.start ..],
+ newborn_span.len(),
+ differential,
+ &node.children().as_slice()[superseded_start ..],
+ at_start,
+ min_indent,
+ ),
+ }?;
+
+ // Do not accept unclosed nodes if the old node wasn't at the right edge
+ // of the tree.
+ if !outermost && !terminated {
+ return None;
+ }
+
+ node.replace_children(superseded_start .. superseded_start + amount, newborns)
+ .ok()?;
+
+ Some(newborn_span)
+}
+
+/// A description of a change.
+struct Change<'a> {
+ /// The new source code, with the change applied.
+ text: &'a str,
+ /// Which range in the old source file was changed.
+ replaced: Range<usize>,
+ /// How many characters replaced the text in `replaced`.
+ replacement_len: usize,
+}
+
+/// Encodes the state machine of the search for the nodes that are pending
+/// replacement.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum SearchState {
+ /// Neither an end nor a start have been found as of now.
+ /// The latest non-trivia child is continually saved.
+ NoneFound,
+ /// The search has concluded by finding a node that fully contains the
+ /// modifications.
+ Contained(NodePos),
+ /// The search has found the start of the modified nodes.
+ Inside(NodePos),
+ /// The search has found the end of the modified nodes but the change
+/// touched its boundaries so another non-trivia node is needed.
+ RequireNonTrivia(NodePos),
+ /// The search has concluded by finding a start and an end index for nodes
+ /// with a pending reparse.
+ SpanFound(NodePos, NodePos),
+}
+
+impl Default for SearchState {
+ fn default() -> Self {
+ Self::NoneFound
+ }
+}
+
+impl SearchState {
+ fn done(self) -> Option<(NodePos, NodePos)> {
+ match self {
+ Self::NoneFound => None,
+ Self::Contained(s) => Some((s, s)),
+ Self::Inside(_) => None,
+ Self::RequireNonTrivia(_) => None,
+ Self::SpanFound(s, e) => Some((s, e)),
+ }
+ }
+}
+
+/// The position of a syntax node.
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct NodePos {
+ /// The index in the parent node.
+ idx: usize,
+ /// The byte offset in the string.
+ offset: usize,
+}
+
+/// An ahead node with an index and whether it is `at_start`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct Ahead {
+ /// The position of the node.
+ pos: NodePos,
+ /// The `at_start` before this node.
+ at_start: bool,
+ /// The kind of ahead node.
+ kind: AheadKind,
+}
+
+/// The kind of ahead node.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum AheadKind {
+ /// A normal non-trivia child has been found.
+ Normal,
+ /// An unbounded child has been found. The boolean indicates whether it was
+ /// on the current line, in which case adding it to the reparsing range is
+ /// compulsory.
+ Unbounded(bool),
+}
+
+impl Ahead {
+ fn new(pos: NodePos, at_start: bool, bounded: bool) -> Self {
+ Self {
+ pos,
+ at_start,
+ kind: if bounded {
+ AheadKind::Normal
+ } else {
+ AheadKind::Unbounded(true)
+ },
+ }
+ }
+
+ fn newline(&mut self) {
+ if let AheadKind::Unbounded(current_line) = &mut self.kind {
+ *current_line = false;
+ }
+ }
+
+ fn is_compulsory(self) -> bool {
+ matches!(self.kind, AheadKind::Unbounded(true))
+ }
+}
+
+/// Which reparse function to choose for a span of elements.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum ReparseMode {
+ /// Reparse a code block, including its braces.
+ Code,
+ /// Reparse a content block, including its square brackets.
+ Content,
+ /// Reparse elements of the markup. Also specifies the initial `at_start`
+ /// state for the reparse and the minimum indent of the reparsed nodes.
+ MarkupElements { at_start: bool, min_indent: usize },
+}
+
+/// Whether changes _inside_ this node are safely encapsulated, so that only
+/// this node must be reparsed.
+fn is_bounded(kind: &NodeKind) -> bool {
+ match kind {
+ NodeKind::CodeBlock
+ | NodeKind::ContentBlock
+ | NodeKind::Linebreak
+ | NodeKind::SmartQuote { .. }
+ | NodeKind::BlockComment
+ | NodeKind::Space { .. }
+ | NodeKind::Escape(_)
+ | NodeKind::Shorthand(_) => true,
+ _ => false,
+ }
+}
+
+/// Whether `at_start` would still be true after this node given the
+/// previous value of the property.
+fn next_at_start(kind: &NodeKind, prev: bool) -> bool {
+ match kind {
+ NodeKind::Space { newlines: (1 ..) } => true,
+ NodeKind::Space { .. } | NodeKind::LineComment | NodeKind::BlockComment => prev,
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+ use super::*;
+ use super::super::{parse, Source};
+ use super::super::tests::check;
+
+ #[track_caller]
+ fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
+ let mut source = Source::detached(prev);
+ let range = source.edit(range, with);
+ check(source.text(), source.root(), &parse(source.text()));
+ assert_eq!(range, goal);
+ }
+
+ #[test]
+ fn test_parse_incremental_simple_replacements() {
+ test("hello world", 7 .. 12, "walkers", 0 .. 14);
+ test("some content", 0..12, "", 0..0);
+ test("", 0..0, "do it", 0..5);
+ test("a d e", 1 .. 3, " b c d", 0 .. 9);
+ test("*~ *", 2..2, "*", 0..5);
+ test("_1_\n2a\n3", 5..5, "4", 4..7);
+ test("_1_\n2a\n3~", 8..8, "4", 4..10);
+ test("_1_ 2 3a\n4", 7..7, "5", 0..9);
+ test("* {1+2} *", 5..6, "3", 2..7);
+ test("a #f() e", 1 .. 6, " b c d", 0 .. 9);
+ test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 2 .. 7);
+ test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 3 .. 10);
+ test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 4..20);
+ test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6);
+ test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14);
+ test("\n= A heading", 4 .. 4, "n evocative", 0 .. 23);
+ test("for~your~thing", 9 .. 9, "a", 0 .. 15);
+ test("a your thing a", 6 .. 7, "a", 0 .. 14);
+ test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
+ test("#call() abc", 7 .. 7, "[]", 0 .. 10);
+ test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35);
+ test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 0..99);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33..42);
+ test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
+ test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
+ test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33);
+ test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
+ test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 0 .. 25);
+ test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 22);
+ test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
+ test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 18);
+ test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+ test("#for", 4 .. 4, "//", 0 .. 6);
+ test("#show a: f as b..", 16..16, "c", 0..18);
+ test("a\n#let \nb", 7 .. 7, "i", 2 .. 9);
+ test("a\n#for i \nb", 9 .. 9, "in", 2 .. 12);
+ test("a~https://fun/html", 13..14, "n", 0..18);
+ }
+
+ #[test]
+ fn test_parse_incremental_whitespace_invariants() {
+ test("hello \\ world", 7 .. 8, "a ", 0 .. 14);
+ test("hello \\ world", 7 .. 8, " a", 0 .. 14);
+ test("x = y", 1 .. 1, " + y", 0 .. 6);
+ test("x = y", 1 .. 1, " + y\n", 0 .. 7);
+ test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
+ test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
+ test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 0 .. 23);
+ test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
+ test("hey #myfriend", 4 .. 4, "\\", 0 .. 6);
+ test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11);
+ test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 9);
+ test(" // hi", 1 .. 1, " ", 0 .. 7);
+ test("- \nA", 2..3, "", 0..3);
+ }
+
+ #[test]
+ fn test_parse_incremental_type_invariants() {
+ test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
+ test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 11);
+ test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
+ test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
+ test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
+ test("a // b c #f()", 3 .. 4, "", 0 .. 12);
+ test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
+ test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
+ test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
+ test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
+ test("a b c", 1 .. 1, "{[}", 0 .. 8);
+ }
+
+ #[test]
+ fn test_parse_incremental_wrongly_or_unclosed_things() {
+ test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
+ test(r"this \u{abcd}", 8 .. 9, "", 0 .. 12);
+ test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
+ test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
+ test("a b c", 1 .. 1, " /* letters */", 0 .. 19);
+ test("a b c", 1 .. 1, " /* letters", 0 .. 16);
+ test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41);
+ test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
+ test("~~~~", 2 .. 2, "[]", 0 .. 5);
+ test("a[]b", 2 .. 2, "{", 1 .. 4);
+ test("[hello]", 2 .. 3, "]", 0 .. 7);
+ test("{a}", 1 .. 2, "b", 0 .. 3);
+ test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
+ test("#a()\n~", 3..4, "{}", 0..7);
+ test("[]\n~", 1..2, "#if i==0 {true}", 0..18);
+ }
+}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 8b172def..1a23db5f 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,558 +1,41 @@
-//! Syntax types.
+//! Syntax definition, parsing, and highlighting.
pub mod ast;
pub mod highlight;
+mod incremental;
mod kind;
+mod node;
+mod parser;
+mod parsing;
+mod resolve;
+mod source;
mod span;
-
-use std::fmt::{self, Debug, Formatter};
-use std::ops::Range;
-use std::sync::Arc;
+mod tokens;
pub use kind::*;
+pub use node::*;
+pub use parsing::*;
+pub use source::*;
pub use span::*;
+pub use tokens::*;
-use self::ast::TypedNode;
-use crate::diag::SourceError;
-use crate::source::SourceId;
-
-/// An inner or leaf node in the untyped syntax tree.
-#[derive(Clone, PartialEq, Hash)]
-pub enum SyntaxNode {
- /// A reference-counted inner node.
- Inner(Arc<InnerNode>),
- /// A leaf token.
- Leaf(NodeData),
-}
-
-impl SyntaxNode {
- /// The metadata of the node.
- pub fn data(&self) -> &NodeData {
- match self {
- Self::Inner(inner) => &inner.data,
- Self::Leaf(leaf) => leaf,
- }
- }
-
- /// The type of the node.
- pub fn kind(&self) -> &NodeKind {
- self.data().kind()
- }
-
- /// The length of the node.
- pub fn len(&self) -> usize {
- self.data().len()
- }
-
- /// The number of descendants, including the node itself.
- pub fn descendants(&self) -> usize {
- match self {
- Self::Inner(inner) => inner.descendants(),
- Self::Leaf(_) => 1,
- }
- }
-
- /// The span of the node.
- pub fn span(&self) -> Span {
- self.data().span()
- }
-
- /// Whether the node or its children contain an error.
- pub fn erroneous(&self) -> bool {
- match self {
- Self::Inner(node) => node.erroneous,
- Self::Leaf(data) => data.kind.is_error(),
- }
- }
+use incremental::reparse;
+use parser::*;
- /// The error messages for this node and its descendants.
- pub fn errors(&self) -> Vec<SourceError> {
- if !self.erroneous() {
- return vec![];
- }
+#[cfg(test)]
+mod tests {
+ use std::fmt::Debug;
- match self.kind() {
- NodeKind::Error(pos, message) => {
- vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)]
- }
- _ => self
- .children()
- .filter(|node| node.erroneous())
- .flat_map(|node| node.errors())
- .collect(),
- }
- }
-
- /// The node's children.
- pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
- match self {
- Self::Inner(inner) => inner.children(),
- Self::Leaf(_) => [].iter(),
- }
- }
-
- /// Convert the node to a typed AST node.
- pub fn cast<T>(&self) -> Option<T>
+ #[track_caller]
+ pub fn check<T>(text: &str, found: T, expected: T)
where
- T: TypedNode,
+ T: Debug + PartialEq,
{
- T::from_untyped(self)
- }
-
- /// Get the first child that can cast to the AST type `T`.
- pub fn cast_first_child<T: TypedNode>(&self) -> Option<T> {
- self.children().find_map(Self::cast)
- }
-
- /// Get the last child that can cast to the AST type `T`.
- pub fn cast_last_child<T: TypedNode>(&self) -> Option<T> {
- self.children().rev().find_map(Self::cast)
- }
-
- /// Change the type of the node.
- pub fn convert(&mut self, kind: NodeKind) {
- match self {
- Self::Inner(inner) => {
- let node = Arc::make_mut(inner);
- node.erroneous |= kind.is_error();
- node.data.kind = kind;
- }
- Self::Leaf(leaf) => leaf.kind = kind,
- }
- }
-
- /// Set a synthetic span for the node and all its descendants.
- pub fn synthesize(&mut self, span: Span) {
- match self {
- Self::Inner(inner) => Arc::make_mut(inner).synthesize(span),
- Self::Leaf(leaf) => leaf.synthesize(span),
- }
- }
-
- /// Assign spans to each node.
- pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
- match self {
- Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
- Self::Leaf(leaf) => leaf.numberize(id, within),
- }
- }
-
- /// The upper bound of assigned numbers in this subtree.
- pub fn upper(&self) -> u64 {
- match self {
- Self::Inner(inner) => inner.upper(),
- Self::Leaf(leaf) => leaf.span().number() + 1,
- }
- }
-
- /// If the span points into this node, convert it to a byte range.
- pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
- match self {
- Self::Inner(inner) => inner.range(span, offset),
- Self::Leaf(leaf) => leaf.range(span, offset),
- }
- }
-
- /// Returns all leaf descendants of this node (may include itself).
- ///
- /// This method is slow and only intended for testing.
- pub fn leafs(&self) -> Vec<Self> {
- if match self {
- Self::Inner(inner) => inner.children.is_empty(),
- Self::Leaf(_) => true,
- } {
- vec![self.clone()]
- } else {
- self.children().flat_map(Self::leafs).collect()
- }
- }
-}
-
-impl Default for SyntaxNode {
- fn default() -> Self {
- Self::Leaf(NodeData::new(NodeKind::None, 0))
- }
-}
-
-impl Debug for SyntaxNode {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- match self {
- Self::Inner(node) => node.fmt(f),
- Self::Leaf(token) => token.fmt(f),
- }
- }
-}
-
-/// An inner node in the untyped syntax tree.
-#[derive(Clone, Hash)]
-pub struct InnerNode {
- /// Node metadata.
- data: NodeData,
- /// The number of nodes in the whole subtree, including this node.
- descendants: usize,
- /// Whether this node or any of its children are erroneous.
- erroneous: bool,
- /// The upper bound of this node's numbering range.
- upper: u64,
- /// This node's children, losslessly make up this node.
- children: Vec<SyntaxNode>,
-}
-
-impl InnerNode {
- /// Creates a new node with the given kind and a single child.
- pub fn with_child(kind: NodeKind, child: impl Into<SyntaxNode>) -> Self {
- Self::with_children(kind, vec![child.into()])
- }
-
- /// Creates a new node with the given kind and children.
- pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self {
- let mut len = 0;
- let mut descendants = 1;
- let mut erroneous = kind.is_error();
-
- for child in &children {
- len += child.len();
- descendants += child.descendants();
- erroneous |= child.erroneous();
- }
-
- Self {
- data: NodeData::new(kind, len),
- descendants,
- erroneous,
- upper: 0,
- children,
- }
- }
-
- /// The node's metadata.
- pub fn data(&self) -> &NodeData {
- &self.data
- }
-
- /// The node's type.
- pub fn kind(&self) -> &NodeKind {
- self.data().kind()
- }
-
- /// The node's length.
- pub fn len(&self) -> usize {
- self.data().len()
- }
-
- /// The node's span.
- pub fn span(&self) -> Span {
- self.data().span()
- }
-
- /// The number of descendants, including the node itself.
- pub fn descendants(&self) -> usize {
- self.descendants
- }
-
- /// The node's children.
- pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
- self.children.iter()
- }
-
- /// Set a synthetic span for the node and all its descendants.
- pub fn synthesize(&mut self, span: Span) {
- self.data.synthesize(span);
- for child in &mut self.children {
- child.synthesize(span);
- }
- }
-
- /// Assign span numbers `within` an interval to this node's subtree or just
- /// a `range` of its children.
- pub fn numberize(
- &mut self,
- id: SourceId,
- range: Option<Range<usize>>,
- within: Range<u64>,
- ) -> NumberingResult {
- // Determine how many nodes we will number.
- let descendants = match &range {
- Some(range) if range.is_empty() => return Ok(()),
- Some(range) => self.children[range.clone()]
- .iter()
- .map(SyntaxNode::descendants)
- .sum::<usize>(),
- None => self.descendants,
- };
-
- // Determine the distance between two neighbouring assigned numbers. If
- // possible, we try to fit all numbers into the left half of `within`
- // so that there is space for future insertions.
- let space = within.end - within.start;
- let mut stride = space / (2 * descendants as u64);
- if stride == 0 {
- stride = space / self.descendants as u64;
- if stride == 0 {
- return Err(Unnumberable);
- }
- }
-
- // Number this node itself.
- let mut start = within.start;
- if range.is_none() {
- let end = start + stride;
- self.data.numberize(id, start .. end)?;
- self.upper = within.end;
- start = end;
- }
-
- // Number the children.
- let len = self.children.len();
- for child in &mut self.children[range.unwrap_or(0 .. len)] {
- let end = start + child.descendants() as u64 * stride;
- child.numberize(id, start .. end)?;
- start = end;
- }
-
- Ok(())
- }
-
- /// The upper bound of assigned numbers in this subtree.
- pub fn upper(&self) -> u64 {
- self.upper
- }
-
- /// If the span points into this node, convert it to a byte range.
- pub fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> {
- // Check whether we found it.
- if let Some(range) = self.data.range(span, offset) {
- return Some(range);
- }
-
- // The parent of a subtree has a smaller span number than all of its
- // descendants. Therefore, we can bail out early if the target span's
- // number is smaller than our number.
- if span.number() < self.span().number() {
- return None;
- }
-
- let mut children = self.children.iter().peekable();
- while let Some(child) = children.next() {
- // Every node in this child's subtree has a smaller span number than
- // the next sibling. Therefore we only need to recurse if the next
- // sibling's span number is larger than the target span's number.
- if children
- .peek()
- .map_or(true, |next| next.span().number() > span.number())
- {
- if let Some(range) = child.range(span, offset) {
- return Some(range);
- }
- }
-
- offset += child.len();
- }
-
- None
- }
-
- /// The node's children, mutably.
- pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] {
- &mut self.children
- }
-
- /// Replaces a range of children with a replacement.
- ///
- /// May have mutated the children if it returns `Err(_)`.
- pub(crate) fn replace_children(
- &mut self,
- mut range: Range<usize>,
- replacement: Vec<SyntaxNode>,
- ) -> NumberingResult {
- let superseded = &self.children[range.clone()];
-
- // Compute the new byte length.
- self.data.len = self.data.len
- + replacement.iter().map(SyntaxNode::len).sum::<usize>()
- - superseded.iter().map(SyntaxNode::len).sum::<usize>();
-
- // Compute the new number of descendants.
- self.descendants = self.descendants
- + replacement.iter().map(SyntaxNode::descendants).sum::<usize>()
- - superseded.iter().map(SyntaxNode::descendants).sum::<usize>();
-
- // Determine whether we're still erroneous after the replacement. That's
- // the case if
- // - any of the new nodes is erroneous,
- // - or if we were erroneous before due to a non-superseded node.
- self.erroneous = replacement.iter().any(SyntaxNode::erroneous)
- || (self.erroneous
- && (self.children[.. range.start].iter().any(SyntaxNode::erroneous))
- || self.children[range.end ..].iter().any(SyntaxNode::erroneous));
-
- // Perform the replacement.
- let replacement_count = replacement.len();
- self.children.splice(range.clone(), replacement);
- range.end = range.start + replacement_count;
-
- // Renumber the new children. Retries until it works, taking
- // exponentially more children into account.
- let mut left = 0;
- let mut right = 0;
- let max_left = range.start;
- let max_right = self.children.len() - range.end;
- loop {
- let renumber = range.start - left .. range.end + right;
-
- // The minimum assignable number is either
- // - the upper bound of the node right before the to-be-renumbered
- // children,
- // - or this inner node's span number plus one if renumbering starts
- // at the first child.
- let start_number = renumber
- .start
- .checked_sub(1)
- .and_then(|i| self.children.get(i))
- .map_or(self.span().number() + 1, |child| child.upper());
-
- // The upper bound for renumbering is either
- // - the span number of the first child after the to-be-renumbered
- // children,
- // - or this node's upper bound if renumbering ends behind the last
- // child.
- let end_number = self
- .children
- .get(renumber.end)
- .map_or(self.upper(), |next| next.span().number());
-
- // Try to renumber.
- let within = start_number .. end_number;
- let id = self.span().source();
- if self.numberize(id, Some(renumber), within).is_ok() {
- return Ok(());
- }
-
- // If it didn't even work with all children, we give up.
- if left == max_left && right == max_right {
- return Err(Unnumberable);
- }
-
- // Exponential expansion to both sides.
- left = (left + 1).next_power_of_two().min(max_left);
- right = (right + 1).next_power_of_two().min(max_right);
- }
- }
-
- /// Update this node after changes were made to one of its children.
- pub(crate) fn update_parent(
- &mut self,
- prev_len: usize,
- new_len: usize,
- prev_descendants: usize,
- new_descendants: usize,
- ) {
- self.data.len = self.data.len + new_len - prev_len;
- self.descendants = self.descendants + new_descendants - prev_descendants;
- self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
- }
-}
-
-impl From<InnerNode> for SyntaxNode {
- fn from(node: InnerNode) -> Self {
- Arc::new(node).into()
- }
-}
-
-impl From<Arc<InnerNode>> for SyntaxNode {
- fn from(node: Arc<InnerNode>) -> Self {
- Self::Inner(node)
- }
-}
-
-impl Debug for InnerNode {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- self.data.fmt(f)?;
- if !self.children.is_empty() {
- f.write_str(" ")?;
- f.debug_list().entries(&self.children).finish()?;
+ if found != expected {
+ println!("source: {text:?}");
+ println!("expected: {expected:#?}");
+ println!("found: {found:#?}");
+ panic!("test failed");
}
- Ok(())
- }
-}
-
-impl PartialEq for InnerNode {
- fn eq(&self, other: &Self) -> bool {
- self.data == other.data
- && self.descendants == other.descendants
- && self.erroneous == other.erroneous
- && self.children == other.children
- }
-}
-
-/// Data shared between inner and leaf nodes.
-#[derive(Clone, Hash)]
-pub struct NodeData {
- /// What kind of node this is (each kind would have its own struct in a
- /// strongly typed AST).
- kind: NodeKind,
- /// The byte length of the node in the source.
- len: usize,
- /// The node's span.
- span: Span,
-}
-
-impl NodeData {
- /// Create new node metadata.
- pub fn new(kind: NodeKind, len: usize) -> Self {
- Self { len, kind, span: Span::detached() }
- }
-
- /// The node's type.
- pub fn kind(&self) -> &NodeKind {
- &self.kind
- }
-
- /// The node's length.
- pub fn len(&self) -> usize {
- self.len
- }
-
- /// The node's span.
- pub fn span(&self) -> Span {
- self.span
- }
-
- /// Set a synthetic span for the node.
- pub fn synthesize(&mut self, span: Span) {
- self.span = span;
- }
-
- /// Assign a span to the node.
- pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
- if within.start < within.end {
- self.span = Span::new(id, (within.start + within.end) / 2);
- Ok(())
- } else {
- Err(Unnumberable)
- }
- }
-
- /// If the span points into this node, convert it to a byte range.
- pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
- (self.span == span).then(|| offset .. offset + self.len())
- }
-}
-
-impl From<NodeData> for SyntaxNode {
- fn from(token: NodeData) -> Self {
- Self::Leaf(token)
- }
-}
-
-impl Debug for NodeData {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{:?}: {}", self.kind, self.len)
- }
-}
-
-impl PartialEq for NodeData {
- fn eq(&self, other: &Self) -> bool {
- self.kind == other.kind && self.len == other.len
}
}
diff --git a/src/syntax/node.rs b/src/syntax/node.rs
new file mode 100644
index 00000000..6a7d424a
--- /dev/null
+++ b/src/syntax/node.rs
@@ -0,0 +1,548 @@
+use std::fmt::{self, Debug, Formatter};
+use std::ops::Range;
+use std::sync::Arc;
+
+use super::ast::TypedNode;
+use super::{NodeKind, NumberingResult, SourceId, Span, Unnumberable};
+use crate::diag::SourceError;
+
+/// An inner or leaf node in the untyped syntax tree.
+///
+/// Inner nodes are reference-counted (`Arc`), so cloning a subtree is cheap;
+/// mutation goes through `Arc::make_mut` (copy-on-write). Equality is
+/// structural and ignores spans (see the manual `PartialEq` impls of
+/// `InnerNode` and `NodeData`).
+#[derive(Clone, PartialEq, Hash)]
+pub enum SyntaxNode {
+    /// A reference-counted inner node.
+    Inner(Arc<InnerNode>),
+    /// A leaf token.
+    Leaf(NodeData),
+}
+
+impl SyntaxNode {
+    /// The metadata of the node.
+    pub fn data(&self) -> &NodeData {
+        match self {
+            Self::Inner(inner) => &inner.data,
+            Self::Leaf(leaf) => leaf,
+        }
+    }
+
+    /// The type of the node.
+    pub fn kind(&self) -> &NodeKind {
+        self.data().kind()
+    }
+
+    /// The length of the node.
+    pub fn len(&self) -> usize {
+        self.data().len()
+    }
+
+    /// The number of descendants, including the node itself.
+    pub fn descendants(&self) -> usize {
+        match self {
+            // Inner nodes cache this count, so no recursion is needed.
+            Self::Inner(inner) => inner.descendants(),
+            // A leaf counts only itself.
+            Self::Leaf(_) => 1,
+        }
+    }
+
+    /// The span of the node.
+    pub fn span(&self) -> Span {
+        self.data().span()
+    }
+
+    /// Whether the node or its children contain an error.
+    pub fn erroneous(&self) -> bool {
+        match self {
+            // Cached flag covering the whole subtree.
+            Self::Inner(node) => node.erroneous,
+            Self::Leaf(data) => data.kind.is_error(),
+        }
+    }
+
+    /// The error messages for this node and its descendants.
+    pub fn errors(&self) -> Vec<SourceError> {
+        if !self.erroneous() {
+            return vec![];
+        }
+
+        match self.kind() {
+            NodeKind::Error(pos, message) => {
+                vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)]
+            }
+            // Recurse only into erroneous subtrees; clean ones cannot
+            // contribute errors.
+            _ => self
+                .children()
+                .filter(|node| node.erroneous())
+                .flat_map(|node| node.errors())
+                .collect(),
+        }
+    }
+
+    /// The node's children.
+    pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
+        match self {
+            Self::Inner(inner) => inner.children(),
+            // A leaf yields an empty iterator.
+            Self::Leaf(_) => [].iter(),
+        }
+    }
+
+    /// Convert the node to a typed AST node.
+    pub fn cast<T>(&self) -> Option<T>
+    where
+        T: TypedNode,
+    {
+        T::from_untyped(self)
+    }
+
+    /// Get the first child that can cast to the AST type `T`.
+    pub fn cast_first_child<T: TypedNode>(&self) -> Option<T> {
+        self.children().find_map(Self::cast)
+    }
+
+    /// Get the last child that can cast to the AST type `T`.
+    pub fn cast_last_child<T: TypedNode>(&self) -> Option<T> {
+        self.children().rev().find_map(Self::cast)
+    }
+
+    /// Change the type of the node.
+    pub fn convert(&mut self, kind: NodeKind) {
+        match self {
+            Self::Inner(inner) => {
+                // Copy-on-write: clones the inner node only if it is shared.
+                let node = Arc::make_mut(inner);
+                // The error flag is only ever widened here, never cleared.
+                node.erroneous |= kind.is_error();
+                node.data.kind = kind;
+            }
+            Self::Leaf(leaf) => leaf.kind = kind,
+        }
+    }
+
+    /// Set a synthetic span for the node and all its descendants.
+    pub fn synthesize(&mut self, span: Span) {
+        match self {
+            Self::Inner(inner) => Arc::make_mut(inner).synthesize(span),
+            Self::Leaf(leaf) => leaf.synthesize(span),
+        }
+    }
+
+    /// Assign spans to each node.
+    pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
+        match self {
+            Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
+            Self::Leaf(leaf) => leaf.numberize(id, within),
+        }
+    }
+
+    /// The upper bound of assigned numbers in this subtree.
+    pub fn upper(&self) -> u64 {
+        match self {
+            Self::Inner(inner) => inner.upper(),
+            // A leaf's upper bound is one past its own span number.
+            Self::Leaf(leaf) => leaf.span().number() + 1,
+        }
+    }
+
+    /// If the span points into this node, convert it to a byte range.
+    ///
+    /// `offset` is the byte position at which this node starts.
+    pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
+        match self {
+            Self::Inner(inner) => inner.range(span, offset),
+            Self::Leaf(leaf) => leaf.range(span, offset),
+        }
+    }
+
+    /// Returns all leaf descendants of this node (may include itself).
+    ///
+    /// This method is slow and only intended for testing.
+    pub fn leafs(&self) -> Vec<Self> {
+        // Note: a childless inner node also counts as a leaf here.
+        if match self {
+            Self::Inner(inner) => inner.children.is_empty(),
+            Self::Leaf(_) => true,
+        } {
+            vec![self.clone()]
+        } else {
+            self.children().flat_map(Self::leafs).collect()
+        }
+    }
+}
+
+impl Default for SyntaxNode {
+    /// A zero-length `None` leaf, used as a cheap placeholder.
+    fn default() -> Self {
+        let data = NodeData::new(NodeKind::None, 0);
+        SyntaxNode::Leaf(data)
+    }
+}
+
+impl Debug for SyntaxNode {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        // Delegate to whichever representation this node holds.
+        match self {
+            SyntaxNode::Inner(inner) => Debug::fmt(inner, f),
+            SyntaxNode::Leaf(data) => Debug::fmt(data, f),
+        }
+    }
+}
+
+/// An inner node in the untyped syntax tree.
+#[derive(Clone, Hash)]
+pub struct InnerNode {
+    /// Node metadata.
+    data: NodeData,
+    /// The number of nodes in the whole subtree, including this node.
+    /// Cached aggregate, kept in sync by `with_children`, `replace_children`
+    /// and `update_parent`.
+    descendants: usize,
+    /// Whether this node or any of its children are erroneous. Also a cached
+    /// aggregate, maintained alongside `descendants`.
+    erroneous: bool,
+    /// The upper bound of this node's numbering range (set by `numberize`).
+    upper: u64,
+    /// This node's children, which losslessly make up this node.
+    children: Vec<SyntaxNode>,
+}
+
+impl InnerNode {
+    /// Creates a new node with the given kind and a single child.
+    pub fn with_child(kind: NodeKind, child: impl Into<SyntaxNode>) -> Self {
+        Self::with_children(kind, vec![child.into()])
+    }
+
+    /// Creates a new node with the given kind and children.
+    ///
+    /// Byte length, descendant count and the error flag are aggregated from
+    /// the children once here and cached, so later queries are O(1).
+    pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self {
+        let mut len = 0;
+        let mut descendants = 1;
+        let mut erroneous = kind.is_error();
+
+        for child in &children {
+            len += child.len();
+            descendants += child.descendants();
+            erroneous |= child.erroneous();
+        }
+
+        Self {
+            data: NodeData::new(kind, len),
+            descendants,
+            erroneous,
+            // Not yet numbered; `numberize` sets the real upper bound.
+            upper: 0,
+            children,
+        }
+    }
+
+    /// The node's metadata.
+    pub fn data(&self) -> &NodeData {
+        &self.data
+    }
+
+    /// The node's type.
+    pub fn kind(&self) -> &NodeKind {
+        self.data().kind()
+    }
+
+    /// The node's length.
+    pub fn len(&self) -> usize {
+        self.data().len()
+    }
+
+    /// The node's span.
+    pub fn span(&self) -> Span {
+        self.data().span()
+    }
+
+    /// The number of descendants, including the node itself.
+    pub fn descendants(&self) -> usize {
+        self.descendants
+    }
+
+    /// The node's children.
+    pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
+        self.children.iter()
+    }
+
+    /// Set a synthetic span for the node and all its descendants.
+    pub fn synthesize(&mut self, span: Span) {
+        self.data.synthesize(span);
+        for child in &mut self.children {
+            child.synthesize(span);
+        }
+    }
+
+    /// Assign span numbers `within` an interval to this node's subtree or just
+    /// a `range` of its children.
+    ///
+    /// Returns `Err(Unnumberable)` if the interval is too small to give every
+    /// affected node a distinct number.
+    pub fn numberize(
+        &mut self,
+        id: SourceId,
+        range: Option<Range<usize>>,
+        within: Range<u64>,
+    ) -> NumberingResult {
+        // Determine how many nodes we will number.
+        let descendants = match &range {
+            Some(range) if range.is_empty() => return Ok(()),
+            Some(range) => self.children[range.clone()]
+                .iter()
+                .map(SyntaxNode::descendants)
+                .sum::<usize>(),
+            None => self.descendants,
+        };
+
+        // Determine the distance between two neighbouring assigned numbers. If
+        // possible, we try to fit all numbers into the left half of `within`
+        // so that there is space for future insertions.
+        let space = within.end - within.start;
+        let mut stride = space / (2 * descendants as u64);
+        if stride == 0 {
+            stride = space / self.descendants as u64;
+            if stride == 0 {
+                return Err(Unnumberable);
+            }
+        }
+
+        // Number this node itself.
+        let mut start = within.start;
+        if range.is_none() {
+            let end = start + stride;
+            self.data.numberize(id, start .. end)?;
+            self.upper = within.end;
+            start = end;
+        }
+
+        // Number the children.
+        let len = self.children.len();
+        for child in &mut self.children[range.unwrap_or(0 .. len)] {
+            let end = start + child.descendants() as u64 * stride;
+            child.numberize(id, start .. end)?;
+            start = end;
+        }
+
+        Ok(())
+    }
+
+    /// The upper bound of assigned numbers in this subtree.
+    pub fn upper(&self) -> u64 {
+        self.upper
+    }
+
+    /// If the span points into this node, convert it to a byte range.
+    pub fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> {
+        // Check whether we found it.
+        if let Some(range) = self.data.range(span, offset) {
+            return Some(range);
+        }
+
+        // The parent of a subtree has a smaller span number than all of its
+        // descendants. Therefore, we can bail out early if the target span's
+        // number is smaller than our number.
+        if span.number() < self.span().number() {
+            return None;
+        }
+
+        let mut children = self.children.iter().peekable();
+        while let Some(child) = children.next() {
+            // Every node in this child's subtree has a smaller span number than
+            // the next sibling. Therefore we only need to recurse if the next
+            // sibling's span number is larger than the target span's number.
+            if children
+                .peek()
+                .map_or(true, |next| next.span().number() > span.number())
+            {
+                if let Some(range) = child.range(span, offset) {
+                    return Some(range);
+                }
+            }
+
+            offset += child.len();
+        }
+
+        None
+    }
+
+    /// The node's children, mutably.
+    pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] {
+        &mut self.children
+    }
+
+    /// Replaces a range of children with a replacement.
+    ///
+    /// May have mutated the children if it returns `Err(_)`.
+    pub(crate) fn replace_children(
+        &mut self,
+        mut range: Range<usize>,
+        replacement: Vec<SyntaxNode>,
+    ) -> NumberingResult {
+        let superseded = &self.children[range.clone()];
+
+        // Compute the new byte length.
+        self.data.len = self.data.len
+            + replacement.iter().map(SyntaxNode::len).sum::<usize>()
+            - superseded.iter().map(SyntaxNode::len).sum::<usize>();
+
+        // Compute the new number of descendants.
+        self.descendants = self.descendants
+            + replacement.iter().map(SyntaxNode::descendants).sum::<usize>()
+            - superseded.iter().map(SyntaxNode::descendants).sum::<usize>();
+
+        // Determine whether we're still erroneous after the replacement. That's
+        // the case if
+        // - any of the new nodes is erroneous,
+        // - or if we were erroneous before due to a non-superseded node.
+        //
+        // Fix: the suffix check must sit *inside* the `self.erroneous && (..)`
+        // group. Previously `&&` bound tighter than `||`, so the expression
+        // parsed as `A || (B && C) || D` and an erroneous suffix child set the
+        // flag unconditionally — harmless only because such a child implies
+        // the flag was already true, but contrary to the stated intent.
+        self.erroneous = replacement.iter().any(SyntaxNode::erroneous)
+            || (self.erroneous
+                && (self.children[.. range.start].iter().any(SyntaxNode::erroneous)
+                    || self.children[range.end ..].iter().any(SyntaxNode::erroneous)));
+
+        // Perform the replacement.
+        let replacement_count = replacement.len();
+        self.children.splice(range.clone(), replacement);
+        range.end = range.start + replacement_count;
+
+        // Renumber the new children. Retries until it works, taking
+        // exponentially more children into account.
+        let mut left = 0;
+        let mut right = 0;
+        let max_left = range.start;
+        let max_right = self.children.len() - range.end;
+        loop {
+            let renumber = range.start - left .. range.end + right;
+
+            // The minimum assignable number is either
+            // - the upper bound of the node right before the to-be-renumbered
+            //   children,
+            // - or this inner node's span number plus one if renumbering starts
+            //   at the first child.
+            let start_number = renumber
+                .start
+                .checked_sub(1)
+                .and_then(|i| self.children.get(i))
+                .map_or(self.span().number() + 1, |child| child.upper());
+
+            // The upper bound for renumbering is either
+            // - the span number of the first child after the to-be-renumbered
+            //   children,
+            // - or this node's upper bound if renumbering ends behind the last
+            //   child.
+            let end_number = self
+                .children
+                .get(renumber.end)
+                .map_or(self.upper(), |next| next.span().number());
+
+            // Try to renumber.
+            let within = start_number .. end_number;
+            let id = self.span().source();
+            if self.numberize(id, Some(renumber), within).is_ok() {
+                return Ok(());
+            }
+
+            // If it didn't even work with all children, we give up.
+            if left == max_left && right == max_right {
+                return Err(Unnumberable);
+            }
+
+            // Exponential expansion to both sides.
+            left = (left + 1).next_power_of_two().min(max_left);
+            right = (right + 1).next_power_of_two().min(max_right);
+        }
+    }
+
+    /// Update this node after changes were made to one of its children.
+    pub(crate) fn update_parent(
+        &mut self,
+        prev_len: usize,
+        new_len: usize,
+        prev_descendants: usize,
+        new_descendants: usize,
+    ) {
+        self.data.len = self.data.len + new_len - prev_len;
+        self.descendants = self.descendants + new_descendants - prev_descendants;
+        // The error flag cannot be updated incrementally, so recompute it from
+        // the (already updated) children.
+        // NOTE(review): unlike `with_children`, this ignores whether this
+        // node's own kind is an error — presumably fine because incremental
+        // reparsing does not descend into error nodes; confirm with callers.
+        self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
+    }
+}
+
+impl From<InnerNode> for SyntaxNode {
+    fn from(node: InnerNode) -> Self {
+        // Wrap in a reference counter first, then reuse the `Arc` conversion.
+        SyntaxNode::from(Arc::new(node))
+    }
+}
+
+impl From<Arc<InnerNode>> for SyntaxNode {
+    /// Adopt an already reference-counted inner node without copying.
+    fn from(arc: Arc<InnerNode>) -> Self {
+        Self::Inner(arc)
+    }
+}
+
+impl Debug for InnerNode {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        // Print this node's own metadata first, then the child list (if any).
+        Debug::fmt(&self.data, f)?;
+        if self.children.is_empty() {
+            return Ok(());
+        }
+        f.write_str(" ")?;
+        f.debug_list().entries(&self.children).finish()
+    }
+}
+
+impl PartialEq for InnerNode {
+    fn eq(&self, other: &Self) -> bool {
+        // Spans are deliberately excluded: `NodeData`'s equality skips them.
+        if self.data != other.data || self.descendants != other.descendants {
+            return false;
+        }
+        self.erroneous == other.erroneous && self.children == other.children
+    }
+}
+
+/// Data shared between inner and leaf nodes.
+#[derive(Clone, Hash)]
+pub struct NodeData {
+    /// What kind of node this is (each kind would have its own struct in a
+    /// strongly typed AST).
+    ///
+    /// `pub(super)` — presumably so sibling `syntax` modules can retarget the
+    /// kind in place; confirm against the other modules' usage.
+    pub(super) kind: NodeKind,
+    /// The byte length of the node in the source.
+    len: usize,
+    /// The node's span. Starts out detached and is assigned by `numberize`.
+    span: Span,
+}
+
+impl NodeData {
+    /// Create metadata for a node of the given kind spanning `len` bytes.
+    ///
+    /// The span starts out detached and is assigned later via `numberize`.
+    pub fn new(kind: NodeKind, len: usize) -> Self {
+        Self { kind, len, span: Span::detached() }
+    }
+
+    /// The node's type.
+    pub fn kind(&self) -> &NodeKind {
+        &self.kind
+    }
+
+    /// The node's byte length in the source.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// The node's span.
+    pub fn span(&self) -> Span {
+        self.span
+    }
+
+    /// Set a synthetic span for the node.
+    pub fn synthesize(&mut self, span: Span) {
+        self.span = span;
+    }
+
+    /// Assign this node the midpoint of the numbering interval `within`.
+    pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
+        if within.start >= within.end {
+            return Err(Unnumberable);
+        }
+        let midpoint = (within.start + within.end) / 2;
+        self.span = Span::new(id, midpoint);
+        Ok(())
+    }
+
+    /// If the span points to this node, convert it to a byte range.
+    pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
+        if self.span == span {
+            Some(offset .. offset + self.len)
+        } else {
+            None
+        }
+    }
+}
+
+impl From<NodeData> for SyntaxNode {
+    /// A bare token becomes a leaf node.
+    fn from(data: NodeData) -> Self {
+        Self::Leaf(data)
+    }
+}
+
+impl Debug for NodeData {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        // E.g. `Space: 1`.
+        let NodeData { kind, len, .. } = self;
+        write!(f, "{:?}: {}", kind, len)
+    }
+}
+
+impl PartialEq for NodeData {
+    fn eq(&self, other: &Self) -> bool {
+        // Compare kind and length only; spans are intentionally excluded.
+        if self.kind != other.kind {
+            return false;
+        }
+        self.len == other.len
+    }
+}
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
new file mode 100644
index 00000000..83b333f4
--- /dev/null
+++ b/src/syntax/parser.rs
@@ -0,0 +1,558 @@
+use std::fmt::{self, Display, Formatter};
+use std::mem;
+use std::ops::Range;
+
+use super::{ErrorPos, InnerNode, NodeData, NodeKind, SyntaxNode, TokenMode, Tokens};
+use crate::util::EcoString;
+
+/// A convenient token-based parser.
+pub struct Parser<'s> {
+    /// An iterator over the source tokens.
+    tokens: Tokens<'s>,
+    /// Whether we are at the end of the file or of a group.
+    eof: bool,
+    /// The current token (`None` once the underlying token stream is
+    /// exhausted).
+    current: Option<NodeKind>,
+    /// The end byte index of the last non-trivia token.
+    prev_end: usize,
+    /// The start byte index of the peeked token.
+    current_start: usize,
+    /// The stack of open groups.
+    groups: Vec<GroupEntry>,
+    /// The children of the currently built node.
+    children: Vec<SyntaxNode>,
+    /// Whether the last group was not correctly terminated.
+    unterminated_group: bool,
+    /// Whether a group terminator was found that did not close a group.
+    stray_terminator: bool,
+}
+
+impl<'s> Parser<'s> {
+    /// Create a new parser for the source string.
+    pub fn new(text: &'s str, mode: TokenMode) -> Self {
+        Self::with_prefix("", text, mode)
+    }
+
+    /// Create a new parser for the source string that is prefixed by some text
+    /// that does not need to be parsed but taken into account for column
+    /// calculation.
+    pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
+        let mut tokens = Tokens::with_prefix(prefix, text, mode);
+        let current = tokens.next();
+        Self {
+            tokens,
+            // An empty token stream starts out at the end.
+            eof: current.is_none(),
+            current,
+            prev_end: 0,
+            current_start: 0,
+            groups: vec![],
+            children: vec![],
+            unterminated_group: false,
+            stray_terminator: false,
+        }
+    }
+
+    /// End the parsing process and return the parsed children.
+    pub fn finish(self) -> Vec<SyntaxNode> {
+        self.children
+    }
+
+    /// End the parsing process and return
+    /// - the parsed children and whether the last token was terminated, if all
+    ///   groups were terminated correctly, or
+    /// - `None` otherwise.
+    pub fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> {
+        self.terminated().then(|| (self.children, self.tokens.terminated()))
+    }
+
+    /// Create a new marker.
+    pub fn marker(&mut self) -> Marker {
+        Marker(self.children.len())
+    }
+
+    /// Create a marker right before the trailing trivia.
+    pub fn trivia_start(&self) -> Marker {
+        // Count trailing trivia nodes and back up over them.
+        let count = self
+            .children
+            .iter()
+            .rev()
+            .take_while(|node| self.is_trivia(node.kind()))
+            .count();
+        Marker(self.children.len() - count)
+    }
+
+    /// Perform a subparse that wraps its result in a node with the given kind.
+    pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
+    where
+        F: FnOnce(&mut Self) -> T,
+    {
+        // Let the subparse collect into a fresh child list, then restore ours.
+        let prev = mem::take(&mut self.children);
+        let output = f(self);
+        let until = self.trivia_start();
+        let mut children = mem::replace(&mut self.children, prev);
+
+        if self.tokens.mode() == TokenMode::Markup {
+            self.children.push(InnerNode::with_children(kind, children).into());
+        } else {
+            // Trailing trivia should not be wrapped into the new node.
+            // Reserve a placeholder slot, re-append the trivia, then fill the
+            // slot with the wrapped node.
+            let idx = self.children.len();
+            self.children.push(SyntaxNode::default());
+            self.children.extend(children.drain(until.0 ..));
+            self.children[idx] = InnerNode::with_children(kind, children).into();
+        }
+
+        output
+    }
+
+    /// Whether the end of the source string or group is reached.
+    pub fn eof(&self) -> bool {
+        self.eof
+    }
+
+    /// Consume the current token and also trailing trivia.
+    pub fn eat(&mut self) {
+        // A closing delimiter without a matching open group is "stray".
+        self.stray_terminator |= match self.current {
+            Some(NodeKind::RightParen) => !self.inside(Group::Paren),
+            Some(NodeKind::RightBracket) => !self.inside(Group::Bracket),
+            Some(NodeKind::RightBrace) => !self.inside(Group::Brace),
+            _ => false,
+        };
+
+        self.prev_end = self.tokens.cursor();
+        self.bump();
+
+        if self.tokens.mode() != TokenMode::Markup {
+            // Skip whitespace and comments.
+            while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
+                self.bump();
+            }
+        }
+
+        self.repeek();
+    }
+
+    /// Consume the current token if it is the given one.
+    pub fn eat_if(&mut self, kind: NodeKind) -> bool {
+        let at = self.at(kind);
+        if at {
+            self.eat();
+        }
+        at
+    }
+
+    /// Eat tokens while the condition is true.
+    pub fn eat_while<F>(&mut self, mut f: F)
+    where
+        F: FnMut(&NodeKind) -> bool,
+    {
+        while self.peek().map_or(false, |t| f(t)) {
+            self.eat();
+        }
+    }
+
+    /// Consume the current token if it is the given one and produce an error if
+    /// not.
+    pub fn expect(&mut self, kind: NodeKind) -> ParseResult {
+        let at = self.peek() == Some(&kind);
+        if at {
+            self.eat();
+            Ok(())
+        } else {
+            self.expected(kind.name());
+            Err(ParseError)
+        }
+    }
+
+    /// Consume the current token, debug-asserting that it is the given one.
+    #[track_caller]
+    pub fn assert(&mut self, kind: NodeKind) {
+        debug_assert_eq!(self.peek(), Some(&kind));
+        self.eat();
+    }
+
+    /// Whether the current token is of the given type.
+    pub fn at(&self, kind: NodeKind) -> bool {
+        self.peek() == Some(&kind)
+    }
+
+    /// Peek at the current token without consuming it.
+    pub fn peek(&self) -> Option<&NodeKind> {
+        // A closed group masks the real token as `None`.
+        if self.eof { None } else { self.current.as_ref() }
+    }
+
+    /// Peek at the current token, but only if it follows immediately after the
+    /// last one without any trivia in between.
+    pub fn peek_direct(&self) -> Option<&NodeKind> {
+        if self.prev_end() == self.current_start() {
+            self.peek()
+        } else {
+            None
+        }
+    }
+
+    /// Peek at the source of the current token.
+    pub fn peek_src(&self) -> &'s str {
+        self.get(self.current_start() .. self.current_end())
+    }
+
+    /// Obtain a range of the source code.
+    pub fn get(&self, range: Range<usize>) -> &'s str {
+        self.tokens.scanner().get(range)
+    }
+
+    /// The byte index at which the last non-trivia token ended.
+    pub fn prev_end(&self) -> usize {
+        self.prev_end
+    }
+
+    /// The byte index at which the current token starts.
+    pub fn current_start(&self) -> usize {
+        self.current_start
+    }
+
+    /// The byte index at which the current token ends.
+    pub fn current_end(&self) -> usize {
+        self.tokens.cursor()
+    }
+
+    /// Determine the column index for the given byte index.
+    pub fn column(&self, index: usize) -> usize {
+        self.tokens.column(index)
+    }
+
+    /// Continue parsing in a group.
+    ///
+    /// When the end delimiter of the group is reached, all subsequent calls to
+    /// `peek()` return `None`. Parsing can only continue with a matching call
+    /// to `end_group`.
+    ///
+    /// This panics if the current token does not start the given group.
+    #[track_caller]
+    pub fn start_group(&mut self, kind: Group) {
+        self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
+        // Select the token mode for the group's interior. Math mode is
+        // "sticky": brackets, braces and parens inside math stay in math mode.
+        self.tokens.set_mode(match kind {
+            Group::Strong | Group::Emph => TokenMode::Markup,
+            Group::Bracket => match self.tokens.mode() {
+                TokenMode::Math => TokenMode::Math,
+                _ => TokenMode::Markup,
+            },
+            Group::Brace | Group::Paren => match self.tokens.mode() {
+                TokenMode::Math => TokenMode::Math,
+                _ => TokenMode::Code,
+            },
+            Group::Math => TokenMode::Math,
+            Group::Expr | Group::Imports => TokenMode::Code,
+        });
+
+        match kind {
+            Group::Brace => self.assert(NodeKind::LeftBrace),
+            Group::Bracket => self.assert(NodeKind::LeftBracket),
+            Group::Paren => self.assert(NodeKind::LeftParen),
+            Group::Strong => self.assert(NodeKind::Star),
+            Group::Emph => self.assert(NodeKind::Underscore),
+            Group::Math => self.assert(NodeKind::Dollar),
+            // These groups have no opening delimiter; just re-evaluate whether
+            // the current token now ends a group.
+            Group::Expr => self.repeek(),
+            Group::Imports => self.repeek(),
+        }
+    }
+
+    /// End the parsing of a group.
+    ///
+    /// This panics if no group was started.
+    #[track_caller]
+    pub fn end_group(&mut self) {
+        let group_mode = self.tokens.mode();
+        let group = self.groups.pop().expect("no started group");
+        self.tokens.set_mode(group.prev_mode);
+
+        // If the mode changes, the current token was lexed under the wrong
+        // mode and must be re-scanned (see below).
+        let mut rescan = self.tokens.mode() != group_mode;
+
+        // Eat the end delimiter if there is one.
+        if let Some((end, required)) = match group.kind {
+            Group::Brace => Some((NodeKind::RightBrace, true)),
+            Group::Bracket => Some((NodeKind::RightBracket, true)),
+            Group::Paren => Some((NodeKind::RightParen, true)),
+            Group::Strong => Some((NodeKind::Star, true)),
+            Group::Emph => Some((NodeKind::Underscore, true)),
+            Group::Math => Some((NodeKind::Dollar, true)),
+            Group::Expr => Some((NodeKind::Semicolon, false)),
+            Group::Imports => None,
+        } {
+            if self.current.as_ref() == Some(&end) {
+                // If another group closes after a group with the missing
+                // terminator, its scope of influence ends here and no longer
+                // taints the rest of the reparse.
+                self.unterminated_group = false;
+
+                // Bump the delimeter and return. No need to rescan in this
+                // case. Also, we know that the delimiter is not stray even
+                // though we already removed the group.
+                let s = self.stray_terminator;
+                self.eat();
+                self.stray_terminator = s;
+                rescan = false;
+            } else if required {
+                self.expected(end.name());
+                self.unterminated_group = true;
+            }
+        }
+
+        // Rescan the peeked token if the mode changed.
+        if rescan {
+            let mut target = self.prev_end();
+            if group_mode != TokenMode::Markup {
+                // In code mode, trailing trivia does not belong to the group:
+                // jump back to before it and drop the trivia nodes.
+                let start = self.trivia_start().0;
+                target = self.current_start
+                    - self.children[start ..].iter().map(SyntaxNode::len).sum::<usize>();
+                self.children.truncate(start);
+            }
+
+            self.tokens.jump(target);
+            self.prev_end = self.tokens.cursor();
+            self.current_start = self.tokens.cursor();
+            self.current = self.tokens.next();
+        }
+
+        self.repeek();
+    }
+
+    /// Checks if all groups were correctly terminated.
+    fn terminated(&self) -> bool {
+        self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
+    }
+
+    /// Low-level bump that consumes exactly one token without special trivia
+    /// handling.
+    fn bump(&mut self) {
+        let kind = self.current.take().unwrap();
+        let len = self.tokens.cursor() - self.current_start;
+        self.children.push(NodeData::new(kind, len).into());
+        self.current_start = self.tokens.cursor();
+        self.current = self.tokens.next();
+    }
+
+    /// Take another look at the current token to recheck whether it ends a
+    /// group.
+    fn repeek(&mut self) {
+        self.eof = match &self.current {
+            Some(NodeKind::RightBrace) => self.inside(Group::Brace),
+            Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
+            Some(NodeKind::RightParen) => self.inside(Group::Paren),
+            Some(NodeKind::Star) => self.inside(Group::Strong),
+            Some(NodeKind::Underscore) => self.inside(Group::Emph),
+            Some(NodeKind::Dollar) => self.inside(Group::Math),
+            Some(NodeKind::Semicolon) => self.inside(Group::Expr),
+            Some(NodeKind::From) => self.inside(Group::Imports),
+            Some(NodeKind::Space { newlines }) => self.space_ends_group(*newlines),
+            Some(_) => false,
+            None => true,
+        };
+    }
+
+    /// Returns whether the given type can be skipped over.
+    fn is_trivia(&self, token: &NodeKind) -> bool {
+        match token {
+            // A space is trivia unless it terminates the current group.
+            NodeKind::Space { newlines } => !self.space_ends_group(*newlines),
+            NodeKind::LineComment => true,
+            NodeKind::BlockComment => true,
+            _ => false,
+        }
+    }
+
+    /// Whether a space with the given number of newlines ends the current group.
+    fn space_ends_group(&self, n: usize) -> bool {
+        if n == 0 {
+            return false;
+        }
+
+        match self.groups.last().map(|group| group.kind) {
+            Some(Group::Strong | Group::Emph) => n >= 2,
+            Some(Group::Imports) => n >= 1,
+            Some(Group::Expr) if n >= 1 => {
+                // Allow else and method call to continue on next line.
+                self.groups.iter().nth_back(1).map(|group| group.kind)
+                    != Some(Group::Brace)
+                    || !matches!(
+                        self.tokens.clone().next(),
+                        Some(NodeKind::Else | NodeKind::Dot)
+                    )
+            }
+            _ => false,
+        }
+    }
+
+    /// Whether we are inside the given group (can be nested).
+    fn inside(&self, kind: Group) -> bool {
+        // When looking for a weak group, the search stops at the first
+        // non-weak group from the top of the stack.
+        self.groups
+            .iter()
+            .rev()
+            .take_while(|g| !kind.is_weak() || g.kind.is_weak())
+            .any(|g| g.kind == kind)
+    }
+}
+
+/// Error handling.
+impl Parser<'_> {
+    /// Eat the current token and add an error that it is unexpected.
+    pub fn unexpected(&mut self) {
+        let found = match self.peek() {
+            Some(found) => found,
+            // Nothing to report at the end of a group or file.
+            None => return,
+        };
+        let msg = format_eco!("unexpected {}", found.name());
+        let error = NodeKind::Error(ErrorPos::Full, msg);
+        self.perform(error, Self::eat);
+    }
+
+    /// Add an error that the `thing` was expected at the end of the last
+    /// non-trivia token.
+    pub fn expected(&mut self, thing: &str) {
+        let marker = self.trivia_start();
+        self.expected_at(marker, thing);
+    }
+
+    /// Insert an error message that `what` was expected at the marker position.
+    pub fn expected_at(&mut self, marker: Marker, what: &str) {
+        // A zero-length error leaf does not shift any byte offsets.
+        let error = NodeKind::Error(ErrorPos::Full, format_eco!("expected {}", what));
+        self.children.insert(marker.0, NodeData::new(error, 0).into());
+    }
+
+    /// Eat the current token and add an error that it is not the expected
+    /// `thing`.
+    pub fn expected_found(&mut self, thing: &str) {
+        if let Some(found) = self.peek() {
+            let msg = format_eco!("expected {}, found {}", thing, found.name());
+            let error = NodeKind::Error(ErrorPos::Full, msg);
+            self.perform(error, Self::eat);
+        } else {
+            self.expected(thing);
+        }
+    }
+}
+
+/// Marks a location in a parser's child list.
+///
+/// The wrapped index denotes a gap *between* children; `Copy`, so markers can
+/// be saved and replayed freely.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct Marker(usize);
+
+impl Marker {
+    /// Peek at the child directly before the marker.
+    pub fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
+        // A marker at position zero has no predecessor.
+        let idx = self.0.checked_sub(1)?;
+        p.children.get(idx)
+    }
+
+    /// Peek at the child directly after the marker.
+    pub fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
+        p.children.get(self.0)
+    }
+
+    /// Convert the child directly after the marker to the given kind.
+    pub fn convert(self, p: &mut Parser, kind: NodeKind) {
+        if let Some(child) = p.children.get_mut(self.0) {
+            child.convert(kind);
+        }
+    }
+
+    /// Perform a subparse that wraps all children after the marker in a node
+    /// with the given kind.
+    pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T
+    where
+        F: FnOnce(&mut Parser) -> T,
+    {
+        let output = f(p);
+        self.end(p, kind);
+        output
+    }
+
+    /// Wrap all children after the marker (excluding trailing trivia) in a
+    /// node with the given `kind`.
+    pub fn end(self, p: &mut Parser, kind: NodeKind) {
+        let stop = p.trivia_start().0.max(self.0);
+        let wrapped: Vec<_> = p.children.drain(self.0 .. stop).collect();
+        let node = InnerNode::with_children(kind, wrapped);
+        p.children.insert(self.0, node.into());
+    }
+
+    /// Wrap all children that do not fulfill the predicate in error nodes.
+    pub fn filter_children<F>(self, p: &mut Parser, mut f: F)
+    where
+        F: FnMut(&SyntaxNode) -> Result<(), &'static str>,
+    {
+        // The mode cannot change while iterating, so hoist the check.
+        let in_code = p.tokens.mode() != TokenMode::Markup;
+        for child in &mut p.children[self.0 ..] {
+            // Never double-wrap existing errors, and don't expose trivia in
+            // code mode.
+            if child.kind().is_error() || (in_code && child.kind().is_trivia()) {
+                continue;
+            }
+
+            let msg = match f(child) {
+                Ok(()) => continue,
+                Err(msg) => msg,
+            };
+
+            let mut full = EcoString::from(msg);
+            if full.starts_with("expected") {
+                full.push_str(", found ");
+                full.push_str(child.kind().name());
+            }
+
+            // Wrap the offending child in an error node.
+            let error = NodeKind::Error(ErrorPos::Full, full);
+            let inner = mem::take(child);
+            *child = InnerNode::with_child(error, inner).into();
+        }
+    }
+}
+
+/// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
+struct GroupEntry {
+    /// The kind of group this is. This decides which token(s) will end the
+    /// group. For example, a [`Group::Paren`] will be ended by
+    /// [`NodeKind::RightParen`].
+    pub kind: Group,
+    /// The mode the parser was in _before_ the group started (to which we go
+    /// back once the group ends).
+    pub prev_mode: TokenMode,
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+    /// A curly-braced group: `{...}`.
+    Brace,
+    /// A bracketed group: `[...]`.
+    Bracket,
+    /// A parenthesized group: `(...)`.
+    Paren,
+    /// A group surrounded with stars: `*...*`. A *weak* group (see
+    /// `is_weak`).
+    Strong,
+    /// A group surrounded with underscore: `_..._`. Also a weak group.
+    Emph,
+    /// A group surrounded by dollar signs: `$...$`.
+    Math,
+    /// A group ended by a semicolon or a line break: `;`, `\n`.
+    Expr,
+    /// A group for import items, ended by a semicolon, line break or `from`.
+    Imports,
+}
+
+impl Group {
+    /// Whether the group can only force other weak groups to end.
+    fn is_weak(self) -> bool {
+        match self {
+            Group::Strong | Group::Emph => true,
+            _ => false,
+        }
+    }
+}
+
+/// Allows parser methods to use the try operator. Never returned top-level
+/// because the parser recovers from all errors (by inserting error nodes into
+/// the tree instead of aborting).
+pub type ParseResult<T = ()> = Result<T, ParseError>;
+
+/// The error type for parsing.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct ParseError;
+
+impl Display for ParseError {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        // `pad` respects width/alignment formatting options.
+        f.pad("failed to parse")
+    }
+}
+
+// `ParseError` carries no payload, so the default trait methods suffice.
+impl std::error::Error for ParseError {}
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
new file mode 100644
index 00000000..10b4c4c2
--- /dev/null
+++ b/src/syntax/parsing.rs
@@ -0,0 +1,1140 @@
+use std::collections::HashSet;
+
+use super::ast::{Assoc, BinOp, UnOp};
+use super::{
+ ErrorPos, Group, Marker, NodeKind, ParseError, ParseResult, Parser, SyntaxNode,
+ TokenMode,
+};
+use crate::util::EcoString;
+
+/// Parse a source file.
+pub fn parse(text: &str) -> SyntaxNode {
+    let mut p = Parser::new(text, TokenMode::Markup);
+    markup(&mut p, true);
+    p.finish().into_iter().next().unwrap()
+}
+
+/// Parse code directly, only used for syntax highlighting.
+pub fn parse_code(text: &str) -> SyntaxNode {
+    let mut p = Parser::new(text, TokenMode::Code);
+    p.perform(NodeKind::CodeBlock, code);
+    p.finish().into_iter().next().unwrap()
+}
+
+/// Reparse a code block.
+///
+/// Returns `Some` with the replacement nodes if all of the input was consumed.
+pub(crate) fn reparse_code_block(
+    prefix: &str,
+    text: &str,
+    end_pos: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+    let mut p = Parser::with_prefix(prefix, text, TokenMode::Code);
+    if !p.at(NodeKind::LeftBrace) {
+        return None;
+    }
+
+    code_block(&mut p);
+
+    let (mut node, terminated) = p.consume()?;
+    let first = node.remove(0);
+    // The reparse is only valid if it produced exactly the replaced range.
+    if first.len() != end_pos {
+        return None;
+    }
+
+    Some((vec![first], terminated, 1))
+}
+
+/// Reparse a content block.
+///
+/// Returns `Some` with the replacement nodes if all of the input was consumed.
+pub(crate) fn reparse_content_block(
+    prefix: &str,
+    text: &str,
+    end_pos: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+    let mut p = Parser::with_prefix(prefix, text, TokenMode::Code);
+    if !p.at(NodeKind::LeftBracket) {
+        return None;
+    }
+
+    content_block(&mut p);
+
+    let (mut node, terminated) = p.consume()?;
+    let first = node.remove(0);
+    // The reparse is only valid if it produced exactly the replaced range.
+    if first.len() != end_pos {
+        return None;
+    }
+
+    Some((vec![first], terminated, 1))
+}
+
+/// Reparse a sequence of markup elements without the topmost node.
+///
+/// Returns `Some` if all of the input was consumed.
+pub(crate) fn reparse_markup_elements(
+    prefix: &str,
+    text: &str,
+    end_pos: usize,
+    differential: isize,
+    reference: &[SyntaxNode],
+    mut at_start: bool,
+    min_indent: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+    let mut p = Parser::with_prefix(prefix, text, TokenMode::Markup);
+
+    let mut node: Option<&SyntaxNode> = None;
+    let mut iter = reference.iter();
+    let mut offset = differential;
+    let mut replaced = 0;
+    let mut stopped = false;
+
+    'outer: while !p.eof() {
+        if let Some(NodeKind::Space { newlines: (1 ..) }) = p.peek() {
+            if p.column(p.current_end()) < min_indent {
+                return None;
+            }
+        }
+
+        markup_node(&mut p, &mut at_start);
+
+        if p.prev_end() <= end_pos {
+            continue;
+        }
+
+        let recent = p.marker().before(&p).unwrap();
+        let recent_start = p.prev_end() - recent.len();
+
+        while offset <= recent_start as isize {
+            if let Some(node) = node {
+                // The nodes are equal, at the same position and have the
+                // same content. The parsing trees have converged again, so
+                // the reparse may stop here.
+                if offset == recent_start as isize && node == recent {
+                    replaced -= 1;
+                    stopped = true;
+                    break 'outer;
+                }
+            }
+
+            if let Some(node) = node {
+                offset += node.len() as isize;
+            }
+
+            node = iter.next();
+            if node.is_none() {
+                break;
+            }
+
+            replaced += 1;
+        }
+    }
+
+    if p.eof() && !stopped {
+        replaced = reference.len();
+    }
+
+    let (mut res, terminated) = p.consume()?;
+    if stopped {
+        res.pop().unwrap();
+    }
+
+    Some((res, terminated, replaced))
+}
+
+/// Parse markup.
+///
+/// If `at_start` is true, things like headings that may only appear at the
+/// beginning of a line or content block are initially allowed.
+fn markup(p: &mut Parser, mut at_start: bool) {
+    p.perform(NodeKind::Markup { min_indent: 0 }, |p| {
+        while !p.eof() {
+            markup_node(p, &mut at_start);
+        }
+    });
+}
+
+/// Parse markup that stays to the right of the given `min_indent` column.
+fn markup_indented(p: &mut Parser, min_indent: usize) {
+    p.eat_while(|t| match t {
+        NodeKind::Space { newlines } => *newlines == 0,
+        NodeKind::LineComment | NodeKind::BlockComment => true,
+        _ => false,
+    });
+
+    let marker = p.marker();
+    let mut at_start = false;
+
+    while !p.eof() {
+        match p.peek() {
+            Some(NodeKind::Space { newlines: (1 ..) })
+                if p.column(p.current_end()) < min_indent =>
+            {
+                break;
+            }
+            _ => {}
+        }
+
+        markup_node(p, &mut at_start);
+    }
+
+    marker.end(p, NodeKind::Markup { min_indent });
+}
+
+/// Parse a line of markup that can prematurely end if `f` returns true.
+fn markup_line<F>(p: &mut Parser, mut f: F)
+where
+    F: FnMut(&NodeKind) -> bool,
+{
+    p.eat_while(|t| match t {
+        NodeKind::Space { newlines } => *newlines == 0,
+        NodeKind::LineComment | NodeKind::BlockComment => true,
+        _ => false,
+    });
+
+    p.perform(NodeKind::Markup { min_indent: usize::MAX }, |p| {
+        let mut at_start = false;
+        while let Some(kind) = p.peek() {
+            if let NodeKind::Space { newlines: (1 ..) } = kind {
+                break;
+            }
+
+            if f(kind) {
+                break;
+            }
+
+            markup_node(p, &mut at_start);
+        }
+    });
+}
+
+/// Parse a markup node.
+fn markup_node(p: &mut Parser, at_start: &mut bool) {
+    let token = match p.peek() {
+        Some(t) => t,
+        None => return,
+    };
+
+    match token {
+        // Whitespace.
+        NodeKind::Space { newlines } => {
+            *at_start |= *newlines > 0;
+            p.eat();
+            return;
+        }
+
+        // Comments.
+        NodeKind::LineComment | NodeKind::BlockComment => {
+            p.eat();
+            return;
+        }
+
+        // Text and markup.
+        NodeKind::Text(_)
+        | NodeKind::Linebreak
+        | NodeKind::SmartQuote { .. }
+        | NodeKind::Escape(_)
+        | NodeKind::Shorthand(_)
+        | NodeKind::Link(_)
+        | NodeKind::Raw(_)
+        | NodeKind::Label(_)
+        | NodeKind::Ref(_) => p.eat(),
+
+        // Math.
+        NodeKind::Dollar => math(p),
+
+        // Strong, emph, heading.
+        NodeKind::Star => strong(p),
+        NodeKind::Underscore => emph(p),
+        NodeKind::Eq => heading(p, *at_start),
+
+        // Lists.
+        NodeKind::Minus => list_node(p, *at_start),
+        NodeKind::Plus | NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
+        NodeKind::Slash => {
+            desc_node(p, *at_start).ok();
+        }
+        NodeKind::Colon => {
+            let marker = p.marker();
+            p.eat();
+            marker.convert(p, NodeKind::Text(':'.into()));
+        }
+
+        // Hashtag + keyword / identifier.
+        NodeKind::Ident(_)
+        | NodeKind::Let
+        | NodeKind::Set
+        | NodeKind::Show
+        | NodeKind::Wrap
+        | NodeKind::If
+        | NodeKind::While
+        | NodeKind::For
+        | NodeKind::Import
+        | NodeKind::Include
+        | NodeKind::Break
+        | NodeKind::Continue
+        | NodeKind::Return => markup_expr(p),
+
+        // Code and content block.
+        NodeKind::LeftBrace => code_block(p),
+        NodeKind::LeftBracket => content_block(p),
+
+        // Errors produced by the tokenizer: just consume them.
+        NodeKind::Error(_, _) => p.eat(),
+        _ => p.unexpected(),
+    };
+
+    *at_start = false;
+}
+
+/// Parse strong content.
+fn strong(p: &mut Parser) {
+    p.perform(NodeKind::Strong, |p| {
+        p.start_group(Group::Strong);
+        markup(p, false);
+        p.end_group();
+    })
+}
+
+/// Parse emphasized content.
+fn emph(p: &mut Parser) {
+    p.perform(NodeKind::Emph, |p| {
+        p.start_group(Group::Emph);
+        markup(p, false);
+        p.end_group();
+    })
+}
+
+/// Parse a heading.
+fn heading(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let current_start = p.current_start();
+    p.assert(NodeKind::Eq);
+    while p.eat_if(NodeKind::Eq) {}
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
+        p.eat_while(|kind| *kind == NodeKind::Space { newlines: 0 });
+        markup_line(p, |kind| matches!(kind, NodeKind::Label(_)));
+        marker.end(p, NodeKind::Heading);
+    } else {
+        // Not at the start of a line: the `=` signs are just text.
+        let text = p.get(current_start .. p.prev_end()).into();
+        marker.convert(p, NodeKind::Text(text));
+    }
+}
+
+/// Parse a single list item.
+fn list_node(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let text: EcoString = p.peek_src().into();
+    p.assert(NodeKind::Minus);
+
+    let min_indent = p.column(p.prev_end());
+    if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
+        markup_indented(p, min_indent);
+        marker.end(p, NodeKind::ListItem);
+    } else {
+        marker.convert(p, NodeKind::Text(text));
+    }
+}
+
+/// Parse a single enum item.
+fn enum_node(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let text: EcoString = p.peek_src().into();
+    p.eat();
+
+    let min_indent = p.column(p.prev_end());
+    if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
+        markup_indented(p, min_indent);
+        marker.end(p, NodeKind::EnumItem);
+    } else {
+        marker.convert(p, NodeKind::Text(text));
+    }
+}
+
+/// Parse a single description list item.
+fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult {
+    let marker = p.marker();
+    let text: EcoString = p.peek_src().into();
+    p.eat();
+
+    let min_indent = p.column(p.prev_end());
+    if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
+        markup_line(p, |node| matches!(node, NodeKind::Colon));
+        p.expect(NodeKind::Colon)?;
+        markup_indented(p, min_indent);
+        marker.end(p, NodeKind::DescItem);
+    } else {
+        marker.convert(p, NodeKind::Text(text));
+    }
+
+    Ok(())
+}
+
+/// Parse an expression within a markup mode.
+fn markup_expr(p: &mut Parser) {
+    // Does the expression need termination or can content follow directly?
+    let stmt = matches!(
+        p.peek(),
+        Some(
+            NodeKind::Let
+                | NodeKind::Set
+                | NodeKind::Show
+                | NodeKind::Wrap
+                | NodeKind::Import
+                | NodeKind::Include
+        )
+    );
+
+    p.start_group(Group::Expr);
+    let res = expr_prec(p, true, 0);
+    if stmt && res.is_ok() && !p.eof() {
+        p.expected("semicolon or line break");
+    }
+    p.end_group();
+}
+
+/// Parse math.
+fn math(p: &mut Parser) {
+    p.perform(NodeKind::Math, |p| {
+        p.start_group(Group::Math);
+        while !p.eof() {
+            math_node(p);
+        }
+        p.end_group();
+    });
+}
+
+/// Parse a math node.
+fn math_node(p: &mut Parser) {
+    math_node_prec(p, 0, None)
+}
+
+/// Parse a math node with operators having at least the minimum precedence.
+fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<NodeKind>) {
+    let marker = p.marker();
+    math_primary(p);
+
+    loop {
+        let (kind, mut prec, assoc, stop) = match p.peek() {
+            v if v == stop.as_ref() => break,
+            Some(NodeKind::Underscore) => {
+                (NodeKind::Script, 2, Assoc::Right, Some(NodeKind::Hat))
+            }
+            Some(NodeKind::Hat) => (
+                NodeKind::Script,
+                2,
+                Assoc::Right,
+                Some(NodeKind::Underscore),
+            ),
+            Some(NodeKind::Slash) => (NodeKind::Frac, 1, Assoc::Left, None),
+            _ => break,
+        };
+
+        if prec < min_prec {
+            break;
+        }
+
+        match assoc {
+            Assoc::Left => prec += 1,
+            Assoc::Right => {}
+        }
+
+        p.eat();
+        math_node_prec(p, prec, stop);
+
+        // Allow up to two different scripts. We do not risk encountering the
+        // previous script kind again here due to right-associativity.
+        if p.eat_if(NodeKind::Underscore) || p.eat_if(NodeKind::Hat) {
+            math_node_prec(p, prec, None);
+        }
+
+        marker.end(p, kind);
+    }
+}
+
+/// Parse a primary math node.
+fn math_primary(p: &mut Parser) {
+    let token = match p.peek() {
+        Some(t) => t,
+        None => return,
+    };
+
+    match token {
+        // Spaces, atoms and expressions.
+        NodeKind::Space { .. }
+        | NodeKind::Linebreak
+        | NodeKind::Escape(_)
+        | NodeKind::Atom(_)
+        | NodeKind::Ident(_) => p.eat(),
+
+        // Groups.
+        NodeKind::LeftParen => group(p, Group::Paren, '(', ')'),
+        NodeKind::LeftBracket => group(p, Group::Bracket, '[', ']'),
+        NodeKind::LeftBrace => group(p, Group::Brace, '{', '}'),
+
+        // Alignment indicator.
+        NodeKind::Amp => align(p),
+
+        _ => p.unexpected(),
+    }
+}
+
+/// Parse grouped math.
+fn group(p: &mut Parser, group: Group, l: char, r: char) {
+    p.perform(NodeKind::Math, |p| {
+        let marker = p.marker();
+        p.start_group(group);
+        marker.convert(p, NodeKind::Atom(l.into()));
+        while !p.eof() {
+            math_node(p);
+        }
+        let marker = p.marker();
+        p.end_group();
+        marker.convert(p, NodeKind::Atom(r.into()));
+    })
+}
+
+/// Parse an alignment indicator.
+fn align(p: &mut Parser) {
+    p.perform(NodeKind::Align, |p| {
+        p.assert(NodeKind::Amp);
+        while p.eat_if(NodeKind::Amp) {}
+    })
+}
+
+/// Parse an expression.
+fn expr(p: &mut Parser) -> ParseResult {
+    expr_prec(p, false, 0)
+}
+
+/// Parse an expression with operators having at least the minimum precedence.
+///
+/// If `atomic` is true, this does not parse binary operations and arrow
+/// functions, which is exactly what we want in a shorthand expression directly
+/// in markup.
+///
+/// Stops parsing at operations with lower precedence than `min_prec`.
+fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
+    let marker = p.marker();
+
+    // Start the unary expression.
+    match p.peek().and_then(UnOp::from_token) {
+        Some(op) if !atomic => {
+            p.eat();
+            let prec = op.precedence();
+            expr_prec(p, atomic, prec)?;
+            marker.end(p, NodeKind::Unary);
+        }
+        _ => primary(p, atomic)?,
+    };
+
+    loop {
+        // Parenthesis or bracket means this is a function call.
+        if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
+            marker.perform(p, NodeKind::FuncCall, args)?;
+            continue;
+        }
+
+        if atomic {
+            break;
+        }
+
+        // Method call or field access.
+        if p.eat_if(NodeKind::Dot) {
+            ident(p)?;
+            if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
+                marker.perform(p, NodeKind::MethodCall, args)?;
+            } else {
+                marker.end(p, NodeKind::FieldAccess);
+            }
+            continue;
+        }
+
+        let op = if p.eat_if(NodeKind::Not) {
+            if p.at(NodeKind::In) {
+                BinOp::NotIn
+            } else {
+                p.expected("keyword `in`");
+                return Err(ParseError);
+            }
+        } else {
+            match p.peek().and_then(BinOp::from_token) {
+                Some(binop) => binop,
+                None => break,
+            }
+        };
+
+        let mut prec = op.precedence();
+        if prec < min_prec {
+            break;
+        }
+
+        p.eat();
+
+        match op.assoc() {
+            Assoc::Left => prec += 1,
+            Assoc::Right => {}
+        }
+
+        marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, prec))?;
+    }
+
+    Ok(())
+}
+
+/// Parse a primary expression.
+fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
+    if literal(p) {
+        return Ok(());
+    }
+
+    match p.peek() {
+        // Things that start with an identifier.
+        Some(NodeKind::Ident(_)) => {
+            let marker = p.marker();
+            p.eat();
+
+            // Arrow means this is a closure's lone parameter.
+            if !atomic && p.at(NodeKind::Arrow) {
+                marker.end(p, NodeKind::Params);
+                p.assert(NodeKind::Arrow);
+                marker.perform(p, NodeKind::Closure, expr)
+            } else {
+                Ok(())
+            }
+        }
+
+        // Structures.
+        Some(NodeKind::LeftParen) => parenthesized(p, atomic),
+        Some(NodeKind::LeftBrace) => Ok(code_block(p)),
+        Some(NodeKind::LeftBracket) => Ok(content_block(p)),
+
+        // Keywords.
+        Some(NodeKind::Let) => let_expr(p),
+        Some(NodeKind::Set) => set_expr(p),
+        Some(NodeKind::Show) => show_expr(p),
+        Some(NodeKind::Wrap) => wrap_expr(p),
+        Some(NodeKind::If) => if_expr(p),
+        Some(NodeKind::While) => while_expr(p),
+        Some(NodeKind::For) => for_expr(p),
+        Some(NodeKind::Import) => import_expr(p),
+        Some(NodeKind::Include) => include_expr(p),
+        Some(NodeKind::Break) => break_expr(p),
+        Some(NodeKind::Continue) => continue_expr(p),
+        Some(NodeKind::Return) => return_expr(p),
+
+        Some(NodeKind::Error(_, _)) => {
+            p.eat();
+            Err(ParseError)
+        }
+
+        // Nothing: an expression is missing.
+        _ => {
+            p.expected_found("expression");
+            Err(ParseError)
+        }
+    }
+}
+
+/// Parse a literal.
+fn literal(p: &mut Parser) -> bool {
+    match p.peek() {
+        // Basic values.
+        Some(
+            NodeKind::None
+                | NodeKind::Auto
+                | NodeKind::Int(_)
+                | NodeKind::Float(_)
+                | NodeKind::Bool(_)
+                | NodeKind::Numeric(_, _)
+                | NodeKind::Str(_),
+        ) => {
+            p.eat();
+            true
+        }
+
+        _ => false,
+    }
+}
+
+/// Parse an identifier.
+fn ident(p: &mut Parser) -> ParseResult {
+    match p.peek() {
+        Some(NodeKind::Ident(_)) => {
+            p.eat();
+            Ok(())
+        }
+        _ => {
+            p.expected_found("identifier");
+            Err(ParseError)
+        }
+    }
+}
+
+/// Parse something that starts with a parenthesis, which can be either of:
+/// - Array literal
+/// - Dictionary literal
+/// - Parenthesized expression
+/// - Parameter list of closure expression
+fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
+    let marker = p.marker();
+
+    p.start_group(Group::Paren);
+    let colon = p.eat_if(NodeKind::Colon);
+    let kind = collection(p, true).0;
+    p.end_group();
+
+    // Leading colon makes this a dictionary.
+    if colon {
+        dict(p, marker);
+        return Ok(());
+    }
+
+    // Arrow means this is a closure's parameter list.
+    if !atomic && p.at(NodeKind::Arrow) {
+        params(p, marker);
+        p.assert(NodeKind::Arrow);
+        return marker.perform(p, NodeKind::Closure, expr);
+    }
+
+    // Transform into the identified collection.
+    match kind {
+        CollectionKind::Group => marker.end(p, NodeKind::Parenthesized),
+        CollectionKind::Positional => array(p, marker),
+        CollectionKind::Named => dict(p, marker),
+    }
+
+    Ok(())
+}
+
+/// The type of a collection.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum CollectionKind {
+    /// The collection is only one item and has no comma.
+    Group,
+    /// The collection starts with a positional item and has multiple items or a
+    /// trailing comma.
+    Positional,
+    /// The collection starts with a colon or named item.
+    Named,
+}
+
+/// Parse a collection.
+///
+/// Returns the identified kind of collection and the number of items that
+/// were parsed.
+fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) {
+    let mut kind = None;
+    let mut items = 0;
+    let mut can_group = true;
+    let mut missing_coma: Option<Marker> = None;
+
+    while !p.eof() {
+        if let Ok(item_kind) = item(p, keyed) {
+            match item_kind {
+                NodeKind::Spread => can_group = false,
+                NodeKind::Named if kind.is_none() => {
+                    kind = Some(CollectionKind::Named);
+                    can_group = false;
+                }
+                _ if kind.is_none() => {
+                    kind = Some(CollectionKind::Positional);
+                }
+                _ => {}
+            }
+
+            items += 1;
+
+            if let Some(marker) = missing_coma.take() {
+                p.expected_at(marker, "comma");
+            }
+
+            if p.eof() {
+                break;
+            }
+
+            if p.eat_if(NodeKind::Comma) {
+                can_group = false;
+            } else {
+                missing_coma = Some(p.trivia_start());
+            }
+        } else {
+            p.eat_if(NodeKind::Comma);
+            kind = Some(CollectionKind::Group);
+        }
+    }
+
+    let kind = if can_group && items == 1 {
+        CollectionKind::Group
+    } else {
+        kind.unwrap_or(CollectionKind::Positional)
+    };
+
+    (kind, items)
+}
+
+/// Parse an expression, a spread, or a named/keyed pair and return the node
+/// kind that identifies which kind of item was parsed.
+fn item(p: &mut Parser, keyed: bool) -> ParseResult<NodeKind> {
+    let marker = p.marker();
+    if p.eat_if(NodeKind::Dots) {
+        marker.perform(p, NodeKind::Spread, expr)?;
+        return Ok(NodeKind::Spread);
+    }
+
+    expr(p)?;
+
+    if p.at(NodeKind::Colon) {
+        match marker.after(p).map(|c| c.kind()) {
+            Some(NodeKind::Ident(_)) => {
+                p.eat();
+                marker.perform(p, NodeKind::Named, expr)?;
+            }
+            Some(NodeKind::Str(_)) if keyed => {
+                p.eat();
+                marker.perform(p, NodeKind::Keyed, expr)?;
+            }
+            kind => {
+                let mut msg = EcoString::from("expected identifier");
+                if keyed {
+                    msg.push_str(" or string");
+                }
+                if let Some(kind) = kind {
+                    msg.push_str(", found ");
+                    msg.push_str(kind.name());
+                }
+                let error = NodeKind::Error(ErrorPos::Full, msg);
+                marker.end(p, error);
+                p.eat();
+                marker.perform(p, NodeKind::Named, expr).ok();
+                return Err(ParseError);
+            }
+        }
+
+        Ok(NodeKind::Named)
+    } else {
+        Ok(NodeKind::None)
+    }
+}
+
+/// Convert a collection into an array, producing errors for anything other than
+/// expressions.
+fn array(p: &mut Parser, marker: Marker) {
+    marker.filter_children(p, |x| match x.kind() {
+        NodeKind::Named | NodeKind::Keyed => Err("expected expression"),
+        _ => Ok(()),
+    });
+    marker.end(p, NodeKind::Array);
+}
+
+/// Convert a collection into a dictionary, producing errors for anything other
+/// than named and keyed pairs.
+fn dict(p: &mut Parser, marker: Marker) {
+    let mut used = HashSet::new();
+    marker.filter_children(p, |x| match x.kind() {
+        kind if kind.is_paren() => Ok(()),
+        NodeKind::Named | NodeKind::Keyed => {
+            if let Some(NodeKind::Ident(key) | NodeKind::Str(key)) =
+                x.children().next().map(|child| child.kind())
+            {
+                if !used.insert(key.clone()) {
+                    return Err("pair has duplicate key");
+                }
+            }
+            Ok(())
+        }
+        NodeKind::Spread | NodeKind::Comma | NodeKind::Colon => Ok(()),
+        _ => Err("expected named or keyed pair"),
+    });
+    marker.end(p, NodeKind::Dict);
+}
+
+/// Convert a collection into a list of parameters, producing errors for
+/// anything other than identifiers, spread operations and named pairs.
+fn params(p: &mut Parser, marker: Marker) {
+    marker.filter_children(p, |x| match x.kind() {
+        kind if kind.is_paren() => Ok(()),
+        NodeKind::Named | NodeKind::Ident(_) | NodeKind::Comma => Ok(()),
+        NodeKind::Spread
+            if matches!(
+                x.children().last().map(|child| child.kind()),
+                Some(&NodeKind::Ident(_))
+            ) =>
+        {
+            Ok(())
+        }
+        _ => Err("expected identifier, named pair or argument sink"),
+    });
+    marker.end(p, NodeKind::Params);
+}
+
+/// Parse a code block: `{...}`.
+fn code_block(p: &mut Parser) {
+    p.perform(NodeKind::CodeBlock, |p| {
+        p.start_group(Group::Brace);
+        code(p);
+        p.end_group();
+    });
+}
+
+/// Parse a sequence of code expressions.
+fn code(p: &mut Parser) {
+    while !p.eof() {
+        p.start_group(Group::Expr);
+        if expr(p).is_ok() && !p.eof() {
+            p.expected("semicolon or line break");
+        }
+        p.end_group();
+
+        // Forcefully skip over newlines since the group's contents can't.
+        p.eat_while(NodeKind::is_space);
+    }
+}
+
+/// Parse a content block: `[...]`.
+fn content_block(p: &mut Parser) {
+    p.perform(NodeKind::ContentBlock, |p| {
+        p.start_group(Group::Bracket);
+        markup(p, true);
+        p.end_group();
+    });
+}
+
+/// Parse the arguments to a function call.
+fn args(p: &mut Parser) -> ParseResult {
+    match p.peek_direct() {
+        Some(NodeKind::LeftParen) => {}
+        Some(NodeKind::LeftBracket) => {}
+        _ => {
+            p.expected_found("argument list");
+            return Err(ParseError);
+        }
+    }
+
+    p.perform(NodeKind::Args, |p| {
+        if p.at(NodeKind::LeftParen) {
+            let marker = p.marker();
+            p.start_group(Group::Paren);
+            collection(p, false);
+            p.end_group();
+
+            let mut used = HashSet::new();
+            marker.filter_children(p, |x| match x.kind() {
+                NodeKind::Named => {
+                    if let Some(NodeKind::Ident(ident)) =
+                        x.children().next().map(|child| child.kind())
+                    {
+                        if !used.insert(ident.clone()) {
+                            return Err("duplicate argument");
+                        }
+                    }
+                    Ok(())
+                }
+                _ => Ok(()),
+            });
+        }
+
+        while p.peek_direct() == Some(&NodeKind::LeftBracket) {
+            content_block(p);
+        }
+    });
+
+    Ok(())
+}
+
+/// Parse a let expression.
+fn let_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::LetBinding, |p| {
+        p.assert(NodeKind::Let);
+
+        let marker = p.marker();
+        ident(p)?;
+
+        // If a parenthesis follows, this is a function definition.
+        let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
+        if has_params {
+            let marker = p.marker();
+            p.start_group(Group::Paren);
+            collection(p, false);
+            p.end_group();
+            params(p, marker);
+        }
+
+        if p.eat_if(NodeKind::Eq) {
+            expr(p)?;
+        } else if has_params {
+            // Function definitions must have a body.
+            p.expected("body");
+        }
+
+        // Rewrite into a closure expression if it's a function definition.
+        if has_params {
+            marker.end(p, NodeKind::Closure);
+        }
+
+        Ok(())
+    })
+}
+
+/// Parse a set expression.
+fn set_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::SetRule, |p| {
+        p.assert(NodeKind::Set);
+        ident(p)?;
+        args(p)
+    })
+}
+
+/// Parse a show expression.
+fn show_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ShowRule, |p| {
+        p.assert(NodeKind::Show);
+        let marker = p.marker();
+        expr(p)?;
+        if p.eat_if(NodeKind::Colon) {
+            marker.filter_children(p, |child| match child.kind() {
+                NodeKind::Ident(_) | NodeKind::Colon => Ok(()),
+                _ => Err("expected identifier"),
+            });
+            expr(p)?;
+        }
+        p.expect(NodeKind::As)?;
+        expr(p)
+    })
+}
+
+/// Parse a wrap expression.
+fn wrap_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::WrapRule, |p| {
+        p.assert(NodeKind::Wrap);
+        ident(p)?;
+        p.expect(NodeKind::In)?;
+        expr(p)
+    })
+}
+
+/// Parse an if-else expression.
+fn if_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::Conditional, |p| {
+        p.assert(NodeKind::If);
+
+        expr(p)?;
+        body(p)?;
+
+        if p.eat_if(NodeKind::Else) {
+            if p.at(NodeKind::If) {
+                if_expr(p)?;
+            } else {
+                body(p)?;
+            }
+        }
+
+        Ok(())
+    })
+}
+
+/// Parse a while expression.
+fn while_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::WhileLoop, |p| {
+        p.assert(NodeKind::While);
+        expr(p)?;
+        body(p)
+    })
+}
+
+/// Parse a for-in expression.
+fn for_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ForLoop, |p| {
+        p.assert(NodeKind::For);
+        for_pattern(p)?;
+        p.expect(NodeKind::In)?;
+        expr(p)?;
+        body(p)
+    })
+}
+
+/// Parse a for loop pattern.
+fn for_pattern(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ForPattern, |p| {
+        ident(p)?;
+        if p.eat_if(NodeKind::Comma) {
+            ident(p)?;
+        }
+        Ok(())
+    })
+}
+
+/// Parse an import expression.
+fn import_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ModuleImport, |p| {
+        p.assert(NodeKind::Import);
+
+        if !p.eat_if(NodeKind::Star) {
+            // This is the list of identifiers scenario.
+            p.perform(NodeKind::ImportItems, |p| {
+                p.start_group(Group::Imports);
+                let marker = p.marker();
+                let items = collection(p, false).1;
+                if items == 0 {
+                    p.expected("import items");
+                }
+                p.end_group();
+
+                marker.filter_children(p, |n| match n.kind() {
+                    NodeKind::Ident(_) | NodeKind::Comma => Ok(()),
+                    _ => Err("expected identifier"),
+                });
+            });
+        };
+
+        p.expect(NodeKind::From)?;
+        expr(p)
+    })
+}
+
+/// Parse an include expression.
+fn include_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ModuleInclude, |p| {
+        p.assert(NodeKind::Include);
+        expr(p)
+    })
+}
+
+/// Parse a break expression.
+fn break_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::BreakStmt, |p| {
+        p.assert(NodeKind::Break);
+        Ok(())
+    })
+}
+
+/// Parse a continue expression.
+fn continue_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ContinueStmt, |p| {
+        p.assert(NodeKind::Continue);
+        Ok(())
+    })
+}
+
+/// Parse a return expression.
+fn return_expr(p: &mut Parser) -> ParseResult {
+    p.perform(NodeKind::ReturnStmt, |p| {
+        p.assert(NodeKind::Return);
+        if !p.at(NodeKind::Comma) && !p.eof() {
+            expr(p)?;
+        }
+        Ok(())
+    })
+}
+
+/// Parse a control flow body.
+fn body(p: &mut Parser) -> ParseResult {
+    match p.peek() {
+        Some(NodeKind::LeftBracket) => Ok(content_block(p)),
+        Some(NodeKind::LeftBrace) => Ok(code_block(p)),
+        _ => {
+            p.expected("body");
+            Err(ParseError)
+        }
+    }
+}
diff --git a/src/syntax/resolve.rs b/src/syntax/resolve.rs
new file mode 100644
index 00000000..2ad35cec
--- /dev/null
+++ b/src/syntax/resolve.rs
@@ -0,0 +1,237 @@
+use unscanny::Scanner;
+
+use super::{is_ident, is_newline, RawKind};
+use crate::util::EcoString;
+
+/// Resolve all escape sequences in a string.
+pub fn resolve_string(string: &str) -> EcoString {
+    let mut out = EcoString::with_capacity(string.len());
+    let mut s = Scanner::new(string);
+
+    while let Some(c) = s.eat() {
+        if c != '\\' {
+            out.push(c);
+            continue;
+        }
+
+        let start = s.locate(-1);
+        match s.eat() {
+            Some('\\') => out.push('\\'),
+            Some('"') => out.push('"'),
+            Some('n') => out.push('\n'),
+            Some('r') => out.push('\r'),
+            Some('t') => out.push('\t'),
+            Some('u') if s.eat_if('{') => {
+                // TODO: Error if closing brace is missing.
+                let sequence = s.eat_while(char::is_ascii_hexdigit);
+                let _terminated = s.eat_if('}');
+                match resolve_hex(sequence) {
+                    Some(c) => out.push(c),
+                    None => out.push_str(s.from(start)),
+                }
+            }
+
+            // Invalid escape: keep the backslash sequence verbatim.
+            _ => out.push_str(s.from(start)),
+        }
+    }
+
+    out
+}
+
+/// Resolve a hexadecimal escape sequence into a character
+/// (only the inner hex letters without braces or `\u`).
+pub fn resolve_hex(sequence: &str) -> Option<char> {
+    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+}
+
+/// Resolve the language tag and trim the raw text.
+pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawKind {
+    // A single backtick does not allow a language tag.
+    if backticks > 1 {
+        let (tag, inner) = split_at_lang_tag(text);
+        let (text, block) = trim_and_split_raw(column, inner);
+        RawKind {
+            lang: is_ident(tag).then(|| tag.into()),
+            text: text.into(),
+            block,
+        }
+    } else {
+        RawKind {
+            lang: None,
+            text: split_lines(text).join("\n").into(),
+            block: false,
+        }
+    }
+}
+
+/// Parse the lang tag and return it alongside the remaining inner raw text.
+fn split_at_lang_tag(raw: &str) -> (&str, &str) {
+    let mut s = Scanner::new(raw);
+    (
+        s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)),
+        s.after(),
+    )
+}
+
+/// Trim raw text and split it into lines.
+///
+/// Also returns whether at least one newline was contained in `raw`.
+fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) {
+    // Trim one space at the start.
+    raw = raw.strip_prefix(' ').unwrap_or(raw);
+
+    // Trim one space at the end if the last non-whitespace char is a backtick.
+    if raw.trim_end().ends_with('`') {
+        raw = raw.strip_suffix(' ').unwrap_or(raw);
+    }
+
+    let mut lines = split_lines(raw);
+
+    // Dedent based on column, but not for the first line.
+    for line in lines.iter_mut().skip(1) {
+        let offset = line
+            .chars()
+            .take(column)
+            .take_while(|c| c.is_whitespace())
+            .map(char::len_utf8)
+            .sum();
+        *line = &line[offset ..];
+    }
+
+    let had_newline = lines.len() > 1;
+    let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
+
+    // Trim a sequence of whitespace followed by a newline at the start.
+    if lines.first().map_or(false, is_whitespace) {
+        lines.remove(0);
+    }
+
+    // Trim a newline followed by a sequence of whitespace at the end.
+    if lines.last().map_or(false, is_whitespace) {
+        lines.pop();
+    }
+
+    (lines.join("\n"), had_newline)
+}
+
+/// Split a string into a vector of lines
+/// (respecting Unicode, Unix, Mac and Windows line breaks).
+fn split_lines(text: &str) -> Vec<&str> {
+    let mut s = Scanner::new(text);
+    let mut lines = Vec::new();
+    let mut start = 0;
+    let mut end = 0;
+
+    while let Some(c) = s.eat() {
+        if is_newline(c) {
+            if c == '\r' {
+                s.eat_if('\n');
+            }
+
+            lines.push(&text[start .. end]);
+            start = s.cursor();
+        }
+        end = s.cursor();
+    }
+
+    lines.push(&text[start ..]);
+    lines
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_resolve_strings() {
+        #[track_caller]
+        fn test(string: &str, expected: &str) {
+            assert_eq!(resolve_string(string), expected);
+        }
+
+        test(r#"hello world"#,  "hello world");
+        test(r#"hello\nworld"#, "hello\nworld");
+        test(r#"a\"bc"#,        "a\"bc");
+        test(r#"a\u{2603}bc"#,  "a☃bc");
+        test(r#"a\u{26c3bg"#,   "a𦰻g");
+        test(r#"av\u{6797"#,    "av林");
+        test(r#"a\\"#,          "a\\");
+        test(r#"a\\\nbc"#,      "a\\\nbc");
+        test(r#"a\t\r\nbc"#,    "a\t\r\nbc");
+        test(r"🌎",              "🌎");
+        test(r"🌎\",             r"🌎\");
+        test(r"\🌎",             r"\🌎");
+    }
+
+    #[test]
+    fn test_split_at_lang_tag() {
+        #[track_caller]
+        fn test(text: &str, lang: &str, inner: &str) {
+            assert_eq!(split_at_lang_tag(text), (lang, inner));
+        }
+
+        test("typst it!",   "typst", " it!");
+        test("typst\n it!", "typst", "\n it!");
+        test("typst\n it!", "typst", "\n it!");
+        test("abc`",        "abc",   "`");
+        test(" hi",         "",      " hi");
+        test("`",           "",      "`");
+    }
+
+    #[test]
+    fn test_resolve_raw() {
+        #[track_caller]
+        fn test(
+            column: usize,
+            backticks: usize,
+            raw: &str,
+            lang: Option<&str>,
+            text: &str,
+            block: bool,
+        ) {
+            let node = resolve_raw(column, backticks, raw);
+            assert_eq!(node.lang.as_deref(), lang);
+            assert_eq!(node.text, text);
+            assert_eq!(node.block, block);
+        }
+
+        // Just one backtick.
+        test(0, 1, "py",     None, "py",   false);
+        test(0, 1, "1\n2",   None, "1\n2", false);
+        test(0, 1, "1\r\n2", None, "1\n2", false);
+
+        // More than one backtick with lang tag.
+        test(0, 2, "js alert()",     Some("js"), "alert()",    false);
+        test(0, 3, "py quit(\n\n)",  Some("py"), "quit(\n\n)", true);
+        test(0, 2, "♥",              None,       "",           false);
+
+        // Trimming of whitespace (tested more thoroughly in separate test).
+        test(0, 2, " a",   None, "a",  false);
+        test(0, 2, "  a",  None, " a", false);
+        test(0, 2, " \na", None, "a",  true);
+
+        // Dedenting based on the passed column.
+        test(2, 3, " def foo():\n    bar()", None, "def foo():\n  bar()", true);
+    }
+
+    #[test]
+    fn test_trim_raw() {
+        #[track_caller]
+        fn test(text: &str, expected: &str) {
+            assert_eq!(trim_and_split_raw(0, text).0, expected);
+        }
+
+        test(" hi",          "hi");
+        test("  hi",         " hi");
+        test("\nhi",         "hi");
+        test("    \n hi",    " hi");
+        test("hi` ",         "hi`");
+        test("hi`  ",        "hi` ");
+        test("hi`   ",       "hi`  ");
+        test("hi ",          "hi ");
+        test("hi  ",         "hi  ");
+        test("hi\n",         "hi");
+        test("hi \n   ",     "hi ");
+        test("  \n hi \n  ", " hi ");
+    }
+}
diff --git a/src/syntax/source.rs b/src/syntax/source.rs
new file mode 100644
index 00000000..1b87b1c9
--- /dev/null
+++ b/src/syntax/source.rs
@@ -0,0 +1,430 @@
+//! Source file management.
+
+use std::fmt::{self, Debug, Formatter};
+use std::hash::{Hash, Hasher};
+use std::ops::Range;
+use std::path::{Path, PathBuf};
+
+use comemo::Prehashed;
+use unscanny::Scanner;
+
+use crate::diag::SourceResult;
+use crate::syntax::ast::Markup;
+use crate::syntax::{is_newline, parse, reparse};
+use crate::syntax::{Span, SyntaxNode};
+use crate::util::{PathExt, StrExt};
+
+/// A source file.
+///
+/// All line and column indices start at zero, just like byte indices. Only for
+/// user-facing display, you should add 1 to them.
+pub struct Source {
+ id: SourceId,
+ path: PathBuf,
+ text: Prehashed<String>,
+ lines: Vec<Line>,
+ root: Prehashed<SyntaxNode>,
+}
+
+impl Source {
+ /// Create a new source file.
+ pub fn new(id: SourceId, path: &Path, text: String) -> Self {
+ let lines = std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
+ .chain(lines(0, 0, &text))
+ .collect();
+
+ let mut root = parse(&text);
+ root.numberize(id, Span::FULL).unwrap();
+
+ Self {
+ id,
+ path: path.normalize(),
+ text: Prehashed::new(text),
+ lines,
+ root: Prehashed::new(root),
+ }
+ }
+
+ /// Create a source file without a real id and path, usually for testing.
+ pub fn detached(text: impl Into<String>) -> Self {
+ Self::new(SourceId::detached(), Path::new(""), text.into())
+ }
+
+ /// Create a source file with the same synthetic span for all nodes.
+ pub fn synthesized(text: impl Into<String>, span: Span) -> Self {
+ let mut file = Self::detached(text);
+ let mut root = file.root.into_inner();
+ root.synthesize(span);
+ file.root = Prehashed::new(root);
+ file.id = span.source();
+ file
+ }
+
+ /// The root node of the file's untyped syntax tree.
+ pub fn root(&self) -> &SyntaxNode {
+ &self.root
+ }
+
+ /// The root node of the file's typed abstract syntax tree.
+ pub fn ast(&self) -> SourceResult<Markup> {
+ let errors = self.root.errors();
+ if errors.is_empty() {
+ Ok(self.root.cast().expect("root node must be markup"))
+ } else {
+ Err(Box::new(errors))
+ }
+ }
+
+ /// The id of the source file.
+ pub fn id(&self) -> SourceId {
+ self.id
+ }
+
+ /// The normalized path to the source file.
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+
+ /// The whole source as a string slice.
+ pub fn text(&self) -> &str {
+ &self.text
+ }
+
+ /// Slice out the part of the source code enclosed by the range.
+ pub fn get(&self, range: Range<usize>) -> Option<&str> {
+ self.text.get(range)
+ }
+
+ /// Fully replace the source text and increase the revision number.
+ pub fn replace(&mut self, text: String) {
+ self.text = Prehashed::new(text);
+ self.lines = vec![Line { byte_idx: 0, utf16_idx: 0 }];
+ self.lines.extend(lines(0, 0, &self.text));
+ let mut root = parse(&self.text);
+ root.numberize(self.id(), Span::FULL).unwrap();
+ self.root = Prehashed::new(root);
+ }
+
+ /// Edit the source file by replacing the given range and increase the
+ /// revision number.
+ ///
+ /// Returns the range in the new source that was ultimately reparsed.
+ ///
+ /// The method panics if the `replace` range is out of bounds.
+ pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
+ let start_byte = replace.start;
+ let start_utf16 = self.byte_to_utf16(replace.start).unwrap();
+ let mut text = std::mem::take(&mut self.text).into_inner();
+ text.replace_range(replace.clone(), with);
+ self.text = Prehashed::new(text);
+
+ // Remove invalidated line starts.
+ let line = self.byte_to_line(start_byte).unwrap();
+ self.lines.truncate(line + 1);
+
+ // Handle adjoining of \r and \n.
+ if self.text[.. start_byte].ends_with('\r') && with.starts_with('\n') {
+ self.lines.pop();
+ }
+
+ // Recalculate the line starts after the edit.
+ self.lines
+ .extend(lines(start_byte, start_utf16, &self.text[start_byte ..]));
+
+ // Incrementally reparse the replaced range.
+ let mut root = std::mem::take(&mut self.root).into_inner();
+ let range = reparse(&mut root, &self.text, replace, with.len());
+ self.root = Prehashed::new(root);
+ range
+ }
+
+ /// Get the length of the file in UTF-8 encoded bytes.
+ pub fn len_bytes(&self) -> usize {
+ self.text.len()
+ }
+
+ /// Get the length of the file in UTF-16 code units.
+ pub fn len_utf16(&self) -> usize {
+ let last = self.lines.last().unwrap();
+ last.utf16_idx + self.text[last.byte_idx ..].len_utf16()
+ }
+
+ /// Get the length of the file in lines.
+ pub fn len_lines(&self) -> usize {
+ self.lines.len()
+ }
+
+ /// Map a span that points into this source file to a byte range.
+ ///
+ /// Panics if the span does not point into this source file.
+ pub fn range(&self, span: Span) -> Range<usize> {
+ self.root
+ .range(span, 0)
+ .expect("span does not point into this source file")
+ }
+
+ /// Return the index of the UTF-16 code unit at the byte index.
+ pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
+ let line_idx = self.byte_to_line(byte_idx)?;
+ let line = self.lines.get(line_idx)?;
+ let head = self.text.get(line.byte_idx .. byte_idx)?;
+ Some(line.utf16_idx + head.len_utf16())
+ }
+
+ /// Return the index of the line that contains the given byte index.
+ pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
+ (byte_idx <= self.text.len()).then(|| {
+ match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
+ Ok(i) => i,
+ Err(i) => i - 1,
+ }
+ })
+ }
+
+ /// Return the index of the column at the byte index.
+ ///
+ /// The column is defined as the number of characters in the line before the
+ /// byte index.
+ pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
+ let line = self.byte_to_line(byte_idx)?;
+ let start = self.line_to_byte(line)?;
+ let head = self.get(start .. byte_idx)?;
+ Some(head.chars().count())
+ }
+
+ /// Return the byte index at the UTF-16 code unit.
+ pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
+ let line = self.lines.get(
+ match self.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
+ Ok(i) => i,
+ Err(i) => i - 1,
+ },
+ )?;
+
+ let mut k = line.utf16_idx;
+ for (i, c) in self.text[line.byte_idx ..].char_indices() {
+ if k >= utf16_idx {
+ return Some(line.byte_idx + i);
+ }
+ k += c.len_utf16();
+ }
+
+ (k == utf16_idx).then(|| self.text.len())
+ }
+
+
+ /// Return the byte position at which the given line starts.
+ pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
+ self.lines.get(line_idx).map(|line| line.byte_idx)
+ }
+
+ /// Return the range which encloses the given line.
+ pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
+ let start = self.line_to_byte(line_idx)?;
+ let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len());
+ Some(start .. end)
+ }
+
+ /// Return the byte index of the given (line, column) pair.
+ ///
+ /// The column defines the number of characters to go beyond the start of
+ /// the line.
+ pub fn line_column_to_byte(
+ &self,
+ line_idx: usize,
+ column_idx: usize,
+ ) -> Option<usize> {
+ let range = self.line_to_range(line_idx)?;
+ let line = self.get(range.clone())?;
+ let mut chars = line.chars();
+ for _ in 0 .. column_idx {
+ chars.next();
+ }
+ Some(range.start + (line.len() - chars.as_str().len()))
+ }
+}
+
+impl Debug for Source {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "Source({})", self.path.display())
+ }
+}
+
+impl Hash for Source {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.id.hash(state);
+ self.path.hash(state);
+ self.text.hash(state);
+ self.root.hash(state);
+ }
+}
+
+/// A unique identifier for a loaded source file.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct SourceId(u16);
+
+impl SourceId {
+ /// Create a new source id for a file that is not part of a store.
+ pub const fn detached() -> Self {
+ Self(u16::MAX)
+ }
+
+ /// Create a source id from a number.
+ pub const fn from_u16(v: u16) -> Self {
+ Self(v)
+ }
+
+ /// Extract the underlying number.
+ pub const fn into_u16(self) -> u16 {
+ self.0
+ }
+}
+
+/// Metadata about a line.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct Line {
+ /// The UTF-8 byte offset where the line starts.
+ byte_idx: usize,
+ /// The UTF-16 codepoint offset where the line starts.
+ utf16_idx: usize,
+}
+
+/// Iterate over the lines in the string.
+fn lines(
+ byte_offset: usize,
+ utf16_offset: usize,
+ text: &str,
+) -> impl Iterator<Item = Line> + '_ {
+ let mut s = Scanner::new(text);
+ let mut utf16_idx = utf16_offset;
+
+ std::iter::from_fn(move || {
+ s.eat_until(|c: char| {
+ utf16_idx += c.len_utf16();
+ is_newline(c)
+ });
+
+ if s.done() {
+ return None;
+ }
+
+ if s.eat() == Some('\r') && s.eat_if('\n') {
+ utf16_idx += 1;
+ }
+
+ Some(Line {
+ byte_idx: byte_offset + s.cursor(),
+ utf16_idx,
+ })
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
+
+ #[test]
+ fn test_source_file_new() {
+ let source = Source::detached(TEST);
+ assert_eq!(source.lines, [
+ Line { byte_idx: 0, utf16_idx: 0 },
+ Line { byte_idx: 7, utf16_idx: 6 },
+ Line { byte_idx: 15, utf16_idx: 12 },
+ Line { byte_idx: 18, utf16_idx: 15 },
+ ]);
+ }
+
+ #[test]
+ fn test_source_file_pos_to_line() {
+ let source = Source::detached(TEST);
+ assert_eq!(source.byte_to_line(0), Some(0));
+ assert_eq!(source.byte_to_line(2), Some(0));
+ assert_eq!(source.byte_to_line(6), Some(0));
+ assert_eq!(source.byte_to_line(7), Some(1));
+ assert_eq!(source.byte_to_line(8), Some(1));
+ assert_eq!(source.byte_to_line(12), Some(1));
+ assert_eq!(source.byte_to_line(21), Some(3));
+ assert_eq!(source.byte_to_line(22), None);
+ }
+
+ #[test]
+ fn test_source_file_pos_to_column() {
+ let source = Source::detached(TEST);
+ assert_eq!(source.byte_to_column(0), Some(0));
+ assert_eq!(source.byte_to_column(2), Some(1));
+ assert_eq!(source.byte_to_column(6), Some(5));
+ assert_eq!(source.byte_to_column(7), Some(0));
+ assert_eq!(source.byte_to_column(8), Some(1));
+ assert_eq!(source.byte_to_column(12), Some(2));
+ }
+
+ #[test]
+ fn test_source_file_utf16() {
+ #[track_caller]
+ fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
+ let middle = source.byte_to_utf16(byte_idx).unwrap();
+ let result = source.utf16_to_byte(middle).unwrap();
+ assert_eq!(middle, utf16_idx);
+ assert_eq!(result, byte_idx);
+ }
+
+ let source = Source::detached(TEST);
+ roundtrip(&source, 0, 0);
+ roundtrip(&source, 2, 1);
+ roundtrip(&source, 3, 2);
+ roundtrip(&source, 8, 7);
+ roundtrip(&source, 12, 9);
+ roundtrip(&source, 21, 18);
+ assert_eq!(source.byte_to_utf16(22), None);
+ assert_eq!(source.utf16_to_byte(19), None);
+ }
+
+ #[test]
+ fn test_source_file_roundtrip() {
+ #[track_caller]
+ fn roundtrip(source: &Source, byte_idx: usize) {
+ let line = source.byte_to_line(byte_idx).unwrap();
+ let column = source.byte_to_column(byte_idx).unwrap();
+ let result = source.line_column_to_byte(line, column).unwrap();
+ assert_eq!(result, byte_idx);
+ }
+
+ let source = Source::detached(TEST);
+ roundtrip(&source, 0);
+ roundtrip(&source, 7);
+ roundtrip(&source, 12);
+ roundtrip(&source, 21);
+ }
+
+ #[test]
+ fn test_source_file_edit() {
+ #[track_caller]
+ fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
+ let mut source = Source::detached(prev);
+ let result = Source::detached(after);
+ source.edit(range, with);
+ assert_eq!(source.text, result.text);
+ assert_eq!(source.lines, result.lines);
+ assert_eq!(*source.root, *result.root);
+ }
+
+ // Test inserting at the beginning.
+ test("abc\n", 0 .. 0, "hi\n", "hi\nabc\n");
+ test("\nabc", 0 .. 0, "hi\r", "hi\r\nabc");
+
+ // Test editing in the middle.
+ test(TEST, 4 .. 16, "❌", "ä\tc❌i\rjkl");
+
+ // Test appending.
+ test("abc\ndef", 7 .. 7, "hi", "abc\ndefhi");
+ test("abc\ndef\n", 8 .. 8, "hi", "abc\ndef\nhi");
+
+ // Test appending with adjoining \r and \n.
+ test("abc\ndef\r", 8 .. 8, "\nghi", "abc\ndef\r\nghi");
+
+ // Test removing everything.
+ test(TEST, 0 .. 21, "", "");
+ }
+}
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 744aa123..d4d9a8f6 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -2,7 +2,7 @@ use std::fmt::{self, Debug, Display, Formatter};
use std::num::NonZeroU64;
use std::ops::Range;
-use crate::syntax::SourceId;
+use super::SourceId;
/// A value with a span locating it in the source code.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
@@ -42,8 +42,8 @@ impl<T: Debug> Debug for Spanned<T> {
/// A unique identifier for a syntax node.
///
/// This is used throughout the compiler to track which source section an error
-/// or element stems from. Can be [mapped back](crate::source::Source::range)
-/// to a byte range for user facing display.
+/// or element stems from. Can be [mapped back](super::Source::range) to a byte
+/// range for user facing display.
///
/// Span ids are ordered in the tree to enable quickly finding the node with
/// some id:
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
new file mode 100644
index 00000000..8e1b6944
--- /dev/null
+++ b/src/syntax/tokens.rs
@@ -0,0 +1,1176 @@
+use std::sync::Arc;
+
+use unicode_xid::UnicodeXID;
+use unscanny::Scanner;
+
+use super::resolve::{resolve_hex, resolve_raw, resolve_string};
+use super::{ErrorPos, NodeKind, RawKind, Unit};
+use crate::geom::{AngleUnit, LengthUnit};
+use crate::util::EcoString;
+
+/// An iterator over the tokens of a string of source code.
+#[derive(Clone)]
+pub struct Tokens<'s> {
+ /// The underlying scanner.
+ s: Scanner<'s>,
+ /// The mode the scanner is in. This determines what tokens it recognizes.
+ mode: TokenMode,
+ /// Whether the last token has been terminated.
+ terminated: bool,
+ /// Offsets the indentation on the first line of the source.
+ column_offset: usize,
+}
+
+/// What kind of tokens to emit.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum TokenMode {
+ /// Text and markup.
+ Markup,
+ /// Math atoms, operators, etc.
+ Math,
+ /// Keywords, literals and operators.
+ Code,
+}
+
+impl<'s> Tokens<'s> {
+ /// Create a new token iterator with the given mode.
+ #[inline]
+ pub fn new(text: &'s str, mode: TokenMode) -> Self {
+ Self::with_prefix("", text, mode)
+ }
+
+ /// Create a new token iterator with the given mode and a prefix to offset
+ /// column calculations.
+ #[inline]
+ pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
+ Self {
+ s: Scanner::new(text),
+ mode,
+ terminated: true,
+ column_offset: column(prefix, prefix.len(), 0),
+ }
+ }
+
+ /// Get the current token mode.
+ #[inline]
+ pub fn mode(&self) -> TokenMode {
+ self.mode
+ }
+
+ /// Change the token mode.
+ #[inline]
+ pub fn set_mode(&mut self, mode: TokenMode) {
+ self.mode = mode;
+ }
+
+ /// The index in the string at which the last token ends and next token
+ /// will start.
+ #[inline]
+ pub fn cursor(&self) -> usize {
+ self.s.cursor()
+ }
+
+ /// Jump to the given index in the string.
+ #[inline]
+ pub fn jump(&mut self, index: usize) {
+ self.s.jump(index);
+ }
+
+ /// The underlying scanner.
+ #[inline]
+ pub fn scanner(&self) -> Scanner<'s> {
+ self.s
+ }
+
+ /// Whether the last token was terminated.
+ #[inline]
+ pub fn terminated(&self) -> bool {
+ self.terminated
+ }
+
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ column(self.s.string(), index, self.column_offset)
+ }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+ type Item = NodeKind;
+
+ /// Parse the next token in the source code.
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ let start = self.s.cursor();
+ let c = self.s.eat()?;
+ Some(match c {
+ // Trivia.
+ '/' if self.s.eat_if('/') => self.line_comment(),
+ '/' if self.s.eat_if('*') => self.block_comment(),
+ '*' if self.s.eat_if('/') => {
+ NodeKind::Error(ErrorPos::Full, "unexpected end of block comment".into())
+ }
+ c if c.is_whitespace() => self.whitespace(c),
+
+ // Other things.
+ _ => match self.mode {
+ TokenMode::Markup => self.markup(start, c),
+ TokenMode::Math => self.math(start, c),
+ TokenMode::Code => self.code(start, c),
+ },
+ })
+ }
+}
+
+impl<'s> Tokens<'s> {
+ fn line_comment(&mut self) -> NodeKind {
+ self.s.eat_until(is_newline);
+ if self.s.peek().is_none() {
+ self.terminated = false;
+ }
+ NodeKind::LineComment
+ }
+
+ fn block_comment(&mut self) -> NodeKind {
+ let mut state = '_';
+ let mut depth = 1;
+ self.terminated = false;
+
+ // Find the first `*/` that does not correspond to a nested `/*`.
+ while let Some(c) = self.s.eat() {
+ state = match (state, c) {
+ ('*', '/') => {
+ depth -= 1;
+ if depth == 0 {
+ self.terminated = true;
+ break;
+ }
+ '_'
+ }
+ ('/', '*') => {
+ depth += 1;
+ '_'
+ }
+ ('/', '/') => {
+ self.line_comment();
+ '_'
+ }
+ _ => c,
+ }
+ }
+
+ NodeKind::BlockComment
+ }
+
+ fn whitespace(&mut self, c: char) -> NodeKind {
+ if c == ' ' && !self.s.at(char::is_whitespace) {
+ return NodeKind::Space { newlines: 0 };
+ }
+
+ self.s.uneat();
+
+ // Count the number of newlines.
+ let mut newlines = 0;
+ while let Some(c) = self.s.eat() {
+ if !c.is_whitespace() {
+ self.s.uneat();
+ break;
+ }
+
+ if is_newline(c) {
+ if c == '\r' {
+ self.s.eat_if('\n');
+ }
+ newlines += 1;
+ }
+ }
+
+ NodeKind::Space { newlines }
+ }
+
+ #[inline]
+ fn markup(&mut self, start: usize, c: char) -> NodeKind {
+ match c {
+ // Blocks.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+
+ // Multi-char things.
+ '#' => self.hash(start),
+ '.' if self.s.eat_if("..") => NodeKind::Shorthand('\u{2026}'),
+ '-' => self.hyph(),
+ 'h' if self.s.eat_if("ttp://") || self.s.eat_if("ttps://") => {
+ self.link(start)
+ }
+ '`' => self.raw(),
+ c if c.is_ascii_digit() => self.numbering(start),
+ '<' => self.label(),
+ '@' => self.reference(start),
+
+ // Escape sequences.
+ '\\' => self.backslash(),
+
+ // Single-char things.
+ '~' => NodeKind::Shorthand('\u{00A0}'),
+ '\'' => NodeKind::SmartQuote { double: false },
+ '"' => NodeKind::SmartQuote { double: true },
+ '*' if !self.in_word() => NodeKind::Star,
+ '_' if !self.in_word() => NodeKind::Underscore,
+ '$' => NodeKind::Dollar,
+ '=' => NodeKind::Eq,
+ '+' => NodeKind::Plus,
+ '/' => NodeKind::Slash,
+ ':' => NodeKind::Colon,
+
+ // Plain text.
+ _ => self.text(start),
+ }
+ }
+
+ #[inline]
+ fn text(&mut self, start: usize) -> NodeKind {
+ macro_rules! table {
+ ($(|$c:literal)*) => {{
+ let mut t = [false; 128];
+ $(t[$c as usize] = true;)*
+ t
+ }}
+ }
+
+ const TABLE: [bool; 128] = table! {
+ | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
+ | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
+ | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
+ };
+
+ loop {
+ self.s.eat_until(|c: char| {
+ TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
+ });
+
+ // Continue with the same text node if the thing would become text
+ // anyway.
+ let mut s = self.s;
+ match s.eat() {
+ Some('/') if !s.at(['/', '*']) => {}
+ Some(' ') if s.at(char::is_alphanumeric) => {}
+ Some('-') if !s.at(['-', '?']) => {}
+ Some('.') if !s.at("..") => {}
+ Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
+ Some('@' | '#') if !s.at(is_id_start) => {}
+ _ => break,
+ }
+
+ self.s = s;
+ }
+
+ NodeKind::Text(self.s.from(start).into())
+ }
+
+ fn backslash(&mut self) -> NodeKind {
+ match self.s.peek() {
+ Some('u') if self.s.eat_if("u{") => {
+ let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
+ if self.s.eat_if('}') {
+ if let Some(c) = resolve_hex(sequence) {
+ NodeKind::Escape(c)
+ } else {
+ NodeKind::Error(
+ ErrorPos::Full,
+ "invalid unicode escape sequence".into(),
+ )
+ }
+ } else {
+ self.terminated = false;
+ NodeKind::Error(ErrorPos::End, "expected closing brace".into())
+ }
+ }
+
+ // Linebreaks.
+ Some(c) if c.is_whitespace() => NodeKind::Linebreak,
+ None => NodeKind::Linebreak,
+
+ // Escapes.
+ Some(c) => {
+ self.s.expect(c);
+ NodeKind::Escape(c)
+ }
+ }
+ }
+
+ fn hash(&mut self, start: usize) -> NodeKind {
+ if self.s.at(is_id_start) {
+ let read = self.s.eat_while(is_id_continue);
+ match keyword(read) {
+ Some(keyword) => keyword,
+ None => NodeKind::Ident(read.into()),
+ }
+ } else {
+ self.text(start)
+ }
+ }
+
+ fn hyph(&mut self) -> NodeKind {
+ if self.s.eat_if('-') {
+ if self.s.eat_if('-') {
+ NodeKind::Shorthand('\u{2014}')
+ } else {
+ NodeKind::Shorthand('\u{2013}')
+ }
+ } else if self.s.eat_if('?') {
+ NodeKind::Shorthand('\u{00AD}')
+ } else {
+ NodeKind::Minus
+ }
+ }
+
+ fn link(&mut self, start: usize) -> NodeKind {
+ #[rustfmt::skip]
+ self.s.eat_while(|c: char| matches!(c,
+ | '0' ..= '9'
+ | 'a' ..= 'z'
+ | 'A' ..= 'Z'
+ | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
+ | '\'' | '.' | ',' | ';'
+ ));
+ if self.s.scout(-1) == Some('.') {
+ self.s.uneat();
+ }
+ NodeKind::Link(self.s.from(start).into())
+ }
+
+ fn raw(&mut self) -> NodeKind {
+ let column = self.column(self.s.cursor() - 1);
+
+ let mut backticks = 1;
+ while self.s.eat_if('`') {
+ backticks += 1;
+ }
+
+ // Special case for empty inline block.
+ if backticks == 2 {
+ return NodeKind::Raw(Arc::new(RawKind {
+ text: EcoString::new(),
+ lang: None,
+ block: false,
+ }));
+ }
+
+ let start = self.s.cursor();
+ let mut found = 0;
+ while found < backticks {
+ match self.s.eat() {
+ Some('`') => found += 1,
+ Some(_) => found = 0,
+ None => break,
+ }
+ }
+
+ if found == backticks {
+ let end = self.s.cursor() - found as usize;
+ NodeKind::Raw(Arc::new(resolve_raw(
+ column,
+ backticks,
+ self.s.get(start .. end),
+ )))
+ } else {
+ self.terminated = false;
+ let remaining = backticks - found;
+ let noun = if remaining == 1 { "backtick" } else { "backticks" };
+ NodeKind::Error(
+ ErrorPos::End,
+ if found == 0 {
+ format_eco!("expected {} {}", remaining, noun)
+ } else {
+ format_eco!("expected {} more {}", remaining, noun)
+ },
+ )
+ }
+ }
+
+ fn numbering(&mut self, start: usize) -> NodeKind {
+ self.s.eat_while(char::is_ascii_digit);
+ let read = self.s.from(start);
+ if self.s.eat_if('.') {
+ if let Ok(number) = read.parse() {
+ return NodeKind::EnumNumbering(number);
+ }
+ }
+
+ self.text(start)
+ }
+
+ fn label(&mut self) -> NodeKind {
+ let label = self.s.eat_while(is_id_continue);
+ if self.s.eat_if('>') {
+ if !label.is_empty() {
+ NodeKind::Label(label.into())
+ } else {
+ NodeKind::Error(ErrorPos::Full, "label cannot be empty".into())
+ }
+ } else {
+ self.terminated = false;
+ NodeKind::Error(ErrorPos::End, "expected closing angle bracket".into())
+ }
+ }
+
+ fn reference(&mut self, start: usize) -> NodeKind {
+ let label = self.s.eat_while(is_id_continue);
+ if !label.is_empty() {
+ NodeKind::Ref(label.into())
+ } else {
+ self.text(start)
+ }
+ }
+
+ fn math(&mut self, start: usize, c: char) -> NodeKind {
+ match c {
+ // Escape sequences.
+ '\\' => self.backslash(),
+
+ // Single-char things.
+ '_' => NodeKind::Underscore,
+ '^' => NodeKind::Hat,
+ '/' => NodeKind::Slash,
+ '&' => NodeKind::Amp,
+ '$' => NodeKind::Dollar,
+
+ // Brackets.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+ '(' => NodeKind::LeftParen,
+ ')' => NodeKind::RightParen,
+
+ // Identifiers.
+ c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
+ self.s.eat_while(is_math_id_continue);
+ NodeKind::Ident(self.s.from(start).into())
+ }
+
+ // Numbers.
+ c if c.is_numeric() => {
+ self.s.eat_while(char::is_numeric);
+ NodeKind::Atom(self.s.from(start).into())
+ }
+
+ // Other math atoms.
+ c => NodeKind::Atom(c.into()),
+ }
+ }
+
+ fn code(&mut self, start: usize, c: char) -> NodeKind {
+ match c {
+ // Blocks.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+
+ // Parentheses.
+ '(' => NodeKind::LeftParen,
+ ')' => NodeKind::RightParen,
+
+ // Two-char operators.
+ '=' if self.s.eat_if('=') => NodeKind::EqEq,
+ '!' if self.s.eat_if('=') => NodeKind::ExclEq,
+ '<' if self.s.eat_if('=') => NodeKind::LtEq,
+ '>' if self.s.eat_if('=') => NodeKind::GtEq,
+ '+' if self.s.eat_if('=') => NodeKind::PlusEq,
+ '-' if self.s.eat_if('=') => NodeKind::HyphEq,
+ '*' if self.s.eat_if('=') => NodeKind::StarEq,
+ '/' if self.s.eat_if('=') => NodeKind::SlashEq,
+ '.' if self.s.eat_if('.') => NodeKind::Dots,
+ '=' if self.s.eat_if('>') => NodeKind::Arrow,
+
+ // Single-char operators.
+ ',' => NodeKind::Comma,
+ ';' => NodeKind::Semicolon,
+ ':' => NodeKind::Colon,
+ '+' => NodeKind::Plus,
+ '-' => NodeKind::Minus,
+ '*' => NodeKind::Star,
+ '/' => NodeKind::Slash,
+ '=' => NodeKind::Eq,
+ '<' => NodeKind::Lt,
+ '>' => NodeKind::Gt,
+ '.' if !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
+
+ // Identifiers.
+ c if is_id_start(c) => self.ident(start),
+
+ // Numbers.
+ c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
+ self.number(start, c)
+ }
+
+ // Strings.
+ '"' => self.string(),
+
+ // Invalid token.
+ _ => NodeKind::Error(ErrorPos::Full, "not valid here".into()),
+ }
+ }
+
+ fn ident(&mut self, start: usize) -> NodeKind {
+ self.s.eat_while(is_id_continue);
+ match self.s.from(start) {
+ "none" => NodeKind::None,
+ "auto" => NodeKind::Auto,
+ "true" => NodeKind::Bool(true),
+ "false" => NodeKind::Bool(false),
+ id => keyword(id).unwrap_or_else(|| NodeKind::Ident(id.into())),
+ }
+ }
+
+ fn number(&mut self, start: usize, c: char) -> NodeKind {
+ // Read the first part (integer or fractional depending on `c`).
+ self.s.eat_while(char::is_ascii_digit);
+
+ // Read the fractional part if not already done.
+ // Make sure not to confuse a range for the decimal separator.
+ if c != '.' && !self.s.at("..") && self.s.eat_if('.') {
+ self.s.eat_while(char::is_ascii_digit);
+ }
+
+ // Read the exponent.
+ if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
+ self.s.eat_if(['+', '-']);
+ self.s.eat_while(char::is_ascii_digit);
+ }
+
+ // Read the suffix.
+ let suffix_start = self.s.cursor();
+ if !self.s.eat_if('%') {
+ self.s.eat_while(char::is_ascii_alphanumeric);
+ }
+
+ let number = self.s.get(start .. suffix_start);
+ let suffix = self.s.from(suffix_start);
+
+ // Find out whether it is a simple number.
+ if suffix.is_empty() {
+ if let Ok(i) = number.parse::<i64>() {
+ return NodeKind::Int(i);
+ }
+ }
+
+ let v = match number.parse::<f64>() {
+ Ok(v) => v,
+ Err(_) => return NodeKind::Error(ErrorPos::Full, "invalid number".into()),
+ };
+
+ match suffix {
+ "" => NodeKind::Float(v),
+ "pt" => NodeKind::Numeric(v, Unit::Length(LengthUnit::Pt)),
+ "mm" => NodeKind::Numeric(v, Unit::Length(LengthUnit::Mm)),
+ "cm" => NodeKind::Numeric(v, Unit::Length(LengthUnit::Cm)),
+ "in" => NodeKind::Numeric(v, Unit::Length(LengthUnit::In)),
+ "deg" => NodeKind::Numeric(v, Unit::Angle(AngleUnit::Deg)),
+ "rad" => NodeKind::Numeric(v, Unit::Angle(AngleUnit::Rad)),
+ "em" => NodeKind::Numeric(v, Unit::Em),
+ "fr" => NodeKind::Numeric(v, Unit::Fr),
+ "%" => NodeKind::Numeric(v, Unit::Percent),
+ _ => NodeKind::Error(ErrorPos::Full, "invalid number suffix".into()),
+ }
+ }
+
+ fn string(&mut self) -> NodeKind {
+ let mut escaped = false;
+ let verbatim = self.s.eat_until(|c| {
+ if c == '"' && !escaped {
+ true
+ } else {
+ escaped = c == '\\' && !escaped;
+ false
+ }
+ });
+
+ let string = resolve_string(verbatim);
+ if self.s.eat_if('"') {
+ NodeKind::Str(string)
+ } else {
+ self.terminated = false;
+ NodeKind::Error(ErrorPos::End, "expected quote".into())
+ }
+ }
+
+ fn in_word(&self) -> bool {
+ let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.scout(-2);
+ let next = self.s.peek();
+ alphanumeric(prev) && alphanumeric(next)
+ }
+}
+
+fn keyword(ident: &str) -> Option<NodeKind> {
+ Some(match ident {
+ "not" => NodeKind::Not,
+ "and" => NodeKind::And,
+ "or" => NodeKind::Or,
+ "let" => NodeKind::Let,
+ "set" => NodeKind::Set,
+ "show" => NodeKind::Show,
+ "wrap" => NodeKind::Wrap,
+ "if" => NodeKind::If,
+ "else" => NodeKind::Else,
+ "for" => NodeKind::For,
+ "in" => NodeKind::In,
+ "as" => NodeKind::As,
+ "while" => NodeKind::While,
+ "break" => NodeKind::Break,
+ "continue" => NodeKind::Continue,
+ "return" => NodeKind::Return,
+ "import" => NodeKind::Import,
+ "include" => NodeKind::Include,
+ "from" => NodeKind::From,
+ _ => return None,
+ })
+}
+
+/// The column index of a given index in the source string, given a column
+/// offset for the first line.
+#[inline]
+fn column(string: &str, index: usize, offset: usize) -> usize {
+ let mut apply_offset = false;
+ let res = string[.. index]
+ .char_indices()
+ .rev()
+ .take_while(|&(_, c)| !is_newline(c))
+ .inspect(|&(i, _)| {
+ if i == 0 {
+ apply_offset = true
+ }
+ })
+ .count();
+
+ // The loop is never executed if the slice is empty, but we are of
+ // course still at the start of the first line.
+ if index == 0 {
+ apply_offset = true;
+ }
+
+ if apply_offset { res + offset } else { res }
+}
+
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+ matches!(
+ character,
+ // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+ '\n' | '\x0B' | '\x0C' | '\r' |
+ // Next Line, Line Separator, Paragraph Separator.
+ '\u{0085}' | '\u{2028}' | '\u{2029}'
+ )
+}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+ let mut chars = string.chars();
+ chars
+ .next()
+ .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+fn is_id_start(c: char) -> bool {
+ c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+fn is_id_continue(c: char) -> bool {
+ c.is_xid_continue() || c == '_' || c == '-'
+}
+
+/// Whether a character can start an identifier in math.
+#[inline]
+fn is_math_id_start(c: char) -> bool {
+ c.is_xid_start()
+}
+
+/// Whether a character can continue an identifier in math.
+#[inline]
+fn is_math_id_continue(c: char) -> bool {
+ c.is_xid_continue() && c != '_'
+}
+
+#[cfg(test)]
+#[allow(non_snake_case)]
+mod tests {
+ // Token-level tests for the markup and code tokenizers.
+ use super::super::tests::check;
+ use super::*;
+
+ use ErrorPos::*;
+ use NodeKind::*;
+ use Option::None;
+ use TokenMode::{Code, Markup};
+
+ // Shorthand constructors so the fixtures below read like plain token
+ // streams; their names mirror the `NodeKind` variants, hence the
+ // `non_snake_case` allowance on the module.
+ fn Space(newlines: usize) -> NodeKind {
+ NodeKind::Space { newlines }
+ }
+
+ fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind {
+ NodeKind::Raw(Arc::new(RawKind {
+ text: text.into(),
+ lang: lang.map(Into::into),
+ block,
+ }))
+ }
+
+ fn Str(string: &str) -> NodeKind {
+ NodeKind::Str(string.into())
+ }
+
+ fn Text(string: &str) -> NodeKind {
+ NodeKind::Text(string.into())
+ }
+
+ fn Ident(ident: &str) -> NodeKind {
+ NodeKind::Ident(ident.into())
+ }
+
+ fn Error(pos: ErrorPos, message: &str) -> NodeKind {
+ NodeKind::Error(pos, message.into())
+ }
+
+ /// Building blocks for suffix testing.
+ ///
+ /// We extend each test case with a collection of different suffixes to make
+ /// sure tokens end at the correct position. These suffixes are split into
+ /// blocks, which can be disabled/enabled per test case. For example, when
+ /// testing identifiers we disable letter suffixes because these would
+ /// mingle with the identifiers.
+ ///
+ /// Suffix blocks:
+ /// - ' ': spacing
+ /// - 'a': letters
+ /// - '1': numbers
+ /// - '/': symbols
+ const BLOCKS: &str = " a1/";
+
+ // Suffixes described by four-tuples of:
+ //
+ // - block the suffix is part of
+ // - mode in which the suffix is applicable
+ // - the suffix string
+ // - the resulting suffix NodeKind
+ fn suffixes()
+ -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, NodeKind)> {
+ [
+ // Whitespace suffixes.
+ (' ', None, " ", Space(0)),
+ (' ', None, "\n", Space(1)),
+ (' ', None, "\r", Space(1)),
+ (' ', None, "\r\n", Space(1)),
+ // Letter suffixes.
+ ('a', Some(Markup), "hello", Text("hello")),
+ ('a', Some(Markup), "💚", Text("💚")),
+ ('a', Some(Code), "val", Ident("val")),
+ ('a', Some(Code), "α", Ident("α")),
+ ('a', Some(Code), "_", Ident("_")),
+ // Number suffixes.
+ ('1', Some(Code), "2", Int(2)),
+ ('1', Some(Code), ".2", Float(0.2)),
+ // Symbol suffixes.
+ ('/', None, "[", LeftBracket),
+ ('/', None, "//", LineComment),
+ ('/', None, "/**/", BlockComment),
+ ('/', Some(Markup), "*", Star),
+ ('/', Some(Markup), r"\\", Escape('\\')),
+ ('/', Some(Markup), "#let", Let),
+ ('/', Some(Code), "(", LeftParen),
+ ('/', Some(Code), ":", Colon),
+ ('/', Some(Code), "+=", PlusEq),
+ ]
+ .into_iter()
+ }
+
+ // Tokenizes the given text and checks it against the expected tokens.
+ //
+ // Arms:
+ // - `t!(Both ...)`: run the same case in both Markup and Code mode.
+ // - `t!(Mode[$blocks]: text => tokens)`: restrict which suffix blocks
+ // (see `BLOCKS`) are appended to the text; without `[$blocks]`, all
+ // suffix blocks apply.
+ // - `t!(@Mode: ...)`: internal arm performing one tokenize-and-compare.
+ macro_rules! t {
+ (Both $($tts:tt)*) => {
+ t!(Markup $($tts)*);
+ t!(Code $($tts)*);
+ };
+ ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
+ // Test without suffix.
+ t!(@$mode: $text => $($token),*);
+
+ // Test with each applicable suffix.
+ for (block, mode, suffix, ref token) in suffixes() {
+ let text = $text;
+ #[allow(unused_variables)]
+ let blocks = BLOCKS;
+ $(let blocks = $blocks;)?
+ // Sanity check: the requested blocks must be a subset of BLOCKS.
+ assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
+ if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
+ t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
+ }
+ }
+ }};
+ (@$mode:ident: $text:expr => $($token:expr),*) => {{
+ let text = $text;
+ let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
+ let expected = vec![$($token.clone()),*];
+ check(&text, found, expected);
+ }};
+ }
+
+ #[test]
+ fn test_tokenize_brackets() {
+ // Test in markup.
+ t!(Markup: "{" => LeftBrace);
+ t!(Markup: "}" => RightBrace);
+ t!(Markup: "[" => LeftBracket);
+ t!(Markup: "]" => RightBracket);
+ t!(Markup[" /"]: "(" => Text("("));
+ t!(Markup[" /"]: ")" => Text(")"));
+
+ // Test in code.
+ t!(Code: "{" => LeftBrace);
+ t!(Code: "}" => RightBrace);
+ t!(Code: "[" => LeftBracket);
+ t!(Code: "]" => RightBracket);
+ t!(Code: "(" => LeftParen);
+ t!(Code: ")" => RightParen);
+ }
+
+ #[test]
+ fn test_tokenize_whitespace() {
+ // Test basic whitespace.
+ t!(Both["a1/"]: "" => );
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: "\t" => Space(0));
+ t!(Both["a1/"]: " \t" => Space(0));
+ // Narrow no-break space counts as whitespace but not as a newline.
+ t!(Both["a1/"]: "\u{202F}" => Space(0));
+
+ // Test newline counting.
+ t!(Both["a1/"]: "\n" => Space(1));
+ t!(Both["a1/"]: "\n " => Space(1));
+ t!(Both["a1/"]: " \n" => Space(1));
+ t!(Both["a1/"]: " \n " => Space(1));
+ // A \r\n pair counts as a single newline.
+ t!(Both["a1/"]: "\r\n" => Space(1));
+ t!(Both["a1/"]: "\r\n\r" => Space(2));
+ t!(Both["a1/"]: " \n\t \n " => Space(2));
+ t!(Both["a1/"]: "\n\r" => Space(2));
+ t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
+ }
+
+ #[test]
+ fn test_tokenize_text() {
+ // Test basic text.
+ t!(Markup[" /"]: "hello" => Text("hello"));
+ t!(Markup[" /"]: "reha-world" => Text("reha-world"));
+
+ // Test code symbols in text.
+ t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
+ t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
+ t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
+ t!(Markup[" "]: "#123" => Text("#123"));
+
+ // Test text ends.
+ t!(Markup[""]: "hello " => Text("hello"), Space(0));
+ t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
+ }
+
+ #[test]
+ fn test_tokenize_escape_sequences() {
+ // Test escapable symbols.
+ t!(Markup: r"\\" => Escape('\\'));
+ t!(Markup: r"\/" => Escape('/'));
+ t!(Markup: r"\[" => Escape('['));
+ t!(Markup: r"\]" => Escape(']'));
+ t!(Markup: r"\{" => Escape('{'));
+ t!(Markup: r"\}" => Escape('}'));
+ t!(Markup: r"\*" => Escape('*'));
+ t!(Markup: r"\_" => Escape('_'));
+ t!(Markup: r"\=" => Escape('='));
+ t!(Markup: r"\~" => Escape('~'));
+ t!(Markup: r"\'" => Escape('\''));
+ t!(Markup: r#"\""# => Escape('"'));
+ t!(Markup: r"\`" => Escape('`'));
+ t!(Markup: r"\$" => Escape('$'));
+ t!(Markup: r"\#" => Escape('#'));
+ t!(Markup: r"\a" => Escape('a'));
+ t!(Markup: r"\u" => Escape('u'));
+ t!(Markup: r"\1" => Escape('1'));
+
+ // Test basic unicode escapes.
+ t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
+ t!(Markup: r"\u{2603}" => Escape('☃'));
+ t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
+
+ // Test unclosed unicode escapes.
+ t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
+ t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
+ }
+
+ #[test]
+ fn test_tokenize_markup_symbols() {
+ // Test markup tokens.
+ t!(Markup[" a1"]: "*" => Star);
+ t!(Markup: "_" => Underscore);
+ t!(Markup[""]: "===" => Eq, Eq, Eq);
+ t!(Markup["a1/"]: "= " => Eq, Space(0));
+ t!(Markup[" "]: r"\" => Linebreak);
+ t!(Markup: "~" => Shorthand('\u{00A0}'));
+ t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
+ t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
+ t!(Markup["a1/"]: "- " => Minus, Space(0));
+ t!(Markup[" "]: "+" => Plus);
+ t!(Markup[" "]: "1." => EnumNumbering(1));
+ t!(Markup[" "]: "1.a" => EnumNumbering(1), Text("a"));
+ t!(Markup[" /"]: "a1." => Text("a1."));
+ }
+
+ #[test]
+ fn test_tokenize_code_symbols() {
+ // Test all symbols.
+ t!(Code: "," => Comma);
+ t!(Code: ";" => Semicolon);
+ t!(Code: ":" => Colon);
+ t!(Code: "+" => Plus);
+ t!(Code: "-" => Minus);
+ t!(Code[" a1"]: "*" => Star);
+ t!(Code[" a1"]: "/" => Slash);
+ t!(Code[" a/"]: "." => Dot);
+ t!(Code: "=" => Eq);
+ t!(Code: "==" => EqEq);
+ t!(Code: "!=" => ExclEq);
+ t!(Code: "<" => Lt);
+ t!(Code: "<=" => LtEq);
+ t!(Code: ">" => Gt);
+ t!(Code: ">=" => GtEq);
+ t!(Code: "+=" => PlusEq);
+ t!(Code: "-=" => HyphEq);
+ t!(Code: "*=" => StarEq);
+ t!(Code: "/=" => SlashEq);
+ t!(Code: ".." => Dots);
+ t!(Code: "=>" => Arrow);
+
+ // Test combinations.
+ t!(Code: "<=>" => LtEq, Gt);
+ t!(Code[" a/"]: "..." => Dots, Dot);
+
+ // Test hyphen as symbol vs part of identifier.
+ t!(Code[" /"]: "-1" => Minus, Int(1));
+ t!(Code[" /"]: "-a" => Minus, Ident("a"));
+ t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
+ t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
+ t!(Code[" /"]: "a-b" => Ident("a-b"));
+
+ // Test invalid.
+ t!(Code: r"\" => Error(Full, "not valid here"));
+ }
+
+ #[test]
+ fn test_tokenize_keywords() {
+ // A list of a few (not all) keywords.
+ let list = [
+ ("not", Not),
+ ("let", Let),
+ ("if", If),
+ ("else", Else),
+ ("for", For),
+ ("in", In),
+ ("import", Import),
+ ];
+
+ // In markup, keywords are only recognized after a hash.
+ for (s, t) in list.clone() {
+ t!(Markup[" "]: format!("#{}", s) => t);
+ t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
+ t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
+ }
+
+ // In code, keywords are recognized bare; in markup they stay text.
+ for (s, t) in list {
+ t!(Code[" "]: s => t);
+ t!(Markup[" /"]: s => Text(s));
+ }
+
+ // Test simple identifier.
+ t!(Markup[" "]: "#letter" => Ident("letter"));
+ t!(Code[" /"]: "falser" => Ident("falser"));
+ t!(Code[" /"]: "None" => Ident("None"));
+ t!(Code[" /"]: "True" => Ident("True"));
+ }
+
+ #[test]
+ fn test_tokenize_raw_blocks() {
+ // Test basic raw block.
+ t!(Markup: "``" => Raw("", None, false));
+ t!(Markup: "`raw`" => Raw("raw", None, false));
+ t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
+
+ // Test special symbols in raw block.
+ t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
+ t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
+
+ // Test separated closing backticks.
+ t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
+
+ // Test more backticks.
+ t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
+ t!(Markup: "````🚀````" => Raw("", None, false));
+ t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
+ t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
+ }
+
+ #[test]
+ fn test_tokenize_idents() {
+ // Test valid identifiers.
+ t!(Code[" /"]: "x" => Ident("x"));
+ t!(Code[" /"]: "value" => Ident("value"));
+ t!(Code[" /"]: "__main__" => Ident("__main__"));
+ t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));
+
+ // Test non-ascii.
+ t!(Code[" /"]: "α" => Ident("α"));
+ t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));
+
+ // Test hyphen parsed as identifier.
+ t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
+ t!(Code[" /"]: "one-10" => Ident("one-10"));
+ }
+
+ #[test]
+ fn test_tokenize_numeric() {
+ let ints = [("7", 7), ("012", 12)];
+ let floats = [
+ (".3", 0.3),
+ ("0.3", 0.3),
+ ("3.", 3.0),
+ ("3.0", 3.0),
+ ("14.3", 14.3),
+ ("10e2", 1000.0),
+ ("10e+0", 10.0),
+ ("10e+1", 100.0),
+ ("10e-2", 0.1),
+ ("10.e1", 100.0),
+ ("10.e-1", 1.0),
+ (".1e1", 1.0),
+ ("10E2", 1000.0),
+ ];
+
+ // Test integers.
+ for &(s, v) in &ints {
+ t!(Code[" /"]: s => Int(v));
+ }
+
+ // Test floats.
+ for &(s, v) in &floats {
+ t!(Code[" /"]: s => Float(v));
+ }
+
+ // Test attached numbers.
+ t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
+ t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
+ t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
+
+ // Test float from too large integer.
+ let large = i64::MAX as f64 + 1.0;
+ t!(Code[" /"]: large.to_string() => Float(large));
+
+ // Combined integers and floats.
+ let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
+
+ // Every unit suffix paired with a builder for the expected token.
+ let suffixes: &[(&str, fn(f64) -> NodeKind)] = &[
+ ("mm", |x| Numeric(x, Unit::Length(LengthUnit::Mm))),
+ ("pt", |x| Numeric(x, Unit::Length(LengthUnit::Pt))),
+ ("cm", |x| Numeric(x, Unit::Length(LengthUnit::Cm))),
+ ("in", |x| Numeric(x, Unit::Length(LengthUnit::In))),
+ ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
+ ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
+ ("em", |x| Numeric(x, Unit::Em)),
+ ("fr", |x| Numeric(x, Unit::Fr)),
+ ("%", |x| Numeric(x, Unit::Percent)),
+ ];
+
+ // Numeric types.
+ for &(suffix, build) in suffixes {
+ for (s, v) in nums.clone() {
+ t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
+ }
+ }
+
+ // Multiple dots close the number.
+ t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
+ t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
+ t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
+
+ // Test invalid.
+ t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
+ }
+
+ #[test]
+ fn test_tokenize_strings() {
+ // Test basic strings.
+ t!(Code: "\"hi\"" => Str("hi"));
+ t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
+ t!(Code: "\"🌎\"" => Str("🌎"));
+
+ // Test unterminated.
+ t!(Code[""]: "\"hi" => Error(End, "expected quote"));
+
+ // Test escaped quote.
+ t!(Code: r#""a\"bc""# => Str("a\"bc"));
+ t!(Code[""]: r#""\""# => Error(End, "expected quote"));
+ }
+
+ #[test]
+ fn test_tokenize_line_comments() {
+ // Test line comment with no trailing newline.
+ t!(Both[""]: "//" => LineComment);
+
+ // Test line comment ends at newline.
+ t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
+ t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
+ t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
+
+ // Test nested line comments.
+ t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
+ }
+
+ #[test]
+ fn test_tokenize_block_comments() {
+ // Test basic block comments.
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both: "/**/" => BlockComment);
+ t!(Both: "/*🏞*/" => BlockComment);
+ t!(Both: "/*\n*/" => BlockComment);
+
+ // Test depth 1 and 2 nested block comments.
+ t!(Both: "/* /* */ */" => BlockComment);
+ t!(Both: "/*/*/**/*/*/" => BlockComment);
+
+ // Test two nested, one unclosed block comments.
+ t!(Both[""]: "/*/*/**/*/" => BlockComment);
+
+ // Test all combinations of up to two following slashes and stars.
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both[""]: "/*/" => BlockComment);
+ t!(Both[""]: "/**" => BlockComment);
+ t!(Both[""]: "/*//" => BlockComment);
+ t!(Both[""]: "/*/*" => BlockComment);
+ t!(Both[""]: "/**/" => BlockComment);
+ t!(Both[""]: "/***" => BlockComment);
+
+ // Test unexpected terminator.
+ t!(Both: "/*Hi*/*/" => BlockComment,
+ Error(Full, "unexpected end of block comment"));
+ }
+}