path: root/crates/typst-syntax/src
author    Laurenz <laurmaedje@gmail.com>    2024-11-04 10:17:49 +0100
committer GitHub <noreply@github.com>       2024-11-04 10:17:49 +0100
commit    cb1aad3a0cc862c5ff57a557e196ba49a02917de (patch)
tree      80cd62cbeb0f8d2bb999cc984d213f42293ebd24 /crates/typst-syntax/src
parent    6b636167ef2e84c761777261ce1ca3087a75f765 (diff)
parent    2c9728f53b318a6cae092f30ad0956a536af7ccb (diff)
Refactor Parser (#5310)
Diffstat (limited to 'crates/typst-syntax/src')
-rw-r--r--  crates/typst-syntax/src/lexer.rs    |  190
-rw-r--r--  crates/typst-syntax/src/parser.rs   | 1287
-rw-r--r--  crates/typst-syntax/src/reparser.rs |    8
-rw-r--r--  crates/typst-syntax/src/set.rs      |    3
4 files changed, 840 insertions, 648 deletions
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index 721225c6..1314016f 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;
-use crate::{SyntaxError, SyntaxKind};
+use crate::{SyntaxError, SyntaxKind, SyntaxNode};
-/// Splits up a string of source code into tokens.
+/// An iterator over a source code string that yields tokens.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
- /// The underlying scanner.
+ /// The scanner: contains the underlying string and location as a "cursor".
s: Scanner<'s>,
/// The mode the lexer is in. This determines which kinds of tokens it
/// produces.
mode: LexMode,
/// Whether the last token contained a newline.
newline: bool,
- /// The state held by raw line lexing.
- raw: Vec<(SyntaxKind, usize)>,
/// An error for the last token.
error: Option<SyntaxError>,
}
@@ -31,8 +29,6 @@ pub(super) enum LexMode {
Math,
/// Keywords, literals and operators.
Code,
- /// The contents of a raw block.
- Raw,
}
impl<'s> Lexer<'s> {
@@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
mode,
newline: false,
error: None,
- raw: Vec::new(),
}
}
@@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
self.newline
}
- /// Take out the last error, if any.
- pub fn take_error(&mut self) -> Option<SyntaxError> {
- self.error.take()
+ /// The number of characters between the given index and the most recent newline.
+ pub fn column(&self, index: usize) -> usize {
+ let mut s = self.s; // Make a new temporary scanner (cheap).
+ s.jump(index);
+ s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
}
}
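
The new `column` helper amounts to this standalone computation; a minimal sketch in plain Rust, assuming typst's newline set (the real code runs through the `unscanny::Scanner` rather than slicing):

/// Count the characters between `index` and the most recent newline,
/// mirroring the lexer's new `column` helper (illustrative sketch only).
fn column(text: &str, index: usize) -> usize {
    text[..index].chars().rev().take_while(|&c| !is_newline(c)).count()
}

/// A stand-in for typst's `is_newline` (assumed to match the real set).
fn is_newline(c: char) -> bool {
    matches!(c, '\n' | '\x0B' | '\x0C' | '\r' | '\u{0085}' | '\u{2028}' | '\u{2029}')
}

fn main() {
    let text = "- list\n  item";
    assert_eq!(column(text, 9), 2); // "item" starts two characters into its line.
}
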
@@ -97,21 +94,14 @@ impl Lexer<'_> {
/// Shared methods with all [`LexMode`].
impl Lexer<'_> {
- /// Proceed to the next token and return its [`SyntaxKind`]. Note the
- /// token could be a [trivia](SyntaxKind::is_trivia).
- pub fn next(&mut self) -> SyntaxKind {
- if self.mode == LexMode::Raw {
- let Some((kind, end)) = self.raw.pop() else {
- return SyntaxKind::End;
- };
- self.s.jump(end);
- return kind;
- }
+ /// Return the next token in our text. Returns both the [`SyntaxNode`]
+ /// and the raw [`SyntaxKind`], making it more ergonomic to check the kind.
+ pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
+ debug_assert!(self.error.is_none());
+ let start = self.s.cursor();
self.newline = false;
- self.error = None;
- let start = self.s.cursor();
- match self.s.eat() {
+ let kind = match self.s.eat() {
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
Some('/') if self.s.eat_if('/') => self.line_comment(),
Some('/') if self.s.eat_if('*') => self.block_comment(),
@@ -123,22 +113,32 @@ impl Lexer<'_> {
);
kind
}
-
+ Some('`') if self.mode != LexMode::Math => return self.raw(),
Some(c) => match self.mode {
LexMode::Markup => self.markup(start, c),
- LexMode::Math => self.math(start, c),
+ LexMode::Math => match self.math(start, c) {
+ (kind, None) => kind,
+ (kind, Some(node)) => return (kind, node),
+ },
LexMode::Code => self.code(start, c),
- LexMode::Raw => unreachable!(),
},
None => SyntaxKind::End,
- }
+ };
+
+ let text = self.s.from(start);
+ let node = match self.error.take() {
+ Some(error) => SyntaxNode::error(error, text),
+ None => SyntaxNode::leaf(kind, text),
+ };
+ (kind, node)
}
/// Eat whitespace characters greedily.
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
let more = self.s.eat_while(|c| is_space(c, self.mode));
let newlines = match c {
+ // Optimize eating a single space.
' ' if more.is_empty() => 0,
_ => count_newlines(self.s.from(start)),
};
@@ -187,7 +187,6 @@ impl Lexer<'_> {
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
'\\' => self.backslash(),
- '`' => self.raw(),
'h' if self.s.eat_if("ttp://") => self.link(),
'h' if self.s.eat_if("ttps://") => self.link(),
'<' if self.s.at(is_id_continue) => self.label(),
@@ -252,9 +251,10 @@ impl Lexer<'_> {
}
}
- fn raw(&mut self) -> SyntaxKind {
+ /// Lex an entire raw segment at once. This is a convenience to avoid going
+ /// to and from the parser for each raw section.
+ fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
let start = self.s.cursor() - 1;
- self.raw.clear();
// Determine number of opening backticks.
let mut backticks = 1;
@@ -264,9 +264,11 @@ impl Lexer<'_> {
// Special case for ``.
if backticks == 2 {
- self.push_raw(SyntaxKind::RawDelim);
- self.s.jump(start + 1);
- return SyntaxKind::RawDelim;
+ let nodes = vec![
+ SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+ SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+ ];
+ return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
}
// Find end of raw text.
@@ -275,43 +277,55 @@ impl Lexer<'_> {
match self.s.eat() {
Some('`') => found += 1,
Some(_) => found = 0,
- None => break,
+ None => {
+ let msg = SyntaxError::new("unclosed raw text");
+ let error = SyntaxNode::error(msg, self.s.from(start));
+ return (SyntaxKind::Error, error);
+ }
}
}
+ let end = self.s.cursor();
- if found != backticks {
- return self.error("unclosed raw text");
- }
+ let mut nodes = Vec::with_capacity(3); // Will have at least 3.
+
+ // A closure for pushing a node onto our raw vector. Assumes the caller
+ // will move the scanner to the next location at each step.
+ let mut prev_start = start;
+ let mut push_raw = |kind, s: &Scanner| {
+ nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
+ prev_start = s.cursor();
+ };
+
+ // Opening delimiter.
+ self.s.jump(start + backticks);
+ push_raw(SyntaxKind::RawDelim, &self.s);
- let end = self.s.cursor();
if backticks >= 3 {
- self.blocky_raw(start, end, backticks);
+ self.blocky_raw(end - backticks, &mut push_raw);
} else {
- self.inline_raw(start, end, backticks);
+ self.inline_raw(end - backticks, &mut push_raw);
}
// Closing delimiter.
- self.push_raw(SyntaxKind::RawDelim);
-
- // The saved tokens will be removed in reverse.
- self.raw.reverse();
+ self.s.jump(end);
+ push_raw(SyntaxKind::RawDelim, &self.s);
- // Opening delimiter.
- self.s.jump(start + backticks);
- SyntaxKind::RawDelim
+ (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
}
- fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+ fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+ where
+ F: FnMut(SyntaxKind, &Scanner),
+ {
// Language tag.
- self.s.jump(start + backticks);
if self.s.eat_if(is_id_start) {
self.s.eat_while(is_id_continue);
- self.push_raw(SyntaxKind::RawLang);
+ push_raw(SyntaxKind::RawLang, &self.s);
}
// Determine inner content between backticks.
self.s.eat_if(' ');
- let inner = self.s.to(end - backticks);
+ let inner = self.s.to(inner_end);
// Determine dedent level.
let mut lines = split_newlines(inner);
@@ -357,41 +371,32 @@ impl Lexer<'_> {
let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
self.s.eat_newline();
self.s.advance(offset);
- self.push_raw(SyntaxKind::RawTrimmed);
+ push_raw(SyntaxKind::RawTrimmed, &self.s);
self.s.advance(line.len() - offset);
- self.push_raw(SyntaxKind::Text);
+ push_raw(SyntaxKind::Text, &self.s);
}
// Add final trimmed.
- if self.s.cursor() < end - backticks {
- self.s.jump(end - backticks);
- self.push_raw(SyntaxKind::RawTrimmed);
+ if self.s.cursor() < inner_end {
+ self.s.jump(inner_end);
+ push_raw(SyntaxKind::RawTrimmed, &self.s);
}
- self.s.jump(end);
}
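
The dedent behavior preserved here can be sketched standalone: take the minimum leading-whitespace width over the non-blank lines after the first, then trim that prefix from each line. A simplified sketch only; the real `blocky_raw` works over scanner offsets and emits `RawTrimmed` nodes instead of building new strings:

/// Strip the common indentation from the inner lines of a raw block.
fn dedent(lines: &[&str]) -> Vec<String> {
    // The first line sits right after the opening backticks, so it never
    // contributes to (or receives) dedenting.
    let dedent = lines
        .iter()
        .skip(1)
        .filter(|line| !line.chars().all(char::is_whitespace))
        .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
        .min()
        .unwrap_or(0);
    lines
        .iter()
        .enumerate()
        .map(|(i, line)| {
            if i == 0 {
                line.to_string()
            } else {
                line.chars().skip(dedent).collect()
            }
        })
        .collect()
}

fn main() {
    let lines = ["", "    let x = 1", "      x + 2"];
    assert_eq!(dedent(&lines), ["", "let x = 1", "  x + 2"]);
}
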
- fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
- self.s.jump(start + backticks);
-
- while self.s.cursor() < end - backticks {
+ fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+ where
+ F: FnMut(SyntaxKind, &Scanner),
+ {
+ while self.s.cursor() < inner_end {
if self.s.at(is_newline) {
- self.push_raw(SyntaxKind::Text);
+ push_raw(SyntaxKind::Text, &self.s);
self.s.eat_newline();
- self.push_raw(SyntaxKind::RawTrimmed);
+ push_raw(SyntaxKind::RawTrimmed, &self.s);
continue;
}
self.s.eat();
}
- self.push_raw(SyntaxKind::Text);
-
- self.s.jump(end);
- }
-
- /// Push the current cursor that marks the end of a raw segment of
- /// the given `kind`.
- fn push_raw(&mut self, kind: SyntaxKind) {
- let end = self.s.cursor();
- self.raw.push((kind, end));
+ push_raw(SyntaxKind::Text, &self.s);
}
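
Taken together, the new single-pass raw lexing splits a snippet into delimiters and inner text in one sweep. A rough standalone sketch of the shape of that split, with a hypothetical `split_raw` helper; the real `raw` builds `SyntaxNode`s via `push_raw` and also handles language tags, trimming, and the empty `` special case:

/// Count opening backticks, find the matching closing run, and return the
/// three raw pieces, or `None` if the raw text is unclosed (sketch only).
fn split_raw(text: &str) -> Option<(&str, &str, &str)> {
    let backticks = text.chars().take_while(|&c| c == '`').count();
    let rest = &text[backticks..];
    let close = rest.find(&"`".repeat(backticks))?;
    Some((&text[..backticks], &rest[..close], &rest[close..close + backticks]))
}

fn main() {
    assert_eq!(split_raw("`code`"), Some(("`", "code", "`")));
    assert_eq!(split_raw("```rs fn()```"), Some(("```", "rs fn()", "```")));
    assert_eq!(split_raw("`oops"), None); // Unclosed raw text becomes an error node.
}
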
fn link(&mut self) -> SyntaxKind {
@@ -512,8 +517,8 @@ impl Lexer<'_> {
/// Math.
impl Lexer<'_> {
- fn math(&mut self, start: usize, c: char) -> SyntaxKind {
- match c {
+ fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+ let kind = match c {
'\\' => self.backslash(),
'"' => self.string(),
@@ -566,11 +571,41 @@ impl Lexer<'_> {
// Identifiers.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
self.s.eat_while(is_math_id_continue);
- SyntaxKind::MathIdent
+ let (kind, node) = self.math_ident_or_field(start);
+ return (kind, Some(node));
}
// Other math atoms.
_ => self.math_text(start, c),
+ };
+ (kind, None)
+ }
+
+ /// Parse a single `MathIdent` or an entire `FieldAccess`.
+ fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+ let mut kind = SyntaxKind::MathIdent;
+ let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+ while let Some(ident) = self.maybe_dot_ident() {
+ kind = SyntaxKind::FieldAccess;
+ let field_children = vec![
+ node,
+ SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+ SyntaxNode::leaf(SyntaxKind::Ident, ident),
+ ];
+ node = SyntaxNode::inner(kind, field_children);
+ }
+ (kind, node)
+ }
+
+ /// If at a dot followed by a math identifier, eat both and return the identifier.
+ fn maybe_dot_ident(&mut self) -> Option<&str> {
+ if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+ let ident_start = self.s.cursor();
+ self.s.eat();
+ self.s.eat_while(is_math_id_continue);
+ Some(self.s.from(ident_start))
+ } else {
+ None
}
}
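
The loop in `math_ident_or_field` folds a dotted path left-associatively: each `.ident` wraps the node built so far. A toy sketch with a hypothetical `Node` type standing in for `SyntaxNode`:

#[derive(Debug, PartialEq)]
enum Node {
    Ident(String),
    FieldAccess(Box<Node>, String),
}

/// Fold `a.b.c` into left-nested field accesses, mirroring the lexer's loop.
fn fold_fields(path: &str) -> Node {
    let mut parts = path.split('.');
    let mut node = Node::Ident(parts.next().unwrap().to_string());
    for field in parts {
        node = Node::FieldAccess(Box::new(node), field.to_string());
    }
    node
}

fn main() {
    // `arrow.r.squiggly` in math becomes `((arrow).r).squiggly`.
    let expected = Node::FieldAccess(
        Box::new(Node::FieldAccess(Box::new(Node::Ident("arrow".into())), "r".into())),
        "squiggly".into(),
    );
    assert_eq!(fold_fields("arrow.r.squiggly"), expected);
}
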
@@ -599,7 +634,6 @@ impl Lexer<'_> {
impl Lexer<'_> {
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
- '`' => self.raw(),
'<' if self.s.at(is_id_continue) => self.label(),
'0'..='9' => self.number(start, c),
'.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index a8bec626..ea5b9155 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -6,66 +6,57 @@ use ecow::{eco_format, EcoString};
use unicode_math_class::MathClass;
use crate::set::{syntax_set, SyntaxSet};
-use crate::{
- ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
-};
+use crate::{ast, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode};
-/// Parses a source file.
+/// Parses a source file as top-level markup.
pub fn parse(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse");
let mut p = Parser::new(text, 0, LexMode::Markup);
- markup(&mut p, true, 0, |_| false);
- p.finish().into_iter().next().unwrap()
+ markup_exprs(&mut p, true, syntax_set!(End));
+ p.finish_into(SyntaxKind::Markup)
}
/// Parses top-level code.
pub fn parse_code(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse code");
let mut p = Parser::new(text, 0, LexMode::Code);
- let m = p.marker();
- p.skip();
- code_exprs(&mut p, |_| false);
- p.wrap_all(m, SyntaxKind::Code);
- p.finish().into_iter().next().unwrap()
+ code_exprs(&mut p, syntax_set!(End));
+ p.finish_into(SyntaxKind::Code)
}
/// Parses top-level math.
pub fn parse_math(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse math");
let mut p = Parser::new(text, 0, LexMode::Math);
- math(&mut p, |_| false);
- p.finish().into_iter().next().unwrap()
+ math_exprs(&mut p, syntax_set!(End));
+ p.finish_into(SyntaxKind::Math)
}
-/// Parses the contents of a file or content block.
-fn markup(
- p: &mut Parser,
- mut at_start: bool,
- min_indent: usize,
- mut stop: impl FnMut(&Parser) -> bool,
-) {
- let m = p.marker();
+/// Parses markup expressions until a stop condition is met.
+fn markup(p: &mut Parser, at_start: bool, wrap_trivia: bool, stop_set: SyntaxSet) {
+ let m = if wrap_trivia { p.before_trivia() } else { p.marker() };
+ markup_exprs(p, at_start, stop_set);
+ if wrap_trivia {
+ p.flush_trivia();
+ }
+ p.wrap(m, SyntaxKind::Markup);
+}
+
+/// Parses a sequence of markup expressions.
+fn markup_exprs(p: &mut Parser, mut at_start: bool, stop_set: SyntaxSet) {
+ debug_assert!(stop_set.contains(SyntaxKind::End));
+ at_start |= p.had_newline();
let mut nesting: usize = 0;
- while !p.end() {
+ loop {
match p.current() {
SyntaxKind::LeftBracket => nesting += 1,
SyntaxKind::RightBracket if nesting > 0 => nesting -= 1,
- _ if stop(p) => break,
+ _ if p.at_set(stop_set) => break,
_ => {}
}
-
- if p.newline() {
- at_start = true;
- if min_indent > 0 && p.column(p.current_end()) < min_indent {
- break;
- }
- p.eat();
- continue;
- }
-
- markup_expr(p, &mut at_start);
+ markup_expr(p, at_start);
+ at_start = p.had_newline();
}
- p.wrap(m, SyntaxKind::Markup);
}
/// Reparses a subsection of markup incrementally.
@@ -74,40 +65,29 @@ pub(super) fn reparse_markup(
range: Range<usize>,
at_start: &mut bool,
nesting: &mut usize,
- mut stop: impl FnMut(SyntaxKind) -> bool,
+ top_level: bool,
) -> Option<Vec<SyntaxNode>> {
let mut p = Parser::new(text, range.start, LexMode::Markup);
- while !p.end() && p.current_start() < range.end {
+ *at_start |= p.had_newline();
+ while p.current_start() < range.end {
match p.current() {
SyntaxKind::LeftBracket => *nesting += 1,
SyntaxKind::RightBracket if *nesting > 0 => *nesting -= 1,
- _ if stop(p.current()) => break,
+ SyntaxKind::RightBracket if !top_level => break,
+ SyntaxKind::End => break,
_ => {}
}
-
- if p.newline() {
- *at_start = true;
- p.eat();
- continue;
- }
-
- markup_expr(&mut p, at_start);
+ markup_expr(&mut p, *at_start);
+ *at_start = p.had_newline();
}
(p.balanced && p.current_start() == range.end).then(|| p.finish())
}
-/// Parses a single markup expression: This includes markup elements like
-/// spaces, text, and headings, and embedded code expressions.
-fn markup_expr(p: &mut Parser, at_start: &mut bool) {
+/// Parses a single markup expression. This includes markup elements like text,
+/// headings, strong/emph, lists/enums, etc. This is also the entry point for
+/// parsing math equations and embedded code expressions.
+fn markup_expr(p: &mut Parser, at_start: bool) {
match p.current() {
- SyntaxKind::Space
- | SyntaxKind::Parbreak
- | SyntaxKind::LineComment
- | SyntaxKind::BlockComment => {
- p.eat();
- return;
- }
-
SyntaxKind::Text
| SyntaxKind::Linebreak
| SyntaxKind::Escape
@@ -116,14 +96,15 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Link
| SyntaxKind::Label => p.eat(),
+ SyntaxKind::Raw => p.eat(), // Raw is handled entirely in the Lexer.
+
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
- SyntaxKind::RawDelim => raw(p),
- SyntaxKind::HeadingMarker if *at_start => heading(p),
- SyntaxKind::ListMarker if *at_start => list_item(p),
- SyntaxKind::EnumMarker if *at_start => enum_item(p),
- SyntaxKind::TermMarker if *at_start => term_item(p),
+ SyntaxKind::HeadingMarker if at_start => heading(p),
+ SyntaxKind::ListMarker if at_start => list_item(p),
+ SyntaxKind::EnumMarker if at_start => enum_item(p),
+ SyntaxKind::TermMarker if at_start => term_item(p),
SyntaxKind::RefMarker => reference(p),
SyntaxKind::Dollar => equation(p),
@@ -133,94 +114,76 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::ListMarker
| SyntaxKind::EnumMarker
| SyntaxKind::TermMarker
- | SyntaxKind::Colon => p.convert(SyntaxKind::Text),
+ | SyntaxKind::Colon => p.convert_and_eat(SyntaxKind::Text),
- _ => {
- p.unexpected();
- return; // Don't set `at_start`
- }
+ _ => p.unexpected(),
}
-
- *at_start = false;
}
/// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) {
- let m = p.marker();
- p.assert(SyntaxKind::Star);
- markup(p, false, 0, |p| p.at_set(syntax_set!(Star, Parbreak, RightBracket)));
- p.expect_closing_delimiter(m, SyntaxKind::Star);
- p.wrap(m, SyntaxKind::Strong);
+ p.with_nl_mode(AtNewline::StopParBreak, |p| {
+ let m = p.marker();
+ p.assert(SyntaxKind::Star);
+ markup(p, false, true, syntax_set!(Star, RightBracket, End));
+ p.expect_closing_delimiter(m, SyntaxKind::Star);
+ p.wrap(m, SyntaxKind::Strong);
+ });
}
/// Parses emphasized content: `_Emphasized_`.
fn emph(p: &mut Parser) {
- let m = p.marker();
- p.assert(SyntaxKind::Underscore);
- markup(p, false, 0, |p| p.at_set(syntax_set!(Underscore, Parbreak, RightBracket)));
- p.expect_closing_delimiter(m, SyntaxKind::Underscore);
- p.wrap(m, SyntaxKind::Emph);
-}
-
-/// Parses raw text with optional syntax highlighting: `` `...` ``.
-fn raw(p: &mut Parser) {
- let m = p.marker();
- p.enter(LexMode::Raw);
- p.assert(SyntaxKind::RawDelim);
-
- // Eats until the closing delimiter.
- while !p.end() && !p.at(SyntaxKind::RawDelim) {
- p.eat();
- }
-
- p.expect(SyntaxKind::RawDelim);
- p.exit();
- p.wrap(m, SyntaxKind::Raw);
+ p.with_nl_mode(AtNewline::StopParBreak, |p| {
+ let m = p.marker();
+ p.assert(SyntaxKind::Underscore);
+ markup(p, false, true, syntax_set!(Underscore, RightBracket, End));
+ p.expect_closing_delimiter(m, SyntaxKind::Underscore);
+ p.wrap(m, SyntaxKind::Emph);
+ });
}
/// Parses a section heading: `= Introduction`.
fn heading(p: &mut Parser) {
- let m = p.marker();
- p.assert(SyntaxKind::HeadingMarker);
- whitespace_line(p);
- markup(p, false, usize::MAX, |p| {
- p.at_set(syntax_set!(Label, Space, RightBracket))
- && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label)
+ p.with_nl_mode(AtNewline::Stop, |p| {
+ let m = p.marker();
+ p.assert(SyntaxKind::HeadingMarker);
+ markup(p, false, false, syntax_set!(Label, RightBracket, End));
+ p.wrap(m, SyntaxKind::Heading);
});
- p.wrap(m, SyntaxKind::Heading);
}
/// Parses an item in a bullet list: `- ...`.
fn list_item(p: &mut Parser) {
- let m = p.marker();
- let min_indent = p.column(p.current_start()) + 1;
- p.assert(SyntaxKind::ListMarker);
- whitespace_line(p);
- markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
- p.wrap(m, SyntaxKind::ListItem);
+ p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
+ let m = p.marker();
+ p.assert(SyntaxKind::ListMarker);
+ markup(p, false, false, syntax_set!(RightBracket, End));
+ p.wrap(m, SyntaxKind::ListItem);
+ });
}
/// Parses an item in an enumeration (numbered list): `+ ...` or `1. ...`.
fn enum_item(p: &mut Parser) {
- let m = p.marker();
- let min_indent = p.column(p.current_start()) + 1;
- p.assert(SyntaxKind::EnumMarker);
- whitespace_line(p);
- markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
- p.wrap(m, SyntaxKind::EnumItem);
+ p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
+ let m = p.marker();
+ p.assert(SyntaxKind::EnumMarker);
+ markup(p, false, false, syntax_set!(RightBracket, End));
+ p.wrap(m, SyntaxKind::EnumItem);
+ });
}
/// Parses an item in a term list: `/ Term: Details`.
fn term_item(p: &mut Parser) {
- let m = p.marker();
- p.assert(SyntaxKind::TermMarker);
- let min_indent = p.column(p.prev_end());
- whitespace_line(p);
- markup(p, false, usize::MAX, |p| p.at_set(syntax_set!(Colon, RightBracket)));
- p.expect(SyntaxKind::Colon);
- whitespace_line(p);
- markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
- p.wrap(m, SyntaxKind::TermItem);
+ p.with_nl_mode(AtNewline::RequireColumn(p.current_column()), |p| {
+ let m = p.marker();
+ p.with_nl_mode(AtNewline::Stop, |p| {
+ p.assert(SyntaxKind::TermMarker);
+ markup(p, false, false, syntax_set!(Colon, RightBracket, End));
+ });
+ p.expect(SyntaxKind::Colon);
+ markup(p, false, false, syntax_set!(RightBracket, End));
+ p.wrap(m, SyntaxKind::TermItem);
+ });
}
/// Parses a reference: `@target`, `@target[..]`.
@@ -233,35 +196,34 @@ fn reference(p: &mut Parser) {
p.wrap(m, SyntaxKind::Ref);
}
-/// Consumes whitespace that does not contain a newline.
-fn whitespace_line(p: &mut Parser) {
- while !p.newline() && p.current().is_trivia() {
- p.eat();
- }
-}
-
/// Parses a mathematical equation: `$x$`, `$ x^2 $`.
fn equation(p: &mut Parser) {
let m = p.marker();
- p.enter(LexMode::Math);
- p.assert(SyntaxKind::Dollar);
- math(p, |p| p.at(SyntaxKind::Dollar));
- p.expect_closing_delimiter(m, SyntaxKind::Dollar);
- p.exit();
+ p.enter_modes(LexMode::Math, AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::Dollar);
+ math(p, syntax_set!(Dollar, RightBracket, End));
+ p.expect_closing_delimiter(m, SyntaxKind::Dollar);
+ });
p.wrap(m, SyntaxKind::Equation);
}
/// Parses the contents of a mathematical equation: `x^2 + 1`.
-fn math(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
+fn math(p: &mut Parser, stop_set: SyntaxSet) {
let m = p.marker();
- while !p.end() && !stop(p) {
+ math_exprs(p, stop_set);
+ p.wrap(m, SyntaxKind::Math);
+}
+
+/// Parses a sequence of math expressions.
+fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) {
+ debug_assert!(stop_set.contains(SyntaxKind::End));
+ while !p.at_set(stop_set) {
if p.at_set(set::MATH_EXPR) {
math_expr(p);
} else {
p.unexpected();
}
}
- p.wrap(m, SyntaxKind::Math);
}
/// Parses a single math expression: This includes math elements like
@@ -276,21 +238,11 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
let mut continuable = false;
match p.current() {
SyntaxKind::Hash => embedded_code_expr(p),
- SyntaxKind::MathIdent => {
+ // The lexer manages creating full FieldAccess nodes if needed.
+ SyntaxKind::MathIdent | SyntaxKind::FieldAccess => {
continuable = true;
p.eat();
- while p.directly_at(SyntaxKind::Text) && p.current_text() == "." && {
- let mut copy = p.lexer.clone();
- let start = copy.cursor();
- let next = copy.next();
- let end = copy.cursor();
- matches!(next, SyntaxKind::MathIdent | SyntaxKind::Text)
- && is_ident(&p.text[start..end])
- } {
- p.convert(SyntaxKind::Dot);
- p.convert(SyntaxKind::Ident);
- p.wrap(m, SyntaxKind::FieldAccess);
- }
+ // Parse a function call for an identifier or field access.
if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "("
{
math_args(p);
@@ -340,11 +292,7 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
_ => p.expected("expression"),
}
- if continuable
- && min_prec < 3
- && p.prev_end() == p.current_start()
- && maybe_delimited(p)
- {
+ if continuable && min_prec < 3 && !p.had_trivia() && maybe_delimited(p) {
p.wrap(m, SyntaxKind::Math);
}
@@ -414,6 +362,23 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
}
}
+/// Precedence and wrapper kinds for the binary math operators.
+fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
+ match kind {
+ SyntaxKind::Underscore => {
+ Some((SyntaxKind::MathAttach, SyntaxKind::Hat, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Hat => {
+ Some((SyntaxKind::MathAttach, SyntaxKind::Underscore, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Slash => {
+ Some((SyntaxKind::MathFrac, SyntaxKind::End, ast::Assoc::Left, 1))
+ }
+ _ => None,
+ }
+}
+
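How `math_expr_prec` consumes these tuples is standard precedence climbing: right-associative operators recurse with the same minimum precedence, left-associative ones with one higher. A self-contained sketch of that technique under hypothetical names; this is not the actual typst code:

#[derive(Debug)]
enum Expr {
    Atom(char),
    Op(char, Box<Expr>, Box<Expr>),
}

fn parse(tokens: &[char], pos: &mut usize, min_prec: usize) -> Expr {
    let mut lhs = Expr::Atom(tokens[*pos]);
    *pos += 1;
    while *pos < tokens.len() {
        let (prec, right_assoc) = match tokens[*pos] {
            '^' | '_' => (2, true), // MathAttach: right-associative.
            '/' => (1, false),      // MathFrac: left-associative.
            _ => break,
        };
        if prec < min_prec {
            break;
        }
        let op = tokens[*pos];
        *pos += 1;
        // Left-associative operators raise the minimum precedence so an
        // equal-precedence operator to the right won't capture our rhs.
        let next_min = if right_assoc { prec } else { prec + 1 };
        let rhs = parse(tokens, pos, next_min);
        lhs = Expr::Op(op, Box::new(lhs), Box::new(rhs));
    }
    lhs
}

fn main() {
    let tokens: Vec<char> = "a^b^c".chars().collect();
    let expr = parse(&tokens, &mut 0, 0);
    // `a^b^c` groups as `a^(b^c)` because `^` is right-associative.
    assert!(matches!(expr, Expr::Op('^', _, ref r) if matches!(**r, Expr::Op('^', ..))));
}
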
+/// Try to parse delimiters based on the current token's unicode math class.
fn maybe_delimited(p: &mut Parser) -> bool {
let open = math_class(p.current_text()) == Some(MathClass::Opening);
if open {
@@ -422,11 +387,12 @@ fn maybe_delimited(p: &mut Parser) -> bool {
open
}
+/// Parse matched delimiters in math: `[x + y]`.
fn math_delimited(p: &mut Parser) {
let m = p.marker();
p.eat();
let m2 = p.marker();
- while !p.end() && !p.at(SyntaxKind::Dollar) {
+ while !p.at_set(syntax_set!(Dollar, End)) {
if math_class(p.current_text()) == Some(MathClass::Closing) {
p.wrap(m2, SyntaxKind::Math);
p.eat();
@@ -444,6 +410,8 @@ fn math_delimited(p: &mut Parser) {
p.wrap(m, SyntaxKind::Math);
}
+/// Remove one set of parentheses (if any) from a previously parsed expression
+/// by converting to non-expression SyntaxKinds.
fn math_unparen(p: &mut Parser, m: Marker) {
let Some(node) = p.nodes.get_mut(m.0) else { return };
if node.kind() != SyntaxKind::MathDelimited {
@@ -460,6 +428,10 @@ fn math_unparen(p: &mut Parser, m: Marker) {
node.convert_to_kind(SyntaxKind::Math);
}
+/// The unicode math class of a string. Returns `Some` only if `text` is
+/// exactly one unicode character or a math shorthand string (currently just
+/// `[|`, `||`, `|]`), and only if a math class is defined for that character.
fn math_class(text: &str) -> Option<MathClass> {
match text {
"[|" => return Some(MathClass::Opening),
@@ -475,38 +447,26 @@ fn math_class(text: &str) -> Option<MathClass> {
.and_then(unicode_math_class::class)
}
-fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
- match kind {
- SyntaxKind::Underscore => {
- Some((SyntaxKind::MathAttach, SyntaxKind::Hat, ast::Assoc::Right, 2))
- }
- SyntaxKind::Hat => {
- Some((SyntaxKind::MathAttach, SyntaxKind::Underscore, ast::Assoc::Right, 2))
- }
- SyntaxKind::Slash => {
- Some((SyntaxKind::MathFrac, SyntaxKind::End, ast::Assoc::Left, 1))
- }
- _ => None,
- }
-}
-
+/// Parse an argument list in math: `(a, b; c, d; size: #50%)`.
fn math_args(p: &mut Parser) {
let m = p.marker();
- p.convert(SyntaxKind::LeftParen);
+ p.convert_and_eat(SyntaxKind::LeftParen);
let mut namable = true;
let mut named = None;
let mut has_arrays = false;
let mut array = p.marker();
let mut arg = p.marker();
+ // The number of math expressions per argument.
+ let mut count = 0;
- while !p.end() && !p.at(SyntaxKind::Dollar) {
+ while !p.at_set(syntax_set!(Dollar, End)) {
if namable
&& (p.at(SyntaxKind::MathIdent) || p.at(SyntaxKind::Text))
&& p.text[p.current_end()..].starts_with(':')
{
- p.convert(SyntaxKind::Ident);
- p.convert(SyntaxKind::Colon);
+ p.convert_and_eat(SyntaxKind::Ident);
+ p.convert_and_eat(SyntaxKind::Colon);
named = Some(arg);
arg = p.marker();
array = p.marker();
@@ -515,20 +475,22 @@ fn math_args(p: &mut Parser) {
match p.current_text() {
")" => break,
";" => {
- maybe_wrap_in_math(p, arg, named);
+ maybe_wrap_in_math(p, arg, count, named);
p.wrap(array, SyntaxKind::Array);
- p.convert(SyntaxKind::Semicolon);
+ p.convert_and_eat(SyntaxKind::Semicolon);
array = p.marker();
arg = p.marker();
+ count = 0;
namable = true;
named = None;
has_arrays = true;
continue;
}
"," => {
- maybe_wrap_in_math(p, arg, named);
- p.convert(SyntaxKind::Comma);
+ maybe_wrap_in_math(p, arg, count, named);
+ p.convert_and_eat(SyntaxKind::Comma);
arg = p.marker();
+ count = 0;
namable = true;
if named.is_some() {
array = p.marker();
@@ -541,6 +503,7 @@ fn math_args(p: &mut Parser) {
if p.at_set(set::MATH_EXPR) {
math_expr(p);
+ count += 1;
} else {
p.unexpected();
}
@@ -549,7 +512,7 @@ fn math_args(p: &mut Parser) {
}
if arg != p.marker() {
- maybe_wrap_in_math(p, arg, named);
+ maybe_wrap_in_math(p, arg, count, named);
if named.is_some() {
array = p.marker();
}
@@ -560,7 +523,7 @@ fn math_args(p: &mut Parser) {
}
if p.at(SyntaxKind::Text) && p.current_text() == ")" {
- p.convert(SyntaxKind::RightParen);
+ p.convert_and_eat(SyntaxKind::RightParen);
} else {
p.expected("closing paren");
p.balanced = false;
@@ -569,23 +532,26 @@ fn math_args(p: &mut Parser) {
p.wrap(m, SyntaxKind::Args);
}
-/// Wrap math function arguments in a "Math" SyntaxKind to combine adjacent expressions
-/// or create blank content.
-///
-/// We don't wrap when `exprs == 1`, as there is only one expression, so the grouping
-/// isn't needed, and this would change the type of the expression from potentially
-/// non-content to content.
+/// Wrap math function arguments to join adjacent math content or to create
+/// an empty 'Math' node when there are zero args.
///
-/// Note that `exprs` might be 0 if we have whitespace or trivia before a comma i.e.
-/// `mat(; ,)` or `sin(x, , , ,)`. This would create an empty Math element before that
-/// trivia if we called `p.wrap()` -- breaking the expected AST for 2-d arguments -- so
-/// we instead manually wrap to our current marker using `p.wrap_within()`.
-fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, named: Option<Marker>) {
- let exprs = p.post_process(arg).filter(|node| node.is::<ast::Expr>()).count();
- if exprs != 1 {
- // Convert 0 exprs into a blank math element (so empty arguments are allowed).
- // Convert 2+ exprs into a math element (so they become a joined sequence).
- p.wrap_within(arg, p.marker(), SyntaxKind::Math);
+/// We don't wrap when `count == 1`, since wrapping would change the type of the
+/// expression from potentially non-content to content. For example,
+/// `$ func(#12pt) $` would change the type from size to content if wrapped.
+fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, count: usize, named: Option<Marker>) {
+ if count == 0 {
+ // Flush trivia so that the new empty Math node will be wrapped _inside_
+ // any `SyntaxKind::Array` elements created in `math_args`.
+ // (And if we don't follow by wrapping in an array, it has no effect.)
+ // The difference in node layout without this would look like:
+ // Expression: `$ mat( ;) $`
+ // - Correct: [ .., Space(" "), Array[Math[], ], Semicolon(";"), .. ]
+ // - Incorrect: [ .., Math[], Array[], Space(" "), Semicolon(";"), .. ]
+ p.flush_trivia();
+ }
+
+ if count != 1 {
+ p.wrap(arg, SyntaxKind::Math);
}
if let Some(m) = named {
@@ -594,66 +560,63 @@ fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, named: Option<Marker>) {
}
/// Parses the contents of a code block.
-fn code(p: &mut Parser, stop: impl FnMut(&Parser) -> bool) {
+fn code(p: &mut Parser, stop_set: SyntaxSet) {
let m = p.marker();
- code_exprs(p, stop);
+ code_exprs(p, stop_set);
p.wrap(m, SyntaxKind::Code);
}
/// Parses a sequence of code expressions.
-fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
- while !p.end() && !stop(p) {
- p.enter_newline_mode(NewlineMode::Contextual);
-
- let at_expr = p.at_set(set::CODE_EXPR);
- if at_expr {
+fn code_exprs(p: &mut Parser, stop_set: SyntaxSet) {
+ debug_assert!(stop_set.contains(SyntaxKind::End));
+ while !p.at_set(stop_set) {
+ p.with_nl_mode(AtNewline::ContextualContinue, |p| {
+ if !p.at_set(set::CODE_EXPR) {
+ p.unexpected();
+ return;
+ }
code_expr(p);
- if !p.end() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) {
+ if !p.at_set(stop_set) && !p.eat_if(SyntaxKind::Semicolon) {
p.expected("semicolon or line break");
if p.at(SyntaxKind::Label) {
p.hint("labels can only be applied in markup mode");
p.hint("try wrapping your code in a markup block (`[ ]`)");
}
}
- }
-
- p.exit_newline_mode();
- if !at_expr && !p.end() {
- p.unexpected();
- }
+ });
}
}
-/// Parses a single code expression.
-fn code_expr(p: &mut Parser) {
- code_expr_prec(p, false, 0)
-}
-
-/// Parses a code expression embedded in markup or math.
+/// Parses an atomic code expression embedded in markup or math.
fn embedded_code_expr(p: &mut Parser) {
- p.enter_newline_mode(NewlineMode::Stop);
- p.enter(LexMode::Code);
- p.assert(SyntaxKind::Hash);
- p.unskip();
+ p.enter_modes(LexMode::Code, AtNewline::Stop, |p| {
+ p.assert(SyntaxKind::Hash);
+ if p.had_trivia() || p.end() {
+ p.expected("expression");
+ return;
+ }
- let stmt = p.at_set(set::STMT);
- let at = p.at_set(set::ATOMIC_CODE_EXPR);
- code_expr_prec(p, true, 0);
+ let stmt = p.at_set(set::STMT);
+ let at = p.at_set(set::ATOMIC_CODE_EXPR);
+ code_expr_prec(p, true, 0);
- // Consume error for things like `#12p` or `#"abc\"`.#
- if !at && !p.current().is_trivia() && !p.end() {
- p.unexpected();
- }
+ // Consume error for things like `#12p` or `#"abc\"`.#
+ if !at {
+ p.unexpected();
+ }
- let semi =
- (stmt || p.directly_at(SyntaxKind::Semicolon)) && p.eat_if(SyntaxKind::Semicolon);
+ let semi = (stmt || p.directly_at(SyntaxKind::Semicolon))
+ && p.eat_if(SyntaxKind::Semicolon);
- if stmt && !semi && !p.end() && !p.at(SyntaxKind::RightBracket) {
- p.expected("semicolon or line break");
- }
+ if stmt && !semi && !p.end() && !p.at(SyntaxKind::RightBracket) {
+ p.expected("semicolon or line break");
+ }
+ });
+}
- p.exit();
- p.exit_newline_mode();
+/// Parses a single code expression.
+fn code_expr(p: &mut Parser) {
+ code_expr_prec(p, false, 0)
}
/// Parses a code expression with at least the given precedence.
@@ -676,8 +639,8 @@ fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) {
continue;
}
- let at_field_or_method =
- p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident;
+ let at_field_or_method = p.directly_at(SyntaxKind::Dot)
+ && p.lexer.clone().next().0 == SyntaxKind::Ident;
if atomic && !at_field_or_method {
break;
@@ -757,7 +720,6 @@ fn code_primary(p: &mut Parser, atomic: bool) {
SyntaxKind::LeftBrace => code_block(p),
SyntaxKind::LeftBracket => content_block(p),
SyntaxKind::LeftParen => expr_with_paren(p, atomic),
- SyntaxKind::RawDelim => raw(p),
SyntaxKind::Dollar => equation(p),
SyntaxKind::Let => let_binding(p),
SyntaxKind::Set => set_rule(p),
@@ -772,6 +734,8 @@ fn code_primary(p: &mut Parser, atomic: bool) {
SyntaxKind::Continue => continue_stmt(p),
SyntaxKind::Return => return_stmt(p),
+ SyntaxKind::Raw => p.eat(), // Raw is handled entirely in the Lexer.
+
SyntaxKind::None
| SyntaxKind::Auto
| SyntaxKind::Int
@@ -785,15 +749,6 @@ fn code_primary(p: &mut Parser, atomic: bool) {
}
}
-/// Parses a content or code block.
-fn block(p: &mut Parser) {
- match p.current() {
- SyntaxKind::LeftBracket => content_block(p),
- SyntaxKind::LeftBrace => code_block(p),
- _ => p.expected("block"),
- }
-}
-
/// Reparses a full content or code block.
pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
let mut p = Parser::new(text, range.start, LexMode::Code);
@@ -803,27 +758,34 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod
.then(|| p.finish().into_iter().next().unwrap())
}
+/// Parses a content or code block.
+fn block(p: &mut Parser) {
+ match p.current() {
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftBrace => code_block(p),
+ _ => p.expected("block"),
+ }
+}
+
/// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) {
let m = p.marker();
- p.enter(LexMode::Code);
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftBrace);
- code(p, |p| p.at_set(syntax_set!(RightBrace, RightBracket, RightParen)));
- p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
- p.exit();
- p.exit_newline_mode();
+ p.enter_modes(LexMode::Code, AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftBrace);
+ code(p, syntax_set!(RightBrace, RightBracket, RightParen, End));
+ p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
+ });
p.wrap(m, SyntaxKind::CodeBlock);
}
/// Parses a content block: `[*Hi* there!]`.
fn content_block(p: &mut Parser) {
let m = p.marker();
- p.enter(LexMode::Markup);
- p.assert(SyntaxKind::LeftBracket);
- markup(p, true, 0, |p| p.at(SyntaxKind::RightBracket));
- p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
- p.exit();
+ p.enter_modes(LexMode::Markup, AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftBracket);
+ markup(p, true, true, syntax_set!(RightBracket, End));
+ p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
+ });
p.wrap(m, SyntaxKind::ContentBlock);
}
@@ -937,9 +899,8 @@ fn for_loop(p: &mut Parser) {
let mut seen = HashSet::new();
pattern(p, false, &mut seen, None);
- let m2 = p.marker();
- if p.eat_if(SyntaxKind::Comma) {
- let node = &mut p[m2];
+ if p.at(SyntaxKind::Comma) {
+ let node = p.eat_and_get();
node.unexpected();
node.hint("destructuring patterns must be wrapped in parentheses");
if p.at_set(set::PATTERN) {
@@ -967,14 +928,14 @@ fn module_import(p: &mut Parser) {
if p.eat_if(SyntaxKind::Colon) {
if p.at(SyntaxKind::LeftParen) {
- let m1 = p.marker();
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftParen);
+ p.with_nl_mode(AtNewline::Continue, |p| {
+ let m2 = p.marker();
+ p.assert(SyntaxKind::LeftParen);
- import_items(p);
+ import_items(p);
- p.expect_closing_delimiter(m1, SyntaxKind::RightParen);
- p.exit_newline_mode();
+ p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
+ });
} else if !p.eat_if(SyntaxKind::Star) {
import_items(p);
}
@@ -1047,29 +1008,25 @@ fn return_stmt(p: &mut Parser) {
/// An expression that starts with a parenthesis.
fn expr_with_paren(p: &mut Parser, atomic: bool) {
- // If we've seen this position before and have a memoized result, just use
- // it. See below for more explanation about this memoization.
- let start = p.current_start();
- if let Some((range, end_point)) = p.memo.get(&start).cloned() {
- // Restore the end point first, so that it doesn't truncate our freshly
- // pushed nodes. If the current length of `p.nodes` doesn't match what
- // we had in the memoized run, this might otherwise happen.
- p.restore(end_point);
- p.nodes.extend(p.memo_arena[range].iter().cloned());
+ if atomic {
+ // Atomic expressions aren't modified by operators that follow them, so
+ // our first guess of array/dict will be correct.
+ parenthesized_or_array_or_dict(p);
return;
}
- let m = p.marker();
- let checkpoint = p.checkpoint();
+ // If we've seen this position before and have a memoized result, restore it
+ // and return. Otherwise, get a key to this position and a checkpoint to
+ // restart from in case we make a wrong prediction.
+ let Some((memo_key, checkpoint)) = p.restore_memo_or_checkpoint() else { return };
+ // The node length from when we restored.
+ let prev_len = checkpoint.node_len;
// When we reach a '(', we can't be sure what it is. First, we attempt to
// parse as a simple parenthesized expression, array, or dictionary as
// these are the most likely things. We can handle all of those in a single
// pass.
let kind = parenthesized_or_array_or_dict(p);
- if atomic {
- return;
- }
// If, however, '=>' or '=' follows, we must backtrack and reparse as either
// a parameter list or a destructuring. To be able to do that, we created a
@@ -1090,6 +1047,7 @@ fn expr_with_paren(p: &mut Parser, atomic: bool) {
// case running time of O(2n).
if p.at(SyntaxKind::Arrow) {
p.restore(checkpoint);
+ let m = p.marker();
params(p);
if !p.expect(SyntaxKind::Arrow) {
return;
@@ -1098,6 +1056,7 @@ fn expr_with_paren(p: &mut Parser, atomic: bool) {
p.wrap(m, SyntaxKind::Closure);
} else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized {
p.restore(checkpoint);
+ let m = p.marker();
destructuring_or_parenthesized(p, true, &mut HashSet::new());
if !p.expect(SyntaxKind::Eq) {
return;
@@ -1109,9 +1068,7 @@ fn expr_with_paren(p: &mut Parser, atomic: bool) {
}
// Memoize result if we backtracked.
- let offset = p.memo_arena.len();
- p.memo_arena.extend(p.nodes[m.0..].iter().cloned());
- p.memo.insert(start, (offset..p.memo_arena.len(), p.checkpoint()));
+ p.memoize_parsed_nodes(memo_key, prev_len);
}
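
The memoization being reworked here is packrat-style: parse results are cached by source offset so that backtracking in `expr_with_paren` stays linear overall. A minimal sketch of the idea, with hypothetical names (`Memo`, `get_or_parse`) rather than the real `MemoArena` API, which also restores a full parser checkpoint:

use std::collections::HashMap;

/// A minimal packrat-style memo table keyed by source offset.
struct Memo<T: Clone> {
    table: HashMap<usize, Vec<T>>,
}

impl<T: Clone> Memo<T> {
    fn new() -> Self {
        Self { table: HashMap::new() }
    }

    /// Reuse a cached parse result for `start`, or run `parse` and cache it.
    fn get_or_parse(&mut self, start: usize, parse: impl FnOnce() -> Vec<T>) -> Vec<T> {
        if let Some(nodes) = self.table.get(&start) {
            return nodes.clone(); // We backtracked to a known offset: no re-parse.
        }
        let nodes = parse();
        self.table.insert(start, nodes.clone());
        nodes
    }
}

fn main() {
    let mut memo = Memo::new();
    let first = memo.get_or_parse(0, || vec!["(", "a", ")"]);
    // A later visit to offset 0 (after backtracking) hits the cache.
    let second = memo.get_or_parse(0, || unreachable!("memoized"));
    assert_eq!(first, second);
}
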
/// Parses either
@@ -1119,10 +1076,6 @@ fn expr_with_paren(p: &mut Parser, atomic: bool) {
/// - an array: `(1, "hi", 12cm)`, or
/// - a dictionary: `(thickness: 3pt, pattern: dashed)`.
fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
- let m = p.marker();
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftParen);
-
let mut state = GroupState {
count: 0,
maybe_just_parens: true,
@@ -1130,27 +1083,44 @@ fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
seen: HashSet::new(),
};
- if p.eat_if(SyntaxKind::Colon) {
- state.kind = Some(SyntaxKind::Dict);
- state.maybe_just_parens = false;
- }
-
- while !p.current().is_terminator() {
- if !p.at_set(set::ARRAY_OR_DICT_ITEM) {
- p.unexpected();
- continue;
+ // An edge case with parens is whether we can interpret a leading spread
+ // expression as a dictionary, e.g. if we want `(..dict1, ..dict2)` to join
+ // the two dicts.
+ //
+ // The issue is that we decide on the type of the parenthesized expression
+ // here in the parser by the `SyntaxKind` we wrap with, instead of in eval
+ // based on the type of the spread item.
+ //
+ // The current fix is that we allow a leading colon to force the
+ // parenthesized value into a dict:
+ // - `(..arr1, ..arr2)` is wrapped as an `Array`.
+ // - `(: ..dict1, ..dict2)` is wrapped as a `Dict`.
+ //
+ // This does allow some unexpected expressions, such as `(: key: val)`, but
+ // it's currently intentional.
+ let m = p.marker();
+ p.with_nl_mode(AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftParen);
+ if p.eat_if(SyntaxKind::Colon) {
+ state.kind = Some(SyntaxKind::Dict);
}
- array_or_dict_item(p, &mut state);
- state.count += 1;
+ while !p.current().is_terminator() {
+ if !p.at_set(set::ARRAY_OR_DICT_ITEM) {
+ p.unexpected();
+ continue;
+ }
+
+ array_or_dict_item(p, &mut state);
+ state.count += 1;
- if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
- state.maybe_just_parens = false;
+ if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
+ state.maybe_just_parens = false;
+ }
}
- }
- p.expect_closing_delimiter(m, SyntaxKind::RightParen);
- p.exit_newline_mode();
+ p.expect_closing_delimiter(m, SyntaxKind::RightParen);
+ });
let kind = if state.maybe_just_parens && state.count == 1 {
SyntaxKind::Parenthesized
@@ -1165,8 +1135,13 @@ fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
/// State for array/dictionary parsing.
struct GroupState {
count: usize,
+ /// Whether this is just a single expression in parens: `(a)`. Single
+ /// element arrays require an explicit comma: `(a,)`, unless we're
+ /// spreading: `(..a)`.
maybe_just_parens: bool,
+ /// The `SyntaxKind` to wrap as (if we've figured it out yet).
kind: Option<SyntaxKind>,
+ /// Store named arguments so we can give an error if they're repeated.
seen: HashSet<EcoString>,
}
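
The final kind decision can be written as a standalone function; a simplification of `GroupState`, since the real parser updates `state.kind` incrementally as it encounters named pairs or the leading colon:

#[derive(Debug, PartialEq)]
enum Kind {
    Parenthesized,
    Array,
    Dict,
}

fn group_kind(count: usize, maybe_just_parens: bool, forced_dict: bool) -> Kind {
    if forced_dict {
        Kind::Dict // `(key: val)` or a leading colon `(: ..d1, ..d2)`.
    } else if maybe_just_parens && count == 1 {
        Kind::Parenthesized // `(a)` is just grouping.
    } else {
        Kind::Array // `()`, `(a,)`, `(a, b)`, and `(..a)` are arrays.
    }
}

fn main() {
    assert_eq!(group_kind(1, true, false), Kind::Parenthesized);
    assert_eq!(group_kind(1, false, false), Kind::Array); // `(a,)`
    assert_eq!(group_kind(2, false, true), Kind::Dict); // `(k: v, x: y)`
}
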
@@ -1234,25 +1209,25 @@ fn args(p: &mut Parser) {
let m = p.marker();
if p.at(SyntaxKind::LeftParen) {
let m2 = p.marker();
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftParen);
+ p.with_nl_mode(AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftParen);
- let mut seen = HashSet::new();
- while !p.current().is_terminator() {
- if !p.at_set(set::ARG) {
- p.unexpected();
- continue;
- }
+ let mut seen = HashSet::new();
+ while !p.current().is_terminator() {
+ if !p.at_set(set::ARG) {
+ p.unexpected();
+ continue;
+ }
- arg(p, &mut seen);
+ arg(p, &mut seen);
- if !p.current().is_terminator() {
- p.expect(SyntaxKind::Comma);
+ if !p.current().is_terminator() {
+ p.expect(SyntaxKind::Comma);
+ }
}
- }
- p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
- p.exit_newline_mode();
+ p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
+ });
}
while p.directly_at(SyntaxKind::LeftBracket) {
@@ -1297,27 +1272,27 @@ fn arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) {
/// Parses a closure's parameters: `(x, y)`.
fn params(p: &mut Parser) {
let m = p.marker();
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftParen);
+ p.with_nl_mode(AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftParen);
- let mut seen = HashSet::new();
- let mut sink = false;
+ let mut seen = HashSet::new();
+ let mut sink = false;
- while !p.current().is_terminator() {
- if !p.at_set(set::PARAM) {
- p.unexpected();
- continue;
- }
+ while !p.current().is_terminator() {
+ if !p.at_set(set::PARAM) {
+ p.unexpected();
+ continue;
+ }
- param(p, &mut seen, &mut sink);
+ param(p, &mut seen, &mut sink);
- if !p.current().is_terminator() {
- p.expect(SyntaxKind::Comma);
+ if !p.current().is_terminator() {
+ p.expect(SyntaxKind::Comma);
+ }
}
- }
- p.expect_closing_delimiter(m, SyntaxKind::RightParen);
- p.exit_newline_mode();
+ p.expect_closing_delimiter(m, SyntaxKind::RightParen);
+ });
p.wrap(m, SyntaxKind::Params);
}
@@ -1378,25 +1353,25 @@ fn destructuring_or_parenthesized<'s>(
let mut maybe_just_parens = true;
let m = p.marker();
- p.enter_newline_mode(NewlineMode::Continue);
- p.assert(SyntaxKind::LeftParen);
+ p.with_nl_mode(AtNewline::Continue, |p| {
+ p.assert(SyntaxKind::LeftParen);
- while !p.current().is_terminator() {
- if !p.at_set(set::DESTRUCTURING_ITEM) {
- p.unexpected();
- continue;
- }
+ while !p.current().is_terminator() {
+ if !p.at_set(set::DESTRUCTURING_ITEM) {
+ p.unexpected();
+ continue;
+ }
- destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink);
- count += 1;
+ destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink);
+ count += 1;
- if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
- maybe_just_parens = false;
+ if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
+ maybe_just_parens = false;
+ }
}
- }
- p.expect_closing_delimiter(m, SyntaxKind::RightParen);
- p.exit_newline_mode();
+ p.expect_closing_delimiter(m, SyntaxKind::RightParen);
+ });
if maybe_just_parens && count == 1 && !sink {
p.wrap(m, SyntaxKind::Parenthesized);
@@ -1429,6 +1404,9 @@ fn destructuring_item<'s>(
// Parse a normal positional pattern or a destructuring key.
let was_at_pat = p.at_set(set::PATTERN);
+
+ // We must use a full checkpoint here (can't just clone the lexer) because
+    // there may be trivia between the identifier and the colon that we need to skip.
let checkpoint = p.checkpoint();
if !(p.eat_if(SyntaxKind::Ident) && p.at(SyntaxKind::Colon)) {
p.restore(checkpoint);
@@ -1487,122 +1465,274 @@ fn pattern_leaf<'s>(
}
}
-/// Manages parsing of a stream of tokens.
+/// Manages parsing a stream of tokens into a tree of [`SyntaxNode`]s.
+///
+/// The implementation presents an interface that investigates a current `token`
+/// with a [`SyntaxKind`] and can take one of the following actions:
+///
+/// 1. Eat a token: push `token` onto the `nodes` vector as a [leaf
+/// node](`SyntaxNode::leaf`) and prepare a new `token` by calling into the
+/// lexer.
+/// 2. Wrap nodes from a marker to the end of `nodes` (excluding `token` and any
+/// attached trivia) into an [inner node](`SyntaxNode::inner`) of a specific
+/// `SyntaxKind`.
+/// 3. Produce or convert nodes into an [error node](`SyntaxNode::error`) when
+/// something expected is missing or something unexpected is found.
+///
+/// Overall the parser produces a nested tree of SyntaxNodes as a "_Concrete_
+/// Syntax Tree." The raw Concrete Syntax Tree should contain the entire source
+/// text, and is used as-is for e.g. syntax highlighting and IDE features. In
+/// `ast.rs` the CST is interpreted as a lazy view over an "_Abstract_ Syntax
+/// Tree." The AST module skips over irrelevant tokens -- whitespace, comments,
+/// code parens, commas in function args, etc. -- as it iterates through the
+/// tree.
+///
+/// ### Modes
+///
+/// The parser manages the transitions between the three modes of Typst through
+/// [lexer modes](`LexMode`) and [newline modes](`AtNewline`).
+///
+/// The lexer modes map to the three Typst modes and are stored in the lexer,
+/// changing which `SyntaxKind`s it will generate.
+///
+/// The newline mode is used to determine whether a newline should end the
+/// current expression. If so, the parser temporarily changes `token`'s kind to
+/// a fake [`SyntaxKind::End`]. When the parser exits the mode, the original
+/// `SyntaxKind` is restored.
struct Parser<'s> {
+ /// The source text shared with the lexer.
text: &'s str,
+ /// A lexer over the source text with multiple modes. Defines the boundaries
+ /// of tokens and determines their [`SyntaxKind`]. Contains the [`LexMode`]
+ /// defining our current Typst mode.
lexer: Lexer<'s>,
- prev_end: usize,
- current_start: usize,
- current: SyntaxKind,
+ /// The newline mode: whether to insert a temporary end at newlines.
+ nl_mode: AtNewline,
+ /// The current token under inspection, not yet present in `nodes`. This
+ /// acts like a single item of lookahead for the parser.
+ ///
+ /// When wrapping, this is _not_ included in the wrapped nodes.
+ token: Token,
+ /// Whether the parser has the expected set of open/close delimiters. This
+ /// only ever transitions from `true` to `false`.
balanced: bool,
+ /// Nodes representing the concrete syntax tree of previously parsed text.
+ /// In Code and Math, includes previously parsed trivia, but not `token`.
nodes: Vec<SyntaxNode>,
- modes: Vec<LexMode>,
- newline_modes: Vec<NewlineMode>,
- memo: HashMap<usize, (Range<usize>, Checkpoint<'s>)>,
- memo_arena: Vec<SyntaxNode>,
+ /// Parser checkpoints for a given text index. Used for efficient parser
+ /// backtracking similar to packrat parsing. See comments above in
+ /// [`expr_with_paren`].
+ memo: MemoArena,
+}
+
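The defining property of the "concrete" tree described in the doc comment above is that concatenating its leaves in source order reproduces the input text exactly, trivia included. A toy illustration; typst's real `SyntaxNode` also carries spans and errors:

enum Node {
    Leaf(&'static str),
    Inner(Vec<Node>),
}

/// Reassemble the source text by walking the leaves in order.
fn text(node: &Node, out: &mut String) {
    match node {
        Node::Leaf(s) => out.push_str(s),
        Node::Inner(children) => {
            for child in children {
                text(child, out);
            }
        }
    }
}

fn main() {
    // A plausible tree for `*hi*`: Strong[Star, Markup[Text], Star].
    let tree = Node::Inner(vec![
        Node::Leaf("*"),
        Node::Inner(vec![Node::Leaf("hi")]),
        Node::Leaf("*"),
    ]);
    let mut source = String::new();
    text(&tree, &mut source);
    assert_eq!(source, "*hi*");
}
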
+/// A single token returned from the lexer with a cached [`SyntaxKind`] and a
+/// record of preceding trivia.
+#[derive(Debug, Clone)]
+struct Token {
+ /// The [`SyntaxKind`] of the current token.
+ kind: SyntaxKind,
+ /// The [`SyntaxNode`] of the current token, ready to be eaten and pushed
+ /// onto the end of `nodes`.
+ node: SyntaxNode,
+    /// The number of trivia nodes preceding this token.
+ n_trivia: usize,
+ /// Whether this token's preceding trivia contained a newline.
+ newline: Option<Newline>,
+ /// The index into `text` of the start of our current token (the end is
+ /// stored as the lexer's cursor).
+ start: usize,
+ /// The index into `text` of the end of the previous token.
+ prev_end: usize,
}
-/// How to proceed with parsing when seeing a newline.
-#[derive(Clone)]
-enum NewlineMode {
- /// Stop always.
- Stop,
- /// Proceed if there is no continuation with `else` or `.`
- Contextual,
- /// Just proceed like with normal whitespace.
- Continue,
+/// Information about a newline if present (currently only relevant in Markup).
+#[derive(Debug, Clone, Copy)]
+struct Newline {
+ /// The column of the start of our token in its line.
+ column: Option<usize>,
+ /// Whether any of our newlines were paragraph breaks.
+ parbreak: bool,
}
+/// How to proceed with parsing when at a newline.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum AtNewline {
+ /// Continue at newlines.
+ Continue,
+ /// Stop at any newline.
+ Stop,
+ /// Continue only if there is no continuation with `else` or `.` (Code only).
+ ContextualContinue,
+ /// Stop only at a parbreak, not normal newlines (Markup only).
+ StopParBreak,
+    /// Require that the token's column be greater than a given column (Markup
+    /// only), i.e. stop at a token whose column is less than or equal to it.
+    /// If this is `usize::MAX`, it acts like `Stop`.
+    RequireColumn(usize),
+}
+
+impl AtNewline {
+ /// Whether to stop at a newline or continue based on the current context.
+ fn stop_at(self, Newline { column, parbreak }: Newline, kind: SyntaxKind) -> bool {
+ #[allow(clippy::match_like_matches_macro)]
+ match self {
+ AtNewline::Continue => false,
+ AtNewline::Stop => true,
+ AtNewline::ContextualContinue => match kind {
+ SyntaxKind::Else | SyntaxKind::Dot => false,
+ _ => true,
+ },
+ AtNewline::StopParBreak => parbreak,
+ AtNewline::RequireColumn(min_col) => match column {
+ Some(column) => column <= min_col,
+ None => false, // Don't stop if we had no column.
+ },
+ }
+ }
+}
+
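A standalone mirror of the two markup-only modes shows how the decision plays out for strong/emph and list items. A sketch only, since the real `stop_at` also inspects the upcoming token's `SyntaxKind`:

#[derive(Clone, Copy)]
enum AtNewline {
    StopParBreak,
    RequireColumn(usize),
}

impl AtNewline {
    fn stop_at(self, column: Option<usize>, parbreak: bool) -> bool {
        match self {
            // Strong/emph run across single newlines but end at blank lines.
            AtNewline::StopParBreak => parbreak,
            // List items continue only while lines are indented past the
            // marker's column; no column means we don't stop.
            AtNewline::RequireColumn(min_col) => {
                column.is_some_and(|column| column <= min_col)
            }
        }
    }
}

fn main() {
    let list = AtNewline::RequireColumn(0); // A list marker at column 0.
    assert!(!list.stop_at(Some(2), false)); // An indented line continues the item,
    assert!(list.stop_at(Some(0), false)); // but a column-0 line ends it.
    assert!(AtNewline::StopParBreak.stop_at(Some(0), true)); // Parbreaks stop emph.
}
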
+/// A marker representing a node's position in the parser. Mainly used for
+/// wrapping, but can also index into the parser to access the node, like
+/// `p[m]`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Marker(usize);
-#[derive(Clone)]
-struct Checkpoint<'s> {
- lexer: Lexer<'s>,
- prev_end: usize,
- current_start: usize,
- current: SyntaxKind,
- nodes: usize,
+// Index into the parser with markers.
+impl Index<Marker> for Parser<'_> {
+ type Output = SyntaxNode;
+
+ fn index(&self, m: Marker) -> &Self::Output {
+ &self.nodes[m.0]
+ }
}
+impl IndexMut<Marker> for Parser<'_> {
+ fn index_mut(&mut self, m: Marker) -> &mut Self::Output {
+ &mut self.nodes[m.0]
+ }
+}
+
+/// Creating/Consuming the parser and getting info about the current token.
impl<'s> Parser<'s> {
+ /// Create a new parser starting from the given text offset and lexer mode.
fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
let mut lexer = Lexer::new(text, mode);
lexer.jump(offset);
- let current = lexer.next();
+ let nl_mode = AtNewline::Continue;
+ let mut nodes = vec![];
+ let token = Self::lex(&mut nodes, &mut lexer, nl_mode);
Self {
- lexer,
text,
- prev_end: offset,
- current_start: offset,
- current,
+ lexer,
+ nl_mode,
+ token,
balanced: true,
- nodes: vec![],
- modes: vec![],
- newline_modes: vec![],
- memo: HashMap::new(),
- memo_arena: vec![],
+ nodes,
+ memo: Default::default(),
}
}
+ /// Consume the parser, yielding the full vector of parsed SyntaxNodes.
fn finish(self) -> Vec<SyntaxNode> {
self.nodes
}
- fn prev_end(&self) -> usize {
- self.prev_end
+ /// Consume the parser, generating a single top-level node.
+ fn finish_into(self, kind: SyntaxKind) -> SyntaxNode {
+ assert!(self.at(SyntaxKind::End));
+ SyntaxNode::inner(kind, self.finish())
}
+ /// Similar to a `peek()` function: returns the `kind` of the next token to
+ /// be eaten.
fn current(&self) -> SyntaxKind {
- self.current
+ self.token.kind
}
- fn current_start(&self) -> usize {
- self.current_start
+ /// Whether the current token is a given [`SyntaxKind`].
+ fn at(&self, kind: SyntaxKind) -> bool {
+ self.token.kind == kind
}
- fn current_end(&self) -> usize {
- self.lexer.cursor()
+ /// Whether the current token is contained in a [`SyntaxSet`].
+ fn at_set(&self, set: SyntaxSet) -> bool {
+ set.contains(self.token.kind)
+ }
+
+ /// Whether we're at the end of the token stream.
+ ///
+ /// Note: This might be a fake end due to the newline mode.
+ fn end(&self) -> bool {
+ self.at(SyntaxKind::End)
+ }
+
+ /// If we're at the given `kind` with no preceding trivia tokens.
+ fn directly_at(&self, kind: SyntaxKind) -> bool {
+ self.token.kind == kind && !self.had_trivia()
}
+ /// Whether `token` had any preceding trivia.
+ fn had_trivia(&self) -> bool {
+ self.token.n_trivia > 0
+ }
+
+ /// Whether `token` had a newline among any of its preceding trivia.
+ fn had_newline(&self) -> bool {
+ self.token.newline.is_some()
+ }
+
+    /// The number of characters between the most recent newline and the
+    /// current token, or 0 if the token did not follow a newline.
+ fn current_column(&self) -> usize {
+ self.token.newline.and_then(|newline| newline.column).unwrap_or(0)
+ }
+
+ /// The current token's text.
fn current_text(&self) -> &'s str {
- &self.text[self.current_start..self.current_end()]
+ &self.text[self.token.start..self.current_end()]
}
- fn at(&self, kind: SyntaxKind) -> bool {
- self.current == kind
+ /// The offset into `text` of the current token's start.
+ fn current_start(&self) -> usize {
+ self.token.start
}
- fn at_set(&self, set: SyntaxSet) -> bool {
- set.contains(self.current)
+ /// The offset into `text` of the current token's end.
+ fn current_end(&self) -> usize {
+ self.lexer.cursor()
}
- fn end(&self) -> bool {
- self.at(SyntaxKind::End)
+ /// The offset into `text` of the previous token's end.
+ fn prev_end(&self) -> usize {
+ self.token.prev_end
}
+}
- fn directly_at(&self, kind: SyntaxKind) -> bool {
- self.current == kind && self.prev_end == self.current_start
+// The main parsing interface for generating tokens and eating/modifying nodes.
+impl<'s> Parser<'s> {
+ /// A marker that will point to the current token in the parser once it's
+ /// been eaten.
+ fn marker(&self) -> Marker {
+ Marker(self.nodes.len())
}
- fn eat(&mut self) {
- self.save();
- self.lex();
- self.skip();
+    /// A marker that will point to the first trivia node before this token
+    /// in the parser (or the token itself if no trivia precede it).
+ fn before_trivia(&self) -> Marker {
+ Marker(self.nodes.len() - self.token.n_trivia)
}
+ /// Eat the current node and return a reference for in-place mutation.
#[track_caller]
fn eat_and_get(&mut self) -> &mut SyntaxNode {
let offset = self.nodes.len();
- self.save();
- self.lex();
- self.skip();
+ self.eat();
&mut self.nodes[offset]
}
- /// Eats if at `kind`.
+ /// Eat the token if at `kind`. Returns `true` if eaten.
///
- /// Note: In math and code mode, this will ignore trivia in front of the
+ /// Note: In Math and Code, this will ignore trivia in front of the
/// `kind`. To forbid skipping trivia, consider using `eat_if_direct`.
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
@@ -1612,7 +1742,8 @@ impl<'s> Parser<'s> {
at
}
- /// Eats only if currently at the start of `kind`.
+ /// Eat the token only if at `kind` with no preceding trivia. Returns `true`
+ /// if eaten.
fn eat_if_direct(&mut self, kind: SyntaxKind) -> bool {
let at = self.directly_at(kind);
if at {
@@ -1621,188 +1752,228 @@ impl<'s> Parser<'s> {
at
}
+ /// Assert that we are at the given [`SyntaxKind`] and eat it. This should
+ /// be used when moving between functions that expect to start with a
+ /// specific token.
#[track_caller]
fn assert(&mut self, kind: SyntaxKind) {
- assert_eq!(self.current, kind);
+ assert_eq!(self.token.kind, kind);
self.eat();
}
- fn convert(&mut self, kind: SyntaxKind) {
- self.current = kind;
+ /// Convert the current token's [`SyntaxKind`] and eat it.
+ fn convert_and_eat(&mut self, kind: SyntaxKind) {
+ // Only need to replace the node here.
+ self.token.node.convert_to_kind(kind);
self.eat();
}
- fn newline(&mut self) -> bool {
- self.lexer.newline()
- }
-
- fn column(&self, at: usize) -> usize {
- self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
- }
-
- fn marker(&self) -> Marker {
- Marker(self.nodes.len())
- }
-
- /// Get a marker after the last non-trivia node.
- fn before_trivia(&self) -> Marker {
- let mut i = self.nodes.len();
- if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
- while i > 0 && self.nodes[i - 1].kind().is_trivia() {
- i -= 1;
- }
- }
- Marker(i)
- }
-
- /// Whether the last non-trivia node is an error.
- fn after_error(&mut self) -> bool {
- let m = self.before_trivia();
- m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
+ /// Eat the current token by saving it to the `nodes` vector, then move
+ /// the lexer forward to prepare a new token.
+ fn eat(&mut self) {
+ self.nodes.push(std::mem::take(&mut self.token.node));
+ self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
}
- #[track_caller]
- fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
- self.nodes[m.0..]
- .iter_mut()
- .filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
+ /// Detach the parsed trivia nodes from this token (but not newline info) so
+ /// that subsequent wrapping will include the trivia.
+ fn flush_trivia(&mut self) {
+ self.token.n_trivia = 0;
+ self.token.prev_end = self.token.start;
}
+ /// Wrap the nodes from a marker up to (but excluding) the current token in
+ /// a new [inner node](`SyntaxNode::inner`) of the given kind. This is an
+ /// easy interface for creating nested syntax nodes _after_ having parsed
+ /// their children.
fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
- self.wrap_within(from, self.before_trivia(), kind);
- }
-
- fn wrap_all(&mut self, from: Marker, kind: SyntaxKind) {
- self.wrap_within(from, Marker(self.nodes.len()), kind)
- }
-
- fn wrap_within(&mut self, from: Marker, to: Marker, kind: SyntaxKind) {
- let len = self.nodes.len();
- let to = to.0.min(len);
+ let to = self.before_trivia().0;
let from = from.0.min(to);
let children = self.nodes.drain(from..to).collect();
self.nodes.insert(from, SyntaxNode::inner(kind, children));
}
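The marker/wrap pair is how the parser builds nested nodes bottom-up. A hedged sketch of the idiom with illustrative names (`primary` is hypothetical; `Plus` and `Binary` are real `SyntaxKind`s):

    fn plus_expr(p: &mut Parser) {
        let m = p.marker();
        primary(p); // parse the left-hand side
        while p.eat_if(SyntaxKind::Plus) {
            primary(p); // parse the right-hand side
            // Everything from `m` up to (but excluding) the current token
            // becomes one `Binary` node; trailing trivia stay outside it.
            p.wrap(m, SyntaxKind::Binary);
        }
    }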
- fn enter(&mut self, mode: LexMode) {
- self.modes.push(self.lexer.mode());
+ /// Parse within the [`LexMode`] for subsequent tokens (does not change the
+ /// current token). This may re-lex the final token on exit.
+ ///
+ /// This function effectively repurposes the call stack as a stack of modes.
+ fn enter_modes(
+ &mut self,
+ mode: LexMode,
+ stop: AtNewline,
+ func: impl FnOnce(&mut Parser<'s>),
+ ) {
+ let previous = self.lexer.mode();
self.lexer.set_mode(mode);
+ self.with_nl_mode(stop, func);
+ if mode != previous {
+ self.lexer.set_mode(previous);
+ self.lexer.jump(self.token.prev_end);
+ self.nodes.truncate(self.nodes.len() - self.token.n_trivia);
+ self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
+ }
}
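For example, a code block embedded in markup needs Code-mode lexing between its braces. A sketch of a caller (close in spirit to this commit's `code_block`, but the `code` helper, its set argument, and the `AtNewline::Continue` variant are assumptions):

    fn code_block(p: &mut Parser) {
        let m = p.marker();
        p.enter_modes(LexMode::Code, AtNewline::Continue, |p| {
            p.assert(SyntaxKind::LeftBrace);
            code(p, syntax_set!(RightBrace, End)); // assumed body parser
            p.expect(SyntaxKind::RightBrace);
        });
        p.wrap(m, SyntaxKind::CodeBlock);
    }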
- fn exit(&mut self) {
- let mode = self.modes.pop().unwrap();
- if mode != self.lexer.mode() {
- self.unskip();
- self.lexer.set_mode(mode);
- self.lexer.jump(self.current_start);
- self.lex();
- self.skip();
+ /// Parse within the [`AtNewline`] mode for subsequent tokens (does not
+ /// change the current token). This may re-lex the final token on exit.
+ ///
+ /// This function effectively repurposes the call stack as a stack of modes.
+ fn with_nl_mode(&mut self, mode: AtNewline, func: impl FnOnce(&mut Parser<'s>)) {
+ let previous = self.nl_mode;
+ self.nl_mode = mode;
+ func(self);
+ self.nl_mode = previous;
+ if let Some(newline) = self.token.newline {
+ if mode != previous {
+ // Restore our actual token's kind or insert a fake end.
+ let actual_kind = self.token.node.kind();
+ if self.nl_mode.stop_at(newline, actual_kind) {
+ self.token.kind = SyntaxKind::End;
+ } else {
+ self.token.kind = actual_kind;
+ }
+ }
}
}
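Newline sensitivity can also be toggled on its own. A one-line sketch, assuming a plain `AtNewline::Stop` variant exists (`expr` stands in for any parsing function):

    // Within the closure, any token preceded by a newline surfaces as
    // `SyntaxKind::End`, so `expr` cannot run past the line break.
    p.with_nl_mode(AtNewline::Stop, |p| expr(p));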
- fn enter_newline_mode(&mut self, stop: NewlineMode) {
- self.newline_modes.push(stop);
- }
+ /// Move the lexer forward and prepare the current token. In Code, this
+ /// might insert a temporary [`SyntaxKind::End`] based on our newline mode.
+ ///
+ /// This is not a method on `self` because we need a valid token before we
+ /// can initialize the parser.
+ fn lex(nodes: &mut Vec<SyntaxNode>, lexer: &mut Lexer, nl_mode: AtNewline) -> Token {
+ let prev_end = lexer.cursor();
+ let mut start = prev_end;
+ let (mut kind, mut node) = lexer.next();
+ let mut n_trivia = 0;
+ let mut had_newline = false;
+ let mut parbreak = false;
+
+ while kind.is_trivia() {
+ had_newline |= lexer.newline(); // Newlines are always trivia.
+ parbreak |= kind == SyntaxKind::Parbreak;
+ n_trivia += 1;
+ nodes.push(node);
+ start = lexer.cursor();
+ (kind, node) = lexer.next();
+ }
+
+ let newline = if had_newline {
+ let column = (lexer.mode() == LexMode::Markup).then(|| lexer.column(start));
+ let newline = Newline { column, parbreak };
+ if nl_mode.stop_at(newline, kind) {
+ // Insert a temporary `SyntaxKind::End` to halt the parser.
+ // The actual kind will be restored from `node` later.
+ kind = SyntaxKind::End;
+ }
+ Some(newline)
+ } else {
+ None
+ };
- fn exit_newline_mode(&mut self) {
- self.unskip();
- self.newline_modes.pop();
- self.lexer.jump(self.prev_end);
- self.lex();
- self.skip();
+ Token { kind, node, n_trivia, newline, start, prev_end }
}
+}
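A worked example of what `lex` produces (hedged: the exact trivia token boundaries are an assumption). Lexing the Code-mode input "  // note\n  x" under `AtNewline::Continue`:

    // Three trivia nodes are pushed directly into `nodes`:
    //   Space("  "), LineComment("// note"), Space("\n  ")
    // and the returned token is roughly:
    // Token {
    //     kind: SyntaxKind::Ident,  // the `x`
    //     node: <leaf node for `x`>,
    //     n_trivia: 3,
    //     newline: Some(Newline { column: None, parbreak: false }),
    //     start: 12,                // byte offset of `x`
    //     prev_end: 0,              // nothing eaten before this
    // }
    // `column` is `None` because columns are only computed in Markup. Under
    // a stopping newline mode, `kind` would be `SyntaxKind::End` instead,
    // with the real kind recoverable from `node`.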
- fn checkpoint(&self) -> Checkpoint<'s> {
- Checkpoint {
- lexer: self.lexer.clone(),
- prev_end: self.prev_end,
- current_start: self.current_start,
- current: self.current,
- nodes: self.nodes.len(),
- }
- }
+/// Extra parser state for efficiently recovering from mispredicted parses.
+///
+/// This is the same idea as packrat parsing, but we use it only in the limited
+/// case of parenthesized structures. See [`expr_with_paren`] for more.
+#[derive(Default)]
+struct MemoArena {
+ /// A single arena of previously parsed nodes (to reduce allocations).
+ /// Memoized ranges refer to unique sections of the arena.
+ arena: Vec<SyntaxNode>,
+ /// A map from the parser's current position to a range of previously parsed
+ /// nodes in the arena and a checkpoint of the parser's state. These allow
+ /// us to reset the parser to avoid parsing the same location again.
+ memo_map: HashMap<MemoKey, (Range<usize>, PartialState)>,
+}
+
+/// A type alias for the memo key so it doesn't get confused with other usizes.
+///
+/// The memo is keyed by the index into `text` of the current token's start.
+type MemoKey = usize;
- fn restore(&mut self, checkpoint: Checkpoint<'s>) {
- self.lexer = checkpoint.lexer;
- self.prev_end = checkpoint.prev_end;
- self.current_start = checkpoint.current_start;
- self.current = checkpoint.current;
- self.nodes.truncate(checkpoint.nodes);
- }
+/// A checkpoint of the parser which can fully restore it to a previous state.
+struct Checkpoint {
+ node_len: usize,
+ state: PartialState,
+}
- fn skip(&mut self) {
- if self.lexer.mode() != LexMode::Markup {
- while self.current.is_trivia() {
- self.save();
- self.lex();
- }
- }
- }
+/// State needed to restore the parser's current token and the lexer (but not
+/// the nodes vector).
+#[derive(Clone)]
+struct PartialState {
+ cursor: usize,
+ lex_mode: LexMode,
+ token: Token,
+}
- fn unskip(&mut self) {
- if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
- while self.nodes.last().is_some_and(|last| last.kind().is_trivia()) {
- self.nodes.pop();
+/// The memoization interface.
+impl<'s> Parser<'s> {
+ /// Store the already parsed nodes and the parser state into the memo map by
+ /// extending the arena and storing the extended range and a checkpoint.
+ fn memoize_parsed_nodes(&mut self, key: MemoKey, prev_len: usize) {
+ let Checkpoint { state, node_len } = self.checkpoint();
+ let memo_start = self.memo.arena.len();
+ self.memo.arena.extend_from_slice(&self.nodes[prev_len..node_len]);
+ let arena_range = memo_start..self.memo.arena.len();
+ self.memo.memo_map.insert(key, (arena_range, state));
+ }
+
+ /// Try to load a memoized result. Returns `None` if a prior parse was
+ /// restored from the memo map, or `Some` (with a checkpoint and a key for
+ /// the memo map) if nothing was memoized at this position.
+ fn restore_memo_or_checkpoint(&mut self) -> Option<(MemoKey, Checkpoint)> {
+ // We use the starting index of the current token as our key.
+ let key: MemoKey = self.current_start();
+ match self.memo.memo_map.get(&key).cloned() {
+ Some((range, state)) => {
+ self.nodes.extend_from_slice(&self.memo.arena[range]);
+ // It's important that we don't truncate the nodes vector since
+ // it may have grown or shrunk (due to other memoization or
+ // error reporting) since we made this checkpoint.
+ self.restore_partial(state);
+ None
}
-
- self.lexer.jump(self.prev_end);
- self.lex();
+ None => Some((key, self.checkpoint())),
}
}
- fn save(&mut self) {
- let text = self.current_text();
- if self.at(SyntaxKind::Error) {
- let error = self.lexer.take_error().unwrap();
- self.nodes.push(SyntaxNode::error(error, text));
- } else {
- self.nodes.push(SyntaxNode::leaf(self.current, text));
- }
-
- if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() {
- self.prev_end = self.current_end();
- }
+ /// Restore the parser to the state at a checkpoint.
+ fn restore(&mut self, checkpoint: Checkpoint) {
+ self.nodes.truncate(checkpoint.node_len);
+ self.restore_partial(checkpoint.state);
}
- fn next_non_trivia(lexer: &mut Lexer<'s>) -> SyntaxKind {
- loop {
- let next = lexer.next();
- // Loop is terminable, because SyntaxKind::End is not a trivia.
- if !next.is_trivia() {
- break next;
- }
- }
+ /// Restore parts of the checkpoint excluding the nodes vector.
+ fn restore_partial(&mut self, state: PartialState) {
+ self.lexer.jump(state.cursor);
+ self.lexer.set_mode(state.lex_mode);
+ self.token = state.token;
}
- fn lex(&mut self) {
- self.current_start = self.lexer.cursor();
- self.current = self.lexer.next();
-
- // Special cases to handle newlines in code mode.
- if self.lexer.mode() == LexMode::Code
- && self.lexer.newline()
- && match self.newline_modes.last() {
- Some(NewlineMode::Continue) => false,
- Some(NewlineMode::Contextual) => !matches!(
- Self::next_non_trivia(&mut self.lexer.clone()),
- SyntaxKind::Else | SyntaxKind::Dot
- ),
- Some(NewlineMode::Stop) => true,
- None => false,
- }
- {
- self.current = SyntaxKind::End;
- }
+ /// Save a checkpoint of the parser state.
+ fn checkpoint(&self) -> Checkpoint {
+ let node_len = self.nodes.len();
+ let state = PartialState {
+ cursor: self.lexer.cursor(),
+ lex_mode: self.lexer.mode(),
+ token: self.token.clone(),
+ };
+ Checkpoint { node_len, state }
}
}
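The intended round-trip through this interface, as a sketch (modeled on the `expr_with_paren` use case mentioned above; `parse_paren_expr` is a stand-in name):

    let Some((key, checkpoint)) = p.restore_memo_or_checkpoint() else {
        return; // An earlier parse here was replayed from the arena.
    };
    let prev_len = checkpoint.node_len;
    parse_paren_expr(p); // the potentially mispredicted parse
    p.memoize_parsed_nodes(key, prev_len);
    // A later visit to the same `key` copies these nodes back out of the
    // arena and restores the lexer/token state instead of re-parsing.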
+/// Functions for eating expected or unexpected tokens and generating errors if
+/// we don't get what we expect.
impl<'s> Parser<'s> {
- /// Consume the given syntax `kind` or produce an error.
+ /// Consume the given `kind` or produce an error.
fn expect(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
if at {
self.eat();
- } else if kind == SyntaxKind::Ident && self.current.is_keyword() {
+ } else if kind == SyntaxKind::Ident && self.token.kind.is_keyword() {
self.trim_errors();
self.eat_and_get().expected(kind.name());
} else {
@@ -1828,6 +1999,12 @@ impl<'s> Parser<'s> {
}
}
+ /// Whether the last non-trivia node is an error.
+ fn after_error(&mut self) -> bool {
+ let m = self.before_trivia();
+ m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
+ }
+
/// Produce an error that the given `thing` was expected at the position
/// of the marker `m`.
fn expected_at(&mut self, m: Marker, thing: &str) {
@@ -1836,7 +2013,7 @@ impl<'s> Parser<'s> {
self.nodes.insert(m.0, error);
}
- /// Produce a hint.
+ /// Add a hint to a trailing error.
fn hint(&mut self, hint: &str) {
let m = self.before_trivia();
if let Some(error) = self.nodes.get_mut(m.0 - 1) {
@@ -1848,7 +2025,7 @@ impl<'s> Parser<'s> {
/// unexpected.
fn unexpected(&mut self) {
self.trim_errors();
- self.balanced &= !self.current.is_grouping();
+ self.balanced &= !self.token.kind.is_grouping();
self.eat_and_get().unexpected();
}
@@ -1865,17 +2042,3 @@ impl<'s> Parser<'s> {
self.nodes.drain(start..end);
}
}
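Typical recovery with these helpers, as a hedged sketch (the kinds and hint text are illustrative, not from this commit):

    p.assert(SyntaxKind::Let);
    let m = p.marker();
    if !p.eat_if(SyntaxKind::Ident) {
        // Insert an error node at `m`, before any skipped trivia.
        p.expected_at(m, "identifier");
    }
    if !p.expect(SyntaxKind::Eq) {
        // `expect` already emitted the error; attach a hint to it.
        p.hint("the binding needs an initializer");
    }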
-
-impl Index<Marker> for Parser<'_> {
- type Output = SyntaxNode;
-
- fn index(&self, m: Marker) -> &Self::Output {
- &self.nodes[m.0]
- }
-}
-
-impl IndexMut<Marker> for Parser<'_> {
- fn index_mut(&mut self, m: Marker) -> &mut Self::Output {
- &mut self.nodes[m.0]
- }
-}
diff --git a/crates/typst-syntax/src/reparser.rs b/crates/typst-syntax/src/reparser.rs
index 7a970490..c20d8314 100644
--- a/crates/typst-syntax/src/reparser.rs
+++ b/crates/typst-syntax/src/reparser.rs
@@ -157,19 +157,13 @@ fn try_reparse(
let new_range = shifted..shifted + new_len;
let at_end = end == children.len();
- // Stop parsing early if this kind is encountered.
- let stop_kind = match parent_kind {
- Some(_) => SyntaxKind::RightBracket,
- None => SyntaxKind::End,
- };
-
// Reparse!
let reparsed = reparse_markup(
text,
new_range.clone(),
&mut at_start,
&mut nesting,
- |kind| kind == stop_kind,
+ parent_kind.is_none(),
);
if let Some(newborns) = reparsed {
diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs
index eaee7ef2..014aaf2f 100644
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
pub const MATH_EXPR: SyntaxSet = syntax_set!(
Hash,
MathIdent,
+ FieldAccess,
Text,
MathShorthand,
Linebreak,
@@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
Numeric,
Str,
Label,
- RawDelim,
+ Raw,
);
/// Syntax kinds that are unary operators.