summaryrefslogtreecommitdiff
path: root/crates/typst-syntax
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2023-07-18 20:11:31 +0200
committerLaurenz <laurmaedje@gmail.com>2023-07-18 21:04:46 +0200
commitf5953887c9ae0b40a0c3e0ab516daf425c5a598c (patch)
treeb517ca68517e49bdf458bfa92036a8ff855c72f6 /crates/typst-syntax
parent7dc605307cf7d69a3476b8b6fc4786f683c3289b (diff)
Extract syntax module into typst-syntax crate
Diffstat (limited to 'crates/typst-syntax')
-rw-r--r--crates/typst-syntax/Cargo.toml27
-rw-r--r--crates/typst-syntax/src/ast.rs2026
-rw-r--r--crates/typst-syntax/src/file.rs279
-rw-r--r--crates/typst-syntax/src/kind.rs480
-rw-r--r--crates/typst-syntax/src/lexer.rs739
-rw-r--r--crates/typst-syntax/src/lib.rs23
-rw-r--r--crates/typst-syntax/src/node.rs912
-rw-r--r--crates/typst-syntax/src/parser.rs1760
-rw-r--r--crates/typst-syntax/src/reparser.rs322
-rw-r--r--crates/typst-syntax/src/source.rs423
-rw-r--r--crates/typst-syntax/src/span.rs128
11 files changed, 7119 insertions, 0 deletions
diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml
new file mode 100644
index 00000000..928635c4
--- /dev/null
+++ b/crates/typst-syntax/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "typst-syntax"
+description = "Parser and syntax tree for Typst."
+categories = ["compilers", "science"]
+keywords = ["typst"]
+version.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+edition.workspace = true
+homepage.workspace = true
+repository.workspace = true
+license.workspace = true
+
+[lib]
+doctest = false
+bench = false
+
+[dependencies]
+comemo = "0.3"
+ecow = "0.1.1"
+once_cell = "1"
+serde = { version = "1", features = ["derive"] }
+tracing = "0.1.37"
+unicode-ident = "1.0"
+unicode-math-class = "0.1"
+unicode-segmentation = "1"
+unscanny = "0.1"
diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs
new file mode 100644
index 00000000..c2755d0c
--- /dev/null
+++ b/crates/typst-syntax/src/ast.rs
@@ -0,0 +1,2026 @@
+//! A typed layer over the untyped syntax tree.
+//!
+//! The AST is rooted in the [`Markup`] node.
+
+use std::num::NonZeroUsize;
+use std::ops::Deref;
+
+use ecow::EcoString;
+use unscanny::Scanner;
+
+use super::{
+ is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
+};
+
+/// A typed AST node.
+pub trait AstNode: Sized {
+ /// Convert a node into its typed variant.
+ fn from_untyped(node: &SyntaxNode) -> Option<Self>;
+
+ /// A reference to the underlying syntax node.
+ fn as_untyped(&self) -> &SyntaxNode;
+
+ /// The source code location.
+ fn span(&self) -> Span {
+ self.as_untyped().span()
+ }
+}
+
+macro_rules! node {
+ ($(#[$attr:meta])* $name:ident) => {
+ #[derive(Debug, Default, Clone, Hash)]
+ #[repr(transparent)]
+ $(#[$attr])*
+ pub struct $name(SyntaxNode);
+
+ impl AstNode for $name {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ if matches!(node.kind(), SyntaxKind::$name) {
+ Some(Self(node.clone()))
+ } else {
+ Option::None
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ &self.0
+ }
+ }
+ };
+}
+
+node! {
+ /// The syntactical root capable of representing a full parsed document.
+ Markup
+}
+
+impl Markup {
+ /// The expressions.
+ pub fn exprs(&self) -> impl DoubleEndedIterator<Item = Expr> + '_ {
+ let mut was_stmt = false;
+ self.0
+ .children()
+ .filter(move |node| {
+ // Ignore newline directly after statements without semicolons.
+ let kind = node.kind();
+ let keep = !was_stmt || node.kind() != SyntaxKind::Space;
+ was_stmt = kind.is_stmt();
+ keep
+ })
+ .filter_map(Expr::cast_with_space)
+ }
+}
+
+/// An expression in markup, math or code.
+#[derive(Debug, Clone, Hash)]
+pub enum Expr {
+ /// Plain text without markup.
+ Text(Text),
+ /// Whitespace in markup or math. Has at most one newline in markup, as more
+ /// indicate a paragraph break.
+ Space(Space),
+ /// A forced line break: `\`.
+ Linebreak(Linebreak),
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak(Parbreak),
+ /// An escape sequence: `\#`, `\u{1F5FA}`.
+ Escape(Escape),
+ /// A shorthand for a unicode codepoint. For example, `~` for non-breaking
+ /// space or `-?` for a soft hyphen.
+ Shorthand(Shorthand),
+ /// A smart quote: `'` or `"`.
+ SmartQuote(SmartQuote),
+ /// Strong content: `*Strong*`.
+ Strong(Strong),
+ /// Emphasized content: `_Emphasized_`.
+ Emph(Emph),
+ /// Raw text with optional syntax highlighting: `` `...` ``.
+ Raw(Raw),
+ /// A hyperlink: `https://typst.org`.
+ Link(Link),
+ /// A label: `<intro>`.
+ Label(Label),
+ /// A reference: `@target`, `@target[..]`.
+ Ref(Ref),
+ /// A section heading: `= Introduction`.
+ Heading(Heading),
+ /// An item in a bullet list: `- ...`.
+ List(ListItem),
+ /// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
+ Enum(EnumItem),
+ /// An item in a term list: `/ Term: Details`.
+ Term(TermItem),
+ /// A mathematical equation: `$x$`, `$ x^2 $`.
+ Equation(Equation),
+ /// The contents of a mathematical equation: `x^2 + 1`.
+ Math(Math),
+ /// An identifier in math: `pi`.
+ MathIdent(MathIdent),
+ /// An alignment point in math: `&`.
+ MathAlignPoint(MathAlignPoint),
+ /// Matched delimiters in math: `[x + y]`.
+ MathDelimited(MathDelimited),
+ /// A base with optional attachments in math: `a_1^2`.
+ MathAttach(MathAttach),
+ /// Grouped math primes
+ MathPrimes(MathPrimes),
+ /// A fraction in math: `x/2`.
+ MathFrac(MathFrac),
+ /// A root in math: `√x`, `∛x` or `∜x`.
+ MathRoot(MathRoot),
+ /// An identifier: `left`.
+ Ident(Ident),
+ /// The `none` literal.
+ None(None),
+ /// The `auto` literal.
+ Auto(Auto),
+ /// A boolean: `true`, `false`.
+ Bool(Bool),
+ /// An integer: `120`.
+ Int(Int),
+ /// A floating-point number: `1.2`, `10e-4`.
+ Float(Float),
+ /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
+ Numeric(Numeric),
+ /// A quoted string: `"..."`.
+ Str(Str),
+ /// A code block: `{ let x = 1; x + 2 }`.
+ Code(CodeBlock),
+ /// A content block: `[*Hi* there!]`.
+ Content(ContentBlock),
+ /// A grouped expression: `(1 + 2)`.
+ Parenthesized(Parenthesized),
+ /// An array: `(1, "hi", 12cm)`.
+ Array(Array),
+ /// A dictionary: `(thickness: 3pt, pattern: dashed)`.
+ Dict(Dict),
+ /// A unary operation: `-x`.
+ Unary(Unary),
+ /// A binary operation: `a + b`.
+ Binary(Binary),
+ /// A field access: `properties.age`.
+ FieldAccess(FieldAccess),
+ /// An invocation of a function or method: `f(x, y)`.
+ FuncCall(FuncCall),
+ /// A closure: `(x, y) => z`.
+ Closure(Closure),
+ /// A let binding: `let x = 1`.
+ Let(LetBinding),
+ //// A destructuring assignment: `(x, y) = (1, 2)`.
+ DestructAssign(DestructAssignment),
+ /// A set rule: `set text(...)`.
+ Set(SetRule),
+ /// A show rule: `show heading: it => emph(it.body)`.
+ Show(ShowRule),
+ /// An if-else conditional: `if x { y } else { z }`.
+ Conditional(Conditional),
+ /// A while loop: `while x { y }`.
+ While(WhileLoop),
+ /// A for loop: `for x in y { z }`.
+ For(ForLoop),
+ /// A module import: `import "utils.typ": a, b, c`.
+ Import(ModuleImport),
+ /// A module include: `include "chapter1.typ"`.
+ Include(ModuleInclude),
+ /// A break from a loop: `break`.
+ Break(LoopBreak),
+ /// A continue in a loop: `continue`.
+ Continue(LoopContinue),
+ /// A return from a function: `return`, `return x + 1`.
+ Return(FuncReturn),
+}
+
+impl Expr {
+ fn cast_with_space(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Space => node.cast().map(Self::Space),
+ _ => Self::from_untyped(node),
+ }
+ }
+}
+
+impl AstNode for Expr {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
+ SyntaxKind::Parbreak => node.cast().map(Self::Parbreak),
+ SyntaxKind::Text => node.cast().map(Self::Text),
+ SyntaxKind::Escape => node.cast().map(Self::Escape),
+ SyntaxKind::Shorthand => node.cast().map(Self::Shorthand),
+ SyntaxKind::SmartQuote => node.cast().map(Self::SmartQuote),
+ SyntaxKind::Strong => node.cast().map(Self::Strong),
+ SyntaxKind::Emph => node.cast().map(Self::Emph),
+ SyntaxKind::Raw => node.cast().map(Self::Raw),
+ SyntaxKind::Link => node.cast().map(Self::Link),
+ SyntaxKind::Label => node.cast().map(Self::Label),
+ SyntaxKind::Ref => node.cast().map(Self::Ref),
+ SyntaxKind::Heading => node.cast().map(Self::Heading),
+ SyntaxKind::ListItem => node.cast().map(Self::List),
+ SyntaxKind::EnumItem => node.cast().map(Self::Enum),
+ SyntaxKind::TermItem => node.cast().map(Self::Term),
+ SyntaxKind::Equation => node.cast().map(Self::Equation),
+ SyntaxKind::Math => node.cast().map(Self::Math),
+ SyntaxKind::MathIdent => node.cast().map(Self::MathIdent),
+ SyntaxKind::MathAlignPoint => node.cast().map(Self::MathAlignPoint),
+ SyntaxKind::MathDelimited => node.cast().map(Self::MathDelimited),
+ SyntaxKind::MathAttach => node.cast().map(Self::MathAttach),
+ SyntaxKind::MathPrimes => node.cast().map(Self::MathPrimes),
+ SyntaxKind::MathFrac => node.cast().map(Self::MathFrac),
+ SyntaxKind::MathRoot => node.cast().map(Self::MathRoot),
+ SyntaxKind::Ident => node.cast().map(Self::Ident),
+ SyntaxKind::None => node.cast().map(Self::None),
+ SyntaxKind::Auto => node.cast().map(Self::Auto),
+ SyntaxKind::Bool => node.cast().map(Self::Bool),
+ SyntaxKind::Int => node.cast().map(Self::Int),
+ SyntaxKind::Float => node.cast().map(Self::Float),
+ SyntaxKind::Numeric => node.cast().map(Self::Numeric),
+ SyntaxKind::Str => node.cast().map(Self::Str),
+ SyntaxKind::CodeBlock => node.cast().map(Self::Code),
+ SyntaxKind::ContentBlock => node.cast().map(Self::Content),
+ SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized),
+ SyntaxKind::Array => node.cast().map(Self::Array),
+ SyntaxKind::Dict => node.cast().map(Self::Dict),
+ SyntaxKind::Unary => node.cast().map(Self::Unary),
+ SyntaxKind::Binary => node.cast().map(Self::Binary),
+ SyntaxKind::FieldAccess => node.cast().map(Self::FieldAccess),
+ SyntaxKind::FuncCall => node.cast().map(Self::FuncCall),
+ SyntaxKind::Closure => node.cast().map(Self::Closure),
+ SyntaxKind::LetBinding => node.cast().map(Self::Let),
+ SyntaxKind::DestructAssignment => node.cast().map(Self::DestructAssign),
+ SyntaxKind::SetRule => node.cast().map(Self::Set),
+ SyntaxKind::ShowRule => node.cast().map(Self::Show),
+ SyntaxKind::Conditional => node.cast().map(Self::Conditional),
+ SyntaxKind::WhileLoop => node.cast().map(Self::While),
+ SyntaxKind::ForLoop => node.cast().map(Self::For),
+ SyntaxKind::ModuleImport => node.cast().map(Self::Import),
+ SyntaxKind::ModuleInclude => node.cast().map(Self::Include),
+ SyntaxKind::LoopBreak => node.cast().map(Self::Break),
+ SyntaxKind::LoopContinue => node.cast().map(Self::Continue),
+ SyntaxKind::FuncReturn => node.cast().map(Self::Return),
+ _ => Option::None,
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Text(v) => v.as_untyped(),
+ Self::Space(v) => v.as_untyped(),
+ Self::Linebreak(v) => v.as_untyped(),
+ Self::Parbreak(v) => v.as_untyped(),
+ Self::Escape(v) => v.as_untyped(),
+ Self::Shorthand(v) => v.as_untyped(),
+ Self::SmartQuote(v) => v.as_untyped(),
+ Self::Strong(v) => v.as_untyped(),
+ Self::Emph(v) => v.as_untyped(),
+ Self::Raw(v) => v.as_untyped(),
+ Self::Link(v) => v.as_untyped(),
+ Self::Label(v) => v.as_untyped(),
+ Self::Ref(v) => v.as_untyped(),
+ Self::Heading(v) => v.as_untyped(),
+ Self::List(v) => v.as_untyped(),
+ Self::Enum(v) => v.as_untyped(),
+ Self::Term(v) => v.as_untyped(),
+ Self::Equation(v) => v.as_untyped(),
+ Self::Math(v) => v.as_untyped(),
+ Self::MathIdent(v) => v.as_untyped(),
+ Self::MathAlignPoint(v) => v.as_untyped(),
+ Self::MathDelimited(v) => v.as_untyped(),
+ Self::MathAttach(v) => v.as_untyped(),
+ Self::MathPrimes(v) => v.as_untyped(),
+ Self::MathFrac(v) => v.as_untyped(),
+ Self::MathRoot(v) => v.as_untyped(),
+ Self::Ident(v) => v.as_untyped(),
+ Self::None(v) => v.as_untyped(),
+ Self::Auto(v) => v.as_untyped(),
+ Self::Bool(v) => v.as_untyped(),
+ Self::Int(v) => v.as_untyped(),
+ Self::Float(v) => v.as_untyped(),
+ Self::Numeric(v) => v.as_untyped(),
+ Self::Str(v) => v.as_untyped(),
+ Self::Code(v) => v.as_untyped(),
+ Self::Content(v) => v.as_untyped(),
+ Self::Array(v) => v.as_untyped(),
+ Self::Dict(v) => v.as_untyped(),
+ Self::Parenthesized(v) => v.as_untyped(),
+ Self::Unary(v) => v.as_untyped(),
+ Self::Binary(v) => v.as_untyped(),
+ Self::FieldAccess(v) => v.as_untyped(),
+ Self::FuncCall(v) => v.as_untyped(),
+ Self::Closure(v) => v.as_untyped(),
+ Self::Let(v) => v.as_untyped(),
+ Self::DestructAssign(v) => v.as_untyped(),
+ Self::Set(v) => v.as_untyped(),
+ Self::Show(v) => v.as_untyped(),
+ Self::Conditional(v) => v.as_untyped(),
+ Self::While(v) => v.as_untyped(),
+ Self::For(v) => v.as_untyped(),
+ Self::Import(v) => v.as_untyped(),
+ Self::Include(v) => v.as_untyped(),
+ Self::Break(v) => v.as_untyped(),
+ Self::Continue(v) => v.as_untyped(),
+ Self::Return(v) => v.as_untyped(),
+ }
+ }
+}
+
+impl Expr {
+ /// Can this expression be embedded into markup with a hashtag?
+ pub fn hashtag(&self) -> bool {
+ matches!(
+ self,
+ Self::Ident(_)
+ | Self::None(_)
+ | Self::Auto(_)
+ | Self::Bool(_)
+ | Self::Int(_)
+ | Self::Float(_)
+ | Self::Numeric(_)
+ | Self::Str(_)
+ | Self::Code(_)
+ | Self::Content(_)
+ | Self::Array(_)
+ | Self::Dict(_)
+ | Self::Parenthesized(_)
+ | Self::FieldAccess(_)
+ | Self::FuncCall(_)
+ | Self::Let(_)
+ | Self::Set(_)
+ | Self::Show(_)
+ | Self::Conditional(_)
+ | Self::While(_)
+ | Self::For(_)
+ | Self::Import(_)
+ | Self::Include(_)
+ | Self::Break(_)
+ | Self::Continue(_)
+ | Self::Return(_)
+ )
+ }
+
+ /// Is this a literal?
+ pub fn is_literal(&self) -> bool {
+ matches!(
+ self,
+ Self::None(_)
+ | Self::Auto(_)
+ | Self::Bool(_)
+ | Self::Int(_)
+ | Self::Float(_)
+ | Self::Numeric(_)
+ | Self::Str(_)
+ )
+ }
+}
+
+impl Default for Expr {
+ fn default() -> Self {
+ Expr::Space(Space::default())
+ }
+}
+
+node! {
+ /// Plain text without markup.
+ Text
+}
+
+impl Text {
+ /// Get the text.
+ pub fn get(&self) -> &EcoString {
+ self.0.text()
+ }
+}
+
+node! {
+ /// Whitespace in markup or math. Has at most one newline in markup, as more
+ /// indicate a paragraph break.
+ Space
+}
+
+node! {
+ /// A forced line break: `\`.
+ Linebreak
+}
+
+node! {
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak
+}
+
+node! {
+ /// An escape sequence: `\#`, `\u{1F5FA}`.
+ Escape
+}
+
+impl Escape {
+ /// Get the escaped character.
+ pub fn get(&self) -> char {
+ let mut s = Scanner::new(self.0.text());
+ s.expect('\\');
+ if s.eat_if("u{") {
+ let hex = s.eat_while(char::is_ascii_hexdigit);
+ u32::from_str_radix(hex, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ .unwrap_or_default()
+ } else {
+ s.eat().unwrap_or_default()
+ }
+ }
+}
+
+node! {
+ /// A shorthand for a unicode codepoint. For example, `~` for a non-breaking
+ /// space or `-?` for a soft hyphen.
+ Shorthand
+}
+
+impl Shorthand {
+ /// A list of all shorthands.
+ pub const LIST: &[(&'static str, char)] = &[
+ // Both.
+ ("...", '…'),
+ // Text only.
+ ("~", '\u{00A0}'),
+ ("--", '\u{2013}'),
+ ("---", '\u{2014}'),
+ ("-?", '\u{00AD}'),
+ // Math only.
+ ("-", '\u{2212}'),
+ ("'", '′'),
+ ("*", '∗'),
+ ("!=", '≠'),
+ (":=", '≔'),
+ ("::=", '⩴'),
+ ("=:", '≕'),
+ ("<<", '≪'),
+ ("<<<", '⋘'),
+ (">>", '≫'),
+ (">>>", '⋙'),
+ ("<=", '≤'),
+ (">=", '≥'),
+ ("->", '→'),
+ ("-->", '⟶'),
+ ("|->", '↦'),
+ (">->", '↣'),
+ ("->>", '↠'),
+ ("<-", '←'),
+ ("<--", '⟵'),
+ ("<-<", '↢'),
+ ("<<-", '↞'),
+ ("<->", '↔'),
+ ("<-->", '⟷'),
+ ("~>", '⇝'),
+ ("~~>", '⟿'),
+ ("<~", '⇜'),
+ ("<~~", '⬳'),
+ ("=>", '⇒'),
+ ("|=>", '⤇'),
+ ("==>", '⟹'),
+ ("<==", '⟸'),
+ ("<=>", '⇔'),
+ ("<==>", '⟺'),
+ ("[|", '⟦'),
+ ("|]", '⟧'),
+ ("||", '‖'),
+ ];
+
+ /// Get the shorthanded character.
+ pub fn get(&self) -> char {
+ let text = self.0.text();
+ Self::LIST
+ .iter()
+ .find(|&&(s, _)| s == text)
+ .map_or_else(char::default, |&(_, c)| c)
+ }
+}
+
+node! {
+ /// A smart quote: `'` or `"`.
+ SmartQuote
+}
+
+impl SmartQuote {
+ /// Whether this is a double quote.
+ pub fn double(&self) -> bool {
+ self.0.text() == "\""
+ }
+}
+
+node! {
+ /// Strong content: `*Strong*`.
+ Strong
+}
+
+impl Strong {
+ /// The contents of the strong node.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// Emphasized content: `_Emphasized_`.
+ Emph
+}
+
+impl Emph {
+ /// The contents of the emphasis node.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// Raw text with optional syntax highlighting: `` `...` ``.
+ Raw
+}
+
+impl Raw {
+ /// The trimmed raw text.
+ pub fn text(&self) -> EcoString {
+ let mut text = self.0.text().as_str();
+ let blocky = text.starts_with("```");
+ text = text.trim_matches('`');
+
+ // Trim tag, one space at the start, and one space at the end if the
+ // last non-whitespace char is a backtick.
+ if blocky {
+ let mut s = Scanner::new(text);
+ if s.eat_if(is_id_start) {
+ s.eat_while(is_id_continue);
+ }
+ text = s.after();
+ text = text.strip_prefix(' ').unwrap_or(text);
+ if text.trim_end().ends_with('`') {
+ text = text.strip_suffix(' ').unwrap_or(text);
+ }
+ }
+
+ // Split into lines.
+ let mut lines = split_newlines(text);
+
+ if blocky {
+ let dedent = lines
+ .iter()
+ .skip(1)
+ .filter(|line| !line.chars().all(char::is_whitespace))
+ // The line with the closing ``` is always taken into account
+ .chain(lines.last())
+ .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+ .min()
+ .unwrap_or(0);
+
+ // Dedent based on column, but not for the first line.
+ for line in lines.iter_mut().skip(1) {
+ let offset = line.chars().take(dedent).map(char::len_utf8).sum();
+ *line = &line[offset..];
+ }
+
+ let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
+
+ // Trims a sequence of whitespace followed by a newline at the start.
+ if lines.first().map_or(false, is_whitespace) {
+ lines.remove(0);
+ }
+
+ // Trims a newline followed by a sequence of whitespace at the end.
+ if lines.last().map_or(false, is_whitespace) {
+ lines.pop();
+ }
+ }
+
+ lines.join("\n").into()
+ }
+
+ /// An optional identifier specifying the language to syntax-highlight in.
+ pub fn lang(&self) -> Option<&str> {
+ let text = self.0.text();
+
+ // Only blocky literals are supposed to contain a language.
+ if !text.starts_with("```") {
+ return Option::None;
+ }
+
+ let inner = text.trim_start_matches('`');
+ let mut s = Scanner::new(inner);
+ s.eat_if(is_id_start).then(|| {
+ s.eat_while(is_id_continue);
+ s.before()
+ })
+ }
+
+ /// Whether the raw text should be displayed in a separate block.
+ pub fn block(&self) -> bool {
+ let text = self.0.text();
+ text.starts_with("```") && text.chars().any(is_newline)
+ }
+}
+
+node! {
+ /// A hyperlink: `https://typst.org`.
+ Link
+}
+
+impl Link {
+ /// Get the URL.
+ pub fn get(&self) -> &EcoString {
+ self.0.text()
+ }
+}
+
+node! {
+ /// A label: `<intro>`.
+ Label
+}
+
+impl Label {
+ /// Get the label's text.
+ pub fn get(&self) -> &str {
+ self.0.text().trim_start_matches('<').trim_end_matches('>')
+ }
+}
+
+node! {
+ /// A reference: `@target`, `@target[..]`.
+ Ref
+}
+
+impl Ref {
+ /// Get the target.
+ pub fn target(&self) -> &str {
+ self.0
+ .children()
+ .find(|node| node.kind() == SyntaxKind::RefMarker)
+ .map(|node| node.text().trim_start_matches('@'))
+ .unwrap_or_default()
+ }
+
+ /// Get the supplement.
+ pub fn supplement(&self) -> Option<ContentBlock> {
+ self.0.cast_last_match()
+ }
+}
+
+node! {
+ /// A section heading: `= Introduction`.
+ Heading
+}
+
+impl Heading {
+ /// The contents of the heading.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The section depth (number of equals signs).
+ pub fn level(&self) -> NonZeroUsize {
+ self.0
+ .children()
+ .find(|node| node.kind() == SyntaxKind::HeadingMarker)
+ .and_then(|node| node.len().try_into().ok())
+ .unwrap_or(NonZeroUsize::new(1).unwrap())
+ }
+}
+
+node! {
+ /// An item in a bullet list: `- ...`.
+ ListItem
+}
+
+impl ListItem {
+ /// The contents of the list item.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
+ EnumItem
+}
+
+impl EnumItem {
+ /// The explicit numbering, if any: `23.`.
+ pub fn number(&self) -> Option<usize> {
+ self.0.children().find_map(|node| match node.kind() {
+ SyntaxKind::EnumMarker => node.text().trim_end_matches('.').parse().ok(),
+ _ => Option::None,
+ })
+ }
+
+ /// The contents of the list item.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An item in a term list: `/ Term: Details`.
+ TermItem
+}
+
+impl TermItem {
+ /// The term described by the item.
+ pub fn term(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The description of the term.
+ pub fn description(&self) -> Markup {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A mathemathical equation: `$x$`, `$ x^2 $`.
+ Equation
+}
+
+impl Equation {
+ /// The contained math.
+ pub fn body(&self) -> Math {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// Whether the equation should be displayed as a separate block.
+ pub fn block(&self) -> bool {
+ let is_space = |node: Option<&SyntaxNode>| {
+ node.map(SyntaxNode::kind) == Some(SyntaxKind::Space)
+ };
+ is_space(self.0.children().nth(1)) && is_space(self.0.children().nth_back(1))
+ }
+}
+
+node! {
+ /// The contents of a mathematical equation: `x^2 + 1`.
+ Math
+}
+
+impl Math {
+ /// The expressions the mathematical content consists of.
+ pub fn exprs(&self) -> impl DoubleEndedIterator<Item = Expr> + '_ {
+ self.0.children().filter_map(Expr::cast_with_space)
+ }
+}
+
+node! {
+ /// An identifier in math: `pi`.
+ MathIdent
+}
+
+impl MathIdent {
+ /// Get the identifier.
+ pub fn get(&self) -> &EcoString {
+ self.0.text()
+ }
+
+ /// Take out the contained identifier.
+ pub fn take(self) -> EcoString {
+ self.0.into_text()
+ }
+
+ /// Get the identifier as a string slice.
+ pub fn as_str(&self) -> &str {
+ self.get()
+ }
+}
+
+impl Deref for MathIdent {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.as_str()
+ }
+}
+
+node! {
+ /// An alignment point in math: `&`.
+ MathAlignPoint
+}
+
+node! {
+ /// Matched delimiters in math: `[x + y]`.
+ MathDelimited
+}
+
+impl MathDelimited {
+ /// The opening delimiter.
+ pub fn open(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The contents, including the delimiters.
+ pub fn body(&self) -> Math {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The closing delimiter.
+ pub fn close(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A base with optional attachments in math: `a_1^2`.
+ MathAttach
+}
+
+impl MathAttach {
+ /// The base, to which things are attached.
+ pub fn base(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The bottom attachment.
+ pub fn bottom(&self) -> Option<Expr> {
+ self.0
+ .children()
+ .skip_while(|node| !matches!(node.kind(), SyntaxKind::Underscore))
+ .find_map(SyntaxNode::cast)
+ }
+
+ /// The top attachment.
+ pub fn top(&self) -> Option<Expr> {
+ self.0
+ .children()
+ .skip_while(|node| !matches!(node.kind(), SyntaxKind::Hat))
+ .find_map(SyntaxNode::cast)
+ }
+
+ /// Extract primes if present.
+ pub fn primes(&self) -> Option<MathPrimes> {
+ self.0.cast_first_match()
+ }
+}
+
+node! {
+ /// Grouped primes in math: `a'''`.
+ MathPrimes
+}
+
+impl MathPrimes {
+ pub fn count(&self) -> usize {
+ self.0
+ .children()
+ .filter(|node| matches!(node.kind(), SyntaxKind::Prime))
+ .count()
+ }
+}
+
+node! {
+ /// A fraction in math: `x/2`
+ MathFrac
+}
+
+impl MathFrac {
+ /// The numerator.
+ pub fn num(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The denominator.
+ pub fn denom(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A root in math: `√x`, `∛x` or `∜x`.
+ MathRoot
+}
+
+impl MathRoot {
+ /// The index of the root.
+ pub fn index(&self) -> Option<usize> {
+ match self.0.children().next().map(|node| node.text().as_str()) {
+ Some("∜") => Some(4),
+ Some("∛") => Some(3),
+ Some("√") => Option::None,
+ _ => Option::None,
+ }
+ }
+
+ /// The radicand.
+ pub fn radicand(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An identifier: `it`.
+ Ident
+}
+
+impl Ident {
+ /// Get the identifier.
+ pub fn get(&self) -> &EcoString {
+ self.0.text()
+ }
+
+ /// Take out the contained identifier.
+ pub fn take(self) -> EcoString {
+ self.0.into_text()
+ }
+
+ /// Get the identifier as a string slice.
+ pub fn as_str(&self) -> &str {
+ self.get()
+ }
+}
+
+impl Deref for Ident {
+ type Target = str;
+
+ fn deref(&self) -> &Self::Target {
+ self.as_str()
+ }
+}
+
+node! {
+ /// The `none` literal.
+ None
+}
+
+node! {
+ /// The `auto` literal.
+ Auto
+}
+
+node! {
+ /// A boolean: `true`, `false`.
+ Bool
+}
+
+impl Bool {
+ /// Get the boolean value.
+ pub fn get(&self) -> bool {
+ self.0.text() == "true"
+ }
+}
+
+node! {
+ /// An integer: `120`.
+ Int
+}
+
+impl Int {
+ /// Get the integer value.
+ pub fn get(&self) -> i64 {
+ let text = self.0.text();
+ if let Some(rest) = text.strip_prefix("0x") {
+ i64::from_str_radix(rest, 16)
+ } else if let Some(rest) = text.strip_prefix("0o") {
+ i64::from_str_radix(rest, 8)
+ } else if let Some(rest) = text.strip_prefix("0b") {
+ i64::from_str_radix(rest, 2)
+ } else {
+ text.parse()
+ }
+ .unwrap_or_default()
+ }
+}
+
+node! {
+ /// A floating-point number: `1.2`, `10e-4`.
+ Float
+}
+
+impl Float {
+ /// Get the floating-point value.
+ pub fn get(&self) -> f64 {
+ self.0.text().parse().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
+ Numeric
+}
+
+impl Numeric {
+ /// Get the numeric value and unit.
+ pub fn get(&self) -> (f64, Unit) {
+ let text = self.0.text();
+ let count = text
+ .chars()
+ .rev()
+ .take_while(|c| matches!(c, 'a'..='z' | '%'))
+ .count();
+
+ let split = text.len() - count;
+ let value = text[..split].parse().unwrap_or_default();
+ let unit = match &text[split..] {
+ "pt" => Unit::Pt,
+ "mm" => Unit::Mm,
+ "cm" => Unit::Cm,
+ "in" => Unit::In,
+ "deg" => Unit::Deg,
+ "rad" => Unit::Rad,
+ "em" => Unit::Em,
+ "fr" => Unit::Fr,
+ "%" => Unit::Percent,
+ _ => Unit::Percent,
+ };
+
+ (value, unit)
+ }
+}
+
+/// Unit of a numeric value.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum Unit {
+ /// Points.
+ Pt,
+ /// Millimeters.
+ Mm,
+ /// Centimeters.
+ Cm,
+ /// Inches.
+ In,
+ /// Radians.
+ Rad,
+ /// Degrees.
+ Deg,
+ /// Font-relative: `1em` is the same as the font size.
+ Em,
+ /// Fractions: `fr`.
+ Fr,
+ /// Percentage: `%`.
+ Percent,
+}
+
+node! {
+ /// A quoted string: `"..."`.
+ Str
+}
+
+impl Str {
+ /// Get the string value with resolved escape sequences.
+ pub fn get(&self) -> EcoString {
+ let text = self.0.text();
+ let unquoted = &text[1..text.len() - 1];
+ if !unquoted.contains('\\') {
+ return unquoted.into();
+ }
+
+ let mut out = EcoString::with_capacity(unquoted.len());
+ let mut s = Scanner::new(unquoted);
+
+ while let Some(c) = s.eat() {
+ if c != '\\' {
+ out.push(c);
+ continue;
+ }
+
+ let start = s.locate(-1);
+ match s.eat() {
+ Some('\\') => out.push('\\'),
+ Some('"') => out.push('"'),
+ Some('n') => out.push('\n'),
+ Some('r') => out.push('\r'),
+ Some('t') => out.push('\t'),
+ Some('u') if s.eat_if('{') => {
+ let sequence = s.eat_while(char::is_ascii_hexdigit);
+ s.eat_if('}');
+
+ match u32::from_str_radix(sequence, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ {
+ Some(c) => out.push(c),
+ Option::None => out.push_str(s.from(start)),
+ }
+ }
+ _ => out.push_str(s.from(start)),
+ }
+ }
+
+ out
+ }
+}
+
+node! {
+ /// A code block: `{ let x = 1; x + 2 }`.
+ CodeBlock
+}
+
+impl CodeBlock {
+ /// The contained code.
+ pub fn body(&self) -> Code {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// Code.
+ Code
+}
+
+impl Code {
+ /// The list of expressions contained in the code.
+ pub fn exprs(&self) -> impl DoubleEndedIterator<Item = Expr> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+}
+
+node! {
+ /// A content block: `[*Hi* there!]`.
+ ContentBlock
+}
+
+impl ContentBlock {
+ /// The contained markup.
+ pub fn body(&self) -> Markup {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A grouped expression: `(1 + 2)`.
+ Parenthesized
+}
+
+impl Parenthesized {
+ /// The wrapped expression.
+ pub fn expr(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An array: `(1, "hi", 12cm)`.
+ Array
+}
+
+impl Array {
+ /// The array's items.
+ pub fn items(&self) -> impl DoubleEndedIterator<Item = ArrayItem> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+}
+
+/// An item in an array.
+#[derive(Debug, Clone, Hash)]
+pub enum ArrayItem {
+ /// A bare expression: `12`.
+ Pos(Expr),
+ /// A spread expression: `..things`.
+ Spread(Expr),
+}
+
+impl AstNode for ArrayItem {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
+ _ => node.cast().map(Self::Pos),
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Pos(v) => v.as_untyped(),
+ Self::Spread(v) => v.as_untyped(),
+ }
+ }
+}
+
+node! {
+ /// A dictionary: `(thickness: 3pt, pattern: dashed)`.
+ Dict
+}
+
+impl Dict {
+ /// The dictionary's items.
+ pub fn items(&self) -> impl DoubleEndedIterator<Item = DictItem> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+}
+
+/// An item in an dictionary expression.
+#[derive(Debug, Clone, Hash)]
+pub enum DictItem {
+ /// A named pair: `thickness: 3pt`.
+ Named(Named),
+ /// A keyed pair: `"spacy key": true`.
+ Keyed(Keyed),
+ /// A spread expression: `..things`.
+ Spread(Expr),
+}
+
+impl AstNode for DictItem {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Named => node.cast().map(Self::Named),
+ SyntaxKind::Keyed => node.cast().map(Self::Keyed),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
+ _ => Option::None,
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Named(v) => v.as_untyped(),
+ Self::Keyed(v) => v.as_untyped(),
+ Self::Spread(v) => v.as_untyped(),
+ }
+ }
+}
+
+node! {
+ /// A named pair: `thickness: 3pt`.
+ Named
+}
+
+impl Named {
+ /// The name: `thickness`.
+ pub fn name(&self) -> Ident {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The right-hand side of the pair: `3pt`.
+ pub fn expr(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+
+ /// The right-hand side of the pair as an identifier.
+ pub fn expr_ident(&self) -> Option<Ident> {
+ self.0.cast_last_match()
+ }
+}
+
+node! {
+ /// A keyed pair: `"spacy key": true`.
+ Keyed
+}
+
+impl Keyed {
+ /// The key: `"spacy key"`.
+ pub fn key(&self) -> Str {
+ self.0
+ .children()
+ .find_map(|node| node.cast::<Str>())
+ .unwrap_or_default()
+ }
+
+ /// The right-hand side of the pair: `true`.
+ pub fn expr(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A unary operation: `-x`.
+ Unary
+}
+
+impl Unary {
+ /// The operator: `-`.
+ pub fn op(&self) -> UnOp {
+ self.0
+ .children()
+ .find_map(|node| UnOp::from_kind(node.kind()))
+ .unwrap_or(UnOp::Pos)
+ }
+
+ /// The expression to operate on: `x`.
+ pub fn expr(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+/// A unary operator.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum UnOp {
+ /// The plus operator: `+`.
+ Pos,
+ /// The negation operator: `-`.
+ Neg,
+ /// The boolean `not`.
+ Not,
+}
+
+impl UnOp {
+ /// Try to convert the token into a unary operation.
+ pub fn from_kind(token: SyntaxKind) -> Option<Self> {
+ Some(match token {
+ SyntaxKind::Plus => Self::Pos,
+ SyntaxKind::Minus => Self::Neg,
+ SyntaxKind::Not => Self::Not,
+ _ => return Option::None,
+ })
+ }
+
+ /// The precedence of this operator.
+ pub fn precedence(self) -> usize {
+ match self {
+ Self::Pos | Self::Neg => 7,
+ Self::Not => 4,
+ }
+ }
+
+ /// The string representation of this operation.
+ pub fn as_str(self) -> &'static str {
+ match self {
+ Self::Pos => "+",
+ Self::Neg => "-",
+ Self::Not => "not",
+ }
+ }
+}
+
+node! {
+ /// A binary operation: `a + b`.
+ Binary
+}
+
+impl Binary {
+ /// The binary operator: `+`.
+ pub fn op(&self) -> BinOp {
+ let mut not = false;
+ self.0
+ .children()
+ .find_map(|node| match node.kind() {
+ SyntaxKind::Not => {
+ not = true;
+ Option::None
+ }
+ SyntaxKind::In if not => Some(BinOp::NotIn),
+ _ => BinOp::from_kind(node.kind()),
+ })
+ .unwrap_or(BinOp::Add)
+ }
+
+ /// The left-hand side of the operation: `a`.
+ pub fn lhs(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The right-hand side of the operation: `b`.
+ pub fn rhs(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+/// A binary operator.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum BinOp {
+ /// The addition operator: `+`.
+ Add,
+ /// The subtraction operator: `-`.
+ Sub,
+ /// The multiplication operator: `*`.
+ Mul,
+ /// The division operator: `/`.
+ Div,
+ /// The short-circuiting boolean `and`.
+ And,
+ /// The short-circuiting boolean `or`.
+ Or,
+ /// The equality operator: `==`.
+ Eq,
+ /// The inequality operator: `!=`.
+ Neq,
+ /// The less-than operator: `<`.
+ Lt,
+ /// The less-than or equal operator: `<=`.
+ Leq,
+ /// The greater-than operator: `>`.
+ Gt,
+ /// The greater-than or equal operator: `>=`.
+ Geq,
+ /// The assignment operator: `=`.
+ Assign,
+ /// The containment operator: `in`.
+ In,
+ /// The inversed containment operator: `not in`.
+ NotIn,
+ /// The add-assign operator: `+=`.
+ AddAssign,
+ /// The subtract-assign oeprator: `-=`.
+ SubAssign,
+ /// The multiply-assign operator: `*=`.
+ MulAssign,
+ /// The divide-assign operator: `/=`.
+ DivAssign,
+}
+
+impl BinOp {
+ /// Try to convert the token into a binary operation.
+ pub fn from_kind(token: SyntaxKind) -> Option<Self> {
+ Some(match token {
+ SyntaxKind::Plus => Self::Add,
+ SyntaxKind::Minus => Self::Sub,
+ SyntaxKind::Star => Self::Mul,
+ SyntaxKind::Slash => Self::Div,
+ SyntaxKind::And => Self::And,
+ SyntaxKind::Or => Self::Or,
+ SyntaxKind::EqEq => Self::Eq,
+ SyntaxKind::ExclEq => Self::Neq,
+ SyntaxKind::Lt => Self::Lt,
+ SyntaxKind::LtEq => Self::Leq,
+ SyntaxKind::Gt => Self::Gt,
+ SyntaxKind::GtEq => Self::Geq,
+ SyntaxKind::Eq => Self::Assign,
+ SyntaxKind::In => Self::In,
+ SyntaxKind::PlusEq => Self::AddAssign,
+ SyntaxKind::HyphEq => Self::SubAssign,
+ SyntaxKind::StarEq => Self::MulAssign,
+ SyntaxKind::SlashEq => Self::DivAssign,
+ _ => return Option::None,
+ })
+ }
+
+ /// The precedence of this operator.
+ pub fn precedence(self) -> usize {
+ match self {
+ Self::Mul => 6,
+ Self::Div => 6,
+ Self::Add => 5,
+ Self::Sub => 5,
+ Self::Eq => 4,
+ Self::Neq => 4,
+ Self::Lt => 4,
+ Self::Leq => 4,
+ Self::Gt => 4,
+ Self::Geq => 4,
+ Self::In => 4,
+ Self::NotIn => 4,
+ Self::And => 3,
+ Self::Or => 2,
+ Self::Assign => 1,
+ Self::AddAssign => 1,
+ Self::SubAssign => 1,
+ Self::MulAssign => 1,
+ Self::DivAssign => 1,
+ }
+ }
+
+ /// The associativity of this operator.
+ pub fn assoc(self) -> Assoc {
+ match self {
+ Self::Add => Assoc::Left,
+ Self::Sub => Assoc::Left,
+ Self::Mul => Assoc::Left,
+ Self::Div => Assoc::Left,
+ Self::And => Assoc::Left,
+ Self::Or => Assoc::Left,
+ Self::Eq => Assoc::Left,
+ Self::Neq => Assoc::Left,
+ Self::Lt => Assoc::Left,
+ Self::Leq => Assoc::Left,
+ Self::Gt => Assoc::Left,
+ Self::Geq => Assoc::Left,
+ Self::In => Assoc::Left,
+ Self::NotIn => Assoc::Left,
+ Self::Assign => Assoc::Right,
+ Self::AddAssign => Assoc::Right,
+ Self::SubAssign => Assoc::Right,
+ Self::MulAssign => Assoc::Right,
+ Self::DivAssign => Assoc::Right,
+ }
+ }
+
+ /// The string representation of this operation.
+ pub fn as_str(self) -> &'static str {
+ match self {
+ Self::Add => "+",
+ Self::Sub => "-",
+ Self::Mul => "*",
+ Self::Div => "/",
+ Self::And => "and",
+ Self::Or => "or",
+ Self::Eq => "==",
+ Self::Neq => "!=",
+ Self::Lt => "<",
+ Self::Leq => "<=",
+ Self::Gt => ">",
+ Self::Geq => ">=",
+ Self::In => "in",
+ Self::NotIn => "not in",
+ Self::Assign => "=",
+ Self::AddAssign => "+=",
+ Self::SubAssign => "-=",
+ Self::MulAssign => "*=",
+ Self::DivAssign => "/=",
+ }
+ }
+}
+
+/// The associativity of a binary operator.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum Assoc {
+ /// Left-associative: `a + b + c` is equivalent to `(a + b) + c`.
+ Left,
+ /// Right-associative: `a = b = c` is equivalent to `a = (b = c)`.
+ Right,
+}
+
+node! {
+ /// A field access: `properties.age`.
+ FieldAccess
+}
+
+impl FieldAccess {
+ /// The expression to access the field on.
+ pub fn target(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The name of the field.
+ pub fn field(&self) -> Ident {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An invocation of a function or method: `f(x, y)`.
+ FuncCall
+}
+
+impl FuncCall {
+ /// The function to call.
+ pub fn callee(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The arguments to the function.
+ pub fn args(&self) -> Args {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A function call's argument list: `(12pt, y)`.
+ Args
+}
+
+impl Args {
+ /// The positional and named arguments.
+ pub fn items(&self) -> impl DoubleEndedIterator<Item = Arg> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+}
+
+/// An argument to a function call.
+#[derive(Debug, Clone, Hash)]
+pub enum Arg {
+ /// A positional argument: `12`.
+ Pos(Expr),
+ /// A named argument: `draw: false`.
+ Named(Named),
+ /// A spread argument: `..things`.
+ Spread(Expr),
+}
+
+impl AstNode for Arg {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Named => node.cast().map(Self::Named),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
+ _ => node.cast().map(Self::Pos),
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Pos(v) => v.as_untyped(),
+ Self::Named(v) => v.as_untyped(),
+ Self::Spread(v) => v.as_untyped(),
+ }
+ }
+}
+
+node! {
+ /// A closure: `(x, y) => z`.
+ Closure
+}
+
+impl Closure {
+ /// The name of the closure.
+ ///
+ /// This only exists if you use the function syntax sugar: `let f(x) = y`.
+ pub fn name(&self) -> Option<Ident> {
+ self.0.children().next()?.cast()
+ }
+
+ /// The parameter bindings.
+ pub fn params(&self) -> Params {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The body of the closure.
+ pub fn body(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A closure's parameters: `(x, y)`.
+ Params
+}
+
+impl Params {
+ /// The parameter bindings.
+ pub fn children(&self) -> impl DoubleEndedIterator<Item = Param> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+}
+
+node! {
+ /// A spread: `..x` or `..x.at(0)`.
+ Spread
+}
+
+impl Spread {
+ /// Try to get an identifier.
+ pub fn name(&self) -> Option<Ident> {
+ self.0.cast_first_match()
+ }
+
+ /// Try to get an expression.
+ pub fn expr(&self) -> Option<Expr> {
+ self.0.cast_first_match()
+ }
+}
+
+node! {
+ /// An underscore: `_`
+ Underscore
+}
+
+/// A parameter to a closure.
+#[derive(Debug, Clone, Hash)]
+pub enum Param {
+ /// A positional parameter: `x`.
+ Pos(Pattern),
+ /// A named parameter with a default value: `draw: false`.
+ Named(Named),
+ /// An argument sink: `..args`.
+ Sink(Spread),
+}
+
+impl AstNode for Param {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Named => node.cast().map(Self::Named),
+ SyntaxKind::Spread => node.cast().map(Self::Sink),
+ _ => node.cast().map(Self::Pos),
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Pos(v) => v.as_untyped(),
+ Self::Named(v) => v.as_untyped(),
+ Self::Sink(v) => v.as_untyped(),
+ }
+ }
+}
+
+node! {
+ /// A destructuring pattern: `x` or `(x, _, ..y)`.
+ Destructuring
+}
+
+impl Destructuring {
+ /// The bindings of the destructuring.
+ pub fn bindings(&self) -> impl Iterator<Item = DestructuringKind> + '_ {
+ self.0.children().filter_map(SyntaxNode::cast)
+ }
+
+ // Returns a list of all identifiers in the pattern.
+ pub fn idents(&self) -> impl Iterator<Item = Ident> + '_ {
+ self.bindings().filter_map(|binding| match binding {
+ DestructuringKind::Normal(Expr::Ident(ident)) => Some(ident),
+ DestructuringKind::Sink(spread) => spread.name(),
+ DestructuringKind::Named(named) => named.expr_ident(),
+ _ => Option::None,
+ })
+ }
+}
+
+/// The kind of an element in a destructuring pattern.
+#[derive(Debug, Clone, Hash)]
+pub enum DestructuringKind {
+ /// An expression: `x`.
+ Normal(Expr),
+ /// An argument sink: `..y`.
+ Sink(Spread),
+ /// Named arguments: `x: 1`.
+ Named(Named),
+ /// A placeholder: `_`.
+ Placeholder(Underscore),
+}
+
+impl AstNode for DestructuringKind {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Named => node.cast().map(Self::Named),
+ SyntaxKind::Spread => node.cast().map(Self::Sink),
+ SyntaxKind::Underscore => node.cast().map(Self::Placeholder),
+ _ => node.cast().map(Self::Normal),
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Normal(v) => v.as_untyped(),
+ Self::Named(v) => v.as_untyped(),
+ Self::Sink(v) => v.as_untyped(),
+ Self::Placeholder(v) => v.as_untyped(),
+ }
+ }
+}
+
+/// The kind of a pattern.
+#[derive(Debug, Clone, Hash)]
+pub enum Pattern {
+ /// A single expression: `x`.
+ Normal(Expr),
+ /// A placeholder: `_`.
+ Placeholder(Underscore),
+ /// A destructuring pattern: `(x, _, ..y)`.
+ Destructuring(Destructuring),
+}
+
+impl AstNode for Pattern {
+ fn from_untyped(node: &SyntaxNode) -> Option<Self> {
+ match node.kind() {
+ SyntaxKind::Destructuring => node.cast().map(Self::Destructuring),
+ SyntaxKind::Underscore => node.cast().map(Self::Placeholder),
+ _ => node.cast().map(Self::Normal),
+ }
+ }
+
+ fn as_untyped(&self) -> &SyntaxNode {
+ match self {
+ Self::Normal(v) => v.as_untyped(),
+ Self::Destructuring(v) => v.as_untyped(),
+ Self::Placeholder(v) => v.as_untyped(),
+ }
+ }
+}
+
+impl Pattern {
+ // Returns a list of all identifiers in the pattern.
+ pub fn idents(&self) -> Vec<Ident> {
+ match self {
+ Pattern::Normal(Expr::Ident(ident)) => vec![ident.clone()],
+ Pattern::Destructuring(destruct) => destruct.idents().collect(),
+ _ => vec![],
+ }
+ }
+}
+
+impl Default for Pattern {
+ fn default() -> Self {
+ Self::Normal(Expr::default())
+ }
+}
+
+node! {
+ /// A let binding: `let x = 1`.
+ LetBinding
+}
+
+#[derive(Debug)]
+pub enum LetBindingKind {
+ /// A normal binding: `let x = 1`.
+ Normal(Pattern),
+ /// A closure binding: `let f(x) = 1`.
+ Closure(Ident),
+}
+
+impl LetBindingKind {
+ // Returns a list of all identifiers in the pattern.
+ pub fn idents(&self) -> Vec<Ident> {
+ match self {
+ LetBindingKind::Normal(pattern) => pattern.idents(),
+ LetBindingKind::Closure(ident) => {
+ vec![ident.clone()]
+ }
+ }
+ }
+}
+
+impl LetBinding {
+ /// The kind of the let binding.
+ pub fn kind(&self) -> LetBindingKind {
+ match self.0.cast_first_match::<Pattern>() {
+ Some(Pattern::Normal(Expr::Closure(closure))) => {
+ LetBindingKind::Closure(closure.name().unwrap_or_default())
+ }
+ pattern => LetBindingKind::Normal(pattern.unwrap_or_default()),
+ }
+ }
+
+ /// The expression the binding is initialized with.
+ pub fn init(&self) -> Option<Expr> {
+ match self.kind() {
+ LetBindingKind::Normal(Pattern::Normal(_)) => {
+ self.0.children().filter_map(SyntaxNode::cast).nth(1)
+ }
+ LetBindingKind::Normal(_) => self.0.cast_first_match(),
+ LetBindingKind::Closure(_) => self.0.cast_first_match(),
+ }
+ }
+}
+
+node! {
+ /// An assignment expression `(x, y) = (1, 2)`.
+ DestructAssignment
+}
+
+impl DestructAssignment {
+ /// The pattern of the assignment.
+ pub fn pattern(&self) -> Pattern {
+ self.0.cast_first_match::<Pattern>().unwrap_or_default()
+ }
+
+ /// The expression that is assigned.
+ pub fn value(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A set rule: `set text(...)`.
+ SetRule
+}
+
+impl SetRule {
+ /// The function to set style properties for.
+ pub fn target(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The style properties to set.
+ pub fn args(&self) -> Args {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+
+ /// A condition under which the set rule applies.
+ pub fn condition(&self) -> Option<Expr> {
+ self.0
+ .children()
+ .skip_while(|child| child.kind() != SyntaxKind::If)
+ .find_map(SyntaxNode::cast)
+ }
+}
+
+node! {
+ /// A show rule: `show heading: it => emph(it.body)`.
+ ShowRule
+}
+
+impl ShowRule {
+ /// Defines which nodes the show rule applies to.
+ pub fn selector(&self) -> Option<Expr> {
+ self.0
+ .children()
+ .rev()
+ .skip_while(|child| child.kind() != SyntaxKind::Colon)
+ .find_map(SyntaxNode::cast)
+ }
+
+ /// The transformation recipe.
+ pub fn transform(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// An if-else conditional: `if x { y } else { z }`.
+ Conditional
+}
+
+impl Conditional {
+ /// The condition which selects the body to evaluate.
+ pub fn condition(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The expression to evaluate if the condition is true.
+ pub fn if_body(&self) -> Expr {
+ self.0
+ .children()
+ .filter_map(SyntaxNode::cast)
+ .nth(1)
+ .unwrap_or_default()
+ }
+
+ /// The expression to evaluate if the condition is false.
+ pub fn else_body(&self) -> Option<Expr> {
+ self.0.children().filter_map(SyntaxNode::cast).nth(2)
+ }
+}
+
+node! {
+ /// A while loop: `while x { y }`.
+ WhileLoop
+}
+
+impl WhileLoop {
+ /// The condition which selects whether to evaluate the body.
+ pub fn condition(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The expression to evaluate while the condition is true.
+ pub fn body(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A for loop: `for x in y { z }`.
+ ForLoop
+}
+
+impl ForLoop {
+ /// The pattern to assign to.
+ pub fn pattern(&self) -> Pattern {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The expression to iterate over.
+ pub fn iter(&self) -> Expr {
+ self.0
+ .children()
+ .skip_while(|&c| c.kind() != SyntaxKind::In)
+ .find_map(SyntaxNode::cast)
+ .unwrap_or_default()
+ }
+
+ /// The expression to evaluate for each iteration.
+ pub fn body(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A module import: `import "utils.typ": a, b, c`.
+ ModuleImport
+}
+
+impl ModuleImport {
+ /// The module or path from which the items should be imported.
+ pub fn source(&self) -> Expr {
+ self.0.cast_first_match().unwrap_or_default()
+ }
+
+ /// The items to be imported.
+ pub fn imports(&self) -> Option<Imports> {
+ self.0.children().find_map(|node| match node.kind() {
+ SyntaxKind::Star => Some(Imports::Wildcard),
+ SyntaxKind::ImportItems => {
+ let items = node.children().filter_map(SyntaxNode::cast).collect();
+ Some(Imports::Items(items))
+ }
+ _ => Option::None,
+ })
+ }
+}
+
+/// The items that ought to be imported from a file.
+#[derive(Debug, Clone, Hash)]
+pub enum Imports {
+ /// All items in the scope of the file should be imported.
+ Wildcard,
+ /// The specified items from the file should be imported.
+ Items(Vec<Ident>),
+}
+
+node! {
+ /// A module include: `include "chapter1.typ"`.
+ ModuleInclude
+}
+
+impl ModuleInclude {
+ /// The module or path from which the content should be included.
+ pub fn source(&self) -> Expr {
+ self.0.cast_last_match().unwrap_or_default()
+ }
+}
+
+node! {
+ /// A break from a loop: `break`.
+ LoopBreak
+}
+
+node! {
+ /// A continue in a loop: `continue`.
+ LoopContinue
+}
+
+node! {
+ /// A return from a function: `return`, `return x + 1`.
+ FuncReturn
+}
+
+impl FuncReturn {
+ /// The expression to return.
+ pub fn body(&self) -> Option<Expr> {
+ self.0.cast_last_match()
+ }
+}
diff --git a/crates/typst-syntax/src/file.rs b/crates/typst-syntax/src/file.rs
new file mode 100644
index 00000000..fc1bed21
--- /dev/null
+++ b/crates/typst-syntax/src/file.rs
@@ -0,0 +1,279 @@
+//! File and package management.
+
+use std::collections::HashMap;
+use std::fmt::{self, Debug, Display, Formatter};
+use std::path::{Component, Path, PathBuf};
+use std::str::FromStr;
+use std::sync::RwLock;
+
+use ecow::{eco_format, EcoString};
+use once_cell::sync::Lazy;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+use super::is_ident;
+
+/// The global package-path interner.
+static INTERNER: Lazy<RwLock<Interner>> =
+ Lazy::new(|| RwLock::new(Interner { to_id: HashMap::new(), from_id: Vec::new() }));
+
+/// A package-path interner.
+struct Interner {
+ to_id: HashMap<Pair, FileId>,
+ from_id: Vec<Pair>,
+}
+
+/// An interned pair of a package specification and a path.
+type Pair = &'static (Option<PackageSpec>, PathBuf);
+
+/// Identifies a file in a project or package.
+///
+/// This type is globally interned and thus cheap to copy, compare, and hash.
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct FileId(u16);
+
+impl FileId {
+ /// Create a new interned file specification.
+ ///
+ /// The path must start with a `/` or this function will panic.
+ /// Note that the path is normalized before interning.
+ #[track_caller]
+ pub fn new(package: Option<PackageSpec>, path: &Path) -> Self {
+ assert_eq!(
+ path.components().next(),
+ Some(std::path::Component::RootDir),
+ "file path must be absolute within project or package: {}",
+ path.display(),
+ );
+
+ // Try to find an existing entry that we can reuse.
+ let pair = (package, normalize_path(path));
+ if let Some(&id) = INTERNER.read().unwrap().to_id.get(&pair) {
+ return id;
+ }
+
+ let mut interner = INTERNER.write().unwrap();
+ let len = interner.from_id.len();
+ if len >= usize::from(u16::MAX) {
+ panic!("too many file specifications");
+ }
+
+ // Create a new entry forever by leaking the pair. We can't leak more
+ // than 2^16 pair (and typically will leak a lot less), so its not a
+ // big deal.
+ let id = FileId(len as u16);
+ let leaked = Box::leak(Box::new(pair));
+ interner.to_id.insert(leaked, id);
+ interner.from_id.push(leaked);
+ id
+ }
+
+ /// Get an id that does not identify any real file.
+ pub const fn detached() -> Self {
+ Self(u16::MAX)
+ }
+
+ /// Whether the id is the detached.
+ pub const fn is_detached(self) -> bool {
+ self.0 == Self::detached().0
+ }
+
+ /// The package the file resides in, if any.
+ pub fn package(&self) -> Option<&'static PackageSpec> {
+ if self.is_detached() {
+ None
+ } else {
+ self.pair().0.as_ref()
+ }
+ }
+
+ /// The absolute and normalized path to the file _within_ the project or
+ /// package.
+ pub fn path(&self) -> &'static Path {
+ if self.is_detached() {
+ Path::new("/detached.typ")
+ } else {
+ &self.pair().1
+ }
+ }
+
+ /// Resolve a file location relative to this file.
+ pub fn join(self, path: &str) -> Result<Self, EcoString> {
+ if self.is_detached() {
+ Err("cannot access file system from here")?;
+ }
+
+ let package = self.package().cloned();
+ let base = self.path();
+ Ok(if let Some(parent) = base.parent() {
+ Self::new(package, &parent.join(path))
+ } else {
+ Self::new(package, Path::new(path))
+ })
+ }
+
+ /// Construct from a raw number.
+ pub(crate) const fn from_u16(v: u16) -> Self {
+ Self(v)
+ }
+
+ /// Extract the raw underlying number.
+ pub(crate) const fn as_u16(self) -> u16 {
+ self.0
+ }
+
+ /// Get the static pair.
+ fn pair(&self) -> Pair {
+ INTERNER.read().unwrap().from_id[usize::from(self.0)]
+ }
+}
+
+impl Display for FileId {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ let path = self.path().display();
+ match self.package() {
+ Some(package) => write!(f, "{package}{path}"),
+ None => write!(f, "{path}"),
+ }
+ }
+}
+
+impl Debug for FileId {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ Display::fmt(self, f)
+ }
+}
+
+/// Lexically normalize a path.
+fn normalize_path(path: &Path) -> PathBuf {
+ let mut out = PathBuf::new();
+ for component in path.components() {
+ match component {
+ Component::CurDir => {}
+ Component::ParentDir => match out.components().next_back() {
+ Some(Component::Normal(_)) => {
+ out.pop();
+ }
+ _ => out.push(component),
+ },
+ Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
+ out.push(component)
+ }
+ }
+ }
+ if out.as_os_str().is_empty() {
+ out.push(Component::CurDir);
+ }
+ out
+}
+
+/// Identifies a package.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct PackageSpec {
+ /// The namespace the package lives in.
+ pub namespace: EcoString,
+ /// The name of the package within its namespace.
+ pub name: EcoString,
+ /// The package's version.
+ pub version: PackageVersion,
+}
+
+impl FromStr for PackageSpec {
+ type Err = EcoString;
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ let mut s = unscanny::Scanner::new(s);
+ if !s.eat_if('@') {
+ Err("package specification must start with '@'")?;
+ }
+
+ let namespace = s.eat_until('/');
+ if namespace.is_empty() {
+ Err("package specification is missing namespace")?;
+ } else if !is_ident(namespace) {
+ Err(eco_format!("`{namespace}` is not a valid package namespace"))?;
+ }
+
+ s.eat_if('/');
+
+ let name = s.eat_until(':');
+ if name.is_empty() {
+ Err("package specification is missing name")?;
+ } else if !is_ident(name) {
+ Err(eco_format!("`{name}` is not a valid package name"))?;
+ }
+
+ s.eat_if(':');
+
+ let version = s.after();
+ if version.is_empty() {
+ Err("package specification is missing version")?;
+ }
+
+ Ok(Self {
+ namespace: namespace.into(),
+ name: name.into(),
+ version: version.parse()?,
+ })
+ }
+}
+
+impl Display for PackageSpec {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "@{}/{}:{}", self.namespace, self.name, self.version)
+ }
+}
+
+/// A package's version.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct PackageVersion {
+ /// The package's major version.
+ pub major: u32,
+ /// The package's minor version.
+ pub minor: u32,
+ /// The package's patch version.
+ pub patch: u32,
+}
+
+impl FromStr for PackageVersion {
+ type Err = EcoString;
+
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ let mut parts = s.split('.');
+ let mut next = |kind| {
+ let part = parts
+ .next()
+ .filter(|s| !s.is_empty())
+ .ok_or_else(|| eco_format!("version number is missing {kind} version"))?;
+ part.parse::<u32>()
+ .map_err(|_| eco_format!("`{part}` is not a valid {kind} version"))
+ };
+
+ let major = next("major")?;
+ let minor = next("minor")?;
+ let patch = next("patch")?;
+ if let Some(rest) = parts.next() {
+ Err(eco_format!("version number has unexpected fourth component: `{rest}`"))?;
+ }
+
+ Ok(Self { major, minor, patch })
+ }
+}
+
+impl Display for PackageVersion {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{}.{}.{}", self.major, self.minor, self.patch)
+ }
+}
+
+impl Serialize for PackageVersion {
+ fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
+ s.collect_str(self)
+ }
+}
+
+impl<'de> Deserialize<'de> for PackageVersion {
+ fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
+ let string = EcoString::deserialize(d)?;
+ string.parse().map_err(serde::de::Error::custom)
+ }
+}
diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs
new file mode 100644
index 00000000..49119720
--- /dev/null
+++ b/crates/typst-syntax/src/kind.rs
@@ -0,0 +1,480 @@
+/// A syntactical building block of a Typst file.
+///
+/// Can be created by the lexer or by the parser.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[repr(u8)]
+pub enum SyntaxKind {
+ /// Markup.
+ Markup,
+ /// Plain text without markup.
+ Text,
+ /// Whitespace. Contains at most one newline in markup, as more indicate a
+ /// paragraph break.
+ Space,
+ /// A forced line break: `\`.
+ Linebreak,
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak,
+ /// An escape sequence: `\#`, `\u{1F5FA}`.
+ Escape,
+ /// A shorthand for a unicode codepoint. For example, `~` for non-breaking
+ /// space or `-?` for a soft hyphen.
+ Shorthand,
+ /// A smart quote: `'` or `"`.
+ SmartQuote,
+ /// Strong content: `*Strong*`.
+ Strong,
+ /// Emphasized content: `_Emphasized_`.
+ Emph,
+ /// Raw text with optional syntax highlighting: `` `...` ``.
+ Raw,
+ /// A hyperlink: `https://typst.org`.
+ Link,
+ /// A label: `<intro>`.
+ Label,
+ /// A reference: `@target`, `@target[..]`.
+ Ref,
+ /// Introduces a reference: `@target`.
+ RefMarker,
+ /// A section heading: `= Introduction`.
+ Heading,
+ /// Introduces a section heading: `=`, `==`, ...
+ HeadingMarker,
+ /// An item in a bullet list: `- ...`.
+ ListItem,
+ /// Introduces a list item: `-`.
+ ListMarker,
+ /// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
+ EnumItem,
+ /// Introduces an enumeration item: `+`, `1.`.
+ EnumMarker,
+ /// An item in a term list: `/ Term: Details`.
+ TermItem,
+ /// Introduces a term item: `/`.
+ TermMarker,
+ /// A mathematical equation: `$x$`, `$ x^2 $`.
+ Equation,
+
+ /// The contents of a mathematical equation: `x^2 + 1`.
+ Math,
+ /// An identifier in math: `pi`.
+ MathIdent,
+ /// An alignment point in math: `&`.
+ MathAlignPoint,
+ /// Matched delimiters in math: `[x + y]`.
+ MathDelimited,
+ /// A base with optional attachments in math: `a_1^2`.
+ MathAttach,
+ /// Grouped primes in math: `a'''`.
+ MathPrimes,
+ /// A fraction in math: `x/2`.
+ MathFrac,
+ /// A root in math: `√x`, `∛x` or `∜x`.
+ MathRoot,
+
+ /// A hashtag that switches into code mode: `#`.
+ Hashtag,
+ /// A left curly brace, starting a code block: `{`.
+ LeftBrace,
+ /// A right curly brace, terminating a code block: `}`.
+ RightBrace,
+ /// A left square bracket, starting a content block: `[`.
+ LeftBracket,
+ /// A right square bracket, terminating a content block: `]`.
+ RightBracket,
+ /// A left round parenthesis, starting a grouped expression, collection,
+ /// argument or parameter list: `(`.
+ LeftParen,
+ /// A right round parenthesis, terminating a grouped expression, collection,
+ /// argument or parameter list: `)`.
+ RightParen,
+ /// A comma separator in a sequence: `,`.
+ Comma,
+ /// A semicolon terminating an expression: `;`.
+ Semicolon,
+ /// A colon between name/key and value in a dictionary, argument or
+ /// parameter list, or between the term and body of a term list term: `:`.
+ Colon,
+ /// The strong text toggle, multiplication operator, and wildcard import
+ /// symbol: `*`.
+ Star,
+ /// Toggles emphasized text and indicates a subscript in math: `_`.
+ Underscore,
+ /// Starts and ends a mathematical equation: `$`.
+ Dollar,
+ /// The unary plus and binary addition operator: `+`.
+ Plus,
+ /// The unary negation and binary subtraction operator: `-`.
+ Minus,
+ /// The division operator and fraction operator in math: `/`.
+ Slash,
+ /// The superscript operator in math: `^`.
+ Hat,
+ /// The prime in math: `'`.
+ Prime,
+ /// The field access and method call operator: `.`.
+ Dot,
+ /// The assignment operator: `=`.
+ Eq,
+ /// The equality operator: `==`.
+ EqEq,
+ /// The inequality operator: `!=`.
+ ExclEq,
+ /// The less-than operator: `<`.
+ Lt,
+ /// The less-than or equal operator: `<=`.
+ LtEq,
+ /// The greater-than operator: `>`.
+ Gt,
+ /// The greater-than or equal operator: `>=`.
+ GtEq,
+ /// The add-assign operator: `+=`.
+ PlusEq,
+ /// The subtract-assign operator: `-=`.
+ HyphEq,
+ /// The multiply-assign operator: `*=`.
+ StarEq,
+ /// The divide-assign operator: `/=`.
+ SlashEq,
+ /// The spread operator: `..`.
+ Dots,
+ /// An arrow between a closure's parameters and body: `=>`.
+ Arrow,
+ /// A root: `√`, `∛` or `∜`.
+ Root,
+
+ /// The `not` operator.
+ Not,
+ /// The `and` operator.
+ And,
+ /// The `or` operator.
+ Or,
+ /// The `none` literal.
+ None,
+ /// The `auto` literal.
+ Auto,
+ /// The `let` keyword.
+ Let,
+ /// The `set` keyword.
+ Set,
+ /// The `show` keyword.
+ Show,
+ /// The `if` keyword.
+ If,
+ /// The `else` keyword.
+ Else,
+ /// The `for` keyword.
+ For,
+ /// The `in` keyword.
+ In,
+ /// The `while` keyword.
+ While,
+ /// The `break` keyword.
+ Break,
+ /// The `continue` keyword.
+ Continue,
+ /// The `return` keyword.
+ Return,
+ /// The `import` keyword.
+ Import,
+ /// The `include` keyword.
+ Include,
+ /// The `as` keyword.
+ As,
+
+ /// Code.
+ Code,
+ /// An identifier: `it`.
+ Ident,
+ /// A boolean: `true`, `false`.
+ Bool,
+ /// An integer: `120`.
+ Int,
+ /// A floating-point number: `1.2`, `10e-4`.
+ Float,
+ /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
+ Numeric,
+ /// A quoted string: `"..."`.
+ Str,
+ /// A code block: `{ let x = 1; x + 2 }`.
+ CodeBlock,
+ /// A content block: `[*Hi* there!]`.
+ ContentBlock,
+ /// A grouped expression: `(1 + 2)`.
+ Parenthesized,
+ /// An array: `(1, "hi", 12cm)`.
+ Array,
+ /// A dictionary: `(thickness: 3pt, pattern: dashed)`.
+ Dict,
+ /// A named pair: `thickness: 3pt`.
+ Named,
+ /// A keyed pair: `"spacy key": true`.
+ Keyed,
+ /// A unary operation: `-x`.
+ Unary,
+ /// A binary operation: `a + b`.
+ Binary,
+ /// A field access: `properties.age`.
+ FieldAccess,
+ /// An invocation of a function or method: `f(x, y)`.
+ FuncCall,
+ /// A function call's argument list: `(12pt, y)`.
+ Args,
+ /// Spread arguments or an argument sink: `..x`.
+ Spread,
+ /// A closure: `(x, y) => z`.
+ Closure,
+ /// A closure's parameters: `(x, y)`.
+ Params,
+ /// A let binding: `let x = 1`.
+ LetBinding,
+ /// A set rule: `set text(...)`.
+ SetRule,
+ /// A show rule: `show heading: it => emph(it.body)`.
+ ShowRule,
+ /// An if-else conditional: `if x { y } else { z }`.
+ Conditional,
+ /// A while loop: `while x { y }`.
+ WhileLoop,
+ /// A for loop: `for x in y { z }`.
+ ForLoop,
+ /// A module import: `import "utils.typ": a, b, c`.
+ ModuleImport,
+ /// Items to import from a module: `a, b, c`.
+ ImportItems,
+ /// A module include: `include "chapter1.typ"`.
+ ModuleInclude,
+ /// A break from a loop: `break`.
+ LoopBreak,
+ /// A continue in a loop: `continue`.
+ LoopContinue,
+ /// A return from a function: `return`, `return x + 1`.
+ FuncReturn,
+ /// A destructuring pattern: `(x, _, ..y)`.
+ Destructuring,
+ /// A destructuring assignment expression: `(x, y) = (1, 2)`.
+ DestructAssignment,
+
+ /// A line comment: `// ...`.
+ LineComment,
+ /// A block comment: `/* ... */`.
+ BlockComment,
+ /// An invalid sequence of characters.
+ Error,
+ /// The end of the file.
+ Eof,
+}
+
+impl SyntaxKind {
+ /// Is this a bracket, brace, or parenthesis?
+ pub fn is_grouping(self) -> bool {
+ matches!(
+ self,
+ Self::LeftBracket
+ | Self::LeftBrace
+ | Self::LeftParen
+ | Self::RightBracket
+ | Self::RightBrace
+ | Self::RightParen
+ )
+ }
+
+ /// Does this node terminate a preceding expression?
+ pub fn is_terminator(self) -> bool {
+ matches!(
+ self,
+ Self::Eof
+ | Self::Semicolon
+ | Self::RightBrace
+ | Self::RightParen
+ | Self::RightBracket
+ )
+ }
+
+ /// Is this a code or content block.
+ pub fn is_block(self) -> bool {
+ matches!(self, Self::CodeBlock | Self::ContentBlock)
+ }
+
+ /// Does this node need termination through a semicolon or linebreak?
+ pub fn is_stmt(self) -> bool {
+ matches!(
+ self,
+ Self::LetBinding
+ | Self::SetRule
+ | Self::ShowRule
+ | Self::ModuleImport
+ | Self::ModuleInclude
+ )
+ }
+
+ /// Is this node is a keyword.
+ pub fn is_keyword(self) -> bool {
+ matches!(
+ self,
+ Self::Not
+ | Self::And
+ | Self::Or
+ | Self::None
+ | Self::Auto
+ | Self::Let
+ | Self::Set
+ | Self::Show
+ | Self::If
+ | Self::Else
+ | Self::For
+ | Self::In
+ | Self::While
+ | Self::Break
+ | Self::Continue
+ | Self::Return
+ | Self::Import
+ | Self::Include
+ | Self::As
+ )
+ }
+
+ /// Whether this kind of node is automatically skipped by the parser in
+ /// code and math mode.
+ pub fn is_trivia(self) -> bool {
+ matches!(
+ self,
+ Self::Space | Self::Parbreak | Self::LineComment | Self::BlockComment
+ )
+ }
+
+ /// Whether this is an error.
+ pub fn is_error(self) -> bool {
+ self == Self::Error
+ }
+
+ /// A human-readable name for the kind.
+ pub fn name(self) -> &'static str {
+ match self {
+ Self::Markup => "markup",
+ Self::Text => "text",
+ Self::Space => "space",
+ Self::Linebreak => "line break",
+ Self::Parbreak => "paragraph break",
+ Self::Escape => "escape sequence",
+ Self::Shorthand => "shorthand",
+ Self::SmartQuote => "smart quote",
+ Self::Strong => "strong content",
+ Self::Emph => "emphasized content",
+ Self::Raw => "raw block",
+ Self::Link => "link",
+ Self::Label => "label",
+ Self::Ref => "reference",
+ Self::RefMarker => "reference marker",
+ Self::Heading => "heading",
+ Self::HeadingMarker => "heading marker",
+ Self::ListItem => "list item",
+ Self::ListMarker => "list marker",
+ Self::EnumItem => "enum item",
+ Self::EnumMarker => "enum marker",
+ Self::TermItem => "term list item",
+ Self::TermMarker => "term marker",
+ Self::Equation => "equation",
+ Self::Math => "math",
+ Self::MathIdent => "math identifier",
+ Self::MathAlignPoint => "math alignment point",
+ Self::MathDelimited => "delimited math",
+ Self::MathAttach => "math attachments",
+ Self::MathFrac => "math fraction",
+ Self::MathRoot => "math root",
+ Self::MathPrimes => "math primes",
+ Self::Hashtag => "hashtag",
+ Self::LeftBrace => "opening brace",
+ Self::RightBrace => "closing brace",
+ Self::LeftBracket => "opening bracket",
+ Self::RightBracket => "closing bracket",
+ Self::LeftParen => "opening paren",
+ Self::RightParen => "closing paren",
+ Self::Comma => "comma",
+ Self::Semicolon => "semicolon",
+ Self::Colon => "colon",
+ Self::Star => "star",
+ Self::Underscore => "underscore",
+ Self::Dollar => "dollar sign",
+ Self::Plus => "plus",
+ Self::Minus => "minus",
+ Self::Slash => "slash",
+ Self::Hat => "hat",
+ Self::Prime => "prime",
+ Self::Dot => "dot",
+ Self::Eq => "equals sign",
+ Self::EqEq => "equality operator",
+ Self::ExclEq => "inequality operator",
+ Self::Lt => "less-than operator",
+ Self::LtEq => "less-than or equal operator",
+ Self::Gt => "greater-than operator",
+ Self::GtEq => "greater-than or equal operator",
+ Self::PlusEq => "add-assign operator",
+ Self::HyphEq => "subtract-assign operator",
+ Self::StarEq => "multiply-assign operator",
+ Self::SlashEq => "divide-assign operator",
+ Self::Dots => "dots",
+ Self::Arrow => "arrow",
+ Self::Root => "root",
+ Self::Not => "operator `not`",
+ Self::And => "operator `and`",
+ Self::Or => "operator `or`",
+ Self::None => "`none`",
+ Self::Auto => "`auto`",
+ Self::Let => "keyword `let`",
+ Self::Set => "keyword `set`",
+ Self::Show => "keyword `show`",
+ Self::If => "keyword `if`",
+ Self::Else => "keyword `else`",
+ Self::For => "keyword `for`",
+ Self::In => "keyword `in`",
+ Self::While => "keyword `while`",
+ Self::Break => "keyword `break`",
+ Self::Continue => "keyword `continue`",
+ Self::Return => "keyword `return`",
+ Self::Import => "keyword `import`",
+ Self::Include => "keyword `include`",
+ Self::As => "keyword `as`",
+ Self::Code => "code",
+ Self::Ident => "identifier",
+ Self::Bool => "boolean",
+ Self::Int => "integer",
+ Self::Float => "float",
+ Self::Numeric => "numeric value",
+ Self::Str => "string",
+ Self::CodeBlock => "code block",
+ Self::ContentBlock => "content block",
+ Self::Parenthesized => "group",
+ Self::Array => "array",
+ Self::Dict => "dictionary",
+ Self::Named => "named pair",
+ Self::Keyed => "keyed pair",
+ Self::Unary => "unary expression",
+ Self::Binary => "binary expression",
+ Self::FieldAccess => "field access",
+ Self::FuncCall => "function call",
+ Self::Args => "call arguments",
+ Self::Spread => "spread",
+ Self::Closure => "closure",
+ Self::Params => "closure parameters",
+ Self::LetBinding => "`let` expression",
+ Self::SetRule => "`set` expression",
+ Self::ShowRule => "`show` expression",
+ Self::Conditional => "`if` expression",
+ Self::WhileLoop => "while-loop expression",
+ Self::ForLoop => "for-loop expression",
+ Self::ModuleImport => "`import` expression",
+ Self::ImportItems => "import items",
+ Self::ModuleInclude => "`include` expression",
+ Self::LoopBreak => "`break` expression",
+ Self::LoopContinue => "`continue` expression",
+ Self::FuncReturn => "`return` expression",
+ Self::Destructuring => "destructuring pattern",
+ Self::DestructAssignment => "destructuring assignment expression",
+ Self::LineComment => "line comment",
+ Self::BlockComment => "block comment",
+ Self::Error => "syntax error",
+ Self::Eof => "end of file",
+ }
+ }
+}
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
new file mode 100644
index 00000000..b96b3c07
--- /dev/null
+++ b/crates/typst-syntax/src/lexer.rs
@@ -0,0 +1,739 @@
+use ecow::{eco_format, EcoString};
+use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_segmentation::UnicodeSegmentation;
+use unscanny::Scanner;
+
+use super::SyntaxKind;
+
+/// Splits up a string of source code into tokens.
+#[derive(Clone)]
+pub(super) struct Lexer<'s> {
+ /// The underlying scanner.
+ s: Scanner<'s>,
+ /// The mode the lexer is in. This determines which kinds of tokens it
+ /// produces.
+ mode: LexMode,
+ /// Whether the last token contained a newline.
+ newline: bool,
+ /// An error for the last token.
+ error: Option<EcoString>,
+}
+
+/// What kind of tokens to emit.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub(super) enum LexMode {
+ /// Text and markup.
+ Markup,
+ /// Math atoms, operators, etc.
+ Math,
+ /// Keywords, literals and operators.
+ Code,
+}
+
+impl<'s> Lexer<'s> {
+ /// Create a new lexer with the given mode and a prefix to offset column
+ /// calculations.
+ pub fn new(text: &'s str, mode: LexMode) -> Self {
+ Self {
+ s: Scanner::new(text),
+ mode,
+ newline: false,
+ error: None,
+ }
+ }
+
+ /// Get the current lexing mode.
+ pub fn mode(&self) -> LexMode {
+ self.mode
+ }
+
+ /// Change the lexing mode.
+ pub fn set_mode(&mut self, mode: LexMode) {
+ self.mode = mode;
+ }
+
+ /// The index in the string at which the last token ends and next token
+ /// will start.
+ pub fn cursor(&self) -> usize {
+ self.s.cursor()
+ }
+
+ /// Jump to the given index in the string.
+ pub fn jump(&mut self, index: usize) {
+ self.s.jump(index);
+ }
+
+ /// Whether the last token contained a newline.
+ pub fn newline(&self) -> bool {
+ self.newline
+ }
+
+ /// Take out the last error, if any.
+ pub fn take_error(&mut self) -> Option<EcoString> {
+ self.error.take()
+ }
+}
+
+impl Lexer<'_> {
+ /// Construct a full-positioned syntax error.
+ fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
+ self.error = Some(message.into());
+ SyntaxKind::Error
+ }
+}
+
+/// Shared.
+impl Lexer<'_> {
+ pub fn next(&mut self) -> SyntaxKind {
+ self.newline = false;
+ self.error = None;
+ let start = self.s.cursor();
+ match self.s.eat() {
+ Some(c) if c.is_whitespace() => self.whitespace(start, c),
+ Some('/') if self.s.eat_if('/') => self.line_comment(),
+ Some('/') if self.s.eat_if('*') => self.block_comment(),
+ Some('*') if self.s.eat_if('/') => {
+ self.error("unexpected end of block comment")
+ }
+
+ Some(c) => match self.mode {
+ LexMode::Markup => self.markup(start, c),
+ LexMode::Math => self.math(start, c),
+ LexMode::Code => self.code(start, c),
+ },
+
+ None => SyntaxKind::Eof,
+ }
+ }
+
+ fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
+ let more = self.s.eat_while(char::is_whitespace);
+ let newlines = match c {
+ ' ' if more.is_empty() => 0,
+ _ => count_newlines(self.s.from(start)),
+ };
+
+ self.newline = newlines > 0;
+ if self.mode == LexMode::Markup && newlines >= 2 {
+ SyntaxKind::Parbreak
+ } else {
+ SyntaxKind::Space
+ }
+ }
+
+ fn line_comment(&mut self) -> SyntaxKind {
+ self.s.eat_until(is_newline);
+ SyntaxKind::LineComment
+ }
+
+ fn block_comment(&mut self) -> SyntaxKind {
+ let mut state = '_';
+ let mut depth = 1;
+
+ // Find the first `*/` that does not correspond to a nested `/*`.
+ while let Some(c) = self.s.eat() {
+ state = match (state, c) {
+ ('*', '/') => {
+ depth -= 1;
+ if depth == 0 {
+ break;
+ }
+ '_'
+ }
+ ('/', '*') => {
+ depth += 1;
+ '_'
+ }
+ ('/', '/') => {
+ self.line_comment();
+ '_'
+ }
+ _ => c,
+ }
+ }
+
+ SyntaxKind::BlockComment
+ }
+}
+
+/// Markup.
+impl Lexer<'_> {
+ fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
+ match c {
+ '\\' => self.backslash(),
+ '`' => self.raw(),
+ 'h' if self.s.eat_if("ttp://") => self.link(),
+ 'h' if self.s.eat_if("ttps://") => self.link(),
+ '<' if self.s.at(is_id_continue) => self.label(),
+ '@' => self.ref_marker(),
+
+ '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
+ '*' if !self.in_word() => SyntaxKind::Star,
+ '_' if !self.in_word() => SyntaxKind::Underscore,
+
+ '#' => SyntaxKind::Hashtag,
+ '[' => SyntaxKind::LeftBracket,
+ ']' => SyntaxKind::RightBracket,
+ '\'' => SyntaxKind::SmartQuote,
+ '"' => SyntaxKind::SmartQuote,
+ '$' => SyntaxKind::Dollar,
+ '~' => SyntaxKind::Shorthand,
+ ':' => SyntaxKind::Colon,
+ '=' => {
+ self.s.eat_while('=');
+ if self.space_or_end() {
+ SyntaxKind::HeadingMarker
+ } else {
+ self.text()
+ }
+ }
+ '-' if self.space_or_end() => SyntaxKind::ListMarker,
+ '+' if self.space_or_end() => SyntaxKind::EnumMarker,
+ '/' if self.space_or_end() => SyntaxKind::TermMarker,
+ '0'..='9' => self.numbering(start),
+
+ _ => self.text(),
+ }
+ }
+
+ fn backslash(&mut self) -> SyntaxKind {
+ if self.s.eat_if("u{") {
+ let hex = self.s.eat_while(char::is_ascii_alphanumeric);
+ if !self.s.eat_if('}') {
+ return self.error("unclosed Unicode escape sequence");
+ }
+
+ if u32::from_str_radix(hex, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ .is_none()
+ {
+ return self.error(eco_format!("invalid Unicode codepoint: {}", hex));
+ }
+
+ return SyntaxKind::Escape;
+ }
+
+ if self.s.done() || self.s.at(char::is_whitespace) {
+ SyntaxKind::Linebreak
+ } else {
+ self.s.eat();
+ SyntaxKind::Escape
+ }
+ }
+
+ fn raw(&mut self) -> SyntaxKind {
+ let mut backticks = 1;
+ while self.s.eat_if('`') {
+ backticks += 1;
+ }
+
+ if backticks == 2 {
+ return SyntaxKind::Raw;
+ }
+
+ let mut found = 0;
+ while found < backticks {
+ match self.s.eat() {
+ Some('`') => found += 1,
+ Some(_) => found = 0,
+ None => break,
+ }
+ }
+
+ if found != backticks {
+ return self.error("unclosed raw text");
+ }
+
+ SyntaxKind::Raw
+ }
+
+ fn link(&mut self) -> SyntaxKind {
+ let mut brackets = Vec::new();
+
+ #[rustfmt::skip]
+ self.s.eat_while(|c: char| {
+ match c {
+ | '0' ..= '9'
+ | 'a' ..= 'z'
+ | 'A' ..= 'Z'
+ | '!' | '#' | '$' | '%' | '&' | '*' | '+'
+ | ',' | '-' | '.' | '/' | ':' | ';' | '='
+ | '?' | '@' | '_' | '~' | '\'' => true,
+ '[' => {
+ brackets.push(SyntaxKind::LeftBracket);
+ true
+ }
+ '(' => {
+ brackets.push(SyntaxKind::LeftParen);
+ true
+ }
+ ']' => brackets.pop() == Some(SyntaxKind::LeftBracket),
+ ')' => brackets.pop() == Some(SyntaxKind::LeftParen),
+ _ => false,
+ }
+ });
+
+ if !brackets.is_empty() {
+ return self.error(
+ "automatic links cannot contain unbalanced brackets, \
+ use the `link` function instead",
+ );
+ }
+
+ // Don't include the trailing characters likely to be part of text.
+ while matches!(self.s.scout(-1), Some('!' | ',' | '.' | ':' | ';' | '?' | '\'')) {
+ self.s.uneat();
+ }
+
+ SyntaxKind::Link
+ }
+
+ fn numbering(&mut self, start: usize) -> SyntaxKind {
+ self.s.eat_while(char::is_ascii_digit);
+
+ let read = self.s.from(start);
+ if self.s.eat_if('.') && self.space_or_end() && read.parse::<usize>().is_ok() {
+ return SyntaxKind::EnumMarker;
+ }
+
+ self.text()
+ }
+
+ fn ref_marker(&mut self) -> SyntaxKind {
+ self.s.eat_while(|c| is_id_continue(c) || matches!(c, ':' | '.'));
+
+ // Don't include the trailing characters likely to be part of text.
+ while matches!(self.s.scout(-1), Some('.' | ':')) {
+ self.s.uneat();
+ }
+
+ SyntaxKind::RefMarker
+ }
+
+ fn label(&mut self) -> SyntaxKind {
+ let label = self.s.eat_while(|c| is_id_continue(c) || matches!(c, ':' | '.'));
+ if label.is_empty() {
+ return self.error("label cannot be empty");
+ }
+
+ if !self.s.eat_if('>') {
+ return self.error("unclosed label");
+ }
+
+ SyntaxKind::Label
+ }
+
+ fn text(&mut self) -> SyntaxKind {
+ macro_rules! table {
+ ($(|$c:literal)*) => {
+ static TABLE: [bool; 128] = {
+ let mut t = [false; 128];
+ $(t[$c as usize] = true;)*
+ t
+ };
+ };
+ }
+
+ table! {
+ | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
+ | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
+ | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
+ };
+
+ loop {
+ self.s.eat_until(|c: char| {
+ TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
+ });
+
+ // Continue with the same text node if the thing would become text
+ // anyway.
+ let mut s = self.s;
+ match s.eat() {
+ Some(' ') if s.at(char::is_alphanumeric) => {}
+ Some('/') if !s.at(['/', '*']) => {}
+ Some('-') if !s.at(['-', '?']) => {}
+ Some('.') if !s.at("..") => {}
+ Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
+ Some('@') if !s.at(is_id_start) => {}
+ _ => break,
+ }
+
+ self.s = s;
+ }
+
+ SyntaxKind::Text
+ }
+
+ fn in_word(&self) -> bool {
+ let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.scout(-2);
+ let next = self.s.peek();
+ alphanum(prev) && alphanum(next)
+ }
+
+ fn space_or_end(&self) -> bool {
+ self.s.done() || self.s.at(char::is_whitespace)
+ }
+}
+
+/// Math.
+impl Lexer<'_> {
+ fn math(&mut self, start: usize, c: char) -> SyntaxKind {
+ match c {
+ '\\' => self.backslash(),
+ '"' => self.string(),
+
+ '-' if self.s.eat_if(">>") => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ ':' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ ':' if self.s.eat_if(":=") => SyntaxKind::Shorthand,
+ '!' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
+ '[' if self.s.eat_if('|') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("==>") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("-->") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("--") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("-<") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("<-") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("<<") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("==") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("~~") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('<') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('-') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('~') => SyntaxKind::Shorthand,
+ '>' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ '>' if self.s.eat_if(">>") => SyntaxKind::Shorthand,
+ '=' if self.s.eat_if("=>") => SyntaxKind::Shorthand,
+ '=' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ '=' if self.s.eat_if(':') => SyntaxKind::Shorthand,
+ '>' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '>' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ '|' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ '|' if self.s.eat_if("=>") => SyntaxKind::Shorthand,
+ '|' if self.s.eat_if(']') => SyntaxKind::Shorthand,
+ '|' if self.s.eat_if('|') => SyntaxKind::Shorthand,
+ '~' if self.s.eat_if("~>") => SyntaxKind::Shorthand,
+ '~' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ '*' | '-' => SyntaxKind::Shorthand,
+
+ '#' => SyntaxKind::Hashtag,
+ '_' => SyntaxKind::Underscore,
+ '$' => SyntaxKind::Dollar,
+ '/' => SyntaxKind::Slash,
+ '^' => SyntaxKind::Hat,
+ '\'' => SyntaxKind::Prime,
+ '&' => SyntaxKind::MathAlignPoint,
+ '√' | '∛' | '∜' => SyntaxKind::Root,
+
+ // Identifiers.
+ c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
+ self.s.eat_while(is_math_id_continue);
+ SyntaxKind::MathIdent
+ }
+
+ // Other math atoms.
+ _ => self.math_text(start, c),
+ }
+ }
+
+ fn math_text(&mut self, start: usize, c: char) -> SyntaxKind {
+ // Keep numbers and grapheme clusters together.
+ if c.is_numeric() {
+ self.s.eat_while(char::is_numeric);
+ let mut s = self.s;
+ if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() {
+ self.s = s;
+ }
+ } else {
+ let len = self
+ .s
+ .get(start..self.s.string().len())
+ .graphemes(true)
+ .next()
+ .map_or(0, str::len);
+ self.s.jump(start + len);
+ }
+ SyntaxKind::Text
+ }
+}
+
+/// Code.
+impl Lexer<'_> {
+ fn code(&mut self, start: usize, c: char) -> SyntaxKind {
+ match c {
+ '`' => self.raw(),
+ '<' if self.s.at(is_id_continue) => self.label(),
+ '0'..='9' => self.number(start, c),
+ '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
+ '"' => self.string(),
+
+ '=' if self.s.eat_if('=') => SyntaxKind::EqEq,
+ '!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
+ '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
+ '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
+ '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
+ '-' if self.s.eat_if('=') => SyntaxKind::HyphEq,
+ '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
+ '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
+ '.' if self.s.eat_if('.') => SyntaxKind::Dots,
+ '=' if self.s.eat_if('>') => SyntaxKind::Arrow,
+
+ '{' => SyntaxKind::LeftBrace,
+ '}' => SyntaxKind::RightBrace,
+ '[' => SyntaxKind::LeftBracket,
+ ']' => SyntaxKind::RightBracket,
+ '(' => SyntaxKind::LeftParen,
+ ')' => SyntaxKind::RightParen,
+ '$' => SyntaxKind::Dollar,
+ ',' => SyntaxKind::Comma,
+ ';' => SyntaxKind::Semicolon,
+ ':' => SyntaxKind::Colon,
+ '.' => SyntaxKind::Dot,
+ '+' => SyntaxKind::Plus,
+ '-' => SyntaxKind::Minus,
+ '*' => SyntaxKind::Star,
+ '/' => SyntaxKind::Slash,
+ '=' => SyntaxKind::Eq,
+ '<' => SyntaxKind::Lt,
+ '>' => SyntaxKind::Gt,
+
+ c if is_id_start(c) => self.ident(start),
+
+ c => self.error(eco_format!("the character `{c}` is not valid in code")),
+ }
+ }
+
+ fn ident(&mut self, start: usize) -> SyntaxKind {
+ self.s.eat_while(is_id_continue);
+ let ident = self.s.from(start);
+
+ let prev = self.s.get(0..start);
+ if !prev.ends_with(['.', '@']) || prev.ends_with("..") {
+ if let Some(keyword) = keyword(ident) {
+ return keyword;
+ }
+ }
+
+ if ident == "_" {
+ SyntaxKind::Underscore
+ } else {
+ SyntaxKind::Ident
+ }
+ }
+
+ fn number(&mut self, mut start: usize, c: char) -> SyntaxKind {
+ // Handle alternative integer bases.
+ let mut base = 10;
+ if c == '0' {
+ if self.s.eat_if('b') {
+ base = 2;
+ } else if self.s.eat_if('o') {
+ base = 8;
+ } else if self.s.eat_if('x') {
+ base = 16;
+ }
+ if base != 10 {
+ start = self.s.cursor();
+ }
+ }
+
+ // Read the first part (integer or fractional depending on `first`).
+ self.s.eat_while(if base == 16 {
+ char::is_ascii_alphanumeric
+ } else {
+ char::is_ascii_digit
+ });
+
+ // Read the fractional part if not already done.
+ // Make sure not to confuse a range for the decimal separator.
+ if c != '.'
+ && !self.s.at("..")
+ && !self.s.scout(1).map_or(false, is_id_start)
+ && self.s.eat_if('.')
+ && base == 10
+ {
+ self.s.eat_while(char::is_ascii_digit);
+ }
+
+ // Read the exponent.
+ if !self.s.at("em") && self.s.eat_if(['e', 'E']) && base == 10 {
+ self.s.eat_if(['+', '-']);
+ self.s.eat_while(char::is_ascii_digit);
+ }
+
+ // Read the suffix.
+ let suffix_start = self.s.cursor();
+ if !self.s.eat_if('%') {
+ self.s.eat_while(char::is_ascii_alphanumeric);
+ }
+
+ let number = self.s.get(start..suffix_start);
+ let suffix = self.s.from(suffix_start);
+
+ let kind = if i64::from_str_radix(number, base).is_ok() {
+ SyntaxKind::Int
+ } else if base == 10 && number.parse::<f64>().is_ok() {
+ SyntaxKind::Float
+ } else {
+ return self.error(match base {
+ 2 => eco_format!("invalid binary number: 0b{}", number),
+ 8 => eco_format!("invalid octal number: 0o{}", number),
+ 16 => eco_format!("invalid hexadecimal number: 0x{}", number),
+ _ => eco_format!("invalid number: {}", number),
+ });
+ };
+
+ if suffix.is_empty() {
+ return kind;
+ }
+
+ if !matches!(
+ suffix,
+ "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%"
+ ) {
+ return self.error(eco_format!("invalid number suffix: {}", suffix));
+ }
+
+ SyntaxKind::Numeric
+ }
+
+ fn string(&mut self) -> SyntaxKind {
+ let mut escaped = false;
+ self.s.eat_until(|c| {
+ let stop = c == '"' && !escaped;
+ escaped = c == '\\' && !escaped;
+ stop
+ });
+
+ if !self.s.eat_if('"') {
+ return self.error("unclosed string");
+ }
+
+ SyntaxKind::Str
+ }
+}
+
+/// Try to parse an identifier into a keyword.
+fn keyword(ident: &str) -> Option<SyntaxKind> {
+ Some(match ident {
+ "none" => SyntaxKind::None,
+ "auto" => SyntaxKind::Auto,
+ "true" => SyntaxKind::Bool,
+ "false" => SyntaxKind::Bool,
+ "not" => SyntaxKind::Not,
+ "and" => SyntaxKind::And,
+ "or" => SyntaxKind::Or,
+ "let" => SyntaxKind::Let,
+ "set" => SyntaxKind::Set,
+ "show" => SyntaxKind::Show,
+ "if" => SyntaxKind::If,
+ "else" => SyntaxKind::Else,
+ "for" => SyntaxKind::For,
+ "in" => SyntaxKind::In,
+ "while" => SyntaxKind::While,
+ "break" => SyntaxKind::Break,
+ "continue" => SyntaxKind::Continue,
+ "return" => SyntaxKind::Return,
+ "import" => SyntaxKind::Import,
+ "include" => SyntaxKind::Include,
+ "as" => SyntaxKind::As,
+ _ => return None,
+ })
+}
+
+/// Whether a character is interpreted as a newline by Typst.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+ matches!(
+ character,
+ // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+ '\n' | '\x0B' | '\x0C' | '\r' |
+ // Next Line, Line Separator, Paragraph Separator.
+ '\u{0085}' | '\u{2028}' | '\u{2029}'
+ )
+}
+
+/// Split text at newlines.
+pub(super) fn split_newlines(text: &str) -> Vec<&str> {
+ let mut s = Scanner::new(text);
+ let mut lines = Vec::new();
+ let mut start = 0;
+ let mut end = 0;
+
+ while let Some(c) = s.eat() {
+ if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
+
+ lines.push(&text[start..end]);
+ start = s.cursor();
+ }
+ end = s.cursor();
+ }
+
+ lines.push(&text[start..]);
+ lines
+}
+
+/// Count the number of newlines in text.
+fn count_newlines(text: &str) -> usize {
+ let mut newlines = 0;
+ let mut s = Scanner::new(text);
+ while let Some(c) = s.eat() {
+ if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
+ newlines += 1;
+ }
+ }
+ newlines
+}
+
+/// Whether a string is a valid Typst identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+ let mut chars = string.chars();
+ chars
+ .next()
+ .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+ is_xid_start(c) || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+ is_xid_continue(c) || c == '_' || c == '-'
+}
+
+/// Whether a character can start an identifier in math.
+#[inline]
+fn is_math_id_start(c: char) -> bool {
+ is_xid_start(c)
+}
+
+/// Whether a character can continue an identifier in math.
+#[inline]
+fn is_math_id_continue(c: char) -> bool {
+ is_xid_continue(c) && c != '_'
+}
diff --git a/crates/typst-syntax/src/lib.rs b/crates/typst-syntax/src/lib.rs
new file mode 100644
index 00000000..8562bb19
--- /dev/null
+++ b/crates/typst-syntax/src/lib.rs
@@ -0,0 +1,23 @@
+//! Parser and syntax tree for Typst.
+
+pub mod ast;
+
+mod file;
+mod kind;
+mod lexer;
+mod node;
+mod parser;
+mod reparser;
+mod source;
+mod span;
+
+pub use self::file::{FileId, PackageSpec, PackageVersion};
+pub use self::kind::SyntaxKind;
+pub use self::lexer::{is_id_continue, is_id_start, is_ident, is_newline};
+pub use self::node::{LinkedChildren, LinkedNode, SyntaxError, SyntaxNode};
+pub use self::parser::{parse, parse_code, parse_math};
+pub use self::source::Source;
+pub use self::span::{Span, Spanned};
+
+use self::lexer::{split_newlines, LexMode, Lexer};
+use self::parser::{reparse_block, reparse_markup};
diff --git a/crates/typst-syntax/src/node.rs b/crates/typst-syntax/src/node.rs
new file mode 100644
index 00000000..f949b4dd
--- /dev/null
+++ b/crates/typst-syntax/src/node.rs
@@ -0,0 +1,912 @@
+use std::fmt::{self, Debug, Display, Formatter};
+use std::ops::{Deref, Range};
+use std::rc::Rc;
+use std::sync::Arc;
+
+use ecow::EcoString;
+
+use super::ast::AstNode;
+use super::{FileId, Span, SyntaxKind};
+
+/// A node in the untyped syntax tree.
+#[derive(Clone, Eq, PartialEq, Hash)]
+pub struct SyntaxNode(Repr);
+
+/// The three internal representations.
+#[derive(Clone, Eq, PartialEq, Hash)]
+enum Repr {
+ /// A leaf node.
+ Leaf(LeafNode),
+ /// A reference-counted inner node.
+ Inner(Arc<InnerNode>),
+ /// An error node.
+ Error(Arc<ErrorNode>),
+}
+
+impl SyntaxNode {
+ /// Create a new leaf node.
+ pub fn leaf(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
+ Self(Repr::Leaf(LeafNode::new(kind, text)))
+ }
+
+ /// Create a new inner node with children.
+ pub fn inner(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
+ Self(Repr::Inner(Arc::new(InnerNode::new(kind, children))))
+ }
+
+ /// Create a new error node.
+ pub fn error(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self {
+ Self(Repr::Error(Arc::new(ErrorNode::new(message, text))))
+ }
+
+ /// The type of the node.
+ pub fn kind(&self) -> SyntaxKind {
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.kind,
+ Repr::Inner(inner) => inner.kind,
+ Repr::Error(_) => SyntaxKind::Error,
+ }
+ }
+
+ /// Return `true` if the length is 0.
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// The byte length of the node in the source text.
+ pub fn len(&self) -> usize {
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.len(),
+ Repr::Inner(inner) => inner.len,
+ Repr::Error(node) => node.len(),
+ }
+ }
+
+ /// The span of the node.
+ pub fn span(&self) -> Span {
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.span,
+ Repr::Inner(inner) => inner.span,
+ Repr::Error(node) => node.error.span,
+ }
+ }
+
+ /// The text of the node if it is a leaf or error node.
+ ///
+ /// Returns the empty string if this is an inner node.
+ pub fn text(&self) -> &EcoString {
+ static EMPTY: EcoString = EcoString::new();
+ match &self.0 {
+ Repr::Leaf(leaf) => &leaf.text,
+ Repr::Inner(_) => &EMPTY,
+ Repr::Error(node) => &node.text,
+ }
+ }
+
+ /// Extract the text from the node.
+ ///
+ /// Builds the string if this is an inner node.
+ pub fn into_text(self) -> EcoString {
+ match self.0 {
+ Repr::Leaf(leaf) => leaf.text,
+ Repr::Inner(inner) => {
+ inner.children.iter().cloned().map(Self::into_text).collect()
+ }
+ Repr::Error(node) => node.text.clone(),
+ }
+ }
+
+ /// The node's children.
+ pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
+ match &self.0 {
+ Repr::Leaf(_) | Repr::Error(_) => [].iter(),
+ Repr::Inner(inner) => inner.children.iter(),
+ }
+ }
+
+ /// Whether the node can be cast to the given AST node.
+ pub fn is<T: AstNode>(&self) -> bool {
+ self.cast::<T>().is_some()
+ }
+
+ /// Try to convert the node to a typed AST node.
+ pub fn cast<T: AstNode>(&self) -> Option<T> {
+ T::from_untyped(self)
+ }
+
+ /// Cast the first child that can cast to the AST type `T`.
+ pub fn cast_first_match<T: AstNode>(&self) -> Option<T> {
+ self.children().find_map(Self::cast)
+ }
+
+ /// Cast the last child that can cast to the AST type `T`.
+ pub fn cast_last_match<T: AstNode>(&self) -> Option<T> {
+ self.children().rev().find_map(Self::cast)
+ }
+
+ /// Whether the node or its children contain an error.
+ pub fn erroneous(&self) -> bool {
+ match &self.0 {
+ Repr::Leaf(_) => false,
+ Repr::Inner(inner) => inner.erroneous,
+ Repr::Error(_) => true,
+ }
+ }
+
+ /// The error messages for this node and its descendants.
+ pub fn errors(&self) -> Vec<SyntaxError> {
+ if !self.erroneous() {
+ return vec![];
+ }
+
+ if let Repr::Error(node) = &self.0 {
+ vec![node.error.clone()]
+ } else {
+ self.children()
+ .filter(|node| node.erroneous())
+ .flat_map(|node| node.errors())
+ .collect()
+ }
+ }
+
+ /// Add a user-presentable hint if this is an error node.
+ pub fn hint(&mut self, hint: impl Into<EcoString>) {
+ if let Repr::Error(node) = &mut self.0 {
+ Arc::make_mut(node).hint(hint);
+ }
+ }
+
+ /// Set a synthetic span for the node and all its descendants.
+ pub fn synthesize(&mut self, span: Span) {
+ match &mut self.0 {
+ Repr::Leaf(leaf) => leaf.span = span,
+ Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span),
+ Repr::Error(node) => Arc::make_mut(node).error.span = span,
+ }
+ }
+}
+
+impl SyntaxNode {
+ /// Mark this node as erroneous.
+ pub(super) fn make_erroneous(&mut self) {
+ if let Repr::Inner(inner) = &mut self.0 {
+ Arc::make_mut(inner).erroneous = true;
+ }
+ }
+
+ /// Convert the child to another kind.
+ #[track_caller]
+ pub(super) fn convert_to_kind(&mut self, kind: SyntaxKind) {
+ debug_assert!(!kind.is_error());
+ match &mut self.0 {
+ Repr::Leaf(leaf) => leaf.kind = kind,
+ Repr::Inner(inner) => Arc::make_mut(inner).kind = kind,
+ Repr::Error(_) => panic!("cannot convert error"),
+ }
+ }
+
+ /// Convert the child to an error.
+ pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) {
+ let text = std::mem::take(self).into_text();
+ *self = SyntaxNode::error(message, text);
+ }
+
+ /// Assign spans to each node.
+ #[tracing::instrument(skip_all)]
+ pub(super) fn numberize(
+ &mut self,
+ id: FileId,
+ within: Range<u64>,
+ ) -> NumberingResult {
+ if within.start >= within.end {
+ return Err(Unnumberable);
+ }
+
+ let mid = Span::new(id, (within.start + within.end) / 2);
+ match &mut self.0 {
+ Repr::Leaf(leaf) => leaf.span = mid,
+ Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?,
+ Repr::Error(node) => Arc::make_mut(node).error.span = mid,
+ }
+
+ Ok(())
+ }
+
+ /// Whether this is a leaf node.
+ pub(super) fn is_leaf(&self) -> bool {
+ matches!(self.0, Repr::Leaf(_))
+ }
+
+ /// The number of descendants, including the node itself.
+ pub(super) fn descendants(&self) -> usize {
+ match &self.0 {
+ Repr::Leaf(_) | Repr::Error(_) => 1,
+ Repr::Inner(inner) => inner.descendants,
+ }
+ }
+
+ /// The node's children, mutably.
+ pub(super) fn children_mut(&mut self) -> &mut [SyntaxNode] {
+ match &mut self.0 {
+ Repr::Leaf(_) | Repr::Error(_) => &mut [],
+ Repr::Inner(inner) => &mut Arc::make_mut(inner).children,
+ }
+ }
+
+ /// Replaces a range of children with a replacement.
+ ///
+ /// May have mutated the children if it returns `Err(_)`.
+ pub(super) fn replace_children(
+ &mut self,
+ range: Range<usize>,
+ replacement: Vec<SyntaxNode>,
+ ) -> NumberingResult {
+ if let Repr::Inner(inner) = &mut self.0 {
+ Arc::make_mut(inner).replace_children(range, replacement)?;
+ }
+ Ok(())
+ }
+
+ /// Update this node after changes were made to one of its children.
+ pub(super) fn update_parent(
+ &mut self,
+ prev_len: usize,
+ new_len: usize,
+ prev_descendants: usize,
+ new_descendants: usize,
+ ) {
+ if let Repr::Inner(inner) = &mut self.0 {
+ Arc::make_mut(inner).update_parent(
+ prev_len,
+ new_len,
+ prev_descendants,
+ new_descendants,
+ );
+ }
+ }
+
+ /// The upper bound of assigned numbers in this subtree.
+ pub(super) fn upper(&self) -> u64 {
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.span.number() + 1,
+ Repr::Inner(inner) => inner.upper,
+ Repr::Error(node) => node.error.span.number() + 1,
+ }
+ }
+}
+
+impl Debug for SyntaxNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.fmt(f),
+ Repr::Inner(inner) => inner.fmt(f),
+ Repr::Error(node) => node.fmt(f),
+ }
+ }
+}
+
+impl Default for SyntaxNode {
+ fn default() -> Self {
+ Self::error("", "")
+ }
+}
+
+/// A leaf node in the untyped syntax tree.
+#[derive(Clone, Eq, PartialEq, Hash)]
+struct LeafNode {
+ /// What kind of node this is (each kind would have its own struct in a
+ /// strongly typed AST).
+ kind: SyntaxKind,
+ /// The source text of the node.
+ text: EcoString,
+ /// The node's span.
+ span: Span,
+}
+
+impl LeafNode {
+ /// Create a new leaf node.
+ #[track_caller]
+ fn new(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
+ debug_assert!(!kind.is_error());
+ Self { kind, text: text.into(), span: Span::detached() }
+ }
+
+ /// The byte length of the node in the source text.
+ fn len(&self) -> usize {
+ self.text.len()
+ }
+}
+
+impl Debug for LeafNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{:?}: {:?}", self.kind, self.text)
+ }
+}
+
+/// An inner node in the untyped syntax tree.
+#[derive(Clone, Eq, PartialEq, Hash)]
+struct InnerNode {
+ /// What kind of node this is (each kind would have its own struct in a
+ /// strongly typed AST).
+ kind: SyntaxKind,
+ /// The byte length of the node in the source.
+ len: usize,
+ /// The node's span.
+ span: Span,
+ /// The number of nodes in the whole subtree, including this node.
+ descendants: usize,
+ /// Whether this node or any of its children are erroneous.
+ erroneous: bool,
+ /// The upper bound of this node's numbering range.
+ upper: u64,
+ /// This node's children, losslessly make up this node.
+ children: Vec<SyntaxNode>,
+}
+
+impl InnerNode {
+ /// Create a new inner node with the given kind and children.
+ #[track_caller]
+ fn new(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
+ debug_assert!(!kind.is_error());
+
+ let mut len = 0;
+ let mut descendants = 1;
+ let mut erroneous = false;
+
+ for child in &children {
+ len += child.len();
+ descendants += child.descendants();
+ erroneous |= child.erroneous();
+ }
+
+ Self {
+ kind,
+ len,
+ span: Span::detached(),
+ descendants,
+ erroneous,
+ upper: 0,
+ children,
+ }
+ }
+
+ /// Set a synthetic span for the node and all its descendants.
+ fn synthesize(&mut self, span: Span) {
+ self.span = span;
+ self.upper = span.number();
+ for child in &mut self.children {
+ child.synthesize(span);
+ }
+ }
+
+ /// Assign span numbers `within` an interval to this node's subtree or just
+ /// a `range` of its children.
+ fn numberize(
+ &mut self,
+ id: FileId,
+ range: Option<Range<usize>>,
+ within: Range<u64>,
+ ) -> NumberingResult {
+ // Determine how many nodes we will number.
+ let descendants = match &range {
+ Some(range) if range.is_empty() => return Ok(()),
+ Some(range) => self.children[range.clone()]
+ .iter()
+ .map(SyntaxNode::descendants)
+ .sum::<usize>(),
+ None => self.descendants,
+ };
+
+ // Determine the distance between two neighbouring assigned numbers. If
+ // possible, we try to fit all numbers into the left half of `within`
+ // so that there is space for future insertions.
+ let space = within.end - within.start;
+ let mut stride = space / (2 * descendants as u64);
+ if stride == 0 {
+ stride = space / self.descendants as u64;
+ if stride == 0 {
+ return Err(Unnumberable);
+ }
+ }
+
+ // Number the node itself.
+ let mut start = within.start;
+ if range.is_none() {
+ let end = start + stride;
+ self.span = Span::new(id, (start + end) / 2);
+ self.upper = within.end;
+ start = end;
+ }
+
+ // Number the children.
+ let len = self.children.len();
+ for child in &mut self.children[range.unwrap_or(0..len)] {
+ let end = start + child.descendants() as u64 * stride;
+ child.numberize(id, start..end)?;
+ start = end;
+ }
+
+ Ok(())
+ }
+
+ /// Replaces a range of children with a replacement.
+ ///
+ /// May have mutated the children if it returns `Err(_)`.
+ fn replace_children(
+ &mut self,
+ mut range: Range<usize>,
+ replacement: Vec<SyntaxNode>,
+ ) -> NumberingResult {
+ let superseded = &self.children[range.clone()];
+
+ // Compute the new byte length.
+ self.len = self.len + replacement.iter().map(SyntaxNode::len).sum::<usize>()
+ - superseded.iter().map(SyntaxNode::len).sum::<usize>();
+
+ // Compute the new number of descendants.
+ self.descendants = self.descendants
+ + replacement.iter().map(SyntaxNode::descendants).sum::<usize>()
+ - superseded.iter().map(SyntaxNode::descendants).sum::<usize>();
+
+ // Determine whether we're still erroneous after the replacement. That's
+ // the case if
+ // - any of the new nodes is erroneous,
+ // - or if we were erroneous before due to a non-superseded node.
+ self.erroneous = replacement.iter().any(SyntaxNode::erroneous)
+ || (self.erroneous
+ && (self.children[..range.start].iter().any(SyntaxNode::erroneous))
+ || self.children[range.end..].iter().any(SyntaxNode::erroneous));
+
+ // Perform the replacement.
+ let replacement_count = replacement.len();
+ self.children.splice(range.clone(), replacement);
+ range.end = range.start + replacement_count;
+
+ // Renumber the new children. Retries until it works, taking
+ // exponentially more children into account.
+ let mut left = 0;
+ let mut right = 0;
+ let max_left = range.start;
+ let max_right = self.children.len() - range.end;
+ loop {
+ let renumber = range.start - left..range.end + right;
+
+ // The minimum assignable number is either
+ // - the upper bound of the node right before the to-be-renumbered
+ // children,
+ // - or this inner node's span number plus one if renumbering starts
+ // at the first child.
+ let start_number = renumber
+ .start
+ .checked_sub(1)
+ .and_then(|i| self.children.get(i))
+ .map_or(self.span.number() + 1, |child| child.upper());
+
+ // The upper bound for renumbering is either
+ // - the span number of the first child after the to-be-renumbered
+ // children,
+ // - or this node's upper bound if renumbering ends behind the last
+ // child.
+ let end_number = self
+ .children
+ .get(renumber.end)
+ .map_or(self.upper, |next| next.span().number());
+
+ // Try to renumber.
+ let within = start_number..end_number;
+ let id = self.span.id();
+ if self.numberize(id, Some(renumber), within).is_ok() {
+ return Ok(());
+ }
+
+ // If it didn't even work with all children, we give up.
+ if left == max_left && right == max_right {
+ return Err(Unnumberable);
+ }
+
+ // Exponential expansion to both sides.
+ left = (left + 1).next_power_of_two().min(max_left);
+ right = (right + 1).next_power_of_two().min(max_right);
+ }
+ }
+
+ /// Update this node after changes were made to one of its children.
+ fn update_parent(
+ &mut self,
+ prev_len: usize,
+ new_len: usize,
+ prev_descendants: usize,
+ new_descendants: usize,
+ ) {
+ self.len = self.len + new_len - prev_len;
+ self.descendants = self.descendants + new_descendants - prev_descendants;
+ self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
+ }
+}
+
+impl Debug for InnerNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{:?}: {}", self.kind, self.len)?;
+ if !self.children.is_empty() {
+ f.write_str(" ")?;
+ f.debug_list().entries(&self.children).finish()?;
+ }
+ Ok(())
+ }
+}
+
+/// An error node in the untyped syntax tree.
+#[derive(Clone, Eq, PartialEq, Hash)]
+struct ErrorNode {
+ /// The source text of the node.
+ text: EcoString,
+ /// The syntax error.
+ error: SyntaxError,
+}
+
+impl ErrorNode {
+ /// Create new error node.
+ fn new(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self {
+ Self {
+ text: text.into(),
+ error: SyntaxError {
+ span: Span::detached(),
+ message: message.into(),
+ hints: vec![],
+ },
+ }
+ }
+
+ /// The byte length of the node in the source text.
+ fn len(&self) -> usize {
+ self.text.len()
+ }
+
+ /// Add a user-presentable hint to this error node.
+ fn hint(&mut self, hint: impl Into<EcoString>) {
+ self.error.hints.push(hint.into());
+ }
+}
+
+impl Debug for ErrorNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "Error: {:?} ({})", self.text, self.error.message)
+ }
+}
+
+/// A syntactical error.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct SyntaxError {
+ /// The node's span.
+ pub span: Span,
+ /// The error message.
+ pub message: EcoString,
+ /// Additonal hints to the user, indicating how this error could be avoided
+ /// or worked around.
+ pub hints: Vec<EcoString>,
+}
+
+/// A syntax node in a context.
+///
+/// Knows its exact offset in the file and provides access to its
+/// children, parent and siblings.
+///
+/// **Note that all sibling and leaf accessors skip over trivia!**
+#[derive(Clone)]
+pub struct LinkedNode<'a> {
+ node: &'a SyntaxNode,
+ parent: Option<Rc<Self>>,
+ index: usize,
+ offset: usize,
+}
+
+impl<'a> LinkedNode<'a> {
+ /// Start a new traversal at a root node.
+ pub fn new(root: &'a SyntaxNode) -> Self {
+ Self { node: root, parent: None, index: 0, offset: 0 }
+ }
+
+ /// Get the contained syntax node.
+ pub fn get(&self) -> &'a SyntaxNode {
+ self.node
+ }
+
+ /// The index of this node in its parent's children list.
+ pub fn index(&self) -> usize {
+ self.index
+ }
+
+ /// The absolute byte offset of this node in the source file.
+ pub fn offset(&self) -> usize {
+ self.offset
+ }
+
+ /// The byte range of this node in the source file.
+ pub fn range(&self) -> Range<usize> {
+ self.offset..self.offset + self.node.len()
+ }
+
+ /// An iterator over this node's children.
+ pub fn children(&self) -> LinkedChildren<'a> {
+ LinkedChildren {
+ parent: Rc::new(self.clone()),
+ iter: self.node.children().enumerate(),
+ front: self.offset,
+ back: self.offset + self.len(),
+ }
+ }
+
+ /// Find a descendant with the given span.
+ pub fn find(&self, span: Span) -> Option<LinkedNode<'a>> {
+ if self.span() == span {
+ return Some(self.clone());
+ }
+
+ if let Repr::Inner(inner) = &self.0 {
+ // The parent of a subtree has a smaller span number than all of its
+ // descendants. Therefore, we can bail out early if the target span's
+ // number is smaller than our number.
+ if span.number() < inner.span.number() {
+ return None;
+ }
+
+ let mut children = self.children().peekable();
+ while let Some(child) = children.next() {
+ // Every node in this child's subtree has a smaller span number than
+ // the next sibling. Therefore we only need to recurse if the next
+ // sibling's span number is larger than the target span's number.
+ if children
+ .peek()
+ .map_or(true, |next| next.span().number() > span.number())
+ {
+ if let Some(found) = child.find(span) {
+ return Some(found);
+ }
+ }
+ }
+ }
+
+ None
+ }
+}
+
+/// Access to parents and siblings.
+impl<'a> LinkedNode<'a> {
+ /// Get this node's parent.
+ pub fn parent(&self) -> Option<&Self> {
+ self.parent.as_deref()
+ }
+
+ /// Get the first previous non-trivia sibling node.
+ pub fn prev_sibling(&self) -> Option<Self> {
+ let parent = self.parent()?;
+ let index = self.index.checked_sub(1)?;
+ let node = parent.node.children().nth(index)?;
+ let offset = self.offset - node.len();
+ let prev = Self { node, parent: self.parent.clone(), index, offset };
+ if prev.kind().is_trivia() {
+ prev.prev_sibling()
+ } else {
+ Some(prev)
+ }
+ }
+
+ /// Get the next non-trivia sibling node.
+ pub fn next_sibling(&self) -> Option<Self> {
+ let parent = self.parent()?;
+ let index = self.index.checked_add(1)?;
+ let node = parent.node.children().nth(index)?;
+ let offset = self.offset + self.node.len();
+ let next = Self { node, parent: self.parent.clone(), index, offset };
+ if next.kind().is_trivia() {
+ next.next_sibling()
+ } else {
+ Some(next)
+ }
+ }
+
+ /// Get the kind of this node's parent.
+ pub fn parent_kind(&self) -> Option<SyntaxKind> {
+ Some(self.parent()?.node.kind())
+ }
+
+ /// Get the kind of this node's first previous non-trivia sibling.
+ pub fn prev_sibling_kind(&self) -> Option<SyntaxKind> {
+ Some(self.prev_sibling()?.node.kind())
+ }
+
+ /// Get the kind of this node's next non-trivia sibling.
+ pub fn next_sibling_kind(&self) -> Option<SyntaxKind> {
+ Some(self.next_sibling()?.node.kind())
+ }
+}
+
+/// Access to leafs.
+impl<'a> LinkedNode<'a> {
+ /// Get the rightmost non-trivia leaf before this node.
+ pub fn prev_leaf(&self) -> Option<Self> {
+ let mut node = self.clone();
+ while let Some(prev) = node.prev_sibling() {
+ if let Some(leaf) = prev.rightmost_leaf() {
+ return Some(leaf);
+ }
+ node = prev;
+ }
+ self.parent()?.prev_leaf()
+ }
+
+ /// Find the leftmost contained non-trivia leaf.
+ pub fn leftmost_leaf(&self) -> Option<Self> {
+ if self.is_leaf() && !self.kind().is_trivia() && !self.kind().is_error() {
+ return Some(self.clone());
+ }
+
+ for child in self.children() {
+ if let Some(leaf) = child.leftmost_leaf() {
+ return Some(leaf);
+ }
+ }
+
+ None
+ }
+
+ /// Get the leaf at the specified byte offset.
+ pub fn leaf_at(&self, cursor: usize) -> Option<Self> {
+ if self.node.children().len() == 0 && cursor <= self.offset + self.len() {
+ return Some(self.clone());
+ }
+
+ let mut offset = self.offset;
+ let count = self.node.children().len();
+ for (i, child) in self.children().enumerate() {
+ let len = child.len();
+ if (offset < cursor && cursor <= offset + len)
+ || (offset == cursor && i + 1 == count)
+ {
+ return child.leaf_at(cursor);
+ }
+ offset += len;
+ }
+
+ None
+ }
+
+ /// Find the rightmost contained non-trivia leaf.
+ pub fn rightmost_leaf(&self) -> Option<Self> {
+ if self.is_leaf() && !self.kind().is_trivia() {
+ return Some(self.clone());
+ }
+
+ for child in self.children().rev() {
+ if let Some(leaf) = child.rightmost_leaf() {
+ return Some(leaf);
+ }
+ }
+
+ None
+ }
+
+ /// Get the leftmost non-trivia leaf after this node.
+ pub fn next_leaf(&self) -> Option<Self> {
+ let mut node = self.clone();
+ while let Some(next) = node.next_sibling() {
+ if let Some(leaf) = next.leftmost_leaf() {
+ return Some(leaf);
+ }
+ node = next;
+ }
+ self.parent()?.next_leaf()
+ }
+}
+
+impl Deref for LinkedNode<'_> {
+ type Target = SyntaxNode;
+
+ fn deref(&self) -> &Self::Target {
+ self.get()
+ }
+}
+
+impl Debug for LinkedNode<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.node.fmt(f)
+ }
+}
+
+/// An iterator over the children of a linked node.
+pub struct LinkedChildren<'a> {
+ parent: Rc<LinkedNode<'a>>,
+ iter: std::iter::Enumerate<std::slice::Iter<'a, SyntaxNode>>,
+ front: usize,
+ back: usize,
+}
+
+impl<'a> Iterator for LinkedChildren<'a> {
+ type Item = LinkedNode<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.iter.next().map(|(index, node)| {
+ let offset = self.front;
+ self.front += node.len();
+ LinkedNode {
+ node,
+ parent: Some(self.parent.clone()),
+ index,
+ offset,
+ }
+ })
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.iter.size_hint()
+ }
+}
+
+impl DoubleEndedIterator for LinkedChildren<'_> {
+ fn next_back(&mut self) -> Option<Self::Item> {
+ self.iter.next_back().map(|(index, node)| {
+ self.back -= node.len();
+ LinkedNode {
+ node,
+ parent: Some(self.parent.clone()),
+ index,
+ offset: self.back,
+ }
+ })
+ }
+}
+
+impl ExactSizeIterator for LinkedChildren<'_> {}
+
+/// Result of numbering a node within an interval.
+pub(super) type NumberingResult = Result<(), Unnumberable>;
+
+/// Indicates that a node cannot be numbered within a given interval.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub(super) struct Unnumberable;
+
+impl Display for Unnumberable {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ f.pad("cannot number within this interval")
+ }
+}
+
+impl std::error::Error for Unnumberable {}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::Source;
+
+ #[test]
+ fn test_linked_node() {
+ let source = Source::detached("#set text(12pt, red)");
+
+ // Find "text".
+ let node = LinkedNode::new(source.root()).leaf_at(7).unwrap();
+ assert_eq!(node.offset(), 5);
+ assert_eq!(node.text(), "text");
+
+ // Go back to "#set". Skips the space.
+ let prev = node.prev_sibling().unwrap();
+ assert_eq!(prev.offset(), 1);
+ assert_eq!(prev.text(), "set");
+ }
+
+ #[test]
+ fn test_linked_node_non_trivia_leaf() {
+ let source = Source::detached("#set fun(12pt, red)");
+ let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap();
+ let prev = leaf.prev_leaf().unwrap();
+ assert_eq!(leaf.text(), "fun");
+ assert_eq!(prev.text(), "set");
+
+ let source = Source::detached("#let x = 10");
+ let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap();
+ let prev = leaf.prev_leaf().unwrap();
+ let next = leaf.next_leaf().unwrap();
+ assert_eq!(prev.text(), "=");
+ assert_eq!(leaf.text(), " ");
+ assert_eq!(next.text(), "10");
+ }
+}
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
new file mode 100644
index 00000000..24cf7116
--- /dev/null
+++ b/crates/typst-syntax/src/parser.rs
@@ -0,0 +1,1760 @@
+use std::collections::HashSet;
+use std::ops::Range;
+
+use ecow::{eco_format, EcoString};
+use unicode_math_class::MathClass;
+
+use super::{ast, is_newline, LexMode, Lexer, SyntaxKind, SyntaxNode};
+
+/// Parse a source file.
+#[tracing::instrument(skip_all)]
+pub fn parse(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, 0, LexMode::Markup);
+ markup(&mut p, true, 0, |_| false);
+ p.finish().into_iter().next().unwrap()
+}
+
+/// Parse top-level code.
+#[tracing::instrument(skip_all)]
+pub fn parse_code(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, 0, LexMode::Code);
+ let m = p.marker();
+ p.skip();
+ code_exprs(&mut p, |_| false);
+ p.wrap_all(m, SyntaxKind::Code);
+ p.finish().into_iter().next().unwrap()
+}
+
+/// Parse top-level math.
+#[tracing::instrument(skip_all)]
+pub fn parse_math(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, 0, LexMode::Math);
+ math(&mut p, |_| false);
+ p.finish().into_iter().next().unwrap()
+}
+
+fn markup(
+ p: &mut Parser,
+ mut at_start: bool,
+ min_indent: usize,
+ mut stop: impl FnMut(&Parser) -> bool,
+) {
+ let m = p.marker();
+ let mut nesting: usize = 0;
+ while !p.eof() {
+ match p.current() {
+ SyntaxKind::LeftBracket => nesting += 1,
+ SyntaxKind::RightBracket if nesting > 0 => nesting -= 1,
+ _ if stop(p) => break,
+ _ => {}
+ }
+
+ if p.newline() {
+ at_start = true;
+ if min_indent > 0 && p.column(p.current_end()) < min_indent {
+ break;
+ }
+ p.eat();
+ continue;
+ }
+
+ let prev = p.prev_end();
+ markup_expr(p, &mut at_start);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ p.wrap(m, SyntaxKind::Markup);
+}
+
+pub(super) fn reparse_markup(
+ text: &str,
+ range: Range<usize>,
+ at_start: &mut bool,
+ nesting: &mut usize,
+ mut stop: impl FnMut(SyntaxKind) -> bool,
+) -> Option<Vec<SyntaxNode>> {
+ let mut p = Parser::new(text, range.start, LexMode::Markup);
+ while !p.eof() && p.current_start() < range.end {
+ match p.current() {
+ SyntaxKind::LeftBracket => *nesting += 1,
+ SyntaxKind::RightBracket if *nesting > 0 => *nesting -= 1,
+ _ if stop(p.current()) => break,
+ _ => {}
+ }
+
+ if p.newline() {
+ *at_start = true;
+ p.eat();
+ continue;
+ }
+
+ let prev = p.prev_end();
+ markup_expr(&mut p, at_start);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ (p.balanced && p.current_start() == range.end).then(|| p.finish())
+}
+
+fn markup_expr(p: &mut Parser, at_start: &mut bool) {
+ match p.current() {
+ SyntaxKind::Space
+ | SyntaxKind::Parbreak
+ | SyntaxKind::LineComment
+ | SyntaxKind::BlockComment => {
+ p.eat();
+ return;
+ }
+
+ SyntaxKind::Text
+ | SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::SmartQuote
+ | SyntaxKind::Raw
+ | SyntaxKind::Link
+ | SyntaxKind::Label => p.eat(),
+
+ SyntaxKind::Hashtag => embedded_code_expr(p),
+ SyntaxKind::Star => strong(p),
+ SyntaxKind::Underscore => emph(p),
+ SyntaxKind::HeadingMarker if *at_start => heading(p),
+ SyntaxKind::ListMarker if *at_start => list_item(p),
+ SyntaxKind::EnumMarker if *at_start => enum_item(p),
+ SyntaxKind::TermMarker if *at_start => term_item(p),
+ SyntaxKind::RefMarker => reference(p),
+ SyntaxKind::Dollar => equation(p),
+
+ SyntaxKind::LeftBracket
+ | SyntaxKind::RightBracket
+ | SyntaxKind::HeadingMarker
+ | SyntaxKind::ListMarker
+ | SyntaxKind::EnumMarker
+ | SyntaxKind::TermMarker
+ | SyntaxKind::Colon => p.convert(SyntaxKind::Text),
+
+ _ => {}
+ }
+
+ *at_start = false;
+}
+
+fn strong(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Star);
+ markup(p, false, 0, |p| {
+ p.at(SyntaxKind::Star)
+ || p.at(SyntaxKind::Parbreak)
+ || p.at(SyntaxKind::RightBracket)
+ });
+ p.expect_closing_delimiter(m, SyntaxKind::Star);
+ p.wrap(m, SyntaxKind::Strong);
+}
+
+fn emph(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Underscore);
+ markup(p, false, 0, |p| {
+ p.at(SyntaxKind::Underscore)
+ || p.at(SyntaxKind::Parbreak)
+ || p.at(SyntaxKind::RightBracket)
+ });
+ p.expect_closing_delimiter(m, SyntaxKind::Underscore);
+ p.wrap(m, SyntaxKind::Emph);
+}
+
+fn heading(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::HeadingMarker);
+ whitespace_line(p);
+ markup(p, false, usize::MAX, |p| {
+ p.at(SyntaxKind::Label)
+ || p.at(SyntaxKind::RightBracket)
+ || (p.at(SyntaxKind::Space) && p.lexer.clone().next() == SyntaxKind::Label)
+ });
+ p.wrap(m, SyntaxKind::Heading);
+}
+
+fn list_item(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::ListMarker);
+ let min_indent = p.column(p.prev_end());
+ whitespace_line(p);
+ markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
+ p.wrap(m, SyntaxKind::ListItem);
+}
+
+fn enum_item(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::EnumMarker);
+ let min_indent = p.column(p.prev_end());
+ whitespace_line(p);
+ markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
+ p.wrap(m, SyntaxKind::EnumItem);
+}
+
+fn term_item(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::TermMarker);
+ let min_indent = p.column(p.prev_end());
+ whitespace_line(p);
+ markup(p, false, usize::MAX, |p| {
+ p.at(SyntaxKind::Colon) || p.at(SyntaxKind::RightBracket)
+ });
+ p.expect(SyntaxKind::Colon);
+ whitespace_line(p);
+ markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
+ p.wrap(m, SyntaxKind::TermItem);
+}
+
+fn reference(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::RefMarker);
+ if p.directly_at(SyntaxKind::LeftBracket) {
+ content_block(p);
+ }
+ p.wrap(m, SyntaxKind::Ref);
+}
+
+fn whitespace_line(p: &mut Parser) {
+ while !p.newline() && p.current().is_trivia() {
+ p.eat();
+ }
+}
+
+fn equation(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Math);
+ p.assert(SyntaxKind::Dollar);
+ math(p, |p| p.at(SyntaxKind::Dollar));
+ p.expect_closing_delimiter(m, SyntaxKind::Dollar);
+ p.exit();
+ p.wrap(m, SyntaxKind::Equation);
+}
+
+fn math(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
+ let m = p.marker();
+ while !p.eof() && !stop(p) {
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ p.wrap(m, SyntaxKind::Math);
+}
+
+fn math_expr(p: &mut Parser) {
+ math_expr_prec(p, 0, SyntaxKind::Eof)
+}
+
+fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
+ let m = p.marker();
+ let mut continuable = false;
+ match p.current() {
+ SyntaxKind::Hashtag => embedded_code_expr(p),
+ SyntaxKind::MathIdent => {
+ continuable = true;
+ p.eat();
+ while p.directly_at(SyntaxKind::Text)
+ && p.current_text() == "."
+ && matches!(
+ p.lexer.clone().next(),
+ SyntaxKind::MathIdent | SyntaxKind::Text
+ )
+ {
+ p.convert(SyntaxKind::Dot);
+ p.convert(SyntaxKind::Ident);
+ p.wrap(m, SyntaxKind::FieldAccess);
+ }
+ if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "("
+ {
+ math_args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
+ continuable = false;
+ }
+ }
+
+ SyntaxKind::Text | SyntaxKind::Shorthand => {
+ continuable = matches!(
+ math_class(p.current_text()),
+ None | Some(MathClass::Alphabetic)
+ );
+ if !maybe_delimited(p, true) {
+ p.eat();
+ }
+ }
+
+ SyntaxKind::Linebreak | SyntaxKind::MathAlignPoint => p.eat(),
+ SyntaxKind::Escape | SyntaxKind::Str => {
+ continuable = true;
+ p.eat();
+ }
+
+ SyntaxKind::Root => {
+ if min_prec < 3 {
+ p.eat();
+ let m2 = p.marker();
+ math_expr_prec(p, 2, stop);
+ math_unparen(p, m2);
+ p.wrap(m, SyntaxKind::MathRoot);
+ }
+ }
+
+ SyntaxKind::Prime => {
+ // Means that there is nothing to attach the prime to.
+ continuable = true;
+ while p.at(SyntaxKind::Prime) {
+ let m2 = p.marker();
+ p.eat();
+ // Eat the group until the space.
+ while p.eat_if_direct(SyntaxKind::Prime) {}
+ p.wrap(m2, SyntaxKind::MathPrimes);
+ }
+ }
+
+ _ => p.expected("expression"),
+ }
+
+ if continuable
+ && min_prec < 3
+ && p.prev_end() == p.current_start()
+ && maybe_delimited(p, false)
+ {
+ p.wrap(m, SyntaxKind::Math);
+ }
+
+ // Whether there were _any_ primes in the loop.
+ let mut primed = false;
+
+ while !p.eof() && !p.at(stop) {
+ if p.directly_at(SyntaxKind::Text) && p.current_text() == "!" {
+ p.eat();
+ p.wrap(m, SyntaxKind::Math);
+ continue;
+ }
+
+ let prime_marker = p.marker();
+ if p.eat_if_direct(SyntaxKind::Prime) {
+ // Eat as many primes as possible.
+ while p.eat_if_direct(SyntaxKind::Prime) {}
+ p.wrap(prime_marker, SyntaxKind::MathPrimes);
+
+ // Will not be continued, so need to wrap the prime as attachment.
+ if p.at(stop) {
+ p.wrap(m, SyntaxKind::MathAttach);
+ }
+
+ primed = true;
+ continue;
+ }
+
+ // Separate primes and superscripts to different attachments.
+ if primed && p.current() == SyntaxKind::Hat {
+ p.wrap(m, SyntaxKind::MathAttach);
+ }
+
+ let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else {
+ // No attachments, so we need to wrap primes as attachment.
+ if primed {
+ p.wrap(m, SyntaxKind::MathAttach);
+ }
+
+ break;
+ };
+
+ if primed && kind == SyntaxKind::MathFrac {
+ p.wrap(m, SyntaxKind::MathAttach);
+ }
+
+ if prec < min_prec {
+ break;
+ }
+
+ match assoc {
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
+ }
+
+ if kind == SyntaxKind::MathFrac {
+ math_unparen(p, m);
+ }
+
+ p.eat();
+ let m2 = p.marker();
+ math_expr_prec(p, prec, stop);
+ math_unparen(p, m2);
+
+ if p.eat_if(SyntaxKind::Underscore) || (!primed && p.eat_if(SyntaxKind::Hat)) {
+ let m3 = p.marker();
+ math_expr_prec(p, prec, SyntaxKind::Eof);
+ math_unparen(p, m3);
+ }
+
+ p.wrap(m, kind);
+ }
+}
+
+fn maybe_delimited(p: &mut Parser, allow_fence: bool) -> bool {
+ if allow_fence && math_class(p.current_text()) == Some(MathClass::Fence) {
+ math_delimited(p, MathClass::Fence);
+ true
+ } else if math_class(p.current_text()) == Some(MathClass::Opening) {
+ math_delimited(p, MathClass::Closing);
+ true
+ } else {
+ false
+ }
+}
+
+fn math_delimited(p: &mut Parser, stop: MathClass) {
+ let m = p.marker();
+ p.eat();
+ let m2 = p.marker();
+ while !p.eof() && !p.at(SyntaxKind::Dollar) {
+ let class = math_class(p.current_text());
+ if stop == MathClass::Fence && class == Some(MathClass::Closing) {
+ break;
+ }
+
+ if class == Some(stop) {
+ p.wrap(m2, SyntaxKind::Math);
+ p.eat();
+ p.wrap(m, SyntaxKind::MathDelimited);
+ return;
+ }
+
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+
+ p.wrap(m, SyntaxKind::Math);
+}
+
+fn math_unparen(p: &mut Parser, m: Marker) {
+ let Some(node) = p.nodes.get_mut(m.0) else { return };
+ if node.kind() != SyntaxKind::MathDelimited {
+ return;
+ }
+
+ if let [first, .., last] = node.children_mut() {
+ if first.text() == "(" && last.text() == ")" {
+ first.convert_to_kind(SyntaxKind::LeftParen);
+ last.convert_to_kind(SyntaxKind::RightParen);
+ }
+ }
+
+ node.convert_to_kind(SyntaxKind::Math);
+}
+
+fn math_class(text: &str) -> Option<MathClass> {
+ match text {
+ "[|" => return Some(MathClass::Opening),
+ "|]" => return Some(MathClass::Closing),
+ "||" => return Some(MathClass::Fence),
+ _ => {}
+ }
+
+ let mut chars = text.chars();
+ chars
+ .next()
+ .filter(|_| chars.next().is_none())
+ .and_then(unicode_math_class::class)
+}
+
+fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
+ match kind {
+ SyntaxKind::Underscore => {
+ Some((SyntaxKind::MathAttach, SyntaxKind::Hat, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Hat => {
+ Some((SyntaxKind::MathAttach, SyntaxKind::Underscore, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Slash => {
+ Some((SyntaxKind::MathFrac, SyntaxKind::Eof, ast::Assoc::Left, 1))
+ }
+ _ => None,
+ }
+}
+
+fn math_args(p: &mut Parser) {
+ let m = p.marker();
+ p.convert(SyntaxKind::LeftParen);
+
+ let mut namable = true;
+ let mut named = None;
+ let mut has_arrays = false;
+ let mut array = p.marker();
+ let mut arg = p.marker();
+
+ while !p.eof() && !p.at(SyntaxKind::Dollar) {
+ if namable
+ && (p.at(SyntaxKind::MathIdent) || p.at(SyntaxKind::Text))
+ && p.text[p.current_end()..].starts_with(':')
+ {
+ p.convert(SyntaxKind::Ident);
+ p.convert(SyntaxKind::Colon);
+ named = Some(arg);
+ arg = p.marker();
+ array = p.marker();
+ }
+
+ match p.current_text() {
+ ")" => break,
+ ";" => {
+ maybe_wrap_in_math(p, arg, named);
+ p.wrap(array, SyntaxKind::Array);
+ p.convert(SyntaxKind::Semicolon);
+ array = p.marker();
+ arg = p.marker();
+ namable = true;
+ named = None;
+ has_arrays = true;
+ continue;
+ }
+ "," => {
+ maybe_wrap_in_math(p, arg, named);
+ p.convert(SyntaxKind::Comma);
+ arg = p.marker();
+ namable = true;
+ if named.is_some() {
+ array = p.marker();
+ named = None;
+ }
+ continue;
+ }
+ _ => {}
+ }
+
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+
+ namable = false;
+ }
+
+ if arg != p.marker() {
+ maybe_wrap_in_math(p, arg, named);
+ if named.is_some() {
+ array = p.marker();
+ }
+ }
+
+ if has_arrays && array != p.marker() {
+ p.wrap(array, SyntaxKind::Array);
+ }
+
+ if p.at(SyntaxKind::Text) && p.current_text() == ")" {
+ p.convert(SyntaxKind::RightParen);
+ } else {
+ p.expected("closing paren");
+ p.balanced = false;
+ }
+
+ p.wrap(m, SyntaxKind::Args);
+}
+
+fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, named: Option<Marker>) {
+ let exprs = p.post_process(arg).filter(|node| node.is::<ast::Expr>()).count();
+ if exprs != 1 {
+ p.wrap(arg, SyntaxKind::Math);
+ }
+
+ if let Some(m) = named {
+ p.wrap(m, SyntaxKind::Named);
+ }
+}
+
+fn code(p: &mut Parser, stop: impl FnMut(&Parser) -> bool) {
+ let m = p.marker();
+ code_exprs(p, stop);
+ p.wrap(m, SyntaxKind::Code);
+}
+
+fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
+ while !p.eof() && !stop(p) {
+ p.stop_at_newline(true);
+ let prev = p.prev_end();
+ code_expr(p);
+ if p.progress(prev) && !p.eof() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) {
+ p.expected("semicolon or line break");
+ }
+ p.unstop();
+ if !p.progress(prev) && !p.eof() {
+ p.unexpected();
+ }
+ }
+}
+
+fn code_expr(p: &mut Parser) {
+ code_expr_prec(p, false, 0, false)
+}
+
+fn code_expr_or_pattern(p: &mut Parser) {
+ code_expr_prec(p, false, 0, true)
+}
+
+fn embedded_code_expr(p: &mut Parser) {
+ p.stop_at_newline(true);
+ p.enter(LexMode::Code);
+ p.assert(SyntaxKind::Hashtag);
+ p.unskip();
+
+ let stmt = matches!(
+ p.current(),
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ );
+
+ let prev = p.prev_end();
+ code_expr_prec(p, true, 0, false);
+
+ // Consume error for things like `#12p` or `#"abc\"`.#
+ if !p.progress(prev) {
+ if p.current().is_trivia() {
+ // p.unskip();
+ } else if !p.eof() {
+ p.unexpected();
+ }
+ }
+
+ let semi =
+ (stmt || p.directly_at(SyntaxKind::Semicolon)) && p.eat_if(SyntaxKind::Semicolon);
+
+ if stmt && !semi && !p.eof() && !p.at(SyntaxKind::RightBracket) {
+ p.expected("semicolon or line break");
+ }
+
+ p.exit();
+ p.unstop();
+}
+
+fn code_expr_prec(
+ p: &mut Parser,
+ atomic: bool,
+ min_prec: usize,
+ allow_destructuring: bool,
+) {
+ let m = p.marker();
+ if let (false, Some(op)) = (atomic, ast::UnOp::from_kind(p.current())) {
+ p.eat();
+ code_expr_prec(p, atomic, op.precedence(), false);
+ p.wrap(m, SyntaxKind::Unary);
+ } else {
+ code_primary(p, atomic, allow_destructuring);
+ }
+
+ loop {
+ if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket)
+ {
+ args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
+ continue;
+ }
+
+ let at_field_or_method =
+ p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident;
+
+ if atomic && !at_field_or_method {
+ break;
+ }
+
+ if p.eat_if(SyntaxKind::Dot) {
+ p.expect(SyntaxKind::Ident);
+ p.wrap(m, SyntaxKind::FieldAccess);
+ continue;
+ }
+
+ let binop =
+ if ast::BinOp::NotIn.precedence() >= min_prec && p.eat_if(SyntaxKind::Not) {
+ if p.at(SyntaxKind::In) {
+ Some(ast::BinOp::NotIn)
+ } else {
+ p.expected("keyword `in`");
+ break;
+ }
+ } else {
+ ast::BinOp::from_kind(p.current())
+ };
+
+ if let Some(op) = binop {
+ let mut prec = op.precedence();
+ if prec < min_prec {
+ break;
+ }
+
+ match op.assoc() {
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
+ }
+
+ p.eat();
+ code_expr_prec(p, false, prec, false);
+ p.wrap(m, SyntaxKind::Binary);
+ continue;
+ }
+
+ break;
+ }
+}
+
+fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) {
+ let m = p.marker();
+ match p.current() {
+ SyntaxKind::Ident => {
+ p.eat();
+ if !atomic && p.at(SyntaxKind::Arrow) {
+ p.wrap(m, SyntaxKind::Params);
+ p.assert(SyntaxKind::Arrow);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Closure);
+ }
+ }
+ SyntaxKind::Underscore if !atomic => {
+ p.eat();
+ if p.at(SyntaxKind::Arrow) {
+ p.wrap(m, SyntaxKind::Params);
+ p.eat();
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Closure);
+ } else if let Some(underscore) = p.node_mut(m) {
+ underscore.convert_to_error("expected expression, found underscore");
+ }
+ }
+
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftParen => with_paren(p, allow_destructuring),
+ SyntaxKind::Dollar => equation(p),
+ SyntaxKind::Let => let_binding(p),
+ SyntaxKind::Set => set_rule(p),
+ SyntaxKind::Show => show_rule(p),
+ SyntaxKind::If => conditional(p),
+ SyntaxKind::While => while_loop(p),
+ SyntaxKind::For => for_loop(p),
+ SyntaxKind::Import => module_import(p),
+ SyntaxKind::Include => module_include(p),
+ SyntaxKind::Break => break_stmt(p),
+ SyntaxKind::Continue => continue_stmt(p),
+ SyntaxKind::Return => return_stmt(p),
+
+ SyntaxKind::None
+ | SyntaxKind::Auto
+ | SyntaxKind::Int
+ | SyntaxKind::Float
+ | SyntaxKind::Bool
+ | SyntaxKind::Numeric
+ | SyntaxKind::Str
+ | SyntaxKind::Label
+ | SyntaxKind::Raw => p.eat(),
+
+ _ => p.expected("expression"),
+ }
+}
+
+fn block(p: &mut Parser) {
+ match p.current() {
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftBrace => code_block(p),
+ _ => p.expected("block"),
+ }
+}
+
+pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
+ let mut p = Parser::new(text, range.start, LexMode::Code);
+ assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
+ block(&mut p);
+ (p.balanced && p.prev_end() == range.end)
+ .then(|| p.finish().into_iter().next().unwrap())
+}
+
+fn code_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Code);
+ p.stop_at_newline(false);
+ p.assert(SyntaxKind::LeftBrace);
+ code(p, |p| {
+ p.at(SyntaxKind::RightBrace)
+ || p.at(SyntaxKind::RightBracket)
+ || p.at(SyntaxKind::RightParen)
+ });
+ p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
+ p.exit();
+ p.unstop();
+ p.wrap(m, SyntaxKind::CodeBlock);
+}
+
+fn content_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Markup);
+ p.assert(SyntaxKind::LeftBracket);
+ markup(p, true, 0, |p| p.at(SyntaxKind::RightBracket));
+ p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
+ p.exit();
+ p.wrap(m, SyntaxKind::ContentBlock);
+}
+
+fn with_paren(p: &mut Parser, allow_destructuring: bool) {
+ let m = p.marker();
+ let mut kind = collection(p, true);
+ if p.at(SyntaxKind::Arrow) {
+ validate_params_at(p, m);
+ p.wrap(m, SyntaxKind::Params);
+ p.assert(SyntaxKind::Arrow);
+ code_expr(p);
+ kind = SyntaxKind::Closure;
+ } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized {
+ // TODO: add warning if p.at(SyntaxKind::Eq) && kind == SyntaxKind::Parenthesized
+
+ validate_pattern_at(p, m, false);
+ p.wrap(m, SyntaxKind::Destructuring);
+ p.assert(SyntaxKind::Eq);
+ code_expr(p);
+ kind = SyntaxKind::DestructAssignment;
+ }
+
+ match kind {
+ SyntaxKind::Array if !allow_destructuring => validate_array_at(p, m),
+ SyntaxKind::Dict if !allow_destructuring => validate_dict_at(p, m),
+ SyntaxKind::Parenthesized if !allow_destructuring => {
+ validate_parenthesized_at(p, m)
+ }
+ SyntaxKind::Destructuring if !allow_destructuring => {
+ invalidate_destructuring(p, m)
+ }
+ _ => {}
+ }
+ p.wrap(m, kind);
+}
+
+fn invalidate_destructuring(p: &mut Parser, m: Marker) {
+ let mut collection_kind = Option::None;
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Named | SyntaxKind::Keyed => match collection_kind {
+ Some(SyntaxKind::Array) => child.convert_to_error(eco_format!(
+ "expected expression, found {}",
+ child.kind().name()
+ )),
+ _ => collection_kind = Some(SyntaxKind::Dict),
+ },
+ SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {}
+ kind => match collection_kind {
+ Some(SyntaxKind::Dict) => child.convert_to_error(eco_format!(
+ "expected named or keyed pair, found {}",
+ kind.name()
+ )),
+ _ => collection_kind = Some(SyntaxKind::Array),
+ },
+ }
+ }
+}
+
+fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ p.stop_at_newline(false);
+
+ let m = p.marker();
+ p.assert(SyntaxKind::LeftParen);
+
+ let mut count = 0;
+ let mut parenthesized = true;
+ let mut kind = None;
+ if keyed && p.eat_if(SyntaxKind::Colon) {
+ kind = Some(SyntaxKind::Dict);
+ parenthesized = false;
+ }
+
+ while !p.current().is_terminator() {
+ let prev = p.prev_end();
+ match item(p, keyed) {
+ SyntaxKind::Spread => parenthesized = false,
+ SyntaxKind::Named | SyntaxKind::Keyed => {
+ match kind {
+ Some(SyntaxKind::Array) => kind = Some(SyntaxKind::Destructuring),
+ _ => kind = Some(SyntaxKind::Dict),
+ }
+ parenthesized = false;
+ }
+ SyntaxKind::Int => match kind {
+ Some(SyntaxKind::Array) | None => kind = Some(SyntaxKind::Array),
+ Some(_) => kind = Some(SyntaxKind::Destructuring),
+ },
+ _ if kind.is_none() => kind = Some(SyntaxKind::Array),
+ _ => {}
+ }
+
+ if !p.progress(prev) {
+ p.unexpected();
+ continue;
+ }
+
+ count += 1;
+
+ if p.current().is_terminator() {
+ break;
+ }
+
+ if p.expect(SyntaxKind::Comma) {
+ parenthesized = false;
+ }
+ }
+
+ p.expect_closing_delimiter(m, SyntaxKind::RightParen);
+ p.unstop();
+
+ if parenthesized && count == 1 {
+ SyntaxKind::Parenthesized
+ } else {
+ kind.unwrap_or(SyntaxKind::Array)
+ }
+}
+
+fn item(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ let m = p.marker();
+
+ if p.eat_if(SyntaxKind::Dots) {
+ if p.at(SyntaxKind::Comma) || p.at(SyntaxKind::RightParen) {
+ p.wrap(m, SyntaxKind::Spread);
+ return SyntaxKind::Spread;
+ }
+
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Spread);
+ return SyntaxKind::Spread;
+ }
+
+ if p.at(SyntaxKind::Underscore) {
+ // This is a temporary workaround to fix `v.map(_ => {})`.
+ let mut lexer = p.lexer.clone();
+ let next =
+ std::iter::from_fn(|| Some(lexer.next())).find(|kind| !kind.is_trivia());
+ if next != Some(SyntaxKind::Arrow) {
+ p.eat();
+ return SyntaxKind::Underscore;
+ }
+ }
+
+ code_expr_or_pattern(p);
+
+ if !p.eat_if(SyntaxKind::Colon) {
+ return SyntaxKind::Int;
+ }
+
+ if !p.eat_if(SyntaxKind::Underscore) {
+ code_expr(p);
+ }
+
+ let kind = match p.node(m).map(SyntaxNode::kind) {
+ Some(SyntaxKind::Ident) => SyntaxKind::Named,
+ Some(SyntaxKind::Str) if keyed => SyntaxKind::Keyed,
+ _ => {
+ for child in p.post_process(m) {
+ if child.kind() == SyntaxKind::Colon {
+ break;
+ }
+
+ let mut message = EcoString::from("expected identifier");
+ if keyed {
+ message.push_str(" or string");
+ }
+ message.push_str(", found ");
+ message.push_str(child.kind().name());
+ child.convert_to_error(message);
+ }
+ SyntaxKind::Named
+ }
+ };
+
+ p.wrap(m, kind);
+ kind
+}
+
+fn args(p: &mut Parser) {
+ if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) {
+ p.expected("argument list");
+ }
+
+ let m = p.marker();
+ if p.at(SyntaxKind::LeftParen) {
+ collection(p, false);
+ validate_args_at(p, m);
+ }
+
+ while p.directly_at(SyntaxKind::LeftBracket) {
+ content_block(p);
+ }
+
+ p.wrap(m, SyntaxKind::Args);
+}
+
+enum PatternKind {
+ Ident,
+ Placeholder,
+ Destructuring,
+}
+
+fn pattern(p: &mut Parser) -> PatternKind {
+ let m = p.marker();
+ if p.at(SyntaxKind::LeftParen) {
+ let kind = collection(p, false);
+ validate_pattern_at(p, m, true);
+
+ if kind == SyntaxKind::Parenthesized {
+ PatternKind::Ident
+ } else {
+ p.wrap(m, SyntaxKind::Destructuring);
+ PatternKind::Destructuring
+ }
+ } else if p.eat_if(SyntaxKind::Underscore) {
+ PatternKind::Placeholder
+ } else {
+ p.expect(SyntaxKind::Ident);
+ PatternKind::Ident
+ }
+}
+
+fn let_binding(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Let);
+
+ let m2 = p.marker();
+ let mut closure = false;
+ let mut destructuring = false;
+ match pattern(p) {
+ PatternKind::Ident => {
+ closure = p.directly_at(SyntaxKind::LeftParen);
+ if closure {
+ let m3 = p.marker();
+ collection(p, false);
+ validate_params_at(p, m3);
+ p.wrap(m3, SyntaxKind::Params);
+ }
+ }
+ PatternKind::Placeholder => {}
+ PatternKind::Destructuring => destructuring = true,
+ }
+
+ let f = if closure || destructuring { Parser::expect } else { Parser::eat_if };
+ if f(p, SyntaxKind::Eq) {
+ code_expr(p);
+ }
+
+ if closure {
+ p.wrap(m2, SyntaxKind::Closure);
+ }
+
+ p.wrap(m, SyntaxKind::LetBinding);
+}
+
+fn set_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Set);
+
+ let m2 = p.marker();
+ p.expect(SyntaxKind::Ident);
+ while p.eat_if(SyntaxKind::Dot) {
+ p.expect(SyntaxKind::Ident);
+ p.wrap(m2, SyntaxKind::FieldAccess);
+ }
+
+ args(p);
+ if p.eat_if(SyntaxKind::If) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::SetRule);
+}
+
+fn show_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Show);
+ let m2 = p.before_trivia();
+
+ if !p.at(SyntaxKind::Colon) {
+ code_expr(p);
+ }
+
+ if p.eat_if(SyntaxKind::Colon) {
+ code_expr(p);
+ } else {
+ p.expected_at(m2, "colon");
+ }
+
+ p.wrap(m, SyntaxKind::ShowRule);
+}
+
+fn conditional(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::If);
+ code_expr(p);
+ block(p);
+ if p.eat_if(SyntaxKind::Else) {
+ if p.at(SyntaxKind::If) {
+ conditional(p);
+ } else {
+ block(p);
+ }
+ }
+ p.wrap(m, SyntaxKind::Conditional);
+}
+
+fn while_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::While);
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::WhileLoop);
+}
+
+fn for_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::For);
+ pattern(p);
+ if p.at(SyntaxKind::Comma) {
+ p.expected("keyword `in`");
+ p.hint("did you mean to use a destructuring pattern?");
+ if !p.eat_if(SyntaxKind::Ident) {
+ p.eat_if(SyntaxKind::Underscore);
+ }
+ p.eat_if(SyntaxKind::In);
+ } else {
+ p.expect(SyntaxKind::In);
+ }
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::ForLoop);
+}
+
+fn module_import(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Import);
+ code_expr(p);
+ if p.eat_if(SyntaxKind::Colon) && !p.eat_if(SyntaxKind::Star) {
+ import_items(p);
+ }
+ p.wrap(m, SyntaxKind::ModuleImport);
+}
+
+fn import_items(p: &mut Parser) {
+ let m = p.marker();
+ while !p.eof() && !p.at(SyntaxKind::Semicolon) {
+ if !p.eat_if(SyntaxKind::Ident) {
+ p.unexpected();
+ }
+ if p.current().is_terminator() {
+ break;
+ }
+ p.expect(SyntaxKind::Comma);
+ }
+ p.wrap(m, SyntaxKind::ImportItems);
+}
+
+fn module_include(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Include);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::ModuleInclude);
+}
+
+fn break_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Break);
+ p.wrap(m, SyntaxKind::LoopBreak);
+}
+
+fn continue_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Continue);
+ p.wrap(m, SyntaxKind::LoopContinue);
+}
+
+fn return_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Return);
+ if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::FuncReturn);
+}
+
+fn validate_parenthesized_at(p: &mut Parser, m: Marker) {
+ for child in p.post_process(m) {
+ let kind = child.kind();
+ match kind {
+ SyntaxKind::Array => validate_array(child.children_mut().iter_mut()),
+ SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()),
+ SyntaxKind::Underscore => {
+ child.convert_to_error(eco_format!(
+ "expected expression, found {}",
+ kind.name()
+ ));
+ }
+ _ => {}
+ }
+ }
+}
+
+fn validate_array_at(p: &mut Parser, m: Marker) {
+ validate_array(p.post_process(m))
+}
+
+fn validate_array<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) {
+ for child in children {
+ let kind = child.kind();
+ match kind {
+ SyntaxKind::Array => validate_array(child.children_mut().iter_mut()),
+ SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()),
+ SyntaxKind::Named | SyntaxKind::Keyed | SyntaxKind::Underscore => {
+ child.convert_to_error(eco_format!(
+ "expected expression, found {}",
+ kind.name()
+ ));
+ }
+ _ => {}
+ }
+ }
+}
+
+fn validate_dict_at(p: &mut Parser, m: Marker) {
+ validate_dict(p.post_process(m))
+}
+
+fn validate_dict<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) {
+ let mut used = HashSet::new();
+ for child in children {
+ match child.kind() {
+ SyntaxKind::Named | SyntaxKind::Keyed => {
+ let Some(first) = child.children_mut().first_mut() else { continue };
+ let key = match first.cast::<ast::Str>() {
+ Some(str) => str.get(),
+ None => first.text().clone(),
+ };
+
+ if !used.insert(key.clone()) {
+ first.convert_to_error(eco_format!("duplicate key: {key}"));
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {}
+ SyntaxKind::LeftParen
+ | SyntaxKind::RightParen
+ | SyntaxKind::Comma
+ | SyntaxKind::Colon
+ | SyntaxKind::Space => {}
+ kind => {
+ child.convert_to_error(eco_format!(
+ "expected named or keyed pair, found {}",
+ kind.name()
+ ));
+ }
+ }
+ }
+}
+
+fn validate_params_at(p: &mut Parser, m: Marker) {
+ let mut used_spread = false;
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Ident => {
+ if !used.insert(child.text().clone()) {
+ child.convert_to_error(eco_format!(
+ "duplicate parameter: {}",
+ child.text()
+ ));
+ }
+ }
+ SyntaxKind::Named => {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error(eco_format!(
+ "duplicate parameter: {}",
+ within.text()
+ ));
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {
+ let Some(within) = child.children_mut().last_mut() else { continue };
+ if used_spread {
+ child.convert_to_error("only one argument sink is allowed");
+ continue;
+ }
+ used_spread = true;
+ if within.kind() == SyntaxKind::Dots {
+ continue;
+ } else if within.kind() != SyntaxKind::Ident {
+ within.convert_to_error(eco_format!(
+ "expected identifier, found {}",
+ within.kind().name(),
+ ));
+ child.make_erroneous();
+ continue;
+ }
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error(eco_format!(
+ "duplicate parameter: {}",
+ within.text()
+ ));
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Array | SyntaxKind::Dict | SyntaxKind::Destructuring => {
+ validate_pattern(child.children_mut().iter_mut(), &mut used, false);
+ child.convert_to_kind(SyntaxKind::Destructuring);
+ }
+ SyntaxKind::LeftParen
+ | SyntaxKind::RightParen
+ | SyntaxKind::Comma
+ | SyntaxKind::Underscore => {}
+ kind => {
+ child.convert_to_error(eco_format!(
+ "expected identifier, named pair or argument sink, found {}",
+ kind.name()
+ ));
+ }
+ }
+ }
+}
+
+fn validate_args_at(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ if child.kind() == SyntaxKind::Named {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error(eco_format!(
+ "duplicate argument: {}",
+ within.text()
+ ));
+ child.make_erroneous();
+ }
+ } else if child.kind() == SyntaxKind::Underscore {
+ child.convert_to_error("unexpected underscore");
+ }
+ }
+}
+
+fn validate_pattern_at(p: &mut Parser, m: Marker, forbid_expressions: bool) {
+ let mut used = HashSet::new();
+ validate_pattern(p.post_process(m), &mut used, forbid_expressions);
+}
+
+fn validate_pattern<'a>(
+ children: impl Iterator<Item = &'a mut SyntaxNode>,
+ used: &mut HashSet<EcoString>,
+ forbid_expressions: bool,
+) {
+ let mut used_spread = false;
+ for child in children {
+ match child.kind() {
+ SyntaxKind::Ident => {
+ if !used.insert(child.text().clone()) {
+ child.convert_to_error(
+ "at most one binding per identifier is allowed",
+ );
+ }
+ }
+ SyntaxKind::Spread => {
+ let Some(within) = child.children_mut().last_mut() else { continue };
+ if used_spread {
+ child.convert_to_error("at most one destructuring sink is allowed");
+ continue;
+ }
+ used_spread = true;
+
+ if within.kind() == SyntaxKind::Dots {
+ continue;
+ } else if forbid_expressions && within.kind() != SyntaxKind::Ident {
+ within.convert_to_error(eco_format!(
+ "expected identifier, found {}",
+ within.kind().name(),
+ ));
+ child.make_erroneous();
+ continue;
+ }
+
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error(
+ "at most one binding per identifier is allowed",
+ );
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Named => {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error(
+ "at most one binding per identifier is allowed",
+ );
+ child.make_erroneous();
+ }
+
+ if forbid_expressions {
+ let Some(within) = child.children_mut().last_mut() else { return };
+ if within.kind() != SyntaxKind::Ident
+ && within.kind() != SyntaxKind::Underscore
+ {
+ within.convert_to_error(eco_format!(
+ "expected identifier, found {}",
+ within.kind().name(),
+ ));
+ child.make_erroneous();
+ }
+ }
+ }
+ SyntaxKind::LeftParen
+ | SyntaxKind::RightParen
+ | SyntaxKind::Comma
+ | SyntaxKind::Underscore => {}
+ kind => {
+ if forbid_expressions {
+ child.convert_to_error(eco_format!(
+ "expected identifier or destructuring sink, found {}",
+ kind.name()
+ ));
+ }
+ }
+ }
+ }
+}
+
+/// Manages parsing of a stream of tokens.
+struct Parser<'s> {
+ text: &'s str,
+ lexer: Lexer<'s>,
+ prev_end: usize,
+ current_start: usize,
+ current: SyntaxKind,
+ modes: Vec<LexMode>,
+ nodes: Vec<SyntaxNode>,
+ stop_at_newline: Vec<bool>,
+ balanced: bool,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct Marker(usize);
+
+impl<'s> Parser<'s> {
+ fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
+ let mut lexer = Lexer::new(text, mode);
+ lexer.jump(offset);
+ let current = lexer.next();
+ Self {
+ lexer,
+ text,
+ prev_end: offset,
+ current_start: offset,
+ current,
+ modes: vec![],
+ nodes: vec![],
+ stop_at_newline: vec![],
+ balanced: true,
+ }
+ }
+
+ fn finish(self) -> Vec<SyntaxNode> {
+ self.nodes
+ }
+
+ fn prev_end(&self) -> usize {
+ self.prev_end
+ }
+
+ fn current(&self) -> SyntaxKind {
+ self.current
+ }
+
+ fn current_start(&self) -> usize {
+ self.current_start
+ }
+
+ fn current_end(&self) -> usize {
+ self.lexer.cursor()
+ }
+
+ fn current_text(&self) -> &'s str {
+ &self.text[self.current_start..self.current_end()]
+ }
+
+ fn at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind
+ }
+
+ #[track_caller]
+ fn assert(&mut self, kind: SyntaxKind) {
+ assert_eq!(self.current, kind);
+ self.eat();
+ }
+
+ fn eof(&self) -> bool {
+ self.at(SyntaxKind::Eof)
+ }
+
+ fn directly_at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind && self.prev_end == self.current_start
+ }
+
+ /// Eats if at `kind`.
+ ///
+ /// Note: In math and code mode, this will ignore trivia in front of the
+ /// `kind`, To forbid skipping trivia, consider using `eat_if_direct`.
+ fn eat_if(&mut self, kind: SyntaxKind) -> bool {
+ let at = self.at(kind);
+ if at {
+ self.eat();
+ }
+ at
+ }
+
+ /// Eats only if currently at the start of `kind`.
+ fn eat_if_direct(&mut self, kind: SyntaxKind) -> bool {
+ let at = self.directly_at(kind);
+ if at {
+ self.eat();
+ }
+ at
+ }
+
+ fn convert(&mut self, kind: SyntaxKind) {
+ self.current = kind;
+ self.eat();
+ }
+
+ fn newline(&mut self) -> bool {
+ self.lexer.newline()
+ }
+
+ fn column(&self, at: usize) -> usize {
+ self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
+ }
+
+ fn marker(&self) -> Marker {
+ Marker(self.nodes.len())
+ }
+
+ fn node(&self, m: Marker) -> Option<&SyntaxNode> {
+ self.nodes.get(m.0)
+ }
+
+ fn node_mut(&mut self, m: Marker) -> Option<&mut SyntaxNode> {
+ self.nodes.get_mut(m.0)
+ }
+
+ fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
+ self.nodes[m.0..]
+ .iter_mut()
+ .filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
+ }
+
+ fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
+ self.wrap_within(from, self.before_trivia(), kind);
+ }
+
+ fn wrap_all(&mut self, from: Marker, kind: SyntaxKind) {
+ self.wrap_within(from, Marker(self.nodes.len()), kind)
+ }
+
+ fn wrap_within(&mut self, from: Marker, to: Marker, kind: SyntaxKind) {
+ let len = self.nodes.len();
+ let to = to.0.min(len);
+ let from = from.0.min(to);
+ let children = self.nodes.drain(from..to).collect();
+ self.nodes.insert(from, SyntaxNode::inner(kind, children));
+ }
+
+ fn progress(&self, offset: usize) -> bool {
+ offset < self.prev_end
+ }
+
+ fn enter(&mut self, mode: LexMode) {
+ self.modes.push(self.lexer.mode());
+ self.lexer.set_mode(mode);
+ }
+
+ fn exit(&mut self) {
+ let mode = self.modes.pop().unwrap();
+ if mode != self.lexer.mode() {
+ self.unskip();
+ self.lexer.set_mode(mode);
+ self.lexer.jump(self.current_start);
+ self.lex();
+ self.skip();
+ }
+ }
+
+ fn stop_at_newline(&mut self, stop: bool) {
+ self.stop_at_newline.push(stop);
+ }
+
+ fn unstop(&mut self) {
+ self.unskip();
+ self.stop_at_newline.pop();
+ self.lexer.jump(self.prev_end);
+ self.lex();
+ self.skip();
+ }
+
+ fn eat(&mut self) {
+ self.save();
+ self.lex();
+ self.skip();
+ }
+
+ fn skip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup {
+ while self.current.is_trivia() {
+ self.save();
+ self.lex();
+ }
+ }
+ }
+
+ fn unskip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
+ while self.nodes.last().map_or(false, |last| last.kind().is_trivia()) {
+ self.nodes.pop();
+ }
+
+ self.lexer.jump(self.prev_end);
+ self.lex();
+ }
+ }
+
+ fn save(&mut self) {
+ let text = self.current_text();
+ if self.at(SyntaxKind::Error) {
+ let message = self.lexer.take_error().unwrap();
+ self.nodes.push(SyntaxNode::error(message, text));
+ } else {
+ self.nodes.push(SyntaxNode::leaf(self.current, text));
+ }
+
+ if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() {
+ self.prev_end = self.current_end();
+ }
+ }
+
+ fn lex(&mut self) {
+ self.current_start = self.lexer.cursor();
+ self.current = self.lexer.next();
+ if self.lexer.mode() == LexMode::Code
+ && self.lexer.newline()
+ && self.stop_at_newline.last().copied().unwrap_or(false)
+ && !matches!(self.lexer.clone().next(), SyntaxKind::Else | SyntaxKind::Dot)
+ {
+ self.current = SyntaxKind::Eof;
+ }
+ }
+}
+
+impl<'s> Parser<'s> {
+ /// Consume the given syntax `kind` or produce an error.
+ fn expect(&mut self, kind: SyntaxKind) -> bool {
+ let at = self.at(kind);
+ if at {
+ self.eat();
+ } else if kind == SyntaxKind::Ident && self.current.is_keyword() {
+ self.expected_found(kind.name(), self.current.name());
+ } else {
+ self.balanced &= !kind.is_grouping();
+ self.expected(kind.name());
+ }
+ at
+ }
+
+ /// Produce an error that the given `thing` was expected.
+ fn expected(&mut self, thing: &str) {
+ if !self.after_error() {
+ self.expected_at(self.before_trivia(), thing);
+ }
+ }
+
+ /// Produce an error that the given `thing` was expected but another
+ /// thing was `found` and consume the next token.
+ fn expected_found(&mut self, thing: &str, found: &str) {
+ self.trim_errors();
+ self.convert_to_error(eco_format!("expected {thing}, found {found}"));
+ }
+
+ /// Produce an error that the given `thing` was expected at the position
+ /// of the marker `m`.
+ fn expected_at(&mut self, m: Marker, thing: &str) {
+ let message = eco_format!("expected {}", thing);
+ let error = SyntaxNode::error(message, "");
+ self.nodes.insert(m.0, error);
+ }
+
+ /// Produce an error for the unclosed delimiter `kind` at the position
+ /// `open`.
+ fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) {
+ if !self.eat_if(kind) {
+ self.nodes[open.0].convert_to_error("unclosed delimiter");
+ }
+ }
+
+ /// Consume the next token (if any) and produce an error stating that it was
+ /// unexpected.
+ fn unexpected(&mut self) {
+ self.trim_errors();
+ self.convert_to_error(eco_format!("unexpected {}", self.current.name()));
+ }
+
+ /// Consume the next token and turn it into an error.
+ fn convert_to_error(&mut self, message: EcoString) {
+ let kind = self.current;
+ let offset = self.nodes.len();
+ self.eat();
+ self.balanced &= !kind.is_grouping();
+ if !kind.is_error() {
+ self.nodes[offset].convert_to_error(message);
+ }
+ }
+
+ /// Adds a hint to the last node, if the last node is an error.
+ fn hint(&mut self, hint: impl Into<EcoString>) {
+ let m = self.before_trivia();
+ if m.0 > 0 {
+ self.nodes[m.0 - 1].hint(hint);
+ }
+ }
+
+ /// Get a marker after the last non-trivia node.
+ fn before_trivia(&self) -> Marker {
+ let mut i = self.nodes.len();
+ if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
+ while i > 0 && self.nodes[i - 1].kind().is_trivia() {
+ i -= 1;
+ }
+ }
+ Marker(i)
+ }
+
+ /// Whether the last non-trivia node is an error.
+ fn after_error(&mut self) -> bool {
+ let m = self.before_trivia();
+ m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
+ }
+
+ /// Remove trailing errors with zero length.
+ fn trim_errors(&mut self) {
+ let Marker(end) = self.before_trivia();
+ let mut start = end;
+ while start > 0
+ && self.nodes[start - 1].kind().is_error()
+ && self.nodes[start - 1].is_empty()
+ {
+ start -= 1;
+ }
+ self.nodes.drain(start..end);
+ }
+}
diff --git a/crates/typst-syntax/src/reparser.rs b/crates/typst-syntax/src/reparser.rs
new file mode 100644
index 00000000..a4186fa7
--- /dev/null
+++ b/crates/typst-syntax/src/reparser.rs
@@ -0,0 +1,322 @@
+use std::ops::Range;
+
+use super::{
+ is_newline, parse, reparse_block, reparse_markup, Span, SyntaxKind, SyntaxNode,
+};
+
+/// Refresh the given syntax node with as little parsing as possible.
+///
+/// Takes the new text, the range in the old text that was replaced and the
+/// length of the replacement and returns the range in the new text that was
+/// ultimately reparsed.
+///
+/// The high-level API for this function is
+/// [`Source::edit`](super::Source::edit).
+pub fn reparse(
+ root: &mut SyntaxNode,
+ text: &str,
+ replaced: Range<usize>,
+ replacement_len: usize,
+) -> Range<usize> {
+ try_reparse(text, replaced, replacement_len, None, root, 0).unwrap_or_else(|| {
+ let id = root.span().id();
+ *root = parse(text);
+ root.numberize(id, Span::FULL).unwrap();
+ 0..text.len()
+ })
+}
+
+/// Try to reparse inside the given node.
+fn try_reparse(
+ text: &str,
+ replaced: Range<usize>,
+ replacement_len: usize,
+ parent_kind: Option<SyntaxKind>,
+ node: &mut SyntaxNode,
+ offset: usize,
+) -> Option<Range<usize>> {
+ // The range of children which overlap with the edit.
+ #[allow(clippy::reversed_empty_ranges)]
+ let mut overlap = usize::MAX..0;
+ let mut cursor = offset;
+ let node_kind = node.kind();
+
+ for (i, child) in node.children_mut().iter_mut().enumerate() {
+ let prev_range = cursor..cursor + child.len();
+ let prev_len = child.len();
+ let prev_desc = child.descendants();
+
+ // Does the child surround the edit?
+ // If so, try to reparse within it or itself.
+ if !child.is_leaf() && includes(&prev_range, &replaced) {
+ let new_len = prev_len + replacement_len - replaced.len();
+ let new_range = cursor..cursor + new_len;
+
+ // Try to reparse within the child.
+ if let Some(range) = try_reparse(
+ text,
+ replaced.clone(),
+ replacement_len,
+ Some(node_kind),
+ child,
+ cursor,
+ ) {
+ assert_eq!(child.len(), new_len);
+ let new_desc = child.descendants();
+ node.update_parent(prev_len, new_len, prev_desc, new_desc);
+ return Some(range);
+ }
+
+ // If the child is a block, try to reparse the block.
+ if child.kind().is_block() {
+ if let Some(newborn) = reparse_block(text, new_range.clone()) {
+ return node
+ .replace_children(i..i + 1, vec![newborn])
+ .is_ok()
+ .then_some(new_range);
+ }
+ }
+ }
+
+ // Does the child overlap with the edit?
+ if overlaps(&prev_range, &replaced) {
+ overlap.start = overlap.start.min(i);
+ overlap.end = i + 1;
+ }
+
+ // Is the child beyond the edit?
+ if replaced.end < cursor {
+ break;
+ }
+
+ cursor += child.len();
+ }
+
+ // Try to reparse a range of markup expressions within markup. This is only
+ // possible if the markup is top-level or contained in a block, not if it is
+ // contained in things like headings or lists because too much can go wrong
+ // with indent and line breaks.
+ if overlap.is_empty()
+ || node.kind() != SyntaxKind::Markup
+ || !matches!(parent_kind, None | Some(SyntaxKind::ContentBlock))
+ {
+ return None;
+ }
+
+ let children = node.children_mut();
+
+ // Reparse a segment. Retries until it works, taking exponentially more
+ // children into account.
+ let mut expansion = 1;
+ loop {
+ // Add slack in both directions.
+ let mut start = overlap.start.saturating_sub(expansion.max(2));
+ let mut end = (overlap.end + expansion).min(children.len());
+
+ // Expand to the left.
+ while start > 0 && expand(&children[start]) {
+ start -= 1;
+ }
+
+ // Expand to the right.
+ while end < children.len() && expand(&children[end]) {
+ end += 1;
+ }
+
+ // Also take hashtag.
+ if start > 0 && children[start - 1].kind() == SyntaxKind::Hashtag {
+ start -= 1;
+ }
+
+ // Synthesize what `at_start` and `nesting` would be at the start of the
+ // reparse.
+ let mut prefix_len = 0;
+ let mut nesting = 0;
+ let mut at_start = true;
+ for child in &children[..start] {
+ prefix_len += child.len();
+ next_at_start(child, &mut at_start);
+ next_nesting(child, &mut nesting);
+ }
+
+ // Determine what `at_start` will have to be at the end of the reparse.
+ let mut prev_len = 0;
+ let mut prev_at_start_after = at_start;
+ let mut prev_nesting_after = nesting;
+ for child in &children[start..end] {
+ prev_len += child.len();
+ next_at_start(child, &mut prev_at_start_after);
+ next_nesting(child, &mut prev_nesting_after);
+ }
+
+ // Determine the range in the new text that we want to reparse.
+ let shifted = offset + prefix_len;
+ let new_len = prev_len + replacement_len - replaced.len();
+ let new_range = shifted..shifted + new_len;
+ let at_end = end == children.len();
+
+ // Stop parsing early if this kind is encountered.
+ let stop_kind = match parent_kind {
+ Some(_) => SyntaxKind::RightBracket,
+ None => SyntaxKind::Eof,
+ };
+
+ // Reparse!
+ let reparsed = reparse_markup(
+ text,
+ new_range.clone(),
+ &mut at_start,
+ &mut nesting,
+ |kind| kind == stop_kind,
+ );
+
+ if let Some(newborns) = reparsed {
+ // If more children follow, at_start must match its previous value.
+ // Similarly, if we children follow or we not top-level the nesting
+ // must match its previous value.
+ if (at_end || at_start == prev_at_start_after)
+ && ((at_end && parent_kind.is_none()) || nesting == prev_nesting_after)
+ {
+ return node
+ .replace_children(start..end, newborns)
+ .is_ok()
+ .then_some(new_range);
+ }
+ }
+
+ // If it didn't even work with all children, we give up.
+ if start == 0 && at_end {
+ break;
+ }
+
+ // Exponential expansion to both sides.
+ expansion *= 2;
+ }
+
+ None
+}
+
+/// Whether the inner range is fully contained in the outer one (no touching).
+fn includes(outer: &Range<usize>, inner: &Range<usize>) -> bool {
+ outer.start < inner.start && outer.end > inner.end
+}
+
+/// Whether the first and second range overlap or touch.
+fn overlaps(first: &Range<usize>, second: &Range<usize>) -> bool {
+ (first.start <= second.start && second.start <= first.end)
+ || (second.start <= first.start && first.start <= second.end)
+}
+
+/// Whether the selection should be expanded beyond a node of this kind.
+fn expand(node: &SyntaxNode) -> bool {
+ let kind = node.kind();
+ kind.is_trivia()
+ || kind.is_error()
+ || kind == SyntaxKind::Semicolon
+ || node.text() == "/"
+ || node.text() == ":"
+}
+
+/// Whether `at_start` would still be true after this node given the
+/// previous value of the property.
+fn next_at_start(node: &SyntaxNode, at_start: &mut bool) {
+ let kind = node.kind();
+ if kind.is_trivia() {
+ *at_start |= kind == SyntaxKind::Parbreak
+ || (kind == SyntaxKind::Space && node.text().chars().any(is_newline));
+ } else {
+ *at_start = false;
+ }
+}
+
+/// Update `nesting` based on the node.
+fn next_nesting(node: &SyntaxNode, nesting: &mut usize) {
+ if node.kind() == SyntaxKind::Text {
+ match node.text().as_str() {
+ "[" => *nesting += 1,
+ "]" if *nesting > 0 => *nesting -= 1,
+ _ => {}
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ops::Range;
+
+ use super::super::{parse, Source, Span};
+
+ #[track_caller]
+ fn test(prev: &str, range: Range<usize>, with: &str, incremental: bool) {
+ let mut source = Source::detached(prev);
+ let prev = source.root().clone();
+ let range = source.edit(range, with);
+ let mut found = source.root().clone();
+ let mut expected = parse(source.text());
+ found.synthesize(Span::detached());
+ expected.synthesize(Span::detached());
+ if found != expected {
+ eprintln!("source: {:?}", source.text());
+ eprintln!("previous: {prev:#?}");
+ eprintln!("expected: {expected:#?}");
+ eprintln!("found: {found:#?}");
+ panic!("test failed");
+ }
+ if incremental {
+ assert_ne!(source.len_bytes(), range.len(), "should have been incremental");
+ } else {
+ assert_eq!(
+ source.len_bytes(),
+ range.len(),
+ "shouldn't have been incremental"
+ );
+ }
+ }
+
+ #[test]
+ fn test_reparse_markup() {
+ test("abc~def~gh~", 5..6, "+", true);
+ test("~~~~~~~", 3..4, "A", true);
+ test("abc~~", 1..2, "", true);
+ test("#var. hello", 5..6, " ", false);
+ test("#var;hello", 9..10, "a", false);
+ test("https:/world", 7..7, "/", false);
+ test("hello world", 7..12, "walkers", false);
+ test("some content", 0..12, "", false);
+ test("", 0..0, "do it", false);
+ test("a d e", 1..3, " b c d", false);
+ test("~*~*~", 2..2, "*", false);
+ test("::1\n2. a\n3", 7..7, "4", true);
+ test("* #{1+2} *", 6..7, "3", true);
+ test("#{(0, 1, 2)}", 6..7, "11pt", true);
+ test("\n= A heading", 4..4, "n evocative", false);
+ test("#call() abc~d", 7..7, "[]", true);
+ test("a your thing a", 6..7, "a", false);
+ test("#grid(columns: (auto, 1fr, 40%))", 16..20, "4pt", false);
+ test("abc\n= a heading\njoke", 3..4, "\nmore\n\n", true);
+ test("#show f: a => b..", 16..16, "c", false);
+ test("#for", 4..4, "//", false);
+ test("a\n#let \nb", 7..7, "i", true);
+ test(r"#{{let x = z}; a = 1} b", 7..7, "//", false);
+ test(r#"a ```typst hello```"#, 16..17, "", false);
+ }
+
+ #[test]
+ fn test_reparse_block() {
+ test("Hello #{ x + 1 }!", 9..10, "abc", true);
+ test("A#{}!", 3..3, "\"", false);
+ test("#{ [= x] }!", 5..5, "=", true);
+ test("#[[]]", 3..3, "\\", true);
+ test("#[[ab]]", 4..5, "\\", true);
+ test("#{}}", 2..2, "{", false);
+ test("A: #[BC]", 6..6, "{", true);
+ test("A: #[BC]", 6..6, "#{", true);
+ test("A: #[BC]", 6..6, "#{}", true);
+ test("#{\"ab\"}A", 5..5, "c", true);
+ test("#{\"ab\"}A", 5..6, "c", false);
+ test("a#[]b", 3..3, "#{", true);
+ test("a#{call(); abc}b", 8..8, "[]", true);
+ test("a #while x {\n g(x) \n} b", 12..12, "//", true);
+ test("a#[]b", 3..3, "[hey]", true);
+ }
+}
diff --git a/crates/typst-syntax/src/source.rs b/crates/typst-syntax/src/source.rs
new file mode 100644
index 00000000..25b3b86c
--- /dev/null
+++ b/crates/typst-syntax/src/source.rs
@@ -0,0 +1,423 @@
+//! Source file management.
+
+use std::fmt::{self, Debug, Formatter};
+use std::hash::{Hash, Hasher};
+use std::ops::Range;
+use std::sync::Arc;
+
+use comemo::Prehashed;
+
+use super::reparser::reparse;
+use super::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode};
+
+/// A source file.
+///
+/// All line and column indices start at zero, just like byte indices. Only for
+/// user-facing display, you should add 1 to them.
+///
+/// Values of this type are cheap to clone and hash.
+#[derive(Clone)]
+pub struct Source(Arc<Repr>);
+
+/// The internal representation.
+#[derive(Clone)]
+struct Repr {
+ id: FileId,
+ text: Prehashed<String>,
+ root: Prehashed<SyntaxNode>,
+ lines: Vec<Line>,
+}
+
+impl Source {
+ /// Create a new source file.
+ #[tracing::instrument(skip_all)]
+ pub fn new(id: FileId, text: String) -> Self {
+ let mut root = parse(&text);
+ root.numberize(id, Span::FULL).unwrap();
+ Self(Arc::new(Repr {
+ id,
+ lines: lines(&text),
+ text: Prehashed::new(text),
+ root: Prehashed::new(root),
+ }))
+ }
+
+ /// Create a source file without a real id and path, usually for testing.
+ pub fn detached(text: impl Into<String>) -> Self {
+ Self::new(FileId::detached(), text.into())
+ }
+
+ /// Create a source file with the same synthetic span for all nodes.
+ pub fn synthesized(text: String, span: Span) -> Self {
+ let mut root = parse(&text);
+ root.synthesize(span);
+ Self(Arc::new(Repr {
+ id: FileId::detached(),
+ lines: lines(&text),
+ text: Prehashed::new(text),
+ root: Prehashed::new(root),
+ }))
+ }
+
+ /// The root node of the file's untyped syntax tree.
+ pub fn root(&self) -> &SyntaxNode {
+ &self.0.root
+ }
+
+ /// The id of the source file.
+ pub fn id(&self) -> FileId {
+ self.0.id
+ }
+
+ /// The whole source as a string slice.
+ pub fn text(&self) -> &str {
+ &self.0.text
+ }
+
+ /// Slice out the part of the source code enclosed by the range.
+ pub fn get(&self, range: Range<usize>) -> Option<&str> {
+ self.text().get(range)
+ }
+
+ /// Fully replace the source text.
+ pub fn replace(&mut self, text: String) {
+ let inner = Arc::make_mut(&mut self.0);
+ inner.text = Prehashed::new(text);
+ inner.lines = lines(&inner.text);
+ let mut root = parse(&inner.text);
+ root.numberize(inner.id, Span::FULL).unwrap();
+ inner.root = Prehashed::new(root);
+ }
+
+ /// Edit the source file by replacing the given range.
+ ///
+ /// Returns the range in the new source that was ultimately reparsed.
+ ///
+ /// The method panics if the `replace` range is out of bounds.
+ #[track_caller]
+ pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
+ let start_byte = replace.start;
+ let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
+ let line = self.byte_to_line(start_byte).unwrap();
+
+ let inner = Arc::make_mut(&mut self.0);
+
+ // Update the text itself.
+ inner.text.update(|text| text.replace_range(replace.clone(), with));
+
+ // Remove invalidated line starts.
+ inner.lines.truncate(line + 1);
+
+ // Handle adjoining of \r and \n.
+ if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
+ inner.lines.pop();
+ }
+
+ // Recalculate the line starts after the edit.
+ inner.lines.extend(lines_from(
+ start_byte,
+ start_utf16,
+ &inner.text[start_byte..],
+ ));
+
+ // Incrementally reparse the replaced range.
+ inner
+ .root
+ .update(|root| reparse(root, &inner.text, replace, with.len()))
+ }
+
+ /// Get the length of the file in UTF-8 encoded bytes.
+ pub fn len_bytes(&self) -> usize {
+ self.text().len()
+ }
+
+ /// Get the length of the file in UTF-16 code units.
+ pub fn len_utf16(&self) -> usize {
+ let last = self.0.lines.last().unwrap();
+ last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
+ }
+
+ /// Get the length of the file in lines.
+ pub fn len_lines(&self) -> usize {
+ self.0.lines.len()
+ }
+
+ /// Find the node with the given span.
+ ///
+ /// Returns `None` if the span does not point into this source file.
+ pub fn find(&self, span: Span) -> Option<LinkedNode<'_>> {
+ LinkedNode::new(self.root()).find(span)
+ }
+
+ /// Get the byte range for the given span in this file.
+ ///
+ /// Panics if the span does not point into this source file.
+ #[track_caller]
+ pub fn range(&self, span: Span) -> Range<usize> {
+ self.find(span)
+ .expect("span does not point into this source file")
+ .range()
+ }
+
+ /// Return the index of the UTF-16 code unit at the byte index.
+ pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
+ let line_idx = self.byte_to_line(byte_idx)?;
+ let line = self.0.lines.get(line_idx)?;
+ let head = self.0.text.get(line.byte_idx..byte_idx)?;
+ Some(line.utf16_idx + len_utf16(head))
+ }
+
+ /// Return the index of the line that contains the given byte index.
+ pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
+ (byte_idx <= self.0.text.len()).then(|| {
+ match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
+ Ok(i) => i,
+ Err(i) => i - 1,
+ }
+ })
+ }
+
+ /// Return the index of the column at the byte index.
+ ///
+ /// The column is defined as the number of characters in the line before the
+ /// byte index.
+ pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
+ let line = self.byte_to_line(byte_idx)?;
+ let start = self.line_to_byte(line)?;
+ let head = self.get(start..byte_idx)?;
+ Some(head.chars().count())
+ }
+
+ /// Return the byte index at the UTF-16 code unit.
+ pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
+ let line = self.0.lines.get(
+ match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
+ Ok(i) => i,
+ Err(i) => i - 1,
+ },
+ )?;
+
+ let mut k = line.utf16_idx;
+ for (i, c) in self.0.text[line.byte_idx..].char_indices() {
+ if k >= utf16_idx {
+ return Some(line.byte_idx + i);
+ }
+ k += c.len_utf16();
+ }
+
+ (k == utf16_idx).then_some(self.0.text.len())
+ }
+
+ /// Return the byte position at which the given line starts.
+ pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
+ self.0.lines.get(line_idx).map(|line| line.byte_idx)
+ }
+
+ /// Return the range which encloses the given line.
+ pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
+ let start = self.line_to_byte(line_idx)?;
+ let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
+ Some(start..end)
+ }
+
+ /// Return the byte index of the given (line, column) pair.
+ ///
+ /// The column defines the number of characters to go beyond the start of
+ /// the line.
+ pub fn line_column_to_byte(
+ &self,
+ line_idx: usize,
+ column_idx: usize,
+ ) -> Option<usize> {
+ let range = self.line_to_range(line_idx)?;
+ let line = self.get(range.clone())?;
+ let mut chars = line.chars();
+ for _ in 0..column_idx {
+ chars.next();
+ }
+ Some(range.start + (line.len() - chars.as_str().len()))
+ }
+}
+
+impl Debug for Source {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "Source({})", self.id().path().display())
+ }
+}
+
+impl Hash for Source {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.0.id.hash(state);
+ self.0.text.hash(state);
+ self.0.root.hash(state);
+ }
+}
+
+impl AsRef<str> for Source {
+ fn as_ref(&self) -> &str {
+ self.text()
+ }
+}
+
+/// Metadata about a line.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct Line {
+ /// The UTF-8 byte offset where the line starts.
+ byte_idx: usize,
+ /// The UTF-16 codepoint offset where the line starts.
+ utf16_idx: usize,
+}
+
+/// Create a line vector.
+fn lines(text: &str) -> Vec<Line> {
+ std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
+ .chain(lines_from(0, 0, text))
+ .collect()
+}
+
+/// Compute a line iterator from an offset.
+fn lines_from(
+ byte_offset: usize,
+ utf16_offset: usize,
+ text: &str,
+) -> impl Iterator<Item = Line> + '_ {
+ let mut s = unscanny::Scanner::new(text);
+ let mut utf16_idx = utf16_offset;
+
+ std::iter::from_fn(move || {
+ s.eat_until(|c: char| {
+ utf16_idx += c.len_utf16();
+ is_newline(c)
+ });
+
+ if s.done() {
+ return None;
+ }
+
+ if s.eat() == Some('\r') && s.eat_if('\n') {
+ utf16_idx += 1;
+ }
+
+ Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
+ })
+}
+
+/// The number of code units this string would use if it was encoded in
+/// UTF16. This runs in linear time.
+fn len_utf16(string: &str) -> usize {
+ string.chars().map(char::len_utf16).sum()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
+
+ #[test]
+ fn test_source_file_new() {
+ let source = Source::detached(TEST);
+ assert_eq!(
+ source.0.lines,
+ [
+ Line { byte_idx: 0, utf16_idx: 0 },
+ Line { byte_idx: 7, utf16_idx: 6 },
+ Line { byte_idx: 15, utf16_idx: 12 },
+ Line { byte_idx: 18, utf16_idx: 15 },
+ ]
+ );
+ }
+
+ #[test]
+ fn test_source_file_pos_to_line() {
+ let source = Source::detached(TEST);
+ assert_eq!(source.byte_to_line(0), Some(0));
+ assert_eq!(source.byte_to_line(2), Some(0));
+ assert_eq!(source.byte_to_line(6), Some(0));
+ assert_eq!(source.byte_to_line(7), Some(1));
+ assert_eq!(source.byte_to_line(8), Some(1));
+ assert_eq!(source.byte_to_line(12), Some(1));
+ assert_eq!(source.byte_to_line(21), Some(3));
+ assert_eq!(source.byte_to_line(22), None);
+ }
+
+ #[test]
+ fn test_source_file_pos_to_column() {
+ let source = Source::detached(TEST);
+ assert_eq!(source.byte_to_column(0), Some(0));
+ assert_eq!(source.byte_to_column(2), Some(1));
+ assert_eq!(source.byte_to_column(6), Some(5));
+ assert_eq!(source.byte_to_column(7), Some(0));
+ assert_eq!(source.byte_to_column(8), Some(1));
+ assert_eq!(source.byte_to_column(12), Some(2));
+ }
+
+ #[test]
+ fn test_source_file_utf16() {
+ #[track_caller]
+ fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
+ let middle = source.byte_to_utf16(byte_idx).unwrap();
+ let result = source.utf16_to_byte(middle).unwrap();
+ assert_eq!(middle, utf16_idx);
+ assert_eq!(result, byte_idx);
+ }
+
+ let source = Source::detached(TEST);
+ roundtrip(&source, 0, 0);
+ roundtrip(&source, 2, 1);
+ roundtrip(&source, 3, 2);
+ roundtrip(&source, 8, 7);
+ roundtrip(&source, 12, 9);
+ roundtrip(&source, 21, 18);
+ assert_eq!(source.byte_to_utf16(22), None);
+ assert_eq!(source.utf16_to_byte(19), None);
+ }
+
+ #[test]
+ fn test_source_file_roundtrip() {
+ #[track_caller]
+ fn roundtrip(source: &Source, byte_idx: usize) {
+ let line = source.byte_to_line(byte_idx).unwrap();
+ let column = source.byte_to_column(byte_idx).unwrap();
+ let result = source.line_column_to_byte(line, column).unwrap();
+ assert_eq!(result, byte_idx);
+ }
+
+ let source = Source::detached(TEST);
+ roundtrip(&source, 0);
+ roundtrip(&source, 7);
+ roundtrip(&source, 12);
+ roundtrip(&source, 21);
+ }
+
+ #[test]
+ fn test_source_file_edit() {
+ // This tests only the non-parser parts. The reparsing itself is
+ // tested separately.
+ #[track_caller]
+ fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
+ let mut source = Source::detached(prev);
+ let result = Source::detached(after);
+ source.edit(range, with);
+ assert_eq!(source.text(), result.text());
+ assert_eq!(source.0.lines, result.0.lines);
+ }
+
+ // Test inserting at the beginning.
+ test("abc\n", 0..0, "hi\n", "hi\nabc\n");
+ test("\nabc", 0..0, "hi\r", "hi\r\nabc");
+
+ // Test editing in the middle.
+ test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
+
+ // Test appending.
+ test("abc\ndef", 7..7, "hi", "abc\ndefhi");
+ test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
+
+ // Test appending with adjoining \r and \n.
+ test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
+
+ // Test removing everything.
+ test(TEST, 0..21, "", "");
+ }
+}
diff --git a/crates/typst-syntax/src/span.rs b/crates/typst-syntax/src/span.rs
new file mode 100644
index 00000000..8715e476
--- /dev/null
+++ b/crates/typst-syntax/src/span.rs
@@ -0,0 +1,128 @@
+use std::fmt::{self, Debug, Formatter};
+use std::num::NonZeroU64;
+use std::ops::Range;
+
+use super::FileId;
+
+/// A unique identifier for a syntax node.
+///
+/// This is used throughout the compiler to track which source section an error
+/// or element stems from. Can be [mapped back](super::Source::range) to a byte
+/// range for user facing display.
+///
+/// During editing, the span values stay mostly stable, even for nodes behind an
+/// insertion. This is not true for simple ranges as they would shift. Spans can
+/// be used as inputs to memoized functions without hurting cache performance
+/// when text is inserted somewhere in the document other than the end.
+///
+/// Span ids are ordered in the syntax tree to enable quickly finding the node
+/// with some id:
+/// - The id of a parent is always smaller than the ids of any of its children.
+/// - The id of a node is always greater than any id in the subtrees of any left
+/// sibling and smaller than any id in the subtrees of any right sibling.
+///
+/// This type takes up 8 bytes and is null-optimized (i.e. `Option<Span>` also
+/// takes 8 bytes).
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct Span(NonZeroU64);
+
+impl Span {
+ /// The full range of numbers available for span numbering.
+ pub const FULL: Range<u64> = 2..(1 << Self::BITS);
+ const DETACHED: u64 = 1;
+
+ // Data layout:
+ // | 16 bits source id | 48 bits number |
+ const BITS: usize = 48;
+
+ /// Create a new span from a source id and a unique number.
+ ///
+ /// Panics if the `number` is not contained in `FULL`.
+ #[track_caller]
+ pub const fn new(id: FileId, number: u64) -> Self {
+ assert!(
+ Self::FULL.start <= number && number < Self::FULL.end,
+ "span number outside valid range"
+ );
+
+ Self::pack(id, number)
+ }
+
+ /// A span that does not point into any source file.
+ pub const fn detached() -> Self {
+ Self::pack(FileId::detached(), Self::DETACHED)
+ }
+
+ /// Pack the components into a span.
+ #[track_caller]
+ const fn pack(id: FileId, number: u64) -> Span {
+ let bits = ((id.as_u16() as u64) << Self::BITS) | number;
+ match NonZeroU64::new(bits) {
+ Some(v) => Self(v),
+ None => panic!("span encoding is zero"),
+ }
+ }
+
+ /// The id of the source file the span points into.
+ pub const fn id(self) -> FileId {
+ FileId::from_u16((self.0.get() >> Self::BITS) as u16)
+ }
+
+ /// The unique number of the span within its source file.
+ pub const fn number(self) -> u64 {
+ self.0.get() & ((1 << Self::BITS) - 1)
+ }
+
+ /// Whether the span is detached.
+ pub const fn is_detached(self) -> bool {
+ self.id().is_detached()
+ }
+}
+
+/// A value with a span locating it in the source code.
+#[derive(Copy, Clone, Eq, PartialEq, Hash)]
+pub struct Spanned<T> {
+ /// The spanned value.
+ pub v: T,
+ /// The value's location in source code.
+ pub span: Span,
+}
+
+impl<T> Spanned<T> {
+ /// Create a new instance from a value and its span.
+ pub fn new(v: T, span: Span) -> Self {
+ Self { v, span }
+ }
+
+ /// Convert from `&Spanned<T>` to `Spanned<&T>`
+ pub fn as_ref(&self) -> Spanned<&T> {
+ Spanned { v: &self.v, span: self.span }
+ }
+
+ /// Map the value using a function.
+ pub fn map<F, U>(self, f: F) -> Spanned<U>
+ where
+ F: FnOnce(T) -> U,
+ {
+ Spanned { v: f(self.v), span: self.span }
+ }
+}
+
+impl<T: Debug> Debug for Spanned<T> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.v.fmt(f)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{FileId, Span};
+
+ #[test]
+ fn test_span_encoding() {
+ let id = FileId::from_u16(5);
+ let span = Span::new(id, 10);
+ assert_eq!(span.id(), id);
+ assert_eq!(span.number(), 10);
+ }
+}