From ebfdb1dafa430786db10dad2ef7d5467c1bdbed1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 2 Jul 2023 19:59:52 +0200 Subject: Move everything into `crates/` directory --- src/syntax/ast.rs | 1994 ------------------------------------------------ src/syntax/kind.rs | 448 ----------- src/syntax/lexer.rs | 738 ------------------ src/syntax/mod.rs | 23 - src/syntax/node.rs | 889 --------------------- src/syntax/parser.rs | 1643 --------------------------------------- src/syntax/reparser.rs | 322 -------- src/syntax/source.rs | 421 ---------- src/syntax/span.rs | 148 ---- 9 files changed, 6626 deletions(-) delete mode 100644 src/syntax/ast.rs delete mode 100644 src/syntax/kind.rs delete mode 100644 src/syntax/lexer.rs delete mode 100644 src/syntax/mod.rs delete mode 100644 src/syntax/node.rs delete mode 100644 src/syntax/parser.rs delete mode 100644 src/syntax/reparser.rs delete mode 100644 src/syntax/source.rs delete mode 100644 src/syntax/span.rs (limited to 'src/syntax') diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs deleted file mode 100644 index 7d5e2989..00000000 --- a/src/syntax/ast.rs +++ /dev/null @@ -1,1994 +0,0 @@ -//! A typed layer over the untyped syntax tree. -//! -//! The AST is rooted in the [`Markup`] node. - -use std::num::NonZeroUsize; -use std::ops::Deref; - -use ecow::EcoString; -use unscanny::Scanner; - -use super::{ - is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode, -}; -use crate::geom::{AbsUnit, AngleUnit}; -use crate::util::NonZeroExt; - -/// A typed AST node. -pub trait AstNode: Sized { - /// Convert a node into its typed variant. - fn from_untyped(node: &SyntaxNode) -> Option; - - /// A reference to the underlying syntax node. - fn as_untyped(&self) -> &SyntaxNode; - - /// The source code location. - fn span(&self) -> Span { - self.as_untyped().span() - } -} - -macro_rules! node { - ($(#[$attr:meta])* $name:ident) => { - #[derive(Debug, Default, Clone, Hash)] - #[repr(transparent)] - $(#[$attr])* - pub struct $name(SyntaxNode); - - impl AstNode for $name { - fn from_untyped(node: &SyntaxNode) -> Option { - if matches!(node.kind(), SyntaxKind::$name) { - Some(Self(node.clone())) - } else { - Option::None - } - } - - fn as_untyped(&self) -> &SyntaxNode { - &self.0 - } - } - }; -} - -node! { - /// The syntactical root capable of representing a full parsed document. - Markup -} - -impl Markup { - /// The expressions. - pub fn exprs(&self) -> impl DoubleEndedIterator + '_ { - let mut was_stmt = false; - self.0 - .children() - .filter(move |node| { - // Ignore newline directly after statements without semicolons. - let kind = node.kind(); - let keep = !was_stmt || node.kind() != SyntaxKind::Space; - was_stmt = kind.is_stmt(); - keep - }) - .filter_map(Expr::cast_with_space) - } -} - -/// An expression in markup, math or code. -#[derive(Debug, Clone, Hash)] -pub enum Expr { - /// Plain text without markup. - Text(Text), - /// Whitespace in markup or math. Has at most one newline in markup, as more - /// indicate a paragraph break. - Space(Space), - /// A forced line break: `\`. - Linebreak(Linebreak), - /// A paragraph break, indicated by one or multiple blank lines. - Parbreak(Parbreak), - /// An escape sequence: `\#`, `\u{1F5FA}`. - Escape(Escape), - /// A shorthand for a unicode codepoint. For example, `~` for non-breaking - /// space or `-?` for a soft hyphen. - Shorthand(Shorthand), - /// A smart quote: `'` or `"`. - SmartQuote(SmartQuote), - /// Strong content: `*Strong*`. - Strong(Strong), - /// Emphasized content: `_Emphasized_`. - Emph(Emph), - /// Raw text with optional syntax highlighting: `` `...` ``. - Raw(Raw), - /// A hyperlink: `https://typst.org`. - Link(Link), - /// A label: ``. - Label(Label), - /// A reference: `@target`, `@target[..]`. - Ref(Ref), - /// A section heading: `= Introduction`. - Heading(Heading), - /// An item in a bullet list: `- ...`. - List(ListItem), - /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. - Enum(EnumItem), - /// An item in a term list: `/ Term: Details`. - Term(TermItem), - /// A mathematical equation: `$x$`, `$ x^2 $`. - Equation(Equation), - /// The contents of a mathematical equation: `x^2 + 1`. - Math(Math), - /// An identifier in math: `pi`. - MathIdent(MathIdent), - /// An alignment point in math: `&`. - MathAlignPoint(MathAlignPoint), - /// Matched delimiters in math: `[x + y]`. - MathDelimited(MathDelimited), - /// A base with optional attachments in math: `a_1^2`. - MathAttach(MathAttach), - /// A fraction in math: `x/2`. - MathFrac(MathFrac), - /// A root in math: `√x`, `∛x` or `∜x`. - MathRoot(MathRoot), - /// An identifier: `left`. - Ident(Ident), - /// The `none` literal. - None(None), - /// The `auto` literal. - Auto(Auto), - /// A boolean: `true`, `false`. - Bool(Bool), - /// An integer: `120`. - Int(Int), - /// A floating-point number: `1.2`, `10e-4`. - Float(Float), - /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`. - Numeric(Numeric), - /// A quoted string: `"..."`. - Str(Str), - /// A code block: `{ let x = 1; x + 2 }`. - Code(CodeBlock), - /// A content block: `[*Hi* there!]`. - Content(ContentBlock), - /// A grouped expression: `(1 + 2)`. - Parenthesized(Parenthesized), - /// An array: `(1, "hi", 12cm)`. - Array(Array), - /// A dictionary: `(thickness: 3pt, pattern: dashed)`. - Dict(Dict), - /// A unary operation: `-x`. - Unary(Unary), - /// A binary operation: `a + b`. - Binary(Binary), - /// A field access: `properties.age`. - FieldAccess(FieldAccess), - /// An invocation of a function or method: `f(x, y)`. - FuncCall(FuncCall), - /// A closure: `(x, y) => z`. - Closure(Closure), - /// A let binding: `let x = 1`. - Let(LetBinding), - //// A destructuring assignment: `(x, y) = (1, 2)`. - DestructAssign(DestructAssignment), - /// A set rule: `set text(...)`. - Set(SetRule), - /// A show rule: `show heading: it => emph(it.body)`. - Show(ShowRule), - /// An if-else conditional: `if x { y } else { z }`. - Conditional(Conditional), - /// A while loop: `while x { y }`. - While(WhileLoop), - /// A for loop: `for x in y { z }`. - For(ForLoop), - /// A module import: `import "utils.typ": a, b, c`. - Import(ModuleImport), - /// A module include: `include "chapter1.typ"`. - Include(ModuleInclude), - /// A break from a loop: `break`. - Break(LoopBreak), - /// A continue in a loop: `continue`. - Continue(LoopContinue), - /// A return from a function: `return`, `return x + 1`. - Return(FuncReturn), -} - -impl Expr { - fn cast_with_space(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Space => node.cast().map(Self::Space), - _ => Self::from_untyped(node), - } - } -} - -impl AstNode for Expr { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Linebreak => node.cast().map(Self::Linebreak), - SyntaxKind::Parbreak => node.cast().map(Self::Parbreak), - SyntaxKind::Text => node.cast().map(Self::Text), - SyntaxKind::Escape => node.cast().map(Self::Escape), - SyntaxKind::Shorthand => node.cast().map(Self::Shorthand), - SyntaxKind::SmartQuote => node.cast().map(Self::SmartQuote), - SyntaxKind::Strong => node.cast().map(Self::Strong), - SyntaxKind::Emph => node.cast().map(Self::Emph), - SyntaxKind::Raw => node.cast().map(Self::Raw), - SyntaxKind::Link => node.cast().map(Self::Link), - SyntaxKind::Label => node.cast().map(Self::Label), - SyntaxKind::Ref => node.cast().map(Self::Ref), - SyntaxKind::Heading => node.cast().map(Self::Heading), - SyntaxKind::ListItem => node.cast().map(Self::List), - SyntaxKind::EnumItem => node.cast().map(Self::Enum), - SyntaxKind::TermItem => node.cast().map(Self::Term), - SyntaxKind::Equation => node.cast().map(Self::Equation), - SyntaxKind::Math => node.cast().map(Self::Math), - SyntaxKind::MathIdent => node.cast().map(Self::MathIdent), - SyntaxKind::MathAlignPoint => node.cast().map(Self::MathAlignPoint), - SyntaxKind::MathDelimited => node.cast().map(Self::MathDelimited), - SyntaxKind::MathAttach => node.cast().map(Self::MathAttach), - SyntaxKind::MathFrac => node.cast().map(Self::MathFrac), - SyntaxKind::MathRoot => node.cast().map(Self::MathRoot), - SyntaxKind::Ident => node.cast().map(Self::Ident), - SyntaxKind::None => node.cast().map(Self::None), - SyntaxKind::Auto => node.cast().map(Self::Auto), - SyntaxKind::Bool => node.cast().map(Self::Bool), - SyntaxKind::Int => node.cast().map(Self::Int), - SyntaxKind::Float => node.cast().map(Self::Float), - SyntaxKind::Numeric => node.cast().map(Self::Numeric), - SyntaxKind::Str => node.cast().map(Self::Str), - SyntaxKind::CodeBlock => node.cast().map(Self::Code), - SyntaxKind::ContentBlock => node.cast().map(Self::Content), - SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized), - SyntaxKind::Array => node.cast().map(Self::Array), - SyntaxKind::Dict => node.cast().map(Self::Dict), - SyntaxKind::Unary => node.cast().map(Self::Unary), - SyntaxKind::Binary => node.cast().map(Self::Binary), - SyntaxKind::FieldAccess => node.cast().map(Self::FieldAccess), - SyntaxKind::FuncCall => node.cast().map(Self::FuncCall), - SyntaxKind::Closure => node.cast().map(Self::Closure), - SyntaxKind::LetBinding => node.cast().map(Self::Let), - SyntaxKind::DestructAssignment => node.cast().map(Self::DestructAssign), - SyntaxKind::SetRule => node.cast().map(Self::Set), - SyntaxKind::ShowRule => node.cast().map(Self::Show), - SyntaxKind::Conditional => node.cast().map(Self::Conditional), - SyntaxKind::WhileLoop => node.cast().map(Self::While), - SyntaxKind::ForLoop => node.cast().map(Self::For), - SyntaxKind::ModuleImport => node.cast().map(Self::Import), - SyntaxKind::ModuleInclude => node.cast().map(Self::Include), - SyntaxKind::LoopBreak => node.cast().map(Self::Break), - SyntaxKind::LoopContinue => node.cast().map(Self::Continue), - SyntaxKind::FuncReturn => node.cast().map(Self::Return), - _ => Option::None, - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Text(v) => v.as_untyped(), - Self::Space(v) => v.as_untyped(), - Self::Linebreak(v) => v.as_untyped(), - Self::Parbreak(v) => v.as_untyped(), - Self::Escape(v) => v.as_untyped(), - Self::Shorthand(v) => v.as_untyped(), - Self::SmartQuote(v) => v.as_untyped(), - Self::Strong(v) => v.as_untyped(), - Self::Emph(v) => v.as_untyped(), - Self::Raw(v) => v.as_untyped(), - Self::Link(v) => v.as_untyped(), - Self::Label(v) => v.as_untyped(), - Self::Ref(v) => v.as_untyped(), - Self::Heading(v) => v.as_untyped(), - Self::List(v) => v.as_untyped(), - Self::Enum(v) => v.as_untyped(), - Self::Term(v) => v.as_untyped(), - Self::Equation(v) => v.as_untyped(), - Self::Math(v) => v.as_untyped(), - Self::MathIdent(v) => v.as_untyped(), - Self::MathAlignPoint(v) => v.as_untyped(), - Self::MathDelimited(v) => v.as_untyped(), - Self::MathAttach(v) => v.as_untyped(), - Self::MathFrac(v) => v.as_untyped(), - Self::MathRoot(v) => v.as_untyped(), - Self::Ident(v) => v.as_untyped(), - Self::None(v) => v.as_untyped(), - Self::Auto(v) => v.as_untyped(), - Self::Bool(v) => v.as_untyped(), - Self::Int(v) => v.as_untyped(), - Self::Float(v) => v.as_untyped(), - Self::Numeric(v) => v.as_untyped(), - Self::Str(v) => v.as_untyped(), - Self::Code(v) => v.as_untyped(), - Self::Content(v) => v.as_untyped(), - Self::Array(v) => v.as_untyped(), - Self::Dict(v) => v.as_untyped(), - Self::Parenthesized(v) => v.as_untyped(), - Self::Unary(v) => v.as_untyped(), - Self::Binary(v) => v.as_untyped(), - Self::FieldAccess(v) => v.as_untyped(), - Self::FuncCall(v) => v.as_untyped(), - Self::Closure(v) => v.as_untyped(), - Self::Let(v) => v.as_untyped(), - Self::DestructAssign(v) => v.as_untyped(), - Self::Set(v) => v.as_untyped(), - Self::Show(v) => v.as_untyped(), - Self::Conditional(v) => v.as_untyped(), - Self::While(v) => v.as_untyped(), - Self::For(v) => v.as_untyped(), - Self::Import(v) => v.as_untyped(), - Self::Include(v) => v.as_untyped(), - Self::Break(v) => v.as_untyped(), - Self::Continue(v) => v.as_untyped(), - Self::Return(v) => v.as_untyped(), - } - } -} - -impl Expr { - /// Can this expression be embedded into markup with a hashtag? - pub fn hashtag(&self) -> bool { - matches!( - self, - Self::Ident(_) - | Self::None(_) - | Self::Auto(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Numeric(_) - | Self::Str(_) - | Self::Code(_) - | Self::Content(_) - | Self::Array(_) - | Self::Dict(_) - | Self::Parenthesized(_) - | Self::FieldAccess(_) - | Self::FuncCall(_) - | Self::Let(_) - | Self::Set(_) - | Self::Show(_) - | Self::Conditional(_) - | Self::While(_) - | Self::For(_) - | Self::Import(_) - | Self::Include(_) - | Self::Break(_) - | Self::Continue(_) - | Self::Return(_) - ) - } - - /// Is this a literal? - pub fn is_literal(&self) -> bool { - matches!( - self, - Self::None(_) - | Self::Auto(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Numeric(_) - | Self::Str(_) - ) - } -} - -impl Default for Expr { - fn default() -> Self { - Expr::Space(Space::default()) - } -} - -node! { - /// Plain text without markup. - Text -} - -impl Text { - /// Get the text. - pub fn get(&self) -> &EcoString { - self.0.text() - } -} - -node! { - /// Whitespace in markup or math. Has at most one newline in markup, as more - /// indicate a paragraph break. - Space -} - -node! { - /// A forced line break: `\`. - Linebreak -} - -node! { - /// A paragraph break, indicated by one or multiple blank lines. - Parbreak -} - -node! { - /// An escape sequence: `\#`, `\u{1F5FA}`. - Escape -} - -impl Escape { - /// Get the escaped character. - pub fn get(&self) -> char { - let mut s = Scanner::new(self.0.text()); - s.expect('\\'); - if s.eat_if("u{") { - let hex = s.eat_while(char::is_ascii_hexdigit); - u32::from_str_radix(hex, 16) - .ok() - .and_then(std::char::from_u32) - .unwrap_or_default() - } else { - s.eat().unwrap_or_default() - } - } -} - -node! { - /// A shorthand for a unicode codepoint. For example, `~` for a non-breaking - /// space or `-?` for a soft hyphen. - Shorthand -} - -impl Shorthand { - /// A list of all shorthands. - pub const LIST: &[(&'static str, char)] = &[ - // Both. - ("...", '…'), - // Text only. - ("~", '\u{00A0}'), - ("--", '\u{2013}'), - ("---", '\u{2014}'), - ("-?", '\u{00AD}'), - // Math only. - ("-", '\u{2212}'), - ("'", '′'), - ("*", '∗'), - ("!=", '≠'), - (":=", '≔'), - ("::=", '⩴'), - ("=:", '≕'), - ("<<", '≪'), - ("<<<", '⋘'), - (">>", '≫'), - (">>>", '⋙'), - ("<=", '≤'), - (">=", '≥'), - ("->", '→'), - ("-->", '⟶'), - ("|->", '↦'), - (">->", '↣'), - ("->>", '↠'), - ("<-", '←'), - ("<--", '⟵'), - ("<-<", '↢'), - ("<<-", '↞'), - ("<->", '↔'), - ("<-->", '⟷'), - ("~>", '⇝'), - ("~~>", '⟿'), - ("<~", '⇜'), - ("<~~", '⬳'), - ("=>", '⇒'), - ("|=>", '⤇'), - ("==>", '⟹'), - ("<==", '⟸'), - ("<=>", '⇔'), - ("<==>", '⟺'), - ("[|", '⟦'), - ("|]", '⟧'), - ("||", '‖'), - ]; - - /// Get the shorthanded character. - pub fn get(&self) -> char { - let text = self.0.text(); - Self::LIST - .iter() - .find(|&&(s, _)| s == text) - .map_or_else(char::default, |&(_, c)| c) - } -} - -node! { - /// A smart quote: `'` or `"`. - SmartQuote -} - -impl SmartQuote { - /// Whether this is a double quote. - pub fn double(&self) -> bool { - self.0.text() == "\"" - } -} - -node! { - /// Strong content: `*Strong*`. - Strong -} - -impl Strong { - /// The contents of the strong node. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// Emphasized content: `_Emphasized_`. - Emph -} - -impl Emph { - /// The contents of the emphasis node. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// Raw text with optional syntax highlighting: `` `...` ``. - Raw -} - -impl Raw { - /// The trimmed raw text. - pub fn text(&self) -> EcoString { - let mut text = self.0.text().as_str(); - let blocky = text.starts_with("```"); - text = text.trim_matches('`'); - - // Trim tag, one space at the start, and one space at the end if the - // last non-whitespace char is a backtick. - if blocky { - let mut s = Scanner::new(text); - if s.eat_if(is_id_start) { - s.eat_while(is_id_continue); - } - text = s.after(); - text = text.strip_prefix(' ').unwrap_or(text); - if text.trim_end().ends_with('`') { - text = text.strip_suffix(' ').unwrap_or(text); - } - } - - // Split into lines. - let mut lines = split_newlines(text); - - if blocky { - let dedent = lines - .iter() - .skip(1) - .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) - .min() - .unwrap_or(0); - - // Dedent based on column, but not for the first line. - for line in lines.iter_mut().skip(1) { - let offset = line.chars().take(dedent).map(char::len_utf8).sum(); - *line = &line[offset..]; - } - - let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); - - // Trims a sequence of whitespace followed by a newline at the start. - if lines.first().map_or(false, is_whitespace) { - lines.remove(0); - } - - // Trims a newline followed by a sequence of whitespace at the end. - if lines.last().map_or(false, is_whitespace) { - lines.pop(); - } - } - - lines.join("\n").into() - } - - /// An optional identifier specifying the language to syntax-highlight in. - pub fn lang(&self) -> Option<&str> { - let text = self.0.text(); - - // Only blocky literals are supposed to contain a language. - if !text.starts_with("```") { - return Option::None; - } - - let inner = text.trim_start_matches('`'); - let mut s = Scanner::new(inner); - s.eat_if(is_id_start).then(|| { - s.eat_while(is_id_continue); - s.before() - }) - } - - /// Whether the raw text should be displayed in a separate block. - pub fn block(&self) -> bool { - let text = self.0.text(); - text.starts_with("```") && text.chars().any(is_newline) - } -} - -node! { - /// A hyperlink: `https://typst.org`. - Link -} - -impl Link { - /// Get the URL. - pub fn get(&self) -> &EcoString { - self.0.text() - } -} - -node! { - /// A label: ``. - Label -} - -impl Label { - /// Get the label's text. - pub fn get(&self) -> &str { - self.0.text().trim_start_matches('<').trim_end_matches('>') - } -} - -node! { - /// A reference: `@target`, `@target[..]`. - Ref -} - -impl Ref { - /// Get the target. - pub fn target(&self) -> &str { - self.0 - .children() - .find(|node| node.kind() == SyntaxKind::RefMarker) - .map(|node| node.text().trim_start_matches('@')) - .unwrap_or_default() - } - - /// Get the supplement. - pub fn supplement(&self) -> Option { - self.0.cast_last_match() - } -} - -node! { - /// A section heading: `= Introduction`. - Heading -} - -impl Heading { - /// The contents of the heading. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } - - /// The section depth (number of equals signs). - pub fn level(&self) -> NonZeroUsize { - self.0 - .children() - .find(|node| node.kind() == SyntaxKind::HeadingMarker) - .and_then(|node| node.len().try_into().ok()) - .unwrap_or(NonZeroUsize::ONE) - } -} - -node! { - /// An item in a bullet list: `- ...`. - ListItem -} - -impl ListItem { - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. - EnumItem -} - -impl EnumItem { - /// The explicit numbering, if any: `23.`. - pub fn number(&self) -> Option { - self.0.children().find_map(|node| match node.kind() { - SyntaxKind::EnumMarker => node.text().trim_end_matches('.').parse().ok(), - _ => Option::None, - }) - } - - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// An item in a term list: `/ Term: Details`. - TermItem -} - -impl TermItem { - /// The term described by the item. - pub fn term(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } - - /// The description of the term. - pub fn description(&self) -> Markup { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A mathemathical equation: `$x$`, `$ x^2 $`. - Equation -} - -impl Equation { - /// The contained math. - pub fn body(&self) -> Math { - self.0.cast_first_match().unwrap_or_default() - } - - /// Whether the equation should be displayed as a separate block. - pub fn block(&self) -> bool { - let is_space = |node: Option<&SyntaxNode>| { - node.map(SyntaxNode::kind) == Some(SyntaxKind::Space) - }; - is_space(self.0.children().nth(1)) && is_space(self.0.children().nth_back(1)) - } -} - -node! { - /// The contents of a mathematical equation: `x^2 + 1`. - Math -} - -impl Math { - /// The expressions the mathematical content consists of. - pub fn exprs(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(Expr::cast_with_space) - } -} - -node! { - /// An identifier in math: `pi`. - MathIdent -} - -impl MathIdent { - /// Get the identifier. - pub fn get(&self) -> &EcoString { - self.0.text() - } - - /// Take out the contained identifier. - pub fn take(self) -> EcoString { - self.0.into_text() - } - - /// Get the identifier as a string slice. - pub fn as_str(&self) -> &str { - self.get() - } -} - -impl Deref for MathIdent { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -node! { - /// An alignment point in math: `&`. - MathAlignPoint -} - -node! { - /// Matched delimiters in math: `[x + y]`. - MathDelimited -} - -impl MathDelimited { - /// The opening delimiter. - pub fn open(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The contents, including the delimiters. - pub fn body(&self) -> Math { - self.0.cast_first_match().unwrap_or_default() - } - - /// The closing delimiter. - pub fn close(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A base with optional attachments in math: `a_1^2`. - MathAttach -} - -impl MathAttach { - /// The base, to which things are attached. - pub fn base(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The bottom attachment. - pub fn bottom(&self) -> Option { - self.0 - .children() - .skip_while(|node| !matches!(node.kind(), SyntaxKind::Underscore)) - .find_map(SyntaxNode::cast) - } - - /// The top attachment. - pub fn top(&self) -> Option { - self.0 - .children() - .skip_while(|node| !matches!(node.kind(), SyntaxKind::Hat)) - .find_map(SyntaxNode::cast) - } -} - -node! { - /// A fraction in math: `x/2` - MathFrac -} - -impl MathFrac { - /// The numerator. - pub fn num(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The denominator. - pub fn denom(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A root in math: `√x`, `∛x` or `∜x`. - MathRoot -} - -impl MathRoot { - /// The index of the root. - pub fn index(&self) -> Option { - match self.0.children().next().map(|node| node.text().as_str()) { - Some("∜") => Some(4), - Some("∛") => Some(3), - Some("√") => Option::None, - _ => Option::None, - } - } - - /// The radicand. - pub fn radicand(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// An identifier: `it`. - Ident -} - -impl Ident { - /// Get the identifier. - pub fn get(&self) -> &EcoString { - self.0.text() - } - - /// Take out the contained identifier. - pub fn take(self) -> EcoString { - self.0.into_text() - } - - /// Get the identifier as a string slice. - pub fn as_str(&self) -> &str { - self.get() - } -} - -impl Deref for Ident { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -node! { - /// The `none` literal. - None -} - -node! { - /// The `auto` literal. - Auto -} - -node! { - /// A boolean: `true`, `false`. - Bool -} - -impl Bool { - /// Get the boolean value. - pub fn get(&self) -> bool { - self.0.text() == "true" - } -} - -node! { - /// An integer: `120`. - Int -} - -impl Int { - /// Get the integer value. - pub fn get(&self) -> i64 { - let text = self.0.text(); - if let Some(rest) = text.strip_prefix("0x") { - i64::from_str_radix(rest, 16) - } else if let Some(rest) = text.strip_prefix("0o") { - i64::from_str_radix(rest, 8) - } else if let Some(rest) = text.strip_prefix("0b") { - i64::from_str_radix(rest, 2) - } else { - text.parse() - } - .unwrap_or_default() - } -} - -node! { - /// A floating-point number: `1.2`, `10e-4`. - Float -} - -impl Float { - /// Get the floating-point value. - pub fn get(&self) -> f64 { - self.0.text().parse().unwrap_or_default() - } -} - -node! { - /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`. - Numeric -} - -impl Numeric { - /// Get the numeric value and unit. - pub fn get(&self) -> (f64, Unit) { - let text = self.0.text(); - let count = text - .chars() - .rev() - .take_while(|c| matches!(c, 'a'..='z' | '%')) - .count(); - - let split = text.len() - count; - let value = text[..split].parse().unwrap_or_default(); - let unit = match &text[split..] { - "pt" => Unit::Length(AbsUnit::Pt), - "mm" => Unit::Length(AbsUnit::Mm), - "cm" => Unit::Length(AbsUnit::Cm), - "in" => Unit::Length(AbsUnit::In), - "deg" => Unit::Angle(AngleUnit::Deg), - "rad" => Unit::Angle(AngleUnit::Rad), - "em" => Unit::Em, - "fr" => Unit::Fr, - "%" => Unit::Percent, - _ => Unit::Percent, - }; - - (value, unit) - } -} - -/// Unit of a numeric value. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum Unit { - /// An absolute length unit. - Length(AbsUnit), - /// An angular unit. - Angle(AngleUnit), - /// Font-relative: `1em` is the same as the font size. - Em, - /// Fractions: `fr`. - Fr, - /// Percentage: `%`. - Percent, -} - -node! { - /// A quoted string: `"..."`. - Str -} - -impl Str { - /// Get the string value with resolved escape sequences. - pub fn get(&self) -> EcoString { - let text = self.0.text(); - let unquoted = &text[1..text.len() - 1]; - if !unquoted.contains('\\') { - return unquoted.into(); - } - - let mut out = EcoString::with_capacity(unquoted.len()); - let mut s = Scanner::new(unquoted); - - while let Some(c) = s.eat() { - if c != '\\' { - out.push(c); - continue; - } - - let start = s.locate(-1); - match s.eat() { - Some('\\') => out.push('\\'), - Some('"') => out.push('"'), - Some('n') => out.push('\n'), - Some('r') => out.push('\r'), - Some('t') => out.push('\t'), - Some('u') if s.eat_if('{') => { - let sequence = s.eat_while(char::is_ascii_hexdigit); - s.eat_if('}'); - - match u32::from_str_radix(sequence, 16) - .ok() - .and_then(std::char::from_u32) - { - Some(c) => out.push(c), - Option::None => out.push_str(s.from(start)), - } - } - _ => out.push_str(s.from(start)), - } - } - - out - } -} - -node! { - /// A code block: `{ let x = 1; x + 2 }`. - CodeBlock -} - -impl CodeBlock { - /// The contained code. - pub fn body(&self) -> Code { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// Code. - Code -} - -impl Code { - /// The list of expressions contained in the code. - pub fn exprs(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } -} - -node! { - /// A content block: `[*Hi* there!]`. - ContentBlock -} - -impl ContentBlock { - /// The contained markup. - pub fn body(&self) -> Markup { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// A grouped expression: `(1 + 2)`. - Parenthesized -} - -impl Parenthesized { - /// The wrapped expression. - pub fn expr(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } -} - -node! { - /// An array: `(1, "hi", 12cm)`. - Array -} - -impl Array { - /// The array's items. - pub fn items(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } -} - -/// An item in an array. -#[derive(Debug, Clone, Hash)] -pub enum ArrayItem { - /// A bare expression: `12`. - Pos(Expr), - /// A spread expression: `..things`. - Spread(Expr), -} - -impl AstNode for ArrayItem { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), - _ => node.cast().map(Self::Pos), - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Pos(v) => v.as_untyped(), - Self::Spread(v) => v.as_untyped(), - } - } -} - -node! { - /// A dictionary: `(thickness: 3pt, pattern: dashed)`. - Dict -} - -impl Dict { - /// The dictionary's items. - pub fn items(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } -} - -/// An item in an dictionary expression. -#[derive(Debug, Clone, Hash)] -pub enum DictItem { - /// A named pair: `thickness: 3pt`. - Named(Named), - /// A keyed pair: `"spacy key": true`. - Keyed(Keyed), - /// A spread expression: `..things`. - Spread(Expr), -} - -impl AstNode for DictItem { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Keyed => node.cast().map(Self::Keyed), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), - _ => Option::None, - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Named(v) => v.as_untyped(), - Self::Keyed(v) => v.as_untyped(), - Self::Spread(v) => v.as_untyped(), - } - } -} - -node! { - /// A named pair: `thickness: 3pt`. - Named -} - -impl Named { - /// The name: `thickness`. - pub fn name(&self) -> Ident { - self.0.cast_first_match().unwrap_or_default() - } - - /// The right-hand side of the pair: `3pt`. - pub fn expr(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } - - /// The right-hand side of the pair as an identifier. - pub fn expr_ident(&self) -> Option { - self.0.cast_last_match() - } -} - -node! { - /// A keyed pair: `"spacy key": true`. - Keyed -} - -impl Keyed { - /// The key: `"spacy key"`. - pub fn key(&self) -> Str { - self.0 - .children() - .find_map(|node| node.cast::()) - .unwrap_or_default() - } - - /// The right-hand side of the pair: `true`. - pub fn expr(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A unary operation: `-x`. - Unary -} - -impl Unary { - /// The operator: `-`. - pub fn op(&self) -> UnOp { - self.0 - .children() - .find_map(|node| UnOp::from_kind(node.kind())) - .unwrap_or(UnOp::Pos) - } - - /// The expression to operate on: `x`. - pub fn expr(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -/// A unary operator. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum UnOp { - /// The plus operator: `+`. - Pos, - /// The negation operator: `-`. - Neg, - /// The boolean `not`. - Not, -} - -impl UnOp { - /// Try to convert the token into a unary operation. - pub fn from_kind(token: SyntaxKind) -> Option { - Some(match token { - SyntaxKind::Plus => Self::Pos, - SyntaxKind::Minus => Self::Neg, - SyntaxKind::Not => Self::Not, - _ => return Option::None, - }) - } - - /// The precedence of this operator. - pub fn precedence(self) -> usize { - match self { - Self::Pos | Self::Neg => 7, - Self::Not => 4, - } - } - - /// The string representation of this operation. - pub fn as_str(self) -> &'static str { - match self { - Self::Pos => "+", - Self::Neg => "-", - Self::Not => "not", - } - } -} - -node! { - /// A binary operation: `a + b`. - Binary -} - -impl Binary { - /// The binary operator: `+`. - pub fn op(&self) -> BinOp { - let mut not = false; - self.0 - .children() - .find_map(|node| match node.kind() { - SyntaxKind::Not => { - not = true; - Option::None - } - SyntaxKind::In if not => Some(BinOp::NotIn), - _ => BinOp::from_kind(node.kind()), - }) - .unwrap_or(BinOp::Add) - } - - /// The left-hand side of the operation: `a`. - pub fn lhs(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The right-hand side of the operation: `b`. - pub fn rhs(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -/// A binary operator. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum BinOp { - /// The addition operator: `+`. - Add, - /// The subtraction operator: `-`. - Sub, - /// The multiplication operator: `*`. - Mul, - /// The division operator: `/`. - Div, - /// The short-circuiting boolean `and`. - And, - /// The short-circuiting boolean `or`. - Or, - /// The equality operator: `==`. - Eq, - /// The inequality operator: `!=`. - Neq, - /// The less-than operator: `<`. - Lt, - /// The less-than or equal operator: `<=`. - Leq, - /// The greater-than operator: `>`. - Gt, - /// The greater-than or equal operator: `>=`. - Geq, - /// The assignment operator: `=`. - Assign, - /// The containment operator: `in`. - In, - /// The inversed containment operator: `not in`. - NotIn, - /// The add-assign operator: `+=`. - AddAssign, - /// The subtract-assign oeprator: `-=`. - SubAssign, - /// The multiply-assign operator: `*=`. - MulAssign, - /// The divide-assign operator: `/=`. - DivAssign, -} - -impl BinOp { - /// Try to convert the token into a binary operation. - pub fn from_kind(token: SyntaxKind) -> Option { - Some(match token { - SyntaxKind::Plus => Self::Add, - SyntaxKind::Minus => Self::Sub, - SyntaxKind::Star => Self::Mul, - SyntaxKind::Slash => Self::Div, - SyntaxKind::And => Self::And, - SyntaxKind::Or => Self::Or, - SyntaxKind::EqEq => Self::Eq, - SyntaxKind::ExclEq => Self::Neq, - SyntaxKind::Lt => Self::Lt, - SyntaxKind::LtEq => Self::Leq, - SyntaxKind::Gt => Self::Gt, - SyntaxKind::GtEq => Self::Geq, - SyntaxKind::Eq => Self::Assign, - SyntaxKind::In => Self::In, - SyntaxKind::PlusEq => Self::AddAssign, - SyntaxKind::HyphEq => Self::SubAssign, - SyntaxKind::StarEq => Self::MulAssign, - SyntaxKind::SlashEq => Self::DivAssign, - _ => return Option::None, - }) - } - - /// The precedence of this operator. - pub fn precedence(self) -> usize { - match self { - Self::Mul => 6, - Self::Div => 6, - Self::Add => 5, - Self::Sub => 5, - Self::Eq => 4, - Self::Neq => 4, - Self::Lt => 4, - Self::Leq => 4, - Self::Gt => 4, - Self::Geq => 4, - Self::In => 4, - Self::NotIn => 4, - Self::And => 3, - Self::Or => 2, - Self::Assign => 1, - Self::AddAssign => 1, - Self::SubAssign => 1, - Self::MulAssign => 1, - Self::DivAssign => 1, - } - } - - /// The associativity of this operator. - pub fn assoc(self) -> Assoc { - match self { - Self::Add => Assoc::Left, - Self::Sub => Assoc::Left, - Self::Mul => Assoc::Left, - Self::Div => Assoc::Left, - Self::And => Assoc::Left, - Self::Or => Assoc::Left, - Self::Eq => Assoc::Left, - Self::Neq => Assoc::Left, - Self::Lt => Assoc::Left, - Self::Leq => Assoc::Left, - Self::Gt => Assoc::Left, - Self::Geq => Assoc::Left, - Self::In => Assoc::Left, - Self::NotIn => Assoc::Left, - Self::Assign => Assoc::Right, - Self::AddAssign => Assoc::Right, - Self::SubAssign => Assoc::Right, - Self::MulAssign => Assoc::Right, - Self::DivAssign => Assoc::Right, - } - } - - /// The string representation of this operation. - pub fn as_str(self) -> &'static str { - match self { - Self::Add => "+", - Self::Sub => "-", - Self::Mul => "*", - Self::Div => "/", - Self::And => "and", - Self::Or => "or", - Self::Eq => "==", - Self::Neq => "!=", - Self::Lt => "<", - Self::Leq => "<=", - Self::Gt => ">", - Self::Geq => ">=", - Self::In => "in", - Self::NotIn => "not in", - Self::Assign => "=", - Self::AddAssign => "+=", - Self::SubAssign => "-=", - Self::MulAssign => "*=", - Self::DivAssign => "/=", - } - } -} - -/// The associativity of a binary operator. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum Assoc { - /// Left-associative: `a + b + c` is equivalent to `(a + b) + c`. - Left, - /// Right-associative: `a = b = c` is equivalent to `a = (b = c)`. - Right, -} - -node! { - /// A field access: `properties.age`. - FieldAccess -} - -impl FieldAccess { - /// The expression to access the field on. - pub fn target(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The name of the field. - pub fn field(&self) -> Ident { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// An invocation of a function or method: `f(x, y)`. - FuncCall -} - -impl FuncCall { - /// The function to call. - pub fn callee(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The arguments to the function. - pub fn args(&self) -> Args { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A function call's argument list: `(12pt, y)`. - Args -} - -impl Args { - /// The positional and named arguments. - pub fn items(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } -} - -/// An argument to a function call. -#[derive(Debug, Clone, Hash)] -pub enum Arg { - /// A positional argument: `12`. - Pos(Expr), - /// A named argument: `draw: false`. - Named(Named), - /// A spread argument: `..things`. - Spread(Expr), -} - -impl AstNode for Arg { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), - _ => node.cast().map(Self::Pos), - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Pos(v) => v.as_untyped(), - Self::Named(v) => v.as_untyped(), - Self::Spread(v) => v.as_untyped(), - } - } -} - -node! { - /// A closure: `(x, y) => z`. - Closure -} - -impl Closure { - /// The name of the closure. - /// - /// This only exists if you use the function syntax sugar: `let f(x) = y`. - pub fn name(&self) -> Option { - self.0.children().next()?.cast() - } - - /// The parameter bindings. - pub fn params(&self) -> Params { - self.0.cast_first_match().unwrap_or_default() - } - - /// The body of the closure. - pub fn body(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A closure's parameters: `(x, y)`. - Params -} - -impl Params { - /// The parameter bindings. - pub fn children(&self) -> impl DoubleEndedIterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } -} - -node! { - /// A spread: `..x` or `..x.at(0)`. - Spread -} - -impl Spread { - /// Try to get an identifier. - pub fn name(&self) -> Option { - self.0.cast_first_match() - } - - /// Try to get an expression. - pub fn expr(&self) -> Option { - self.0.cast_first_match() - } -} - -node! { - /// An underscore: `_` - Underscore -} - -/// A parameter to a closure. -#[derive(Debug, Clone, Hash)] -pub enum Param { - /// A positional parameter: `x`. - Pos(Pattern), - /// A named parameter with a default value: `draw: false`. - Named(Named), - /// An argument sink: `..args`. - Sink(Spread), -} - -impl AstNode for Param { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), - _ => node.cast().map(Self::Pos), - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Pos(v) => v.as_untyped(), - Self::Named(v) => v.as_untyped(), - Self::Sink(v) => v.as_untyped(), - } - } -} - -node! { - /// A destructuring pattern: `x` or `(x, _, ..y)`. - Destructuring -} - -impl Destructuring { - /// The bindings of the destructuring. - pub fn bindings(&self) -> impl Iterator + '_ { - self.0.children().filter_map(SyntaxNode::cast) - } - - // Returns a list of all identifiers in the pattern. - pub fn idents(&self) -> impl Iterator + '_ { - self.bindings().filter_map(|binding| match binding { - DestructuringKind::Normal(Expr::Ident(ident)) => Some(ident), - DestructuringKind::Sink(spread) => spread.name(), - DestructuringKind::Named(named) => named.expr_ident(), - _ => Option::None, - }) - } -} - -/// The kind of an element in a destructuring pattern. -#[derive(Debug, Clone, Hash)] -pub enum DestructuringKind { - /// An expression: `x`. - Normal(Expr), - /// An argument sink: `..y`. - Sink(Spread), - /// Named arguments: `x: 1`. - Named(Named), - /// A placeholder: `_`. - Placeholder(Underscore), -} - -impl AstNode for DestructuringKind { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), - SyntaxKind::Underscore => node.cast().map(Self::Placeholder), - _ => node.cast().map(Self::Normal), - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Normal(v) => v.as_untyped(), - Self::Named(v) => v.as_untyped(), - Self::Sink(v) => v.as_untyped(), - Self::Placeholder(v) => v.as_untyped(), - } - } -} - -/// The kind of a pattern. -#[derive(Debug, Clone, Hash)] -pub enum Pattern { - /// A single expression: `x`. - Normal(Expr), - /// A placeholder: `_`. - Placeholder(Underscore), - /// A destructuring pattern: `(x, _, ..y)`. - Destructuring(Destructuring), -} - -impl AstNode for Pattern { - fn from_untyped(node: &SyntaxNode) -> Option { - match node.kind() { - SyntaxKind::Destructuring => node.cast().map(Self::Destructuring), - SyntaxKind::Underscore => node.cast().map(Self::Placeholder), - _ => node.cast().map(Self::Normal), - } - } - - fn as_untyped(&self) -> &SyntaxNode { - match self { - Self::Normal(v) => v.as_untyped(), - Self::Destructuring(v) => v.as_untyped(), - Self::Placeholder(v) => v.as_untyped(), - } - } -} - -impl Pattern { - // Returns a list of all identifiers in the pattern. - pub fn idents(&self) -> Vec { - match self { - Pattern::Normal(Expr::Ident(ident)) => vec![ident.clone()], - Pattern::Destructuring(destruct) => destruct.idents().collect(), - _ => vec![], - } - } -} - -impl Default for Pattern { - fn default() -> Self { - Self::Normal(Expr::default()) - } -} - -node! { - /// A let binding: `let x = 1`. - LetBinding -} - -#[derive(Debug)] -pub enum LetBindingKind { - /// A normal binding: `let x = 1`. - Normal(Pattern), - /// A closure binding: `let f(x) = 1`. - Closure(Ident), -} - -impl LetBindingKind { - // Returns a list of all identifiers in the pattern. - pub fn idents(&self) -> Vec { - match self { - LetBindingKind::Normal(pattern) => pattern.idents(), - LetBindingKind::Closure(ident) => { - vec![ident.clone()] - } - } - } -} - -impl LetBinding { - /// The kind of the let binding. - pub fn kind(&self) -> LetBindingKind { - match self.0.cast_first_match::() { - Some(Pattern::Normal(Expr::Closure(closure))) => { - LetBindingKind::Closure(closure.name().unwrap_or_default()) - } - pattern => LetBindingKind::Normal(pattern.unwrap_or_default()), - } - } - - /// The expression the binding is initialized with. - pub fn init(&self) -> Option { - match self.kind() { - LetBindingKind::Normal(Pattern::Normal(_)) => { - self.0.children().filter_map(SyntaxNode::cast).nth(1) - } - LetBindingKind::Normal(_) => self.0.cast_first_match(), - LetBindingKind::Closure(_) => self.0.cast_first_match(), - } - } -} - -node! { - /// An assignment expression `(x, y) = (1, 2)`. - DestructAssignment -} - -impl DestructAssignment { - /// The pattern of the assignment. - pub fn pattern(&self) -> Pattern { - self.0.cast_first_match::().unwrap_or_default() - } - - /// The expression that is assigned. - pub fn value(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A set rule: `set text(...)`. - SetRule -} - -impl SetRule { - /// The function to set style properties for. - pub fn target(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The style properties to set. - pub fn args(&self) -> Args { - self.0.cast_last_match().unwrap_or_default() - } - - /// A condition under which the set rule applies. - pub fn condition(&self) -> Option { - self.0 - .children() - .skip_while(|child| child.kind() != SyntaxKind::If) - .find_map(SyntaxNode::cast) - } -} - -node! { - /// A show rule: `show heading: it => emph(it.body)`. - ShowRule -} - -impl ShowRule { - /// Defines which nodes the show rule applies to. - pub fn selector(&self) -> Option { - self.0 - .children() - .rev() - .skip_while(|child| child.kind() != SyntaxKind::Colon) - .find_map(SyntaxNode::cast) - } - - /// The transformation recipe. - pub fn transform(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// An if-else conditional: `if x { y } else { z }`. - Conditional -} - -impl Conditional { - /// The condition which selects the body to evaluate. - pub fn condition(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The expression to evaluate if the condition is true. - pub fn if_body(&self) -> Expr { - self.0 - .children() - .filter_map(SyntaxNode::cast) - .nth(1) - .unwrap_or_default() - } - - /// The expression to evaluate if the condition is false. - pub fn else_body(&self) -> Option { - self.0.children().filter_map(SyntaxNode::cast).nth(2) - } -} - -node! { - /// A while loop: `while x { y }`. - WhileLoop -} - -impl WhileLoop { - /// The condition which selects whether to evaluate the body. - pub fn condition(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The expression to evaluate while the condition is true. - pub fn body(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A for loop: `for x in y { z }`. - ForLoop -} - -impl ForLoop { - /// The pattern to assign to. - pub fn pattern(&self) -> Pattern { - self.0.cast_first_match().unwrap_or_default() - } - - /// The expression to iterate over. - pub fn iter(&self) -> Expr { - self.0 - .children() - .skip_while(|&c| c.kind() != SyntaxKind::In) - .find_map(SyntaxNode::cast) - .unwrap_or_default() - } - - /// The expression to evaluate for each iteration. - pub fn body(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A module import: `import "utils.typ": a, b, c`. - ModuleImport -} - -impl ModuleImport { - /// The module or path from which the items should be imported. - pub fn source(&self) -> Expr { - self.0.cast_first_match().unwrap_or_default() - } - - /// The items to be imported. - pub fn imports(&self) -> Option { - self.0.children().find_map(|node| match node.kind() { - SyntaxKind::Star => Some(Imports::Wildcard), - SyntaxKind::ImportItems => { - let items = node.children().filter_map(SyntaxNode::cast).collect(); - Some(Imports::Items(items)) - } - _ => Option::None, - }) - } -} - -/// The items that ought to be imported from a file. -#[derive(Debug, Clone, Hash)] -pub enum Imports { - /// All items in the scope of the file should be imported. - Wildcard, - /// The specified items from the file should be imported. - Items(Vec), -} - -node! { - /// A module include: `include "chapter1.typ"`. - ModuleInclude -} - -impl ModuleInclude { - /// The module or path from which the content should be included. - pub fn source(&self) -> Expr { - self.0.cast_last_match().unwrap_or_default() - } -} - -node! { - /// A break from a loop: `break`. - LoopBreak -} - -node! { - /// A continue in a loop: `continue`. - LoopContinue -} - -node! { - /// A return from a function: `return`, `return x + 1`. - FuncReturn -} - -impl FuncReturn { - /// The expression to return. - pub fn body(&self) -> Option { - self.0.cast_last_match() - } -} diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs deleted file mode 100644 index 26e949ca..00000000 --- a/src/syntax/kind.rs +++ /dev/null @@ -1,448 +0,0 @@ -/// A syntactical building block of a Typst file. -/// -/// Can be created by the lexer or by the parser. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -#[repr(u8)] -pub enum SyntaxKind { - /// Markup. - Markup, - /// Plain text without markup. - Text, - /// Whitespace. Contains at most one newline in markup, as more indicate a - /// paragraph break. - Space, - /// A forced line break: `\`. - Linebreak, - /// A paragraph break, indicated by one or multiple blank lines. - Parbreak, - /// An escape sequence: `\#`, `\u{1F5FA}`. - Escape, - /// A shorthand for a unicode codepoint. For example, `~` for non-breaking - /// space or `-?` for a soft hyphen. - Shorthand, - /// A smart quote: `'` or `"`. - SmartQuote, - /// Strong content: `*Strong*`. - Strong, - /// Emphasized content: `_Emphasized_`. - Emph, - /// Raw text with optional syntax highlighting: `` `...` ``. - Raw, - /// A hyperlink: `https://typst.org`. - Link, - /// A label: ``. - Label, - /// A reference: `@target`, `@target[..]`. - Ref, - /// Introduces a reference: `@target`. - RefMarker, - /// A section heading: `= Introduction`. - Heading, - /// Introduces a section heading: `=`, `==`, ... - HeadingMarker, - /// An item in a bullet list: `- ...`. - ListItem, - /// Introduces a list item: `-`. - ListMarker, - /// An item in an enumeration (numbered list): `+ ...` or `1. ...`. - EnumItem, - /// Introduces an enumeration item: `+`, `1.`. - EnumMarker, - /// An item in a term list: `/ Term: Details`. - TermItem, - /// Introduces a term item: `/`. - TermMarker, - /// A mathematical equation: `$x$`, `$ x^2 $`. - Equation, - - /// The contents of a mathematical equation: `x^2 + 1`. - Math, - /// An identifier in math: `pi`. - MathIdent, - /// An alignment point in math: `&`. - MathAlignPoint, - /// Matched delimiters in math: `[x + y]`. - MathDelimited, - /// A base with optional attachments in math: `a_1^2`. - MathAttach, - /// A fraction in math: `x/2`. - MathFrac, - /// A root in math: `√x`, `∛x` or `∜x`. - MathRoot, - - /// A hashtag that switches into code mode: `#`. - Hashtag, - /// A left curly brace, starting a code block: `{`. - LeftBrace, - /// A right curly brace, terminating a code block: `}`. - RightBrace, - /// A left square bracket, starting a content block: `[`. - LeftBracket, - /// A right square bracket, terminating a content block: `]`. - RightBracket, - /// A left round parenthesis, starting a grouped expression, collection, - /// argument or parameter list: `(`. - LeftParen, - /// A right round parenthesis, terminating a grouped expression, collection, - /// argument or parameter list: `)`. - RightParen, - /// A comma separator in a sequence: `,`. - Comma, - /// A semicolon terminating an expression: `;`. - Semicolon, - /// A colon between name/key and value in a dictionary, argument or - /// parameter list, or between the term and body of a term list term: `:`. - Colon, - /// The strong text toggle, multiplication operator, and wildcard import - /// symbol: `*`. - Star, - /// Toggles emphasized text and indicates a subscript in math: `_`. - Underscore, - /// Starts and ends a mathematical equation: `$`. - Dollar, - /// The unary plus and binary addition operator: `+`. - Plus, - /// The unary negation and binary subtraction operator: `-`. - Minus, - /// The division operator and fraction operator in math: `/`. - Slash, - /// The superscript operator in math: `^`. - Hat, - /// The field access and method call operator: `.`. - Dot, - /// The assignment operator: `=`. - Eq, - /// The equality operator: `==`. - EqEq, - /// The inequality operator: `!=`. - ExclEq, - /// The less-than operator: `<`. - Lt, - /// The less-than or equal operator: `<=`. - LtEq, - /// The greater-than operator: `>`. - Gt, - /// The greater-than or equal operator: `>=`. - GtEq, - /// The add-assign operator: `+=`. - PlusEq, - /// The subtract-assign operator: `-=`. - HyphEq, - /// The multiply-assign operator: `*=`. - StarEq, - /// The divide-assign operator: `/=`. - SlashEq, - /// The spread operator: `..`. - Dots, - /// An arrow between a closure's parameters and body: `=>`. - Arrow, - /// A root: `√`, `∛` or `∜`. - Root, - - /// The `not` operator. - Not, - /// The `and` operator. - And, - /// The `or` operator. - Or, - /// The `none` literal. - None, - /// The `auto` literal. - Auto, - /// The `let` keyword. - Let, - /// The `set` keyword. - Set, - /// The `show` keyword. - Show, - /// The `if` keyword. - If, - /// The `else` keyword. - Else, - /// The `for` keyword. - For, - /// The `in` keyword. - In, - /// The `while` keyword. - While, - /// The `break` keyword. - Break, - /// The `continue` keyword. - Continue, - /// The `return` keyword. - Return, - /// The `import` keyword. - Import, - /// The `include` keyword. - Include, - /// The `as` keyword. - As, - - /// Code. - Code, - /// An identifier: `it`. - Ident, - /// A boolean: `true`, `false`. - Bool, - /// An integer: `120`. - Int, - /// A floating-point number: `1.2`, `10e-4`. - Float, - /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`. - Numeric, - /// A quoted string: `"..."`. - Str, - /// A code block: `{ let x = 1; x + 2 }`. - CodeBlock, - /// A content block: `[*Hi* there!]`. - ContentBlock, - /// A grouped expression: `(1 + 2)`. - Parenthesized, - /// An array: `(1, "hi", 12cm)`. - Array, - /// A dictionary: `(thickness: 3pt, pattern: dashed)`. - Dict, - /// A named pair: `thickness: 3pt`. - Named, - /// A keyed pair: `"spacy key": true`. - Keyed, - /// A unary operation: `-x`. - Unary, - /// A binary operation: `a + b`. - Binary, - /// A field access: `properties.age`. - FieldAccess, - /// An invocation of a function or method: `f(x, y)`. - FuncCall, - /// A function call's argument list: `(12pt, y)`. - Args, - /// Spread arguments or an argument sink: `..x`. - Spread, - /// A closure: `(x, y) => z`. - Closure, - /// A closure's parameters: `(x, y)`. - Params, - /// A let binding: `let x = 1`. - LetBinding, - /// A set rule: `set text(...)`. - SetRule, - /// A show rule: `show heading: it => emph(it.body)`. - ShowRule, - /// An if-else conditional: `if x { y } else { z }`. - Conditional, - /// A while loop: `while x { y }`. - WhileLoop, - /// A for loop: `for x in y { z }`. - ForLoop, - /// A module import: `import "utils.typ": a, b, c`. - ModuleImport, - /// Items to import from a module: `a, b, c`. - ImportItems, - /// A module include: `include "chapter1.typ"`. - ModuleInclude, - /// A break from a loop: `break`. - LoopBreak, - /// A continue in a loop: `continue`. - LoopContinue, - /// A return from a function: `return`, `return x + 1`. - FuncReturn, - /// A destructuring pattern: `(x, _, ..y)`. - Destructuring, - /// A destructuring assignment expression: `(x, y) = (1, 2)`. - DestructAssignment, - - /// A line comment: `// ...`. - LineComment, - /// A block comment: `/* ... */`. - BlockComment, - /// An invalid sequence of characters. - Error, - /// The end of the file. - Eof, -} - -impl SyntaxKind { - /// Is this a bracket, brace, or parenthesis? - pub fn is_grouping(self) -> bool { - matches!( - self, - Self::LeftBracket - | Self::LeftBrace - | Self::LeftParen - | Self::RightBracket - | Self::RightBrace - | Self::RightParen - ) - } - - /// Does this node terminate a preceding expression? - pub fn is_terminator(self) -> bool { - matches!( - self, - Self::Eof - | Self::Semicolon - | Self::RightBrace - | Self::RightParen - | Self::RightBracket - ) - } - - /// Is this a code or content block. - pub fn is_block(self) -> bool { - matches!(self, Self::CodeBlock | Self::ContentBlock) - } - - /// Does this node need termination through a semicolon or linebreak? - pub fn is_stmt(self) -> bool { - matches!( - self, - Self::LetBinding - | Self::SetRule - | Self::ShowRule - | Self::ModuleImport - | Self::ModuleInclude - ) - } - - /// Whether this kind of node is automatically skipped by the parser in - /// code and math mode. - pub fn is_trivia(self) -> bool { - matches!( - self, - Self::Space | Self::Parbreak | Self::LineComment | Self::BlockComment - ) - } - - /// Whether this is an error. - pub fn is_error(self) -> bool { - self == Self::Error - } - - /// A human-readable name for the kind. - pub fn name(self) -> &'static str { - match self { - Self::Markup => "markup", - Self::Text => "text", - Self::Space => "space", - Self::Linebreak => "line break", - Self::Parbreak => "paragraph break", - Self::Escape => "escape sequence", - Self::Shorthand => "shorthand", - Self::SmartQuote => "smart quote", - Self::Strong => "strong content", - Self::Emph => "emphasized content", - Self::Raw => "raw block", - Self::Link => "link", - Self::Label => "label", - Self::Ref => "reference", - Self::RefMarker => "reference marker", - Self::Heading => "heading", - Self::HeadingMarker => "heading marker", - Self::ListItem => "list item", - Self::ListMarker => "list marker", - Self::EnumItem => "enum item", - Self::EnumMarker => "enum marker", - Self::TermItem => "term list item", - Self::TermMarker => "term marker", - Self::Equation => "equation", - Self::Math => "math", - Self::MathIdent => "math identifier", - Self::MathAlignPoint => "math alignment point", - Self::MathDelimited => "delimited math", - Self::MathAttach => "math attachments", - Self::MathFrac => "math fraction", - Self::MathRoot => "math root", - Self::Hashtag => "hashtag", - Self::LeftBrace => "opening brace", - Self::RightBrace => "closing brace", - Self::LeftBracket => "opening bracket", - Self::RightBracket => "closing bracket", - Self::LeftParen => "opening paren", - Self::RightParen => "closing paren", - Self::Comma => "comma", - Self::Semicolon => "semicolon", - Self::Colon => "colon", - Self::Star => "star", - Self::Underscore => "underscore", - Self::Dollar => "dollar sign", - Self::Plus => "plus", - Self::Minus => "minus", - Self::Slash => "slash", - Self::Hat => "hat", - Self::Dot => "dot", - Self::Eq => "equals sign", - Self::EqEq => "equality operator", - Self::ExclEq => "inequality operator", - Self::Lt => "less-than operator", - Self::LtEq => "less-than or equal operator", - Self::Gt => "greater-than operator", - Self::GtEq => "greater-than or equal operator", - Self::PlusEq => "add-assign operator", - Self::HyphEq => "subtract-assign operator", - Self::StarEq => "multiply-assign operator", - Self::SlashEq => "divide-assign operator", - Self::Dots => "dots", - Self::Arrow => "arrow", - Self::Root => "root", - Self::Not => "operator `not`", - Self::And => "operator `and`", - Self::Or => "operator `or`", - Self::None => "`none`", - Self::Auto => "`auto`", - Self::Let => "keyword `let`", - Self::Set => "keyword `set`", - Self::Show => "keyword `show`", - Self::If => "keyword `if`", - Self::Else => "keyword `else`", - Self::For => "keyword `for`", - Self::In => "keyword `in`", - Self::While => "keyword `while`", - Self::Break => "keyword `break`", - Self::Continue => "keyword `continue`", - Self::Return => "keyword `return`", - Self::Import => "keyword `import`", - Self::Include => "keyword `include`", - Self::As => "keyword `as`", - Self::Code => "code", - Self::Ident => "identifier", - Self::Bool => "boolean", - Self::Int => "integer", - Self::Float => "float", - Self::Numeric => "numeric value", - Self::Str => "string", - Self::CodeBlock => "code block", - Self::ContentBlock => "content block", - Self::Parenthesized => "group", - Self::Array => "array", - Self::Dict => "dictionary", - Self::Named => "named pair", - Self::Keyed => "keyed pair", - Self::Unary => "unary expression", - Self::Binary => "binary expression", - Self::FieldAccess => "field access", - Self::FuncCall => "function call", - Self::Args => "call arguments", - Self::Spread => "spread", - Self::Closure => "closure", - Self::Params => "closure parameters", - Self::LetBinding => "`let` expression", - Self::SetRule => "`set` expression", - Self::ShowRule => "`show` expression", - Self::Conditional => "`if` expression", - Self::WhileLoop => "while-loop expression", - Self::ForLoop => "for-loop expression", - Self::ModuleImport => "`import` expression", - Self::ImportItems => "import items", - Self::ModuleInclude => "`include` expression", - Self::LoopBreak => "`break` expression", - Self::LoopContinue => "`continue` expression", - Self::FuncReturn => "`return` expression", - Self::Destructuring => "destructuring pattern", - Self::DestructAssignment => "destructuring assignment expression", - Self::LineComment => "line comment", - Self::BlockComment => "block comment", - Self::Error => "syntax error", - Self::Eof => "end of file", - } - } -} diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs deleted file mode 100644 index d95b5b7b..00000000 --- a/src/syntax/lexer.rs +++ /dev/null @@ -1,738 +0,0 @@ -use ecow::{eco_format, EcoString}; -use unicode_ident::{is_xid_continue, is_xid_start}; -use unicode_segmentation::UnicodeSegmentation; -use unscanny::Scanner; - -use super::SyntaxKind; - -/// Splits up a string of source code into tokens. -#[derive(Clone)] -pub(super) struct Lexer<'s> { - /// The underlying scanner. - s: Scanner<'s>, - /// The mode the lexer is in. This determines which kinds of tokens it - /// produces. - mode: LexMode, - /// Whether the last token contained a newline. - newline: bool, - /// An error for the last token. - error: Option, -} - -/// What kind of tokens to emit. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(super) enum LexMode { - /// Text and markup. - Markup, - /// Math atoms, operators, etc. - Math, - /// Keywords, literals and operators. - Code, -} - -impl<'s> Lexer<'s> { - /// Create a new lexer with the given mode and a prefix to offset column - /// calculations. - pub fn new(text: &'s str, mode: LexMode) -> Self { - Self { - s: Scanner::new(text), - mode, - newline: false, - error: None, - } - } - - /// Get the current lexing mode. - pub fn mode(&self) -> LexMode { - self.mode - } - - /// Change the lexing mode. - pub fn set_mode(&mut self, mode: LexMode) { - self.mode = mode; - } - - /// The index in the string at which the last token ends and next token - /// will start. - pub fn cursor(&self) -> usize { - self.s.cursor() - } - - /// Jump to the given index in the string. - pub fn jump(&mut self, index: usize) { - self.s.jump(index); - } - - /// Whether the last token contained a newline. - pub fn newline(&self) -> bool { - self.newline - } - - /// Take out the last error, if any. - pub fn take_error(&mut self) -> Option { - self.error.take() - } -} - -impl Lexer<'_> { - /// Construct a full-positioned syntax error. - fn error(&mut self, message: impl Into) -> SyntaxKind { - self.error = Some(message.into()); - SyntaxKind::Error - } -} - -/// Shared. -impl Lexer<'_> { - pub fn next(&mut self) -> SyntaxKind { - self.newline = false; - self.error = None; - let start = self.s.cursor(); - match self.s.eat() { - Some(c) if c.is_whitespace() => self.whitespace(start, c), - Some('/') if self.s.eat_if('/') => self.line_comment(), - Some('/') if self.s.eat_if('*') => self.block_comment(), - Some('*') if self.s.eat_if('/') => { - self.error("unexpected end of block comment") - } - - Some(c) => match self.mode { - LexMode::Markup => self.markup(start, c), - LexMode::Math => self.math(start, c), - LexMode::Code => self.code(start, c), - }, - - None => SyntaxKind::Eof, - } - } - - fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind { - let more = self.s.eat_while(char::is_whitespace); - let newlines = match c { - ' ' if more.is_empty() => 0, - _ => count_newlines(self.s.from(start)), - }; - - self.newline = newlines > 0; - if self.mode == LexMode::Markup && newlines >= 2 { - SyntaxKind::Parbreak - } else { - SyntaxKind::Space - } - } - - fn line_comment(&mut self) -> SyntaxKind { - self.s.eat_until(is_newline); - SyntaxKind::LineComment - } - - fn block_comment(&mut self) -> SyntaxKind { - let mut state = '_'; - let mut depth = 1; - - // Find the first `*/` that does not correspond to a nested `/*`. - while let Some(c) = self.s.eat() { - state = match (state, c) { - ('*', '/') => { - depth -= 1; - if depth == 0 { - break; - } - '_' - } - ('/', '*') => { - depth += 1; - '_' - } - ('/', '/') => { - self.line_comment(); - '_' - } - _ => c, - } - } - - SyntaxKind::BlockComment - } -} - -/// Markup. -impl Lexer<'_> { - fn markup(&mut self, start: usize, c: char) -> SyntaxKind { - match c { - '\\' => self.backslash(), - '`' => self.raw(), - 'h' if self.s.eat_if("ttp://") => self.link(), - 'h' if self.s.eat_if("ttps://") => self.link(), - '<' if self.s.at(is_id_continue) => self.label(), - '@' => self.ref_marker(), - - '.' if self.s.eat_if("..") => SyntaxKind::Shorthand, - '-' if self.s.eat_if("--") => SyntaxKind::Shorthand, - '-' if self.s.eat_if('-') => SyntaxKind::Shorthand, - '-' if self.s.eat_if('?') => SyntaxKind::Shorthand, - '*' if !self.in_word() => SyntaxKind::Star, - '_' if !self.in_word() => SyntaxKind::Underscore, - - '#' => SyntaxKind::Hashtag, - '[' => SyntaxKind::LeftBracket, - ']' => SyntaxKind::RightBracket, - '\'' => SyntaxKind::SmartQuote, - '"' => SyntaxKind::SmartQuote, - '$' => SyntaxKind::Dollar, - '~' => SyntaxKind::Shorthand, - ':' => SyntaxKind::Colon, - '=' => { - self.s.eat_while('='); - if self.space_or_end() { - SyntaxKind::HeadingMarker - } else { - self.text() - } - } - '-' if self.space_or_end() => SyntaxKind::ListMarker, - '+' if self.space_or_end() => SyntaxKind::EnumMarker, - '/' if self.space_or_end() => SyntaxKind::TermMarker, - '0'..='9' => self.numbering(start), - - _ => self.text(), - } - } - - fn backslash(&mut self) -> SyntaxKind { - if self.s.eat_if("u{") { - let hex = self.s.eat_while(char::is_ascii_alphanumeric); - if !self.s.eat_if('}') { - return self.error("unclosed Unicode escape sequence"); - } - - if u32::from_str_radix(hex, 16) - .ok() - .and_then(std::char::from_u32) - .is_none() - { - return self.error(eco_format!("invalid Unicode codepoint: {}", hex)); - } - - return SyntaxKind::Escape; - } - - if self.s.done() || self.s.at(char::is_whitespace) { - SyntaxKind::Linebreak - } else { - self.s.eat(); - SyntaxKind::Escape - } - } - - fn raw(&mut self) -> SyntaxKind { - let mut backticks = 1; - while self.s.eat_if('`') { - backticks += 1; - } - - if backticks == 2 { - return SyntaxKind::Raw; - } - - let mut found = 0; - while found < backticks { - match self.s.eat() { - Some('`') => found += 1, - Some(_) => found = 0, - None => break, - } - } - - if found != backticks { - return self.error("unclosed raw text"); - } - - SyntaxKind::Raw - } - - fn link(&mut self) -> SyntaxKind { - let mut brackets = Vec::new(); - - #[rustfmt::skip] - self.s.eat_while(|c: char| { - match c { - | '0' ..= '9' - | 'a' ..= 'z' - | 'A' ..= 'Z' - | '!' | '#' | '$' | '%' | '&' | '*' | '+' - | ',' | '-' | '.' | '/' | ':' | ';' | '=' - | '?' | '@' | '_' | '~' | '\'' => true, - '[' => { - brackets.push(SyntaxKind::LeftBracket); - true - } - '(' => { - brackets.push(SyntaxKind::LeftParen); - true - } - ']' => brackets.pop() == Some(SyntaxKind::LeftBracket), - ')' => brackets.pop() == Some(SyntaxKind::LeftParen), - _ => false, - } - }); - - if !brackets.is_empty() { - return self.error( - "automatic links cannot contain unbalanced brackets, \ - use the `link` function instead", - ); - } - - // Don't include the trailing characters likely to be part of text. - while matches!(self.s.scout(-1), Some('!' | ',' | '.' | ':' | ';' | '?' | '\'')) { - self.s.uneat(); - } - - SyntaxKind::Link - } - - fn numbering(&mut self, start: usize) -> SyntaxKind { - self.s.eat_while(char::is_ascii_digit); - - let read = self.s.from(start); - if self.s.eat_if('.') && self.space_or_end() && read.parse::().is_ok() { - return SyntaxKind::EnumMarker; - } - - self.text() - } - - fn ref_marker(&mut self) -> SyntaxKind { - self.s.eat_while(|c| is_id_continue(c) || matches!(c, ':' | '.')); - - // Don't include the trailing characters likely to be part of text. - while matches!(self.s.scout(-1), Some('.' | ':')) { - self.s.uneat(); - } - - SyntaxKind::RefMarker - } - - fn label(&mut self) -> SyntaxKind { - let label = self.s.eat_while(|c| is_id_continue(c) || matches!(c, ':' | '.')); - if label.is_empty() { - return self.error("label cannot be empty"); - } - - if !self.s.eat_if('>') { - return self.error("unclosed label"); - } - - SyntaxKind::Label - } - - fn text(&mut self) -> SyntaxKind { - macro_rules! table { - ($(|$c:literal)*) => { - static TABLE: [bool; 128] = { - let mut t = [false; 128]; - $(t[$c as usize] = true;)* - t - }; - }; - } - - table! { - | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/' - | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"' - | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#' - }; - - loop { - self.s.eat_until(|c: char| { - TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) - }); - - // Continue with the same text node if the thing would become text - // anyway. - let mut s = self.s; - match s.eat() { - Some(' ') if s.at(char::is_alphanumeric) => {} - Some('/') if !s.at(['/', '*']) => {} - Some('-') if !s.at(['-', '?']) => {} - Some('.') if !s.at("..") => {} - Some('h') if !s.at("ttp://") && !s.at("ttps://") => {} - Some('@') if !s.at(is_id_start) => {} - _ => break, - } - - self.s = s; - } - - SyntaxKind::Text - } - - fn in_word(&self) -> bool { - let alphanum = |c: Option| c.map_or(false, |c| c.is_alphanumeric()); - let prev = self.s.scout(-2); - let next = self.s.peek(); - alphanum(prev) && alphanum(next) - } - - fn space_or_end(&self) -> bool { - self.s.done() || self.s.at(char::is_whitespace) - } -} - -/// Math. -impl Lexer<'_> { - fn math(&mut self, start: usize, c: char) -> SyntaxKind { - match c { - '\\' => self.backslash(), - '"' => self.string(), - - '-' if self.s.eat_if(">>") => SyntaxKind::Shorthand, - '-' if self.s.eat_if('>') => SyntaxKind::Shorthand, - '-' if self.s.eat_if("->") => SyntaxKind::Shorthand, - ':' if self.s.eat_if('=') => SyntaxKind::Shorthand, - ':' if self.s.eat_if(":=") => SyntaxKind::Shorthand, - '!' if self.s.eat_if('=') => SyntaxKind::Shorthand, - '.' if self.s.eat_if("..") => SyntaxKind::Shorthand, - '[' if self.s.eat_if('|') => SyntaxKind::Shorthand, - '<' if self.s.eat_if("==>") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("-->") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("--") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("-<") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("->") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("<-") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("<<") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("==") => SyntaxKind::Shorthand, - '<' if self.s.eat_if("~~") => SyntaxKind::Shorthand, - '<' if self.s.eat_if('=') => SyntaxKind::Shorthand, - '<' if self.s.eat_if('<') => SyntaxKind::Shorthand, - '<' if self.s.eat_if('-') => SyntaxKind::Shorthand, - '<' if self.s.eat_if('~') => SyntaxKind::Shorthand, - '>' if self.s.eat_if("->") => SyntaxKind::Shorthand, - '>' if self.s.eat_if(">>") => SyntaxKind::Shorthand, - '=' if self.s.eat_if("=>") => SyntaxKind::Shorthand, - '=' if self.s.eat_if('>') => SyntaxKind::Shorthand, - '=' if self.s.eat_if(':') => SyntaxKind::Shorthand, - '>' if self.s.eat_if('=') => SyntaxKind::Shorthand, - '>' if self.s.eat_if('>') => SyntaxKind::Shorthand, - '|' if self.s.eat_if("->") => SyntaxKind::Shorthand, - '|' if self.s.eat_if("=>") => SyntaxKind::Shorthand, - '|' if self.s.eat_if(']') => SyntaxKind::Shorthand, - '|' if self.s.eat_if('|') => SyntaxKind::Shorthand, - '~' if self.s.eat_if("~>") => SyntaxKind::Shorthand, - '~' if self.s.eat_if('>') => SyntaxKind::Shorthand, - '*' | '\'' | '-' => SyntaxKind::Shorthand, - - '#' => SyntaxKind::Hashtag, - '_' => SyntaxKind::Underscore, - '$' => SyntaxKind::Dollar, - '/' => SyntaxKind::Slash, - '^' => SyntaxKind::Hat, - '&' => SyntaxKind::MathAlignPoint, - '√' | '∛' | '∜' => SyntaxKind::Root, - - // Identifiers. - c if is_math_id_start(c) && self.s.at(is_math_id_continue) => { - self.s.eat_while(is_math_id_continue); - SyntaxKind::MathIdent - } - - // Other math atoms. - _ => self.math_text(start, c), - } - } - - fn math_text(&mut self, start: usize, c: char) -> SyntaxKind { - // Keep numbers and grapheme clusters together. - if c.is_numeric() { - self.s.eat_while(char::is_numeric); - let mut s = self.s; - if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() { - self.s = s; - } - } else { - let len = self - .s - .get(start..self.s.string().len()) - .graphemes(true) - .next() - .map_or(0, str::len); - self.s.jump(start + len); - } - SyntaxKind::Text - } -} - -/// Code. -impl Lexer<'_> { - fn code(&mut self, start: usize, c: char) -> SyntaxKind { - match c { - '`' => self.raw(), - '<' if self.s.at(is_id_continue) => self.label(), - '0'..='9' => self.number(start, c), - '.' if self.s.at(char::is_ascii_digit) => self.number(start, c), - '"' => self.string(), - - '=' if self.s.eat_if('=') => SyntaxKind::EqEq, - '!' if self.s.eat_if('=') => SyntaxKind::ExclEq, - '<' if self.s.eat_if('=') => SyntaxKind::LtEq, - '>' if self.s.eat_if('=') => SyntaxKind::GtEq, - '+' if self.s.eat_if('=') => SyntaxKind::PlusEq, - '-' if self.s.eat_if('=') => SyntaxKind::HyphEq, - '*' if self.s.eat_if('=') => SyntaxKind::StarEq, - '/' if self.s.eat_if('=') => SyntaxKind::SlashEq, - '.' if self.s.eat_if('.') => SyntaxKind::Dots, - '=' if self.s.eat_if('>') => SyntaxKind::Arrow, - - '{' => SyntaxKind::LeftBrace, - '}' => SyntaxKind::RightBrace, - '[' => SyntaxKind::LeftBracket, - ']' => SyntaxKind::RightBracket, - '(' => SyntaxKind::LeftParen, - ')' => SyntaxKind::RightParen, - '$' => SyntaxKind::Dollar, - ',' => SyntaxKind::Comma, - ';' => SyntaxKind::Semicolon, - ':' => SyntaxKind::Colon, - '.' => SyntaxKind::Dot, - '+' => SyntaxKind::Plus, - '-' => SyntaxKind::Minus, - '*' => SyntaxKind::Star, - '/' => SyntaxKind::Slash, - '=' => SyntaxKind::Eq, - '<' => SyntaxKind::Lt, - '>' => SyntaxKind::Gt, - - c if is_id_start(c) => self.ident(start), - - c => self.error(eco_format!("the character `{c}` is not valid in code")), - } - } - - fn ident(&mut self, start: usize) -> SyntaxKind { - self.s.eat_while(is_id_continue); - let ident = self.s.from(start); - - let prev = self.s.get(0..start); - if !prev.ends_with(['.', '@']) || prev.ends_with("..") { - if let Some(keyword) = keyword(ident) { - return keyword; - } - } - - if ident == "_" { - SyntaxKind::Underscore - } else { - SyntaxKind::Ident - } - } - - fn number(&mut self, mut start: usize, c: char) -> SyntaxKind { - // Handle alternative integer bases. - let mut base = 10; - if c == '0' { - if self.s.eat_if('b') { - base = 2; - } else if self.s.eat_if('o') { - base = 8; - } else if self.s.eat_if('x') { - base = 16; - } - if base != 10 { - start = self.s.cursor(); - } - } - - // Read the first part (integer or fractional depending on `first`). - self.s.eat_while(if base == 16 { - char::is_ascii_alphanumeric - } else { - char::is_ascii_digit - }); - - // Read the fractional part if not already done. - // Make sure not to confuse a range for the decimal separator. - if c != '.' - && !self.s.at("..") - && !self.s.scout(1).map_or(false, is_id_start) - && self.s.eat_if('.') - && base == 10 - { - self.s.eat_while(char::is_ascii_digit); - } - - // Read the exponent. - if !self.s.at("em") && self.s.eat_if(['e', 'E']) && base == 10 { - self.s.eat_if(['+', '-']); - self.s.eat_while(char::is_ascii_digit); - } - - // Read the suffix. - let suffix_start = self.s.cursor(); - if !self.s.eat_if('%') { - self.s.eat_while(char::is_ascii_alphanumeric); - } - - let number = self.s.get(start..suffix_start); - let suffix = self.s.from(suffix_start); - - let kind = if i64::from_str_radix(number, base).is_ok() { - SyntaxKind::Int - } else if base == 10 && number.parse::().is_ok() { - SyntaxKind::Float - } else { - return self.error(match base { - 2 => eco_format!("invalid binary number: 0b{}", number), - 8 => eco_format!("invalid octal number: 0o{}", number), - 16 => eco_format!("invalid hexadecimal number: 0x{}", number), - _ => eco_format!("invalid number: {}", number), - }); - }; - - if suffix.is_empty() { - return kind; - } - - if !matches!( - suffix, - "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" - ) { - return self.error(eco_format!("invalid number suffix: {}", suffix)); - } - - SyntaxKind::Numeric - } - - fn string(&mut self) -> SyntaxKind { - let mut escaped = false; - self.s.eat_until(|c| { - let stop = c == '"' && !escaped; - escaped = c == '\\' && !escaped; - stop - }); - - if !self.s.eat_if('"') { - return self.error("unclosed string"); - } - - SyntaxKind::Str - } -} - -/// Try to parse an identifier into a keyword. -fn keyword(ident: &str) -> Option { - Some(match ident { - "none" => SyntaxKind::None, - "auto" => SyntaxKind::Auto, - "true" => SyntaxKind::Bool, - "false" => SyntaxKind::Bool, - "not" => SyntaxKind::Not, - "and" => SyntaxKind::And, - "or" => SyntaxKind::Or, - "let" => SyntaxKind::Let, - "set" => SyntaxKind::Set, - "show" => SyntaxKind::Show, - "if" => SyntaxKind::If, - "else" => SyntaxKind::Else, - "for" => SyntaxKind::For, - "in" => SyntaxKind::In, - "while" => SyntaxKind::While, - "break" => SyntaxKind::Break, - "continue" => SyntaxKind::Continue, - "return" => SyntaxKind::Return, - "import" => SyntaxKind::Import, - "include" => SyntaxKind::Include, - "as" => SyntaxKind::As, - _ => return None, - }) -} - -/// Whether this character denotes a newline. -#[inline] -pub fn is_newline(character: char) -> bool { - matches!( - character, - // Line Feed, Vertical Tab, Form Feed, Carriage Return. - '\n' | '\x0B' | '\x0C' | '\r' | - // Next Line, Line Separator, Paragraph Separator. - '\u{0085}' | '\u{2028}' | '\u{2029}' - ) -} - -/// Split text at newlines. -pub(super) fn split_newlines(text: &str) -> Vec<&str> { - let mut s = Scanner::new(text); - let mut lines = Vec::new(); - let mut start = 0; - let mut end = 0; - - while let Some(c) = s.eat() { - if is_newline(c) { - if c == '\r' { - s.eat_if('\n'); - } - - lines.push(&text[start..end]); - start = s.cursor(); - } - end = s.cursor(); - } - - lines.push(&text[start..]); - lines -} - -/// Count the number of newlines in text. -fn count_newlines(text: &str) -> usize { - let mut newlines = 0; - let mut s = Scanner::new(text); - while let Some(c) = s.eat() { - if is_newline(c) { - if c == '\r' { - s.eat_if('\n'); - } - newlines += 1; - } - } - newlines -} - -/// Whether a string is a valid Typst identifier. -/// -/// In addition to what is specified in the [Unicode Standard][uax31], we allow: -/// - `_` as a starting character, -/// - `_` and `-` as continuing characters. -/// -/// [uax31]: http://www.unicode.org/reports/tr31/ -#[inline] -pub fn is_ident(string: &str) -> bool { - let mut chars = string.chars(); - chars - .next() - .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) -} - -/// Whether a character can start an identifier. -#[inline] -pub(crate) fn is_id_start(c: char) -> bool { - is_xid_start(c) || c == '_' -} - -/// Whether a character can continue an identifier. -#[inline] -pub(crate) fn is_id_continue(c: char) -> bool { - is_xid_continue(c) || c == '_' || c == '-' -} - -/// Whether a character can start an identifier in math. -#[inline] -fn is_math_id_start(c: char) -> bool { - is_xid_start(c) -} - -/// Whether a character can continue an identifier in math. -#[inline] -fn is_math_id_continue(c: char) -> bool { - is_xid_continue(c) && c != '_' -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs deleted file mode 100644 index 1ce1e4c0..00000000 --- a/src/syntax/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Syntax definition, parsing, and highlighting. - -pub mod ast; - -mod kind; -mod lexer; -mod node; -mod parser; -mod reparser; -mod source; -mod span; - -pub use self::kind::SyntaxKind; -pub use self::lexer::{is_ident, is_newline}; -pub use self::node::{LinkedChildren, LinkedNode, SyntaxNode}; -pub use self::parser::{parse, parse_code}; -pub use self::source::Source; -pub use self::span::{Span, Spanned}; - -pub(crate) use self::lexer::{is_id_continue, is_id_start}; - -use self::lexer::{split_newlines, LexMode, Lexer}; -use self::parser::{reparse_block, reparse_markup}; diff --git a/src/syntax/node.rs b/src/syntax/node.rs deleted file mode 100644 index 6a66416d..00000000 --- a/src/syntax/node.rs +++ /dev/null @@ -1,889 +0,0 @@ -use std::fmt::{self, Debug, Display, Formatter}; -use std::ops::{Deref, Range}; -use std::rc::Rc; -use std::sync::Arc; - -use ecow::EcoString; - -use super::ast::AstNode; -use super::{Span, SyntaxKind}; -use crate::diag::SourceError; -use crate::file::FileId; - -/// A node in the untyped syntax tree. -#[derive(Clone, Eq, PartialEq, Hash)] -pub struct SyntaxNode(Repr); - -/// The three internal representations. -#[derive(Clone, Eq, PartialEq, Hash)] -enum Repr { - /// A leaf node. - Leaf(LeafNode), - /// A reference-counted inner node. - Inner(Arc), - /// An error node. - Error(Arc), -} - -impl SyntaxNode { - /// Create a new leaf node. - pub fn leaf(kind: SyntaxKind, text: impl Into) -> Self { - Self(Repr::Leaf(LeafNode::new(kind, text))) - } - - /// Create a new inner node with children. - pub fn inner(kind: SyntaxKind, children: Vec) -> Self { - Self(Repr::Inner(Arc::new(InnerNode::new(kind, children)))) - } - - /// Create a new error node. - pub fn error(message: impl Into, text: impl Into) -> Self { - Self(Repr::Error(Arc::new(ErrorNode::new(message, text)))) - } - - /// The type of the node. - pub fn kind(&self) -> SyntaxKind { - match &self.0 { - Repr::Leaf(leaf) => leaf.kind, - Repr::Inner(inner) => inner.kind, - Repr::Error(_) => SyntaxKind::Error, - } - } - - /// Return `true` if the length is 0. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// The byte length of the node in the source text. - pub fn len(&self) -> usize { - match &self.0 { - Repr::Leaf(leaf) => leaf.len(), - Repr::Inner(inner) => inner.len, - Repr::Error(error) => error.len(), - } - } - - /// The span of the node. - pub fn span(&self) -> Span { - match &self.0 { - Repr::Leaf(leaf) => leaf.span, - Repr::Inner(inner) => inner.span, - Repr::Error(error) => error.span, - } - } - - /// The text of the node if it is a leaf node. - /// - /// Returns the empty string if this is an inner node. - pub fn text(&self) -> &EcoString { - static EMPTY: EcoString = EcoString::new(); - match &self.0 { - Repr::Leaf(leaf) => &leaf.text, - Repr::Error(error) => &error.text, - Repr::Inner(_) => &EMPTY, - } - } - - /// Extract the text from the node. - /// - /// Builds the string if this is an inner node. - pub fn into_text(self) -> EcoString { - match self.0 { - Repr::Leaf(leaf) => leaf.text, - Repr::Error(error) => error.text.clone(), - Repr::Inner(node) => { - node.children.iter().cloned().map(Self::into_text).collect() - } - } - } - - /// The node's children. - pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { - match &self.0 { - Repr::Leaf(_) | Repr::Error(_) => [].iter(), - Repr::Inner(inner) => inner.children.iter(), - } - } - - /// Whether the node can be cast to the given AST node. - pub fn is(&self) -> bool { - self.cast::().is_some() - } - - /// Try to convert the node to a typed AST node. - pub fn cast(&self) -> Option { - T::from_untyped(self) - } - - /// Cast the first child that can cast to the AST type `T`. - pub fn cast_first_match(&self) -> Option { - self.children().find_map(Self::cast) - } - - /// Cast the last child that can cast to the AST type `T`. - pub fn cast_last_match(&self) -> Option { - self.children().rev().find_map(Self::cast) - } - - /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { - match &self.0 { - Repr::Leaf(_) => false, - Repr::Inner(node) => node.erroneous, - Repr::Error(_) => true, - } - } - - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { - if !self.erroneous() { - return vec![]; - } - - if let Repr::Error(error) = &self.0 { - vec![SourceError::new(error.span, error.message.clone())] - } else { - self.children() - .filter(|node| node.erroneous()) - .flat_map(|node| node.errors()) - .collect() - } - } - - /// Set a synthetic span for the node and all its descendants. - pub fn synthesize(&mut self, span: Span) { - match &mut self.0 { - Repr::Leaf(leaf) => leaf.span = span, - Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span), - Repr::Error(error) => Arc::make_mut(error).span = span, - } - } -} - -impl SyntaxNode { - /// Mark this node as erroneous. - pub(super) fn make_erroneous(&mut self) { - if let Repr::Inner(inner) = &mut self.0 { - Arc::make_mut(inner).erroneous = true; - } - } - - /// Convert the child to another kind. - #[track_caller] - pub(super) fn convert_to_kind(&mut self, kind: SyntaxKind) { - debug_assert!(!kind.is_error()); - match &mut self.0 { - Repr::Leaf(leaf) => leaf.kind = kind, - Repr::Inner(inner) => Arc::make_mut(inner).kind = kind, - Repr::Error(_) => panic!("cannot convert error"), - } - } - - /// Convert the child to an error. - pub(super) fn convert_to_error(&mut self, message: impl Into) { - let text = std::mem::take(self).into_text(); - *self = SyntaxNode::error(message, text); - } - - /// Assign spans to each node. - #[tracing::instrument(skip_all)] - pub(super) fn numberize( - &mut self, - id: FileId, - within: Range, - ) -> NumberingResult { - if within.start >= within.end { - return Err(Unnumberable); - } - - let mid = Span::new(id, (within.start + within.end) / 2); - match &mut self.0 { - Repr::Leaf(leaf) => leaf.span = mid, - Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?, - Repr::Error(error) => Arc::make_mut(error).span = mid, - } - - Ok(()) - } - - /// Whether this is a leaf node. - pub(super) fn is_leaf(&self) -> bool { - matches!(self.0, Repr::Leaf(_)) - } - - /// The number of descendants, including the node itself. - pub(super) fn descendants(&self) -> usize { - match &self.0 { - Repr::Leaf(_) | Repr::Error(_) => 1, - Repr::Inner(inner) => inner.descendants, - } - } - - /// The node's children, mutably. - pub(super) fn children_mut(&mut self) -> &mut [SyntaxNode] { - match &mut self.0 { - Repr::Leaf(_) | Repr::Error(_) => &mut [], - Repr::Inner(inner) => &mut Arc::make_mut(inner).children, - } - } - - /// Replaces a range of children with a replacement. - /// - /// May have mutated the children if it returns `Err(_)`. - pub(super) fn replace_children( - &mut self, - range: Range, - replacement: Vec, - ) -> NumberingResult { - if let Repr::Inner(inner) = &mut self.0 { - Arc::make_mut(inner).replace_children(range, replacement)?; - } - Ok(()) - } - - /// Update this node after changes were made to one of its children. - pub(super) fn update_parent( - &mut self, - prev_len: usize, - new_len: usize, - prev_descendants: usize, - new_descendants: usize, - ) { - if let Repr::Inner(inner) = &mut self.0 { - Arc::make_mut(inner).update_parent( - prev_len, - new_len, - prev_descendants, - new_descendants, - ); - } - } - - /// The upper bound of assigned numbers in this subtree. - pub(super) fn upper(&self) -> u64 { - match &self.0 { - Repr::Inner(inner) => inner.upper, - Repr::Leaf(leaf) => leaf.span.number() + 1, - Repr::Error(error) => error.span.number() + 1, - } - } -} - -impl Debug for SyntaxNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match &self.0 { - Repr::Inner(node) => node.fmt(f), - Repr::Leaf(node) => node.fmt(f), - Repr::Error(node) => node.fmt(f), - } - } -} - -impl Default for SyntaxNode { - fn default() -> Self { - Self::error("", "") - } -} - -/// A leaf node in the untyped syntax tree. -#[derive(Clone, Eq, PartialEq, Hash)] -struct LeafNode { - /// What kind of node this is (each kind would have its own struct in a - /// strongly typed AST). - kind: SyntaxKind, - /// The source text of the node. - text: EcoString, - /// The node's span. - span: Span, -} - -impl LeafNode { - /// Create a new leaf node. - #[track_caller] - fn new(kind: SyntaxKind, text: impl Into) -> Self { - debug_assert!(!kind.is_error()); - Self { kind, text: text.into(), span: Span::detached() } - } - - /// The byte length of the node in the source text. - fn len(&self) -> usize { - self.text.len() - } -} - -impl Debug for LeafNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {:?}", self.kind, self.text) - } -} - -/// An inner node in the untyped syntax tree. -#[derive(Clone, Eq, PartialEq, Hash)] -struct InnerNode { - /// What kind of node this is (each kind would have its own struct in a - /// strongly typed AST). - kind: SyntaxKind, - /// The byte length of the node in the source. - len: usize, - /// The node's span. - span: Span, - /// The number of nodes in the whole subtree, including this node. - descendants: usize, - /// Whether this node or any of its children are erroneous. - erroneous: bool, - /// The upper bound of this node's numbering range. - upper: u64, - /// This node's children, losslessly make up this node. - children: Vec, -} - -impl InnerNode { - /// Create a new inner node with the given kind and children. - #[track_caller] - fn new(kind: SyntaxKind, children: Vec) -> Self { - debug_assert!(!kind.is_error()); - - let mut len = 0; - let mut descendants = 1; - let mut erroneous = false; - - for child in &children { - len += child.len(); - descendants += child.descendants(); - erroneous |= child.erroneous(); - } - - Self { - kind, - len, - span: Span::detached(), - descendants, - erroneous, - upper: 0, - children, - } - } - - /// Set a synthetic span for the node and all its descendants. - fn synthesize(&mut self, span: Span) { - self.span = span; - self.upper = span.number(); - for child in &mut self.children { - child.synthesize(span); - } - } - - /// Assign span numbers `within` an interval to this node's subtree or just - /// a `range` of its children. - fn numberize( - &mut self, - id: FileId, - range: Option>, - within: Range, - ) -> NumberingResult { - // Determine how many nodes we will number. - let descendants = match &range { - Some(range) if range.is_empty() => return Ok(()), - Some(range) => self.children[range.clone()] - .iter() - .map(SyntaxNode::descendants) - .sum::(), - None => self.descendants, - }; - - // Determine the distance between two neighbouring assigned numbers. If - // possible, we try to fit all numbers into the left half of `within` - // so that there is space for future insertions. - let space = within.end - within.start; - let mut stride = space / (2 * descendants as u64); - if stride == 0 { - stride = space / self.descendants as u64; - if stride == 0 { - return Err(Unnumberable); - } - } - - // Number the node itself. - let mut start = within.start; - if range.is_none() { - let end = start + stride; - self.span = Span::new(id, (start + end) / 2); - self.upper = within.end; - start = end; - } - - // Number the children. - let len = self.children.len(); - for child in &mut self.children[range.unwrap_or(0..len)] { - let end = start + child.descendants() as u64 * stride; - child.numberize(id, start..end)?; - start = end; - } - - Ok(()) - } - - /// Replaces a range of children with a replacement. - /// - /// May have mutated the children if it returns `Err(_)`. - fn replace_children( - &mut self, - mut range: Range, - replacement: Vec, - ) -> NumberingResult { - let superseded = &self.children[range.clone()]; - - // Compute the new byte length. - self.len = self.len + replacement.iter().map(SyntaxNode::len).sum::() - - superseded.iter().map(SyntaxNode::len).sum::(); - - // Compute the new number of descendants. - self.descendants = self.descendants - + replacement.iter().map(SyntaxNode::descendants).sum::() - - superseded.iter().map(SyntaxNode::descendants).sum::(); - - // Determine whether we're still erroneous after the replacement. That's - // the case if - // - any of the new nodes is erroneous, - // - or if we were erroneous before due to a non-superseded node. - self.erroneous = replacement.iter().any(SyntaxNode::erroneous) - || (self.erroneous - && (self.children[..range.start].iter().any(SyntaxNode::erroneous)) - || self.children[range.end..].iter().any(SyntaxNode::erroneous)); - - // Perform the replacement. - let replacement_count = replacement.len(); - self.children.splice(range.clone(), replacement); - range.end = range.start + replacement_count; - - // Renumber the new children. Retries until it works, taking - // exponentially more children into account. - let mut left = 0; - let mut right = 0; - let max_left = range.start; - let max_right = self.children.len() - range.end; - loop { - let renumber = range.start - left..range.end + right; - - // The minimum assignable number is either - // - the upper bound of the node right before the to-be-renumbered - // children, - // - or this inner node's span number plus one if renumbering starts - // at the first child. - let start_number = renumber - .start - .checked_sub(1) - .and_then(|i| self.children.get(i)) - .map_or(self.span.number() + 1, |child| child.upper()); - - // The upper bound for renumbering is either - // - the span number of the first child after the to-be-renumbered - // children, - // - or this node's upper bound if renumbering ends behind the last - // child. - let end_number = self - .children - .get(renumber.end) - .map_or(self.upper, |next| next.span().number()); - - // Try to renumber. - let within = start_number..end_number; - let id = self.span.id(); - if self.numberize(id, Some(renumber), within).is_ok() { - return Ok(()); - } - - // If it didn't even work with all children, we give up. - if left == max_left && right == max_right { - return Err(Unnumberable); - } - - // Exponential expansion to both sides. - left = (left + 1).next_power_of_two().min(max_left); - right = (right + 1).next_power_of_two().min(max_right); - } - } - - /// Update this node after changes were made to one of its children. - fn update_parent( - &mut self, - prev_len: usize, - new_len: usize, - prev_descendants: usize, - new_descendants: usize, - ) { - self.len = self.len + new_len - prev_len; - self.descendants = self.descendants + new_descendants - prev_descendants; - self.erroneous = self.children.iter().any(SyntaxNode::erroneous); - } -} - -impl Debug for InnerNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len)?; - if !self.children.is_empty() { - f.write_str(" ")?; - f.debug_list().entries(&self.children).finish()?; - } - Ok(()) - } -} - -/// An error node in the untyped syntax tree. -#[derive(Clone, Eq, PartialEq, Hash)] -struct ErrorNode { - /// The error message. - message: EcoString, - /// The source text of the node. - text: EcoString, - /// The node's span. - span: Span, -} - -impl ErrorNode { - /// Create new error node. - fn new(message: impl Into, text: impl Into) -> Self { - Self { - message: message.into(), - text: text.into(), - span: Span::detached(), - } - } - - /// The byte length of the node in the source text. - fn len(&self) -> usize { - self.text.len() - } -} - -impl Debug for ErrorNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Error: {:?} ({})", self.text, self.message) - } -} - -/// A syntax node in a context. -/// -/// Knows its exact offset in the file and provides access to its -/// children, parent and siblings. -/// -/// **Note that all sibling and leaf accessors skip over trivia!** -#[derive(Clone)] -pub struct LinkedNode<'a> { - node: &'a SyntaxNode, - parent: Option>, - index: usize, - offset: usize, -} - -impl<'a> LinkedNode<'a> { - /// Start a new traversal at a root node. - pub fn new(root: &'a SyntaxNode) -> Self { - Self { node: root, parent: None, index: 0, offset: 0 } - } - - /// Get the contained syntax node. - pub fn get(&self) -> &'a SyntaxNode { - self.node - } - - /// The index of this node in its parent's children list. - pub fn index(&self) -> usize { - self.index - } - - /// The absolute byte offset of this node in the source file. - pub fn offset(&self) -> usize { - self.offset - } - - /// The byte range of this node in the source file. - pub fn range(&self) -> Range { - self.offset..self.offset + self.node.len() - } - - /// An iterator over this node's children. - pub fn children(&self) -> LinkedChildren<'a> { - LinkedChildren { - parent: Rc::new(self.clone()), - iter: self.node.children().enumerate(), - front: self.offset, - back: self.offset + self.len(), - } - } - - /// Find a descendant with the given span. - pub fn find(&self, span: Span) -> Option> { - if self.span() == span { - return Some(self.clone()); - } - - if let Repr::Inner(inner) = &self.0 { - // The parent of a subtree has a smaller span number than all of its - // descendants. Therefore, we can bail out early if the target span's - // number is smaller than our number. - if span.number() < inner.span.number() { - return None; - } - - let mut children = self.children().peekable(); - while let Some(child) = children.next() { - // Every node in this child's subtree has a smaller span number than - // the next sibling. Therefore we only need to recurse if the next - // sibling's span number is larger than the target span's number. - if children - .peek() - .map_or(true, |next| next.span().number() > span.number()) - { - if let Some(found) = child.find(span) { - return Some(found); - } - } - } - } - - None - } -} - -/// Access to parents and siblings. -impl<'a> LinkedNode<'a> { - /// Get this node's parent. - pub fn parent(&self) -> Option<&Self> { - self.parent.as_deref() - } - - /// Get the first previous non-trivia sibling node. - pub fn prev_sibling(&self) -> Option { - let parent = self.parent()?; - let index = self.index.checked_sub(1)?; - let node = parent.node.children().nth(index)?; - let offset = self.offset - node.len(); - let prev = Self { node, parent: self.parent.clone(), index, offset }; - if prev.kind().is_trivia() { - prev.prev_sibling() - } else { - Some(prev) - } - } - - /// Get the next non-trivia sibling node. - pub fn next_sibling(&self) -> Option { - let parent = self.parent()?; - let index = self.index.checked_add(1)?; - let node = parent.node.children().nth(index)?; - let offset = self.offset + self.node.len(); - let next = Self { node, parent: self.parent.clone(), index, offset }; - if next.kind().is_trivia() { - next.next_sibling() - } else { - Some(next) - } - } - - /// Get the kind of this node's parent. - pub fn parent_kind(&self) -> Option { - Some(self.parent()?.node.kind()) - } - - /// Get the kind of this node's first previous non-trivia sibling. - pub fn prev_sibling_kind(&self) -> Option { - Some(self.prev_sibling()?.node.kind()) - } - - /// Get the kind of this node's next non-trivia sibling. - pub fn next_sibling_kind(&self) -> Option { - Some(self.next_sibling()?.node.kind()) - } -} - -/// Access to leafs. -impl<'a> LinkedNode<'a> { - /// Get the rightmost non-trivia leaf before this node. - pub fn prev_leaf(&self) -> Option { - let mut node = self.clone(); - while let Some(prev) = node.prev_sibling() { - if let Some(leaf) = prev.rightmost_leaf() { - return Some(leaf); - } - node = prev; - } - self.parent()?.prev_leaf() - } - - /// Find the leftmost contained non-trivia leaf. - pub fn leftmost_leaf(&self) -> Option { - if self.is_leaf() && !self.kind().is_trivia() && !self.kind().is_error() { - return Some(self.clone()); - } - - for child in self.children() { - if let Some(leaf) = child.leftmost_leaf() { - return Some(leaf); - } - } - - None - } - - /// Get the leaf at the specified byte offset. - pub fn leaf_at(&self, cursor: usize) -> Option { - if self.node.children().len() == 0 && cursor <= self.offset + self.len() { - return Some(self.clone()); - } - - let mut offset = self.offset; - let count = self.node.children().len(); - for (i, child) in self.children().enumerate() { - let len = child.len(); - if (offset < cursor && cursor <= offset + len) - || (offset == cursor && i + 1 == count) - { - return child.leaf_at(cursor); - } - offset += len; - } - - None - } - - /// Find the rightmost contained non-trivia leaf. - pub fn rightmost_leaf(&self) -> Option { - if self.is_leaf() && !self.kind().is_trivia() { - return Some(self.clone()); - } - - for child in self.children().rev() { - if let Some(leaf) = child.rightmost_leaf() { - return Some(leaf); - } - } - - None - } - - /// Get the leftmost non-trivia leaf after this node. - pub fn next_leaf(&self) -> Option { - let mut node = self.clone(); - while let Some(next) = node.next_sibling() { - if let Some(leaf) = next.leftmost_leaf() { - return Some(leaf); - } - node = next; - } - self.parent()?.next_leaf() - } -} - -impl Deref for LinkedNode<'_> { - type Target = SyntaxNode; - - fn deref(&self) -> &Self::Target { - self.get() - } -} - -impl Debug for LinkedNode<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.node.fmt(f) - } -} - -/// An iterator over the children of a linked node. -pub struct LinkedChildren<'a> { - parent: Rc>, - iter: std::iter::Enumerate>, - front: usize, - back: usize, -} - -impl<'a> Iterator for LinkedChildren<'a> { - type Item = LinkedNode<'a>; - - fn next(&mut self) -> Option { - self.iter.next().map(|(index, node)| { - let offset = self.front; - self.front += node.len(); - LinkedNode { - node, - parent: Some(self.parent.clone()), - index, - offset, - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl DoubleEndedIterator for LinkedChildren<'_> { - fn next_back(&mut self) -> Option { - self.iter.next_back().map(|(index, node)| { - self.back -= node.len(); - LinkedNode { - node, - parent: Some(self.parent.clone()), - index, - offset: self.back, - } - }) - } -} - -impl ExactSizeIterator for LinkedChildren<'_> {} - -/// Result of numbering a node within an interval. -pub(super) type NumberingResult = Result<(), Unnumberable>; - -/// Indicates that a node cannot be numbered within a given interval. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(super) struct Unnumberable; - -impl Display for Unnumberable { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad("cannot number within this interval") - } -} - -impl std::error::Error for Unnumberable {} - -#[cfg(test)] -mod tests { - use super::*; - use crate::syntax::Source; - - #[test] - fn test_linked_node() { - let source = Source::detached("#set text(12pt, red)"); - - // Find "text". - let node = LinkedNode::new(source.root()).leaf_at(7).unwrap(); - assert_eq!(node.offset(), 5); - assert_eq!(node.text(), "text"); - - // Go back to "#set". Skips the space. - let prev = node.prev_sibling().unwrap(); - assert_eq!(prev.offset(), 1); - assert_eq!(prev.text(), "set"); - } - - #[test] - fn test_linked_node_non_trivia_leaf() { - let source = Source::detached("#set fun(12pt, red)"); - let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap(); - let prev = leaf.prev_leaf().unwrap(); - assert_eq!(leaf.text(), "fun"); - assert_eq!(prev.text(), "set"); - - let source = Source::detached("#let x = 10"); - let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap(); - let prev = leaf.prev_leaf().unwrap(); - let next = leaf.next_leaf().unwrap(); - assert_eq!(prev.text(), "="); - assert_eq!(leaf.text(), " "); - assert_eq!(next.text(), "10"); - } -} diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs deleted file mode 100644 index 54670df5..00000000 --- a/src/syntax/parser.rs +++ /dev/null @@ -1,1643 +0,0 @@ -use std::collections::HashSet; -use std::ops::Range; - -use ecow::{eco_format, EcoString}; -use unicode_math_class::MathClass; - -use super::{ast, is_newline, LexMode, Lexer, SyntaxKind, SyntaxNode}; - -/// Parse a source file. -pub fn parse(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, 0, LexMode::Markup); - markup(&mut p, true, 0, |_| false); - p.finish().into_iter().next().unwrap() -} - -/// Parse code directly. -/// -/// This is only used for syntax highlighting. -pub fn parse_code(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, 0, LexMode::Code); - let m = p.marker(); - p.skip(); - code_exprs(&mut p, |_| false); - p.wrap_skipless(m, SyntaxKind::Code); - p.finish().into_iter().next().unwrap() -} - -fn markup( - p: &mut Parser, - mut at_start: bool, - min_indent: usize, - mut stop: impl FnMut(&Parser) -> bool, -) { - let m = p.marker(); - let mut nesting: usize = 0; - while !p.eof() { - match p.current() { - SyntaxKind::LeftBracket => nesting += 1, - SyntaxKind::RightBracket if nesting > 0 => nesting -= 1, - _ if stop(p) => break, - _ => {} - } - - if p.newline() { - at_start = true; - if min_indent > 0 && p.column(p.current_end()) < min_indent { - break; - } - p.eat(); - continue; - } - - let prev = p.prev_end(); - markup_expr(p, &mut at_start); - if !p.progress(prev) { - p.unexpected(); - } - } - p.wrap(m, SyntaxKind::Markup); -} - -pub(super) fn reparse_markup( - text: &str, - range: Range, - at_start: &mut bool, - nesting: &mut usize, - mut stop: impl FnMut(SyntaxKind) -> bool, -) -> Option> { - let mut p = Parser::new(text, range.start, LexMode::Markup); - while !p.eof() && p.current_start() < range.end { - match p.current() { - SyntaxKind::LeftBracket => *nesting += 1, - SyntaxKind::RightBracket if *nesting > 0 => *nesting -= 1, - _ if stop(p.current()) => break, - _ => {} - } - - if p.newline() { - *at_start = true; - p.eat(); - continue; - } - - let prev = p.prev_end(); - markup_expr(&mut p, at_start); - if !p.progress(prev) { - p.unexpected(); - } - } - (p.balanced && p.current_start() == range.end).then(|| p.finish()) -} - -fn markup_expr(p: &mut Parser, at_start: &mut bool) { - match p.current() { - SyntaxKind::Space - | SyntaxKind::Parbreak - | SyntaxKind::LineComment - | SyntaxKind::BlockComment => { - p.eat(); - return; - } - - SyntaxKind::Text - | SyntaxKind::Linebreak - | SyntaxKind::Escape - | SyntaxKind::Shorthand - | SyntaxKind::SmartQuote - | SyntaxKind::Raw - | SyntaxKind::Link - | SyntaxKind::Label => p.eat(), - - SyntaxKind::Hashtag => embedded_code_expr(p), - SyntaxKind::Star => strong(p), - SyntaxKind::Underscore => emph(p), - SyntaxKind::HeadingMarker if *at_start => heading(p), - SyntaxKind::ListMarker if *at_start => list_item(p), - SyntaxKind::EnumMarker if *at_start => enum_item(p), - SyntaxKind::TermMarker if *at_start => term_item(p), - SyntaxKind::RefMarker => reference(p), - SyntaxKind::Dollar => equation(p), - - SyntaxKind::LeftBracket - | SyntaxKind::RightBracket - | SyntaxKind::HeadingMarker - | SyntaxKind::ListMarker - | SyntaxKind::EnumMarker - | SyntaxKind::TermMarker - | SyntaxKind::Colon => p.convert(SyntaxKind::Text), - - _ => {} - } - - *at_start = false; -} - -fn strong(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Star); - markup(p, false, 0, |p| { - p.at(SyntaxKind::Star) - || p.at(SyntaxKind::Parbreak) - || p.at(SyntaxKind::RightBracket) - }); - p.expect_closing_delimiter(m, SyntaxKind::Star); - p.wrap(m, SyntaxKind::Strong); -} - -fn emph(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Underscore); - markup(p, false, 0, |p| { - p.at(SyntaxKind::Underscore) - || p.at(SyntaxKind::Parbreak) - || p.at(SyntaxKind::RightBracket) - }); - p.expect_closing_delimiter(m, SyntaxKind::Underscore); - p.wrap(m, SyntaxKind::Emph); -} - -fn heading(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::HeadingMarker); - whitespace_line(p); - markup(p, false, usize::MAX, |p| { - p.at(SyntaxKind::Label) - || p.at(SyntaxKind::RightBracket) - || (p.at(SyntaxKind::Space) && p.lexer.clone().next() == SyntaxKind::Label) - }); - p.wrap(m, SyntaxKind::Heading); -} - -fn list_item(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::ListMarker); - let min_indent = p.column(p.prev_end()); - whitespace_line(p); - markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket)); - p.wrap(m, SyntaxKind::ListItem); -} - -fn enum_item(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::EnumMarker); - let min_indent = p.column(p.prev_end()); - whitespace_line(p); - markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket)); - p.wrap(m, SyntaxKind::EnumItem); -} - -fn term_item(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::TermMarker); - let min_indent = p.column(p.prev_end()); - whitespace_line(p); - markup(p, false, usize::MAX, |p| { - p.at(SyntaxKind::Colon) || p.at(SyntaxKind::RightBracket) - }); - p.expect(SyntaxKind::Colon); - whitespace_line(p); - markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket)); - p.wrap(m, SyntaxKind::TermItem); -} - -fn reference(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::RefMarker); - if p.directly_at(SyntaxKind::LeftBracket) { - content_block(p); - } - p.wrap(m, SyntaxKind::Ref); -} - -fn whitespace_line(p: &mut Parser) { - while !p.newline() && p.current().is_trivia() { - p.eat(); - } -} - -fn equation(p: &mut Parser) { - let m = p.marker(); - p.enter(LexMode::Math); - p.assert(SyntaxKind::Dollar); - math(p, |p| p.at(SyntaxKind::Dollar)); - p.expect_closing_delimiter(m, SyntaxKind::Dollar); - p.exit(); - p.wrap(m, SyntaxKind::Equation); -} - -fn math(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { - let m = p.marker(); - while !p.eof() && !stop(p) { - let prev = p.prev_end(); - math_expr(p); - if !p.progress(prev) { - p.unexpected(); - } - } - p.wrap(m, SyntaxKind::Math); -} - -fn math_expr(p: &mut Parser) { - math_expr_prec(p, 0, SyntaxKind::Eof) -} - -fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { - let m = p.marker(); - let mut continuable = false; - match p.current() { - SyntaxKind::Hashtag => embedded_code_expr(p), - SyntaxKind::MathIdent => { - continuable = true; - p.eat(); - while p.directly_at(SyntaxKind::Text) - && p.current_text() == "." - && matches!( - p.lexer.clone().next(), - SyntaxKind::MathIdent | SyntaxKind::Text - ) - { - p.convert(SyntaxKind::Dot); - p.convert(SyntaxKind::Ident); - p.wrap(m, SyntaxKind::FieldAccess); - } - if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "(" - { - math_args(p); - p.wrap(m, SyntaxKind::FuncCall); - continuable = false; - } - } - - SyntaxKind::Text | SyntaxKind::Shorthand => { - continuable = matches!( - math_class(p.current_text()), - None | Some(MathClass::Alphabetic) - ); - if !maybe_delimited(p, true) { - p.eat(); - } - } - - SyntaxKind::Linebreak | SyntaxKind::MathAlignPoint => p.eat(), - SyntaxKind::Escape | SyntaxKind::Str => { - continuable = true; - p.eat(); - } - - SyntaxKind::Root => { - if min_prec < 3 { - p.eat(); - let m2 = p.marker(); - math_expr_prec(p, 2, stop); - math_unparen(p, m2); - p.wrap(m, SyntaxKind::MathRoot); - } - } - - _ => p.expected("expression"), - } - - if continuable - && min_prec < 3 - && p.prev_end() == p.current_start() - && maybe_delimited(p, false) - { - p.wrap(m, SyntaxKind::Math); - } - - while !p.eof() && !p.at(stop) { - if p.directly_at(SyntaxKind::Text) && p.current_text() == "!" { - p.eat(); - p.wrap(m, SyntaxKind::Math); - continue; - } - - let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else { - break; - }; - - if prec < min_prec { - break; - } - - match assoc { - ast::Assoc::Left => prec += 1, - ast::Assoc::Right => {} - } - - if kind == SyntaxKind::MathFrac { - math_unparen(p, m); - } - - p.eat(); - let m2 = p.marker(); - math_expr_prec(p, prec, stop); - math_unparen(p, m2); - - if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) { - let m3 = p.marker(); - math_expr_prec(p, prec, SyntaxKind::Eof); - math_unparen(p, m3); - } - - p.wrap(m, kind); - } -} - -fn maybe_delimited(p: &mut Parser, allow_fence: bool) -> bool { - if allow_fence && math_class(p.current_text()) == Some(MathClass::Fence) { - math_delimited(p, MathClass::Fence); - true - } else if math_class(p.current_text()) == Some(MathClass::Opening) { - math_delimited(p, MathClass::Closing); - true - } else { - false - } -} - -fn math_delimited(p: &mut Parser, stop: MathClass) { - let m = p.marker(); - p.eat(); - let m2 = p.marker(); - while !p.eof() && !p.at(SyntaxKind::Dollar) { - let class = math_class(p.current_text()); - if stop == MathClass::Fence && class == Some(MathClass::Closing) { - break; - } - - if class == Some(stop) { - p.wrap(m2, SyntaxKind::Math); - p.eat(); - p.wrap(m, SyntaxKind::MathDelimited); - return; - } - - let prev = p.prev_end(); - math_expr(p); - if !p.progress(prev) { - p.unexpected(); - } - } - - p.wrap(m, SyntaxKind::Math); -} - -fn math_unparen(p: &mut Parser, m: Marker) { - let Some(node) = p.nodes.get_mut(m.0) else { return }; - if node.kind() != SyntaxKind::MathDelimited { - return; - } - - if let [first, .., last] = node.children_mut() { - if first.text() == "(" && last.text() == ")" { - first.convert_to_kind(SyntaxKind::LeftParen); - last.convert_to_kind(SyntaxKind::RightParen); - } - } - - node.convert_to_kind(SyntaxKind::Math); -} - -fn math_class(text: &str) -> Option { - match text { - "[|" => return Some(MathClass::Opening), - "|]" => return Some(MathClass::Closing), - "||" => return Some(MathClass::Fence), - _ => {} - } - - let mut chars = text.chars(); - chars - .next() - .filter(|_| chars.next().is_none()) - .and_then(unicode_math_class::class) -} - -fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> { - match kind { - SyntaxKind::Underscore => { - Some((SyntaxKind::MathAttach, SyntaxKind::Hat, ast::Assoc::Right, 2)) - } - SyntaxKind::Hat => { - Some((SyntaxKind::MathAttach, SyntaxKind::Underscore, ast::Assoc::Right, 2)) - } - SyntaxKind::Slash => { - Some((SyntaxKind::MathFrac, SyntaxKind::Eof, ast::Assoc::Left, 1)) - } - _ => None, - } -} - -fn math_args(p: &mut Parser) { - let m = p.marker(); - p.convert(SyntaxKind::LeftParen); - - let mut namable = true; - let mut named = None; - let mut has_arrays = false; - let mut array = p.marker(); - let mut arg = p.marker(); - - while !p.eof() && !p.at(SyntaxKind::Dollar) { - if namable - && (p.at(SyntaxKind::MathIdent) || p.at(SyntaxKind::Text)) - && p.text[p.current_end()..].starts_with(':') - { - p.convert(SyntaxKind::Ident); - p.convert(SyntaxKind::Colon); - named = Some(arg); - arg = p.marker(); - array = p.marker(); - } - - match p.current_text() { - ")" => break, - ";" => { - maybe_wrap_in_math(p, arg, named); - p.wrap(array, SyntaxKind::Array); - p.convert(SyntaxKind::Semicolon); - array = p.marker(); - arg = p.marker(); - namable = true; - named = None; - has_arrays = true; - continue; - } - "," => { - maybe_wrap_in_math(p, arg, named); - p.convert(SyntaxKind::Comma); - arg = p.marker(); - namable = true; - if named.is_some() { - array = p.marker(); - named = None; - } - continue; - } - _ => {} - } - - let prev = p.prev_end(); - math_expr(p); - if !p.progress(prev) { - p.unexpected(); - } - - namable = false; - } - - if arg != p.marker() { - maybe_wrap_in_math(p, arg, named); - if named.is_some() { - array = p.marker(); - } - } - - if has_arrays && array != p.marker() { - p.wrap(array, SyntaxKind::Array); - } - - if p.at(SyntaxKind::Text) && p.current_text() == ")" { - p.convert(SyntaxKind::RightParen); - } else { - p.expected("closing paren"); - p.balanced = false; - } - - p.wrap(m, SyntaxKind::Args); -} - -fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, named: Option) { - let exprs = p.post_process(arg).filter(|node| node.is::()).count(); - if exprs != 1 { - p.wrap(arg, SyntaxKind::Math); - } - - if let Some(m) = named { - p.wrap(m, SyntaxKind::Named); - } -} - -fn code(p: &mut Parser, stop: impl FnMut(&Parser) -> bool) { - let m = p.marker(); - code_exprs(p, stop); - p.wrap(m, SyntaxKind::Code); -} - -fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { - while !p.eof() && !stop(p) { - p.stop_at_newline(true); - let prev = p.prev_end(); - code_expr(p); - if p.progress(prev) && !p.eof() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) { - p.expected("semicolon or line break"); - } - p.unstop(); - if !p.progress(prev) && !p.eof() { - p.unexpected(); - } - } -} - -fn code_expr(p: &mut Parser) { - code_expr_prec(p, false, 0, false) -} - -fn code_expr_or_pattern(p: &mut Parser) { - code_expr_prec(p, false, 0, true) -} - -fn embedded_code_expr(p: &mut Parser) { - p.stop_at_newline(true); - p.enter(LexMode::Code); - p.assert(SyntaxKind::Hashtag); - p.unskip(); - - let stmt = matches!( - p.current(), - SyntaxKind::Let - | SyntaxKind::Set - | SyntaxKind::Show - | SyntaxKind::Import - | SyntaxKind::Include - ); - - let prev = p.prev_end(); - code_expr_prec(p, true, 0, false); - - // Consume error for things like `#12p` or `#"abc\"`. - if !p.progress(prev) { - p.unexpected(); - } - - let semi = - (stmt || p.directly_at(SyntaxKind::Semicolon)) && p.eat_if(SyntaxKind::Semicolon); - - if stmt && !semi && !p.eof() && !p.at(SyntaxKind::RightBracket) { - p.expected("semicolon or line break"); - } - - p.exit(); - p.unstop(); -} - -fn code_expr_prec( - p: &mut Parser, - atomic: bool, - min_prec: usize, - allow_destructuring: bool, -) { - let m = p.marker(); - if let (false, Some(op)) = (atomic, ast::UnOp::from_kind(p.current())) { - p.eat(); - code_expr_prec(p, atomic, op.precedence(), false); - p.wrap(m, SyntaxKind::Unary); - } else { - code_primary(p, atomic, allow_destructuring); - } - - loop { - if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket) - { - args(p); - p.wrap(m, SyntaxKind::FuncCall); - continue; - } - - let at_field_or_method = - p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident; - - if atomic && !at_field_or_method { - break; - } - - if p.eat_if(SyntaxKind::Dot) { - p.expect(SyntaxKind::Ident); - p.wrap(m, SyntaxKind::FieldAccess); - continue; - } - - let binop = - if ast::BinOp::NotIn.precedence() >= min_prec && p.eat_if(SyntaxKind::Not) { - if p.at(SyntaxKind::In) { - Some(ast::BinOp::NotIn) - } else { - p.expected("keyword `in`"); - break; - } - } else { - ast::BinOp::from_kind(p.current()) - }; - - if let Some(op) = binop { - let mut prec = op.precedence(); - if prec < min_prec { - break; - } - - match op.assoc() { - ast::Assoc::Left => prec += 1, - ast::Assoc::Right => {} - } - - p.eat(); - code_expr_prec(p, false, prec, false); - p.wrap(m, SyntaxKind::Binary); - continue; - } - - break; - } -} - -fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { - let m = p.marker(); - match p.current() { - SyntaxKind::Ident => { - p.eat(); - if !atomic && p.at(SyntaxKind::Arrow) { - p.wrap(m, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - code_expr(p); - p.wrap(m, SyntaxKind::Closure); - } - } - SyntaxKind::Underscore if !atomic => { - p.eat(); - if p.at(SyntaxKind::Arrow) { - p.wrap(m, SyntaxKind::Params); - p.eat(); - code_expr(p); - p.wrap(m, SyntaxKind::Closure); - } else if let Some(underscore) = p.node_mut(m) { - underscore.convert_to_error("expected expression, found underscore"); - } - } - - SyntaxKind::LeftBrace => code_block(p), - SyntaxKind::LeftBracket => content_block(p), - SyntaxKind::LeftParen => with_paren(p, allow_destructuring), - SyntaxKind::Dollar => equation(p), - SyntaxKind::Let => let_binding(p), - SyntaxKind::Set => set_rule(p), - SyntaxKind::Show => show_rule(p), - SyntaxKind::If => conditional(p), - SyntaxKind::While => while_loop(p), - SyntaxKind::For => for_loop(p), - SyntaxKind::Import => module_import(p), - SyntaxKind::Include => module_include(p), - SyntaxKind::Break => break_stmt(p), - SyntaxKind::Continue => continue_stmt(p), - SyntaxKind::Return => return_stmt(p), - - SyntaxKind::None - | SyntaxKind::Auto - | SyntaxKind::Int - | SyntaxKind::Float - | SyntaxKind::Bool - | SyntaxKind::Numeric - | SyntaxKind::Str - | SyntaxKind::Label - | SyntaxKind::Raw => p.eat(), - - _ => p.expected("expression"), - } -} - -fn block(p: &mut Parser) { - match p.current() { - SyntaxKind::LeftBracket => content_block(p), - SyntaxKind::LeftBrace => code_block(p), - _ => p.expected("block"), - } -} - -pub(super) fn reparse_block(text: &str, range: Range) -> Option { - let mut p = Parser::new(text, range.start, LexMode::Code); - assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace)); - block(&mut p); - (p.balanced && p.prev_end() == range.end) - .then(|| p.finish().into_iter().next().unwrap()) -} - -fn code_block(p: &mut Parser) { - let m = p.marker(); - p.enter(LexMode::Code); - p.stop_at_newline(false); - p.assert(SyntaxKind::LeftBrace); - code(p, |p| { - p.at(SyntaxKind::RightBrace) - || p.at(SyntaxKind::RightBracket) - || p.at(SyntaxKind::RightParen) - }); - p.expect_closing_delimiter(m, SyntaxKind::RightBrace); - p.exit(); - p.unstop(); - p.wrap(m, SyntaxKind::CodeBlock); -} - -fn content_block(p: &mut Parser) { - let m = p.marker(); - p.enter(LexMode::Markup); - p.assert(SyntaxKind::LeftBracket); - markup(p, true, 0, |p| p.at(SyntaxKind::RightBracket)); - p.expect_closing_delimiter(m, SyntaxKind::RightBracket); - p.exit(); - p.wrap(m, SyntaxKind::ContentBlock); -} - -fn with_paren(p: &mut Parser, allow_destructuring: bool) { - let m = p.marker(); - let mut kind = collection(p, true); - if p.at(SyntaxKind::Arrow) { - validate_params_at(p, m); - p.wrap(m, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - code_expr(p); - kind = SyntaxKind::Closure; - } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { - // TODO: add warning if p.at(SyntaxKind::Eq) && kind == SyntaxKind::Parenthesized - - validate_pattern_at(p, m, false); - p.wrap(m, SyntaxKind::Destructuring); - p.assert(SyntaxKind::Eq); - code_expr(p); - kind = SyntaxKind::DestructAssignment; - } - - match kind { - SyntaxKind::Array if !allow_destructuring => validate_array_at(p, m), - SyntaxKind::Dict if !allow_destructuring => validate_dict_at(p, m), - SyntaxKind::Parenthesized if !allow_destructuring => { - validate_parenthesized_at(p, m) - } - SyntaxKind::Destructuring if !allow_destructuring => { - invalidate_destructuring(p, m) - } - _ => {} - } - p.wrap(m, kind); -} - -fn invalidate_destructuring(p: &mut Parser, m: Marker) { - let mut collection_kind = Option::None; - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => match collection_kind { - Some(SyntaxKind::Array) => child.convert_to_error(eco_format!( - "expected expression, found {}", - child.kind().name() - )), - _ => collection_kind = Some(SyntaxKind::Dict), - }, - SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {} - kind => match collection_kind { - Some(SyntaxKind::Dict) => child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )), - _ => collection_kind = Some(SyntaxKind::Array), - }, - } - } -} - -fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind { - p.stop_at_newline(false); - - let m = p.marker(); - p.assert(SyntaxKind::LeftParen); - - let mut count = 0; - let mut parenthesized = true; - let mut kind = None; - if keyed && p.eat_if(SyntaxKind::Colon) { - kind = Some(SyntaxKind::Dict); - parenthesized = false; - } - - while !p.current().is_terminator() { - let prev = p.prev_end(); - match item(p, keyed) { - SyntaxKind::Spread => parenthesized = false, - SyntaxKind::Named | SyntaxKind::Keyed => { - match kind { - Some(SyntaxKind::Array) => kind = Some(SyntaxKind::Destructuring), - _ => kind = Some(SyntaxKind::Dict), - } - parenthesized = false; - } - SyntaxKind::Int => match kind { - Some(SyntaxKind::Array) | None => kind = Some(SyntaxKind::Array), - Some(_) => kind = Some(SyntaxKind::Destructuring), - }, - _ if kind.is_none() => kind = Some(SyntaxKind::Array), - _ => {} - } - - if !p.progress(prev) { - p.unexpected(); - continue; - } - - count += 1; - - if p.current().is_terminator() { - break; - } - - if p.expect(SyntaxKind::Comma) { - parenthesized = false; - } - } - - p.expect_closing_delimiter(m, SyntaxKind::RightParen); - p.unstop(); - - if parenthesized && count == 1 { - SyntaxKind::Parenthesized - } else { - kind.unwrap_or(SyntaxKind::Array) - } -} - -fn item(p: &mut Parser, keyed: bool) -> SyntaxKind { - let m = p.marker(); - - if p.eat_if(SyntaxKind::Dots) { - if p.at(SyntaxKind::Comma) || p.at(SyntaxKind::RightParen) { - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - code_expr(p); - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - if p.at(SyntaxKind::Underscore) { - // This is a temporary workaround to fix `v.map(_ => {})`. - let mut lexer = p.lexer.clone(); - let next = - std::iter::from_fn(|| Some(lexer.next())).find(|kind| !kind.is_trivia()); - if next != Some(SyntaxKind::Arrow) { - p.eat(); - return SyntaxKind::Underscore; - } - } - - code_expr_or_pattern(p); - - if !p.eat_if(SyntaxKind::Colon) { - return SyntaxKind::Int; - } - - if !p.eat_if(SyntaxKind::Underscore) { - code_expr(p); - } - - let kind = match p.node(m).map(SyntaxNode::kind) { - Some(SyntaxKind::Ident) => SyntaxKind::Named, - Some(SyntaxKind::Str) if keyed => SyntaxKind::Keyed, - _ => { - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Colon { - break; - } - - let mut message = EcoString::from("expected identifier"); - if keyed { - message.push_str(" or string"); - } - message.push_str(", found "); - message.push_str(child.kind().name()); - child.convert_to_error(message); - } - SyntaxKind::Named - } - }; - - p.wrap(m, kind); - kind -} - -fn args(p: &mut Parser) { - if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { - p.expected("argument list"); - } - - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - collection(p, false); - validate_args_at(p, m); - } - - while p.directly_at(SyntaxKind::LeftBracket) { - content_block(p); - } - - p.wrap(m, SyntaxKind::Args); -} - -enum PatternKind { - Ident, - Placeholder, - Destructuring, -} - -fn pattern(p: &mut Parser) -> PatternKind { - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - let kind = collection(p, false); - validate_pattern_at(p, m, true); - - if kind == SyntaxKind::Parenthesized { - PatternKind::Ident - } else { - p.wrap(m, SyntaxKind::Destructuring); - PatternKind::Destructuring - } - } else if p.eat_if(SyntaxKind::Underscore) { - PatternKind::Placeholder - } else { - p.expect(SyntaxKind::Ident); - PatternKind::Ident - } -} - -fn let_binding(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Let); - - let m2 = p.marker(); - let mut closure = false; - let mut destructuring = false; - match pattern(p) { - PatternKind::Ident => { - closure = p.directly_at(SyntaxKind::LeftParen); - if closure { - let m3 = p.marker(); - collection(p, false); - validate_params_at(p, m3); - p.wrap(m3, SyntaxKind::Params); - } - } - PatternKind::Placeholder => {} - PatternKind::Destructuring => destructuring = true, - } - - let f = if closure || destructuring { Parser::expect } else { Parser::eat_if }; - if f(p, SyntaxKind::Eq) { - code_expr(p); - } - - if closure { - p.wrap(m2, SyntaxKind::Closure); - } - - p.wrap(m, SyntaxKind::LetBinding); -} - -fn set_rule(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Set); - - let m2 = p.marker(); - p.expect(SyntaxKind::Ident); - while p.eat_if(SyntaxKind::Dot) { - p.expect(SyntaxKind::Ident); - p.wrap(m2, SyntaxKind::FieldAccess); - } - - args(p); - if p.eat_if(SyntaxKind::If) { - code_expr(p); - } - p.wrap(m, SyntaxKind::SetRule); -} - -fn show_rule(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Show); - p.unskip(); - let m2 = p.marker(); - p.skip(); - - if !p.at(SyntaxKind::Colon) { - code_expr(p); - } - - if p.eat_if(SyntaxKind::Colon) { - code_expr(p); - } else { - p.expected_at(m2, "colon"); - } - - p.wrap(m, SyntaxKind::ShowRule); -} - -fn conditional(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::If); - code_expr(p); - block(p); - if p.eat_if(SyntaxKind::Else) { - if p.at(SyntaxKind::If) { - conditional(p); - } else { - block(p); - } - } - p.wrap(m, SyntaxKind::Conditional); -} - -fn while_loop(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::While); - code_expr(p); - block(p); - p.wrap(m, SyntaxKind::WhileLoop); -} - -fn for_loop(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::For); - pattern(p); - if p.at(SyntaxKind::Comma) { - p.expected("keyword `in` - did you mean to use a destructuring pattern?"); - if !p.eat_if(SyntaxKind::Ident) { - p.eat_if(SyntaxKind::Underscore); - } - p.eat_if(SyntaxKind::In); - } else { - p.expect(SyntaxKind::In); - } - code_expr(p); - block(p); - p.wrap(m, SyntaxKind::ForLoop); -} - -fn module_import(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Import); - code_expr(p); - if p.eat_if(SyntaxKind::Colon) && !p.eat_if(SyntaxKind::Star) { - import_items(p); - } - p.wrap(m, SyntaxKind::ModuleImport); -} - -fn import_items(p: &mut Parser) { - let m = p.marker(); - while !p.eof() && !p.at(SyntaxKind::Semicolon) { - if !p.eat_if(SyntaxKind::Ident) { - p.unexpected(); - } - if p.current().is_terminator() { - break; - } - p.expect(SyntaxKind::Comma); - } - p.wrap(m, SyntaxKind::ImportItems); -} - -fn module_include(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Include); - code_expr(p); - p.wrap(m, SyntaxKind::ModuleInclude); -} - -fn break_stmt(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Break); - p.wrap(m, SyntaxKind::LoopBreak); -} - -fn continue_stmt(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Continue); - p.wrap(m, SyntaxKind::LoopContinue); -} - -fn return_stmt(p: &mut Parser) { - let m = p.marker(); - p.assert(SyntaxKind::Return); - if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) { - code_expr(p); - } - p.wrap(m, SyntaxKind::FuncReturn); -} - -fn validate_parenthesized_at(p: &mut Parser, m: Marker) { - for child in p.post_process(m) { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); - } - _ => {} - } - } -} - -fn validate_array_at(p: &mut Parser, m: Marker) { - validate_array(p.post_process(m)) -} - -fn validate_array<'a>(children: impl Iterator) { - for child in children { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Named | SyntaxKind::Keyed | SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); - } - _ => {} - } - } -} - -fn validate_dict_at(p: &mut Parser, m: Marker) { - validate_dict(p.post_process(m)) -} - -fn validate_dict<'a>(children: impl Iterator) { - let mut used = HashSet::new(); - for child in children { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => { - let Some(first) = child.children_mut().first_mut() else { continue }; - let key = match first.cast::() { - Some(str) => str.get(), - None => first.text().clone(), - }; - - if !used.insert(key.clone()) { - first.convert_to_error(eco_format!("duplicate key: {key}")); - child.make_erroneous(); - } - } - SyntaxKind::Spread => {} - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Colon => {} - kind => { - child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )); - } - } - } -} - -fn validate_params_at(p: &mut Parser, m: Marker) { - let mut used_spread = false; - let mut used = HashSet::new(); - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error(eco_format!( - "duplicate parameter: {}", - child.text() - )); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("only one argument sink is allowed"); - continue; - } - used_spread = true; - if within.kind() == SyntaxKind::Dots { - continue; - } else if within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Array | SyntaxKind::Dict | SyntaxKind::Destructuring => { - validate_pattern(child.children_mut().iter_mut(), &mut used, false); - child.convert_to_kind(SyntaxKind::Destructuring); - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - child.convert_to_error(eco_format!( - "expected identifier, named pair or argument sink, found {}", - kind.name() - )); - } - } - } -} - -fn validate_args_at(p: &mut Parser, m: Marker) { - let mut used = HashSet::new(); - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Named { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate argument: {}", - within.text() - )); - child.make_erroneous(); - } - } else if child.kind() == SyntaxKind::Underscore { - child.convert_to_error("unexpected underscore"); - } - } -} - -fn validate_pattern_at(p: &mut Parser, m: Marker, forbid_expressions: bool) { - let mut used = HashSet::new(); - validate_pattern(p.post_process(m), &mut used, forbid_expressions); -} - -fn validate_pattern<'a>( - children: impl Iterator, - used: &mut HashSet, - forbid_expressions: bool, -) { - let mut used_spread = false; - for child in children { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error( - "at most one binding per identifier is allowed", - ); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("at most one destructuring sink is allowed"); - continue; - } - used_spread = true; - - if within.kind() == SyntaxKind::Dots { - continue; - } else if forbid_expressions && within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } - - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } - - if forbid_expressions { - let Some(within) = child.children_mut().last_mut() else { return }; - if within.kind() != SyntaxKind::Ident - && within.kind() != SyntaxKind::Underscore - { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - } - } - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - if forbid_expressions { - child.convert_to_error(eco_format!( - "expected identifier or destructuring sink, found {}", - kind.name() - )); - } - } - } - } -} - -/// Manages parsing of a stream of tokens. -struct Parser<'s> { - text: &'s str, - lexer: Lexer<'s>, - prev_end: usize, - current_start: usize, - current: SyntaxKind, - modes: Vec, - nodes: Vec, - stop_at_newline: Vec, - balanced: bool, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct Marker(usize); - -impl<'s> Parser<'s> { - fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { - let mut lexer = Lexer::new(text, mode); - lexer.jump(offset); - let current = lexer.next(); - Self { - lexer, - text, - prev_end: offset, - current_start: offset, - current, - modes: vec![], - nodes: vec![], - stop_at_newline: vec![], - balanced: true, - } - } - - fn finish(self) -> Vec { - self.nodes - } - - fn prev_end(&self) -> usize { - self.prev_end - } - - fn current(&self) -> SyntaxKind { - self.current - } - - fn current_start(&self) -> usize { - self.current_start - } - - fn current_end(&self) -> usize { - self.lexer.cursor() - } - - fn current_text(&self) -> &'s str { - &self.text[self.current_start..self.current_end()] - } - - fn at(&self, kind: SyntaxKind) -> bool { - self.current == kind - } - - #[track_caller] - fn assert(&mut self, kind: SyntaxKind) { - assert_eq!(self.current, kind); - self.eat(); - } - - fn eof(&self) -> bool { - self.at(SyntaxKind::Eof) - } - - fn directly_at(&self, kind: SyntaxKind) -> bool { - self.current == kind && self.prev_end == self.current_start - } - - fn eat_if(&mut self, kind: SyntaxKind) -> bool { - let at = self.at(kind); - if at { - self.eat(); - } - at - } - - fn convert(&mut self, kind: SyntaxKind) { - self.current = kind; - self.eat(); - } - - fn newline(&mut self) -> bool { - self.lexer.newline() - } - - fn column(&self, at: usize) -> usize { - self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count() - } - - fn marker(&self) -> Marker { - Marker(self.nodes.len()) - } - - fn node(&self, m: Marker) -> Option<&SyntaxNode> { - self.nodes.get(m.0) - } - - fn node_mut(&mut self, m: Marker) -> Option<&mut SyntaxNode> { - self.nodes.get_mut(m.0) - } - - fn post_process(&mut self, m: Marker) -> impl Iterator { - self.nodes[m.0..] - .iter_mut() - .filter(|child| !child.kind().is_error() && !child.kind().is_trivia()) - } - - fn wrap(&mut self, m: Marker, kind: SyntaxKind) { - self.unskip(); - self.wrap_skipless(m, kind); - self.skip(); - } - - fn wrap_skipless(&mut self, m: Marker, kind: SyntaxKind) { - let from = m.0.min(self.nodes.len()); - let children = self.nodes.drain(from..).collect(); - self.nodes.push(SyntaxNode::inner(kind, children)); - } - - fn progress(&self, offset: usize) -> bool { - offset < self.prev_end - } - - fn enter(&mut self, mode: LexMode) { - self.modes.push(self.lexer.mode()); - self.lexer.set_mode(mode); - } - - fn exit(&mut self) { - let mode = self.modes.pop().unwrap(); - if mode != self.lexer.mode() { - self.unskip(); - self.lexer.set_mode(mode); - self.lexer.jump(self.current_start); - self.lex(); - self.skip(); - } - } - - fn stop_at_newline(&mut self, stop: bool) { - self.stop_at_newline.push(stop); - } - - fn unstop(&mut self) { - self.unskip(); - self.stop_at_newline.pop(); - self.lexer.jump(self.prev_end); - self.lex(); - self.skip(); - } - - fn eat(&mut self) { - self.save(); - self.lex(); - self.skip(); - } - - fn skip(&mut self) { - if self.lexer.mode() != LexMode::Markup { - while self.current.is_trivia() { - self.save(); - self.lex(); - } - } - } - - fn unskip(&mut self) { - if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { - while self.nodes.last().map_or(false, |last| last.kind().is_trivia()) { - self.nodes.pop(); - } - - self.lexer.jump(self.prev_end); - self.lex(); - } - } - - fn save(&mut self) { - let text = self.current_text(); - if self.at(SyntaxKind::Error) { - let message = self.lexer.take_error().unwrap(); - self.nodes.push(SyntaxNode::error(message, text)); - } else { - self.nodes.push(SyntaxNode::leaf(self.current, text)); - } - - if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() { - self.prev_end = self.current_end(); - } - } - - fn lex(&mut self) { - self.current_start = self.lexer.cursor(); - self.current = self.lexer.next(); - if self.lexer.mode() == LexMode::Code - && self.lexer.newline() - && self.stop_at_newline.last().copied().unwrap_or(false) - && !matches!(self.lexer.clone().next(), SyntaxKind::Else | SyntaxKind::Dot) - { - self.current = SyntaxKind::Eof; - } - } - - fn expect(&mut self, kind: SyntaxKind) -> bool { - let at = self.at(kind); - if at { - self.eat(); - } else { - self.balanced &= !kind.is_grouping(); - self.expected(kind.name()); - } - at - } - - fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { - if !self.eat_if(kind) { - self.nodes[open.0].convert_to_error("unclosed delimiter"); - } - } - - fn expected(&mut self, thing: &str) { - self.unskip(); - if self - .nodes - .last() - .map_or(true, |child| child.kind() != SyntaxKind::Error) - { - let message = eco_format!("expected {}", thing); - self.nodes.push(SyntaxNode::error(message, "")); - } - self.skip(); - } - - fn expected_at(&mut self, m: Marker, thing: &str) { - let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, ""); - self.nodes.insert(m.0, error); - } - - fn unexpected(&mut self) { - self.unskip(); - while self - .nodes - .last() - .map_or(false, |child| child.kind() == SyntaxKind::Error && child.is_empty()) - { - self.nodes.pop(); - } - self.skip(); - - let kind = self.current; - let offset = self.nodes.len(); - self.eat(); - self.balanced &= !kind.is_grouping(); - - if !kind.is_error() { - self.nodes[offset] - .convert_to_error(eco_format!("unexpected {}", kind.name())); - } - } -} diff --git a/src/syntax/reparser.rs b/src/syntax/reparser.rs deleted file mode 100644 index a4186fa7..00000000 --- a/src/syntax/reparser.rs +++ /dev/null @@ -1,322 +0,0 @@ -use std::ops::Range; - -use super::{ - is_newline, parse, reparse_block, reparse_markup, Span, SyntaxKind, SyntaxNode, -}; - -/// Refresh the given syntax node with as little parsing as possible. -/// -/// Takes the new text, the range in the old text that was replaced and the -/// length of the replacement and returns the range in the new text that was -/// ultimately reparsed. -/// -/// The high-level API for this function is -/// [`Source::edit`](super::Source::edit). -pub fn reparse( - root: &mut SyntaxNode, - text: &str, - replaced: Range, - replacement_len: usize, -) -> Range { - try_reparse(text, replaced, replacement_len, None, root, 0).unwrap_or_else(|| { - let id = root.span().id(); - *root = parse(text); - root.numberize(id, Span::FULL).unwrap(); - 0..text.len() - }) -} - -/// Try to reparse inside the given node. -fn try_reparse( - text: &str, - replaced: Range, - replacement_len: usize, - parent_kind: Option, - node: &mut SyntaxNode, - offset: usize, -) -> Option> { - // The range of children which overlap with the edit. - #[allow(clippy::reversed_empty_ranges)] - let mut overlap = usize::MAX..0; - let mut cursor = offset; - let node_kind = node.kind(); - - for (i, child) in node.children_mut().iter_mut().enumerate() { - let prev_range = cursor..cursor + child.len(); - let prev_len = child.len(); - let prev_desc = child.descendants(); - - // Does the child surround the edit? - // If so, try to reparse within it or itself. - if !child.is_leaf() && includes(&prev_range, &replaced) { - let new_len = prev_len + replacement_len - replaced.len(); - let new_range = cursor..cursor + new_len; - - // Try to reparse within the child. - if let Some(range) = try_reparse( - text, - replaced.clone(), - replacement_len, - Some(node_kind), - child, - cursor, - ) { - assert_eq!(child.len(), new_len); - let new_desc = child.descendants(); - node.update_parent(prev_len, new_len, prev_desc, new_desc); - return Some(range); - } - - // If the child is a block, try to reparse the block. - if child.kind().is_block() { - if let Some(newborn) = reparse_block(text, new_range.clone()) { - return node - .replace_children(i..i + 1, vec![newborn]) - .is_ok() - .then_some(new_range); - } - } - } - - // Does the child overlap with the edit? - if overlaps(&prev_range, &replaced) { - overlap.start = overlap.start.min(i); - overlap.end = i + 1; - } - - // Is the child beyond the edit? - if replaced.end < cursor { - break; - } - - cursor += child.len(); - } - - // Try to reparse a range of markup expressions within markup. This is only - // possible if the markup is top-level or contained in a block, not if it is - // contained in things like headings or lists because too much can go wrong - // with indent and line breaks. - if overlap.is_empty() - || node.kind() != SyntaxKind::Markup - || !matches!(parent_kind, None | Some(SyntaxKind::ContentBlock)) - { - return None; - } - - let children = node.children_mut(); - - // Reparse a segment. Retries until it works, taking exponentially more - // children into account. - let mut expansion = 1; - loop { - // Add slack in both directions. - let mut start = overlap.start.saturating_sub(expansion.max(2)); - let mut end = (overlap.end + expansion).min(children.len()); - - // Expand to the left. - while start > 0 && expand(&children[start]) { - start -= 1; - } - - // Expand to the right. - while end < children.len() && expand(&children[end]) { - end += 1; - } - - // Also take hashtag. - if start > 0 && children[start - 1].kind() == SyntaxKind::Hashtag { - start -= 1; - } - - // Synthesize what `at_start` and `nesting` would be at the start of the - // reparse. - let mut prefix_len = 0; - let mut nesting = 0; - let mut at_start = true; - for child in &children[..start] { - prefix_len += child.len(); - next_at_start(child, &mut at_start); - next_nesting(child, &mut nesting); - } - - // Determine what `at_start` will have to be at the end of the reparse. - let mut prev_len = 0; - let mut prev_at_start_after = at_start; - let mut prev_nesting_after = nesting; - for child in &children[start..end] { - prev_len += child.len(); - next_at_start(child, &mut prev_at_start_after); - next_nesting(child, &mut prev_nesting_after); - } - - // Determine the range in the new text that we want to reparse. - let shifted = offset + prefix_len; - let new_len = prev_len + replacement_len - replaced.len(); - let new_range = shifted..shifted + new_len; - let at_end = end == children.len(); - - // Stop parsing early if this kind is encountered. - let stop_kind = match parent_kind { - Some(_) => SyntaxKind::RightBracket, - None => SyntaxKind::Eof, - }; - - // Reparse! - let reparsed = reparse_markup( - text, - new_range.clone(), - &mut at_start, - &mut nesting, - |kind| kind == stop_kind, - ); - - if let Some(newborns) = reparsed { - // If more children follow, at_start must match its previous value. - // Similarly, if we children follow or we not top-level the nesting - // must match its previous value. - if (at_end || at_start == prev_at_start_after) - && ((at_end && parent_kind.is_none()) || nesting == prev_nesting_after) - { - return node - .replace_children(start..end, newborns) - .is_ok() - .then_some(new_range); - } - } - - // If it didn't even work with all children, we give up. - if start == 0 && at_end { - break; - } - - // Exponential expansion to both sides. - expansion *= 2; - } - - None -} - -/// Whether the inner range is fully contained in the outer one (no touching). -fn includes(outer: &Range, inner: &Range) -> bool { - outer.start < inner.start && outer.end > inner.end -} - -/// Whether the first and second range overlap or touch. -fn overlaps(first: &Range, second: &Range) -> bool { - (first.start <= second.start && second.start <= first.end) - || (second.start <= first.start && first.start <= second.end) -} - -/// Whether the selection should be expanded beyond a node of this kind. -fn expand(node: &SyntaxNode) -> bool { - let kind = node.kind(); - kind.is_trivia() - || kind.is_error() - || kind == SyntaxKind::Semicolon - || node.text() == "/" - || node.text() == ":" -} - -/// Whether `at_start` would still be true after this node given the -/// previous value of the property. -fn next_at_start(node: &SyntaxNode, at_start: &mut bool) { - let kind = node.kind(); - if kind.is_trivia() { - *at_start |= kind == SyntaxKind::Parbreak - || (kind == SyntaxKind::Space && node.text().chars().any(is_newline)); - } else { - *at_start = false; - } -} - -/// Update `nesting` based on the node. -fn next_nesting(node: &SyntaxNode, nesting: &mut usize) { - if node.kind() == SyntaxKind::Text { - match node.text().as_str() { - "[" => *nesting += 1, - "]" if *nesting > 0 => *nesting -= 1, - _ => {} - } - } -} - -#[cfg(test)] -mod tests { - use std::ops::Range; - - use super::super::{parse, Source, Span}; - - #[track_caller] - fn test(prev: &str, range: Range, with: &str, incremental: bool) { - let mut source = Source::detached(prev); - let prev = source.root().clone(); - let range = source.edit(range, with); - let mut found = source.root().clone(); - let mut expected = parse(source.text()); - found.synthesize(Span::detached()); - expected.synthesize(Span::detached()); - if found != expected { - eprintln!("source: {:?}", source.text()); - eprintln!("previous: {prev:#?}"); - eprintln!("expected: {expected:#?}"); - eprintln!("found: {found:#?}"); - panic!("test failed"); - } - if incremental { - assert_ne!(source.len_bytes(), range.len(), "should have been incremental"); - } else { - assert_eq!( - source.len_bytes(), - range.len(), - "shouldn't have been incremental" - ); - } - } - - #[test] - fn test_reparse_markup() { - test("abc~def~gh~", 5..6, "+", true); - test("~~~~~~~", 3..4, "A", true); - test("abc~~", 1..2, "", true); - test("#var. hello", 5..6, " ", false); - test("#var;hello", 9..10, "a", false); - test("https:/world", 7..7, "/", false); - test("hello world", 7..12, "walkers", false); - test("some content", 0..12, "", false); - test("", 0..0, "do it", false); - test("a d e", 1..3, " b c d", false); - test("~*~*~", 2..2, "*", false); - test("::1\n2. a\n3", 7..7, "4", true); - test("* #{1+2} *", 6..7, "3", true); - test("#{(0, 1, 2)}", 6..7, "11pt", true); - test("\n= A heading", 4..4, "n evocative", false); - test("#call() abc~d", 7..7, "[]", true); - test("a your thing a", 6..7, "a", false); - test("#grid(columns: (auto, 1fr, 40%))", 16..20, "4pt", false); - test("abc\n= a heading\njoke", 3..4, "\nmore\n\n", true); - test("#show f: a => b..", 16..16, "c", false); - test("#for", 4..4, "//", false); - test("a\n#let \nb", 7..7, "i", true); - test(r"#{{let x = z}; a = 1} b", 7..7, "//", false); - test(r#"a ```typst hello```"#, 16..17, "", false); - } - - #[test] - fn test_reparse_block() { - test("Hello #{ x + 1 }!", 9..10, "abc", true); - test("A#{}!", 3..3, "\"", false); - test("#{ [= x] }!", 5..5, "=", true); - test("#[[]]", 3..3, "\\", true); - test("#[[ab]]", 4..5, "\\", true); - test("#{}}", 2..2, "{", false); - test("A: #[BC]", 6..6, "{", true); - test("A: #[BC]", 6..6, "#{", true); - test("A: #[BC]", 6..6, "#{}", true); - test("#{\"ab\"}A", 5..5, "c", true); - test("#{\"ab\"}A", 5..6, "c", false); - test("a#[]b", 3..3, "#{", true); - test("a#{call(); abc}b", 8..8, "[]", true); - test("a #while x {\n g(x) \n} b", 12..12, "//", true); - test("a#[]b", 3..3, "[hey]", true); - } -} diff --git a/src/syntax/source.rs b/src/syntax/source.rs deleted file mode 100644 index 2f3e4144..00000000 --- a/src/syntax/source.rs +++ /dev/null @@ -1,421 +0,0 @@ -//! Source file management. - -use std::fmt::{self, Debug, Formatter}; -use std::hash::{Hash, Hasher}; -use std::ops::Range; -use std::sync::Arc; - -use comemo::Prehashed; - -use super::ast::Markup; -use super::reparser::reparse; -use super::{is_newline, parse, LinkedNode, Span, SyntaxNode}; -use crate::diag::SourceResult; -use crate::file::FileId; -use crate::util::StrExt; - -/// A source file. -/// -/// All line and column indices start at zero, just like byte indices. Only for -/// user-facing display, you should add 1 to them. -/// -/// Values of this type are cheap to clone and hash. -#[derive(Clone)] -pub struct Source(Arc); - -/// The internal representation. -#[derive(Clone)] -struct Repr { - id: FileId, - text: Prehashed, - root: Prehashed, - lines: Vec, -} - -impl Source { - /// Create a new source file. - #[tracing::instrument(skip_all)] - pub fn new(id: FileId, text: String) -> Self { - let mut root = parse(&text); - root.numberize(id, Span::FULL).unwrap(); - Self(Arc::new(Repr { - id, - lines: lines(&text), - text: Prehashed::new(text), - root: Prehashed::new(root), - })) - } - - /// Create a source file without a real id and path, usually for testing. - pub fn detached(text: impl Into) -> Self { - Self::new(FileId::detached(), text.into()) - } - - /// Create a source file with the same synthetic span for all nodes. - pub fn synthesized(text: String, span: Span) -> Self { - let mut root = parse(&text); - root.synthesize(span); - Self(Arc::new(Repr { - id: FileId::detached(), - lines: lines(&text), - text: Prehashed::new(text), - root: Prehashed::new(root), - })) - } - - /// The root node of the file's untyped syntax tree. - pub fn root(&self) -> &SyntaxNode { - &self.0.root - } - - /// The root node of the file's typed abstract syntax tree. - pub fn ast(&self) -> SourceResult { - let errors = self.root().errors(); - if errors.is_empty() { - Ok(self.root().cast().expect("root node must be markup")) - } else { - Err(Box::new(errors)) - } - } - - /// The id of the source file. - pub fn id(&self) -> FileId { - self.0.id - } - - /// The whole source as a string slice. - pub fn text(&self) -> &str { - &self.0.text - } - - /// Slice out the part of the source code enclosed by the range. - pub fn get(&self, range: Range) -> Option<&str> { - self.text().get(range) - } - - /// Fully replace the source text. - pub fn replace(&mut self, text: String) { - let inner = Arc::make_mut(&mut self.0); - inner.text = Prehashed::new(text); - inner.lines = lines(&inner.text); - let mut root = parse(&inner.text); - root.numberize(inner.id, Span::FULL).unwrap(); - inner.root = Prehashed::new(root); - } - - /// Edit the source file by replacing the given range. - /// - /// Returns the range in the new source that was ultimately reparsed. - /// - /// The method panics if the `replace` range is out of bounds. - #[track_caller] - pub fn edit(&mut self, replace: Range, with: &str) -> Range { - let start_byte = replace.start; - let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); - let line = self.byte_to_line(start_byte).unwrap(); - - let inner = Arc::make_mut(&mut self.0); - - // Update the text itself. - inner.text.update(|text| text.replace_range(replace.clone(), with)); - - // Remove invalidated line starts. - inner.lines.truncate(line + 1); - - // Handle adjoining of \r and \n. - if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') { - inner.lines.pop(); - } - - // Recalculate the line starts after the edit. - inner.lines.extend(lines_from( - start_byte, - start_utf16, - &inner.text[start_byte..], - )); - - // Incrementally reparse the replaced range. - inner - .root - .update(|root| reparse(root, &inner.text, replace, with.len())) - } - - /// Get the length of the file in UTF-8 encoded bytes. - pub fn len_bytes(&self) -> usize { - self.text().len() - } - - /// Get the length of the file in UTF-16 code units. - pub fn len_utf16(&self) -> usize { - let last = self.0.lines.last().unwrap(); - last.utf16_idx + self.0.text[last.byte_idx..].len_utf16() - } - - /// Get the length of the file in lines. - pub fn len_lines(&self) -> usize { - self.0.lines.len() - } - - /// Find the node with the given span. - /// - /// Returns `None` if the span does not point into this source file. - pub fn find(&self, span: Span) -> Option> { - LinkedNode::new(self.root()).find(span) - } - - /// Return the index of the UTF-16 code unit at the byte index. - pub fn byte_to_utf16(&self, byte_idx: usize) -> Option { - let line_idx = self.byte_to_line(byte_idx)?; - let line = self.0.lines.get(line_idx)?; - let head = self.0.text.get(line.byte_idx..byte_idx)?; - Some(line.utf16_idx + head.len_utf16()) - } - - /// Return the index of the line that contains the given byte index. - pub fn byte_to_line(&self, byte_idx: usize) -> Option { - (byte_idx <= self.0.text.len()).then(|| { - match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { - Ok(i) => i, - Err(i) => i - 1, - } - }) - } - - /// Return the index of the column at the byte index. - /// - /// The column is defined as the number of characters in the line before the - /// byte index. - pub fn byte_to_column(&self, byte_idx: usize) -> Option { - let line = self.byte_to_line(byte_idx)?; - let start = self.line_to_byte(line)?; - let head = self.get(start..byte_idx)?; - Some(head.chars().count()) - } - - /// Return the byte index at the UTF-16 code unit. - pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option { - let line = self.0.lines.get( - match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { - Ok(i) => i, - Err(i) => i - 1, - }, - )?; - - let mut k = line.utf16_idx; - for (i, c) in self.0.text[line.byte_idx..].char_indices() { - if k >= utf16_idx { - return Some(line.byte_idx + i); - } - k += c.len_utf16(); - } - - (k == utf16_idx).then_some(self.0.text.len()) - } - - /// Return the byte position at which the given line starts. - pub fn line_to_byte(&self, line_idx: usize) -> Option { - self.0.lines.get(line_idx).map(|line| line.byte_idx) - } - - /// Return the range which encloses the given line. - pub fn line_to_range(&self, line_idx: usize) -> Option> { - let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len()); - Some(start..end) - } - - /// Return the byte index of the given (line, column) pair. - /// - /// The column defines the number of characters to go beyond the start of - /// the line. - pub fn line_column_to_byte( - &self, - line_idx: usize, - column_idx: usize, - ) -> Option { - let range = self.line_to_range(line_idx)?; - let line = self.get(range.clone())?; - let mut chars = line.chars(); - for _ in 0..column_idx { - chars.next(); - } - Some(range.start + (line.len() - chars.as_str().len())) - } -} - -impl Debug for Source { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Source({})", self.id().path().display()) - } -} - -impl Hash for Source { - fn hash(&self, state: &mut H) { - self.0.id.hash(state); - self.0.text.hash(state); - self.0.root.hash(state); - } -} - -impl AsRef for Source { - fn as_ref(&self) -> &str { - self.text() - } -} - -/// Metadata about a line. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct Line { - /// The UTF-8 byte offset where the line starts. - byte_idx: usize, - /// The UTF-16 codepoint offset where the line starts. - utf16_idx: usize, -} - -/// Create a line vector. -fn lines(text: &str) -> Vec { - std::iter::once(Line { byte_idx: 0, utf16_idx: 0 }) - .chain(lines_from(0, 0, text)) - .collect() -} - -/// Compute a line iterator from an offset. -fn lines_from( - byte_offset: usize, - utf16_offset: usize, - text: &str, -) -> impl Iterator + '_ { - let mut s = unscanny::Scanner::new(text); - let mut utf16_idx = utf16_offset; - - std::iter::from_fn(move || { - s.eat_until(|c: char| { - utf16_idx += c.len_utf16(); - is_newline(c) - }); - - if s.done() { - return None; - } - - if s.eat() == Some('\r') && s.eat_if('\n') { - utf16_idx += 1; - } - - Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx }) - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; - - #[test] - fn test_source_file_new() { - let source = Source::detached(TEST); - assert_eq!( - source.0.lines, - [ - Line { byte_idx: 0, utf16_idx: 0 }, - Line { byte_idx: 7, utf16_idx: 6 }, - Line { byte_idx: 15, utf16_idx: 12 }, - Line { byte_idx: 18, utf16_idx: 15 }, - ] - ); - } - - #[test] - fn test_source_file_pos_to_line() { - let source = Source::detached(TEST); - assert_eq!(source.byte_to_line(0), Some(0)); - assert_eq!(source.byte_to_line(2), Some(0)); - assert_eq!(source.byte_to_line(6), Some(0)); - assert_eq!(source.byte_to_line(7), Some(1)); - assert_eq!(source.byte_to_line(8), Some(1)); - assert_eq!(source.byte_to_line(12), Some(1)); - assert_eq!(source.byte_to_line(21), Some(3)); - assert_eq!(source.byte_to_line(22), None); - } - - #[test] - fn test_source_file_pos_to_column() { - let source = Source::detached(TEST); - assert_eq!(source.byte_to_column(0), Some(0)); - assert_eq!(source.byte_to_column(2), Some(1)); - assert_eq!(source.byte_to_column(6), Some(5)); - assert_eq!(source.byte_to_column(7), Some(0)); - assert_eq!(source.byte_to_column(8), Some(1)); - assert_eq!(source.byte_to_column(12), Some(2)); - } - - #[test] - fn test_source_file_utf16() { - #[track_caller] - fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) { - let middle = source.byte_to_utf16(byte_idx).unwrap(); - let result = source.utf16_to_byte(middle).unwrap(); - assert_eq!(middle, utf16_idx); - assert_eq!(result, byte_idx); - } - - let source = Source::detached(TEST); - roundtrip(&source, 0, 0); - roundtrip(&source, 2, 1); - roundtrip(&source, 3, 2); - roundtrip(&source, 8, 7); - roundtrip(&source, 12, 9); - roundtrip(&source, 21, 18); - assert_eq!(source.byte_to_utf16(22), None); - assert_eq!(source.utf16_to_byte(19), None); - } - - #[test] - fn test_source_file_roundtrip() { - #[track_caller] - fn roundtrip(source: &Source, byte_idx: usize) { - let line = source.byte_to_line(byte_idx).unwrap(); - let column = source.byte_to_column(byte_idx).unwrap(); - let result = source.line_column_to_byte(line, column).unwrap(); - assert_eq!(result, byte_idx); - } - - let source = Source::detached(TEST); - roundtrip(&source, 0); - roundtrip(&source, 7); - roundtrip(&source, 12); - roundtrip(&source, 21); - } - - #[test] - fn test_source_file_edit() { - // This tests only the non-parser parts. The reparsing itself is - // tested separately. - #[track_caller] - fn test(prev: &str, range: Range, with: &str, after: &str) { - let mut source = Source::detached(prev); - let result = Source::detached(after); - source.edit(range, with); - assert_eq!(source.text(), result.text()); - assert_eq!(source.0.lines, result.0.lines); - } - - // Test inserting at the beginning. - test("abc\n", 0..0, "hi\n", "hi\nabc\n"); - test("\nabc", 0..0, "hi\r", "hi\r\nabc"); - - // Test editing in the middle. - test(TEST, 4..16, "❌", "ä\tc❌i\rjkl"); - - // Test appending. - test("abc\ndef", 7..7, "hi", "abc\ndefhi"); - test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi"); - - // Test appending with adjoining \r and \n. - test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi"); - - // Test removing everything. - test(TEST, 0..21, "", ""); - } -} diff --git a/src/syntax/span.rs b/src/syntax/span.rs deleted file mode 100644 index 5c220252..00000000 --- a/src/syntax/span.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::fmt::{self, Debug, Formatter}; -use std::num::NonZeroU64; -use std::ops::Range; - -use super::Source; -use crate::file::FileId; -use crate::World; - -/// A unique identifier for a syntax node. -/// -/// This is used throughout the compiler to track which source section an error -/// or element stems from. Can be [mapped back](Self::range) to a byte range for -/// user facing display. -/// -/// During editing, the span values stay mostly stable, even for nodes behind an -/// insertion. This is not true for simple ranges as they would shift. Spans can -/// be used as inputs to memoized functions without hurting cache performance -/// when text is inserted somewhere in the document other than the end. -/// -/// Span ids are ordered in the syntax tree to enable quickly finding the node -/// with some id: -/// - The id of a parent is always smaller than the ids of any of its children. -/// - The id of a node is always greater than any id in the subtrees of any left -/// sibling and smaller than any id in the subtrees of any right sibling. -/// -/// This type takes up 8 bytes and is null-optimized (i.e. `Option` also -/// takes 8 bytes). -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct Span(NonZeroU64); - -impl Span { - /// The full range of numbers available for span numbering. - pub const FULL: Range = 2..(1 << Self::BITS); - const DETACHED: u64 = 1; - - // Data layout: - // | 16 bits source id | 48 bits number | - const BITS: usize = 48; - - /// Create a new span from a source id and a unique number. - /// - /// Panics if the `number` is not contained in `FULL`. - #[track_caller] - pub const fn new(id: FileId, number: u64) -> Self { - assert!( - Self::FULL.start <= number && number < Self::FULL.end, - "span number outside valid range" - ); - - Self::pack(id, number) - } - - /// A span that does not point into any source file. - pub const fn detached() -> Self { - Self::pack(FileId::detached(), Self::DETACHED) - } - - /// Pack the components into a span. - #[track_caller] - const fn pack(id: FileId, number: u64) -> Span { - let bits = ((id.as_u16() as u64) << Self::BITS) | number; - match NonZeroU64::new(bits) { - Some(v) => Self(v), - None => panic!("span encoding is zero"), - } - } - - /// The id of the source file the span points into. - pub const fn id(self) -> FileId { - FileId::from_u16((self.0.get() >> Self::BITS) as u16) - } - - /// The unique number of the span within its source file. - pub const fn number(self) -> u64 { - self.0.get() & ((1 << Self::BITS) - 1) - } - - /// Whether the span is detached. - pub const fn is_detached(self) -> bool { - self.id().is_detached() - } - - /// Get the byte range for this span. - #[track_caller] - pub fn range(self, world: &dyn World) -> Range { - let source = world - .source(self.id()) - .expect("span does not point into any source file"); - self.range_in(&source) - } - - /// Get the byte range for this span in the given source file. - #[track_caller] - pub fn range_in(self, source: &Source) -> Range { - source - .find(self) - .expect("span does not point into this source file") - .range() - } -} - -/// A value with a span locating it in the source code. -#[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub struct Spanned { - /// The spanned value. - pub v: T, - /// The value's location in source code. - pub span: Span, -} - -impl Spanned { - /// Create a new instance from a value and its span. - pub fn new(v: T, span: Span) -> Self { - Self { v, span } - } - - /// Convert from `&Spanned` to `Spanned<&T>` - pub fn as_ref(&self) -> Spanned<&T> { - Spanned { v: &self.v, span: self.span } - } - - /// Map the value using a function. - pub fn map(self, f: F) -> Spanned - where - F: FnOnce(T) -> U, - { - Spanned { v: f(self.v), span: self.span } - } -} - -impl Debug for Spanned { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.v.fmt(f) - } -} - -#[cfg(test)] -mod tests { - use super::{FileId, Span}; - - #[test] - fn test_span_encoding() { - let id = FileId::from_u16(5); - let span = Span::new(id, 10); - assert_eq!(span.id(), id); - assert_eq!(span.number(), 10); - } -} -- cgit v1.2.3