summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/ide/complete.rs 8
-rw-r--r-- src/ide/highlight.rs 86
-rw-r--r-- src/model/eval.rs 28
-rw-r--r-- src/syntax/ast.rs 106
-rw-r--r-- src/syntax/kind.rs 268
-rw-r--r-- src/syntax/lexer.rs 344
-rw-r--r-- src/syntax/mod.rs 3
-rw-r--r-- src/syntax/node.rs 36
-rw-r--r-- src/syntax/parser.rs 2184
-rw-r--r-- src/syntax/reparse.rs 525
-rw-r--r-- src/syntax/reparser.rs 262
-rw-r--r-- src/syntax/source.rs 3
12 files changed, 1530 insertions, 2323 deletions
diff --git a/src/ide/complete.rs b/src/ide/complete.rs
index 9e13fc8d..f0808b21 100644
--- a/src/ide/complete.rs
+++ b/src/ide/complete.rs
@@ -141,7 +141,7 @@ fn complete_params(ctx: &mut CompletionContext) -> bool {
if let Some(param) = before_colon.cast::<ast::Ident>();
then {
ctx.from = match ctx.leaf.kind() {
- SyntaxKind::Colon | SyntaxKind::Space { .. } => ctx.cursor,
+ SyntaxKind::Colon | SyntaxKind::Space => ctx.cursor,
_ => ctx.leaf.offset(),
};
ctx.named_param_value_completions(&callee, &param);
@@ -246,7 +246,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
/// Complete in markup mode.
fn complete_markup(ctx: &mut CompletionContext) -> bool {
// Bail if we aren't even in markup.
- if !matches!(ctx.leaf.parent_kind(), None | Some(SyntaxKind::Markup { .. })) {
+ if !matches!(ctx.leaf.parent_kind(), None | Some(SyntaxKind::Markup)) {
return false;
}
@@ -325,7 +325,7 @@ fn complete_math(ctx: &mut CompletionContext) -> bool {
fn complete_code(ctx: &mut CompletionContext) -> bool {
if matches!(
ctx.leaf.parent_kind(),
- None | Some(SyntaxKind::Markup { .. }) | Some(SyntaxKind::Math)
+ None | Some(SyntaxKind::Markup) | Some(SyntaxKind::Math)
) {
return false;
}
@@ -887,7 +887,7 @@ impl<'a> CompletionContext<'a> {
self.snippet_completion(
"import",
- "import ${items} from \"${file.typ}\"",
+ "import \"${file.typ}\": ${items}",
"Imports variables from another file.",
);
diff --git a/src/ide/highlight.rs b/src/ide/highlight.rs
index cc502537..42c05002 100644
--- a/src/ide/highlight.rs
+++ b/src/ide/highlight.rs
@@ -83,9 +83,41 @@ impl Category {
/// highlighted.
pub fn highlight(node: &LinkedNode) -> Option<Category> {
match node.kind() {
- SyntaxKind::LineComment => Some(Category::Comment),
- SyntaxKind::BlockComment => Some(Category::Comment),
- SyntaxKind::Space { .. } => None,
+ SyntaxKind::Markup
+ if node.parent_kind() == Some(SyntaxKind::TermItem)
+ && node.next_sibling().as_ref().map(|v| v.kind())
+ == Some(SyntaxKind::Colon) =>
+ {
+ Some(Category::ListTerm)
+ }
+ SyntaxKind::Markup => None,
+ SyntaxKind::Text => None,
+ SyntaxKind::Space => None,
+ SyntaxKind::Linebreak => Some(Category::Escape),
+ SyntaxKind::Parbreak => None,
+ SyntaxKind::Escape => Some(Category::Escape),
+ SyntaxKind::Shorthand => Some(Category::Escape),
+ SyntaxKind::Symbol => Some(Category::Escape),
+ SyntaxKind::SmartQuote => None,
+ SyntaxKind::Strong => Some(Category::Strong),
+ SyntaxKind::Emph => Some(Category::Emph),
+ SyntaxKind::Raw => Some(Category::Raw),
+ SyntaxKind::Link => Some(Category::Link),
+ SyntaxKind::Label => Some(Category::Label),
+ SyntaxKind::Ref => Some(Category::Ref),
+ SyntaxKind::Heading => Some(Category::Heading),
+ SyntaxKind::HeadingMarker => None,
+ SyntaxKind::ListItem => None,
+ SyntaxKind::ListMarker => Some(Category::ListMarker),
+ SyntaxKind::EnumItem => None,
+ SyntaxKind::EnumMarker => Some(Category::ListMarker),
+ SyntaxKind::TermItem => None,
+ SyntaxKind::TermMarker => Some(Category::ListMarker),
+ SyntaxKind::Math => None,
+ SyntaxKind::Atom => None,
+ SyntaxKind::Script => None,
+ SyntaxKind::Frac => None,
+ SyntaxKind::AlignPoint => Some(Category::MathOperator),
SyntaxKind::LeftBrace => Some(Category::Punctuation),
SyntaxKind::RightBrace => Some(Category::Punctuation),
@@ -105,16 +137,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
_ => None,
},
SyntaxKind::Dollar => Some(Category::MathDelimiter),
- SyntaxKind::Plus => Some(match node.parent_kind() {
- Some(SyntaxKind::EnumItem) => Category::ListMarker,
- _ => Category::Operator,
- }),
- SyntaxKind::Minus => Some(match node.parent_kind() {
- Some(SyntaxKind::ListItem) => Category::ListMarker,
- _ => Category::Operator,
- }),
+ SyntaxKind::Plus => Some(Category::Operator),
+ SyntaxKind::Minus => Some(Category::Operator),
SyntaxKind::Slash => Some(match node.parent_kind() {
- Some(SyntaxKind::TermItem) => Category::ListMarker,
Some(SyntaxKind::Frac) => Category::MathOperator,
_ => Category::Operator,
}),
@@ -157,41 +182,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::Include => Some(Category::Keyword),
SyntaxKind::As => Some(Category::Keyword),
- SyntaxKind::Markup { .. }
- if node.parent_kind() == Some(SyntaxKind::TermItem)
- && node.next_sibling().as_ref().map(|v| v.kind())
- == Some(SyntaxKind::Colon) =>
- {
- Some(Category::ListTerm)
- }
- SyntaxKind::Markup { .. } => None,
-
- SyntaxKind::Text => None,
- SyntaxKind::Linebreak => Some(Category::Escape),
- SyntaxKind::Escape => Some(Category::Escape),
- SyntaxKind::Shorthand => Some(Category::Escape),
- SyntaxKind::Symbol => Some(Category::Escape),
- SyntaxKind::SmartQuote { .. } => None,
- SyntaxKind::Strong => Some(Category::Strong),
- SyntaxKind::Emph => Some(Category::Emph),
- SyntaxKind::Raw { .. } => Some(Category::Raw),
- SyntaxKind::Link => Some(Category::Link),
- SyntaxKind::Label => Some(Category::Label),
- SyntaxKind::Ref => Some(Category::Ref),
- SyntaxKind::Heading => Some(Category::Heading),
- SyntaxKind::ListItem => None,
- SyntaxKind::EnumItem => None,
- SyntaxKind::EnumNumbering => Some(Category::ListMarker),
- SyntaxKind::TermItem => None,
- SyntaxKind::Math => None,
- SyntaxKind::Atom => None,
- SyntaxKind::Script => None,
- SyntaxKind::Frac => None,
- SyntaxKind::AlignPoint => Some(Category::MathOperator),
-
SyntaxKind::Ident => match node.parent_kind() {
Some(
- SyntaxKind::Markup { .. }
+ SyntaxKind::Markup
| SyntaxKind::Math
| SyntaxKind::Script
| SyntaxKind::Frac,
@@ -258,7 +251,10 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::LoopContinue => None,
SyntaxKind::FuncReturn => None,
+ SyntaxKind::LineComment => Some(Category::Comment),
+ SyntaxKind::BlockComment => Some(Category::Comment),
SyntaxKind::Error => Some(Category::Error),
+ SyntaxKind::Eof => None,
}
}
diff --git a/src/model/eval.rs b/src/model/eval.rs
index 8e8c93c5..0469649b 100644
--- a/src/model/eval.rs
+++ b/src/model/eval.rs
@@ -261,9 +261,10 @@ impl Eval for ast::Expr {
};
match self {
+ Self::Text(v) => v.eval(vm).map(Value::Content),
Self::Space(v) => v.eval(vm).map(Value::Content),
Self::Linebreak(v) => v.eval(vm).map(Value::Content),
- Self::Text(v) => v.eval(vm).map(Value::Content),
+ Self::Parbreak(v) => v.eval(vm).map(Value::Content),
Self::Escape(v) => v.eval(vm).map(Value::Content),
Self::Shorthand(v) => v.eval(vm).map(Value::Content),
Self::Symbol(v) => v.eval(vm).map(Value::Content),
@@ -330,14 +331,19 @@ impl ast::Expr {
}
}
+impl Eval for ast::Text {
+ type Output = Content;
+
+ fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
+ Ok((vm.items.text)(self.get().clone()))
+ }
+}
+
impl Eval for ast::Space {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok(match self.newlines() {
- 0..=1 => (vm.items.space)(),
- _ => (vm.items.parbreak)(),
- })
+ Ok((vm.items.space)())
}
}
@@ -349,11 +355,11 @@ impl Eval for ast::Linebreak {
}
}
-impl Eval for ast::Text {
+impl Eval for ast::Parbreak {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok((vm.items.text)(self.get().clone()))
+ Ok((vm.items.parbreak)())
}
}
@@ -438,7 +444,7 @@ impl Eval for ast::Link {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok((vm.items.link)(self.url().clone()))
+ Ok((vm.items.link)(self.get().clone()))
}
}
@@ -1231,13 +1237,17 @@ impl Eval for ast::ModuleImport {
}
}
Some(ast::Imports::Items(idents)) => {
+ let mut errors = vec![];
for ident in idents {
if let Some(value) = module.scope().get(&ident) {
vm.scopes.top.define(ident.take(), value.clone());
} else {
- bail!(ident.span(), "unresolved import");
+ errors.push(error!(ident.span(), "unresolved import"));
}
}
+ if errors.len() > 0 {
+ return Err(Box::new(errors));
+ }
}
}
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index bf4b37bc..169b0276 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -29,9 +29,6 @@ pub trait AstNode: Sized {
macro_rules! node {
($(#[$attr:meta])* $name:ident) => {
- node!{ $(#[$attr])* $name: SyntaxKind::$name { .. } }
- };
- ($(#[$attr:meta])* $name:ident: $variants:pat) => {
#[derive(Debug, Clone, PartialEq, Hash)]
#[repr(transparent)]
$(#[$attr])*
@@ -39,7 +36,7 @@ macro_rules! node {
impl AstNode for $name {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
- if matches!(node.kind(), $variants) {
+ if matches!(node.kind(), SyntaxKind::$name) {
Some(Self(node.clone()))
} else {
Option::None
@@ -67,8 +64,7 @@ impl Markup {
.filter(move |node| {
// Ignore newline directly after statements without semicolons.
let kind = node.kind();
- let keep =
- !was_stmt || !matches!(kind, SyntaxKind::Space { newlines: 1 });
+ let keep = !was_stmt || node.kind() != SyntaxKind::Space;
was_stmt = kind.is_stmt();
keep
})
@@ -79,12 +75,15 @@ impl Markup {
/// An expression in markup, math or code.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum Expr {
- /// Whitespace.
+ /// Plain text without markup.
+ Text(Text),
+ /// Whitespace in markup or math. Has at most one newline in markup, as more
+ /// indicate a paragraph break.
Space(Space),
/// A forced line break: `\`.
Linebreak(Linebreak),
- /// Plain text without markup.
- Text(Text),
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak(Parbreak),
/// An escape sequence: `\#`, `\u{1F5FA}`.
Escape(Escape),
/// A shorthand for a unicode codepoint. For example, `~` for non-breaking
@@ -189,7 +188,7 @@ pub enum Expr {
impl Expr {
fn cast_with_space(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
- SyntaxKind::Space { .. } => node.cast().map(Self::Space),
+ SyntaxKind::Space => node.cast().map(Self::Space),
_ => Self::from_untyped(node),
}
}
@@ -199,14 +198,15 @@ impl AstNode for Expr {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
+ SyntaxKind::Parbreak => node.cast().map(Self::Parbreak),
SyntaxKind::Text => node.cast().map(Self::Text),
SyntaxKind::Escape => node.cast().map(Self::Escape),
SyntaxKind::Shorthand => node.cast().map(Self::Shorthand),
SyntaxKind::Symbol => node.cast().map(Self::Symbol),
- SyntaxKind::SmartQuote { .. } => node.cast().map(Self::SmartQuote),
+ SyntaxKind::SmartQuote => node.cast().map(Self::SmartQuote),
SyntaxKind::Strong => node.cast().map(Self::Strong),
SyntaxKind::Emph => node.cast().map(Self::Emph),
- SyntaxKind::Raw { .. } => node.cast().map(Self::Raw),
+ SyntaxKind::Raw => node.cast().map(Self::Raw),
SyntaxKind::Link => node.cast().map(Self::Link),
SyntaxKind::Label => node.cast().map(Self::Label),
SyntaxKind::Ref => node.cast().map(Self::Ref),
@@ -255,9 +255,10 @@ impl AstNode for Expr {
fn as_untyped(&self) -> &SyntaxNode {
match self {
+ Self::Text(v) => v.as_untyped(),
Self::Space(v) => v.as_untyped(),
Self::Linebreak(v) => v.as_untyped(),
- Self::Text(v) => v.as_untyped(),
+ Self::Parbreak(v) => v.as_untyped(),
Self::Escape(v) => v.as_untyped(),
Self::Shorthand(v) => v.as_untyped(),
Self::Symbol(v) => v.as_untyped(),
@@ -312,35 +313,31 @@ impl AstNode for Expr {
}
node! {
- /// Whitespace.
- Space
+ /// Plain text without markup.
+ Text
}
-impl Space {
- /// Get the number of newlines.
- pub fn newlines(&self) -> usize {
- match self.0.kind() {
- SyntaxKind::Space { newlines } => newlines,
- _ => panic!("space is of wrong kind"),
- }
+impl Text {
+ /// Get the text.
+ pub fn get(&self) -> &EcoString {
+ self.0.text()
}
}
node! {
- /// A forced line break: `\`.
- Linebreak
+ /// Whitespace in markup or math. Has at most one newline in markup, as more
+ /// indicate a paragraph break.
+ Space
}
node! {
- /// Plain text without markup.
- Text
+ /// A forced line break: `\`.
+ Linebreak
}
-impl Text {
- /// Get the text.
- pub fn get(&self) -> &EcoString {
- self.0.text()
- }
+node! {
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak
}
node! {
@@ -454,10 +451,6 @@ node! {
impl Raw {
/// The trimmed raw text.
pub fn text(&self) -> EcoString {
- let SyntaxKind::Raw { column } = self.0.kind() else {
- panic!("raw node is of wrong kind");
- };
-
let mut text = self.0.text().as_str();
let blocky = text.starts_with("```");
text = text.trim_matches('`');
@@ -480,14 +473,16 @@ impl Raw {
let mut lines = split_newlines(text);
if blocky {
+ let dedent = lines
+ .iter()
+ .skip(1)
+ .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+ .min()
+ .unwrap_or(0);
+
// Dedent based on column, but not for the first line.
for line in lines.iter_mut().skip(1) {
- let offset = line
- .chars()
- .take(column)
- .take_while(|c| c.is_whitespace())
- .map(char::len_utf8)
- .sum();
+ let offset = line.chars().take(dedent).map(char::len_utf8).sum();
*line = &line[offset..];
}
@@ -531,7 +526,7 @@ node! {
impl Link {
/// Get the URL.
- pub fn url(&self) -> &EcoString {
+ pub fn get(&self) -> &EcoString {
self.0.text()
}
}
@@ -575,10 +570,9 @@ impl Heading {
pub fn level(&self) -> NonZeroUsize {
self.0
.children()
- .filter(|n| n.kind() == SyntaxKind::Eq)
- .count()
- .try_into()
- .expect("heading is missing equals sign")
+ .find(|node| node.kind() == SyntaxKind::HeadingMarker)
+ .and_then(|node| node.len().try_into().ok())
+ .expect("heading is missing marker")
}
}
@@ -603,7 +597,7 @@ impl EnumItem {
/// The explicit numbering, if any: `23.`.
pub fn number(&self) -> Option<NonZeroUsize> {
self.0.children().find_map(|node| match node.kind() {
- SyntaxKind::EnumNumbering => node.text().trim_end_matches('.').parse().ok(),
+ SyntaxKind::EnumMarker => node.text().trim_end_matches('.').parse().ok(),
_ => Option::None,
})
}
@@ -765,7 +759,7 @@ node! {
}
impl Bool {
- /// Get the value.
+ /// Get the boolean value.
pub fn get(&self) -> bool {
self.0.text() == "true"
}
@@ -777,7 +771,7 @@ node! {
}
impl Int {
- /// Get the value.
+ /// Get the integer value.
pub fn get(&self) -> i64 {
self.0.text().parse().expect("integer is invalid")
}
@@ -789,7 +783,7 @@ node! {
}
impl Float {
- /// Get the value.
+ /// Get the floating-point value.
pub fn get(&self) -> f64 {
self.0.text().parse().expect("float is invalid")
}
@@ -801,7 +795,7 @@ node! {
}
impl Numeric {
- /// Get the value and unit.
+ /// Get the numeric value and unit.
pub fn get(&self) -> (f64, Unit) {
let text = self.0.text();
let count = text
@@ -850,7 +844,7 @@ node! {
}
impl Str {
- /// Get the value.
+ /// Get the string value with resolved escape sequences.
pub fn get(&self) -> EcoString {
let text = self.0.text();
let unquoted = &text[1..text.len() - 1];
@@ -1058,7 +1052,7 @@ impl Unary {
pub fn op(&self) -> UnOp {
self.0
.children()
- .find_map(|node| UnOp::from_token(node.kind()))
+ .find_map(|node| UnOp::from_kind(node.kind()))
.expect("unary operation is missing operator")
}
@@ -1081,7 +1075,7 @@ pub enum UnOp {
impl UnOp {
/// Try to convert the token into a unary operation.
- pub fn from_token(token: SyntaxKind) -> Option<Self> {
+ pub fn from_kind(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Pos,
SyntaxKind::Minus => Self::Neg,
@@ -1125,7 +1119,7 @@ impl Binary {
Option::None
}
SyntaxKind::In if not => Some(BinOp::NotIn),
- _ => BinOp::from_token(node.kind()),
+ _ => BinOp::from_kind(node.kind()),
})
.expect("binary operation is missing operator")
}
@@ -1190,7 +1184,7 @@ pub enum BinOp {
impl BinOp {
/// Try to convert the token into a binary operation.
- pub fn from_token(token: SyntaxKind) -> Option<Self> {
+ pub fn from_kind(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Add,
SyntaxKind::Minus => Self::Sub,
diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs
index 26e92b93..5928fa0a 100644
--- a/src/syntax/kind.rs
+++ b/src/syntax/kind.rs
@@ -1,17 +1,72 @@
-/// All syntactical building blocks that can be part of a Typst document.
+/// A syntactical building block of a Typst file.
///
/// Can be created by the lexer or by the parser.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[repr(u8)]
pub enum SyntaxKind {
- /// A line comment: `// ...`.
- LineComment,
- /// A block comment: `/* ... */`.
- BlockComment,
- /// One or more whitespace characters. Single spaces are collapsed into text
- /// nodes if they would otherwise be surrounded by text nodes.
+ /// Markup of which all lines must have a minimal indentation.
///
- /// Also stores how many newlines are contained.
- Space { newlines: usize },
+ /// Notably, the number does not determine in which column the markup
+ /// started, but to the right of which column all markup elements must be,
+ /// so it is zero except inside indent-aware constructs like lists.
+ Markup,
+ /// Plain text without markup.
+ Text,
+ /// Whitespace. Contains at most one newline in markup, as more indicate a
+ /// paragraph break.
+ Space,
+ /// A forced line break: `\`.
+ Linebreak,
+ /// A paragraph break, indicated by one or multiple blank lines.
+ Parbreak,
+ /// An escape sequence: `\#`, `\u{1F5FA}`.
+ Escape,
+ /// A shorthand for a unicode codepoint. For example, `~` for non-breaking
+ /// space or `-?` for a soft hyphen.
+ Shorthand,
+ /// Symbol notation: `:arrow:l:`. The string only contains the inner part
+ /// without leading and trailing colon.
+ Symbol,
+ /// A smart quote: `'` or `"`.
+ SmartQuote,
+ /// Strong content: `*Strong*`.
+ Strong,
+ /// Emphasized content: `_Emphasized_`.
+ Emph,
+ /// Raw text with optional syntax highlighting: `` `...` ``.
+ Raw,
+ /// A hyperlink: `https://typst.org`.
+ Link,
+ /// A label: `<intro>`.
+ Label,
+ /// A reference: `@target`.
+ Ref,
+ /// A section heading: `= Introduction`.
+ Heading,
+ /// Introduces a section heading: `=`, `==`, ...
+ HeadingMarker,
+ /// An item in a bullet list: `- ...`.
+ ListItem,
+ /// Introduces a list item: `-`.
+ ListMarker,
+ /// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
+ EnumItem,
+ /// Introduces an enumeration item: `+`, `1.`.
+ EnumMarker,
+ /// An item in a term list: `/ Term: Details`.
+ TermItem,
+ /// Introduces a term item: `/`.
+ TermMarker,
+ /// A mathematical formula: `$x$`, `$ x^2 $`.
+ Math,
+ /// An atom in math: `x`, `+`, `12`.
+ Atom,
+ /// A base with optional sub- and superscripts in math: `a_1^2`.
+ Script,
+ /// A fraction in math: `x/2`.
+ Frac,
+ /// An alignment point in math: `&`.
+ AlignPoint,
/// A left curly brace, starting a code block: `{`.
LeftBrace,
@@ -37,19 +92,17 @@ pub enum SyntaxKind {
/// The strong text toggle, multiplication operator, and wildcard import
/// symbol: `*`.
Star,
- /// Toggles emphasized text and indicates a subscript in a formula: `_`.
+ /// Toggles emphasized text and indicates a subscript in math: `_`.
Underscore,
/// Starts and ends a math formula: `$`.
Dollar,
- /// The unary plus, binary addition operator, and start of enum items: `+`.
+ /// The unary plus and binary addition operator: `+`.
Plus,
- /// The unary negation, binary subtraction operator, and start of list
- /// items: `-`.
+ /// The unary negation and binary subtraction operator: `-`.
Minus,
- /// The division operator, start of term list items, and fraction operator
- /// in a formula: `/`.
+ /// The division operator and fraction operator in math: `/`.
Slash,
- /// The superscript operator in a formula: `^`.
+ /// The superscript operator in math: `^`.
Hat,
/// The field access and method call operator: `.`.
Dot,
@@ -119,59 +172,6 @@ pub enum SyntaxKind {
/// The `as` keyword.
As,
- /// Markup of which all lines must have a minimal indentation.
- ///
- /// Notably, the number does not determine in which column the markup
- /// started, but to the right of which column all markup elements must be,
- /// so it is zero except inside indent-aware constructs like lists.
- Markup { min_indent: usize },
- /// Plain text without markup.
- Text,
- /// A forced line break: `\`.
- Linebreak,
- /// An escape sequence: `\#`, `\u{1F5FA}`.
- Escape,
- /// A shorthand for a unicode codepoint. For example, `~` for non-breaking
- /// space or `-?` for a soft hyphen.
- Shorthand,
- /// Symbol notation: `:arrow:l:`. The string only contains the inner part
- /// without leading and trailing dot.
- Symbol,
- /// A smart quote: `'` or `"`.
- SmartQuote,
- /// Strong content: `*Strong*`.
- Strong,
- /// Emphasized content: `_Emphasized_`.
- Emph,
- /// Raw text with optional syntax highlighting: `` `...` ``.
- Raw { column: usize },
- /// A hyperlink: `https://typst.org`.
- Link,
- /// A label: `<intro>`.
- Label,
- /// A reference: `@target`.
- Ref,
- /// A section heading: `= Introduction`.
- Heading,
- /// An item in a bullet list: `- ...`.
- ListItem,
- /// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
- EnumItem,
- /// An explicit enumeration numbering: `23.`.
- EnumNumbering,
- /// An item in a term list: `/ Term: Details`.
- TermItem,
- /// A mathematical formula: `$x$`, `$ x^2 $`.
- Math,
- /// An atom in a formula: `x`, `+`, `12`.
- Atom,
- /// A base with optional sub- and superscripts in a formula: `a_1^2`.
- Script,
- /// A fraction in a formula: `x/2`.
- Frac,
- /// An alignment point in a formula: `&`.
- AlignPoint,
-
/// An identifier: `it`.
Ident,
/// A boolean: `true`, `false`.
@@ -243,54 +243,103 @@ pub enum SyntaxKind {
/// A return from a function: `return`, `return x + 1`.
FuncReturn,
+ /// A line comment: `// ...`.
+ LineComment,
+ /// A block comment: `/* ... */`.
+ BlockComment,
/// An invalid sequence of characters.
Error,
+ /// The end of the file.
+ Eof,
}
impl SyntaxKind {
- /// Whether this is trivia.
- pub fn is_trivia(self) -> bool {
- self.is_space() || self.is_comment() || self.is_error()
- }
-
- /// Whether this is a space.
- pub fn is_space(self) -> bool {
- matches!(self, Self::Space { .. })
- }
-
- /// Whether this is a comment.
- pub fn is_comment(self) -> bool {
- matches!(self, Self::LineComment | Self::BlockComment)
+ /// Is this a bracket, brace, or parenthesis?
+ pub fn is_grouping(self) -> bool {
+ matches!(
+ self,
+ Self::LeftBracket
+ | Self::LeftBrace
+ | Self::LeftParen
+ | Self::RightBracket
+ | Self::RightBrace
+ | Self::RightParen
+ )
}
- /// Whether this is an error.
- pub fn is_error(self) -> bool {
- matches!(self, SyntaxKind::Error)
+ /// Does this node terminate a preceding expression?
+ pub fn is_terminator(self) -> bool {
+ matches!(
+ self,
+ Self::Eof
+ | Self::Semicolon
+ | Self::RightBrace
+ | Self::RightParen
+ | Self::RightBracket
+ )
}
- /// Whether this is a left or right parenthesis.
- pub fn is_paren(self) -> bool {
- matches!(self, Self::LeftParen | Self::RightParen)
+ /// Is this a code or content block?
+ pub fn is_block(self) -> bool {
+ matches!(self, Self::CodeBlock | Self::ContentBlock)
}
/// Does this node need termination through a semicolon or linebreak?
pub fn is_stmt(self) -> bool {
matches!(
self,
- SyntaxKind::LetBinding
- | SyntaxKind::SetRule
- | SyntaxKind::ShowRule
- | SyntaxKind::ModuleImport
- | SyntaxKind::ModuleInclude
+ Self::LetBinding
+ | Self::SetRule
+ | Self::ShowRule
+ | Self::ModuleImport
+ | Self::ModuleInclude
)
}
+ /// Whether this kind of node is automatically skipped by the parser in
+ /// code and math mode.
+ pub fn is_trivia(self) -> bool {
+ matches!(
+ self,
+ Self::Space | Self::Parbreak | Self::LineComment | Self::BlockComment
+ )
+ }
+
+ /// Whether this is an error.
+ pub fn is_error(self) -> bool {
+ self == Self::Error
+ }
+
/// A human-readable name for the kind.
pub fn name(self) -> &'static str {
match self {
- Self::LineComment => "line comment",
- Self::BlockComment => "block comment",
- Self::Space { .. } => "space",
+ Self::Markup => "markup",
+ Self::Text => "text",
+ Self::Space => "space",
+ Self::Linebreak => "line break",
+ Self::Parbreak => "paragraph break",
+ Self::Escape => "escape sequence",
+ Self::Shorthand => "shorthand",
+ Self::Symbol => "symbol notation",
+ Self::Strong => "strong content",
+ Self::Emph => "emphasized content",
+ Self::Raw => "raw block",
+ Self::Link => "link",
+ Self::Label => "label",
+ Self::Ref => "reference",
+ Self::Heading => "heading",
+ Self::HeadingMarker => "heading marker",
+ Self::ListItem => "list item",
+ Self::ListMarker => "list marker",
+ Self::EnumItem => "enum item",
+ Self::EnumMarker => "enum marker",
+ Self::TermItem => "term list item",
+ Self::TermMarker => "term marker",
+ Self::Math => "math formula",
+ Self::Atom => "math atom",
+ Self::Script => "script",
+ Self::Frac => "fraction",
+ Self::AlignPoint => "alignment point",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::LeftBracket => "opening bracket",
@@ -309,7 +358,7 @@ impl SyntaxKind {
Self::Slash => "slash",
Self::Hat => "hat",
Self::Dot => "dot",
- Self::Eq => "assignment operator",
+ Self::Eq => "equals sign",
Self::EqEq => "equality operator",
Self::ExclEq => "inequality operator",
Self::Lt => "less-than operator",
@@ -341,28 +390,6 @@ impl SyntaxKind {
Self::Import => "keyword `import`",
Self::Include => "keyword `include`",
Self::As => "keyword `as`",
- Self::Markup { .. } => "markup",
- Self::Text => "text",
- Self::Linebreak => "linebreak",
- Self::Escape => "escape sequence",
- Self::Shorthand => "shorthand",
- Self::Symbol => "symbol notation",
- Self::Strong => "strong content",
- Self::Emph => "emphasized content",
- Self::Raw { .. } => "raw block",
- Self::Link => "link",
- Self::Label => "label",
- Self::Ref => "reference",
- Self::Heading => "heading",
- Self::ListItem => "list item",
- Self::EnumItem => "enumeration item",
- Self::EnumNumbering => "enumeration item numbering",
- Self::TermItem => "term list item",
- Self::Math => "math formula",
- Self::Atom => "math atom",
- Self::Script => "script",
- Self::Frac => "fraction",
- Self::AlignPoint => "alignment point",
Self::Ident => "identifier",
Self::Bool => "boolean",
Self::Int => "integer",
@@ -398,7 +425,10 @@ impl SyntaxKind {
Self::LoopBreak => "`break` expression",
Self::LoopContinue => "`continue` expression",
Self::FuncReturn => "`return` expression",
+ Self::LineComment => "line comment",
+ Self::BlockComment => "block comment",
Self::Error => "syntax error",
+ Self::Eof => "end of file",
}
}
}
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs
index f082bd28..e3c29150 100644
--- a/src/syntax/lexer.rs
+++ b/src/syntax/lexer.rs
@@ -9,12 +9,11 @@ use crate::util::{format_eco, EcoString};
pub(super) struct Lexer<'s> {
/// The underlying scanner.
s: Scanner<'s>,
- /// The mode the lexer is in. This determines what tokens it recognizes.
+ /// The mode the lexer is in. This determines which kinds of tokens it
+ /// produces.
mode: LexMode,
- /// Whether the last token has been terminated.
- terminated: bool,
- /// Offsets the indentation on the first line of the source.
- column_offset: usize,
+ /// Whether the last token contained a newline.
+ newline: bool,
/// An error for the last token.
error: Option<(EcoString, ErrorPos)>,
}
@@ -33,12 +32,11 @@ pub(super) enum LexMode {
impl<'s> Lexer<'s> {
/// Create a new lexer with the given mode and a prefix to offset column
/// calculations.
- pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
+ pub fn new(text: &'s str, mode: LexMode) -> Self {
Self {
s: Scanner::new(text),
mode,
- terminated: true,
- column_offset: column(prefix, prefix.len(), 0),
+ newline: false,
error: None,
}
}
@@ -64,26 +62,18 @@ impl<'s> Lexer<'s> {
self.s.jump(index);
}
- /// The underlying scanner.
- pub fn scanner(&self) -> Scanner<'s> {
- self.s
- }
-
- /// Whether the last token was terminated.
- pub fn terminated(&self) -> bool {
- self.terminated
+ /// Whether the last token contained a newline.
+ pub fn newline(&self) -> bool {
+ self.newline
}
- /// The column index of a given index in the source string.
- pub fn column(&self, index: usize) -> usize {
- column(self.s.string(), index, self.column_offset)
- }
-
- /// Take out the last error.
- pub fn last_error(&mut self) -> Option<(EcoString, ErrorPos)> {
+ /// Take out the last error, if any.
+ pub fn take_error(&mut self) -> Option<(EcoString, ErrorPos)> {
self.error.take()
}
+}
+impl Lexer<'_> {
/// Construct a full-positioned syntax error.
fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
self.error = Some((message.into(), ErrorPos::Full));
@@ -97,45 +87,53 @@ impl<'s> Lexer<'s> {
}
}
-impl Iterator for Lexer<'_> {
- type Item = SyntaxKind;
-
- /// Produce the next token.
- fn next(&mut self) -> Option<Self::Item> {
+/// Shared.
+impl Lexer<'_> {
+ pub fn next(&mut self) -> SyntaxKind {
+ self.newline = false;
self.error = None;
let start = self.s.cursor();
- let c = self.s.eat()?;
- Some(match c {
- // Trivia.
- c if c.is_whitespace() => self.whitespace(c),
- '/' if self.s.eat_if('/') => self.line_comment(),
- '/' if self.s.eat_if('*') => self.block_comment(),
- '*' if self.s.eat_if('/') => self.error("unexpected end of block comment"),
-
- // Other things.
- _ => match self.mode {
+ match self.s.eat() {
+ Some(c) if c.is_whitespace() => self.whitespace(start, c),
+ Some('/') if self.s.eat_if('/') => self.line_comment(),
+ Some('/') if self.s.eat_if('*') => self.block_comment(),
+ Some('*') if self.s.eat_if('/') => {
+ self.error("unexpected end of block comment")
+ }
+
+ Some(c) => match self.mode {
LexMode::Markup => self.markup(start, c),
LexMode::Math => self.math(c),
LexMode::Code => self.code(start, c),
},
- })
+
+ None => SyntaxKind::Eof,
+ }
+ }
+
+ fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
+ let more = self.s.eat_while(char::is_whitespace);
+ let newlines = match c {
+ ' ' if more.is_empty() => 0,
+ _ => count_newlines(self.s.from(start)),
+ };
+
+ self.newline = newlines > 0;
+ if self.mode == LexMode::Markup && newlines >= 2 {
+ SyntaxKind::Parbreak
+ } else {
+ SyntaxKind::Space
+ }
}
-}
-/// Shared.
-impl Lexer<'_> {
fn line_comment(&mut self) -> SyntaxKind {
self.s.eat_until(is_newline);
- if self.s.done() {
- self.terminated = false;
- }
SyntaxKind::LineComment
}
fn block_comment(&mut self) -> SyntaxKind {
let mut state = '_';
let mut depth = 1;
- self.terminated = false;
// Find the first `*/` that does not correspond to a nested `/*`.
while let Some(c) = self.s.eat() {
@@ -143,7 +141,6 @@ impl Lexer<'_> {
('*', '/') => {
depth -= 1;
if depth == 0 {
- self.terminated = true;
break;
}
'_'
@@ -162,32 +159,6 @@ impl Lexer<'_> {
SyntaxKind::BlockComment
}
-
- fn whitespace(&mut self, c: char) -> SyntaxKind {
- if c == ' ' && !self.s.at(char::is_whitespace) {
- return SyntaxKind::Space { newlines: 0 };
- }
-
- self.s.uneat();
-
- // Count the number of newlines.
- let mut newlines = 0;
- while let Some(c) = self.s.eat() {
- if !c.is_whitespace() {
- self.s.uneat();
- break;
- }
-
- if is_newline(c) {
- if c == '\r' {
- self.s.eat_if('\n');
- }
- newlines += 1;
- }
- }
-
- SyntaxKind::Space { newlines }
- }
}
/// Markup.
@@ -199,9 +170,9 @@ impl Lexer<'_> {
'`' => self.raw(),
'h' if self.s.eat_if("ttp://") => self.link(),
'h' if self.s.eat_if("ttps://") => self.link(),
+ '0'..='9' => self.numbering(start),
'<' if self.s.at(is_id_continue) => self.label(),
'@' if self.s.at(is_id_continue) => self.reference(),
- '0'..='9' => self.numbering(start),
'#' if self.s.eat_if('{') => SyntaxKind::LeftBrace,
'#' if self.s.eat_if('[') => SyntaxKind::LeftBracket,
'#' if self.s.at(is_id_start) => {
@@ -225,63 +196,28 @@ impl Lexer<'_> {
'\'' => SyntaxKind::SmartQuote,
'"' => SyntaxKind::SmartQuote,
'$' => SyntaxKind::Dollar,
- '=' => SyntaxKind::Eq,
- '+' => SyntaxKind::Plus,
- '/' => SyntaxKind::Slash,
'~' => SyntaxKind::Shorthand,
':' => SyntaxKind::Colon,
- '-' => SyntaxKind::Minus,
-
- _ => self.text(),
- }
- }
-
- fn text(&mut self) -> SyntaxKind {
- macro_rules! table {
- ($(|$c:literal)*) => {
- static TABLE: [bool; 128] = {
- let mut t = [false; 128];
- $(t[$c as usize] = true;)*
- t
- };
- };
- }
-
- table! {
- | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
- | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
- | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
- };
-
- loop {
- self.s.eat_until(|c: char| {
- TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
- });
-
- // Continue with the same text node if the thing would become text
- // anyway.
- let mut s = self.s;
- match s.eat() {
- Some(' ') if s.at(char::is_alphanumeric) => {}
- Some('/') if !s.at(['/', '*']) => {}
- Some('-') if !s.at(['-', '?']) => {}
- Some('.') if !s.at("..") => {}
- Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
- Some('@' | '#') if !s.at(is_id_start) => {}
- _ => break,
+ '=' => {
+ self.s.eat_while('=');
+ if self.space_and_more() {
+ SyntaxKind::HeadingMarker
+ } else {
+ self.text()
+ }
}
+ '-' if self.space_and_more() => SyntaxKind::ListMarker,
+ '+' if self.space_and_more() => SyntaxKind::EnumMarker,
+ '/' if self.space_and_more() => SyntaxKind::TermMarker,
- self.s = s;
+ _ => self.text(),
}
-
- SyntaxKind::Text
}
fn backslash(&mut self) -> SyntaxKind {
if self.s.eat_if("u{") {
let hex = self.s.eat_while(char::is_ascii_alphanumeric);
if !self.s.eat_if('}') {
- self.terminated = false;
return self.error_at_end("expected closing brace");
}
@@ -324,33 +260,14 @@ impl Lexer<'_> {
}
}
- fn link(&mut self) -> SyntaxKind {
- #[rustfmt::skip]
- self.s.eat_while(|c: char| matches!(c,
- | '0' ..= '9'
- | 'a' ..= 'z'
- | 'A' ..= 'Z'
- | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
- | '\'' | '.' | ',' | ';'
- ));
-
- if self.s.scout(-1) == Some('.') {
- self.s.uneat();
- }
-
- SyntaxKind::Link
- }
-
fn raw(&mut self) -> SyntaxKind {
- let column = self.column(self.s.cursor() - 1);
-
let mut backticks = 1;
while self.s.eat_if('`') {
backticks += 1;
}
if backticks == 2 {
- return SyntaxKind::Raw { column };
+ return SyntaxKind::Raw;
}
let mut found = 0;
@@ -363,7 +280,6 @@ impl Lexer<'_> {
}
if found != backticks {
- self.terminated = false;
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
return self.error_at_end(if found == 0 {
@@ -373,7 +289,24 @@ impl Lexer<'_> {
});
}
- SyntaxKind::Raw { column }
+ SyntaxKind::Raw
+ }
+
+ fn link(&mut self) -> SyntaxKind {
+ #[rustfmt::skip]
+ self.s.eat_while(|c: char| matches!(c,
+ | '0' ..= '9'
+ | 'a' ..= 'z'
+ | 'A' ..= 'Z'
+ | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
+ | '\'' | '.' | ',' | ';'
+ ));
+
+ if self.s.scout(-1) == Some('.') {
+ self.s.uneat();
+ }
+
+ SyntaxKind::Link
}
fn numbering(&mut self, start: usize) -> SyntaxKind {
@@ -386,23 +319,86 @@ impl Lexer<'_> {
return self.error("must be positive");
}
- return SyntaxKind::EnumNumbering;
+ return SyntaxKind::EnumMarker;
}
}
self.text()
}
+ fn label(&mut self) -> SyntaxKind {
+ let label = self.s.eat_while(is_id_continue);
+ if label.is_empty() {
+ return self.error("label cannot be empty");
+ }
+
+ if !self.s.eat_if('>') {
+ return self.error_at_end("expected closing angle bracket");
+ }
+
+ SyntaxKind::Label
+ }
+
fn reference(&mut self) -> SyntaxKind {
self.s.eat_while(is_id_continue);
SyntaxKind::Ref
}
+ fn text(&mut self) -> SyntaxKind {
+ macro_rules! table {
+ ($(|$c:literal)*) => {
+ static TABLE: [bool; 128] = {
+ let mut t = [false; 128];
+ $(t[$c as usize] = true;)*
+ t
+ };
+ };
+ }
+
+ table! {
+ | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
+ | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
+ | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
+ };
+
+ loop {
+ self.s.eat_until(|c: char| {
+ TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
+ });
+
+ // Continue with the same text node if the thing would become text
+ // anyway.
+ let mut s = self.s;
+ match s.eat() {
+ Some(' ') if s.at(char::is_alphanumeric) => {}
+ Some('/') if !s.at(['/', '*']) => {}
+ Some('-') if !s.at(['-', '?']) => {}
+ Some('.') if !s.at("..") => {}
+ Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
+ Some('@' | '#') if !s.at(is_id_start) => {}
+ _ => break,
+ }
+
+ self.s = s;
+ }
+
+ SyntaxKind::Text
+ }
+
fn in_word(&self) -> bool {
- let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
let prev = self.s.scout(-2);
let next = self.s.peek();
- alphanumeric(prev) && alphanumeric(next)
+ alphanum(prev) && alphanum(next)
+ }
+
+ fn space_and_more(&self) -> bool {
+ let mut s = self.s;
+ if !s.at(char::is_whitespace) {
+ return false;
+ }
+ s.eat_while(|c: char| c.is_whitespace() && !is_newline(c));
+ !s.done() && !s.at(is_newline)
}
}
@@ -586,26 +582,11 @@ impl Lexer<'_> {
});
if !self.s.eat_if('"') {
- self.terminated = false;
return self.error_at_end("expected quote");
}
SyntaxKind::Str
}
-
- fn label(&mut self) -> SyntaxKind {
- let label = self.s.eat_while(is_id_continue);
- if label.is_empty() {
- return self.error("label cannot be empty");
- }
-
- if !self.s.eat_if('>') {
- self.terminated = false;
- return self.error_at_end("expected closing angle bracket");
- }
-
- SyntaxKind::Label
- }
}
/// Try to parse an identifier into a keyword.
@@ -632,34 +613,6 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
})
}
-/// The column index of a given index in the source string, given a column
-/// offset for the first line.
-fn column(string: &str, index: usize, offset: usize) -> usize {
- let mut apply_offset = false;
- let res = string[..index]
- .char_indices()
- .rev()
- .take_while(|&(_, c)| !is_newline(c))
- .inspect(|&(i, _)| {
- if i == 0 {
- apply_offset = true
- }
- })
- .count();
-
- // The loop is never executed if the slice is empty, but we are of
- // course still at the start of the first line.
- if index == 0 {
- apply_offset = true;
- }
-
- if apply_offset {
- res + offset
- } else {
- res
- }
-}
-
/// Whether this character denotes a newline.
#[inline]
pub fn is_newline(character: char) -> bool {
@@ -695,6 +648,21 @@ pub(super) fn split_newlines(text: &str) -> Vec<&str> {
lines
}
+/// Count the number of newlines in text.
+fn count_newlines(text: &str) -> usize {
+ let mut newlines = 0;
+ let mut s = Scanner::new(text);
+ while let Some(c) = s.eat() {
+ if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
+ newlines += 1;
+ }
+ }
+ newlines
+}
+
/// Whether a string is a valid unicode identifier.
///
/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index a2bb5766..ae12e818 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -6,7 +6,7 @@ mod kind;
mod lexer;
mod node;
mod parser;
-mod reparse;
+mod reparser;
mod source;
mod span;
@@ -14,5 +14,6 @@ pub use self::kind::*;
pub use self::lexer::*;
pub use self::node::*;
pub use self::parser::*;
+pub use self::reparser::*;
pub use self::source::*;
pub use self::span::*;
diff --git a/src/syntax/node.rs b/src/syntax/node.rs
index 283d55b4..3465f73f 100644
--- a/src/syntax/node.rs
+++ b/src/syntax/node.rs
@@ -12,15 +12,15 @@ use crate::util::EcoString;
#[derive(Clone, PartialEq, Hash)]
pub struct SyntaxNode(Repr);
-/// The two internal representations.
+/// The three internal representations.
#[derive(Clone, PartialEq, Hash)]
enum Repr {
/// A leaf node.
Leaf(LeafNode),
/// A reference-counted inner node.
Inner(Arc<InnerNode>),
- /// An error.
- Error(ErrorNode),
+ /// An error node.
+ Error(Arc<ErrorNode>),
}
impl SyntaxNode {
@@ -36,7 +36,7 @@ impl SyntaxNode {
/// Create a new error node.
pub fn error(message: impl Into<EcoString>, pos: ErrorPos, len: usize) -> Self {
- Self(Repr::Error(ErrorNode::new(message, pos, len)))
+ Self(Repr::Error(Arc::new(ErrorNode::new(message, pos, len))))
}
/// The type of the node.
@@ -134,17 +134,13 @@ impl SyntaxNode {
.collect()
}
}
+}
- /// Change the type of the node.
- pub(super) fn convert_to(&mut self, kind: SyntaxKind) {
- debug_assert!(!kind.is_error());
- match &mut self.0 {
- Repr::Leaf(leaf) => leaf.kind = kind,
- Repr::Inner(inner) => {
- let node = Arc::make_mut(inner);
- node.kind = kind;
- }
- Repr::Error(_) => {}
+impl SyntaxNode {
+ /// Mark this node as erroneous.
+ pub(super) fn make_erroneous(&mut self) {
+ if let Repr::Inner(inner) = &mut self.0 {
+ Arc::make_mut(inner).erroneous = true;
}
}
@@ -159,7 +155,7 @@ impl SyntaxNode {
match &mut self.0 {
Repr::Leaf(leaf) => leaf.span = span,
Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span),
- Repr::Error(error) => error.span = span,
+ Repr::Error(error) => Arc::make_mut(error).span = span,
}
}
@@ -177,7 +173,7 @@ impl SyntaxNode {
match &mut self.0 {
Repr::Leaf(leaf) => leaf.span = mid,
Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?,
- Repr::Error(error) => error.span = mid,
+ Repr::Error(error) => Arc::make_mut(error).span = mid,
}
Ok(())
@@ -245,7 +241,7 @@ impl SyntaxNode {
}
/// The upper bound of assigned numbers in this subtree.
- fn upper(&self) -> u64 {
+ pub(super) fn upper(&self) -> u64 {
match &self.0 {
Repr::Inner(inner) => inner.upper,
Repr::Leaf(leaf) => leaf.span.number() + 1,
@@ -297,7 +293,7 @@ impl LeafNode {
impl Debug for LeafNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{:?}: {}", self.kind, self.len())
+ write!(f, "{:?}: {:?}", self.kind, self.text)
}
}
@@ -588,7 +584,7 @@ impl ErrorNode {
impl Debug for ErrorNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "({}): {}", self.message, self.len)
+ write!(f, "Error: {} ({})", self.len, self.message)
}
}
@@ -888,7 +884,7 @@ mod tests {
let prev = leaf.prev_leaf().unwrap();
let next = leaf.next_leaf().unwrap();
assert_eq!(prev.kind(), SyntaxKind::Eq);
- assert_eq!(leaf.kind(), SyntaxKind::Space { newlines: 0 });
+ assert_eq!(leaf.kind(), SyntaxKind::Space);
assert_eq!(next.kind(), SyntaxKind::Int);
}
}
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
index 1584e59b..0e1b52b1 100644
--- a/src/syntax/parser.rs
+++ b/src/syntax/parser.rs
@@ -1,262 +1,93 @@
use std::collections::HashSet;
-use std::fmt::{self, Display, Formatter};
-use std::mem;
+use std::ops::Range;
-use super::ast::{self, Assoc, BinOp, UnOp};
-use super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
+use super::{ast, is_newline, ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
use crate::util::{format_eco, EcoString};
/// Parse a source file.
pub fn parse(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Markup);
- markup(&mut p, true);
+ let mut p = Parser::new(text, 0, LexMode::Markup);
+ markup(&mut p, true, 0, |_| false);
p.finish().into_iter().next().unwrap()
}
-/// Parse code directly, only used for syntax highlighting.
+/// Parse code directly.
+///
+/// This is only used for syntax highlighting.
pub fn parse_code(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Code);
- p.perform(SyntaxKind::CodeBlock, code);
+ let mut p = Parser::new(text, 0, LexMode::Code);
+ let m = p.marker();
+ code(&mut p, |_| false);
+ p.wrap(m, SyntaxKind::CodeBlock);
p.finish().into_iter().next().unwrap()
}
-/// Reparse a code block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_code_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBrace) {
- return None;
- }
-
- code_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a content block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_content_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBracket) {
- return None;
- }
-
- content_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a sequence markup elements without the topmost node.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_markup_elements(
- prefix: &str,
- text: &str,
- end_pos: usize,
- differential: isize,
- reference: &[SyntaxNode],
+fn markup(
+ p: &mut Parser,
mut at_start: bool,
min_indent: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Markup);
-
- let mut node: Option<&SyntaxNode> = None;
- let mut iter = reference.iter();
- let mut offset = differential;
- let mut replaced = 0;
- let mut stopped = false;
-
- 'outer: while !p.eof() {
- if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() {
- if p.column(p.current_end()) < min_indent {
- return None;
+ mut stop: impl FnMut(SyntaxKind) -> bool,
+) {
+ let m = p.marker();
+ while !p.eof() && !stop(p.current) {
+ if p.newline() {
+ at_start = true;
+ if min_indent > 0 && p.column(p.current_end()) < min_indent {
+ break;
}
- }
-
- markup_node(&mut p, &mut at_start);
-
- if p.prev_end() <= end_pos {
+ p.eat();
continue;
}
- let recent = p.marker().before(&p).unwrap();
- let recent_start = p.prev_end() - recent.len();
-
- while offset <= recent_start as isize {
- if let Some(node) = node {
- // The nodes are equal, at the same position and have the
- // same content. The parsing trees have converged again, so
- // the reparse may stop here.
- if offset == recent_start as isize && node == recent {
- replaced -= 1;
- stopped = true;
- break 'outer;
- }
- }
-
- if let Some(node) = node {
- offset += node.len() as isize;
- }
-
- node = iter.next();
- if node.is_none() {
- break;
- }
-
- replaced += 1;
+ let prev = p.prev_end();
+ markup_expr(p, &mut at_start);
+ if !p.progress(prev) {
+ p.unexpected();
}
}
-
- if p.eof() && !stopped {
- replaced = reference.len();
- }
-
- let (mut res, terminated) = p.consume()?;
- if stopped {
- res.pop().unwrap();
- }
-
- Some((res, terminated, replaced))
+ p.wrap(m, SyntaxKind::Markup);
}
-/// Parse markup.
-///
-/// If `at_start` is true, things like headings that may only appear at the
-/// beginning of a line or content block are initially allowed.
-fn markup(p: &mut Parser, mut at_start: bool) {
- p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| {
- while !p.eof() {
- markup_node(p, &mut at_start);
+pub(super) fn reparse_markup(
+ text: &str,
+ range: Range<usize>,
+ at_start: &mut bool,
+ mut stop: impl FnMut(SyntaxKind) -> bool,
+) -> Option<Vec<SyntaxNode>> {
+ let mut p = Parser::new(&text, range.start, LexMode::Markup);
+ while !p.eof() && !stop(p.current) && p.current_start() < range.end {
+ if p.newline() {
+ *at_start = true;
+ p.eat();
+ continue;
}
- });
-}
-
-/// Parse markup that stays right of the given `column`.
-fn markup_indented(p: &mut Parser, min_indent: usize) {
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- let marker = p.marker();
- let mut at_start = false;
- while !p.eof() {
- match p.peek() {
- Some(SyntaxKind::Space { newlines: (1..) })
- if p.column(p.current_end()) < min_indent =>
- {
- break;
- }
- _ => {}
+ let prev = p.prev_end();
+ markup_expr(&mut p, at_start);
+ if !p.progress(prev) {
+ p.unexpected();
}
-
- markup_node(p, &mut at_start);
}
-
- marker.end(p, SyntaxKind::Markup { min_indent });
+ (p.balanced && p.current_start() == range.end).then(|| p.finish())
}
-/// Parse a line of markup that can prematurely end if `f` returns true.
-fn markup_line<F>(p: &mut Parser, mut f: F)
-where
- F: FnMut(SyntaxKind) -> bool,
-{
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| {
- let mut at_start = false;
- while let Some(kind) = p.peek() {
- if let SyntaxKind::Space { newlines: (1..) } = kind {
- break;
- }
-
- if f(kind) {
- break;
- }
-
- markup_node(p, &mut at_start);
- }
- });
-}
-
-fn markup_node(p: &mut Parser, at_start: &mut bool) {
- let Some(token) = p.peek() else { return };
- match token {
- // Whitespace.
- SyntaxKind::Space { newlines } => {
- *at_start |= newlines > 0;
- p.eat();
- return;
- }
-
- // Comments.
- SyntaxKind::LineComment | SyntaxKind::BlockComment => {
- p.eat();
- return;
- }
-
- // Text and markup.
- SyntaxKind::Text
- | SyntaxKind::Linebreak
- | SyntaxKind::SmartQuote { .. }
- | SyntaxKind::Escape
- | SyntaxKind::Shorthand
- | SyntaxKind::Symbol
- | SyntaxKind::Link
- | SyntaxKind::Raw { .. }
- | SyntaxKind::Ref => p.eat(),
-
- // Math.
- SyntaxKind::Dollar => math(p),
-
- // Strong, emph, heading.
+fn markup_expr(p: &mut Parser, at_start: &mut bool) {
+ match p.current() {
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
- SyntaxKind::Eq => heading(p, *at_start),
-
- // Lists.
- SyntaxKind::Minus => list_item(p, *at_start),
- SyntaxKind::Plus | SyntaxKind::EnumNumbering => enum_item(p, *at_start),
- SyntaxKind::Slash => {
- term_item(p, *at_start).ok();
- }
- SyntaxKind::Colon => {
- let marker = p.marker();
- p.eat();
- marker.convert(p, SyntaxKind::Text);
- }
+ SyntaxKind::HeadingMarker if *at_start => heading(p),
+ SyntaxKind::ListMarker if *at_start => list_item(p),
+ SyntaxKind::EnumMarker if *at_start => enum_item(p),
+ SyntaxKind::TermMarker if *at_start => term_item(p),
+ SyntaxKind::Dollar => equation(p),
+
+ SyntaxKind::HeadingMarker
+ | SyntaxKind::ListMarker
+ | SyntaxKind::EnumMarker
+ | SyntaxKind::TermMarker
+ | SyntaxKind::Colon => p.convert(SyntaxKind::Text),
- // Hashtag + keyword / identifier.
SyntaxKind::Ident
- | SyntaxKind::Label
| SyntaxKind::Let
| SyntaxKind::Set
| SyntaxKind::Show
@@ -267,147 +98,175 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Include
| SyntaxKind::Break
| SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
+ | SyntaxKind::Return
+ | SyntaxKind::LeftBrace
+ | SyntaxKind::LeftBracket => embedded_code_expr(p),
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::Text
+ | SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::Symbol
+ | SyntaxKind::SmartQuote
+ | SyntaxKind::Raw
+ | SyntaxKind::Link
+ | SyntaxKind::Label
+ | SyntaxKind::Ref => p.eat(),
- SyntaxKind::Error => p.eat(),
- _ => p.unexpected(),
- };
+ SyntaxKind::Space
+ | SyntaxKind::Parbreak
+ | SyntaxKind::LineComment
+ | SyntaxKind::BlockComment => {
+ p.eat();
+ return;
+ }
+ _ => {}
+ }
*at_start = false;
}
fn strong(p: &mut Parser) {
- p.perform(SyntaxKind::Strong, |p| {
- p.start_group(Group::Strong);
- markup(p, false);
- p.end_group();
- })
+ let m = p.marker();
+ p.expect(SyntaxKind::Star);
+ markup(p, false, 0, |kind| {
+ kind == SyntaxKind::Star
+ || kind == SyntaxKind::Parbreak
+ || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Star);
+ p.wrap(m, SyntaxKind::Strong);
}
fn emph(p: &mut Parser) {
- p.perform(SyntaxKind::Emph, |p| {
- p.start_group(Group::Emph);
- markup(p, false);
- p.end_group();
- })
+ let m = p.marker();
+ p.expect(SyntaxKind::Underscore);
+ markup(p, false, 0, |kind| {
+ kind == SyntaxKind::Underscore
+ || kind == SyntaxKind::Parbreak
+ || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Underscore);
+ p.wrap(m, SyntaxKind::Emph);
}
-fn heading(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- let mut markers = vec![];
- while p.at(SyntaxKind::Eq) {
- markers.push(p.marker());
- p.eat();
- }
-
- if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
- p.eat_while(|kind| kind == SyntaxKind::Space { newlines: 0 });
- markup_line(p, |kind| matches!(kind, SyntaxKind::Label));
- marker.end(p, SyntaxKind::Heading);
- } else {
- for marker in markers {
- marker.convert(p, SyntaxKind::Text);
- }
- }
+fn heading(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::HeadingMarker);
+ whitespace(p);
+ markup(p, false, usize::MAX, |kind| {
+ kind == SyntaxKind::Label || kind == SyntaxKind::RightBracket
+ });
+ p.wrap(m, SyntaxKind::Heading);
}
-fn list_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- p.assert(SyntaxKind::Minus);
-
+fn list_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::ListMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::ListItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::ListItem);
}
-fn enum_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- p.eat();
-
+fn enum_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::EnumMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::EnumItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::EnumItem);
}
-fn term_item(p: &mut Parser, at_start: bool) -> ParseResult {
- let marker = p.marker();
- p.eat();
-
+fn term_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::TermMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_line(p, |node| matches!(node, SyntaxKind::Colon));
- p.expect(SyntaxKind::Colon)?;
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::TermItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
-
- Ok(())
+ whitespace(p);
+ markup(p, false, usize::MAX, |kind| {
+ kind == SyntaxKind::Colon || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Colon);
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::TermItem);
}
-fn embedded_expr(p: &mut Parser) {
- // Does the expression need termination or can content follow directly?
- let stmt = matches!(
- p.peek(),
- Some(
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::Import
- | SyntaxKind::Include
- )
- );
-
- p.start_group(Group::Expr);
- let res = expr_prec(p, true, 0);
- if stmt && res.is_ok() && !p.eof() {
- p.expected("semicolon or line break");
+fn whitespace(p: &mut Parser) {
+ while p.current().is_trivia() {
+ p.eat();
}
- p.end_group();
}
-fn math(p: &mut Parser) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(Group::Math);
- while !p.eof() {
- math_node(p);
- }
- p.end_group();
- });
+fn equation(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Math);
+ p.expect(SyntaxKind::Dollar);
+ math(p, |kind| kind == SyntaxKind::Dollar);
+ p.expect(SyntaxKind::Dollar);
+ p.exit();
+ p.wrap(m, SyntaxKind::Math);
}
-fn math_node(p: &mut Parser) {
- math_node_prec(p, 0, None)
+fn math(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) {
+ while !p.eof() && !stop(p.current()) {
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
}
-fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
- let marker = p.marker();
- math_primary(p);
+fn math_expr(p: &mut Parser) {
+ math_expr_prec(p, 0, SyntaxKind::Eof)
+}
- loop {
- let (kind, mut prec, assoc, stop) = match p.peek() {
- v if v == stop => break,
- Some(SyntaxKind::Underscore) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat))
- }
- Some(SyntaxKind::Hat) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore))
+fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
+ let m = p.marker();
+ match p.current() {
+ SyntaxKind::Ident => {
+ p.eat();
+ if p.directly_at(SyntaxKind::Atom) && p.current_text() == "(" {
+ math_args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
}
- Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None),
- _ => break,
+ }
+
+ SyntaxKind::Atom => match p.current_text() {
+ "(" => math_delimited(p, ")"),
+ "{" => math_delimited(p, "}"),
+ "[" => math_delimited(p, "]"),
+ _ => p.eat(),
+ },
+
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::If
+ | SyntaxKind::While
+ | SyntaxKind::For
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ | SyntaxKind::Break
+ | SyntaxKind::Continue
+ | SyntaxKind::Return
+ | SyntaxKind::LeftBrace
+ | SyntaxKind::LeftBracket => embedded_code_expr(p),
+
+ SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::Symbol
+ | SyntaxKind::AlignPoint
+ | SyntaxKind::Str => p.eat(),
+
+ _ => return,
+ }
+
+ while !p.eof() && !p.at(stop) {
+ let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else {
+ break;
};
if prec < min_prec {
@@ -415,115 +274,140 @@ fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
}
match assoc {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
}
p.eat();
- math_node_prec(p, prec, stop);
-
- // Allow up to two different scripts. We do not risk encountering the
- // previous script kind again here due to right-associativity.
+ math_expr_prec(p, prec, stop);
if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) {
- math_node_prec(p, prec, None);
+ math_expr_prec(p, prec, SyntaxKind::Eof);
}
- marker.end(p, kind);
+ p.wrap(m, kind);
}
}
-/// Parse a primary math node.
-fn math_primary(p: &mut Parser) {
- let Some(token) = p.peek() else { return };
- match token {
- // Spaces and expressions.
- SyntaxKind::Space { .. }
- | SyntaxKind::Linebreak
- | SyntaxKind::Escape
- | SyntaxKind::Str
- | SyntaxKind::Shorthand
- | SyntaxKind::AlignPoint
- | SyntaxKind::Symbol => p.eat(),
-
- // Atoms.
- SyntaxKind::Atom => match p.peek_src() {
- "(" => math_group(p, Group::MathRow('(', ')')),
- "{" => math_group(p, Group::MathRow('{', '}')),
- "[" => math_group(p, Group::MathRow('[', ']')),
- _ => p.eat(),
- },
+fn math_delimited(p: &mut Parser, closing: &str) {
+ let m = p.marker();
+ p.expect(SyntaxKind::Atom);
+ while !p.eof()
+ && !p.at(SyntaxKind::Dollar)
+ && (!p.at(SyntaxKind::Atom) || p.current_text() != closing)
+ {
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ p.expect(SyntaxKind::Atom);
+ p.wrap(m, SyntaxKind::Math);
+}
- // Identifiers and math calls.
- SyntaxKind::Ident => {
- let marker = p.marker();
- p.eat();
+fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
+ match kind {
+ SyntaxKind::Underscore => {
+ Some((SyntaxKind::Script, SyntaxKind::Hat, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Hat => {
+ Some((SyntaxKind::Script, SyntaxKind::Underscore, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Slash => {
+ Some((SyntaxKind::Frac, SyntaxKind::Eof, ast::Assoc::Left, 1))
+ }
+ _ => None,
+ }
+}
- // Parenthesis or bracket means this is a function call.
- if matches!(p.peek_direct(), Some(SyntaxKind::Atom) if p.peek_src() == "(") {
- marker.perform(p, SyntaxKind::FuncCall, math_args);
+fn math_args(p: &mut Parser) {
+ p.expect(SyntaxKind::Atom);
+ let m = p.marker();
+ let mut m2 = p.marker();
+ while !p.eof() {
+ match p.current_text() {
+ ")" => break,
+ "," => {
+ p.wrap(m2, SyntaxKind::Math);
+ p.convert(SyntaxKind::Comma);
+ m2 = p.marker();
+ continue;
}
+ _ => {}
}
- // Hashtag + keyword / identifier.
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::If
- | SyntaxKind::While
- | SyntaxKind::For
- | SyntaxKind::Import
- | SyntaxKind::Include
- | SyntaxKind::Break
- | SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
-
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
-
- _ => p.unexpected(),
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ if m2 != p.marker() {
+ p.wrap(m2, SyntaxKind::Math);
}
+ p.wrap(m, SyntaxKind::Args);
+ p.expect(SyntaxKind::Atom);
}
-fn math_group(p: &mut Parser, group: Group) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(group);
- while !p.eof() {
- math_node(p);
+fn code(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) {
+ while !p.eof() && !stop(p.current()) {
+ p.stop_at_newline(true);
+ let prev = p.prev_end();
+ code_expr(p);
+ if p.progress(prev)
+ && !p.eof()
+ && !stop(p.current())
+ && !p.eat_if(SyntaxKind::Semicolon)
+ {
+ p.expected("semicolon or line break");
}
- p.end_group();
- })
+ p.unstop();
+ if !p.progress(prev) && !p.eof() {
+ p.unexpected();
+ }
+ }
}
-fn expr(p: &mut Parser) -> ParseResult {
- expr_prec(p, false, 0)
+fn code_expr(p: &mut Parser) {
+ code_expr_prec(p, false, 0)
}
-/// Parse an expression with operators having at least the minimum precedence.
-///
-/// If `atomic` is true, this does not parse binary operations and arrow
-/// functions, which is exactly what we want in a shorthand expression directly
-/// in markup.
-///
-/// Stops parsing at operations with lower precedence than `min_prec`,
-fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
- let marker = p.marker();
+fn embedded_code_expr(p: &mut Parser) {
+ let stmt = matches!(
+ p.current(),
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ );
- // Start the unary expression.
- match p.peek().and_then(UnOp::from_token) {
- Some(op) if !atomic => {
- p.eat();
- let prec = op.precedence();
- expr_prec(p, atomic, prec)?;
- marker.end(p, SyntaxKind::Unary);
- }
- _ => primary(p, atomic)?,
- };
+ p.stop_at_newline(true);
+ p.enter(LexMode::Code);
+ code_expr_prec(p, true, 0);
+ let semi = p.eat_if(SyntaxKind::Semicolon);
+ if stmt && !semi && !p.eof() && !p.at(SyntaxKind::RightBracket) {
+ p.expected("semicolon or line break");
+ }
+ p.exit();
+ p.unstop();
+}
+
+fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) {
+ let m = p.marker();
+ if let Some(op) = ast::UnOp::from_kind(p.current()) {
+ p.eat();
+ code_expr_prec(p, atomic, op.precedence());
+ p.wrap(m, SyntaxKind::Unary);
+ } else {
+ code_primary(p, atomic);
+ }
loop {
- // Parenthesis or bracket means this is a function call.
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() {
- marker.perform(p, SyntaxKind::FuncCall, args)?;
+ if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket)
+ {
+ args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
continue;
}
@@ -531,711 +415,571 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
break;
}
- // Method call or field access.
if p.eat_if(SyntaxKind::Dot) {
- ident(p)?;
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct()
+ p.expect(SyntaxKind::Ident);
+ if p.directly_at(SyntaxKind::LeftParen)
+ || p.directly_at(SyntaxKind::LeftBracket)
{
- marker.perform(p, SyntaxKind::MethodCall, args)?;
+ args(p);
+ p.wrap(m, SyntaxKind::MethodCall);
} else {
- marker.end(p, SyntaxKind::FieldAccess);
+ p.wrap(m, SyntaxKind::FieldAccess)
}
continue;
}
- let op = if p.eat_if(SyntaxKind::Not) {
+ let binop = if p.eat_if(SyntaxKind::Not) {
if p.at(SyntaxKind::In) {
- BinOp::NotIn
+ Some(ast::BinOp::NotIn)
} else {
p.expected("keyword `in`");
- return Err(ParseError);
+ break;
}
} else {
- match p.peek().and_then(BinOp::from_token) {
- Some(binop) => binop,
- None => break,
- }
+ ast::BinOp::from_kind(p.current())
};
- let mut prec = op.precedence();
- if prec < min_prec {
- break;
- }
+ if let Some(op) = binop {
+ let mut prec = op.precedence();
+ if prec < min_prec {
+ break;
+ }
- p.eat();
+ match op.assoc() {
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
+ }
- match op.assoc() {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
+ p.eat();
+ code_expr_prec(p, false, prec);
+ p.wrap(m, SyntaxKind::Binary);
+ continue;
}
- marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?;
+ break;
}
-
- Ok(())
}
-fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
- match p.peek() {
- // Literals and few other things.
- Some(
- SyntaxKind::None
- | SyntaxKind::Auto
- | SyntaxKind::Int
- | SyntaxKind::Float
- | SyntaxKind::Bool
- | SyntaxKind::Numeric
- | SyntaxKind::Str
- | SyntaxKind::Label
- | SyntaxKind::Raw { .. },
- ) => {
- p.eat();
- Ok(())
- }
-
- // Things that start with an identifier.
- Some(SyntaxKind::Ident) => {
- let marker = p.marker();
+fn code_primary(p: &mut Parser, atomic: bool) {
+ let m = p.marker();
+ match p.current() {
+ SyntaxKind::Ident => {
p.eat();
-
- // Arrow means this is a closure's lone parameter.
if !atomic && p.at(SyntaxKind::Arrow) {
- marker.end(p, SyntaxKind::Params);
- p.assert(SyntaxKind::Arrow);
- marker.perform(p, SyntaxKind::Closure, expr)
- } else {
- Ok(())
+ p.wrap(m, SyntaxKind::Params);
+ p.expect(SyntaxKind::Arrow);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Closure);
}
}
- // Structures.
- Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::Dollar) => Ok(math(p)),
-
- // Keywords.
- Some(SyntaxKind::Let) => let_binding(p),
- Some(SyntaxKind::Set) => set_rule(p),
- Some(SyntaxKind::Show) => show_rule(p),
- Some(SyntaxKind::If) => conditional(p),
- Some(SyntaxKind::While) => while_loop(p),
- Some(SyntaxKind::For) => for_loop(p),
- Some(SyntaxKind::Import) => module_import(p),
- Some(SyntaxKind::Include) => module_include(p),
- Some(SyntaxKind::Break) => break_stmt(p),
- Some(SyntaxKind::Continue) => continue_stmt(p),
- Some(SyntaxKind::Return) => return_stmt(p),
-
- Some(SyntaxKind::Error) => {
- p.eat();
- Err(ParseError)
- }
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftParen => with_paren(p),
+ SyntaxKind::Dollar => equation(p),
+ SyntaxKind::Let => let_binding(p),
+ SyntaxKind::Set => set_rule(p),
+ SyntaxKind::Show => show_rule(p),
+ SyntaxKind::If => conditional(p),
+ SyntaxKind::While => while_loop(p),
+ SyntaxKind::For => for_loop(p),
+ SyntaxKind::Import => module_import(p),
+ SyntaxKind::Include => module_include(p),
+ SyntaxKind::Break => break_stmt(p),
+ SyntaxKind::Continue => continue_stmt(p),
+ SyntaxKind::Return => return_stmt(p),
+
+ SyntaxKind::None
+ | SyntaxKind::Auto
+ | SyntaxKind::Int
+ | SyntaxKind::Float
+ | SyntaxKind::Bool
+ | SyntaxKind::Numeric
+ | SyntaxKind::Str
+ | SyntaxKind::Label
+ | SyntaxKind::Raw => p.eat(),
- // Nothing.
- _ => {
- p.expected_found("expression");
- Err(ParseError)
- }
+ _ => p.expected("expression"),
}
}
-fn ident(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::Ident) => {
- p.eat();
- Ok(())
- }
- _ => {
- p.expected_found("identifier");
- Err(ParseError)
- }
+fn block(p: &mut Parser) {
+ match p.current() {
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftBrace => code_block(p),
+ _ => p.expected("block"),
}
}
-/// Parse something that starts with a parenthesis, which can be either of:
-/// - Array literal
-/// - Dictionary literal
-/// - Parenthesized expression
-/// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
- let marker = p.marker();
+pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
+ let mut p = Parser::new(&text, range.start, LexMode::Code);
+ assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
+ block(&mut p);
+ (p.balanced && p.prev_end() == range.end)
+ .then(|| p.finish().into_iter().next().unwrap())
+}
- p.start_group(Group::Paren);
- let colon = p.eat_if(SyntaxKind::Colon);
- let kind = collection(p, true).0;
- p.end_group();
+fn code_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Code);
+ p.stop_at_newline(false);
+ p.expect(SyntaxKind::LeftBrace);
+ code(p, |kind| kind == SyntaxKind::RightBrace);
+ p.expect(SyntaxKind::RightBrace);
+ p.exit();
+ p.unstop();
+ p.wrap(m, SyntaxKind::CodeBlock);
+}
- // Leading colon makes this a dictionary.
- if colon {
- dict(p, marker);
- return Ok(());
- }
+fn content_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Markup);
+ p.expect(SyntaxKind::LeftBracket);
+ markup(p, true, 0, |kind| kind == SyntaxKind::RightBracket);
+ p.expect(SyntaxKind::RightBracket);
+ p.exit();
+ p.wrap(m, SyntaxKind::ContentBlock);
+}
- // Arrow means this is a closure's parameter list.
- if !atomic && p.at(SyntaxKind::Arrow) {
- params(p, marker);
- p.assert(SyntaxKind::Arrow);
- return marker.perform(p, SyntaxKind::Closure, expr);
+fn with_paren(p: &mut Parser) {
+ let m = p.marker();
+ let mut kind = collection(p, true);
+ if p.at(SyntaxKind::Arrow) {
+ validate_params(p, m);
+ p.wrap(m, SyntaxKind::Params);
+ p.expect(SyntaxKind::Arrow);
+ code_expr(p);
+ kind = SyntaxKind::Closure;
}
-
- // Transform into the identified collection.
match kind {
- CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized),
- CollectionKind::Positional => array(p, marker),
- CollectionKind::Named => dict(p, marker),
+ SyntaxKind::Array => validate_array(p, m),
+ SyntaxKind::Dict => validate_dict(p, m),
+ _ => {}
}
-
- Ok(())
+ p.wrap(m, kind);
}
-/// The type of a collection.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum CollectionKind {
- /// The collection is only one item and has no comma.
- Group,
- /// The collection starts with a positional item and has multiple items or a
- /// trailing comma.
- Positional,
- /// The collection starts with a colon or named item.
- Named,
-}
-
-/// Parse a collection.
-///
-/// Returns the length of the collection and whether the literal contained any
-/// commas.
-fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) {
- let mut collection_kind = None;
- let mut items = 0;
- let mut can_group = true;
- let mut missing_coma: Option<Marker> = None;
-
- while !p.eof() {
- let Ok(item_kind) = item(p, keyed) else {
- p.eat_if(SyntaxKind::Comma);
- collection_kind = Some(CollectionKind::Group);
- continue;
- };
-
- match item_kind {
- SyntaxKind::Spread => can_group = false,
- SyntaxKind::Named if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Named);
- can_group = false;
- }
- _ if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Positional);
+fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ p.stop_at_newline(false);
+ p.expect(SyntaxKind::LeftParen);
+
+ let mut count = 0;
+ let mut parenthesized = true;
+ let mut kind = None;
+ if keyed && p.eat_if(SyntaxKind::Colon) {
+ kind = Some(SyntaxKind::Dict);
+ parenthesized = false;
+ }
+
+ while !p.current().is_terminator() {
+ let prev = p.prev_end();
+ match item(p, keyed) {
+ SyntaxKind::Spread => parenthesized = false,
+ SyntaxKind::Named | SyntaxKind::Keyed if kind.is_none() => {
+ kind = Some(SyntaxKind::Dict);
+ parenthesized = false;
}
+ _ if kind.is_none() => kind = Some(SyntaxKind::Array),
_ => {}
}
- items += 1;
-
- if let Some(marker) = missing_coma.take() {
- p.expected_at(marker, "comma");
+ if !p.progress(prev) {
+ p.unexpected();
+ continue;
}
- if p.eof() {
+ count += 1;
+
+ if p.current().is_terminator() {
break;
}
- if p.eat_if(SyntaxKind::Comma) {
- can_group = false;
- } else {
- missing_coma = Some(p.trivia_start());
+ if p.expect(SyntaxKind::Comma) {
+ parenthesized = false;
}
}
- let kind = if can_group && items == 1 {
- CollectionKind::Group
- } else {
- collection_kind.unwrap_or(CollectionKind::Positional)
- };
+ p.expect(SyntaxKind::RightParen);
+ p.unstop();
- (kind, items)
+ if parenthesized && count == 1 {
+ SyntaxKind::Parenthesized
+ } else {
+ kind.unwrap_or(SyntaxKind::Array)
+ }
}
-fn item(p: &mut Parser, keyed: bool) -> ParseResult<SyntaxKind> {
- let marker = p.marker();
+fn item(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ let m = p.marker();
+
if p.eat_if(SyntaxKind::Dots) {
- marker.perform(p, SyntaxKind::Spread, expr)?;
- return Ok(SyntaxKind::Spread);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Spread);
+ return SyntaxKind::Spread;
}
- expr(p)?;
-
- if p.at(SyntaxKind::Colon) {
- match marker.after(p).map(|c| c.kind()) {
- Some(SyntaxKind::Ident) => {
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr)?;
- }
- Some(SyntaxKind::Str) if keyed => {
- p.eat();
- marker.perform(p, SyntaxKind::Keyed, expr)?;
- }
- kind => {
- let mut msg = EcoString::from("expected identifier");
- if keyed {
- msg.push_str(" or string");
- }
- if let Some(kind) = kind {
- msg.push_str(", found ");
- msg.push_str(kind.name());
- }
- marker.to_error(p, msg);
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr).ok();
- return Err(ParseError);
- }
- }
+ code_expr(p);
- Ok(SyntaxKind::Named)
- } else {
- Ok(SyntaxKind::None)
+ if !p.eat_if(SyntaxKind::Colon) {
+ return SyntaxKind::Int;
}
-}
-fn array(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"),
- _ => Ok(()),
- });
- marker.end(p, SyntaxKind::Array);
-}
+ code_expr(p);
-fn dict(p: &mut Parser, marker: Marker) {
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Keyed => {
- if let Some(child) = x.children().next() {
- let key = match child.cast::<ast::Str>() {
- Some(str) => str.get(),
- None => child.text().clone(),
- };
+ let kind = match p.node(m).map(SyntaxNode::kind) {
+ Some(SyntaxKind::Ident) => SyntaxKind::Named,
+ Some(SyntaxKind::Str) if keyed => SyntaxKind::Keyed,
+ _ => {
+ for child in p.post_process(m).next() {
+ if child.kind() == SyntaxKind::Colon {
+ break;
+ }
- if !used.insert(key) {
- return Err("pair has duplicate key");
+ let mut message = EcoString::from("expected identifier");
+ if keyed {
+ message.push_str(" or string");
}
+ message.push_str(", found ");
+ message.push_str(child.kind().name());
+ child.convert_to_error(message);
}
- Ok(())
+ SyntaxKind::Named
}
- SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()),
- _ => Err("expected named or keyed pair"),
- });
- marker.end(p, SyntaxKind::Dict);
-}
+ };
-fn params(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
- SyntaxKind::Spread
- if matches!(
- x.children().last().map(|child| child.kind()),
- Some(SyntaxKind::Ident)
- ) =>
- {
- Ok(())
- }
- _ => Err("expected identifier, named pair or argument sink"),
- });
- marker.end(p, SyntaxKind::Params);
+ p.wrap(m, kind);
+ kind
}
-/// Parse a code block: `{...}`.
-fn code_block(p: &mut Parser) {
- p.perform(SyntaxKind::CodeBlock, |p| {
- p.start_group(Group::Brace);
- code(p);
- p.end_group();
- });
-}
+fn args(p: &mut Parser) {
+ if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) {
+ p.expected("argument list");
+ }
-fn code(p: &mut Parser) {
- while !p.eof() {
- p.start_group(Group::Expr);
- if expr(p).is_ok() && !p.eof() {
- p.expected("semicolon or line break");
- }
- p.end_group();
+ let m = p.marker();
+ if p.at(SyntaxKind::LeftParen) {
+ collection(p, false);
+ validate_args(p, m);
+ }
- // Forcefully skip over newlines since the group's contents can't.
- p.eat_while(SyntaxKind::is_space);
+ while p.directly_at(SyntaxKind::LeftBracket) {
+ content_block(p);
}
-}
-fn content_block(p: &mut Parser) {
- p.perform(SyntaxKind::ContentBlock, |p| {
- p.start_group(Group::Bracket);
- markup(p, true);
- p.end_group();
- });
+ p.wrap(m, SyntaxKind::Args);
}
-fn args(p: &mut Parser) -> ParseResult {
- match p.peek_direct() {
- Some(SyntaxKind::LeftParen) => {}
- Some(SyntaxKind::LeftBracket) => {}
- _ => {
- p.expected_found("argument list");
- return Err(ParseError);
- }
+fn let_binding(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Let);
+
+ let m2 = p.marker();
+ p.expect(SyntaxKind::Ident);
+
+ let closure = p.directly_at(SyntaxKind::LeftParen);
+ if closure {
+ let m3 = p.marker();
+ collection(p, false);
+ validate_params(p, m3);
+ p.wrap(m3, SyntaxKind::Params);
}
- p.perform(SyntaxKind::Args, |p| {
- if p.at(SyntaxKind::LeftParen) {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
-
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named => {
- if let Some(ident) =
- x.children().next().and_then(|child| child.cast::<ast::Ident>())
- {
- if !used.insert(ident.take()) {
- return Err("duplicate argument");
- }
- }
- Ok(())
- }
- _ => Ok(()),
- });
- }
+ let f = if closure { Parser::expect } else { Parser::eat_if };
+ if f(p, SyntaxKind::Eq) {
+ code_expr(p);
+ }
- while p.peek_direct() == Some(SyntaxKind::LeftBracket) {
- content_block(p);
- }
- });
+ if closure {
+ p.wrap(m2, SyntaxKind::Closure);
+ }
- Ok(())
+ p.wrap(m, SyntaxKind::LetBinding);
}
-fn math_args(p: &mut Parser) {
- p.start_group(Group::MathRow('(', ')'));
- p.perform(SyntaxKind::Args, |p| {
- let mut marker = p.marker();
- while !p.eof() {
- if matches!(p.peek(), Some(SyntaxKind::Atom) if p.peek_src() == ",") {
- marker.end(p, SyntaxKind::Math);
- let comma = p.marker();
- p.eat();
- comma.convert(p, SyntaxKind::Comma);
- marker = p.marker();
- } else {
- math_node(p);
- }
- }
- if marker != p.marker() {
- marker.end(p, SyntaxKind::Math);
- }
- });
- p.end_group();
+fn set_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Set);
+ p.expect(SyntaxKind::Ident);
+ args(p);
+ if p.eat_if(SyntaxKind::If) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::SetRule);
}
-fn let_binding(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LetBinding, |p| {
- p.assert(SyntaxKind::Let);
-
- let marker = p.marker();
- ident(p)?;
-
- // If a parenthesis follows, this is a function definition.
- let has_params = p.peek_direct() == Some(SyntaxKind::LeftParen);
- if has_params {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
- params(p, marker);
- }
-
- if p.eat_if(SyntaxKind::Eq) {
- expr(p)?;
- } else if has_params {
- // Function definitions must have a body.
- p.expected("body");
- }
+fn show_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Show);
+ code_expr(p);
+ if p.eat_if(SyntaxKind::Colon) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::ShowRule);
+}
- // Rewrite into a closure expression if it's a function definition.
- if has_params {
- marker.end(p, SyntaxKind::Closure);
+fn conditional(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::If);
+ code_expr(p);
+ block(p);
+ if p.eat_if(SyntaxKind::Else) {
+ if p.at(SyntaxKind::If) {
+ conditional(p);
+ } else {
+ block(p);
}
+ }
+ p.wrap(m, SyntaxKind::Conditional);
+}
- Ok(())
- })
+fn while_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::While);
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::WhileLoop);
}
-fn set_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::SetRule, |p| {
- p.assert(SyntaxKind::Set);
- ident(p)?;
- args(p)?;
- if p.eat_if(SyntaxKind::If) {
- expr(p)?;
- }
- Ok(())
- })
+fn for_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::For);
+ for_pattern(p);
+ p.expect(SyntaxKind::In);
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::ForLoop);
}
-fn show_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ShowRule, |p| {
- p.assert(SyntaxKind::Show);
- expr(p)?;
- if p.eat_if(SyntaxKind::Colon) {
- expr(p)?;
+fn for_pattern(p: &mut Parser) {
+ let m = p.marker();
+ if p.expect(SyntaxKind::Ident) {
+ if p.eat_if(SyntaxKind::Comma) {
+ p.expect(SyntaxKind::Ident);
}
- Ok(())
- })
+ p.wrap(m, SyntaxKind::ForPattern);
+ }
}
-fn conditional(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::Conditional, |p| {
- p.assert(SyntaxKind::If);
-
- expr(p)?;
- body(p)?;
+fn module_import(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Import);
+ code_expr(p);
+ if p.eat_if(SyntaxKind::Colon) && !p.eat_if(SyntaxKind::Star) {
+ import_items(p);
+ }
+ p.wrap(m, SyntaxKind::ModuleImport);
+}
- if p.eat_if(SyntaxKind::Else) {
- if p.at(SyntaxKind::If) {
- conditional(p)?;
- } else {
- body(p)?;
- }
+fn import_items(p: &mut Parser) {
+ let m = p.marker();
+ while !p.eof() && !p.at(SyntaxKind::Semicolon) {
+ if !p.eat_if(SyntaxKind::Ident) {
+ p.unexpected();
}
-
- Ok(())
- })
+ if p.current().is_terminator() {
+ break;
+ }
+ p.expect(SyntaxKind::Comma);
+ }
+ p.wrap(m, SyntaxKind::ImportItems);
}
-fn while_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::WhileLoop, |p| {
- p.assert(SyntaxKind::While);
- expr(p)?;
- body(p)
- })
+fn module_include(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Include);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::ModuleInclude);
}
-fn for_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForLoop, |p| {
- p.assert(SyntaxKind::For);
- for_pattern(p)?;
- p.expect(SyntaxKind::In)?;
- expr(p)?;
- body(p)
- })
+fn break_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Break);
+ p.wrap(m, SyntaxKind::LoopBreak);
}
-fn for_pattern(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForPattern, |p| {
- ident(p)?;
- if p.eat_if(SyntaxKind::Comma) {
- ident(p)?;
- }
- Ok(())
- })
+fn continue_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Continue);
+ p.wrap(m, SyntaxKind::LoopContinue);
}
-fn module_import(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleImport, |p| {
- p.assert(SyntaxKind::Import);
- expr(p)?;
-
- if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) {
- return Ok(());
- }
-
- // This is the list of identifiers scenario.
- p.perform(SyntaxKind::ImportItems, |p| {
- let marker = p.marker();
- let items = collection(p, false).1;
- if items == 0 {
- p.expected("import items");
- }
- marker.filter_children(p, |n| match n.kind() {
- SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
- _ => Err("expected identifier"),
- });
- });
-
- Ok(())
- })
+fn return_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Return);
+ if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::FuncReturn);
}
-fn module_include(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleInclude, |p| {
- p.assert(SyntaxKind::Include);
- expr(p)
- })
+fn validate_array(p: &mut Parser, m: Marker) {
+ for child in p.post_process(m) {
+ let kind = child.kind();
+ if kind == SyntaxKind::Named || kind == SyntaxKind::Keyed {
+ child.convert_to_error(format_eco!(
+ "expected expression, found {}",
+ kind.name()
+ ));
+ }
+ }
}
-fn break_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopBreak, |p| {
- p.assert(SyntaxKind::Break);
- Ok(())
- })
-}
+fn validate_dict(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Named | SyntaxKind::Keyed => {
+ let Some(first) = child.children_mut().first_mut() else { continue };
+ let key = match first.cast::<ast::Str>() {
+ Some(str) => str.get(),
+ None => first.text().clone(),
+ };
-fn continue_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopContinue, |p| {
- p.assert(SyntaxKind::Continue);
- Ok(())
- })
+ if !used.insert(key) {
+ first.convert_to_error("duplicate key");
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {}
+ SyntaxKind::LeftParen
+ | SyntaxKind::RightParen
+ | SyntaxKind::Comma
+ | SyntaxKind::Colon => {}
+ kind => {
+ child.convert_to_error(format_eco!(
+ "expected named or keyed pair, found {}",
+ kind.name()
+ ));
+ }
+ }
+ }
}
-fn return_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::FuncReturn, |p| {
- p.assert(SyntaxKind::Return);
- if !p.at(SyntaxKind::Comma) && !p.eof() {
- expr(p)?;
+fn validate_params(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Ident => {
+ if !used.insert(child.text().clone()) {
+ child.convert_to_error("duplicate parameter");
+ }
+ }
+ SyntaxKind::Named => {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error("duplicate parameter");
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {
+ let Some(within) = child.children_mut().last_mut() else { continue };
+ if within.kind() != SyntaxKind::Ident {
+ within.convert_to_error(format_eco!(
+ "expected identifier, found {}",
+ within.kind().name(),
+ ));
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {}
+ kind => {
+ child.convert_to_error(format_eco!(
+ "expected identifier, named pair or argument sink, found {}",
+ kind.name()
+ ));
+ }
}
- Ok(())
- })
+ }
}
-fn body(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- _ => {
- p.expected("body");
- Err(ParseError)
+fn validate_args(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ if child.kind() == SyntaxKind::Named {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error("duplicate argument");
+ child.make_erroneous();
+ }
}
}
}
-/// A convenient token-based parser.
+/// Manages parsing of a stream of tokens.
struct Parser<'s> {
- /// An iterator over the source tokens.
+ text: &'s str,
lexer: Lexer<'s>,
- /// Whether we are at the end of the file or of a group.
- eof: bool,
- /// The current token.
- current: Option<SyntaxKind>,
- /// The end byte index of the last non-trivia token.
prev_end: usize,
- /// The start byte index of the peeked token.
current_start: usize,
- /// The stack of open groups.
- groups: Vec<GroupEntry>,
- /// The children of the currently built node.
- children: Vec<SyntaxNode>,
- /// Whether the last group was not correctly terminated.
- unterminated_group: bool,
- /// Whether a group terminator was found that did not close a group.
- stray_terminator: bool,
+ current: SyntaxKind,
+ modes: Vec<LexMode>,
+ nodes: Vec<SyntaxNode>,
+ stop_at_newline: Vec<bool>,
+ balanced: bool,
}
-impl<'s> Parser<'s> {
- /// Create a new parser for the source string.
- fn new(text: &'s str, mode: LexMode) -> Self {
- Self::with_prefix("", text, mode)
- }
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct Marker(usize);
- /// Create a new parser for the source string that is prefixed by some text
- /// that does not need to be parsed but taken into account for column
- /// calculation.
- fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
- let mut lexer = Lexer::with_prefix(prefix, text, mode);
+impl<'s> Parser<'s> {
+ fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
+ let mut lexer = Lexer::new(text, mode);
+ lexer.jump(offset);
let current = lexer.next();
Self {
lexer,
- eof: current.is_none(),
+ text,
+ prev_end: offset,
+ current_start: offset,
current,
- prev_end: 0,
- current_start: 0,
- groups: vec![],
- children: vec![],
- unterminated_group: false,
- stray_terminator: false,
+ modes: vec![],
+ nodes: vec![],
+ stop_at_newline: vec![],
+ balanced: true,
}
}
- /// End the parsing process and return the parsed children.
fn finish(self) -> Vec<SyntaxNode> {
- self.children
+ self.nodes
}
- /// End the parsing process and return
- /// - the parsed children and whether the last token was terminated, if all
- /// groups were terminated correctly, or
- /// - `None` otherwise.
- fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> {
- self.terminated().then(|| (self.children, self.lexer.terminated()))
+ fn prev_end(&self) -> usize {
+ self.prev_end
}
- /// Create a new marker.
- fn marker(&mut self) -> Marker {
- Marker(self.children.len())
+ fn current(&self) -> SyntaxKind {
+ self.current
}
- /// Create a marker right before the trailing trivia.
- fn trivia_start(&self) -> Marker {
- let count = self
- .children
- .iter()
- .rev()
- .take_while(|node| self.is_trivia(node.kind()))
- .count();
- Marker(self.children.len() - count)
+ fn current_start(&self) -> usize {
+ self.current_start
}
- /// Perform a subparse that wraps its result in a node with the given kind.
- fn perform<F, T>(&mut self, kind: SyntaxKind, f: F) -> T
- where
- F: FnOnce(&mut Self) -> T,
- {
- let prev = mem::take(&mut self.children);
- let output = f(self);
- let until = self.trivia_start();
- let mut children = mem::replace(&mut self.children, prev);
-
- if self.lexer.mode() == LexMode::Markup {
- self.children.push(SyntaxNode::inner(kind, children));
- } else {
- // Trailing trivia should not be wrapped into the new node.
- let idx = self.children.len();
- self.children.push(SyntaxNode::default());
- self.children.extend(children.drain(until.0..));
- self.children[idx] = SyntaxNode::inner(kind, children);
- }
-
- output
+ fn current_end(&self) -> usize {
+ self.lexer.cursor()
}
- /// Whether the end of the source string or group is reached.
- fn eof(&self) -> bool {
- self.eof
+ fn current_text(&self) -> &'s str {
+ &self.text[self.current_start..self.current_end()]
}
- /// Consume the current token and also trailing trivia.
- fn eat(&mut self) {
- self.stray_terminator |= match self.current {
- Some(SyntaxKind::RightParen) => !self.inside(Group::Paren),
- Some(SyntaxKind::RightBracket) => !self.inside(Group::Bracket),
- Some(SyntaxKind::RightBrace) => !self.inside(Group::Brace),
- _ => false,
- };
+ fn at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind
+ }
- self.prev_end = self.lexer.cursor();
- self.bump();
+ fn assert(&mut self, kind: SyntaxKind) {
+ assert_eq!(self.current, kind);
+ self.eat();
+ }
- if self.lexer.mode() != LexMode::Markup {
- // Skip whitespace and comments.
- while self.current.map_or(false, |kind| self.is_trivia(kind)) {
- self.bump();
- }
- }
+ fn eof(&self) -> bool {
+ self.at(SyntaxKind::Eof)
+ }
- self.repeek();
+ fn directly_at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind && self.prev_end == self.current_start
}
- /// Consume the current token if it is the given one.
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
if at {
@@ -1244,437 +988,169 @@ impl<'s> Parser<'s> {
at
}
- /// Eat tokens while the condition is true.
- fn eat_while<F>(&mut self, mut f: F)
- where
- F: FnMut(SyntaxKind) -> bool,
- {
- while self.peek().map_or(false, |t| f(t)) {
- self.eat();
- }
+ fn convert(&mut self, kind: SyntaxKind) {
+ self.current = kind;
+ self.eat();
}
- /// Consume the current token if it is the given one and produce an error if
- /// not.
- fn expect(&mut self, kind: SyntaxKind) -> ParseResult {
- let at = self.peek() == Some(kind);
- if at {
- self.eat();
- Ok(())
- } else {
- self.expected(kind.name());
- Err(ParseError)
- }
+ fn newline(&mut self) -> bool {
+ self.lexer.newline()
}
- /// Consume the current token, debug-asserting that it is the given one.
- #[track_caller]
- fn assert(&mut self, kind: SyntaxKind) {
- debug_assert_eq!(self.peek(), Some(kind));
- self.eat();
+ fn column(&self, at: usize) -> usize {
+ self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
}
- /// Whether the current token is of the given type.
- fn at(&self, kind: SyntaxKind) -> bool {
- self.peek() == Some(kind)
+ fn marker(&self) -> Marker {
+ Marker(self.nodes.len())
}
- /// Peek at the current token without consuming it.
- fn peek(&self) -> Option<SyntaxKind> {
- if self.eof {
- None
- } else {
- self.current
- }
+ fn node(&self, m: Marker) -> Option<&SyntaxNode> {
+ self.nodes.get(m.0)
}
- /// Peek at the current token, but only if it follows immediately after the
- /// last one without any trivia in between.
- fn peek_direct(&self) -> Option<SyntaxKind> {
- if self.prev_end() == self.current_start() {
- self.peek()
- } else {
- None
- }
+ fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
+ self.nodes[m.0..]
+ .iter_mut()
+ .filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
}
- /// The byte index at which the last non-trivia token ended.
- fn prev_end(&self) -> usize {
- self.prev_end
+ fn wrap(&mut self, m: Marker, kind: SyntaxKind) {
+ self.unskip();
+ let from = m.0.min(self.nodes.len());
+ let children = self.nodes.drain(from..).collect();
+ self.nodes.push(SyntaxNode::inner(kind, children));
+ self.skip();
}
- /// The byte index at which the current token starts.
- fn current_start(&self) -> usize {
- self.current_start
+ fn progress(&self, offset: usize) -> bool {
+ offset < self.prev_end
}
- /// The byte index at which the current token ends.
- fn current_end(&self) -> usize {
- self.lexer.cursor()
+ fn enter(&mut self, mode: LexMode) {
+ self.modes.push(self.lexer.mode());
+ self.lexer.set_mode(mode);
}
- /// The byte length of the current token.
- fn current_len(&self) -> usize {
- self.current_end() - self.current_start()
- }
-
- /// The text of the current node.
- fn peek_src(&self) -> &str {
- self.lexer.scanner().from(self.current_start)
- }
-
- /// Determine the column index for the given byte index.
- fn column(&self, index: usize) -> usize {
- self.lexer.column(index)
- }
-
- /// Continue parsing in a group.
- ///
- /// When the end delimiter of the group is reached, all subsequent calls to
- /// `peek()` return `None`. Parsing can only continue with a matching call
- /// to `end_group`.
- ///
- /// This panics if the current token does not start the given group.
- #[track_caller]
- fn start_group(&mut self, kind: Group) {
- self.groups.push(GroupEntry { kind, prev_mode: self.lexer.mode() });
- self.lexer.set_mode(match kind {
- Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup,
- Group::Math | Group::MathRow(_, _) => LexMode::Math,
- Group::Brace | Group::Paren | Group::Expr => LexMode::Code,
- });
-
- match kind {
- Group::Brace => self.assert(SyntaxKind::LeftBrace),
- Group::Bracket => self.assert(SyntaxKind::LeftBracket),
- Group::Paren => self.assert(SyntaxKind::LeftParen),
- Group::Strong => self.assert(SyntaxKind::Star),
- Group::Emph => self.assert(SyntaxKind::Underscore),
- Group::Math => self.assert(SyntaxKind::Dollar),
- Group::MathRow(..) => self.assert(SyntaxKind::Atom),
- Group::Expr => self.repeek(),
+ fn exit(&mut self) {
+ let mode = self.modes.pop().unwrap();
+ if mode != self.lexer.mode() {
+ self.unskip();
+ self.lexer.set_mode(mode);
+ self.lexer.jump(self.current_start);
+ self.lex();
+ self.skip();
}
}
- /// End the parsing of a group.
- ///
- /// This panics if no group was started.
- #[track_caller]
- fn end_group(&mut self) {
- let group_mode = self.lexer.mode();
- let group = self.groups.pop().expect("no started group");
- self.lexer.set_mode(group.prev_mode);
-
- let mut rescan = self.lexer.mode() != group_mode;
-
- // Eat the end delimiter if there is one.
- if let Some((end, required)) = match group.kind {
- Group::Brace => Some((SyntaxKind::RightBrace, true)),
- Group::Bracket => Some((SyntaxKind::RightBracket, true)),
- Group::Paren => Some((SyntaxKind::RightParen, true)),
- Group::Strong => Some((SyntaxKind::Star, true)),
- Group::Emph => Some((SyntaxKind::Underscore, true)),
- Group::Math => Some((SyntaxKind::Dollar, true)),
- Group::MathRow(..) => Some((SyntaxKind::Atom, true)),
- Group::Expr => Some((SyntaxKind::Semicolon, false)),
- } {
- if self.current.as_ref() == Some(&end) {
- // If another group closes after a group with the missing
- // terminator, its scope of influence ends here and no longer
- // taints the rest of the reparse.
- self.unterminated_group = false;
-
- // Bump the delimeter and return. No need to rescan in this
- // case. Also, we know that the delimiter is not stray even
- // though we already removed the group.
- let s = self.stray_terminator;
- self.eat();
- self.stray_terminator = s;
- rescan = false;
- } else if required {
- self.expected(end.name());
- self.unterminated_group = true;
- }
- }
-
- // Rescan the peeked token if the mode changed.
- if rescan {
- let mut target = self.prev_end();
- if group_mode != LexMode::Markup {
- let start = self.trivia_start().0;
- target = self.current_start
- - self.children[start..].iter().map(SyntaxNode::len).sum::<usize>();
- self.children.truncate(start);
- }
-
- self.lexer.jump(target);
- self.prev_end = self.lexer.cursor();
- self.current_start = self.lexer.cursor();
- self.current = self.lexer.next();
- }
+ fn stop_at_newline(&mut self, stop: bool) {
+ self.stop_at_newline.push(stop);
+ }
- self.repeek();
+ fn unstop(&mut self) {
+ self.unskip();
+ self.stop_at_newline.pop();
+ self.lexer.jump(self.prev_end);
+ self.lex();
+ self.skip();
}
- /// Checks if all groups were correctly terminated.
- fn terminated(&self) -> bool {
- self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
+ fn eat(&mut self) {
+ self.save();
+ self.lex();
+ self.skip();
}
- /// Low-level bump that consumes exactly one token without special trivia
- /// handling.
- fn bump(&mut self) {
- if let Some((message, pos)) = self.lexer.last_error() {
- let len = self.current_len();
- self.children.push(SyntaxNode::error(message, pos, len))
- } else {
- let kind = self.current.unwrap();
- let text = self.peek_src();
- self.children.push(SyntaxNode::leaf(kind, text));
+ fn skip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup {
+ while self.current.is_trivia() {
+ self.save();
+ self.lex();
+ }
}
- self.current_start = self.lexer.cursor();
- self.current = self.lexer.next();
}
- /// Take another look at the current token to recheck whether it ends a
- /// group.
- fn repeek(&mut self) {
- self.eof = match &self.current {
- Some(SyntaxKind::RightBrace) => self.inside(Group::Brace),
- Some(SyntaxKind::RightBracket) => self.inside(Group::Bracket),
- Some(SyntaxKind::RightParen) => self.inside(Group::Paren),
- Some(SyntaxKind::Star) => self.inside(Group::Strong),
- Some(SyntaxKind::Underscore) => self.inside(Group::Emph),
- Some(SyntaxKind::Dollar) => self
- .groups
- .iter()
- .rev()
- .skip_while(|group| matches!(group.kind, Group::MathRow(..)))
- .next()
- .map_or(false, |group| group.kind == Group::Math),
- Some(SyntaxKind::Semicolon) => self.inside(Group::Expr),
- Some(SyntaxKind::Atom) => match self.peek_src() {
- ")" => self.inside(Group::MathRow('(', ')')),
- "}" => self.inside(Group::MathRow('{', '}')),
- "]" => self.inside(Group::MathRow('[', ']')),
- _ => false,
- },
- Some(SyntaxKind::Space { newlines }) => self.space_ends_group(*newlines),
- Some(_) => false,
- None => true,
- };
- }
+ fn unskip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
+ while self.nodes.last().map_or(false, |last| last.kind().is_trivia()) {
+ self.nodes.pop();
+ }
- /// Returns whether the given type can be skipped over.
- fn is_trivia(&self, token: SyntaxKind) -> bool {
- match token {
- SyntaxKind::Space { newlines } => !self.space_ends_group(newlines),
- SyntaxKind::LineComment => true,
- SyntaxKind::BlockComment => true,
- _ => false,
+ self.lexer.jump(self.prev_end);
+ self.lex();
}
}
- /// Whether a space with the given number of newlines ends the current group.
- fn space_ends_group(&self, n: usize) -> bool {
- if n == 0 {
- return false;
+ fn save(&mut self) {
+ if self.at(SyntaxKind::Error) {
+ let (message, pos) = self.lexer.take_error().unwrap();
+ let len = self.current_end() - self.current_start;
+ self.nodes.push(SyntaxNode::error(message, pos, len));
+ } else {
+ let text = self.current_text();
+ self.nodes.push(SyntaxNode::leaf(self.current, text));
}
- match self.groups.last().map(|group| group.kind) {
- Some(Group::Strong | Group::Emph) => n >= 2,
- Some(Group::Expr) if n >= 1 => {
- // Allow else and method call to continue on next line.
- self.groups.iter().nth_back(1).map(|group| group.kind)
- != Some(Group::Brace)
- || !matches!(
- self.lexer.clone().next(),
- Some(SyntaxKind::Else | SyntaxKind::Dot)
- )
- }
- _ => false,
+ if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() {
+ self.prev_end = self.current_end();
}
}
- /// Whether we are inside the given group (can be nested).
- fn inside(&self, kind: Group) -> bool {
- self.groups
- .iter()
- .rev()
- .take_while(|g| !kind.is_weak() || g.kind.is_weak())
- .any(|g| g.kind == kind)
+ fn lex(&mut self) {
+ self.current_start = self.lexer.cursor();
+ self.current = self.lexer.next();
+ if self.lexer.mode() == LexMode::Code
+ && self.lexer.newline()
+ && self.stop_at_newline.last().copied().unwrap_or(false)
+ && !matches!(self.lexer.clone().next(), SyntaxKind::Else | SyntaxKind::Dot)
+ {
+ self.current = SyntaxKind::Eof;
+ }
}
-}
-/// Error handling.
-impl Parser<'_> {
- /// Eat the current token and add an error that it is unexpected.
- fn unexpected(&mut self) {
- if let Some(found) = self.peek() {
- let marker = self.marker();
- let msg = format_eco!("unexpected {}", found.name());
+ fn expect(&mut self, kind: SyntaxKind) -> bool {
+ let at = self.at(kind);
+ if at {
self.eat();
- marker.to_error(self, msg);
+ } else {
+ self.balanced &= !kind.is_grouping();
+ self.expected(kind.name());
}
+ at
}
- /// Add an error that the `thing` was expected at the end of the last
- /// non-trivia token.
fn expected(&mut self, thing: &str) {
- self.expected_at(self.trivia_start(), thing);
- }
-
- /// Insert an error message that `what` was expected at the marker position.
- fn expected_at(&mut self, marker: Marker, what: &str) {
- let msg = format_eco!("expected {}", what);
- self.children
- .insert(marker.0, SyntaxNode::error(msg, ErrorPos::Full, 0));
- }
-
- /// Eat the current token and add an error that it is not the expected
- /// `thing`.
- fn expected_found(&mut self, thing: &str) {
- match self.peek() {
- Some(found) => {
- let marker = self.marker();
- let msg = format_eco!("expected {}, found {}", thing, found.name());
- self.eat();
- marker.to_error(self, msg);
- }
- None => self.expected(thing),
- }
- }
-}
-
-/// Marks a location in a parser's child list.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct Marker(usize);
-
-impl Marker {
- /// Peek at the child directly before the marker.
- fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
- p.children.get(self.0.checked_sub(1)?)
- }
-
- /// Peek at the child directly after the marker.
- fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
- p.children.get(self.0)
- }
-
- /// Convert the child directly after marker.
- fn convert(self, p: &mut Parser, kind: SyntaxKind) {
- if let Some(child) = p.children.get_mut(self.0) {
- child.convert_to(kind);
+ self.unskip();
+ if self
+ .nodes
+ .last()
+ .map_or(true, |child| child.kind() != SyntaxKind::Error)
+ {
+ let message = format_eco!("expected {}", thing);
+ self.nodes.push(SyntaxNode::error(message, ErrorPos::Full, 0));
}
+ self.skip();
}
- /// Convert the child directly after marker.
- fn to_error(self, p: &mut Parser, message: impl Into<EcoString>) {
- if let Some(child) = p.children.get_mut(self.0) {
- child.convert_to_error(message);
+ fn unexpected(&mut self) {
+ self.unskip();
+ while self
+ .nodes
+ .last()
+ .map_or(false, |child| child.kind() == SyntaxKind::Error && child.len() == 0)
+ {
+ self.nodes.pop();
}
- }
-
- /// Perform a subparse that wraps all children after the marker in a node
- /// with the given kind.
- fn perform<T, F>(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T
- where
- F: FnOnce(&mut Parser) -> T,
- {
- let success = f(p);
- self.end(p, kind);
- success
- }
-
- /// Wrap all children after the marker (excluding trailing trivia) in a node
- /// with the given `kind`.
- fn end(self, p: &mut Parser, kind: SyntaxKind) {
- let until = p.trivia_start().0.max(self.0);
- let children = p.children.drain(self.0..until).collect();
- p.children.insert(self.0, SyntaxNode::inner(kind, children));
- }
+ self.skip();
- /// Wrap all children that do not fulfill the predicate in error nodes.
- fn filter_children<F>(self, p: &mut Parser, mut f: F)
- where
- F: FnMut(&SyntaxNode) -> Result<(), &'static str>,
- {
- for child in &mut p.children[self.0..] {
- // Don't expose errors.
- if child.kind().is_error() {
- continue;
- }
-
- // Don't expose trivia in code.
- if p.lexer.mode() != LexMode::Markup && child.kind().is_trivia() {
- continue;
- }
+ let kind = self.current;
+ let offset = self.nodes.len();
+ self.eat();
+ self.balanced &= !kind.is_grouping();
- if let Err(msg) = f(child) {
- let mut msg = EcoString::from(msg);
- if msg.starts_with("expected") {
- msg.push_str(", found ");
- msg.push_str(child.kind().name());
- }
- let len = child.len();
- *child = SyntaxNode::error(msg, ErrorPos::Full, len);
- }
+ if !kind.is_error() {
+ self.nodes[offset]
+ .convert_to_error(format_eco!("unexpected {}", kind.name()));
}
}
}
-
-/// A logical group of tokens, e.g. `[...]`.
-#[derive(Debug)]
-struct GroupEntry {
- /// The kind of group this is. This decides which token(s) will end the
- /// group. For example, a [`Group::Paren`] will be ended by
- /// [`Token::RightParen`].
- kind: Group,
- /// The mode the parser was in _before_ the group started (to which we go
- /// back once the group ends).
- prev_mode: LexMode,
-}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum Group {
- /// A curly-braced group: `{...}`.
- Brace,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A parenthesized group: `(...)`.
- Paren,
- /// A group surrounded with stars: `*...*`.
- Strong,
- /// A group surrounded with underscore: `_..._`.
- Emph,
- /// A group surrounded by dollar signs: `$...$`.
- Math,
- /// A group surrounded by math delimiters.
- MathRow(char, char),
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Expr,
-}
-
-impl Group {
- /// Whether the group can only force other weak groups to end.
- fn is_weak(self) -> bool {
- matches!(self, Group::Strong | Group::Emph)
- }
-}
-
-/// Allows parser methods to use the try operator. Never returned top-level
-/// because the parser recovers from all errors.
-type ParseResult<T = ()> = Result<T, ParseError>;
-
-/// The error type for parsing.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct ParseError;
-
-impl Display for ParseError {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- f.pad("failed to parse")
- }
-}
-
-impl std::error::Error for ParseError {}
diff --git a/src/syntax/reparse.rs b/src/syntax/reparse.rs
deleted file mode 100644
index e72192ff..00000000
--- a/src/syntax/reparse.rs
+++ /dev/null
@@ -1,525 +0,0 @@
-use std::ops::Range;
-
-use super::{
- is_newline, parse, reparse_code_block, reparse_content_block,
- reparse_markup_elements, Span, SyntaxKind, SyntaxNode,
-};
-
-/// Refresh the given syntax node with as little parsing as possible.
-///
-/// Takes the new source, the range in the old source that was replaced and the
-/// length of the replacement.
-///
-/// Returns the range in the new source that was ultimately reparsed.
-pub fn reparse(
- root: &mut SyntaxNode,
- text: &str,
- replaced: Range<usize>,
- replacement_len: usize,
-) -> Range<usize> {
- let change = Change { text, replaced, replacement_len };
- if let Some(range) = try_reparse(&change, root, 0, true, true) {
- return range;
- }
-
- let id = root.span().source();
- *root = parse(text);
- root.numberize(id, Span::FULL).unwrap();
- 0..text.len()
-}
-
-/// Try to reparse inside the given node.
-fn try_reparse(
- change: &Change,
- node: &mut SyntaxNode,
- mut offset: usize,
- outermost: bool,
- safe_to_replace: bool,
-) -> Option<Range<usize>> {
- let is_markup = matches!(node.kind(), SyntaxKind::Markup { .. });
- let original_count = node.children().len();
- let original_offset = offset;
-
- let mut search = SearchState::default();
- let mut ahead: Option<Ahead> = None;
-
- // Whether the first node that should be replaced is at start.
- let mut at_start = true;
-
- // Whether the last searched child is the outermost child.
- let mut child_outermost = false;
-
- // Find the the first child in the range of children to reparse.
- for (i, child) in node.children().enumerate() {
- let pos = NodePos { idx: i, offset };
- let child_span = offset..offset + child.len();
- child_outermost = outermost && i + 1 == original_count;
-
- match search {
- SearchState::NoneFound => {
- // The edit is contained within the span of the current element.
- if child_span.contains(&change.replaced.start)
- && child_span.end >= change.replaced.end
- {
- // In Markup mode, we want to consider a non-whitespace
- // neighbor if the edit is on the node boundary.
- search = if is_markup && child_span.end == change.replaced.end {
- SearchState::RequireNonTrivia(pos)
- } else {
- SearchState::Contained(pos)
- };
- } else if child_span.contains(&change.replaced.start) {
- search = SearchState::Inside(pos);
- } else if child_span.end == change.replaced.start
- && change.replaced.start == change.replaced.end
- && child_outermost
- {
- search = SearchState::SpanFound(pos, pos);
- } else {
- // Update compulsary state of `ahead_nontrivia`.
- if let Some(ahead_nontrivia) = ahead.as_mut() {
- if let SyntaxKind::Space { newlines: (1..) } = child.kind() {
- ahead_nontrivia.newline();
- }
- }
-
- // We look only for non spaces, non-semicolon and also
- // reject text that points to the special case for URL
- // evasion and line comments.
- if !child.kind().is_space()
- && child.kind() != SyntaxKind::Semicolon
- && (child.kind() != SyntaxKind::Text || child.text() != "/")
- && (ahead.is_none() || change.replaced.start > child_span.end)
- && !ahead.map_or(false, Ahead::is_compulsory)
- {
- ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind())));
- }
-
- at_start = next_at_start(child.kind(), at_start);
- }
- }
- SearchState::Inside(start) => {
- if child_span.end == change.replaced.end {
- search = SearchState::RequireNonTrivia(start);
- } else if child_span.end > change.replaced.end {
- search = SearchState::SpanFound(start, pos);
- }
- }
- SearchState::RequireNonTrivia(start) => {
- if !child.kind().is_trivia() {
- search = SearchState::SpanFound(start, pos);
- }
- }
- _ => unreachable!(),
- }
-
- offset += child.len();
-
- if search.done().is_some() {
- break;
- }
- }
-
- // If we were looking for a non-whitespace element and hit the end of
- // the file here, we instead use EOF as the end of the span.
- if let SearchState::RequireNonTrivia(start) = search {
- search = SearchState::SpanFound(
- start,
- NodePos {
- idx: node.children().len() - 1,
- offset: offset - node.children().last().unwrap().len(),
- },
- )
- }
-
- if let SearchState::Contained(pos) = search {
- // Do not allow replacement of elements inside of constructs whose
- // opening and closing brackets look the same.
- let safe_inside = is_bounded(node.kind());
- let child = &mut node.children_mut()[pos.idx];
- let prev_len = child.len();
- let prev_descendants = child.descendants();
-
- if !child.is_leaf() {
- if let Some(range) =
- try_reparse(change, child, pos.offset, child_outermost, safe_inside)
- {
- let new_len = child.len();
- let new_descendants = child.descendants();
- node.update_parent(prev_len, new_len, prev_descendants, new_descendants);
- return Some(range);
- }
- }
-
- let superseded_span = pos.offset..pos.offset + prev_len;
- let func: Option<ReparseMode> = match child.kind() {
- SyntaxKind::CodeBlock => Some(ReparseMode::Code),
- SyntaxKind::ContentBlock => Some(ReparseMode::Content),
- _ => None,
- };
-
- // Return if the element was reparsable on its own, otherwise try to
- // treat it as a markup element.
- if let Some(func) = func {
- if let Some(result) = replace(
- change,
- node,
- func,
- pos.idx..pos.idx + 1,
- superseded_span,
- outermost,
- ) {
- return Some(result);
- }
- }
- }
-
- // Make sure this is a markup node and that we may replace. If so, save
- // the current indent.
- let min_indent = match node.kind() {
- SyntaxKind::Markup { min_indent } if safe_to_replace => min_indent,
- _ => return None,
- };
-
- let (mut start, end) = search.done()?;
- if let Some(ahead) = ahead {
- if start.offset == change.replaced.start || ahead.is_compulsory() {
- start = ahead.pos;
- at_start = ahead.at_start;
- }
- } else {
- start = NodePos { idx: 0, offset: original_offset };
- }
-
- let superseded_span =
- start.offset..end.offset + node.children().as_slice()[end.idx].len();
-
- replace(
- change,
- node,
- ReparseMode::MarkupElements { at_start, min_indent },
- start.idx..end.idx + 1,
- superseded_span,
- outermost,
- )
-}
-
-/// Reparse the superseded nodes and replace them.
-fn replace(
- change: &Change,
- node: &mut SyntaxNode,
- mode: ReparseMode,
- superseded_idx: Range<usize>,
- superseded_span: Range<usize>,
- outermost: bool,
-) -> Option<Range<usize>> {
- let superseded_start = superseded_idx.start;
-
- let differential: isize =
- change.replacement_len as isize - change.replaced.len() as isize;
- let newborn_end = (superseded_span.end as isize + differential) as usize;
- let newborn_span = superseded_span.start..newborn_end;
-
- let mut prefix = "";
- for (i, c) in change.text[..newborn_span.start].char_indices().rev() {
- if is_newline(c) {
- break;
- }
- prefix = &change.text[i..newborn_span.start];
- }
-
- let (newborns, terminated, amount) = match mode {
- ReparseMode::Code => reparse_code_block(
- prefix,
- &change.text[newborn_span.start..],
- newborn_span.len(),
- ),
- ReparseMode::Content => reparse_content_block(
- prefix,
- &change.text[newborn_span.start..],
- newborn_span.len(),
- ),
- ReparseMode::MarkupElements { at_start, min_indent } => reparse_markup_elements(
- prefix,
- &change.text[newborn_span.start..],
- newborn_span.len(),
- differential,
- &node.children().as_slice()[superseded_start..],
- at_start,
- min_indent,
- ),
- }?;
-
- // Do not accept unclosed nodes if the old node wasn't at the right edge
- // of the tree.
- if !outermost && !terminated {
- return None;
- }
-
- node.replace_children(superseded_start..superseded_start + amount, newborns)
- .ok()?;
-
- Some(newborn_span)
-}
-
-/// A description of a change.
-struct Change<'a> {
- /// The new source code, with the change applied.
- text: &'a str,
- /// Which range in the old source file was changed.
- replaced: Range<usize>,
- /// How many characters replaced the text in `replaced`.
- replacement_len: usize,
-}
-
-/// Encodes the state machine of the search for the nodes are pending for
-/// replacement.
-#[derive(Clone, Copy, Debug, PartialEq)]
-enum SearchState {
- /// Neither an end nor a start have been found as of now.
- /// The latest non-trivia child is continually saved.
- NoneFound,
- /// The search has concluded by finding a node that fully contains the
- /// modifications.
- Contained(NodePos),
- /// The search has found the start of the modified nodes.
- Inside(NodePos),
- /// The search has found the end of the modified nodes but the change
- /// touched its boundries so another non-trivia node is needed.
- RequireNonTrivia(NodePos),
- /// The search has concluded by finding a start and an end index for nodes
- /// with a pending reparse.
- SpanFound(NodePos, NodePos),
-}
-
-impl Default for SearchState {
- fn default() -> Self {
- Self::NoneFound
- }
-}
-
-impl SearchState {
- fn done(self) -> Option<(NodePos, NodePos)> {
- match self {
- Self::NoneFound => None,
- Self::Contained(s) => Some((s, s)),
- Self::Inside(_) => None,
- Self::RequireNonTrivia(_) => None,
- Self::SpanFound(s, e) => Some((s, e)),
- }
- }
-}
-
-/// The position of a syntax node.
-#[derive(Clone, Copy, Debug, PartialEq)]
-struct NodePos {
- /// The index in the parent node.
- idx: usize,
- /// The byte offset in the string.
- offset: usize,
-}
-
-/// An ahead node with an index and whether it is `at_start`.
-#[derive(Clone, Copy, Debug, PartialEq)]
-struct Ahead {
- /// The position of the node.
- pos: NodePos,
- /// The `at_start` before this node.
- at_start: bool,
- /// The kind of ahead node.
- kind: AheadKind,
-}
-
-/// The kind of ahead node.
-#[derive(Clone, Copy, Debug, PartialEq)]
-enum AheadKind {
- /// A normal non-trivia child has been found.
- Normal,
- /// An unbounded child has been found. The boolean indicates whether it was
- /// on the current line, in which case adding it to the reparsing range is
- /// compulsory.
- Unbounded(bool),
-}
-
-impl Ahead {
- fn new(pos: NodePos, at_start: bool, bounded: bool) -> Self {
- Self {
- pos,
- at_start,
- kind: if bounded { AheadKind::Normal } else { AheadKind::Unbounded(true) },
- }
- }
-
- fn newline(&mut self) {
- if let AheadKind::Unbounded(current_line) = &mut self.kind {
- *current_line = false;
- }
- }
-
- fn is_compulsory(self) -> bool {
- matches!(self.kind, AheadKind::Unbounded(true))
- }
-}
-
-/// Which reparse function to choose for a span of elements.
-#[derive(Clone, Copy, Debug, PartialEq)]
-enum ReparseMode {
- /// Reparse a code block, including its braces.
- Code,
- /// Reparse a content block, including its square brackets.
- Content,
- /// Reparse elements of the markup. Also specified the initial `at_start`
- /// state for the reparse and the minimum indent of the reparsed nodes.
- MarkupElements { at_start: bool, min_indent: usize },
-}
-
-/// Whether changes _inside_ this node are safely encapsulated, so that only
-/// this node must be reparsed.
-fn is_bounded(kind: SyntaxKind) -> bool {
- matches!(
- kind,
- SyntaxKind::CodeBlock
- | SyntaxKind::ContentBlock
- | SyntaxKind::Linebreak
- | SyntaxKind::SmartQuote
- | SyntaxKind::BlockComment
- | SyntaxKind::Space { .. }
- | SyntaxKind::Escape
- | SyntaxKind::Shorthand
- )
-}
-
-/// Whether `at_start` would still be true after this node given the
-/// previous value of the property.
-fn next_at_start(kind: SyntaxKind, prev: bool) -> bool {
- match kind {
- SyntaxKind::Space { newlines: (1..) } => true,
- SyntaxKind::Space { .. } | SyntaxKind::LineComment | SyntaxKind::BlockComment => {
- prev
- }
- _ => false,
- }
-}
-
-#[cfg(test)]
-#[rustfmt::skip]
-mod tests {
- use std::fmt::Debug;
-
- use super::*;
- use super::super::{parse, Source};
-
- #[track_caller]
- fn check<T>(text: &str, found: T, expected: T)
- where
- T: Debug + PartialEq,
- {
- if found != expected {
- println!("source: {text:?}");
- println!("expected: {expected:#?}");
- println!("found: {found:#?}");
- panic!("test failed");
- }
- }
-
- #[track_caller]
- fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
- let mut source = Source::detached(prev);
- let range = source.edit(range, with);
- check(source.text(), source.root(), &parse(source.text()));
- assert_eq!(range, goal);
- }
-
- #[test]
- fn test_parse_incremental_simple_replacements() {
- test("hello world", 7 .. 12, "walkers", 0 .. 14);
- test("some content", 0..12, "", 0..0);
- test("", 0..0, "do it", 0..5);
- test("a d e", 1 .. 3, " b c d", 0 .. 9);
- test("*~ *", 2..2, "*", 0..5);
- test("_1_\n2a\n3", 5..5, "4", 4..7);
- test("_1_\n2a\n3~", 8..8, "4", 4..10);
- test("_1_ 2 3a\n4", 7..7, "5", 0..9);
- test("* {1+2} *", 5..6, "3", 2..7);
- test("a #f() e", 1 .. 6, " b c d", 0 .. 9);
- test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 2 .. 7);
- test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 3 .. 10);
- test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 4..20);
- test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6);
- test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14);
- test("\n= A heading", 4 .. 4, "n evocative", 0 .. 23);
- test("for~your~thing", 9 .. 9, "a", 0 .. 15);
- test("a your thing a", 6 .. 7, "a", 0 .. 14);
- test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
- test("#call() abc", 7 .. 7, "[]", 0 .. 10);
- test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35);
- test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 0..99);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33..42);
- test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
- test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
- test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33);
- test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
- test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 0 .. 25);
- test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 22);
- test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
- test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 18);
- test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
- test("#for", 4 .. 4, "//", 0 .. 6);
- test("#show f: a => b..", 16..16, "c", 0..18);
- test("a\n#let \nb", 7 .. 7, "i", 2 .. 9);
- test("a\n#for i \nb", 9 .. 9, "in", 2 .. 12);
- test("a~https://fun/html", 13..14, "n", 0..18);
- }
-
- #[test]
- fn test_parse_incremental_whitespace_invariants() {
- test("hello \\ world", 7 .. 8, "a ", 0 .. 14);
- test("hello \\ world", 7 .. 8, " a", 0 .. 14);
- test("x = y", 1 .. 1, " + y", 0 .. 6);
- test("x = y", 1 .. 1, " + y\n", 0 .. 7);
- test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
- test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
- test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 0 .. 23);
- test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
- test("hey #myfriend", 4 .. 4, "\\", 0 .. 6);
- test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11);
- test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 9);
- test(" // hi", 1 .. 1, " ", 0 .. 7);
- test("- \nA", 2..3, "", 0..3);
- }
-
- #[test]
- fn test_parse_incremental_type_invariants() {
- test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
- test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 11);
- test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
- test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
- test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
- test("a // b c #f()", 3 .. 4, "", 0 .. 12);
- test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
- test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
- test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
- test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
- test("a b c", 1 .. 1, "{[}", 0 .. 8);
- }
-
- #[test]
- fn test_parse_incremental_wrongly_or_unclosed_things() {
- test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
- test(r"this \u{abcd}", 8 .. 9, "", 0 .. 12);
- test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
- test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
- test("a b c", 1 .. 1, " /* letters */", 0 .. 19);
- test("a b c", 1 .. 1, " /* letters", 0 .. 16);
- test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41);
- test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
- test("~~~~", 2 .. 2, "[]", 0 .. 5);
- test("a[]b", 2 .. 2, "{", 1 .. 4);
- test("[hello]", 2 .. 3, "]", 0 .. 7);
- test("{a}", 1 .. 2, "b", 0 .. 3);
- test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
- test("#a()\n~", 3..4, "{}", 0..7);
- test("[]\n~", 1..2, "#if i==0 {true}", 0..18);
- }
-}
diff --git a/src/syntax/reparser.rs b/src/syntax/reparser.rs
new file mode 100644
index 00000000..9404055d
--- /dev/null
+++ b/src/syntax/reparser.rs
@@ -0,0 +1,262 @@
+use std::ops::Range;
+
+use super::{
+ is_newline, parse, reparse_block, reparse_markup, Span, SyntaxKind, SyntaxNode,
+};
+
+/// Bring a syntax tree up to date after an edit, parsing as little as possible.
+///
+/// `text` is the complete text after the edit, `replaced` is the range of the
+/// old text that was replaced, and `replacement_len` is the length of the
+/// replacement. The returned range, expressed in the new text, is the part
+/// that was ultimately reparsed.
+///
+/// If no incremental reparse succeeds, the whole text is parsed from scratch,
+/// the fresh tree is numberized with the source id taken from the old root's
+/// span, and the returned range covers the entire text.
+///
+/// The high-level API for this function is
+/// [`Source::edit`](super::Source::edit).
+pub fn reparse(
+    root: &mut SyntaxNode,
+    text: &str,
+    replaced: Range<usize>,
+    replacement_len: usize,
+) -> Range<usize> {
+    // Fast path: an incremental reparse somewhere inside the tree worked.
+    if let Some(range) = try_reparse(text, replaced, replacement_len, None, root, 0) {
+        return range;
+    }
+
+    // Slow path: replace the whole tree, keeping the previous source id.
+    let source_id = root.span().source();
+    *root = parse(text);
+    root.numberize(source_id, Span::FULL).unwrap();
+    0..text.len()
+}
+
+/// Try to reparse inside the given node.
+///
+/// - `text`: the full text after the edit.
+/// - `replaced`: the range of the old text that was replaced.
+/// - `replacement_len`: the length of the text that took its place.
+/// - `parent_kind`: the kind of `node`'s parent, or `None` when `node` is the
+/// root (the top-level caller passes `None`, recursive calls pass the kind of
+/// the node they are iterating).
+/// - `offset`: the text position at which `node` starts.
+///
+/// Returns the range of the new text that was reparsed, or `None` if no
+/// incremental strategy applied to this node or any of its descendants.
+fn try_reparse(
+ text: &str,
+ replaced: Range<usize>,
+ replacement_len: usize,
+ parent_kind: Option<SyntaxKind>,
+ node: &mut SyntaxNode,
+ offset: usize,
+) -> Option<Range<usize>> {
+ // The range of children which overlap with the edit.
+ // It starts out inverted (and therefore empty) so that the `min` below
+ // picks up the first overlapping child index.
+ let mut overlap = usize::MAX..0;
+ // Text position of the child currently being visited.
+ let mut cursor = offset;
+ let node_kind = node.kind();
+
+ for (i, child) in node.children_mut().iter_mut().enumerate() {
+ let prev_range = cursor..cursor + child.len();
+ let prev_len = child.len();
+ let prev_desc = child.descendants();
+
+ // Does the child surround the edit?
+ // If so, try to reparse within it or itself.
+ if !child.is_leaf() && includes(&prev_range, &replaced) {
+ // Length and range the child must have once the edit is applied.
+ let new_len = prev_len + replacement_len - replaced.len();
+ let new_range = cursor..cursor + new_len;
+
+ // Try to reparse within the child.
+ if let Some(range) = try_reparse(
+ text,
+ replaced.clone(),
+ replacement_len,
+ Some(node_kind),
+ child,
+ cursor,
+ ) {
+ // A successful nested reparse must have changed the child's
+ // length by exactly the size difference of the edit.
+ assert_eq!(child.len(), new_len);
+ let new_desc = child.descendants();
+ // NOTE(review): presumably propagates the child's changed length
+ // and descendant count into `node` -- confirm in `update_parent`.
+ node.update_parent(prev_len, new_len, prev_desc, new_desc);
+ return Some(range);
+ }
+
+ // If the child is a block, try to reparse the block.
+ if child.kind().is_block() {
+ if let Some(newborn) = reparse_block(text, new_range.clone()) {
+ // Only report success if the replacement was accepted.
+ return node
+ .replace_children(i..i + 1, vec![newborn])
+ .is_ok()
+ .then(|| new_range);
+ }
+ }
+ }
+
+ // Does the child overlap with the edit?
+ if overlaps(&prev_range, &replaced) {
+ overlap.start = overlap.start.min(i);
+ overlap.end = i + 1;
+ }
+
+ // Is the child beyond the edit?
+ // (`cursor` is still this child's start here; it is advanced below.)
+ if replaced.end < cursor {
+ break;
+ }
+
+ cursor += child.len();
+ }
+
+ // Try to reparse a range of markup expressions within markup. This is only
+ // possible if the markup is top-level or contained in a block, not if it is
+ // contained in things like headings or lists because too much can go wrong
+ // with indent and line breaks.
+ if node.kind() == SyntaxKind::Markup
+ && (parent_kind == None || parent_kind == Some(SyntaxKind::ContentBlock))
+ && !overlap.is_empty()
+ {
+ // Add one node of slack in both directions.
+ let children = node.children_mut();
+ let mut start = overlap.start.saturating_sub(1);
+ let mut end = (overlap.end + 1).min(children.len());
+
+ // Expand to the left.
+ while start > 0 && expand(&children[start]) {
+ start -= 1;
+ }
+
+ // Expand to the right.
+ while end < children.len() && expand(&children[end]) {
+ end += 1;
+ }
+
+ // Synthesize what `at_start` would be at the start of the reparse.
+ // `prefix_len` accumulates the length of all children before `start`.
+ let mut prefix_len = 0;
+ let mut at_start = true;
+ for child in &children[..start] {
+ prefix_len += child.len();
+ next_at_start(child, &mut at_start);
+ }
+
+ // Determine what `at_start` will have to be at the end of the reparse.
+ // `prev_len` accumulates the length of the children being replaced.
+ let mut prev_len = 0;
+ let mut prev_at_start_after = at_start;
+ for child in &children[start..end] {
+ prev_len += child.len();
+ next_at_start(child, &mut prev_at_start_after);
+ }
+
+ // Range in the new text that the replaced children will cover.
+ let shifted = offset + prefix_len;
+ let new_len = prev_len + replacement_len - replaced.len();
+ let new_range = shifted..shifted + new_len;
+ // With a parent (which the condition above restricts to a content
+ // block) parsing stops at a right bracket; at the top level it stops
+ // at the end of file.
+ let stop_kind = match parent_kind {
+ Some(_) => SyntaxKind::RightBracket,
+ None => SyntaxKind::Eof,
+ };
+
+ if let Some(newborns) =
+ reparse_markup(text, new_range.clone(), &mut at_start, |kind| {
+ kind == stop_kind
+ })
+ {
+ // Accept the result only if `at_start` after the new nodes equals
+ // what it was after the old ones.
+ // NOTE(review): presumably because the untouched nodes that follow
+ // were parsed under the old value -- confirm.
+ if at_start == prev_at_start_after {
+ return node
+ .replace_children(start..end, newborns)
+ .is_ok()
+ .then(|| new_range);
+ }
+ }
+ }
+
+ None
+}
+
+/// Whether `inner` lies strictly inside `outer`.
+///
+/// Both ends must be strictly within the outer range; sharing a start or an
+/// end with `outer` does not count.
+fn includes(outer: &Range<usize>, inner: &Range<usize>) -> bool {
+    let starts_inside = inner.start > outer.start;
+    let ends_inside = inner.end < outer.end;
+    starts_inside && ends_inside
+}
+
+/// Whether the two ranges share at least one position or merely touch.
+///
+/// Both ranges are treated as closed intervals, so a range ending exactly
+/// where the other one begins still counts as overlapping.
+fn overlaps(first: &Range<usize>, second: &Range<usize>) -> bool {
+    let second_starts_in_first = (first.start..=first.end).contains(&second.start);
+    let first_starts_in_second = (second.start..=second.end).contains(&first.start);
+    second_starts_in_first || first_starts_in_second
+}
+
+/// Whether the reparse selection should grow past a node of this kind.
+///
+/// This is the case for trivia, errors, semicolons, and any node whose text
+/// is exactly `/` or `:`.
+fn expand(node: &SyntaxNode) -> bool {
+    let kind = node.kind();
+    if kind.is_trivia() || kind.is_error() || kind == SyntaxKind::Semicolon {
+        return true;
+    }
+
+    // Otherwise, only the exact texts `/` and `:` trigger expansion.
+    node.text() == "/" || node.text() == ":"
+}
+
+/// Advance the `at_start` property across `node`.
+///
+/// Any non-trivia node clears the flag. A trivia node sets it if its text
+/// contains a newline and otherwise leaves the previous value untouched.
+fn next_at_start(node: &SyntaxNode, at_start: &mut bool) {
+    if !node.kind().is_trivia() {
+        *at_start = false;
+        return;
+    }
+
+    let has_newline = node.text().chars().any(is_newline);
+    if has_newline {
+        *at_start = true;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ops::Range;
+
+ use super::super::{parse, Source};
+
+ /// Apply an edit to `prev` and check the resulting tree.
+ ///
+ /// Replaces `range` of `prev` with `with` through `Source::edit`, then
+ /// compares the edited source's root against a fresh `parse` of the
+ /// edited text. `incremental` states whether the range returned by the
+ /// edit is expected to differ in length from the whole text (`true`) or
+ /// to have the same length as the whole text (`false`).
+ #[track_caller]
+ fn test(prev: &str, range: Range<usize>, with: &str, incremental: bool) {
+ let mut source = Source::detached(prev);
+ // Keep the tree from before the edit for the failure output below.
+ let prev = source.root().clone();
+ let range = source.edit(range, with);
+ let found = source.root();
+ let expected = parse(source.text());
+ if found != &expected {
+ eprintln!("source: {:?}", source.text());
+ eprintln!("previous: {prev:#?}");
+ eprintln!("expected: {expected:#?}");
+ eprintln!("found: {found:#?}");
+ panic!("test failed");
+ }
+ // Compare the length of the range returned by the edit with the
+ // length of the whole edited text.
+ if incremental {
+ assert_ne!(source.len_bytes(), range.len());
+ } else {
+ assert_eq!(source.len_bytes(), range.len());
+ }
+ }
+
+ /// Edits applied to sources that mostly consist of markup.
+ #[test]
+ fn test_reparse_markup() {
+ test("abc~def~ghi", 5..6, "+", true);
+ test("~~~~~~~", 3..4, "A", true);
+ test("abc~~", 1..2, "", true);
+ test("#var;hello", 9..10, "a", false);
+ test("https:/world", 7..7, "/", false);
+ test("hello world", 7..12, "walkers", false);
+ test("some content", 0..12, "", false);
+ test("", 0..0, "do it", false);
+ test("a d e", 1..3, " b c d", false);
+ test("~*~*~", 2..2, "*", false);
+ test("::1\n2. a\n3", 7..7, "4", true);
+ test("* {1+2} *", 5..6, "3", true);
+ test("{(0, 1, 2)}", 5..6, "11pt", false);
+ test("\n= A heading", 4..4, "n evocative", false);
+ test("#call() abc~d", 7..7, "[]", true);
+ test("a your thing a", 6..7, "a", false);
+ test("#grid(columns: (auto, 1fr, 40%))", 16..20, "4pt", false);
+ test("abc\n= a heading\njoke", 3..4, "\nmore\n\n", true);
+ test("#show f: a => b..", 16..16, "c", false);
+ test("#for", 4..4, "//", false);
+ test("a\n#let \nb", 7..7, "i", true);
+ test("#let x = (1, 2 + ;~ Five\r\n\r", 20..23, "2.", true);
+ test(r"{{let x = z}; a = 1} b", 6..6, "//", false);
+ test(r#"a ```typst hello```"#, 16..17, "", false);
+ }
+
+ /// Edits applied inside or next to code and content blocks.
+ #[test]
+ fn test_reparse_block() {
+ test("Hello { x + 1 }!", 8..9, "abc", true);
+ test("A{}!", 2..2, "\"", false);
+ test("{ [= x] }!", 4..4, "=", true);
+ test("[[]]", 2..2, "\\", false);
+ test("[[ab]]", 3..4, "\\", false);
+ test("{}}", 1..1, "{", false);
+ test("A: [BC]", 5..5, "{", false);
+ test("A: [BC]", 5..5, "{}", true);
+ test("{\"ab\"}A", 4..4, "c", true);
+ test("{\"ab\"}A", 4..5, "c", false);
+ test("a[]b", 2..2, "{", false);
+ test("a{call(); abc}b", 7..7, "[]", true);
+ test("a #while x {\n g(x) \n} b", 12..12, "//", true);
+ }
+}
diff --git a/src/syntax/source.rs b/src/syntax/source.rs
index 41805a60..472e8c6c 100644
--- a/src/syntax/source.rs
+++ b/src/syntax/source.rs
@@ -9,8 +9,7 @@ use comemo::Prehashed;
use unscanny::Scanner;
use super::ast::Markup;
-use super::reparse::reparse;
-use super::{is_newline, parse, Span, SyntaxNode};
+use super::{is_newline, parse, reparse, Span, SyntaxNode};
use crate::diag::SourceResult;
use crate::util::{PathExt, StrExt};