diff options
| author | Laurenz <laurmaedje@gmail.com> | 2024-02-21 09:38:47 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-21 08:38:47 +0000 |
| commit | be49935753f0e37ae8e04fb53111e6f116c63f47 (patch) | |
| tree | 0fbc875af61bdedb0d0de42b8809bdb4aae8586f /crates/typst-syntax | |
| parent | b2e509d472634fd5dd43514dbde24eedab566abd (diff) | |
Destructuring improvements (#3463)
Diffstat (limited to 'crates/typst-syntax')
| -rw-r--r-- | crates/typst-syntax/src/ast.rs | 240 | ||||
| -rw-r--r-- | crates/typst-syntax/src/kind.rs | 2 | ||||
| -rw-r--r-- | crates/typst-syntax/src/node.rs | 37 | ||||
| -rw-r--r-- | crates/typst-syntax/src/parser.rs | 1112 | ||||
| -rw-r--r-- | crates/typst-syntax/src/set.rs | 226 |
5 files changed, 825 insertions, 792 deletions
diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs index 6f4b52f0..6dd9b5f6 100644 --- a/crates/typst-syntax/src/ast.rs +++ b/crates/typst-syntax/src/ast.rs @@ -1157,9 +1157,18 @@ node! { impl<'a> Parenthesized<'a> { /// The wrapped expression. + /// + /// Should only be accessed if this is contained in an `Expr`. pub fn expr(self) -> Expr<'a> { self.0.cast_first_match().unwrap_or_default() } + + /// The wrapped pattern. + /// + /// Should only be accessed if this is contained in a `Pattern`. + pub fn pattern(self) -> Pattern<'a> { + self.0.cast_first_match().unwrap_or_default() + } } node! { @@ -1180,13 +1189,13 @@ pub enum ArrayItem<'a> { /// A bare expression: `12`. Pos(Expr<'a>), /// A spread expression: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for ArrayItem<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { match node.kind() { - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1219,7 +1228,7 @@ pub enum DictItem<'a> { /// A keyed pair: `"spacy key": true`. Keyed(Keyed<'a>), /// A spread expression: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for DictItem<'a> { @@ -1227,7 +1236,7 @@ impl<'a> AstNode<'a> for DictItem<'a> { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), SyntaxKind::Keyed => node.cast().map(Self::Keyed), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => Option::None, } } @@ -1253,13 +1262,19 @@ impl<'a> Named<'a> { } /// The right-hand side of the pair: `3pt`. + /// + /// This should only be accessed if this `Named` is contained in a + /// `DictItem`, `Arg`, or `Param`. pub fn expr(self) -> Expr<'a> { self.0.cast_last_match().unwrap_or_default() } - /// The right-hand side of the pair as an identifier. - pub fn expr_ident(self) -> Option<Ident<'a>> { - self.0.cast_last_match() + /// The right-hand side of the pair as a pattern. + /// + /// This should only be accessed if this `Named` is contained in a + /// `Destructuring`. + pub fn pattern(self) -> Pattern<'a> { + self.0.cast_last_match().unwrap_or_default() } } @@ -1275,12 +1290,46 @@ impl<'a> Keyed<'a> { } /// The right-hand side of the pair: `true`. + /// + /// This should only be accessed if this `Keyed` is contained in a + /// `DictItem`. pub fn expr(self) -> Expr<'a> { self.0.cast_last_match().unwrap_or_default() } } node! { + /// A spread: `..x` or `..x.at(0)`. + Spread +} + +impl<'a> Spread<'a> { + /// The spreaded expression. + /// + /// This should only be accessed if this `Spread` is contained in an + /// `ArrayItem`, `DictItem`, or `Arg`. + pub fn expr(self) -> Expr<'a> { + self.0.cast_first_match().unwrap_or_default() + } + + /// The sink identifier, if present. + /// + /// This should only be accessed if this `Spread` is contained in a + /// `Param` or binding `DestructuringItem`. + pub fn sink_ident(self) -> Option<Ident<'a>> { + self.0.cast_first_match() + } + + /// The sink expressions, if present. + /// + /// This should only be accessed if this `Spread` is contained in a + /// `DestructuringItem`. + pub fn sink_expr(self) -> Option<Expr<'a>> { + self.0.cast_first_match() + } +} + +node! { /// A unary operation: `-x`. Unary } @@ -1591,14 +1640,14 @@ pub enum Arg<'a> { /// A named argument: `draw: false`. Named(Named<'a>), /// A spread argument: `..things`. - Spread(Expr<'a>), + Spread(Spread<'a>), } impl<'a> AstNode<'a> for Arg<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast_first_match().map(Self::Spread), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1648,28 +1697,6 @@ impl<'a> Params<'a> { } } -node! { - /// A spread: `..x` or `..x.at(0)`. - Spread -} - -impl<'a> Spread<'a> { - /// Try to get an identifier. - pub fn name(self) -> Option<Ident<'a>> { - self.0.cast_first_match() - } - - /// Try to get an expression. - pub fn expr(self) -> Option<Expr<'a>> { - self.0.cast_first_match() - } -} - -node! { - /// An underscore: `_` - Underscore -} - /// A parameter to a closure. #[derive(Debug, Copy, Clone, Hash)] pub enum Param<'a> { @@ -1677,15 +1704,15 @@ pub enum Param<'a> { Pos(Pattern<'a>), /// A named parameter with a default value: `draw: false`. Named(Named<'a>), - /// An argument sink: `..args`. - Sink(Spread<'a>), + /// An argument sink: `..args` or `..`. + Spread(Spread<'a>), } impl<'a> AstNode<'a> for Param<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { match node.kind() { SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), + SyntaxKind::Spread => node.cast().map(Self::Spread), _ => node.cast().map(Self::Pos), } } @@ -1694,62 +1721,7 @@ impl<'a> AstNode<'a> for Param<'a> { match self { Self::Pos(v) => v.to_untyped(), Self::Named(v) => v.to_untyped(), - Self::Sink(v) => v.to_untyped(), - } - } -} - -node! { - /// A destructuring pattern: `x` or `(x, _, ..y)`. - Destructuring -} - -impl<'a> Destructuring<'a> { - /// The bindings of the destructuring. - pub fn bindings(self) -> impl DoubleEndedIterator<Item = DestructuringKind<'a>> { - self.0.children().filter_map(SyntaxNode::cast) - } - - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> impl DoubleEndedIterator<Item = Ident<'a>> { - self.bindings().filter_map(|binding| match binding { - DestructuringKind::Normal(Expr::Ident(ident)) => Some(ident), - DestructuringKind::Sink(spread) => spread.name(), - DestructuringKind::Named(named) => named.expr_ident(), - _ => Option::None, - }) - } -} - -/// The kind of an element in a destructuring pattern. -#[derive(Debug, Copy, Clone, Hash)] -pub enum DestructuringKind<'a> { - /// An expression: `x`. - Normal(Expr<'a>), - /// An argument sink: `..y`. - Sink(Spread<'a>), - /// Named arguments: `x: 1`. - Named(Named<'a>), - /// A placeholder: `_`. - Placeholder(Underscore<'a>), -} - -impl<'a> AstNode<'a> for DestructuringKind<'a> { - fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { - match node.kind() { - SyntaxKind::Named => node.cast().map(Self::Named), - SyntaxKind::Spread => node.cast().map(Self::Sink), - SyntaxKind::Underscore => node.cast().map(Self::Placeholder), - _ => node.cast().map(Self::Normal), - } - } - - fn to_untyped(self) -> &'a SyntaxNode { - match self { - Self::Normal(v) => v.to_untyped(), - Self::Named(v) => v.to_untyped(), - Self::Sink(v) => v.to_untyped(), - Self::Placeholder(v) => v.to_untyped(), + Self::Spread(v) => v.to_untyped(), } } } @@ -1761,6 +1733,8 @@ pub enum Pattern<'a> { Normal(Expr<'a>), /// A placeholder: `_`. Placeholder(Underscore<'a>), + /// A parenthesized pattern. + Parenthesized(Parenthesized<'a>), /// A destructuring pattern: `(x, _, ..y)`. Destructuring(Destructuring<'a>), } @@ -1768,8 +1742,9 @@ pub enum Pattern<'a> { impl<'a> AstNode<'a> for Pattern<'a> { fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { match node.kind() { - SyntaxKind::Destructuring => node.cast().map(Self::Destructuring), SyntaxKind::Underscore => node.cast().map(Self::Placeholder), + SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized), + SyntaxKind::Destructuring => node.cast().map(Self::Destructuring), _ => node.cast().map(Self::Normal), } } @@ -1777,18 +1752,20 @@ impl<'a> AstNode<'a> for Pattern<'a> { fn to_untyped(self) -> &'a SyntaxNode { match self { Self::Normal(v) => v.to_untyped(), - Self::Destructuring(v) => v.to_untyped(), Self::Placeholder(v) => v.to_untyped(), + Self::Parenthesized(v) => v.to_untyped(), + Self::Destructuring(v) => v.to_untyped(), } } } impl<'a> Pattern<'a> { - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> Vec<Ident<'a>> { + /// Returns a list of all new bindings introduced by the pattern. + pub fn bindings(self) -> Vec<Ident<'a>> { match self { - Pattern::Normal(Expr::Ident(ident)) => vec![ident], - Pattern::Destructuring(destruct) => destruct.idents().collect(), + Self::Normal(Expr::Ident(ident)) => vec![ident], + Self::Parenthesized(v) => v.pattern().bindings(), + Self::Destructuring(v) => v.bindings(), _ => vec![], } } @@ -1801,6 +1778,65 @@ impl Default for Pattern<'_> { } node! { + /// An underscore: `_` + Underscore +} + +node! { + /// A destructuring pattern: `x` or `(x, _, ..y)`. + Destructuring +} + +impl<'a> Destructuring<'a> { + /// The items of the destructuring. + pub fn items(self) -> impl DoubleEndedIterator<Item = DestructuringItem<'a>> { + self.0.children().filter_map(SyntaxNode::cast) + } + + /// Returns a list of all new bindings introduced by the destructuring. + pub fn bindings(self) -> Vec<Ident<'a>> { + self.items() + .flat_map(|binding| match binding { + DestructuringItem::Pattern(pattern) => pattern.bindings(), + DestructuringItem::Named(named) => named.pattern().bindings(), + DestructuringItem::Spread(spread) => { + spread.sink_ident().into_iter().collect() + } + }) + .collect() + } +} + +/// The kind of an element in a destructuring pattern. +#[derive(Debug, Copy, Clone, Hash)] +pub enum DestructuringItem<'a> { + /// A sub-pattern: `x`. + Pattern(Pattern<'a>), + /// A renamed destructuring: `x: y`. + Named(Named<'a>), + /// A destructuring sink: `..y` or `..`. + Spread(Spread<'a>), +} + +impl<'a> AstNode<'a> for DestructuringItem<'a> { + fn from_untyped(node: &'a SyntaxNode) -> Option<Self> { + match node.kind() { + SyntaxKind::Named => node.cast().map(Self::Named), + SyntaxKind::Spread => node.cast().map(Self::Spread), + _ => node.cast().map(Self::Pattern), + } + } + + fn to_untyped(self) -> &'a SyntaxNode { + match self { + Self::Pattern(v) => v.to_untyped(), + Self::Named(v) => v.to_untyped(), + Self::Spread(v) => v.to_untyped(), + } + } +} + +node! { /// A let binding: `let x = 1`. LetBinding } @@ -1815,13 +1851,11 @@ pub enum LetBindingKind<'a> { } impl<'a> LetBindingKind<'a> { - /// Returns a list of all identifiers in the pattern. - pub fn idents(self) -> Vec<Ident<'a>> { + /// Returns a list of all new bindings introduced by the let binding. + pub fn bindings(self) -> Vec<Ident<'a>> { match self { - LetBindingKind::Normal(pattern) => pattern.idents(), - LetBindingKind::Closure(ident) => { - vec![ident] - } + LetBindingKind::Normal(pattern) => pattern.bindings(), + LetBindingKind::Closure(ident) => vec![ident], } } } @@ -1840,7 +1874,7 @@ impl<'a> LetBinding<'a> { /// The expression the binding is initialized with. pub fn init(self) -> Option<Expr<'a>> { match self.kind() { - LetBindingKind::Normal(Pattern::Normal(_)) => { + LetBindingKind::Normal(Pattern::Normal(_) | Pattern::Parenthesized(_)) => { self.0.children().filter_map(SyntaxNode::cast).nth(1) } LetBindingKind::Normal(_) => self.0.cast_first_match(), diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs index a772175e..536c9381 100644 --- a/crates/typst-syntax/src/kind.rs +++ b/crates/typst-syntax/src/kind.rs @@ -136,7 +136,7 @@ pub enum SyntaxKind { StarEq, /// The divide-assign operator: `/=`. SlashEq, - /// The spread operator: `..`. + /// Indicates a spread or sink: `..`. Dots, /// An arrow between a closure's parameters and body: `=>`. Arrow, diff --git a/crates/typst-syntax/src/node.rs b/crates/typst-syntax/src/node.rs index fed7049c..3c93cd84 100644 --- a/crates/typst-syntax/src/node.rs +++ b/crates/typst-syntax/src/node.rs @@ -3,7 +3,7 @@ use std::ops::{Deref, Range}; use std::rc::Rc; use std::sync::Arc; -use ecow::{eco_vec, EcoString, EcoVec}; +use ecow::{eco_format, eco_vec, EcoString, EcoVec}; use crate::ast::AstNode; use crate::{FileId, Span, SyntaxKind}; @@ -177,14 +177,9 @@ impl SyntaxNode { } impl SyntaxNode { - /// Mark this node as erroneous. - pub(super) fn make_erroneous(&mut self) { - if let Repr::Inner(inner) = &mut self.0 { - Arc::make_mut(inner).erroneous = true; - } - } - /// Convert the child to another kind. + /// + /// Don't use this for converting to an error! #[track_caller] pub(super) fn convert_to_kind(&mut self, kind: SyntaxKind) { debug_assert!(!kind.is_error()); @@ -195,10 +190,30 @@ impl SyntaxNode { } } - /// Convert the child to an error. + /// Convert the child to an error, if it isn't already one. pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) { - let text = std::mem::take(self).into_text(); - *self = SyntaxNode::error(message, text); + if !self.kind().is_error() { + let text = std::mem::take(self).into_text(); + *self = SyntaxNode::error(message, text); + } + } + + /// Convert the child to an error stating that the given thing was + /// expected, but the current kind was found. + pub(super) fn expected(&mut self, expected: &str) { + let kind = self.kind(); + self.convert_to_error(eco_format!("expected {expected}, found {}", kind.name())); + if kind.is_keyword() && matches!(expected, "identifier" | "pattern") { + self.hint(eco_format!( + "keyword `{text}` is not allowed as an identifier; try `{text}_` instead", + text = self.text(), + )); + } + } + + /// Convert the child to an error stating it was unexpected. + pub(super) fn unexpected(&mut self) { + self.convert_to_error(eco_format!("unexpected {}", self.kind().name())); } /// Assign spans to each node. diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index a1bd5ad0..32e15cb7 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -1,5 +1,6 @@ -use std::collections::HashSet; -use std::ops::Range; +use std::collections::{HashMap, HashSet}; +use std::mem; +use std::ops::{Index, IndexMut, Range}; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; @@ -145,11 +146,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { /// Parses strong content: `*Strong*`. fn strong(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Star, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Star) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Star); @@ -160,11 +160,10 @@ fn strong(p: &mut Parser) { /// Parses emphasized content: `_Emphasized_`. fn emph(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Underscore, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Underscore) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Underscore); @@ -175,15 +174,16 @@ fn emph(p: &mut Parser) { /// Parses a section heading: `= Introduction`. fn heading(p: &mut Parser) { - const END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Label, SyntaxKind::RightBracket, SyntaxKind::Space]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Label) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::Space); let m = p.marker(); p.assert(SyntaxKind::HeadingMarker); whitespace_line(p); markup(p, false, usize::MAX, |p| { - p.at_set(END) - && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label) + p.at_set(END) && (!p.at(SyntaxKind::Space) || p.peek() == SyntaxKind::Label) }); p.wrap(m, SyntaxKind::Heading); } @@ -211,7 +211,7 @@ fn enum_item(p: &mut Parser) { /// Parses an item in a term list: `/ Term: Details`. fn term_item(p: &mut Parser) { const TERM_END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Colon, SyntaxKind::RightBracket]); + SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::TermMarker); @@ -615,11 +615,7 @@ fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { /// Parses a single code expression. fn code_expr(p: &mut Parser) { - code_expr_prec(p, false, 0, false) -} - -fn code_expr_or_pattern(p: &mut Parser) { - code_expr_prec(p, false, 0, true) + code_expr_prec(p, false, 0) } /// Parses a code expression embedded in markup or math. @@ -631,7 +627,7 @@ fn embedded_code_expr(p: &mut Parser) { let stmt = p.at_set(set::STMT); let at = p.at_set(set::ATOMIC_CODE_EXPR); - code_expr_prec(p, true, 0, false); + code_expr_prec(p, true, 0); // Consume error for things like `#12p` or `#"abc\"`.# if !at && !p.current().is_trivia() && !p.eof() { @@ -650,20 +646,15 @@ fn embedded_code_expr(p: &mut Parser) { } /// Parses a code expression with at least the given precedence. -fn code_expr_prec( - p: &mut Parser, - atomic: bool, - min_prec: usize, - allow_destructuring: bool, -) { +fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) { let m = p.marker(); if !atomic && p.at_set(set::UNARY_OP) { let op = ast::UnOp::from_kind(p.current()).unwrap(); p.eat(); - code_expr_prec(p, atomic, op.precedence(), false); + code_expr_prec(p, atomic, op.precedence()); p.wrap(m, SyntaxKind::Unary); } else { - code_primary(p, atomic, allow_destructuring); + code_primary(p, atomic); } loop { @@ -675,7 +666,7 @@ fn code_expr_prec( } let at_field_or_method = - p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident; + p.directly_at(SyntaxKind::Dot) && p.peek() == SyntaxKind::Ident; if atomic && !at_field_or_method { break; @@ -713,7 +704,7 @@ fn code_expr_prec( } p.eat(); - code_expr_prec(p, false, prec, false); + code_expr_prec(p, false, prec); p.wrap(m, SyntaxKind::Binary); continue; } @@ -725,7 +716,7 @@ fn code_expr_prec( /// Parses an primary in a code expression. These are the atoms that unary and /// binary operations, functions calls, and field accesses start with / are /// composed of. -fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { +fn code_primary(p: &mut Parser, atomic: bool) { let m = p.marker(); match p.current() { SyntaxKind::Ident => { @@ -744,14 +735,17 @@ fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { p.eat(); code_expr(p); p.wrap(m, SyntaxKind::Closure); - } else if let Some(underscore) = p.node_mut(m) { - underscore.convert_to_error("expected expression, found underscore"); + } else if p.eat_if(SyntaxKind::Eq) { + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + p[m].expected("expression"); } } SyntaxKind::LeftBrace => code_block(p), SyntaxKind::LeftBracket => content_block(p), - SyntaxKind::LeftParen => with_paren(p, allow_destructuring), + SyntaxKind::LeftParen => expr_with_paren(p), SyntaxKind::Dollar => equation(p), SyntaxKind::Let => let_binding(p), SyntaxKind::Set => set_rule(p), @@ -799,11 +793,10 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod /// Parses a code block: `{ let x = 1; x + 2 }`. fn code_block(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::RightBrace, - SyntaxKind::RightBracket, - SyntaxKind::RightParen, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::RightBrace) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::RightParen); let m = p.marker(); p.enter(LexMode::Code); @@ -827,223 +820,6 @@ fn content_block(p: &mut Parser) { p.wrap(m, SyntaxKind::ContentBlock); } -fn with_paren(p: &mut Parser, allow_destructuring: bool) { - let m = p.marker(); - let mut kind = collection(p, true); - if p.at(SyntaxKind::Arrow) { - validate_params_at(p, m); - p.wrap(m, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - code_expr(p); - kind = SyntaxKind::Closure; - } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { - // TODO: add warning if p.at(SyntaxKind::Eq) && kind == SyntaxKind::Parenthesized - - validate_pattern_at(p, m, false); - p.wrap(m, SyntaxKind::Destructuring); - p.assert(SyntaxKind::Eq); - code_expr(p); - kind = SyntaxKind::DestructAssignment; - } - - match kind { - SyntaxKind::Array if !allow_destructuring => validate_array_at(p, m), - SyntaxKind::Dict if !allow_destructuring => validate_dict_at(p, m), - SyntaxKind::Parenthesized if !allow_destructuring => { - validate_parenthesized_at(p, m) - } - SyntaxKind::Destructuring if !allow_destructuring => { - invalidate_destructuring(p, m) - } - _ => {} - } - p.wrap(m, kind); -} - -fn invalidate_destructuring(p: &mut Parser, m: Marker) { - let mut collection_kind = Option::None; - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => match collection_kind { - Some(SyntaxKind::Array) => child.convert_to_error(eco_format!( - "expected expression, found {}", - child.kind().name() - )), - _ => collection_kind = Some(SyntaxKind::Dict), - }, - SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {} - kind => match collection_kind { - Some(SyntaxKind::Dict) => child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )), - _ => collection_kind = Some(SyntaxKind::Array), - }, - } - } -} - -fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind { - p.enter_newline_mode(NewlineMode::Continue); - - let m = p.marker(); - p.assert(SyntaxKind::LeftParen); - - let mut count = 0; - let mut parenthesized = true; - let mut kind = None; - if keyed && p.eat_if(SyntaxKind::Colon) { - kind = Some(SyntaxKind::Dict); - parenthesized = false; - } - - while !p.current().is_terminator() { - let prev = p.prev_end(); - match item(p, keyed) { - SyntaxKind::Spread => parenthesized = false, - SyntaxKind::Named | SyntaxKind::Keyed => { - match kind { - Some(SyntaxKind::Array) => kind = Some(SyntaxKind::Destructuring), - _ => kind = Some(SyntaxKind::Dict), - } - parenthesized = false; - } - SyntaxKind::Int => match kind { - Some(SyntaxKind::Array) | None => kind = Some(SyntaxKind::Array), - Some(_) => kind = Some(SyntaxKind::Destructuring), - }, - _ if kind.is_none() => kind = Some(SyntaxKind::Array), - _ => {} - } - - if !p.progress(prev) { - p.unexpected(); - continue; - } - - count += 1; - - if p.current().is_terminator() { - break; - } - - if p.expect(SyntaxKind::Comma) { - parenthesized = false; - } - } - - p.expect_closing_delimiter(m, SyntaxKind::RightParen); - p.exit_newline_mode(); - - if parenthesized && count == 1 { - SyntaxKind::Parenthesized - } else { - kind.unwrap_or(SyntaxKind::Array) - } -} - -fn item(p: &mut Parser, keyed: bool) -> SyntaxKind { - let m = p.marker(); - - if p.eat_if(SyntaxKind::Dots) { - if p.at(SyntaxKind::Comma) || p.at(SyntaxKind::RightParen) { - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - code_expr(p); - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - if p.at(SyntaxKind::Underscore) { - // This is a temporary workaround to fix `v.map(_ => {})`. - let mut lexer = p.lexer.clone(); - let next = - std::iter::from_fn(|| Some(lexer.next())).find(|kind| !kind.is_trivia()); - if next != Some(SyntaxKind::Arrow) { - p.eat(); - return SyntaxKind::Underscore; - } - } - - code_expr_or_pattern(p); - - if !p.eat_if(SyntaxKind::Colon) { - return SyntaxKind::Int; - } - - if !p.eat_if(SyntaxKind::Underscore) { - code_expr(p); - } - - let kind = match p.node(m).map(SyntaxNode::kind) { - Some(SyntaxKind::Ident) => SyntaxKind::Named, - Some(_) if keyed => SyntaxKind::Keyed, - _ => { - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Colon { - break; - } - - let expected = if keyed { "expression" } else { "identifier" }; - let message = eco_format!( - "expected {expected}, found {found}", - found = child.kind().name(), - ); - child.convert_to_error(message); - } - SyntaxKind::Named - } - }; - - p.wrap(m, kind); - kind -} - -/// Parses a function call's argument list: `(12pt, y)`. -fn args(p: &mut Parser) { - if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { - p.expected("argument list"); - } - - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - collection(p, false); - validate_args_at(p, m); - } - - while p.directly_at(SyntaxKind::LeftBracket) { - content_block(p); - } - - p.wrap(m, SyntaxKind::Args); -} - -enum PatternKind { - Ident, - Other, -} - -/// Parses a pattern that can be assigned to. -fn pattern(p: &mut Parser) -> PatternKind { - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - let kind = collection(p, false); - validate_pattern_at(p, m, true); - - if kind != SyntaxKind::Parenthesized { - p.wrap(m, SyntaxKind::Destructuring); - } - PatternKind::Other - } else if p.eat_if(SyntaxKind::Underscore) { - PatternKind::Other - } else { - p.expect(SyntaxKind::Ident); - PatternKind::Ident - } -} - /// Parses a let binding: `let x = 1`. fn let_binding(p: &mut Parser) { let m = p.marker(); @@ -1052,17 +828,15 @@ fn let_binding(p: &mut Parser) { let m2 = p.marker(); let mut closure = false; let mut other = false; - match pattern(p) { - PatternKind::Ident => { - if p.directly_at(SyntaxKind::LeftParen) { - let m3 = p.marker(); - collection(p, false); - validate_params_at(p, m3); - p.wrap(m3, SyntaxKind::Params); - closure = true; - } + + if p.eat_if(SyntaxKind::Ident) { + if p.directly_at(SyntaxKind::LeftParen) { + params(p); + closure = true; } - PatternKind::Other => other = true, + } else { + pattern(p, false, &mut HashSet::new(), None); + other = true; } let f = if closure || other { Parser::expect } else { Parser::eat_if }; @@ -1144,17 +918,21 @@ fn while_loop(p: &mut Parser) { fn for_loop(p: &mut Parser) { let m = p.marker(); p.assert(SyntaxKind::For); - pattern(p); - if p.at(SyntaxKind::Comma) { - p.expected("keyword `in`"); - p.hint("did you mean to use a destructuring pattern?"); - if !p.eat_if(SyntaxKind::Ident) { - p.eat_if(SyntaxKind::Underscore); + + let mut seen = HashSet::new(); + pattern(p, false, &mut seen, None); + + let m2 = p.marker(); + if p.eat_if(SyntaxKind::Comma) { + let node = &mut p[m2]; + node.unexpected(); + node.hint("destructuring patterns must be wrapped in parentheses"); + if p.at_set(set::PATTERN) { + pattern(p, false, &mut seen, None); } - p.eat_if(SyntaxKind::In); - } else { - p.expect(SyntaxKind::In); } + + p.expect(SyntaxKind::In); code_expr(p); block(p); p.wrap(m, SyntaxKind::ForLoop); @@ -1192,10 +970,9 @@ fn import_items(p: &mut Parser) { p.wrap(item_marker, SyntaxKind::RenamedImportItem); } - if p.current().is_terminator() { - break; + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } - p.expect(SyntaxKind::Comma); } p.wrap(m, SyntaxKind::ImportItems); } @@ -1232,247 +1009,426 @@ fn return_stmt(p: &mut Parser) { p.wrap(m, SyntaxKind::FuncReturn); } -fn validate_parenthesized_at(p: &mut Parser, m: Marker) { - for child in p.post_process(m) { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); - } - _ => {} +/// An expression that starts with a parenthesis. +fn expr_with_paren(p: &mut Parser) { + // If we've seen this position before and have a memoized result, just use + // it. See below for more explanation about this memoization. + let start = p.current_start(); + if let Some((range, end_point)) = p.memo.get(&start) { + p.nodes.extend(p.memo_arena[range.clone()].iter().cloned()); + p.restore(end_point.clone()); + return; + } + + let m = p.marker(); + let checkpoint = p.checkpoint(); + + // When we reach a '(', we can't be sure what it is. First, we attempt to + // parse as a simple parenthesized expression, array, or dictionary as + // these are the most likely things. We can handle all of those in a single + // pass. + let kind = parenthesized_or_array_or_dict(p); + + // If, however, '=>' or '=' follows, we must backtrack and reparse as either + // a parameter list or a destructuring. To be able to do that, we created a + // parser checkpoint before our speculative parse, which we can restore. + // + // However, naive backtracking has a fatal flaw: It can lead to exponential + // parsing time if we are constantly getting things wrong in a nested + // scenario. The particular failure case for parameter parsing is the + // following: `(x: (x: (x) => y) => y) => y` + // + // Such a structure will reparse over and over again recursively, leading to + // a running time of O(2^n) for nesting depth n. To prevent this, we perform + // a simple trick: When we have done the mistake of picking the wrong path + // once and have subsequently parsed correctly, we save the result of that + // correct parsing in the `p.memo` map. When we reach the same position + // again, we can then just restore this result. In this way, no + // parenthesized expression is parsed more than twice, leading to a worst + // case running time of O(2n). + if p.at(SyntaxKind::Arrow) { + p.restore(checkpoint); + params(p); + p.assert(SyntaxKind::Arrow); + code_expr(p); + p.wrap(m, SyntaxKind::Closure); + } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { + p.restore(checkpoint); + destructuring_or_parenthesized(p, true, &mut HashSet::new()); + p.assert(SyntaxKind::Eq); + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + return; + } + + // Memoize result if we backtracked. + let offset = p.memo_arena.len(); + p.memo_arena.extend(p.nodes[m.0..].iter().cloned()); + p.memo.insert(start, (offset..p.memo_arena.len(), p.checkpoint())); +} + +/// Parses either +/// - a parenthesized expression: `(1 + 2)`, or +/// - an array: `(1, "hi", 12cm)`, or +/// - a dictionary: `(thickness: 3pt, pattern: dashed)`. +fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut state = GroupState { + count: 0, + maybe_just_parens: true, + kind: None, + seen: HashSet::new(), + }; + + if p.eat_if(SyntaxKind::Colon) { + state.kind = Some(SyntaxKind::Dict); + state.maybe_just_parens = false; + } + + while !p.current().is_terminator() { + if !p.at_set(set::ARRAY_OR_DICT_ITEM) { + p.unexpected(); + continue; + } + + array_or_dict_item(p, &mut state); + state.count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + state.maybe_just_parens = false; } } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + let kind = if state.maybe_just_parens && state.count == 1 { + SyntaxKind::Parenthesized + } else { + state.kind.unwrap_or(SyntaxKind::Array) + }; + + p.wrap(m, kind); + kind } -fn validate_array_at(p: &mut Parser, m: Marker) { - validate_array(p.post_process(m)) +/// State for array/dictionary parsing. +struct GroupState { + count: usize, + maybe_just_parens: bool, + kind: Option<SyntaxKind>, + seen: HashSet<EcoString>, } -fn validate_array<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) { - for child in children { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Named | SyntaxKind::Keyed | SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); +/// Parses a single item in an array or dictionary. +fn array_or_dict_item(p: &mut Parser, state: &mut GroupState) { + let m = p.marker(); + + if p.eat_if(SyntaxKind::Dots) { + // Parses a spreaded item: `..item`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + state.maybe_just_parens = false; + return; + } + + code_expr(p); + + if p.eat_if(SyntaxKind::Colon) { + // Parses a named/keyed pair: `name: item` or `"key": item`. + code_expr(p); + + let node = &mut p[m]; + let pair_kind = match node.kind() { + SyntaxKind::Ident => SyntaxKind::Named, + _ => SyntaxKind::Keyed, + }; + + if let Some(key) = match node.cast::<ast::Expr>() { + Some(ast::Expr::Ident(ident)) => Some(ident.get().clone()), + Some(ast::Expr::Str(s)) => Some(s.get()), + _ => None, + } { + if !state.seen.insert(key.clone()) { + node.convert_to_error(eco_format!("duplicate key: {key}")); } - _ => {} + } + + p.wrap(m, pair_kind); + state.maybe_just_parens = false; + + if state.kind == Some(SyntaxKind::Array) { + p[m].expected("expression"); + } else { + state.kind = Some(SyntaxKind::Dict); + } + } else { + // Parses a positional item. + if state.kind == Some(SyntaxKind::Dict) { + p[m].expected("named or keyed pair"); + } else { + state.kind = Some(SyntaxKind::Array) } } } -fn validate_dict_at(p: &mut Parser, m: Marker) { - validate_dict(p.post_process(m)) -} +/// Parses a function call's argument list: `(12pt, y)`. +fn args(p: &mut Parser) { + if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { + p.expected("argument list"); + } -fn validate_dict<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) { - let mut used = HashSet::new(); - for child in children { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => { - let Some(first) = child.children_mut().first_mut() else { continue }; - let key = if let Some(str) = first.cast::<ast::Str>() { - str.get() - } else if let Some(ident) = first.cast::<ast::Ident>() { - ident.get().clone() - } else { - continue; - }; + let m = p.marker(); + if p.at(SyntaxKind::LeftParen) { + let m2 = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); - if !used.insert(key.clone()) { - first.convert_to_error(eco_format!("duplicate key: {key}")); - child.make_erroneous(); - } + let mut seen = HashSet::new(); + while !p.current().is_terminator() { + if !p.at_set(set::ARG) { + p.unexpected(); + continue; } - SyntaxKind::Spread => {} - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Colon - | SyntaxKind::Space => {} - kind => { - child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )); + + arg(p, &mut seen); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } } + + p.expect_closing_delimiter(m2, SyntaxKind::RightParen); + p.exit_newline_mode(); } + + while p.directly_at(SyntaxKind::LeftBracket) { + content_block(p); + } + + p.wrap(m, SyntaxKind::Args); } -fn validate_params_at(p: &mut Parser, m: Marker) { - let mut used_spread = false; - let mut used = HashSet::new(); - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error(eco_format!( - "duplicate parameter: {}", - child.text() - )); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("only one argument sink is allowed"); - continue; - } - used_spread = true; - if within.kind() == SyntaxKind::Dots { - continue; - } else if within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Array | SyntaxKind::Dict | SyntaxKind::Destructuring => { - validate_pattern(child.children_mut().iter_mut(), &mut used, false); - child.convert_to_kind(SyntaxKind::Destructuring); - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - child.convert_to_error(eco_format!( - "expected identifier, named pair or argument sink, found {}", - kind.name() - )); - } +/// Parses a single argument in an argument list. +fn arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses a spreaded argument: `..args`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses a named argument: `thickness: 12pt`. + let text = p.current_text(); + p.assert(SyntaxKind::Ident); + if !seen.insert(text) { + p[m].convert_to_error(eco_format!("duplicate argument: {text}")); + } + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal positional argument. + let at_expr = p.at_set(set::CODE_EXPR); + code_expr(p); + + // Recover from bad named pair. + if at_expr && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); } } } -fn validate_args_at(p: &mut Parser, m: Marker) { - let mut used = HashSet::new(); - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Named { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate argument: {}", - within.text() - )); - child.make_erroneous(); - } - } else if child.kind() == SyntaxKind::Underscore { - child.convert_to_error("unexpected underscore"); +/// Parses a closure's parameters: `(x, y)`. +fn params(p: &mut Parser) { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut seen = HashSet::new(); + let mut sink = false; + + while !p.current().is_terminator() { + if !p.at_set(set::PARAM) { + p.unexpected(); + continue; + } + + param(p, &mut seen, &mut sink); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + p.wrap(m, SyntaxKind::Params); } -fn validate_pattern_at(p: &mut Parser, m: Marker, forbid_expressions: bool) { - let mut used = HashSet::new(); - validate_pattern(p.post_process(m), &mut used, forbid_expressions); +/// Parses a single parameter in a parameter list. +fn param<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>, sink: &mut bool) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses argument sink: `..sink`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, false, seen, Some("parameter")); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one argument sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses named parameter: `thickness: 3pt`. + // We still use `pattern` even though we know it's just an identifier + // because it gives us duplicate parameter detection for free. + pattern(p, false, seen, Some("parameter")); + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal position parameter. + let at_pat = p.at_set(set::PATTERN); + pattern(p, false, seen, Some("parameter")); + + // Recover from bad named pair. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); + } + } } -fn validate_pattern<'a>( - children: impl Iterator<Item = &'a mut SyntaxNode>, - used: &mut HashSet<EcoString>, - forbid_expressions: bool, +/// Parses a binding or reassignment pattern. +fn pattern<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, ) { - let mut used_spread = false; - for child in children { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error( - "at most one binding per identifier is allowed", - ); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("at most one destructuring sink is allowed"); - continue; - } - used_spread = true; - - if within.kind() == SyntaxKind::Dots { - continue; - } else if forbid_expressions && within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } + match p.current() { + SyntaxKind::Underscore => p.eat(), + SyntaxKind::LeftParen => destructuring_or_parenthesized(p, reassignment, seen), + _ => pattern_leaf(p, reassignment, seen, dupe), + } +} - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } +/// Parses a destructuring pattern or just a parenthesized pattern. +fn destructuring_or_parenthesized<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, +) { + let mut sink = false; + let mut count = 0; + let mut maybe_just_parens = true; - if forbid_expressions { - let Some(within) = child.children_mut().last_mut() else { return }; - if within.kind() != SyntaxKind::Ident - && within.kind() != SyntaxKind::Underscore - { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - } - } - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - if forbid_expressions { - child.convert_to_error(eco_format!( - "expected identifier or destructuring sink, found {}", - kind.name() - )); - } + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + while !p.current().is_terminator() { + if !p.at_set(set::DESTRUCTURING_ITEM) { + p.unexpected(); + continue; + } + + destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink); + count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + maybe_just_parens = false; + } + } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + if maybe_just_parens && count == 1 && !sink { + p.wrap(m, SyntaxKind::Parenthesized); + } else { + p.wrap(m, SyntaxKind::Destructuring); + } +} + +/// Parses an item in a destructuring pattern. +fn destructuring_item<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + maybe_just_parens: &mut bool, + sink: &mut bool, +) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parse destructuring sink: `..rest`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, reassignment, seen, None); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one destructuring sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parse named destructuring item. + p.assert(SyntaxKind::Ident); + p.assert(SyntaxKind::Colon); + pattern(p, reassignment, seen, None); + p.wrap(m, SyntaxKind::Named); + *maybe_just_parens = false; + } else { + // Parse positional destructuring item. + let at_pat = p.at_set(set::PATTERN); + pattern(p, reassignment, seen, None); + + // Recover from bad named destructuring. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + pattern(p, reassignment, seen, None); + } + } +} + +/// Parses a leaf in a pattern - either an identifier or an expression +/// depending on whether it's a binding or reassignment pattern. +fn pattern_leaf<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, +) { + if !p.at_set(set::PATTERN_LEAF) { + if p.current().is_keyword() { + p.eat_and_get().expected("pattern"); + } else { + p.expected("pattern"); + } + return; + } + + let m = p.marker(); + let text = p.current_text(); + + // We parse an atomic expression even though we only want an identifier for + // better error recovery. We can mark the whole expression as unexpected + // instead of going through its pieces one by one. + code_expr_prec(p, true, 0); + + if !reassignment { + let node = &mut p[m]; + if node.kind() == SyntaxKind::Ident { + if !seen.insert(text) { + node.convert_to_error(eco_format!( + "duplicate {}: {text}", + dupe.unwrap_or("binding"), + )); } + } else { + node.expected("pattern"); } } } @@ -1484,13 +1440,16 @@ struct Parser<'s> { prev_end: usize, current_start: usize, current: SyntaxKind, - modes: Vec<LexMode>, + balanced: bool, nodes: Vec<SyntaxNode>, + modes: Vec<LexMode>, newline_modes: Vec<NewlineMode>, - balanced: bool, + memo: HashMap<usize, (Range<usize>, Checkpoint<'s>)>, + memo_arena: Vec<SyntaxNode>, } /// How to proceed with parsing when seeing a newline. +#[derive(Clone)] enum NewlineMode { /// Stop always. Stop, @@ -1503,6 +1462,15 @@ enum NewlineMode { #[derive(Debug, Copy, Clone, Eq, PartialEq)] struct Marker(usize); +#[derive(Clone)] +struct Checkpoint<'s> { + lexer: Lexer<'s>, + prev_end: usize, + current_start: usize, + current: SyntaxKind, + nodes: usize, +} + impl<'s> Parser<'s> { fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { let mut lexer = Lexer::new(text, mode); @@ -1514,10 +1482,12 @@ impl<'s> Parser<'s> { prev_end: offset, current_start: offset, current, - modes: vec![], + balanced: true, nodes: vec![], + modes: vec![], newline_modes: vec![], - balanced: true, + memo: HashMap::new(), + memo_arena: vec![], } } @@ -1553,10 +1523,8 @@ impl<'s> Parser<'s> { set.contains(self.current) } - #[track_caller] - fn assert(&mut self, kind: SyntaxKind) { - assert_eq!(self.current, kind); - self.eat(); + fn peek(&self) -> SyntaxKind { + self.lexer.clone().next() } fn eof(&self) -> bool { @@ -1567,6 +1535,20 @@ impl<'s> Parser<'s> { self.current == kind && self.prev_end == self.current_start } + fn eat(&mut self) { + self.save(); + self.lex(); + self.skip(); + } + + fn eat_and_get(&mut self) -> &mut SyntaxNode { + let offset = self.nodes.len(); + self.save(); + self.lex(); + self.skip(); + &mut self.nodes[offset] + } + /// Eats if at `kind`. /// /// Note: In math and code mode, this will ignore trivia in front of the @@ -1588,6 +1570,12 @@ impl<'s> Parser<'s> { at } + #[track_caller] + fn assert(&mut self, kind: SyntaxKind) { + assert_eq!(self.current, kind); + self.eat(); + } + fn convert(&mut self, kind: SyntaxKind) { self.current = kind; self.eat(); @@ -1605,12 +1593,21 @@ impl<'s> Parser<'s> { Marker(self.nodes.len()) } - fn node(&self, m: Marker) -> Option<&SyntaxNode> { - self.nodes.get(m.0) + /// Get a marker after the last non-trivia node. + fn before_trivia(&self) -> Marker { + let mut i = self.nodes.len(); + if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { + while i > 0 && self.nodes[i - 1].kind().is_trivia() { + i -= 1; + } + } + Marker(i) } - fn node_mut(&mut self, m: Marker) -> Option<&mut SyntaxNode> { - self.nodes.get_mut(m.0) + /// Whether the last non-trivia node is an error. + fn after_error(&mut self) -> bool { + let m = self.before_trivia(); + m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() } fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> { @@ -1635,10 +1632,6 @@ impl<'s> Parser<'s> { self.nodes.insert(from, SyntaxNode::inner(kind, children)); } - fn progress(&self, offset: usize) -> bool { - offset < self.prev_end - } - fn enter(&mut self, mode: LexMode) { self.modes.push(self.lexer.mode()); self.lexer.set_mode(mode); @@ -1667,10 +1660,22 @@ impl<'s> Parser<'s> { self.skip(); } - fn eat(&mut self) { - self.save(); - self.lex(); - self.skip(); + fn checkpoint(&self) -> Checkpoint<'s> { + Checkpoint { + lexer: self.lexer.clone(), + prev_end: self.prev_end, + current_start: self.current_start, + current: self.current, + nodes: self.nodes.len(), + } + } + + fn restore(&mut self, checkpoint: Checkpoint<'s>) { + self.lexer = checkpoint.lexer; + self.prev_end = checkpoint.prev_end; + self.current_start = checkpoint.current_start; + self.current = checkpoint.current; + self.nodes.truncate(checkpoint.nodes); } fn skip(&mut self) { @@ -1734,14 +1739,8 @@ impl<'s> Parser<'s> { if at { self.eat(); } else if kind == SyntaxKind::Ident && self.current.is_keyword() { - let found_text = self.current_text(); - let found = self.current.name(); - self.expected_found(kind.name(), found); - self.hint(eco_format!( - "{} is not allowed as an identifier; try `{}_` instead", - found, - found_text - )); + self.trim_errors(); + self.eat_and_get().expected(kind.name()); } else { self.balanced &= !kind.is_grouping(); self.expected(kind.name()); @@ -1749,6 +1748,14 @@ impl<'s> Parser<'s> { at } + /// Consume the given closing delimiter or produce an error for the matching + /// opening delimiter at `open`. + fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { + if !self.eat_if(kind) { + self.nodes[open.0].convert_to_error("unclosed delimiter"); + } + } + /// Produce an error that the given `thing` was expected. fn expected(&mut self, thing: &str) { if !self.after_error() { @@ -1756,70 +1763,19 @@ impl<'s> Parser<'s> { } } - /// Produce an error that the given `thing` was expected but another - /// thing was `found` and consume the next token. - fn expected_found(&mut self, thing: &str, found: &str) { - self.trim_errors(); - self.convert_to_error(eco_format!("expected {thing}, found {found}")); - } - /// Produce an error that the given `thing` was expected at the position /// of the marker `m`. fn expected_at(&mut self, m: Marker, thing: &str) { - let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, ""); + let error = SyntaxNode::error(eco_format!("expected {thing}"), ""); self.nodes.insert(m.0, error); } - /// Produce an error for the unclosed delimiter `kind` at the position - /// `open`. - fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { - if !self.eat_if(kind) { - self.nodes[open.0].convert_to_error("unclosed delimiter"); - } - } - /// Consume the next token (if any) and produce an error stating that it was /// unexpected. fn unexpected(&mut self) { self.trim_errors(); - self.convert_to_error(eco_format!("unexpected {}", self.current.name())); - } - - /// Consume the next token and turn it into an error. - fn convert_to_error(&mut self, message: EcoString) { - let kind = self.current; - let offset = self.nodes.len(); - self.eat(); - self.balanced &= !kind.is_grouping(); - if !kind.is_error() { - self.nodes[offset].convert_to_error(message); - } - } - - /// Adds a hint to the last node, if the last node is an error. - fn hint(&mut self, hint: impl Into<EcoString>) { - let m = self.before_trivia(); - if m.0 > 0 { - self.nodes[m.0 - 1].hint(hint); - } - } - - /// Get a marker after the last non-trivia node. - fn before_trivia(&self) -> Marker { - let mut i = self.nodes.len(); - if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { - while i > 0 && self.nodes[i - 1].kind().is_trivia() { - i -= 1; - } - } - Marker(i) - } - - /// Whether the last non-trivia node is an error. - fn after_error(&mut self) -> bool { - let m = self.before_trivia(); - m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() + self.balanced &= !self.current.is_grouping(); + self.eat_and_get().unexpected(); } /// Remove trailing errors with zero length. @@ -1835,3 +1791,17 @@ impl<'s> Parser<'s> { self.nodes.drain(start..end); } } + +impl Index<Marker> for Parser<'_> { + type Output = SyntaxNode; + + fn index(&self, m: Marker) -> &Self::Output { + &self.nodes[m.0] + } +} + +impl IndexMut<Marker> for Parser<'_> { + fn index_mut(&mut self, m: Marker) -> &mut Self::Output { + &mut self.nodes[m.0] + } +} diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index 26b4ecd5..88a9b18b 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -10,17 +10,16 @@ pub struct SyntaxSet(u128); impl SyntaxSet { /// Create a new set from a slice of kinds. - pub const fn new(slice: &[SyntaxKind]) -> Self { - let mut bits = 0; - let mut i = 0; - while i < slice.len() { - bits |= bit(slice[i]); - i += 1; - } - Self(bits) + pub const fn new() -> Self { + Self(0) } /// Insert a syntax kind into the set. + pub const fn add(self, kind: SyntaxKind) -> Self { + Self(self.0 | bit(kind)) + } + + /// Combine two syntax sets. pub const fn union(self, other: Self) -> Self { Self(self.0 | other.0) } @@ -36,56 +35,53 @@ const fn bit(kind: SyntaxKind) -> u128 { } /// Syntax kinds that can start a statement. -pub const STMT: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Let, - SyntaxKind::Set, - SyntaxKind::Show, - SyntaxKind::Import, - SyntaxKind::Include, - SyntaxKind::Return, -]); +pub const STMT: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Let) + .add(SyntaxKind::Set) + .add(SyntaxKind::Show) + .add(SyntaxKind::Import) + .add(SyntaxKind::Include) + .add(SyntaxKind::Return); /// Syntax kinds that can start a markup expression. -pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Space, - SyntaxKind::Parbreak, - SyntaxKind::LineComment, - SyntaxKind::BlockComment, - SyntaxKind::Text, - SyntaxKind::Linebreak, - SyntaxKind::Escape, - SyntaxKind::Shorthand, - SyntaxKind::SmartQuote, - SyntaxKind::Raw, - SyntaxKind::Link, - SyntaxKind::Label, - SyntaxKind::Hash, - SyntaxKind::Star, - SyntaxKind::Underscore, - SyntaxKind::HeadingMarker, - SyntaxKind::ListMarker, - SyntaxKind::EnumMarker, - SyntaxKind::TermMarker, - SyntaxKind::RefMarker, - SyntaxKind::Dollar, - SyntaxKind::LeftBracket, - SyntaxKind::RightBracket, - SyntaxKind::Colon, -]); +pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Space) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::LineComment) + .add(SyntaxKind::BlockComment) + .add(SyntaxKind::Text) + .add(SyntaxKind::Linebreak) + .add(SyntaxKind::Escape) + .add(SyntaxKind::Shorthand) + .add(SyntaxKind::SmartQuote) + .add(SyntaxKind::Raw) + .add(SyntaxKind::Link) + .add(SyntaxKind::Label) + .add(SyntaxKind::Hash) + .add(SyntaxKind::Star) + .add(SyntaxKind::Underscore) + .add(SyntaxKind::HeadingMarker) + .add(SyntaxKind::ListMarker) + .add(SyntaxKind::EnumMarker) + .add(SyntaxKind::TermMarker) + .add(SyntaxKind::RefMarker) + .add(SyntaxKind::Dollar) + .add(SyntaxKind::LeftBracket) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::Colon); /// Syntax kinds that can start a math expression. -pub const MATH_EXPR: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Hash, - SyntaxKind::MathIdent, - SyntaxKind::Text, - SyntaxKind::Shorthand, - SyntaxKind::Linebreak, - SyntaxKind::MathAlignPoint, - SyntaxKind::Escape, - SyntaxKind::Str, - SyntaxKind::Root, - SyntaxKind::Prime, -]); +pub const MATH_EXPR: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Hash) + .add(SyntaxKind::MathIdent) + .add(SyntaxKind::Text) + .add(SyntaxKind::Shorthand) + .add(SyntaxKind::Linebreak) + .add(SyntaxKind::MathAlignPoint) + .add(SyntaxKind::Escape) + .add(SyntaxKind::Str) + .add(SyntaxKind::Root) + .add(SyntaxKind::Prime); /// Syntax kinds that can start a code expression. pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); @@ -94,63 +90,81 @@ pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); pub const ATOMIC_CODE_EXPR: SyntaxSet = ATOMIC_CODE_PRIMARY; /// Syntax kinds that can start a code primary. -pub const CODE_PRIMARY: SyntaxSet = - ATOMIC_CODE_PRIMARY.union(SyntaxSet::new(&[SyntaxKind::Underscore])); +pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore); /// Syntax kinds that can start an atomic code primary. -pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Ident, - SyntaxKind::LeftBrace, - SyntaxKind::LeftBracket, - SyntaxKind::LeftParen, - SyntaxKind::Dollar, - SyntaxKind::Let, - SyntaxKind::Set, - SyntaxKind::Show, - SyntaxKind::If, - SyntaxKind::While, - SyntaxKind::For, - SyntaxKind::Import, - SyntaxKind::Include, - SyntaxKind::Break, - SyntaxKind::Continue, - SyntaxKind::Return, - SyntaxKind::None, - SyntaxKind::Auto, - SyntaxKind::Int, - SyntaxKind::Float, - SyntaxKind::Bool, - SyntaxKind::Numeric, - SyntaxKind::Str, - SyntaxKind::Label, - SyntaxKind::Raw, -]); +pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Ident) + .add(SyntaxKind::LeftBrace) + .add(SyntaxKind::LeftBracket) + .add(SyntaxKind::LeftParen) + .add(SyntaxKind::Dollar) + .add(SyntaxKind::Let) + .add(SyntaxKind::Set) + .add(SyntaxKind::Show) + .add(SyntaxKind::If) + .add(SyntaxKind::While) + .add(SyntaxKind::For) + .add(SyntaxKind::Import) + .add(SyntaxKind::Include) + .add(SyntaxKind::Break) + .add(SyntaxKind::Continue) + .add(SyntaxKind::Return) + .add(SyntaxKind::None) + .add(SyntaxKind::Auto) + .add(SyntaxKind::Int) + .add(SyntaxKind::Float) + .add(SyntaxKind::Bool) + .add(SyntaxKind::Numeric) + .add(SyntaxKind::Str) + .add(SyntaxKind::Label) + .add(SyntaxKind::Raw); /// Syntax kinds that are unary operators. -pub const UNARY_OP: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Plus, SyntaxKind::Minus, SyntaxKind::Not]); +pub const UNARY_OP: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Plus) + .add(SyntaxKind::Minus) + .add(SyntaxKind::Not); /// Syntax kinds that are binary operators. -pub const BINARY_OP: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Plus, - SyntaxKind::Minus, - SyntaxKind::Star, - SyntaxKind::Slash, - SyntaxKind::And, - SyntaxKind::Or, - SyntaxKind::EqEq, - SyntaxKind::ExclEq, - SyntaxKind::Lt, - SyntaxKind::LtEq, - SyntaxKind::Gt, - SyntaxKind::GtEq, - SyntaxKind::Eq, - SyntaxKind::In, - SyntaxKind::PlusEq, - SyntaxKind::HyphEq, - SyntaxKind::StarEq, - SyntaxKind::SlashEq, -]); +pub const BINARY_OP: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Plus) + .add(SyntaxKind::Minus) + .add(SyntaxKind::Star) + .add(SyntaxKind::Slash) + .add(SyntaxKind::And) + .add(SyntaxKind::Or) + .add(SyntaxKind::EqEq) + .add(SyntaxKind::ExclEq) + .add(SyntaxKind::Lt) + .add(SyntaxKind::LtEq) + .add(SyntaxKind::Gt) + .add(SyntaxKind::GtEq) + .add(SyntaxKind::Eq) + .add(SyntaxKind::In) + .add(SyntaxKind::PlusEq) + .add(SyntaxKind::HyphEq) + .add(SyntaxKind::StarEq) + .add(SyntaxKind::SlashEq); + +/// Syntax kinds that can start an argument in a function call. +pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); + +/// Syntax kinds that can start an argument in a function call. +pub const ARG: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a parameter in a parameter list. +pub const PARAM: SyntaxSet = PATTERN.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a destructuring item. +pub const DESTRUCTURING_ITEM: SyntaxSet = PATTERN.add(SyntaxKind::Dots); + +/// Syntax kinds that can start a pattern. +pub const PATTERN: SyntaxSet = + PATTERN_LEAF.add(SyntaxKind::LeftParen).add(SyntaxKind::Underscore); + +/// Syntax kinds that can start a pattern leaf. +pub const PATTERN_LEAF: SyntaxSet = ATOMIC_CODE_EXPR; #[cfg(test)] mod tests { @@ -163,7 +177,7 @@ mod tests { #[test] fn test_set() { - let set = SyntaxSet::new(&[SyntaxKind::And, SyntaxKind::Or]); + let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or); assert!(set.contains(SyntaxKind::And)); assert!(set.contains(SyntaxKind::Or)); assert!(!set.contains(SyntaxKind::Not)); |
