diff options
Diffstat (limited to 'crates/typst-syntax/src/parser.rs')
| -rw-r--r-- | crates/typst-syntax/src/parser.rs | 1112 |
1 files changed, 541 insertions, 571 deletions
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index a1bd5ad0..32e15cb7 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -1,5 +1,6 @@ -use std::collections::HashSet; -use std::ops::Range; +use std::collections::{HashMap, HashSet}; +use std::mem; +use std::ops::{Index, IndexMut, Range}; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; @@ -145,11 +146,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { /// Parses strong content: `*Strong*`. fn strong(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Star, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Star) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Star); @@ -160,11 +160,10 @@ fn strong(p: &mut Parser) { /// Parses emphasized content: `_Emphasized_`. fn emph(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::Underscore, - SyntaxKind::Parbreak, - SyntaxKind::RightBracket, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Underscore) + .add(SyntaxKind::Parbreak) + .add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::Underscore); @@ -175,15 +174,16 @@ fn emph(p: &mut Parser) { /// Parses a section heading: `= Introduction`. fn heading(p: &mut Parser) { - const END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Label, SyntaxKind::RightBracket, SyntaxKind::Space]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::Label) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::Space); let m = p.marker(); p.assert(SyntaxKind::HeadingMarker); whitespace_line(p); markup(p, false, usize::MAX, |p| { - p.at_set(END) - && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label) + p.at_set(END) && (!p.at(SyntaxKind::Space) || p.peek() == SyntaxKind::Label) }); p.wrap(m, SyntaxKind::Heading); } @@ -211,7 +211,7 @@ fn enum_item(p: &mut Parser) { /// Parses an item in a term list: `/ Term: Details`. fn term_item(p: &mut Parser) { const TERM_END: SyntaxSet = - SyntaxSet::new(&[SyntaxKind::Colon, SyntaxKind::RightBracket]); + SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket); let m = p.marker(); p.assert(SyntaxKind::TermMarker); @@ -615,11 +615,7 @@ fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { /// Parses a single code expression. fn code_expr(p: &mut Parser) { - code_expr_prec(p, false, 0, false) -} - -fn code_expr_or_pattern(p: &mut Parser) { - code_expr_prec(p, false, 0, true) + code_expr_prec(p, false, 0) } /// Parses a code expression embedded in markup or math. @@ -631,7 +627,7 @@ fn embedded_code_expr(p: &mut Parser) { let stmt = p.at_set(set::STMT); let at = p.at_set(set::ATOMIC_CODE_EXPR); - code_expr_prec(p, true, 0, false); + code_expr_prec(p, true, 0); // Consume error for things like `#12p` or `#"abc\"`.# if !at && !p.current().is_trivia() && !p.eof() { @@ -650,20 +646,15 @@ fn embedded_code_expr(p: &mut Parser) { } /// Parses a code expression with at least the given precedence. -fn code_expr_prec( - p: &mut Parser, - atomic: bool, - min_prec: usize, - allow_destructuring: bool, -) { +fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) { let m = p.marker(); if !atomic && p.at_set(set::UNARY_OP) { let op = ast::UnOp::from_kind(p.current()).unwrap(); p.eat(); - code_expr_prec(p, atomic, op.precedence(), false); + code_expr_prec(p, atomic, op.precedence()); p.wrap(m, SyntaxKind::Unary); } else { - code_primary(p, atomic, allow_destructuring); + code_primary(p, atomic); } loop { @@ -675,7 +666,7 @@ fn code_expr_prec( } let at_field_or_method = - p.directly_at(SyntaxKind::Dot) && p.lexer.clone().next() == SyntaxKind::Ident; + p.directly_at(SyntaxKind::Dot) && p.peek() == SyntaxKind::Ident; if atomic && !at_field_or_method { break; @@ -713,7 +704,7 @@ fn code_expr_prec( } p.eat(); - code_expr_prec(p, false, prec, false); + code_expr_prec(p, false, prec); p.wrap(m, SyntaxKind::Binary); continue; } @@ -725,7 +716,7 @@ fn code_expr_prec( /// Parses an primary in a code expression. These are the atoms that unary and /// binary operations, functions calls, and field accesses start with / are /// composed of. -fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { +fn code_primary(p: &mut Parser, atomic: bool) { let m = p.marker(); match p.current() { SyntaxKind::Ident => { @@ -744,14 +735,17 @@ fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) { p.eat(); code_expr(p); p.wrap(m, SyntaxKind::Closure); - } else if let Some(underscore) = p.node_mut(m) { - underscore.convert_to_error("expected expression, found underscore"); + } else if p.eat_if(SyntaxKind::Eq) { + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + p[m].expected("expression"); } } SyntaxKind::LeftBrace => code_block(p), SyntaxKind::LeftBracket => content_block(p), - SyntaxKind::LeftParen => with_paren(p, allow_destructuring), + SyntaxKind::LeftParen => expr_with_paren(p), SyntaxKind::Dollar => equation(p), SyntaxKind::Let => let_binding(p), SyntaxKind::Set => set_rule(p), @@ -799,11 +793,10 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod /// Parses a code block: `{ let x = 1; x + 2 }`. fn code_block(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new(&[ - SyntaxKind::RightBrace, - SyntaxKind::RightBracket, - SyntaxKind::RightParen, - ]); + const END: SyntaxSet = SyntaxSet::new() + .add(SyntaxKind::RightBrace) + .add(SyntaxKind::RightBracket) + .add(SyntaxKind::RightParen); let m = p.marker(); p.enter(LexMode::Code); @@ -827,223 +820,6 @@ fn content_block(p: &mut Parser) { p.wrap(m, SyntaxKind::ContentBlock); } -fn with_paren(p: &mut Parser, allow_destructuring: bool) { - let m = p.marker(); - let mut kind = collection(p, true); - if p.at(SyntaxKind::Arrow) { - validate_params_at(p, m); - p.wrap(m, SyntaxKind::Params); - p.assert(SyntaxKind::Arrow); - code_expr(p); - kind = SyntaxKind::Closure; - } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { - // TODO: add warning if p.at(SyntaxKind::Eq) && kind == SyntaxKind::Parenthesized - - validate_pattern_at(p, m, false); - p.wrap(m, SyntaxKind::Destructuring); - p.assert(SyntaxKind::Eq); - code_expr(p); - kind = SyntaxKind::DestructAssignment; - } - - match kind { - SyntaxKind::Array if !allow_destructuring => validate_array_at(p, m), - SyntaxKind::Dict if !allow_destructuring => validate_dict_at(p, m), - SyntaxKind::Parenthesized if !allow_destructuring => { - validate_parenthesized_at(p, m) - } - SyntaxKind::Destructuring if !allow_destructuring => { - invalidate_destructuring(p, m) - } - _ => {} - } - p.wrap(m, kind); -} - -fn invalidate_destructuring(p: &mut Parser, m: Marker) { - let mut collection_kind = Option::None; - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => match collection_kind { - Some(SyntaxKind::Array) => child.convert_to_error(eco_format!( - "expected expression, found {}", - child.kind().name() - )), - _ => collection_kind = Some(SyntaxKind::Dict), - }, - SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {} - kind => match collection_kind { - Some(SyntaxKind::Dict) => child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )), - _ => collection_kind = Some(SyntaxKind::Array), - }, - } - } -} - -fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind { - p.enter_newline_mode(NewlineMode::Continue); - - let m = p.marker(); - p.assert(SyntaxKind::LeftParen); - - let mut count = 0; - let mut parenthesized = true; - let mut kind = None; - if keyed && p.eat_if(SyntaxKind::Colon) { - kind = Some(SyntaxKind::Dict); - parenthesized = false; - } - - while !p.current().is_terminator() { - let prev = p.prev_end(); - match item(p, keyed) { - SyntaxKind::Spread => parenthesized = false, - SyntaxKind::Named | SyntaxKind::Keyed => { - match kind { - Some(SyntaxKind::Array) => kind = Some(SyntaxKind::Destructuring), - _ => kind = Some(SyntaxKind::Dict), - } - parenthesized = false; - } - SyntaxKind::Int => match kind { - Some(SyntaxKind::Array) | None => kind = Some(SyntaxKind::Array), - Some(_) => kind = Some(SyntaxKind::Destructuring), - }, - _ if kind.is_none() => kind = Some(SyntaxKind::Array), - _ => {} - } - - if !p.progress(prev) { - p.unexpected(); - continue; - } - - count += 1; - - if p.current().is_terminator() { - break; - } - - if p.expect(SyntaxKind::Comma) { - parenthesized = false; - } - } - - p.expect_closing_delimiter(m, SyntaxKind::RightParen); - p.exit_newline_mode(); - - if parenthesized && count == 1 { - SyntaxKind::Parenthesized - } else { - kind.unwrap_or(SyntaxKind::Array) - } -} - -fn item(p: &mut Parser, keyed: bool) -> SyntaxKind { - let m = p.marker(); - - if p.eat_if(SyntaxKind::Dots) { - if p.at(SyntaxKind::Comma) || p.at(SyntaxKind::RightParen) { - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - code_expr(p); - p.wrap(m, SyntaxKind::Spread); - return SyntaxKind::Spread; - } - - if p.at(SyntaxKind::Underscore) { - // This is a temporary workaround to fix `v.map(_ => {})`. - let mut lexer = p.lexer.clone(); - let next = - std::iter::from_fn(|| Some(lexer.next())).find(|kind| !kind.is_trivia()); - if next != Some(SyntaxKind::Arrow) { - p.eat(); - return SyntaxKind::Underscore; - } - } - - code_expr_or_pattern(p); - - if !p.eat_if(SyntaxKind::Colon) { - return SyntaxKind::Int; - } - - if !p.eat_if(SyntaxKind::Underscore) { - code_expr(p); - } - - let kind = match p.node(m).map(SyntaxNode::kind) { - Some(SyntaxKind::Ident) => SyntaxKind::Named, - Some(_) if keyed => SyntaxKind::Keyed, - _ => { - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Colon { - break; - } - - let expected = if keyed { "expression" } else { "identifier" }; - let message = eco_format!( - "expected {expected}, found {found}", - found = child.kind().name(), - ); - child.convert_to_error(message); - } - SyntaxKind::Named - } - }; - - p.wrap(m, kind); - kind -} - -/// Parses a function call's argument list: `(12pt, y)`. -fn args(p: &mut Parser) { - if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { - p.expected("argument list"); - } - - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - collection(p, false); - validate_args_at(p, m); - } - - while p.directly_at(SyntaxKind::LeftBracket) { - content_block(p); - } - - p.wrap(m, SyntaxKind::Args); -} - -enum PatternKind { - Ident, - Other, -} - -/// Parses a pattern that can be assigned to. -fn pattern(p: &mut Parser) -> PatternKind { - let m = p.marker(); - if p.at(SyntaxKind::LeftParen) { - let kind = collection(p, false); - validate_pattern_at(p, m, true); - - if kind != SyntaxKind::Parenthesized { - p.wrap(m, SyntaxKind::Destructuring); - } - PatternKind::Other - } else if p.eat_if(SyntaxKind::Underscore) { - PatternKind::Other - } else { - p.expect(SyntaxKind::Ident); - PatternKind::Ident - } -} - /// Parses a let binding: `let x = 1`. fn let_binding(p: &mut Parser) { let m = p.marker(); @@ -1052,17 +828,15 @@ fn let_binding(p: &mut Parser) { let m2 = p.marker(); let mut closure = false; let mut other = false; - match pattern(p) { - PatternKind::Ident => { - if p.directly_at(SyntaxKind::LeftParen) { - let m3 = p.marker(); - collection(p, false); - validate_params_at(p, m3); - p.wrap(m3, SyntaxKind::Params); - closure = true; - } + + if p.eat_if(SyntaxKind::Ident) { + if p.directly_at(SyntaxKind::LeftParen) { + params(p); + closure = true; } - PatternKind::Other => other = true, + } else { + pattern(p, false, &mut HashSet::new(), None); + other = true; } let f = if closure || other { Parser::expect } else { Parser::eat_if }; @@ -1144,17 +918,21 @@ fn while_loop(p: &mut Parser) { fn for_loop(p: &mut Parser) { let m = p.marker(); p.assert(SyntaxKind::For); - pattern(p); - if p.at(SyntaxKind::Comma) { - p.expected("keyword `in`"); - p.hint("did you mean to use a destructuring pattern?"); - if !p.eat_if(SyntaxKind::Ident) { - p.eat_if(SyntaxKind::Underscore); + + let mut seen = HashSet::new(); + pattern(p, false, &mut seen, None); + + let m2 = p.marker(); + if p.eat_if(SyntaxKind::Comma) { + let node = &mut p[m2]; + node.unexpected(); + node.hint("destructuring patterns must be wrapped in parentheses"); + if p.at_set(set::PATTERN) { + pattern(p, false, &mut seen, None); } - p.eat_if(SyntaxKind::In); - } else { - p.expect(SyntaxKind::In); } + + p.expect(SyntaxKind::In); code_expr(p); block(p); p.wrap(m, SyntaxKind::ForLoop); @@ -1192,10 +970,9 @@ fn import_items(p: &mut Parser) { p.wrap(item_marker, SyntaxKind::RenamedImportItem); } - if p.current().is_terminator() { - break; + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } - p.expect(SyntaxKind::Comma); } p.wrap(m, SyntaxKind::ImportItems); } @@ -1232,247 +1009,426 @@ fn return_stmt(p: &mut Parser) { p.wrap(m, SyntaxKind::FuncReturn); } -fn validate_parenthesized_at(p: &mut Parser, m: Marker) { - for child in p.post_process(m) { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); - } - _ => {} +/// An expression that starts with a parenthesis. +fn expr_with_paren(p: &mut Parser) { + // If we've seen this position before and have a memoized result, just use + // it. See below for more explanation about this memoization. + let start = p.current_start(); + if let Some((range, end_point)) = p.memo.get(&start) { + p.nodes.extend(p.memo_arena[range.clone()].iter().cloned()); + p.restore(end_point.clone()); + return; + } + + let m = p.marker(); + let checkpoint = p.checkpoint(); + + // When we reach a '(', we can't be sure what it is. First, we attempt to + // parse as a simple parenthesized expression, array, or dictionary as + // these are the most likely things. We can handle all of those in a single + // pass. + let kind = parenthesized_or_array_or_dict(p); + + // If, however, '=>' or '=' follows, we must backtrack and reparse as either + // a parameter list or a destructuring. To be able to do that, we created a + // parser checkpoint before our speculative parse, which we can restore. + // + // However, naive backtracking has a fatal flaw: It can lead to exponential + // parsing time if we are constantly getting things wrong in a nested + // scenario. The particular failure case for parameter parsing is the + // following: `(x: (x: (x) => y) => y) => y` + // + // Such a structure will reparse over and over again recursively, leading to + // a running time of O(2^n) for nesting depth n. To prevent this, we perform + // a simple trick: When we have done the mistake of picking the wrong path + // once and have subsequently parsed correctly, we save the result of that + // correct parsing in the `p.memo` map. When we reach the same position + // again, we can then just restore this result. In this way, no + // parenthesized expression is parsed more than twice, leading to a worst + // case running time of O(2n). + if p.at(SyntaxKind::Arrow) { + p.restore(checkpoint); + params(p); + p.assert(SyntaxKind::Arrow); + code_expr(p); + p.wrap(m, SyntaxKind::Closure); + } else if p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized { + p.restore(checkpoint); + destructuring_or_parenthesized(p, true, &mut HashSet::new()); + p.assert(SyntaxKind::Eq); + code_expr(p); + p.wrap(m, SyntaxKind::DestructAssignment); + } else { + return; + } + + // Memoize result if we backtracked. + let offset = p.memo_arena.len(); + p.memo_arena.extend(p.nodes[m.0..].iter().cloned()); + p.memo.insert(start, (offset..p.memo_arena.len(), p.checkpoint())); +} + +/// Parses either +/// - a parenthesized expression: `(1 + 2)`, or +/// - an array: `(1, "hi", 12cm)`, or +/// - a dictionary: `(thickness: 3pt, pattern: dashed)`. +fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut state = GroupState { + count: 0, + maybe_just_parens: true, + kind: None, + seen: HashSet::new(), + }; + + if p.eat_if(SyntaxKind::Colon) { + state.kind = Some(SyntaxKind::Dict); + state.maybe_just_parens = false; + } + + while !p.current().is_terminator() { + if !p.at_set(set::ARRAY_OR_DICT_ITEM) { + p.unexpected(); + continue; + } + + array_or_dict_item(p, &mut state); + state.count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + state.maybe_just_parens = false; } } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + let kind = if state.maybe_just_parens && state.count == 1 { + SyntaxKind::Parenthesized + } else { + state.kind.unwrap_or(SyntaxKind::Array) + }; + + p.wrap(m, kind); + kind } -fn validate_array_at(p: &mut Parser, m: Marker) { - validate_array(p.post_process(m)) +/// State for array/dictionary parsing. +struct GroupState { + count: usize, + maybe_just_parens: bool, + kind: Option<SyntaxKind>, + seen: HashSet<EcoString>, } -fn validate_array<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) { - for child in children { - let kind = child.kind(); - match kind { - SyntaxKind::Array => validate_array(child.children_mut().iter_mut()), - SyntaxKind::Dict => validate_dict(child.children_mut().iter_mut()), - SyntaxKind::Named | SyntaxKind::Keyed | SyntaxKind::Underscore => { - child.convert_to_error(eco_format!( - "expected expression, found {}", - kind.name() - )); +/// Parses a single item in an array or dictionary. +fn array_or_dict_item(p: &mut Parser, state: &mut GroupState) { + let m = p.marker(); + + if p.eat_if(SyntaxKind::Dots) { + // Parses a spreaded item: `..item`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + state.maybe_just_parens = false; + return; + } + + code_expr(p); + + if p.eat_if(SyntaxKind::Colon) { + // Parses a named/keyed pair: `name: item` or `"key": item`. + code_expr(p); + + let node = &mut p[m]; + let pair_kind = match node.kind() { + SyntaxKind::Ident => SyntaxKind::Named, + _ => SyntaxKind::Keyed, + }; + + if let Some(key) = match node.cast::<ast::Expr>() { + Some(ast::Expr::Ident(ident)) => Some(ident.get().clone()), + Some(ast::Expr::Str(s)) => Some(s.get()), + _ => None, + } { + if !state.seen.insert(key.clone()) { + node.convert_to_error(eco_format!("duplicate key: {key}")); } - _ => {} + } + + p.wrap(m, pair_kind); + state.maybe_just_parens = false; + + if state.kind == Some(SyntaxKind::Array) { + p[m].expected("expression"); + } else { + state.kind = Some(SyntaxKind::Dict); + } + } else { + // Parses a positional item. + if state.kind == Some(SyntaxKind::Dict) { + p[m].expected("named or keyed pair"); + } else { + state.kind = Some(SyntaxKind::Array) } } } -fn validate_dict_at(p: &mut Parser, m: Marker) { - validate_dict(p.post_process(m)) -} +/// Parses a function call's argument list: `(12pt, y)`. +fn args(p: &mut Parser) { + if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) { + p.expected("argument list"); + } -fn validate_dict<'a>(children: impl Iterator<Item = &'a mut SyntaxNode>) { - let mut used = HashSet::new(); - for child in children { - match child.kind() { - SyntaxKind::Named | SyntaxKind::Keyed => { - let Some(first) = child.children_mut().first_mut() else { continue }; - let key = if let Some(str) = first.cast::<ast::Str>() { - str.get() - } else if let Some(ident) = first.cast::<ast::Ident>() { - ident.get().clone() - } else { - continue; - }; + let m = p.marker(); + if p.at(SyntaxKind::LeftParen) { + let m2 = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); - if !used.insert(key.clone()) { - first.convert_to_error(eco_format!("duplicate key: {key}")); - child.make_erroneous(); - } + let mut seen = HashSet::new(); + while !p.current().is_terminator() { + if !p.at_set(set::ARG) { + p.unexpected(); + continue; } - SyntaxKind::Spread => {} - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Colon - | SyntaxKind::Space => {} - kind => { - child.convert_to_error(eco_format!( - "expected named or keyed pair, found {}", - kind.name() - )); + + arg(p, &mut seen); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } } + + p.expect_closing_delimiter(m2, SyntaxKind::RightParen); + p.exit_newline_mode(); } + + while p.directly_at(SyntaxKind::LeftBracket) { + content_block(p); + } + + p.wrap(m, SyntaxKind::Args); } -fn validate_params_at(p: &mut Parser, m: Marker) { - let mut used_spread = false; - let mut used = HashSet::new(); - for child in p.post_process(m) { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error(eco_format!( - "duplicate parameter: {}", - child.text() - )); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("only one argument sink is allowed"); - continue; - } - used_spread = true; - if within.kind() == SyntaxKind::Dots { - continue; - } else if within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate parameter: {}", - within.text() - )); - child.make_erroneous(); - } - } - SyntaxKind::Array | SyntaxKind::Dict | SyntaxKind::Destructuring => { - validate_pattern(child.children_mut().iter_mut(), &mut used, false); - child.convert_to_kind(SyntaxKind::Destructuring); - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - child.convert_to_error(eco_format!( - "expected identifier, named pair or argument sink, found {}", - kind.name() - )); - } +/// Parses a single argument in an argument list. +fn arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses a spreaded argument: `..args`. + code_expr(p); + p.wrap(m, SyntaxKind::Spread); + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses a named argument: `thickness: 12pt`. + let text = p.current_text(); + p.assert(SyntaxKind::Ident); + if !seen.insert(text) { + p[m].convert_to_error(eco_format!("duplicate argument: {text}")); + } + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal positional argument. + let at_expr = p.at_set(set::CODE_EXPR); + code_expr(p); + + // Recover from bad named pair. + if at_expr && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); } } } -fn validate_args_at(p: &mut Parser, m: Marker) { - let mut used = HashSet::new(); - for child in p.post_process(m) { - if child.kind() == SyntaxKind::Named { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error(eco_format!( - "duplicate argument: {}", - within.text() - )); - child.make_erroneous(); - } - } else if child.kind() == SyntaxKind::Underscore { - child.convert_to_error("unexpected underscore"); +/// Parses a closure's parameters: `(x, y)`. +fn params(p: &mut Parser) { + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + let mut seen = HashSet::new(); + let mut sink = false; + + while !p.current().is_terminator() { + if !p.at_set(set::PARAM) { + p.unexpected(); + continue; + } + + param(p, &mut seen, &mut sink); + + if !p.current().is_terminator() { + p.expect(SyntaxKind::Comma); } } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + p.wrap(m, SyntaxKind::Params); } -fn validate_pattern_at(p: &mut Parser, m: Marker, forbid_expressions: bool) { - let mut used = HashSet::new(); - validate_pattern(p.post_process(m), &mut used, forbid_expressions); +/// Parses a single parameter in a parameter list. +fn param<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>, sink: &mut bool) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parses argument sink: `..sink`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, false, seen, Some("parameter")); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one argument sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parses named parameter: `thickness: 3pt`. + // We still use `pattern` even though we know it's just an identifier + // because it gives us duplicate parameter detection for free. + pattern(p, false, seen, Some("parameter")); + p.assert(SyntaxKind::Colon); + code_expr(p); + p.wrap(m, SyntaxKind::Named); + } else { + // Parses a normal position parameter. + let at_pat = p.at_set(set::PATTERN); + pattern(p, false, seen, Some("parameter")); + + // Recover from bad named pair. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + code_expr(p); + } + } } -fn validate_pattern<'a>( - children: impl Iterator<Item = &'a mut SyntaxNode>, - used: &mut HashSet<EcoString>, - forbid_expressions: bool, +/// Parses a binding or reassignment pattern. +fn pattern<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, ) { - let mut used_spread = false; - for child in children { - match child.kind() { - SyntaxKind::Ident => { - if !used.insert(child.text().clone()) { - child.convert_to_error( - "at most one binding per identifier is allowed", - ); - } - } - SyntaxKind::Spread => { - let Some(within) = child.children_mut().last_mut() else { continue }; - if used_spread { - child.convert_to_error("at most one destructuring sink is allowed"); - continue; - } - used_spread = true; - - if within.kind() == SyntaxKind::Dots { - continue; - } else if forbid_expressions && within.kind() != SyntaxKind::Ident { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - continue; - } + match p.current() { + SyntaxKind::Underscore => p.eat(), + SyntaxKind::LeftParen => destructuring_or_parenthesized(p, reassignment, seen), + _ => pattern_leaf(p, reassignment, seen, dupe), + } +} - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } - } - SyntaxKind::Named => { - let Some(within) = child.children_mut().first_mut() else { return }; - if !used.insert(within.text().clone()) { - within.convert_to_error( - "at most one binding per identifier is allowed", - ); - child.make_erroneous(); - } +/// Parses a destructuring pattern or just a parenthesized pattern. +fn destructuring_or_parenthesized<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, +) { + let mut sink = false; + let mut count = 0; + let mut maybe_just_parens = true; - if forbid_expressions { - let Some(within) = child.children_mut().last_mut() else { return }; - if within.kind() != SyntaxKind::Ident - && within.kind() != SyntaxKind::Underscore - { - within.convert_to_error(eco_format!( - "expected identifier, found {}", - within.kind().name(), - )); - child.make_erroneous(); - } - } - } - SyntaxKind::LeftParen - | SyntaxKind::RightParen - | SyntaxKind::Comma - | SyntaxKind::Underscore => {} - kind => { - if forbid_expressions { - child.convert_to_error(eco_format!( - "expected identifier or destructuring sink, found {}", - kind.name() - )); - } + let m = p.marker(); + p.enter_newline_mode(NewlineMode::Continue); + p.assert(SyntaxKind::LeftParen); + + while !p.current().is_terminator() { + if !p.at_set(set::DESTRUCTURING_ITEM) { + p.unexpected(); + continue; + } + + destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink); + count += 1; + + if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { + maybe_just_parens = false; + } + } + + p.expect_closing_delimiter(m, SyntaxKind::RightParen); + p.exit_newline_mode(); + + if maybe_just_parens && count == 1 && !sink { + p.wrap(m, SyntaxKind::Parenthesized); + } else { + p.wrap(m, SyntaxKind::Destructuring); + } +} + +/// Parses an item in a destructuring pattern. +fn destructuring_item<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + maybe_just_parens: &mut bool, + sink: &mut bool, +) { + let m = p.marker(); + if p.eat_if(SyntaxKind::Dots) { + // Parse destructuring sink: `..rest`. + if p.at_set(set::PATTERN_LEAF) { + pattern_leaf(p, reassignment, seen, None); + } + p.wrap(m, SyntaxKind::Spread); + if mem::replace(sink, true) { + p[m].convert_to_error("only one destructuring sink is allowed"); + } + } else if p.at(SyntaxKind::Ident) && p.peek() == SyntaxKind::Colon { + // Parse named destructuring item. + p.assert(SyntaxKind::Ident); + p.assert(SyntaxKind::Colon); + pattern(p, reassignment, seen, None); + p.wrap(m, SyntaxKind::Named); + *maybe_just_parens = false; + } else { + // Parse positional destructuring item. + let at_pat = p.at_set(set::PATTERN); + pattern(p, reassignment, seen, None); + + // Recover from bad named destructuring. + if at_pat && p.eat_if(SyntaxKind::Colon) { + p[m].expected("identifier"); + pattern(p, reassignment, seen, None); + } + } +} + +/// Parses a leaf in a pattern - either an identifier or an expression +/// depending on whether it's a binding or reassignment pattern. +fn pattern_leaf<'s>( + p: &mut Parser<'s>, + reassignment: bool, + seen: &mut HashSet<&'s str>, + dupe: Option<&'s str>, +) { + if !p.at_set(set::PATTERN_LEAF) { + if p.current().is_keyword() { + p.eat_and_get().expected("pattern"); + } else { + p.expected("pattern"); + } + return; + } + + let m = p.marker(); + let text = p.current_text(); + + // We parse an atomic expression even though we only want an identifier for + // better error recovery. We can mark the whole expression as unexpected + // instead of going through its pieces one by one. + code_expr_prec(p, true, 0); + + if !reassignment { + let node = &mut p[m]; + if node.kind() == SyntaxKind::Ident { + if !seen.insert(text) { + node.convert_to_error(eco_format!( + "duplicate {}: {text}", + dupe.unwrap_or("binding"), + )); } + } else { + node.expected("pattern"); } } } @@ -1484,13 +1440,16 @@ struct Parser<'s> { prev_end: usize, current_start: usize, current: SyntaxKind, - modes: Vec<LexMode>, + balanced: bool, nodes: Vec<SyntaxNode>, + modes: Vec<LexMode>, newline_modes: Vec<NewlineMode>, - balanced: bool, + memo: HashMap<usize, (Range<usize>, Checkpoint<'s>)>, + memo_arena: Vec<SyntaxNode>, } /// How to proceed with parsing when seeing a newline. +#[derive(Clone)] enum NewlineMode { /// Stop always. Stop, @@ -1503,6 +1462,15 @@ enum NewlineMode { #[derive(Debug, Copy, Clone, Eq, PartialEq)] struct Marker(usize); +#[derive(Clone)] +struct Checkpoint<'s> { + lexer: Lexer<'s>, + prev_end: usize, + current_start: usize, + current: SyntaxKind, + nodes: usize, +} + impl<'s> Parser<'s> { fn new(text: &'s str, offset: usize, mode: LexMode) -> Self { let mut lexer = Lexer::new(text, mode); @@ -1514,10 +1482,12 @@ impl<'s> Parser<'s> { prev_end: offset, current_start: offset, current, - modes: vec![], + balanced: true, nodes: vec![], + modes: vec![], newline_modes: vec![], - balanced: true, + memo: HashMap::new(), + memo_arena: vec![], } } @@ -1553,10 +1523,8 @@ impl<'s> Parser<'s> { set.contains(self.current) } - #[track_caller] - fn assert(&mut self, kind: SyntaxKind) { - assert_eq!(self.current, kind); - self.eat(); + fn peek(&self) -> SyntaxKind { + self.lexer.clone().next() } fn eof(&self) -> bool { @@ -1567,6 +1535,20 @@ impl<'s> Parser<'s> { self.current == kind && self.prev_end == self.current_start } + fn eat(&mut self) { + self.save(); + self.lex(); + self.skip(); + } + + fn eat_and_get(&mut self) -> &mut SyntaxNode { + let offset = self.nodes.len(); + self.save(); + self.lex(); + self.skip(); + &mut self.nodes[offset] + } + /// Eats if at `kind`. /// /// Note: In math and code mode, this will ignore trivia in front of the @@ -1588,6 +1570,12 @@ impl<'s> Parser<'s> { at } + #[track_caller] + fn assert(&mut self, kind: SyntaxKind) { + assert_eq!(self.current, kind); + self.eat(); + } + fn convert(&mut self, kind: SyntaxKind) { self.current = kind; self.eat(); @@ -1605,12 +1593,21 @@ impl<'s> Parser<'s> { Marker(self.nodes.len()) } - fn node(&self, m: Marker) -> Option<&SyntaxNode> { - self.nodes.get(m.0) + /// Get a marker after the last non-trivia node. + fn before_trivia(&self) -> Marker { + let mut i = self.nodes.len(); + if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { + while i > 0 && self.nodes[i - 1].kind().is_trivia() { + i -= 1; + } + } + Marker(i) } - fn node_mut(&mut self, m: Marker) -> Option<&mut SyntaxNode> { - self.nodes.get_mut(m.0) + /// Whether the last non-trivia node is an error. + fn after_error(&mut self) -> bool { + let m = self.before_trivia(); + m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() } fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> { @@ -1635,10 +1632,6 @@ impl<'s> Parser<'s> { self.nodes.insert(from, SyntaxNode::inner(kind, children)); } - fn progress(&self, offset: usize) -> bool { - offset < self.prev_end - } - fn enter(&mut self, mode: LexMode) { self.modes.push(self.lexer.mode()); self.lexer.set_mode(mode); @@ -1667,10 +1660,22 @@ impl<'s> Parser<'s> { self.skip(); } - fn eat(&mut self) { - self.save(); - self.lex(); - self.skip(); + fn checkpoint(&self) -> Checkpoint<'s> { + Checkpoint { + lexer: self.lexer.clone(), + prev_end: self.prev_end, + current_start: self.current_start, + current: self.current, + nodes: self.nodes.len(), + } + } + + fn restore(&mut self, checkpoint: Checkpoint<'s>) { + self.lexer = checkpoint.lexer; + self.prev_end = checkpoint.prev_end; + self.current_start = checkpoint.current_start; + self.current = checkpoint.current; + self.nodes.truncate(checkpoint.nodes); } fn skip(&mut self) { @@ -1734,14 +1739,8 @@ impl<'s> Parser<'s> { if at { self.eat(); } else if kind == SyntaxKind::Ident && self.current.is_keyword() { - let found_text = self.current_text(); - let found = self.current.name(); - self.expected_found(kind.name(), found); - self.hint(eco_format!( - "{} is not allowed as an identifier; try `{}_` instead", - found, - found_text - )); + self.trim_errors(); + self.eat_and_get().expected(kind.name()); } else { self.balanced &= !kind.is_grouping(); self.expected(kind.name()); @@ -1749,6 +1748,14 @@ impl<'s> Parser<'s> { at } + /// Consume the given closing delimiter or produce an error for the matching + /// opening delimiter at `open`. + fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { + if !self.eat_if(kind) { + self.nodes[open.0].convert_to_error("unclosed delimiter"); + } + } + /// Produce an error that the given `thing` was expected. fn expected(&mut self, thing: &str) { if !self.after_error() { @@ -1756,70 +1763,19 @@ impl<'s> Parser<'s> { } } - /// Produce an error that the given `thing` was expected but another - /// thing was `found` and consume the next token. - fn expected_found(&mut self, thing: &str, found: &str) { - self.trim_errors(); - self.convert_to_error(eco_format!("expected {thing}, found {found}")); - } - /// Produce an error that the given `thing` was expected at the position /// of the marker `m`. fn expected_at(&mut self, m: Marker, thing: &str) { - let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, ""); + let error = SyntaxNode::error(eco_format!("expected {thing}"), ""); self.nodes.insert(m.0, error); } - /// Produce an error for the unclosed delimiter `kind` at the position - /// `open`. - fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) { - if !self.eat_if(kind) { - self.nodes[open.0].convert_to_error("unclosed delimiter"); - } - } - /// Consume the next token (if any) and produce an error stating that it was /// unexpected. fn unexpected(&mut self) { self.trim_errors(); - self.convert_to_error(eco_format!("unexpected {}", self.current.name())); - } - - /// Consume the next token and turn it into an error. - fn convert_to_error(&mut self, message: EcoString) { - let kind = self.current; - let offset = self.nodes.len(); - self.eat(); - self.balanced &= !kind.is_grouping(); - if !kind.is_error() { - self.nodes[offset].convert_to_error(message); - } - } - - /// Adds a hint to the last node, if the last node is an error. - fn hint(&mut self, hint: impl Into<EcoString>) { - let m = self.before_trivia(); - if m.0 > 0 { - self.nodes[m.0 - 1].hint(hint); - } - } - - /// Get a marker after the last non-trivia node. - fn before_trivia(&self) -> Marker { - let mut i = self.nodes.len(); - if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start { - while i > 0 && self.nodes[i - 1].kind().is_trivia() { - i -= 1; - } - } - Marker(i) - } - - /// Whether the last non-trivia node is an error. - fn after_error(&mut self) -> bool { - let m = self.before_trivia(); - m.0 > 0 && self.nodes[m.0 - 1].kind().is_error() + self.balanced &= !self.current.is_grouping(); + self.eat_and_get().unexpected(); } /// Remove trailing errors with zero length. @@ -1835,3 +1791,17 @@ impl<'s> Parser<'s> { self.nodes.drain(start..end); } } + +impl Index<Marker> for Parser<'_> { + type Output = SyntaxNode; + + fn index(&self, m: Marker) -> &Self::Output { + &self.nodes[m.0] + } +} + +impl IndexMut<Marker> for Parser<'_> { + fn index_mut(&mut self, m: Marker) -> &mut Self::Output { + &mut self.nodes[m.0] + } +} |
