diff options
| author | Max <me@mkor.je> | 2025-01-09 10:49:06 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-09 10:49:06 +0000 |
| commit | be6629c7cbd00b06beab2b1477c4270859906cb2 (patch) | |
| tree | 66087a899b1f2c5d90d9e72145cfdb9d55486b5d /crates/typst-syntax/src | |
| parent | e2b37fef33a92a7086790e04fb133472413c0c0a (diff) | |
Better math argument parsing (#5008)
Diffstat (limited to 'crates/typst-syntax/src')
| -rw-r--r-- | crates/typst-syntax/src/lexer.rs | 44 | ||||
| -rw-r--r-- | crates/typst-syntax/src/parser.rs | 173 | ||||
| -rw-r--r-- | crates/typst-syntax/src/set.rs | 4 |
3 files changed, 144 insertions, 77 deletions
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index b0cb5c46..6b5d2816 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -616,6 +616,11 @@ impl Lexer<'_> { '~' if self.s.eat_if('>') => SyntaxKind::MathShorthand, '*' | '-' | '~' => SyntaxKind::MathShorthand, + '.' => SyntaxKind::Dot, + ',' => SyntaxKind::Comma, + ';' => SyntaxKind::Semicolon, + ')' => SyntaxKind::RightParen, + '#' => SyntaxKind::Hash, '_' => SyntaxKind::Underscore, '$' => SyntaxKind::Dollar, @@ -685,6 +690,45 @@ impl Lexer<'_> { } SyntaxKind::Text } + + /// Handle named arguments in math function call. + pub fn maybe_math_named_arg(&mut self, start: usize) -> Option<SyntaxNode> { + let cursor = self.s.cursor(); + self.s.jump(start); + if self.s.eat_if(is_id_start) { + self.s.eat_while(is_id_continue); + // Check that a colon directly follows the identifier, and not the + // `:=` or `::=` math shorthands. + if self.s.at(':') && !self.s.at(":=") && !self.s.at("::=") { + // Check that the identifier is not just `_`. + let node = if self.s.from(start) != "_" { + SyntaxNode::leaf(SyntaxKind::Ident, self.s.from(start)) + } else { + let msg = SyntaxError::new("expected identifier, found underscore"); + SyntaxNode::error(msg, self.s.from(start)) + }; + return Some(node); + } + } + self.s.jump(cursor); + None + } + + /// Handle spread arguments in math function call. + pub fn maybe_math_spread_arg(&mut self, start: usize) -> Option<SyntaxNode> { + let cursor = self.s.cursor(); + self.s.jump(start); + if self.s.eat_if("..") { + // Check that neither a space nor a dot follows the spread syntax. + // A dot would clash with the `...` math shorthand. + if !self.space_or_end() && !self.s.at('.') { + let node = SyntaxNode::leaf(SyntaxKind::Dots, self.s.from(start)); + return Some(node); + } + } + self.s.jump(cursor); + None + } } /// Code. diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index 6c1778c4..335b8f1a 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -217,16 +217,20 @@ fn math(p: &mut Parser, stop_set: SyntaxSet) { p.wrap(m, SyntaxKind::Math); } -/// Parses a sequence of math expressions. -fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) { +/// Parses a sequence of math expressions. Returns the number of expressions +/// parsed. +fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) -> usize { debug_assert!(stop_set.contains(SyntaxKind::End)); + let mut count = 0; while !p.at_set(stop_set) { if p.at_set(set::MATH_EXPR) { math_expr(p); + count += 1; } else { p.unexpected(); } } + count } /// Parses a single math expression: This includes math elements like @@ -254,6 +258,13 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { } } + SyntaxKind::Dot + | SyntaxKind::Comma + | SyntaxKind::Semicolon + | SyntaxKind::RightParen => { + p.convert_and_eat(SyntaxKind::Text); + } + SyntaxKind::Text | SyntaxKind::MathShorthand => { continuable = matches!( math_class(p.current_text()), @@ -398,7 +409,13 @@ fn math_delimited(p: &mut Parser) { while !p.at_set(syntax_set!(Dollar, End)) { if math_class(p.current_text()) == Some(MathClass::Closing) { p.wrap(m2, SyntaxKind::Math); - p.eat(); + // We could be at the shorthand `|]`, which shouldn't be converted + // to a `Text` kind. + if p.at(SyntaxKind::RightParen) { + p.convert_and_eat(SyntaxKind::Text); + } else { + p.eat(); + } p.wrap(m, SyntaxKind::MathDelimited); return; } @@ -455,94 +472,90 @@ fn math_args(p: &mut Parser) { let m = p.marker(); p.convert_and_eat(SyntaxKind::LeftParen); - let mut namable = true; - let mut named = None; + let mut positional = true; let mut has_arrays = false; - let mut array = p.marker(); - let mut arg = p.marker(); - // The number of math expressions per argument. - let mut count = 0; - while !p.at_set(syntax_set!(Dollar, End)) { - if namable - && (p.at(SyntaxKind::MathIdent) || p.at(SyntaxKind::Text)) - && p.text[p.current_end()..].starts_with(':') - { - p.convert_and_eat(SyntaxKind::Ident); - p.convert_and_eat(SyntaxKind::Colon); - named = Some(arg); - arg = p.marker(); - array = p.marker(); - } - - match p.current_text() { - ")" => break, - ";" => { - maybe_wrap_in_math(p, arg, count, named); - p.wrap(array, SyntaxKind::Array); - p.convert_and_eat(SyntaxKind::Semicolon); - array = p.marker(); - arg = p.marker(); - count = 0; - namable = true; - named = None; - has_arrays = true; - continue; - } - "," => { - maybe_wrap_in_math(p, arg, count, named); - p.convert_and_eat(SyntaxKind::Comma); - arg = p.marker(); - count = 0; - namable = true; - if named.is_some() { - array = p.marker(); - named = None; + let mut maybe_array_start = p.marker(); + let mut seen = HashSet::new(); + while !p.at_set(syntax_set!(End, Dollar, RightParen)) { + positional = math_arg(p, &mut seen); + + match p.current() { + SyntaxKind::Comma => { + p.eat(); + if !positional { + maybe_array_start = p.marker(); } - continue; } - _ => {} - } - - if p.at_set(set::MATH_EXPR) { - math_expr(p); - count += 1; - } else { - p.unexpected(); - } - - namable = false; - } + SyntaxKind::Semicolon => { + if !positional { + maybe_array_start = p.marker(); + } - if arg != p.marker() { - maybe_wrap_in_math(p, arg, count, named); - if named.is_some() { - array = p.marker(); + // Parses an array: `a, b, c;`. + // The semicolon merges preceding arguments separated by commas + // into an array argument. + p.wrap(maybe_array_start, SyntaxKind::Array); + p.eat(); + maybe_array_start = p.marker(); + has_arrays = true; + } + SyntaxKind::End | SyntaxKind::Dollar | SyntaxKind::RightParen => {} + _ => p.expected("comma or semicolon"), } } - if has_arrays && array != p.marker() { - p.wrap(array, SyntaxKind::Array); - } - - if p.at(SyntaxKind::Text) && p.current_text() == ")" { - p.convert_and_eat(SyntaxKind::RightParen); - } else { - p.expected("closing paren"); - p.balanced = false; + // Check if we need to wrap the preceding arguments in an array. + if maybe_array_start != p.marker() && has_arrays && positional { + p.wrap(maybe_array_start, SyntaxKind::Array); } + p.expect_closing_delimiter(m, SyntaxKind::RightParen); p.wrap(m, SyntaxKind::Args); } -/// Wrap math function arguments to join adjacent math content or create an -/// empty 'Math' node for when we have 0 args. +/// Parses a single argument in a math argument list. /// -/// We don't wrap when `count == 1`, since wrapping would change the type of the -/// expression from potentially non-content to content. Ex: `$ func(#12pt) $` -/// would change the type from size to content if wrapped. -fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, count: usize, named: Option<Marker>) { +/// Returns whether the parsed argument was positional or not. +fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) -> bool { + let m = p.marker(); + let start = p.current_start(); + + if p.at(SyntaxKind::Dot) { + // Parses a spread argument: `..args`. + if let Some(spread) = p.lexer.maybe_math_spread_arg(start) { + p.token.node = spread; + p.eat(); + math_expr(p); + p.wrap(m, SyntaxKind::Spread); + return true; + } + } + + let mut positional = true; + if p.at_set(syntax_set!(Text, MathIdent, Underscore)) { + // Parses a named argument: `thickness: #12pt`. + if let Some(named) = p.lexer.maybe_math_named_arg(start) { + p.token.node = named; + let text = p.current_text(); + p.eat(); + p.convert_and_eat(SyntaxKind::Colon); + if !seen.insert(text) { + p[m].convert_to_error(eco_format!("duplicate argument: {text}")); + } + positional = false; + } + } + + // Parses a normal positional argument. + let arg = p.marker(); + let count = math_exprs(p, syntax_set!(End, Dollar, Comma, Semicolon, RightParen)); if count == 0 { + // Named argument requires a value. + if !positional { + p.expected("expression"); + } + // Flush trivia so that the new empty Math node will be wrapped _inside_ // any `SyntaxKind::Array` elements created in `math_args`. // (And if we don't follow by wrapping in an array, it has no effect.) @@ -553,13 +566,19 @@ fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, count: usize, named: Option<M p.flush_trivia(); } + // Wrap math function arguments to join adjacent math content or create an + // empty 'Math' node for when we have 0 args. We don't wrap when + // `count == 1`, since wrapping would change the type of the expression + // from potentially non-content to content. Ex: `$ func(#12pt) $` would + // change the type from size to content if wrapped. if count != 1 { p.wrap(arg, SyntaxKind::Math); } - if let Some(m) = named { + if !positional { p.wrap(m, SyntaxKind::Named); } + positional } /// Parses the contents of a code block. diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index 014aaf2f..9eb457b8 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -59,6 +59,10 @@ pub const MATH_EXPR: SyntaxSet = syntax_set!( Hash, MathIdent, FieldAccess, + Dot, + Comma, + Semicolon, + RightParen, Text, MathShorthand, Linebreak, |
