From 0b8b7d0f233f748a5c12d1b8e31f657803122eba Mon Sep 17 00:00:00 2001 From: Ian Wrzesinski Date: Sat, 20 Jul 2024 21:21:53 -0500 Subject: Just add MathText SyntaxKind --- crates/typst-syntax/src/ast.rs | 32 ++++++++++++++++++++++++++++++++ crates/typst-syntax/src/highlight.rs | 1 + crates/typst-syntax/src/kind.rs | 3 +++ crates/typst-syntax/src/lexer.rs | 9 ++++++++- crates/typst-syntax/src/parser.rs | 14 ++++++++------ crates/typst-syntax/src/set.rs | 1 + 6 files changed, 53 insertions(+), 7 deletions(-) (limited to 'crates/typst-syntax/src') diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs index 19e12372..014e8392 100644 --- a/crates/typst-syntax/src/ast.rs +++ b/crates/typst-syntax/src/ast.rs @@ -123,6 +123,8 @@ pub enum Expr<'a> { Equation(Equation<'a>), /// The contents of a mathematical equation: `x^2 + 1`. Math(Math<'a>), + /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `[`. + MathText(MathText<'a>), /// An identifier in math: `pi`. MathIdent(MathIdent<'a>), /// A shorthand for a unicode codepoint in math: `a <= b`. @@ -233,6 +235,7 @@ impl<'a> AstNode<'a> for Expr<'a> { SyntaxKind::TermItem => node.cast().map(Self::Term), SyntaxKind::Equation => node.cast().map(Self::Equation), SyntaxKind::Math => node.cast().map(Self::Math), + SyntaxKind::MathText => node.cast().map(Self::MathText), SyntaxKind::MathIdent => node.cast().map(Self::MathIdent), SyntaxKind::MathShorthand => node.cast().map(Self::MathShorthand), SyntaxKind::MathAlignPoint => node.cast().map(Self::MathAlignPoint), @@ -297,6 +300,7 @@ impl<'a> AstNode<'a> for Expr<'a> { Self::Term(v) => v.to_untyped(), Self::Equation(v) => v.to_untyped(), Self::Math(v) => v.to_untyped(), + Self::MathText(v) => v.to_untyped(), Self::MathIdent(v) => v.to_untyped(), Self::MathShorthand(v) => v.to_untyped(), Self::MathAlignPoint(v) => v.to_untyped(), @@ -706,6 +710,34 @@ impl<'a> Math<'a> { } } +node! { + /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `[`. + MathText +} + +/// The underlying text kind. +pub enum MathTextKind<'a> { + Character(char), + Number(&'a EcoString), +} + +impl<'a> MathText<'a> { + /// Return the underlying text. + pub fn get(self) -> MathTextKind<'a> { + let text = self.0.text(); + let mut chars = text.chars(); + let c = chars.next().unwrap(); + if c.is_numeric() { + // Numbers are potentially grouped as multiple characters. This is + // done in `Lexer::math_text()`. + MathTextKind::Number(text) + } else { + assert!(chars.next().is_none()); + MathTextKind::Character(c) + } + } +} + node! { /// An identifier in math: `pi`. MathIdent diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs index c59a0338..cd815694 100644 --- a/crates/typst-syntax/src/highlight.rs +++ b/crates/typst-syntax/src/highlight.rs @@ -171,6 +171,7 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::Equation => None, SyntaxKind::Math => None, + SyntaxKind::MathText => None, SyntaxKind::MathIdent => highlight_ident(node), SyntaxKind::MathShorthand => Some(Tag::Escape), SyntaxKind::MathAlignPoint => Some(Tag::MathOperator), diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs index b4a97a3e..c24b47fe 100644 --- a/crates/typst-syntax/src/kind.rs +++ b/crates/typst-syntax/src/kind.rs @@ -75,6 +75,8 @@ pub enum SyntaxKind { /// The contents of a mathematical equation: `x^2 + 1`. Math, + /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `|`, `[`. + MathText, /// An identifier in math: `pi`. MathIdent, /// A shorthand for a unicode codepoint in math: `a <= b`. @@ -408,6 +410,7 @@ impl SyntaxKind { Self::TermMarker => "term marker", Self::Equation => "equation", Self::Math => "math", + Self::MathText => "math text", Self::MathIdent => "math identifier", Self::MathShorthand => "math shorthand", Self::MathAlignPoint => "math alignment point", diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index 17401044..b8f2bf25 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -685,6 +685,7 @@ impl Lexer<'_> { if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() { self.s = s; } + SyntaxKind::MathText } else { let len = self .s @@ -693,8 +694,14 @@ impl Lexer<'_> { .next() .map_or(0, str::len); self.s.jump(start + len); + if len > c.len_utf8() { + // Grapheme clusters are treated as normal text and stay grouped + // This may need to change in the future. + SyntaxKind::Text + } else { + SyntaxKind::MathText + } } - SyntaxKind::Text } /// Handle named arguments in math function call. diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index 5de71caf..55d5550b 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -252,7 +252,9 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { continuable = true; p.eat(); // Parse a function call for an identifier or field access. - if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "(" + if min_prec < 3 + && p.directly_at(SyntaxKind::MathText) + && p.current_text() == "(" { math_args(p); p.wrap(m, SyntaxKind::FuncCall); @@ -264,10 +266,10 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { | SyntaxKind::Comma | SyntaxKind::Semicolon | SyntaxKind::RightParen => { - p.convert_and_eat(SyntaxKind::Text); + p.convert_and_eat(SyntaxKind::MathText); } - SyntaxKind::Text | SyntaxKind::MathShorthand => { + SyntaxKind::Text | SyntaxKind::MathText | SyntaxKind::MathShorthand => { continuable = matches!( math_class(p.current_text()), None | Some(MathClass::Alphabetic) @@ -316,7 +318,7 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) { let mut primed = false; while !p.end() && !p.at(stop) { - if p.directly_at(SyntaxKind::Text) && p.current_text() == "!" { + if p.directly_at(SyntaxKind::MathText) && p.current_text() == "!" { p.eat(); p.wrap(m, SyntaxKind::Math); continue; @@ -414,7 +416,7 @@ fn math_delimited(p: &mut Parser) { // We could be at the shorthand `|]`, which shouldn't be converted // to a `Text` kind. if p.at(SyntaxKind::RightParen) { - p.convert_and_eat(SyntaxKind::Text); + p.convert_and_eat(SyntaxKind::MathText); } else { p.eat(); } @@ -535,7 +537,7 @@ fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) -> bool { } let mut positional = true; - if p.at_set(syntax_set!(Text, MathIdent, Underscore)) { + if p.at_set(syntax_set!(MathText, MathIdent, Underscore)) { // Parses a named argument: `thickness: #12pt`. if let Some(named) = p.lexer.maybe_math_named_arg(start) { p.token.node = named; diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index 9eb457b8..a7b9a594 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -64,6 +64,7 @@ pub const MATH_EXPR: SyntaxSet = syntax_set!( Semicolon, RightParen, Text, + MathText, MathShorthand, Linebreak, MathAlignPoint, -- cgit v1.2.3