diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-01-22 13:27:49 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-01-22 13:27:49 +0100 |
| commit | ea378e89b4f2267bb85ec56c905111a6c73d4721 (patch) | |
| tree | 41d72ea4e55ee86de0cc0a50b761c8d6c75c44a6 /src | |
| parent | a50cb588236a9258271d68b22b2c07fe71d19553 (diff) | |
Better math atoms
Diffstat (limited to 'src')
| -rw-r--r-- | src/syntax/lexer.rs | 29 |
1 file changed, 20 insertions, 9 deletions
```diff
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs
index e3c29150..433d0def 100644
--- a/src/syntax/lexer.rs
+++ b/src/syntax/lexer.rs
@@ -1,3 +1,4 @@
+use unicode_segmentation::UnicodeSegmentation;
 use unicode_xid::UnicodeXID;
 use unscanny::Scanner;
 
@@ -103,7 +104,7 @@ impl Lexer<'_> {
 
             Some(c) => match self.mode {
                 LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(c),
+                LexMode::Math => self.math(start, c),
                 LexMode::Code => self.code(start, c),
             },
 
@@ -404,7 +405,7 @@ impl Lexer<'_> {
 
 /// Math.
 impl Lexer<'_> {
-    fn math(&mut self, c: char) -> SyntaxKind {
+    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
         match c {
             '\\' => self.backslash(),
             ':' if self.s.at(is_id_start) => self.maybe_symbol(),
@@ -441,13 +442,7 @@ impl Lexer<'_> {
             }
 
             // Other math atoms.
-            _ => {
-                // Keep numbers together.
-                if c.is_numeric() {
-                    self.s.eat_while(char::is_numeric);
-                }
-                SyntaxKind::Atom
-            }
+            _ => self.atom(start, c),
         }
     }
 
@@ -469,6 +464,22 @@ impl Lexer<'_> {
 
         SyntaxKind::Ident
     }
+
+    fn atom(&mut self, start: usize, c: char) -> SyntaxKind {
+        // Keep numbers and grapheme clusters together.
+        if c.is_numeric() {
+            self.s.eat_while(char::is_numeric);
+        } else {
+            let len = self
+                .s
+                .get(start..self.s.string().len())
+                .graphemes(true)
+                .next()
+                .map_or(0, str::len);
+            self.s.jump(start + len);
+        }
+        SyntaxKind::Atom
+    }
 }
 
 /// Code.
```
