summary | refs | log | tree | commit | diff
path: root/crates/typst-syntax
diff options
context:
space:
mode:
Diffstat (limited to 'crates/typst-syntax')
-rw-r--r-- crates/typst-syntax/src/lexer.rs | 37
-rw-r--r-- crates/typst-syntax/src/lib.rs | 13
-rw-r--r-- crates/typst-syntax/src/parser.rs | 43
3 files changed, 47 insertions, 46 deletions
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index 7d363d7b..74f14cfe 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -4,7 +4,7 @@ use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;
-use crate::{SyntaxError, SyntaxKind, SyntaxNode};
+use crate::{SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode};
/// An iterator over a source code string which returns tokens.
#[derive(Clone)]
@@ -13,28 +13,17 @@ pub(super) struct Lexer<'s> {
s: Scanner<'s>,
/// The mode the lexer is in. This determines which kinds of tokens it
/// produces.
- mode: LexMode,
+ mode: SyntaxMode,
/// Whether the last token contained a newline.
newline: bool,
/// An error for the last token.
error: Option<SyntaxError>,
}
-/// What kind of tokens to emit.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub(super) enum LexMode {
- /// Text and markup.
- Markup,
- /// Math atoms, operators, etc.
- Math,
- /// Keywords, literals and operators.
- Code,
-}
-
impl<'s> Lexer<'s> {
/// Create a new lexer with the given mode and a prefix to offset column
/// calculations.
- pub fn new(text: &'s str, mode: LexMode) -> Self {
+ pub fn new(text: &'s str, mode: SyntaxMode) -> Self {
Self {
s: Scanner::new(text),
mode,
@@ -44,12 +33,12 @@ impl<'s> Lexer<'s> {
}
/// Get the current lexing mode.
- pub fn mode(&self) -> LexMode {
+ pub fn mode(&self) -> SyntaxMode {
self.mode
}
/// Change the lexing mode.
- pub fn set_mode(&mut self, mode: LexMode) {
+ pub fn set_mode(&mut self, mode: SyntaxMode) {
self.mode = mode;
}
@@ -92,7 +81,7 @@ impl Lexer<'_> {
}
}
-/// Shared methods with all [`LexMode`].
+/// Shared methods with all [`SyntaxMode`].
impl Lexer<'_> {
/// Return the next token in our text. Returns both the [`SyntaxNode`]
/// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind
@@ -114,14 +103,14 @@ impl Lexer<'_> {
);
kind
}
- Some('`') if self.mode != LexMode::Math => return self.raw(),
+ Some('`') if self.mode != SyntaxMode::Math => return self.raw(),
Some(c) => match self.mode {
- LexMode::Markup => self.markup(start, c),
- LexMode::Math => match self.math(start, c) {
+ SyntaxMode::Markup => self.markup(start, c),
+ SyntaxMode::Math => match self.math(start, c) {
(kind, None) => kind,
(kind, Some(node)) => return (kind, node),
},
- LexMode::Code => self.code(start, c),
+ SyntaxMode::Code => self.code(start, c),
},
None => SyntaxKind::End,
@@ -145,7 +134,7 @@ impl Lexer<'_> {
};
self.newline = newlines > 0;
- if self.mode == LexMode::Markup && newlines >= 2 {
+ if self.mode == SyntaxMode::Markup && newlines >= 2 {
SyntaxKind::Parbreak
} else {
SyntaxKind::Space
@@ -965,9 +954,9 @@ impl ScannerExt for Scanner<'_> {
/// Whether a character will become a [`SyntaxKind::Space`] token.
#[inline]
-fn is_space(character: char, mode: LexMode) -> bool {
+fn is_space(character: char, mode: SyntaxMode) -> bool {
match mode {
- LexMode::Markup => matches!(character, ' ' | '\t') || is_newline(character),
+ SyntaxMode::Markup => matches!(character, ' ' | '\t') || is_newline(character),
_ => character.is_whitespace(),
}
}
diff --git a/crates/typst-syntax/src/lib.rs b/crates/typst-syntax/src/lib.rs
index 1249f88e..4741506c 100644
--- a/crates/typst-syntax/src/lib.rs
+++ b/crates/typst-syntax/src/lib.rs
@@ -30,5 +30,16 @@ pub use self::path::VirtualPath;
pub use self::source::Source;
pub use self::span::{Span, Spanned};
-use self::lexer::{LexMode, Lexer};
+use self::lexer::Lexer;
use self::parser::{reparse_block, reparse_markup};
+
+/// The syntax mode of a portion of Typst code.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum SyntaxMode {
+ /// Text and markup, as in the top level.
+ Markup,
+ /// Math atoms, operators, etc., as in equations.
+ Math,
+ /// Keywords, literals and operators, as after hashes.
+ Code,
+}
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index a6881580..b452c2c0 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -7,12 +7,12 @@ use typst_utils::default_math_class;
use unicode_math_class::MathClass;
use crate::set::{syntax_set, SyntaxSet};
-use crate::{ast, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode};
+use crate::{ast, set, Lexer, SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode};
/// Parses a source file as top-level markup.
pub fn parse(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse");
- let mut p = Parser::new(text, 0, LexMode::Markup);
+ let mut p = Parser::new(text, 0, SyntaxMode::Markup);
markup_exprs(&mut p, true, syntax_set!(End));
p.finish_into(SyntaxKind::Markup)
}
@@ -20,7 +20,7 @@ pub fn parse(text: &str) -> SyntaxNode {
/// Parses top-level code.
pub fn parse_code(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse code");
- let mut p = Parser::new(text, 0, LexMode::Code);
+ let mut p = Parser::new(text, 0, SyntaxMode::Code);
code_exprs(&mut p, syntax_set!(End));
p.finish_into(SyntaxKind::Code)
}
@@ -28,7 +28,7 @@ pub fn parse_code(text: &str) -> SyntaxNode {
/// Parses top-level math.
pub fn parse_math(text: &str) -> SyntaxNode {
let _scope = typst_timing::TimingScope::new("parse math");
- let mut p = Parser::new(text, 0, LexMode::Math);
+ let mut p = Parser::new(text, 0, SyntaxMode::Math);
math_exprs(&mut p, syntax_set!(End));
p.finish_into(SyntaxKind::Math)
}
@@ -63,7 +63,7 @@ pub(super) fn reparse_markup(
nesting: &mut usize,
top_level: bool,
) -> Option<Vec<SyntaxNode>> {
- let mut p = Parser::new(text, range.start, LexMode::Markup);
+ let mut p = Parser::new(text, range.start, SyntaxMode::Markup);
*at_start |= p.had_newline();
while !p.end() && p.current_start() < range.end {
// If not top-level and at a new RightBracket, stop the reparse.
@@ -205,7 +205,7 @@ fn reference(p: &mut Parser) {
/// Parses a mathematical equation: `$x$`, `$ x^2 $`.
fn equation(p: &mut Parser) {
let m = p.marker();
- p.enter_modes(LexMode::Math, AtNewline::Continue, |p| {
+ p.enter_modes(SyntaxMode::Math, AtNewline::Continue, |p| {
p.assert(SyntaxKind::Dollar);
math(p, syntax_set!(Dollar, End));
p.expect_closing_delimiter(m, SyntaxKind::Dollar);
@@ -615,7 +615,7 @@ fn code_exprs(p: &mut Parser, stop_set: SyntaxSet) {
/// Parses an atomic code expression embedded in markup or math.
fn embedded_code_expr(p: &mut Parser) {
- p.enter_modes(LexMode::Code, AtNewline::Stop, |p| {
+ p.enter_modes(SyntaxMode::Code, AtNewline::Stop, |p| {
p.assert(SyntaxKind::Hash);
if p.had_trivia() || p.end() {
p.expected("expression");
@@ -777,7 +777,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
/// Reparses a full content or code block.
pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
- let mut p = Parser::new(text, range.start, LexMode::Code);
+ let mut p = Parser::new(text, range.start, SyntaxMode::Code);
assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
block(&mut p);
(p.balanced && p.prev_end() == range.end)
@@ -796,7 +796,7 @@ fn block(p: &mut Parser) {
/// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) {
let m = p.marker();
- p.enter_modes(LexMode::Code, AtNewline::Continue, |p| {
+ p.enter_modes(SyntaxMode::Code, AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftBrace);
code(p, syntax_set!(RightBrace, RightBracket, RightParen, End));
p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
@@ -807,7 +807,7 @@ fn code_block(p: &mut Parser) {
/// Parses a content block: `[*Hi* there!]`.
fn content_block(p: &mut Parser) {
let m = p.marker();
- p.enter_modes(LexMode::Markup, AtNewline::Continue, |p| {
+ p.enter_modes(SyntaxMode::Markup, AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftBracket);
markup(p, true, true, syntax_set!(RightBracket, End));
p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
@@ -1516,10 +1516,10 @@ fn pattern_leaf<'s>(
/// ### Modes
///
/// The parser manages the transitions between the three modes of Typst through
-/// [lexer modes](`LexMode`) and [newline modes](`AtNewline`).
+/// [syntax modes](`SyntaxMode`) and [newline modes](`AtNewline`).
///
-/// The lexer modes map to the three Typst modes and are stored in the lexer,
-/// changing which`SyntaxKind`s it will generate.
+/// The syntax modes map to the three Typst modes and are stored in the lexer,
+/// changing which `SyntaxKind`s it will generate.
///
/// The newline mode is used to determine whether a newline should end the
/// current expression. If so, the parser temporarily changes `token`'s kind to
@@ -1529,7 +1529,7 @@ struct Parser<'s> {
/// The source text shared with the lexer.
text: &'s str,
/// A lexer over the source text with multiple modes. Defines the boundaries
- /// of tokens and determines their [`SyntaxKind`]. Contains the [`LexMode`]
+ /// of tokens and determines their [`SyntaxKind`]. Contains the [`SyntaxMode`]
/// defining our current Typst mode.
lexer: Lexer<'s>,
/// The newline mode: whether to insert a temporary end at newlines.
@@ -1612,7 +1612,7 @@ impl AtNewline {
AtNewline::RequireColumn(min_col) => {
// When the column is `None`, the newline doesn't start a
// column, and we continue parsing. This may happen on the
- // boundary of lexer modes, since we only report a column in
+ // boundary of syntax modes, since we only report a column in
// Markup.
column.is_some_and(|column| column <= min_col)
}
@@ -1643,8 +1643,8 @@ impl IndexMut<Marker> for Parser<'_> {
/// Creating/Consuming the parser and getting info about the current token.
impl<'s> Parser<'s> {
- /// Create a new parser starting from the given text offset and lexer mode.
- fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
+ /// Create a new parser starting from the given text offset and syntax mode.
+ fn new(text: &'s str, offset: usize, mode: SyntaxMode) -> Self {
let mut lexer = Lexer::new(text, mode);
lexer.jump(offset);
let nl_mode = AtNewline::Continue;
@@ -1825,13 +1825,13 @@ impl<'s> Parser<'s> {
self.nodes.insert(from, SyntaxNode::inner(kind, children));
}
- /// Parse within the [`LexMode`] for subsequent tokens (does not change the
+ /// Parse within the [`SyntaxMode`] for subsequent tokens (does not change the
/// current token). This may re-lex the final token on exit.
///
/// This function effectively repurposes the call stack as a stack of modes.
fn enter_modes(
&mut self,
- mode: LexMode,
+ mode: SyntaxMode,
stop: AtNewline,
func: impl FnOnce(&mut Parser<'s>),
) {
@@ -1891,7 +1891,8 @@ impl<'s> Parser<'s> {
}
let newline = if had_newline {
- let column = (lexer.mode() == LexMode::Markup).then(|| lexer.column(start));
+ let column =
+ (lexer.mode() == SyntaxMode::Markup).then(|| lexer.column(start));
let newline = Newline { column, parbreak };
if nl_mode.stop_at(newline, kind) {
// Insert a temporary `SyntaxKind::End` to halt the parser.
@@ -1938,7 +1939,7 @@ struct Checkpoint {
#[derive(Clone)]
struct PartialState {
cursor: usize,
- lex_mode: LexMode,
+ lex_mode: SyntaxMode,
token: Token,
}