summaryrefslogtreecommitdiff
path: root/crates/typst-syntax/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/typst-syntax/src/parser.rs')
-rw-r--r--crates/typst-syntax/src/parser.rs193
1 files changed, 124 insertions, 69 deletions
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index b77c5068..a1bd5ad0 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -4,16 +4,17 @@ use std::ops::Range;
use ecow::{eco_format, EcoString};
use unicode_math_class::MathClass;
-use crate::{ast, is_ident, is_newline, LexMode, Lexer, SyntaxKind, SyntaxNode};
+use crate::set::SyntaxSet;
+use crate::{ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxKind, SyntaxNode};
-/// Parse a source file.
+/// Parses a source file.
pub fn parse(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, 0, LexMode::Markup);
markup(&mut p, true, 0, |_| false);
p.finish().into_iter().next().unwrap()
}
-/// Parse top-level code.
+/// Parses top-level code.
pub fn parse_code(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, 0, LexMode::Code);
let m = p.marker();
@@ -23,13 +24,14 @@ pub fn parse_code(text: &str) -> SyntaxNode {
p.finish().into_iter().next().unwrap()
}
-/// Parse top-level math.
+/// Parses top-level math.
pub fn parse_math(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, 0, LexMode::Math);
math(&mut p, |_| false);
p.finish().into_iter().next().unwrap()
}
+/// Parses the contents of a file or content block.
fn markup(
p: &mut Parser,
mut at_start: bool,
@@ -55,15 +57,16 @@ fn markup(
continue;
}
- let prev = p.prev_end();
- markup_expr(p, &mut at_start);
- if !p.progress(prev) {
+ if p.at_set(set::MARKUP_EXPR) {
+ markup_expr(p, &mut at_start);
+ } else {
p.unexpected();
}
}
p.wrap(m, SyntaxKind::Markup);
}
+/// Reparses a subsection of markup incrementally.
pub(super) fn reparse_markup(
text: &str,
range: Range<usize>,
@@ -86,15 +89,17 @@ pub(super) fn reparse_markup(
continue;
}
- let prev = p.prev_end();
- markup_expr(&mut p, at_start);
- if !p.progress(prev) {
+ if p.at_set(set::MARKUP_EXPR) {
+ markup_expr(&mut p, at_start);
+ } else {
p.unexpected();
}
}
(p.balanced && p.current_start() == range.end).then(|| p.finish())
}
+/// Parses a single markup expression: This includes markup elements like
+/// spaces, text, and headings, and embedded code expressions.
fn markup_expr(p: &mut Parser, at_start: &mut bool) {
match p.current() {
SyntaxKind::Space
@@ -138,42 +143,52 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
*at_start = false;
}
+/// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) {
+ const END: SyntaxSet = SyntaxSet::new(&[
+ SyntaxKind::Star,
+ SyntaxKind::Parbreak,
+ SyntaxKind::RightBracket,
+ ]);
+
let m = p.marker();
p.assert(SyntaxKind::Star);
- markup(p, false, 0, |p| {
- p.at(SyntaxKind::Star)
- || p.at(SyntaxKind::Parbreak)
- || p.at(SyntaxKind::RightBracket)
- });
+ markup(p, false, 0, |p| p.at_set(END));
p.expect_closing_delimiter(m, SyntaxKind::Star);
p.wrap(m, SyntaxKind::Strong);
}
+/// Parses emphasized content: `_Emphasized_`.
fn emph(p: &mut Parser) {
+ const END: SyntaxSet = SyntaxSet::new(&[
+ SyntaxKind::Underscore,
+ SyntaxKind::Parbreak,
+ SyntaxKind::RightBracket,
+ ]);
+
let m = p.marker();
p.assert(SyntaxKind::Underscore);
- markup(p, false, 0, |p| {
- p.at(SyntaxKind::Underscore)
- || p.at(SyntaxKind::Parbreak)
- || p.at(SyntaxKind::RightBracket)
- });
+ markup(p, false, 0, |p| p.at_set(END));
p.expect_closing_delimiter(m, SyntaxKind::Underscore);
p.wrap(m, SyntaxKind::Emph);
}
+/// Parses a section heading: `= Introduction`.
fn heading(p: &mut Parser) {
+ const END: SyntaxSet =
+ SyntaxSet::new(&[SyntaxKind::Label, SyntaxKind::RightBracket, SyntaxKind::Space]);
+
let m = p.marker();
p.assert(SyntaxKind::HeadingMarker);
whitespace_line(p);
markup(p, false, usize::MAX, |p| {
- p.at(SyntaxKind::Label)
- || p.at(SyntaxKind::RightBracket)
- || (p.at(SyntaxKind::Space) && p.lexer.clone().next() == SyntaxKind::Label)
+ p.at_set(END)
+ && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label)
});
p.wrap(m, SyntaxKind::Heading);
}
+/// Parses an item in a bullet list: `- ...`.
fn list_item(p: &mut Parser) {
let m = p.marker();
let min_indent = p.column(p.current_start()) + 1;
@@ -183,6 +198,7 @@ fn list_item(p: &mut Parser) {
p.wrap(m, SyntaxKind::ListItem);
}
+/// Parses an item in an enumeration (numbered list): `+ ...` or `1. ...`.
fn enum_item(p: &mut Parser) {
let m = p.marker();
let min_indent = p.column(p.current_start()) + 1;
@@ -192,20 +208,23 @@ fn enum_item(p: &mut Parser) {
p.wrap(m, SyntaxKind::EnumItem);
}
+/// Parses an item in a term list: `/ Term: Details`.
fn term_item(p: &mut Parser) {
+ const TERM_END: SyntaxSet =
+ SyntaxSet::new(&[SyntaxKind::Colon, SyntaxKind::RightBracket]);
+
let m = p.marker();
p.assert(SyntaxKind::TermMarker);
let min_indent = p.column(p.prev_end());
whitespace_line(p);
- markup(p, false, usize::MAX, |p| {
- p.at(SyntaxKind::Colon) || p.at(SyntaxKind::RightBracket)
- });
+ markup(p, false, usize::MAX, |p| p.at_set(TERM_END));
p.expect(SyntaxKind::Colon);
whitespace_line(p);
markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
p.wrap(m, SyntaxKind::TermItem);
}
+/// Parses a reference: `@target`, `@target[..]`.
fn reference(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::RefMarker);
@@ -215,12 +234,14 @@ fn reference(p: &mut Parser) {
p.wrap(m, SyntaxKind::Ref);
}
+/// Consumes whitespace that does not contain a newline.
fn whitespace_line(p: &mut Parser) {
while !p.newline() && p.current().is_trivia() {
p.eat();
}
}
+/// Parses a mathematical equation: `$x$`, `$ x^2 $`.
fn equation(p: &mut Parser) {
let m = p.marker();
p.enter(LexMode::Math);
@@ -231,22 +252,26 @@ fn equation(p: &mut Parser) {
p.wrap(m, SyntaxKind::Equation);
}
+/// Parses the contents of a mathematical equation: `x^2 + 1`.
fn math(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
let m = p.marker();
while !p.eof() && !stop(p) {
- let prev = p.prev_end();
- math_expr(p);
- if !p.progress(prev) {
+ if p.at_set(set::MATH_EXPR) {
+ math_expr(p);
+ } else {
p.unexpected();
}
}
p.wrap(m, SyntaxKind::Math);
}
+/// Parses a single math expression: This includes math elements like
+/// attachment, fractions, and roots, and embedded code expressions.
fn math_expr(p: &mut Parser) {
math_expr_prec(p, 0, SyntaxKind::Eof)
}
+/// Parses a math expression with at least the given precedence.
fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
let m = p.marker();
let mut continuable = false;
@@ -415,9 +440,9 @@ fn math_delimited(p: &mut Parser) {
return;
}
- let prev = p.prev_end();
- math_expr(p);
- if !p.progress(prev) {
+ if p.at_set(set::MATH_EXPR) {
+ math_expr(p);
+ } else {
p.unexpected();
}
}
@@ -520,9 +545,9 @@ fn math_args(p: &mut Parser) {
_ => {}
}
- let prev = p.prev_end();
- math_expr(p);
- if !p.progress(prev) {
+ if p.at_set(set::MATH_EXPR) {
+ math_expr(p);
+ } else {
p.unexpected();
}
@@ -561,27 +586,34 @@ fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, named: Option<Marker>) {
}
}
+/// Parses the contents of a code block.
fn code(p: &mut Parser, stop: impl FnMut(&Parser) -> bool) {
let m = p.marker();
code_exprs(p, stop);
p.wrap(m, SyntaxKind::Code);
}
+/// Parses a sequence of code expressions.
fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
while !p.eof() && !stop(p) {
p.enter_newline_mode(NewlineMode::Contextual);
- let prev = p.prev_end();
- code_expr(p);
- if p.progress(prev) && !p.eof() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) {
- p.expected("semicolon or line break");
+
+ let at_expr = p.at_set(set::CODE_EXPR);
+ if at_expr {
+ code_expr(p);
+ if !p.eof() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) {
+ p.expected("semicolon or line break");
+ }
}
+
p.exit_newline_mode();
- if !p.progress(prev) && !p.eof() {
+ if !at_expr && !p.eof() {
p.unexpected();
}
}
}
+/// Parses a single code expression.
fn code_expr(p: &mut Parser) {
code_expr_prec(p, false, 0, false)
}
@@ -590,27 +622,19 @@ fn code_expr_or_pattern(p: &mut Parser) {
code_expr_prec(p, false, 0, true)
}
+/// Parses a code expression embedded in markup or math.
fn embedded_code_expr(p: &mut Parser) {
p.enter_newline_mode(NewlineMode::Stop);
p.enter(LexMode::Code);
p.assert(SyntaxKind::Hash);
p.unskip();
- let stmt = matches!(
- p.current(),
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::Import
- | SyntaxKind::Include
- | SyntaxKind::Return
- );
-
- let prev = p.prev_end();
+ let stmt = p.at_set(set::STMT);
+ let at = p.at_set(set::ATOMIC_CODE_EXPR);
code_expr_prec(p, true, 0, false);
// Consume error for things like `#12p` or `#"abc\"`.#
- if !p.progress(prev) && !p.current().is_trivia() && !p.eof() {
+ if !at && !p.current().is_trivia() && !p.eof() {
p.unexpected();
}
@@ -625,6 +649,7 @@ fn embedded_code_expr(p: &mut Parser) {
p.exit_newline_mode();
}
+/// Parses a code expression with at least the given precedence.
fn code_expr_prec(
p: &mut Parser,
atomic: bool,
@@ -632,7 +657,8 @@ fn code_expr_prec(
allow_destructuring: bool,
) {
let m = p.marker();
- if let (false, Some(op)) = (atomic, ast::UnOp::from_kind(p.current())) {
+ if !atomic && p.at_set(set::UNARY_OP) {
+ let op = ast::UnOp::from_kind(p.current()).unwrap();
p.eat();
code_expr_prec(p, atomic, op.precedence(), false);
p.wrap(m, SyntaxKind::Unary);
@@ -661,17 +687,19 @@ fn code_expr_prec(
continue;
}
- let binop =
- if ast::BinOp::NotIn.precedence() >= min_prec && p.eat_if(SyntaxKind::Not) {
- if p.at(SyntaxKind::In) {
- Some(ast::BinOp::NotIn)
- } else {
- p.expected("keyword `in`");
- break;
- }
+ let binop = if p.at_set(set::BINARY_OP) {
+ ast::BinOp::from_kind(p.current())
+ } else if min_prec <= ast::BinOp::NotIn.precedence() && p.eat_if(SyntaxKind::Not)
+ {
+ if p.at(SyntaxKind::In) {
+ Some(ast::BinOp::NotIn)
} else {
- ast::BinOp::from_kind(p.current())
- };
+ p.expected("keyword `in`");
+ break;
+ }
+ } else {
+ None
+ };
if let Some(op) = binop {
let mut prec = op.precedence();
@@ -694,6 +722,9 @@ fn code_expr_prec(
}
}
+/// Parses an primary in a code expression. These are the atoms that unary and
+/// binary operations, functions calls, and field accesses start with / are
+/// composed of.
fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) {
let m = p.marker();
match p.current() {
@@ -748,6 +779,7 @@ fn code_primary(p: &mut Parser, atomic: bool, allow_destructuring: bool) {
}
}
+/// Parses a content or code block.
fn block(p: &mut Parser) {
match p.current() {
SyntaxKind::LeftBracket => content_block(p),
@@ -756,6 +788,7 @@ fn block(p: &mut Parser) {
}
}
+/// Reparses a full content or code block.
pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
let mut p = Parser::new(text, range.start, LexMode::Code);
assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
@@ -764,22 +797,26 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod
.then(|| p.finish().into_iter().next().unwrap())
}
+/// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) {
+ const END: SyntaxSet = SyntaxSet::new(&[
+ SyntaxKind::RightBrace,
+ SyntaxKind::RightBracket,
+ SyntaxKind::RightParen,
+ ]);
+
let m = p.marker();
p.enter(LexMode::Code);
p.enter_newline_mode(NewlineMode::Continue);
p.assert(SyntaxKind::LeftBrace);
- code(p, |p| {
- p.at(SyntaxKind::RightBrace)
- || p.at(SyntaxKind::RightBracket)
- || p.at(SyntaxKind::RightParen)
- });
+ code(p, |p| p.at_set(END));
p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
p.exit();
p.exit_newline_mode();
p.wrap(m, SyntaxKind::CodeBlock);
}
+/// Parses a content block: `[*Hi* there!]`.
fn content_block(p: &mut Parser) {
let m = p.marker();
p.enter(LexMode::Markup);
@@ -964,6 +1001,7 @@ fn item(p: &mut Parser, keyed: bool) -> SyntaxKind {
kind
}
+/// Parses a function call's argument list: `(12pt, y)`.
fn args(p: &mut Parser) {
if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) {
p.expected("argument list");
@@ -987,6 +1025,7 @@ enum PatternKind {
Other,
}
+/// Parses a pattern that can be assigned to.
fn pattern(p: &mut Parser) -> PatternKind {
let m = p.marker();
if p.at(SyntaxKind::LeftParen) {
@@ -1005,6 +1044,7 @@ fn pattern(p: &mut Parser) -> PatternKind {
}
}
+/// Parses a let binding: `let x = 1`.
fn let_binding(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Let);
@@ -1037,6 +1077,7 @@ fn let_binding(p: &mut Parser) {
p.wrap(m, SyntaxKind::LetBinding);
}
+/// Parses a set rule: `set text(...)`.
fn set_rule(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Set);
@@ -1055,6 +1096,7 @@ fn set_rule(p: &mut Parser) {
p.wrap(m, SyntaxKind::SetRule);
}
+/// Parses a show rule: `show heading: it => emph(it.body)`.
fn show_rule(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Show);
@@ -1073,6 +1115,7 @@ fn show_rule(p: &mut Parser) {
p.wrap(m, SyntaxKind::ShowRule);
}
+/// Parses an if-else conditional: `if x { y } else { z }`.
fn conditional(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::If);
@@ -1088,6 +1131,7 @@ fn conditional(p: &mut Parser) {
p.wrap(m, SyntaxKind::Conditional);
}
+/// Parses a while loop: `while x { y }`.
fn while_loop(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::While);
@@ -1096,6 +1140,7 @@ fn while_loop(p: &mut Parser) {
p.wrap(m, SyntaxKind::WhileLoop);
}
+/// Parses a for loop: `for x in y { z }`.
fn for_loop(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::For);
@@ -1115,6 +1160,7 @@ fn for_loop(p: &mut Parser) {
p.wrap(m, SyntaxKind::ForLoop);
}
+/// Parses a module import: `import "utils.typ": a, b, c`.
fn module_import(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Import);
@@ -1131,6 +1177,7 @@ fn module_import(p: &mut Parser) {
p.wrap(m, SyntaxKind::ModuleImport);
}
+/// Parses items to import from a module: `a, b, c`.
fn import_items(p: &mut Parser) {
let m = p.marker();
while !p.eof() && !p.at(SyntaxKind::Semicolon) {
@@ -1153,6 +1200,7 @@ fn import_items(p: &mut Parser) {
p.wrap(m, SyntaxKind::ImportItems);
}
+/// Parses a module include: `include "chapter1.typ"`.
fn module_include(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Include);
@@ -1160,22 +1208,25 @@ fn module_include(p: &mut Parser) {
p.wrap(m, SyntaxKind::ModuleInclude);
}
+/// Parses a break from a loop: `break`.
fn break_stmt(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Break);
p.wrap(m, SyntaxKind::LoopBreak);
}
+/// Parses a continue in a loop: `continue`.
fn continue_stmt(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Continue);
p.wrap(m, SyntaxKind::LoopContinue);
}
+/// Parses a return from a function: `return`, `return x + 1`.
fn return_stmt(p: &mut Parser) {
let m = p.marker();
p.assert(SyntaxKind::Return);
- if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) {
+ if p.at_set(set::CODE_EXPR) {
code_expr(p);
}
p.wrap(m, SyntaxKind::FuncReturn);
@@ -1498,6 +1549,10 @@ impl<'s> Parser<'s> {
self.current == kind
}
+ fn at_set(&self, set: SyntaxSet) -> bool {
+ set.contains(self.current)
+ }
+
#[track_caller]
fn assert(&mut self, kind: SyntaxKind) {
assert_eq!(self.current, kind);