summaryrefslogtreecommitdiff
path: root/src/syntax/parser.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2023-01-20 14:05:17 +0100
committerLaurenz <laurmaedje@gmail.com>2023-01-20 14:05:35 +0100
commitdd331f007cb9c9968605f8d3eaef8fb498c21322 (patch)
treef1b1490758ec53fd204724a325158d16c980d131 /src/syntax/parser.rs
parent40561e57fbbc68becac07acd54a34f94f591f277 (diff)
Rewrite parser
Diffstat (limited to 'src/syntax/parser.rs')
-rw-r--r--src/syntax/parser.rs2184
1 files changed, 830 insertions, 1354 deletions
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
index 1584e59b..0e1b52b1 100644
--- a/src/syntax/parser.rs
+++ b/src/syntax/parser.rs
@@ -1,262 +1,93 @@
use std::collections::HashSet;
-use std::fmt::{self, Display, Formatter};
-use std::mem;
+use std::ops::Range;
-use super::ast::{self, Assoc, BinOp, UnOp};
-use super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
+use super::{ast, is_newline, ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
use crate::util::{format_eco, EcoString};
/// Parse a source file.
pub fn parse(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Markup);
- markup(&mut p, true);
+ let mut p = Parser::new(text, 0, LexMode::Markup);
+ markup(&mut p, true, 0, |_| false);
p.finish().into_iter().next().unwrap()
}
-/// Parse code directly, only used for syntax highlighting.
+/// Parse code directly.
+///
+/// This is only used for syntax highlighting.
pub fn parse_code(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Code);
- p.perform(SyntaxKind::CodeBlock, code);
+ let mut p = Parser::new(text, 0, LexMode::Code);
+ let m = p.marker();
+ code(&mut p, |_| false);
+ p.wrap(m, SyntaxKind::CodeBlock);
p.finish().into_iter().next().unwrap()
}
-/// Reparse a code block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_code_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBrace) {
- return None;
- }
-
- code_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a content block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_content_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBracket) {
- return None;
- }
-
- content_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a sequence markup elements without the topmost node.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(super) fn reparse_markup_elements(
- prefix: &str,
- text: &str,
- end_pos: usize,
- differential: isize,
- reference: &[SyntaxNode],
+fn markup(
+ p: &mut Parser,
mut at_start: bool,
min_indent: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Markup);
-
- let mut node: Option<&SyntaxNode> = None;
- let mut iter = reference.iter();
- let mut offset = differential;
- let mut replaced = 0;
- let mut stopped = false;
-
- 'outer: while !p.eof() {
- if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() {
- if p.column(p.current_end()) < min_indent {
- return None;
+ mut stop: impl FnMut(SyntaxKind) -> bool,
+) {
+ let m = p.marker();
+ while !p.eof() && !stop(p.current) {
+ if p.newline() {
+ at_start = true;
+ if min_indent > 0 && p.column(p.current_end()) < min_indent {
+ break;
}
- }
-
- markup_node(&mut p, &mut at_start);
-
- if p.prev_end() <= end_pos {
+ p.eat();
continue;
}
- let recent = p.marker().before(&p).unwrap();
- let recent_start = p.prev_end() - recent.len();
-
- while offset <= recent_start as isize {
- if let Some(node) = node {
- // The nodes are equal, at the same position and have the
- // same content. The parsing trees have converged again, so
- // the reparse may stop here.
- if offset == recent_start as isize && node == recent {
- replaced -= 1;
- stopped = true;
- break 'outer;
- }
- }
-
- if let Some(node) = node {
- offset += node.len() as isize;
- }
-
- node = iter.next();
- if node.is_none() {
- break;
- }
-
- replaced += 1;
+ let prev = p.prev_end();
+ markup_expr(p, &mut at_start);
+ if !p.progress(prev) {
+ p.unexpected();
}
}
-
- if p.eof() && !stopped {
- replaced = reference.len();
- }
-
- let (mut res, terminated) = p.consume()?;
- if stopped {
- res.pop().unwrap();
- }
-
- Some((res, terminated, replaced))
+ p.wrap(m, SyntaxKind::Markup);
}
-/// Parse markup.
-///
-/// If `at_start` is true, things like headings that may only appear at the
-/// beginning of a line or content block are initially allowed.
-fn markup(p: &mut Parser, mut at_start: bool) {
- p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| {
- while !p.eof() {
- markup_node(p, &mut at_start);
+pub(super) fn reparse_markup(
+ text: &str,
+ range: Range<usize>,
+ at_start: &mut bool,
+ mut stop: impl FnMut(SyntaxKind) -> bool,
+) -> Option<Vec<SyntaxNode>> {
+ let mut p = Parser::new(&text, range.start, LexMode::Markup);
+ while !p.eof() && !stop(p.current) && p.current_start() < range.end {
+ if p.newline() {
+ *at_start = true;
+ p.eat();
+ continue;
}
- });
-}
-
-/// Parse markup that stays right of the given `column`.
-fn markup_indented(p: &mut Parser, min_indent: usize) {
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- let marker = p.marker();
- let mut at_start = false;
- while !p.eof() {
- match p.peek() {
- Some(SyntaxKind::Space { newlines: (1..) })
- if p.column(p.current_end()) < min_indent =>
- {
- break;
- }
- _ => {}
+ let prev = p.prev_end();
+ markup_expr(&mut p, at_start);
+ if !p.progress(prev) {
+ p.unexpected();
}
-
- markup_node(p, &mut at_start);
}
-
- marker.end(p, SyntaxKind::Markup { min_indent });
+ (p.balanced && p.current_start() == range.end).then(|| p.finish())
}
-/// Parse a line of markup that can prematurely end if `f` returns true.
-fn markup_line<F>(p: &mut Parser, mut f: F)
-where
- F: FnMut(SyntaxKind) -> bool,
-{
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| {
- let mut at_start = false;
- while let Some(kind) = p.peek() {
- if let SyntaxKind::Space { newlines: (1..) } = kind {
- break;
- }
-
- if f(kind) {
- break;
- }
-
- markup_node(p, &mut at_start);
- }
- });
-}
-
-fn markup_node(p: &mut Parser, at_start: &mut bool) {
- let Some(token) = p.peek() else { return };
- match token {
- // Whitespace.
- SyntaxKind::Space { newlines } => {
- *at_start |= newlines > 0;
- p.eat();
- return;
- }
-
- // Comments.
- SyntaxKind::LineComment | SyntaxKind::BlockComment => {
- p.eat();
- return;
- }
-
- // Text and markup.
- SyntaxKind::Text
- | SyntaxKind::Linebreak
- | SyntaxKind::SmartQuote { .. }
- | SyntaxKind::Escape
- | SyntaxKind::Shorthand
- | SyntaxKind::Symbol
- | SyntaxKind::Link
- | SyntaxKind::Raw { .. }
- | SyntaxKind::Ref => p.eat(),
-
- // Math.
- SyntaxKind::Dollar => math(p),
-
- // Strong, emph, heading.
+fn markup_expr(p: &mut Parser, at_start: &mut bool) {
+ match p.current() {
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
- SyntaxKind::Eq => heading(p, *at_start),
-
- // Lists.
- SyntaxKind::Minus => list_item(p, *at_start),
- SyntaxKind::Plus | SyntaxKind::EnumNumbering => enum_item(p, *at_start),
- SyntaxKind::Slash => {
- term_item(p, *at_start).ok();
- }
- SyntaxKind::Colon => {
- let marker = p.marker();
- p.eat();
- marker.convert(p, SyntaxKind::Text);
- }
+ SyntaxKind::HeadingMarker if *at_start => heading(p),
+ SyntaxKind::ListMarker if *at_start => list_item(p),
+ SyntaxKind::EnumMarker if *at_start => enum_item(p),
+ SyntaxKind::TermMarker if *at_start => term_item(p),
+ SyntaxKind::Dollar => equation(p),
+
+ SyntaxKind::HeadingMarker
+ | SyntaxKind::ListMarker
+ | SyntaxKind::EnumMarker
+ | SyntaxKind::TermMarker
+ | SyntaxKind::Colon => p.convert(SyntaxKind::Text),
- // Hashtag + keyword / identifier.
SyntaxKind::Ident
- | SyntaxKind::Label
| SyntaxKind::Let
| SyntaxKind::Set
| SyntaxKind::Show
@@ -267,147 +98,175 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Include
| SyntaxKind::Break
| SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
+ | SyntaxKind::Return
+ | SyntaxKind::LeftBrace
+ | SyntaxKind::LeftBracket => embedded_code_expr(p),
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::Text
+ | SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::Symbol
+ | SyntaxKind::SmartQuote
+ | SyntaxKind::Raw
+ | SyntaxKind::Link
+ | SyntaxKind::Label
+ | SyntaxKind::Ref => p.eat(),
- SyntaxKind::Error => p.eat(),
- _ => p.unexpected(),
- };
+ SyntaxKind::Space
+ | SyntaxKind::Parbreak
+ | SyntaxKind::LineComment
+ | SyntaxKind::BlockComment => {
+ p.eat();
+ return;
+ }
+ _ => {}
+ }
*at_start = false;
}
fn strong(p: &mut Parser) {
- p.perform(SyntaxKind::Strong, |p| {
- p.start_group(Group::Strong);
- markup(p, false);
- p.end_group();
- })
+ let m = p.marker();
+ p.expect(SyntaxKind::Star);
+ markup(p, false, 0, |kind| {
+ kind == SyntaxKind::Star
+ || kind == SyntaxKind::Parbreak
+ || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Star);
+ p.wrap(m, SyntaxKind::Strong);
}
fn emph(p: &mut Parser) {
- p.perform(SyntaxKind::Emph, |p| {
- p.start_group(Group::Emph);
- markup(p, false);
- p.end_group();
- })
+ let m = p.marker();
+ p.expect(SyntaxKind::Underscore);
+ markup(p, false, 0, |kind| {
+ kind == SyntaxKind::Underscore
+ || kind == SyntaxKind::Parbreak
+ || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Underscore);
+ p.wrap(m, SyntaxKind::Emph);
}
-fn heading(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- let mut markers = vec![];
- while p.at(SyntaxKind::Eq) {
- markers.push(p.marker());
- p.eat();
- }
-
- if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
- p.eat_while(|kind| kind == SyntaxKind::Space { newlines: 0 });
- markup_line(p, |kind| matches!(kind, SyntaxKind::Label));
- marker.end(p, SyntaxKind::Heading);
- } else {
- for marker in markers {
- marker.convert(p, SyntaxKind::Text);
- }
- }
+fn heading(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::HeadingMarker);
+ whitespace(p);
+ markup(p, false, usize::MAX, |kind| {
+ kind == SyntaxKind::Label || kind == SyntaxKind::RightBracket
+ });
+ p.wrap(m, SyntaxKind::Heading);
}
-fn list_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- p.assert(SyntaxKind::Minus);
-
+fn list_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::ListMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::ListItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::ListItem);
}
-fn enum_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- p.eat();
-
+fn enum_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::EnumMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::EnumItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::EnumItem);
}
-fn term_item(p: &mut Parser, at_start: bool) -> ParseResult {
- let marker = p.marker();
- p.eat();
-
+fn term_item(p: &mut Parser) {
+ let m = p.marker();
+ p.expect(SyntaxKind::TermMarker);
let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_line(p, |node| matches!(node, SyntaxKind::Colon));
- p.expect(SyntaxKind::Colon)?;
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::TermItem);
- } else {
- marker.convert(p, SyntaxKind::Text);
- }
-
- Ok(())
+ whitespace(p);
+ markup(p, false, usize::MAX, |kind| {
+ kind == SyntaxKind::Colon || kind == SyntaxKind::RightBracket
+ });
+ p.expect(SyntaxKind::Colon);
+ whitespace(p);
+ markup(p, false, min_indent, |kind| kind == SyntaxKind::RightBracket);
+ p.wrap(m, SyntaxKind::TermItem);
}
-fn embedded_expr(p: &mut Parser) {
- // Does the expression need termination or can content follow directly?
- let stmt = matches!(
- p.peek(),
- Some(
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::Import
- | SyntaxKind::Include
- )
- );
-
- p.start_group(Group::Expr);
- let res = expr_prec(p, true, 0);
- if stmt && res.is_ok() && !p.eof() {
- p.expected("semicolon or line break");
+fn whitespace(p: &mut Parser) {
+ while p.current().is_trivia() {
+ p.eat();
}
- p.end_group();
}
-fn math(p: &mut Parser) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(Group::Math);
- while !p.eof() {
- math_node(p);
- }
- p.end_group();
- });
+fn equation(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Math);
+ p.expect(SyntaxKind::Dollar);
+ math(p, |kind| kind == SyntaxKind::Dollar);
+ p.expect(SyntaxKind::Dollar);
+ p.exit();
+ p.wrap(m, SyntaxKind::Math);
}
-fn math_node(p: &mut Parser) {
- math_node_prec(p, 0, None)
+fn math(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) {
+ while !p.eof() && !stop(p.current()) {
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
}
-fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
- let marker = p.marker();
- math_primary(p);
+fn math_expr(p: &mut Parser) {
+ math_expr_prec(p, 0, SyntaxKind::Eof)
+}
- loop {
- let (kind, mut prec, assoc, stop) = match p.peek() {
- v if v == stop => break,
- Some(SyntaxKind::Underscore) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat))
- }
- Some(SyntaxKind::Hat) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore))
+fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
+ let m = p.marker();
+ match p.current() {
+ SyntaxKind::Ident => {
+ p.eat();
+ if p.directly_at(SyntaxKind::Atom) && p.current_text() == "(" {
+ math_args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
}
- Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None),
- _ => break,
+ }
+
+ SyntaxKind::Atom => match p.current_text() {
+ "(" => math_delimited(p, ")"),
+ "{" => math_delimited(p, "}"),
+ "[" => math_delimited(p, "]"),
+ _ => p.eat(),
+ },
+
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::If
+ | SyntaxKind::While
+ | SyntaxKind::For
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ | SyntaxKind::Break
+ | SyntaxKind::Continue
+ | SyntaxKind::Return
+ | SyntaxKind::LeftBrace
+ | SyntaxKind::LeftBracket => embedded_code_expr(p),
+
+ SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::Symbol
+ | SyntaxKind::AlignPoint
+ | SyntaxKind::Str => p.eat(),
+
+ _ => return,
+ }
+
+ while !p.eof() && !p.at(stop) {
+ let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else {
+ break;
};
if prec < min_prec {
@@ -415,115 +274,140 @@ fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
}
match assoc {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
}
p.eat();
- math_node_prec(p, prec, stop);
-
- // Allow up to two different scripts. We do not risk encountering the
- // previous script kind again here due to right-associativity.
+ math_expr_prec(p, prec, stop);
if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) {
- math_node_prec(p, prec, None);
+ math_expr_prec(p, prec, SyntaxKind::Eof);
}
- marker.end(p, kind);
+ p.wrap(m, kind);
}
}
-/// Parse a primary math node.
-fn math_primary(p: &mut Parser) {
- let Some(token) = p.peek() else { return };
- match token {
- // Spaces and expressions.
- SyntaxKind::Space { .. }
- | SyntaxKind::Linebreak
- | SyntaxKind::Escape
- | SyntaxKind::Str
- | SyntaxKind::Shorthand
- | SyntaxKind::AlignPoint
- | SyntaxKind::Symbol => p.eat(),
-
- // Atoms.
- SyntaxKind::Atom => match p.peek_src() {
- "(" => math_group(p, Group::MathRow('(', ')')),
- "{" => math_group(p, Group::MathRow('{', '}')),
- "[" => math_group(p, Group::MathRow('[', ']')),
- _ => p.eat(),
- },
+fn math_delimited(p: &mut Parser, closing: &str) {
+ let m = p.marker();
+ p.expect(SyntaxKind::Atom);
+ while !p.eof()
+ && !p.at(SyntaxKind::Dollar)
+ && (!p.at(SyntaxKind::Atom) || p.current_text() != closing)
+ {
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ p.expect(SyntaxKind::Atom);
+ p.wrap(m, SyntaxKind::Math);
+}
- // Identifiers and math calls.
- SyntaxKind::Ident => {
- let marker = p.marker();
- p.eat();
+fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
+ match kind {
+ SyntaxKind::Underscore => {
+ Some((SyntaxKind::Script, SyntaxKind::Hat, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Hat => {
+ Some((SyntaxKind::Script, SyntaxKind::Underscore, ast::Assoc::Right, 2))
+ }
+ SyntaxKind::Slash => {
+ Some((SyntaxKind::Frac, SyntaxKind::Eof, ast::Assoc::Left, 1))
+ }
+ _ => None,
+ }
+}
- // Parenthesis or bracket means this is a function call.
- if matches!(p.peek_direct(), Some(SyntaxKind::Atom) if p.peek_src() == "(") {
- marker.perform(p, SyntaxKind::FuncCall, math_args);
+fn math_args(p: &mut Parser) {
+ p.expect(SyntaxKind::Atom);
+ let m = p.marker();
+ let mut m2 = p.marker();
+ while !p.eof() {
+ match p.current_text() {
+ ")" => break,
+ "," => {
+ p.wrap(m2, SyntaxKind::Math);
+ p.convert(SyntaxKind::Comma);
+ m2 = p.marker();
+ continue;
}
+ _ => {}
}
- // Hashtag + keyword / identifier.
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::If
- | SyntaxKind::While
- | SyntaxKind::For
- | SyntaxKind::Import
- | SyntaxKind::Include
- | SyntaxKind::Break
- | SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
-
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
-
- _ => p.unexpected(),
+ let prev = p.prev_end();
+ math_expr(p);
+ if !p.progress(prev) {
+ p.unexpected();
+ }
+ }
+ if m2 != p.marker() {
+ p.wrap(m2, SyntaxKind::Math);
}
+ p.wrap(m, SyntaxKind::Args);
+ p.expect(SyntaxKind::Atom);
}
-fn math_group(p: &mut Parser, group: Group) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(group);
- while !p.eof() {
- math_node(p);
+fn code(p: &mut Parser, mut stop: impl FnMut(SyntaxKind) -> bool) {
+ while !p.eof() && !stop(p.current()) {
+ p.stop_at_newline(true);
+ let prev = p.prev_end();
+ code_expr(p);
+ if p.progress(prev)
+ && !p.eof()
+ && !stop(p.current())
+ && !p.eat_if(SyntaxKind::Semicolon)
+ {
+ p.expected("semicolon or line break");
}
- p.end_group();
- })
+ p.unstop();
+ if !p.progress(prev) && !p.eof() {
+ p.unexpected();
+ }
+ }
}
-fn expr(p: &mut Parser) -> ParseResult {
- expr_prec(p, false, 0)
+fn code_expr(p: &mut Parser) {
+ code_expr_prec(p, false, 0)
}
-/// Parse an expression with operators having at least the minimum precedence.
-///
-/// If `atomic` is true, this does not parse binary operations and arrow
-/// functions, which is exactly what we want in a shorthand expression directly
-/// in markup.
-///
-/// Stops parsing at operations with lower precedence than `min_prec`,
-fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
- let marker = p.marker();
+fn embedded_code_expr(p: &mut Parser) {
+ let stmt = matches!(
+ p.current(),
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ );
- // Start the unary expression.
- match p.peek().and_then(UnOp::from_token) {
- Some(op) if !atomic => {
- p.eat();
- let prec = op.precedence();
- expr_prec(p, atomic, prec)?;
- marker.end(p, SyntaxKind::Unary);
- }
- _ => primary(p, atomic)?,
- };
+ p.stop_at_newline(true);
+ p.enter(LexMode::Code);
+ code_expr_prec(p, true, 0);
+ let semi = p.eat_if(SyntaxKind::Semicolon);
+ if stmt && !semi && !p.eof() && !p.at(SyntaxKind::RightBracket) {
+ p.expected("semicolon or line break");
+ }
+ p.exit();
+ p.unstop();
+}
+
+fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) {
+ let m = p.marker();
+ if let Some(op) = ast::UnOp::from_kind(p.current()) {
+ p.eat();
+ code_expr_prec(p, atomic, op.precedence());
+ p.wrap(m, SyntaxKind::Unary);
+ } else {
+ code_primary(p, atomic);
+ }
loop {
- // Parenthesis or bracket means this is a function call.
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() {
- marker.perform(p, SyntaxKind::FuncCall, args)?;
+ if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket)
+ {
+ args(p);
+ p.wrap(m, SyntaxKind::FuncCall);
continue;
}
@@ -531,711 +415,571 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
break;
}
- // Method call or field access.
if p.eat_if(SyntaxKind::Dot) {
- ident(p)?;
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct()
+ p.expect(SyntaxKind::Ident);
+ if p.directly_at(SyntaxKind::LeftParen)
+ || p.directly_at(SyntaxKind::LeftBracket)
{
- marker.perform(p, SyntaxKind::MethodCall, args)?;
+ args(p);
+ p.wrap(m, SyntaxKind::MethodCall);
} else {
- marker.end(p, SyntaxKind::FieldAccess);
+ p.wrap(m, SyntaxKind::FieldAccess)
}
continue;
}
- let op = if p.eat_if(SyntaxKind::Not) {
+ let binop = if p.eat_if(SyntaxKind::Not) {
if p.at(SyntaxKind::In) {
- BinOp::NotIn
+ Some(ast::BinOp::NotIn)
} else {
p.expected("keyword `in`");
- return Err(ParseError);
+ break;
}
} else {
- match p.peek().and_then(BinOp::from_token) {
- Some(binop) => binop,
- None => break,
- }
+ ast::BinOp::from_kind(p.current())
};
- let mut prec = op.precedence();
- if prec < min_prec {
- break;
- }
+ if let Some(op) = binop {
+ let mut prec = op.precedence();
+ if prec < min_prec {
+ break;
+ }
- p.eat();
+ match op.assoc() {
+ ast::Assoc::Left => prec += 1,
+ ast::Assoc::Right => {}
+ }
- match op.assoc() {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
+ p.eat();
+ code_expr_prec(p, false, prec);
+ p.wrap(m, SyntaxKind::Binary);
+ continue;
}
- marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?;
+ break;
}
-
- Ok(())
}
-fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
- match p.peek() {
- // Literals and few other things.
- Some(
- SyntaxKind::None
- | SyntaxKind::Auto
- | SyntaxKind::Int
- | SyntaxKind::Float
- | SyntaxKind::Bool
- | SyntaxKind::Numeric
- | SyntaxKind::Str
- | SyntaxKind::Label
- | SyntaxKind::Raw { .. },
- ) => {
- p.eat();
- Ok(())
- }
-
- // Things that start with an identifier.
- Some(SyntaxKind::Ident) => {
- let marker = p.marker();
+fn code_primary(p: &mut Parser, atomic: bool) {
+ let m = p.marker();
+ match p.current() {
+ SyntaxKind::Ident => {
p.eat();
-
- // Arrow means this is a closure's lone parameter.
if !atomic && p.at(SyntaxKind::Arrow) {
- marker.end(p, SyntaxKind::Params);
- p.assert(SyntaxKind::Arrow);
- marker.perform(p, SyntaxKind::Closure, expr)
- } else {
- Ok(())
+ p.wrap(m, SyntaxKind::Params);
+ p.expect(SyntaxKind::Arrow);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Closure);
}
}
- // Structures.
- Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::Dollar) => Ok(math(p)),
-
- // Keywords.
- Some(SyntaxKind::Let) => let_binding(p),
- Some(SyntaxKind::Set) => set_rule(p),
- Some(SyntaxKind::Show) => show_rule(p),
- Some(SyntaxKind::If) => conditional(p),
- Some(SyntaxKind::While) => while_loop(p),
- Some(SyntaxKind::For) => for_loop(p),
- Some(SyntaxKind::Import) => module_import(p),
- Some(SyntaxKind::Include) => module_include(p),
- Some(SyntaxKind::Break) => break_stmt(p),
- Some(SyntaxKind::Continue) => continue_stmt(p),
- Some(SyntaxKind::Return) => return_stmt(p),
-
- Some(SyntaxKind::Error) => {
- p.eat();
- Err(ParseError)
- }
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftParen => with_paren(p),
+ SyntaxKind::Dollar => equation(p),
+ SyntaxKind::Let => let_binding(p),
+ SyntaxKind::Set => set_rule(p),
+ SyntaxKind::Show => show_rule(p),
+ SyntaxKind::If => conditional(p),
+ SyntaxKind::While => while_loop(p),
+ SyntaxKind::For => for_loop(p),
+ SyntaxKind::Import => module_import(p),
+ SyntaxKind::Include => module_include(p),
+ SyntaxKind::Break => break_stmt(p),
+ SyntaxKind::Continue => continue_stmt(p),
+ SyntaxKind::Return => return_stmt(p),
+
+ SyntaxKind::None
+ | SyntaxKind::Auto
+ | SyntaxKind::Int
+ | SyntaxKind::Float
+ | SyntaxKind::Bool
+ | SyntaxKind::Numeric
+ | SyntaxKind::Str
+ | SyntaxKind::Label
+ | SyntaxKind::Raw => p.eat(),
- // Nothing.
- _ => {
- p.expected_found("expression");
- Err(ParseError)
- }
+ _ => p.expected("expression"),
}
}
-fn ident(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::Ident) => {
- p.eat();
- Ok(())
- }
- _ => {
- p.expected_found("identifier");
- Err(ParseError)
- }
+fn block(p: &mut Parser) {
+ match p.current() {
+ SyntaxKind::LeftBracket => content_block(p),
+ SyntaxKind::LeftBrace => code_block(p),
+ _ => p.expected("block"),
}
}
-/// Parse something that starts with a parenthesis, which can be either of:
-/// - Array literal
-/// - Dictionary literal
-/// - Parenthesized expression
-/// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
- let marker = p.marker();
+pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
+ let mut p = Parser::new(&text, range.start, LexMode::Code);
+ assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
+ block(&mut p);
+ (p.balanced && p.prev_end() == range.end)
+ .then(|| p.finish().into_iter().next().unwrap())
+}
- p.start_group(Group::Paren);
- let colon = p.eat_if(SyntaxKind::Colon);
- let kind = collection(p, true).0;
- p.end_group();
+fn code_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Code);
+ p.stop_at_newline(false);
+ p.expect(SyntaxKind::LeftBrace);
+ code(p, |kind| kind == SyntaxKind::RightBrace);
+ p.expect(SyntaxKind::RightBrace);
+ p.exit();
+ p.unstop();
+ p.wrap(m, SyntaxKind::CodeBlock);
+}
- // Leading colon makes this a dictionary.
- if colon {
- dict(p, marker);
- return Ok(());
- }
+fn content_block(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Markup);
+ p.expect(SyntaxKind::LeftBracket);
+ markup(p, true, 0, |kind| kind == SyntaxKind::RightBracket);
+ p.expect(SyntaxKind::RightBracket);
+ p.exit();
+ p.wrap(m, SyntaxKind::ContentBlock);
+}
- // Arrow means this is a closure's parameter list.
- if !atomic && p.at(SyntaxKind::Arrow) {
- params(p, marker);
- p.assert(SyntaxKind::Arrow);
- return marker.perform(p, SyntaxKind::Closure, expr);
+fn with_paren(p: &mut Parser) {
+ let m = p.marker();
+ let mut kind = collection(p, true);
+ if p.at(SyntaxKind::Arrow) {
+ validate_params(p, m);
+ p.wrap(m, SyntaxKind::Params);
+ p.expect(SyntaxKind::Arrow);
+ code_expr(p);
+ kind = SyntaxKind::Closure;
}
-
- // Transform into the identified collection.
match kind {
- CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized),
- CollectionKind::Positional => array(p, marker),
- CollectionKind::Named => dict(p, marker),
+ SyntaxKind::Array => validate_array(p, m),
+ SyntaxKind::Dict => validate_dict(p, m),
+ _ => {}
}
-
- Ok(())
+ p.wrap(m, kind);
}
-/// The type of a collection.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum CollectionKind {
- /// The collection is only one item and has no comma.
- Group,
- /// The collection starts with a positional item and has multiple items or a
- /// trailing comma.
- Positional,
- /// The collection starts with a colon or named item.
- Named,
-}
-
-/// Parse a collection.
-///
-/// Returns the length of the collection and whether the literal contained any
-/// commas.
-fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) {
- let mut collection_kind = None;
- let mut items = 0;
- let mut can_group = true;
- let mut missing_coma: Option<Marker> = None;
-
- while !p.eof() {
- let Ok(item_kind) = item(p, keyed) else {
- p.eat_if(SyntaxKind::Comma);
- collection_kind = Some(CollectionKind::Group);
- continue;
- };
-
- match item_kind {
- SyntaxKind::Spread => can_group = false,
- SyntaxKind::Named if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Named);
- can_group = false;
- }
- _ if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Positional);
+fn collection(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ p.stop_at_newline(false);
+ p.expect(SyntaxKind::LeftParen);
+
+ let mut count = 0;
+ let mut parenthesized = true;
+ let mut kind = None;
+ if keyed && p.eat_if(SyntaxKind::Colon) {
+ kind = Some(SyntaxKind::Dict);
+ parenthesized = false;
+ }
+
+ while !p.current().is_terminator() {
+ let prev = p.prev_end();
+ match item(p, keyed) {
+ SyntaxKind::Spread => parenthesized = false,
+ SyntaxKind::Named | SyntaxKind::Keyed if kind.is_none() => {
+ kind = Some(SyntaxKind::Dict);
+ parenthesized = false;
}
+ _ if kind.is_none() => kind = Some(SyntaxKind::Array),
_ => {}
}
- items += 1;
-
- if let Some(marker) = missing_coma.take() {
- p.expected_at(marker, "comma");
+ if !p.progress(prev) {
+ p.unexpected();
+ continue;
}
- if p.eof() {
+ count += 1;
+
+ if p.current().is_terminator() {
break;
}
- if p.eat_if(SyntaxKind::Comma) {
- can_group = false;
- } else {
- missing_coma = Some(p.trivia_start());
+ if p.expect(SyntaxKind::Comma) {
+ parenthesized = false;
}
}
- let kind = if can_group && items == 1 {
- CollectionKind::Group
- } else {
- collection_kind.unwrap_or(CollectionKind::Positional)
- };
+ p.expect(SyntaxKind::RightParen);
+ p.unstop();
- (kind, items)
+ if parenthesized && count == 1 {
+ SyntaxKind::Parenthesized
+ } else {
+ kind.unwrap_or(SyntaxKind::Array)
+ }
}
-fn item(p: &mut Parser, keyed: bool) -> ParseResult<SyntaxKind> {
- let marker = p.marker();
+fn item(p: &mut Parser, keyed: bool) -> SyntaxKind {
+ let m = p.marker();
+
if p.eat_if(SyntaxKind::Dots) {
- marker.perform(p, SyntaxKind::Spread, expr)?;
- return Ok(SyntaxKind::Spread);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::Spread);
+ return SyntaxKind::Spread;
}
- expr(p)?;
-
- if p.at(SyntaxKind::Colon) {
- match marker.after(p).map(|c| c.kind()) {
- Some(SyntaxKind::Ident) => {
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr)?;
- }
- Some(SyntaxKind::Str) if keyed => {
- p.eat();
- marker.perform(p, SyntaxKind::Keyed, expr)?;
- }
- kind => {
- let mut msg = EcoString::from("expected identifier");
- if keyed {
- msg.push_str(" or string");
- }
- if let Some(kind) = kind {
- msg.push_str(", found ");
- msg.push_str(kind.name());
- }
- marker.to_error(p, msg);
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr).ok();
- return Err(ParseError);
- }
- }
+ code_expr(p);
- Ok(SyntaxKind::Named)
- } else {
- Ok(SyntaxKind::None)
+ if !p.eat_if(SyntaxKind::Colon) {
+ return SyntaxKind::Int;
}
-}
-fn array(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"),
- _ => Ok(()),
- });
- marker.end(p, SyntaxKind::Array);
-}
+ code_expr(p);
-fn dict(p: &mut Parser, marker: Marker) {
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Keyed => {
- if let Some(child) = x.children().next() {
- let key = match child.cast::<ast::Str>() {
- Some(str) => str.get(),
- None => child.text().clone(),
- };
+ let kind = match p.node(m).map(SyntaxNode::kind) {
+ Some(SyntaxKind::Ident) => SyntaxKind::Named,
+ Some(SyntaxKind::Str) if keyed => SyntaxKind::Keyed,
+ _ => {
+ for child in p.post_process(m).next() {
+ if child.kind() == SyntaxKind::Colon {
+ break;
+ }
- if !used.insert(key) {
- return Err("pair has duplicate key");
+ let mut message = EcoString::from("expected identifier");
+ if keyed {
+ message.push_str(" or string");
}
+ message.push_str(", found ");
+ message.push_str(child.kind().name());
+ child.convert_to_error(message);
}
- Ok(())
+ SyntaxKind::Named
}
- SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()),
- _ => Err("expected named or keyed pair"),
- });
- marker.end(p, SyntaxKind::Dict);
-}
+ };
-fn params(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
- SyntaxKind::Spread
- if matches!(
- x.children().last().map(|child| child.kind()),
- Some(SyntaxKind::Ident)
- ) =>
- {
- Ok(())
- }
- _ => Err("expected identifier, named pair or argument sink"),
- });
- marker.end(p, SyntaxKind::Params);
+ p.wrap(m, kind);
+ kind
}
-/// Parse a code block: `{...}`.
-fn code_block(p: &mut Parser) {
- p.perform(SyntaxKind::CodeBlock, |p| {
- p.start_group(Group::Brace);
- code(p);
- p.end_group();
- });
-}
+fn args(p: &mut Parser) {
+ if !p.at(SyntaxKind::LeftParen) && !p.at(SyntaxKind::LeftBracket) {
+ p.expected("argument list");
+ }
-fn code(p: &mut Parser) {
- while !p.eof() {
- p.start_group(Group::Expr);
- if expr(p).is_ok() && !p.eof() {
- p.expected("semicolon or line break");
- }
- p.end_group();
+ let m = p.marker();
+ if p.at(SyntaxKind::LeftParen) {
+ collection(p, false);
+ validate_args(p, m);
+ }
- // Forcefully skip over newlines since the group's contents can't.
- p.eat_while(SyntaxKind::is_space);
+ while p.directly_at(SyntaxKind::LeftBracket) {
+ content_block(p);
}
-}
-fn content_block(p: &mut Parser) {
- p.perform(SyntaxKind::ContentBlock, |p| {
- p.start_group(Group::Bracket);
- markup(p, true);
- p.end_group();
- });
+ p.wrap(m, SyntaxKind::Args);
}
-fn args(p: &mut Parser) -> ParseResult {
- match p.peek_direct() {
- Some(SyntaxKind::LeftParen) => {}
- Some(SyntaxKind::LeftBracket) => {}
- _ => {
- p.expected_found("argument list");
- return Err(ParseError);
- }
+fn let_binding(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Let);
+
+ let m2 = p.marker();
+ p.expect(SyntaxKind::Ident);
+
+ let closure = p.directly_at(SyntaxKind::LeftParen);
+ if closure {
+ let m3 = p.marker();
+ collection(p, false);
+ validate_params(p, m3);
+ p.wrap(m3, SyntaxKind::Params);
}
- p.perform(SyntaxKind::Args, |p| {
- if p.at(SyntaxKind::LeftParen) {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
-
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named => {
- if let Some(ident) =
- x.children().next().and_then(|child| child.cast::<ast::Ident>())
- {
- if !used.insert(ident.take()) {
- return Err("duplicate argument");
- }
- }
- Ok(())
- }
- _ => Ok(()),
- });
- }
+ let f = if closure { Parser::expect } else { Parser::eat_if };
+ if f(p, SyntaxKind::Eq) {
+ code_expr(p);
+ }
- while p.peek_direct() == Some(SyntaxKind::LeftBracket) {
- content_block(p);
- }
- });
+ if closure {
+ p.wrap(m2, SyntaxKind::Closure);
+ }
- Ok(())
+ p.wrap(m, SyntaxKind::LetBinding);
}
-fn math_args(p: &mut Parser) {
- p.start_group(Group::MathRow('(', ')'));
- p.perform(SyntaxKind::Args, |p| {
- let mut marker = p.marker();
- while !p.eof() {
- if matches!(p.peek(), Some(SyntaxKind::Atom) if p.peek_src() == ",") {
- marker.end(p, SyntaxKind::Math);
- let comma = p.marker();
- p.eat();
- comma.convert(p, SyntaxKind::Comma);
- marker = p.marker();
- } else {
- math_node(p);
- }
- }
- if marker != p.marker() {
- marker.end(p, SyntaxKind::Math);
- }
- });
- p.end_group();
+fn set_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Set);
+ p.expect(SyntaxKind::Ident);
+ args(p);
+ if p.eat_if(SyntaxKind::If) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::SetRule);
}
-fn let_binding(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LetBinding, |p| {
- p.assert(SyntaxKind::Let);
-
- let marker = p.marker();
- ident(p)?;
-
- // If a parenthesis follows, this is a function definition.
- let has_params = p.peek_direct() == Some(SyntaxKind::LeftParen);
- if has_params {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
- params(p, marker);
- }
-
- if p.eat_if(SyntaxKind::Eq) {
- expr(p)?;
- } else if has_params {
- // Function definitions must have a body.
- p.expected("body");
- }
+fn show_rule(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Show);
+ code_expr(p);
+ if p.eat_if(SyntaxKind::Colon) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::ShowRule);
+}
- // Rewrite into a closure expression if it's a function definition.
- if has_params {
- marker.end(p, SyntaxKind::Closure);
+fn conditional(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::If);
+ code_expr(p);
+ block(p);
+ if p.eat_if(SyntaxKind::Else) {
+ if p.at(SyntaxKind::If) {
+ conditional(p);
+ } else {
+ block(p);
}
+ }
+ p.wrap(m, SyntaxKind::Conditional);
+}
- Ok(())
- })
+fn while_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::While);
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::WhileLoop);
}
-fn set_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::SetRule, |p| {
- p.assert(SyntaxKind::Set);
- ident(p)?;
- args(p)?;
- if p.eat_if(SyntaxKind::If) {
- expr(p)?;
- }
- Ok(())
- })
+fn for_loop(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::For);
+ for_pattern(p);
+ p.expect(SyntaxKind::In);
+ code_expr(p);
+ block(p);
+ p.wrap(m, SyntaxKind::ForLoop);
}
-fn show_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ShowRule, |p| {
- p.assert(SyntaxKind::Show);
- expr(p)?;
- if p.eat_if(SyntaxKind::Colon) {
- expr(p)?;
+fn for_pattern(p: &mut Parser) {
+ let m = p.marker();
+ if p.expect(SyntaxKind::Ident) {
+ if p.eat_if(SyntaxKind::Comma) {
+ p.expect(SyntaxKind::Ident);
}
- Ok(())
- })
+ p.wrap(m, SyntaxKind::ForPattern);
+ }
}
-fn conditional(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::Conditional, |p| {
- p.assert(SyntaxKind::If);
-
- expr(p)?;
- body(p)?;
+fn module_import(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Import);
+ code_expr(p);
+ if p.eat_if(SyntaxKind::Colon) && !p.eat_if(SyntaxKind::Star) {
+ import_items(p);
+ }
+ p.wrap(m, SyntaxKind::ModuleImport);
+}
- if p.eat_if(SyntaxKind::Else) {
- if p.at(SyntaxKind::If) {
- conditional(p)?;
- } else {
- body(p)?;
- }
+fn import_items(p: &mut Parser) {
+ let m = p.marker();
+ while !p.eof() && !p.at(SyntaxKind::Semicolon) {
+ if !p.eat_if(SyntaxKind::Ident) {
+ p.unexpected();
}
-
- Ok(())
- })
+ if p.current().is_terminator() {
+ break;
+ }
+ p.expect(SyntaxKind::Comma);
+ }
+ p.wrap(m, SyntaxKind::ImportItems);
}
-fn while_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::WhileLoop, |p| {
- p.assert(SyntaxKind::While);
- expr(p)?;
- body(p)
- })
+fn module_include(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Include);
+ code_expr(p);
+ p.wrap(m, SyntaxKind::ModuleInclude);
}
-fn for_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForLoop, |p| {
- p.assert(SyntaxKind::For);
- for_pattern(p)?;
- p.expect(SyntaxKind::In)?;
- expr(p)?;
- body(p)
- })
+fn break_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Break);
+ p.wrap(m, SyntaxKind::LoopBreak);
}
-fn for_pattern(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForPattern, |p| {
- ident(p)?;
- if p.eat_if(SyntaxKind::Comma) {
- ident(p)?;
- }
- Ok(())
- })
+fn continue_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Continue);
+ p.wrap(m, SyntaxKind::LoopContinue);
}
-fn module_import(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleImport, |p| {
- p.assert(SyntaxKind::Import);
- expr(p)?;
-
- if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) {
- return Ok(());
- }
-
- // This is the list of identifiers scenario.
- p.perform(SyntaxKind::ImportItems, |p| {
- let marker = p.marker();
- let items = collection(p, false).1;
- if items == 0 {
- p.expected("import items");
- }
- marker.filter_children(p, |n| match n.kind() {
- SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
- _ => Err("expected identifier"),
- });
- });
-
- Ok(())
- })
+fn return_stmt(p: &mut Parser) {
+ let m = p.marker();
+ p.assert(SyntaxKind::Return);
+ if !p.current().is_terminator() && !p.at(SyntaxKind::Comma) {
+ code_expr(p);
+ }
+ p.wrap(m, SyntaxKind::FuncReturn);
}
-fn module_include(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleInclude, |p| {
- p.assert(SyntaxKind::Include);
- expr(p)
- })
+fn validate_array(p: &mut Parser, m: Marker) {
+ for child in p.post_process(m) {
+ let kind = child.kind();
+ if kind == SyntaxKind::Named || kind == SyntaxKind::Keyed {
+ child.convert_to_error(format_eco!(
+ "expected expression, found {}",
+ kind.name()
+ ));
+ }
+ }
}
-fn break_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopBreak, |p| {
- p.assert(SyntaxKind::Break);
- Ok(())
- })
-}
+fn validate_dict(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Named | SyntaxKind::Keyed => {
+ let Some(first) = child.children_mut().first_mut() else { continue };
+ let key = match first.cast::<ast::Str>() {
+ Some(str) => str.get(),
+ None => first.text().clone(),
+ };
-fn continue_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopContinue, |p| {
- p.assert(SyntaxKind::Continue);
- Ok(())
- })
+ if !used.insert(key) {
+ first.convert_to_error("duplicate key");
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {}
+ SyntaxKind::LeftParen
+ | SyntaxKind::RightParen
+ | SyntaxKind::Comma
+ | SyntaxKind::Colon => {}
+ kind => {
+ child.convert_to_error(format_eco!(
+ "expected named or keyed pair, found {}",
+ kind.name()
+ ));
+ }
+ }
+ }
}
-fn return_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::FuncReturn, |p| {
- p.assert(SyntaxKind::Return);
- if !p.at(SyntaxKind::Comma) && !p.eof() {
- expr(p)?;
+fn validate_params(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ match child.kind() {
+ SyntaxKind::Ident => {
+ if !used.insert(child.text().clone()) {
+ child.convert_to_error("duplicate parameter");
+ }
+ }
+ SyntaxKind::Named => {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error("duplicate parameter");
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::Spread => {
+ let Some(within) = child.children_mut().last_mut() else { continue };
+ if within.kind() != SyntaxKind::Ident {
+ within.convert_to_error(format_eco!(
+ "expected identifier, found {}",
+ within.kind().name(),
+ ));
+ child.make_erroneous();
+ }
+ }
+ SyntaxKind::LeftParen | SyntaxKind::RightParen | SyntaxKind::Comma => {}
+ kind => {
+ child.convert_to_error(format_eco!(
+ "expected identifier, named pair or argument sink, found {}",
+ kind.name()
+ ));
+ }
}
- Ok(())
- })
+ }
}
-fn body(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- _ => {
- p.expected("body");
- Err(ParseError)
+fn validate_args(p: &mut Parser, m: Marker) {
+ let mut used = HashSet::new();
+ for child in p.post_process(m) {
+ if child.kind() == SyntaxKind::Named {
+ let Some(within) = child.children_mut().first_mut() else { return };
+ if !used.insert(within.text().clone()) {
+ within.convert_to_error("duplicate argument");
+ child.make_erroneous();
+ }
}
}
}
-/// A convenient token-based parser.
+/// Manages parsing of a stream of tokens.
struct Parser<'s> {
- /// An iterator over the source tokens.
+ text: &'s str,
lexer: Lexer<'s>,
- /// Whether we are at the end of the file or of a group.
- eof: bool,
- /// The current token.
- current: Option<SyntaxKind>,
- /// The end byte index of the last non-trivia token.
prev_end: usize,
- /// The start byte index of the peeked token.
current_start: usize,
- /// The stack of open groups.
- groups: Vec<GroupEntry>,
- /// The children of the currently built node.
- children: Vec<SyntaxNode>,
- /// Whether the last group was not correctly terminated.
- unterminated_group: bool,
- /// Whether a group terminator was found that did not close a group.
- stray_terminator: bool,
+ current: SyntaxKind,
+ modes: Vec<LexMode>,
+ nodes: Vec<SyntaxNode>,
+ stop_at_newline: Vec<bool>,
+ balanced: bool,
}
-impl<'s> Parser<'s> {
- /// Create a new parser for the source string.
- fn new(text: &'s str, mode: LexMode) -> Self {
- Self::with_prefix("", text, mode)
- }
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct Marker(usize);
- /// Create a new parser for the source string that is prefixed by some text
- /// that does not need to be parsed but taken into account for column
- /// calculation.
- fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
- let mut lexer = Lexer::with_prefix(prefix, text, mode);
+impl<'s> Parser<'s> {
+ fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
+ let mut lexer = Lexer::new(text, mode);
+ lexer.jump(offset);
let current = lexer.next();
Self {
lexer,
- eof: current.is_none(),
+ text,
+ prev_end: offset,
+ current_start: offset,
current,
- prev_end: 0,
- current_start: 0,
- groups: vec![],
- children: vec![],
- unterminated_group: false,
- stray_terminator: false,
+ modes: vec![],
+ nodes: vec![],
+ stop_at_newline: vec![],
+ balanced: true,
}
}
- /// End the parsing process and return the parsed children.
fn finish(self) -> Vec<SyntaxNode> {
- self.children
+ self.nodes
}
- /// End the parsing process and return
- /// - the parsed children and whether the last token was terminated, if all
- /// groups were terminated correctly, or
- /// - `None` otherwise.
- fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> {
- self.terminated().then(|| (self.children, self.lexer.terminated()))
+ fn prev_end(&self) -> usize {
+ self.prev_end
}
- /// Create a new marker.
- fn marker(&mut self) -> Marker {
- Marker(self.children.len())
+ fn current(&self) -> SyntaxKind {
+ self.current
}
- /// Create a marker right before the trailing trivia.
- fn trivia_start(&self) -> Marker {
- let count = self
- .children
- .iter()
- .rev()
- .take_while(|node| self.is_trivia(node.kind()))
- .count();
- Marker(self.children.len() - count)
+ fn current_start(&self) -> usize {
+ self.current_start
}
- /// Perform a subparse that wraps its result in a node with the given kind.
- fn perform<F, T>(&mut self, kind: SyntaxKind, f: F) -> T
- where
- F: FnOnce(&mut Self) -> T,
- {
- let prev = mem::take(&mut self.children);
- let output = f(self);
- let until = self.trivia_start();
- let mut children = mem::replace(&mut self.children, prev);
-
- if self.lexer.mode() == LexMode::Markup {
- self.children.push(SyntaxNode::inner(kind, children));
- } else {
- // Trailing trivia should not be wrapped into the new node.
- let idx = self.children.len();
- self.children.push(SyntaxNode::default());
- self.children.extend(children.drain(until.0..));
- self.children[idx] = SyntaxNode::inner(kind, children);
- }
-
- output
+ fn current_end(&self) -> usize {
+ self.lexer.cursor()
}
- /// Whether the end of the source string or group is reached.
- fn eof(&self) -> bool {
- self.eof
+ fn current_text(&self) -> &'s str {
+ &self.text[self.current_start..self.current_end()]
}
- /// Consume the current token and also trailing trivia.
- fn eat(&mut self) {
- self.stray_terminator |= match self.current {
- Some(SyntaxKind::RightParen) => !self.inside(Group::Paren),
- Some(SyntaxKind::RightBracket) => !self.inside(Group::Bracket),
- Some(SyntaxKind::RightBrace) => !self.inside(Group::Brace),
- _ => false,
- };
+ fn at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind
+ }
- self.prev_end = self.lexer.cursor();
- self.bump();
+ fn assert(&mut self, kind: SyntaxKind) {
+ assert_eq!(self.current, kind);
+ self.eat();
+ }
- if self.lexer.mode() != LexMode::Markup {
- // Skip whitespace and comments.
- while self.current.map_or(false, |kind| self.is_trivia(kind)) {
- self.bump();
- }
- }
+ fn eof(&self) -> bool {
+ self.at(SyntaxKind::Eof)
+ }
- self.repeek();
+ fn directly_at(&self, kind: SyntaxKind) -> bool {
+ self.current == kind && self.prev_end == self.current_start
}
- /// Consume the current token if it is the given one.
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
if at {
@@ -1244,437 +988,169 @@ impl<'s> Parser<'s> {
at
}
- /// Eat tokens while the condition is true.
- fn eat_while<F>(&mut self, mut f: F)
- where
- F: FnMut(SyntaxKind) -> bool,
- {
- while self.peek().map_or(false, |t| f(t)) {
- self.eat();
- }
+ fn convert(&mut self, kind: SyntaxKind) {
+ self.current = kind;
+ self.eat();
}
- /// Consume the current token if it is the given one and produce an error if
- /// not.
- fn expect(&mut self, kind: SyntaxKind) -> ParseResult {
- let at = self.peek() == Some(kind);
- if at {
- self.eat();
- Ok(())
- } else {
- self.expected(kind.name());
- Err(ParseError)
- }
+ fn newline(&mut self) -> bool {
+ self.lexer.newline()
}
- /// Consume the current token, debug-asserting that it is the given one.
- #[track_caller]
- fn assert(&mut self, kind: SyntaxKind) {
- debug_assert_eq!(self.peek(), Some(kind));
- self.eat();
+ fn column(&self, at: usize) -> usize {
+ self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
}
- /// Whether the current token is of the given type.
- fn at(&self, kind: SyntaxKind) -> bool {
- self.peek() == Some(kind)
+ fn marker(&self) -> Marker {
+ Marker(self.nodes.len())
}
- /// Peek at the current token without consuming it.
- fn peek(&self) -> Option<SyntaxKind> {
- if self.eof {
- None
- } else {
- self.current
- }
+ fn node(&self, m: Marker) -> Option<&SyntaxNode> {
+ self.nodes.get(m.0)
}
- /// Peek at the current token, but only if it follows immediately after the
- /// last one without any trivia in between.
- fn peek_direct(&self) -> Option<SyntaxKind> {
- if self.prev_end() == self.current_start() {
- self.peek()
- } else {
- None
- }
+ fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
+ self.nodes[m.0..]
+ .iter_mut()
+ .filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
}
- /// The byte index at which the last non-trivia token ended.
- fn prev_end(&self) -> usize {
- self.prev_end
+ fn wrap(&mut self, m: Marker, kind: SyntaxKind) {
+ self.unskip();
+ let from = m.0.min(self.nodes.len());
+ let children = self.nodes.drain(from..).collect();
+ self.nodes.push(SyntaxNode::inner(kind, children));
+ self.skip();
}
- /// The byte index at which the current token starts.
- fn current_start(&self) -> usize {
- self.current_start
+ fn progress(&self, offset: usize) -> bool {
+ offset < self.prev_end
}
- /// The byte index at which the current token ends.
- fn current_end(&self) -> usize {
- self.lexer.cursor()
+ fn enter(&mut self, mode: LexMode) {
+ self.modes.push(self.lexer.mode());
+ self.lexer.set_mode(mode);
}
- /// The byte length of the current token.
- fn current_len(&self) -> usize {
- self.current_end() - self.current_start()
- }
-
- /// The text of the current node.
- fn peek_src(&self) -> &str {
- self.lexer.scanner().from(self.current_start)
- }
-
- /// Determine the column index for the given byte index.
- fn column(&self, index: usize) -> usize {
- self.lexer.column(index)
- }
-
- /// Continue parsing in a group.
- ///
- /// When the end delimiter of the group is reached, all subsequent calls to
- /// `peek()` return `None`. Parsing can only continue with a matching call
- /// to `end_group`.
- ///
- /// This panics if the current token does not start the given group.
- #[track_caller]
- fn start_group(&mut self, kind: Group) {
- self.groups.push(GroupEntry { kind, prev_mode: self.lexer.mode() });
- self.lexer.set_mode(match kind {
- Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup,
- Group::Math | Group::MathRow(_, _) => LexMode::Math,
- Group::Brace | Group::Paren | Group::Expr => LexMode::Code,
- });
-
- match kind {
- Group::Brace => self.assert(SyntaxKind::LeftBrace),
- Group::Bracket => self.assert(SyntaxKind::LeftBracket),
- Group::Paren => self.assert(SyntaxKind::LeftParen),
- Group::Strong => self.assert(SyntaxKind::Star),
- Group::Emph => self.assert(SyntaxKind::Underscore),
- Group::Math => self.assert(SyntaxKind::Dollar),
- Group::MathRow(..) => self.assert(SyntaxKind::Atom),
- Group::Expr => self.repeek(),
+ fn exit(&mut self) {
+ let mode = self.modes.pop().unwrap();
+ if mode != self.lexer.mode() {
+ self.unskip();
+ self.lexer.set_mode(mode);
+ self.lexer.jump(self.current_start);
+ self.lex();
+ self.skip();
}
}
- /// End the parsing of a group.
- ///
- /// This panics if no group was started.
- #[track_caller]
- fn end_group(&mut self) {
- let group_mode = self.lexer.mode();
- let group = self.groups.pop().expect("no started group");
- self.lexer.set_mode(group.prev_mode);
-
- let mut rescan = self.lexer.mode() != group_mode;
-
- // Eat the end delimiter if there is one.
- if let Some((end, required)) = match group.kind {
- Group::Brace => Some((SyntaxKind::RightBrace, true)),
- Group::Bracket => Some((SyntaxKind::RightBracket, true)),
- Group::Paren => Some((SyntaxKind::RightParen, true)),
- Group::Strong => Some((SyntaxKind::Star, true)),
- Group::Emph => Some((SyntaxKind::Underscore, true)),
- Group::Math => Some((SyntaxKind::Dollar, true)),
- Group::MathRow(..) => Some((SyntaxKind::Atom, true)),
- Group::Expr => Some((SyntaxKind::Semicolon, false)),
- } {
- if self.current.as_ref() == Some(&end) {
- // If another group closes after a group with the missing
- // terminator, its scope of influence ends here and no longer
- // taints the rest of the reparse.
- self.unterminated_group = false;
-
- // Bump the delimeter and return. No need to rescan in this
- // case. Also, we know that the delimiter is not stray even
- // though we already removed the group.
- let s = self.stray_terminator;
- self.eat();
- self.stray_terminator = s;
- rescan = false;
- } else if required {
- self.expected(end.name());
- self.unterminated_group = true;
- }
- }
-
- // Rescan the peeked token if the mode changed.
- if rescan {
- let mut target = self.prev_end();
- if group_mode != LexMode::Markup {
- let start = self.trivia_start().0;
- target = self.current_start
- - self.children[start..].iter().map(SyntaxNode::len).sum::<usize>();
- self.children.truncate(start);
- }
-
- self.lexer.jump(target);
- self.prev_end = self.lexer.cursor();
- self.current_start = self.lexer.cursor();
- self.current = self.lexer.next();
- }
+ fn stop_at_newline(&mut self, stop: bool) {
+ self.stop_at_newline.push(stop);
+ }
- self.repeek();
+ fn unstop(&mut self) {
+ self.unskip();
+ self.stop_at_newline.pop();
+ self.lexer.jump(self.prev_end);
+ self.lex();
+ self.skip();
}
- /// Checks if all groups were correctly terminated.
- fn terminated(&self) -> bool {
- self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
+ fn eat(&mut self) {
+ self.save();
+ self.lex();
+ self.skip();
}
- /// Low-level bump that consumes exactly one token without special trivia
- /// handling.
- fn bump(&mut self) {
- if let Some((message, pos)) = self.lexer.last_error() {
- let len = self.current_len();
- self.children.push(SyntaxNode::error(message, pos, len))
- } else {
- let kind = self.current.unwrap();
- let text = self.peek_src();
- self.children.push(SyntaxNode::leaf(kind, text));
+ fn skip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup {
+ while self.current.is_trivia() {
+ self.save();
+ self.lex();
+ }
}
- self.current_start = self.lexer.cursor();
- self.current = self.lexer.next();
}
- /// Take another look at the current token to recheck whether it ends a
- /// group.
- fn repeek(&mut self) {
- self.eof = match &self.current {
- Some(SyntaxKind::RightBrace) => self.inside(Group::Brace),
- Some(SyntaxKind::RightBracket) => self.inside(Group::Bracket),
- Some(SyntaxKind::RightParen) => self.inside(Group::Paren),
- Some(SyntaxKind::Star) => self.inside(Group::Strong),
- Some(SyntaxKind::Underscore) => self.inside(Group::Emph),
- Some(SyntaxKind::Dollar) => self
- .groups
- .iter()
- .rev()
- .skip_while(|group| matches!(group.kind, Group::MathRow(..)))
- .next()
- .map_or(false, |group| group.kind == Group::Math),
- Some(SyntaxKind::Semicolon) => self.inside(Group::Expr),
- Some(SyntaxKind::Atom) => match self.peek_src() {
- ")" => self.inside(Group::MathRow('(', ')')),
- "}" => self.inside(Group::MathRow('{', '}')),
- "]" => self.inside(Group::MathRow('[', ']')),
- _ => false,
- },
- Some(SyntaxKind::Space { newlines }) => self.space_ends_group(*newlines),
- Some(_) => false,
- None => true,
- };
- }
+ fn unskip(&mut self) {
+ if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
+ while self.nodes.last().map_or(false, |last| last.kind().is_trivia()) {
+ self.nodes.pop();
+ }
- /// Returns whether the given type can be skipped over.
- fn is_trivia(&self, token: SyntaxKind) -> bool {
- match token {
- SyntaxKind::Space { newlines } => !self.space_ends_group(newlines),
- SyntaxKind::LineComment => true,
- SyntaxKind::BlockComment => true,
- _ => false,
+ self.lexer.jump(self.prev_end);
+ self.lex();
}
}
- /// Whether a space with the given number of newlines ends the current group.
- fn space_ends_group(&self, n: usize) -> bool {
- if n == 0 {
- return false;
+ fn save(&mut self) {
+ if self.at(SyntaxKind::Error) {
+ let (message, pos) = self.lexer.take_error().unwrap();
+ let len = self.current_end() - self.current_start;
+ self.nodes.push(SyntaxNode::error(message, pos, len));
+ } else {
+ let text = self.current_text();
+ self.nodes.push(SyntaxNode::leaf(self.current, text));
}
- match self.groups.last().map(|group| group.kind) {
- Some(Group::Strong | Group::Emph) => n >= 2,
- Some(Group::Expr) if n >= 1 => {
- // Allow else and method call to continue on next line.
- self.groups.iter().nth_back(1).map(|group| group.kind)
- != Some(Group::Brace)
- || !matches!(
- self.lexer.clone().next(),
- Some(SyntaxKind::Else | SyntaxKind::Dot)
- )
- }
- _ => false,
+ if self.lexer.mode() == LexMode::Markup || !self.current.is_trivia() {
+ self.prev_end = self.current_end();
}
}
- /// Whether we are inside the given group (can be nested).
- fn inside(&self, kind: Group) -> bool {
- self.groups
- .iter()
- .rev()
- .take_while(|g| !kind.is_weak() || g.kind.is_weak())
- .any(|g| g.kind == kind)
+ fn lex(&mut self) {
+ self.current_start = self.lexer.cursor();
+ self.current = self.lexer.next();
+ if self.lexer.mode() == LexMode::Code
+ && self.lexer.newline()
+ && self.stop_at_newline.last().copied().unwrap_or(false)
+ && !matches!(self.lexer.clone().next(), SyntaxKind::Else | SyntaxKind::Dot)
+ {
+ self.current = SyntaxKind::Eof;
+ }
}
-}
-/// Error handling.
-impl Parser<'_> {
- /// Eat the current token and add an error that it is unexpected.
- fn unexpected(&mut self) {
- if let Some(found) = self.peek() {
- let marker = self.marker();
- let msg = format_eco!("unexpected {}", found.name());
+ fn expect(&mut self, kind: SyntaxKind) -> bool {
+ let at = self.at(kind);
+ if at {
self.eat();
- marker.to_error(self, msg);
+ } else {
+ self.balanced &= !kind.is_grouping();
+ self.expected(kind.name());
}
+ at
}
- /// Add an error that the `thing` was expected at the end of the last
- /// non-trivia token.
fn expected(&mut self, thing: &str) {
- self.expected_at(self.trivia_start(), thing);
- }
-
- /// Insert an error message that `what` was expected at the marker position.
- fn expected_at(&mut self, marker: Marker, what: &str) {
- let msg = format_eco!("expected {}", what);
- self.children
- .insert(marker.0, SyntaxNode::error(msg, ErrorPos::Full, 0));
- }
-
- /// Eat the current token and add an error that it is not the expected
- /// `thing`.
- fn expected_found(&mut self, thing: &str) {
- match self.peek() {
- Some(found) => {
- let marker = self.marker();
- let msg = format_eco!("expected {}, found {}", thing, found.name());
- self.eat();
- marker.to_error(self, msg);
- }
- None => self.expected(thing),
- }
- }
-}
-
-/// Marks a location in a parser's child list.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct Marker(usize);
-
-impl Marker {
- /// Peek at the child directly before the marker.
- fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
- p.children.get(self.0.checked_sub(1)?)
- }
-
- /// Peek at the child directly after the marker.
- fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
- p.children.get(self.0)
- }
-
- /// Convert the child directly after marker.
- fn convert(self, p: &mut Parser, kind: SyntaxKind) {
- if let Some(child) = p.children.get_mut(self.0) {
- child.convert_to(kind);
+ self.unskip();
+ if self
+ .nodes
+ .last()
+ .map_or(true, |child| child.kind() != SyntaxKind::Error)
+ {
+ let message = format_eco!("expected {}", thing);
+ self.nodes.push(SyntaxNode::error(message, ErrorPos::Full, 0));
}
+ self.skip();
}
- /// Convert the child directly after marker.
- fn to_error(self, p: &mut Parser, message: impl Into<EcoString>) {
- if let Some(child) = p.children.get_mut(self.0) {
- child.convert_to_error(message);
+ fn unexpected(&mut self) {
+ self.unskip();
+ while self
+ .nodes
+ .last()
+ .map_or(false, |child| child.kind() == SyntaxKind::Error && child.len() == 0)
+ {
+ self.nodes.pop();
}
- }
-
- /// Perform a subparse that wraps all children after the marker in a node
- /// with the given kind.
- fn perform<T, F>(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T
- where
- F: FnOnce(&mut Parser) -> T,
- {
- let success = f(p);
- self.end(p, kind);
- success
- }
-
- /// Wrap all children after the marker (excluding trailing trivia) in a node
- /// with the given `kind`.
- fn end(self, p: &mut Parser, kind: SyntaxKind) {
- let until = p.trivia_start().0.max(self.0);
- let children = p.children.drain(self.0..until).collect();
- p.children.insert(self.0, SyntaxNode::inner(kind, children));
- }
+ self.skip();
- /// Wrap all children that do not fulfill the predicate in error nodes.
- fn filter_children<F>(self, p: &mut Parser, mut f: F)
- where
- F: FnMut(&SyntaxNode) -> Result<(), &'static str>,
- {
- for child in &mut p.children[self.0..] {
- // Don't expose errors.
- if child.kind().is_error() {
- continue;
- }
-
- // Don't expose trivia in code.
- if p.lexer.mode() != LexMode::Markup && child.kind().is_trivia() {
- continue;
- }
+ let kind = self.current;
+ let offset = self.nodes.len();
+ self.eat();
+ self.balanced &= !kind.is_grouping();
- if let Err(msg) = f(child) {
- let mut msg = EcoString::from(msg);
- if msg.starts_with("expected") {
- msg.push_str(", found ");
- msg.push_str(child.kind().name());
- }
- let len = child.len();
- *child = SyntaxNode::error(msg, ErrorPos::Full, len);
- }
+ if !kind.is_error() {
+ self.nodes[offset]
+ .convert_to_error(format_eco!("unexpected {}", kind.name()));
}
}
}
-
-/// A logical group of tokens, e.g. `[...]`.
-#[derive(Debug)]
-struct GroupEntry {
- /// The kind of group this is. This decides which token(s) will end the
- /// group. For example, a [`Group::Paren`] will be ended by
- /// [`Token::RightParen`].
- kind: Group,
- /// The mode the parser was in _before_ the group started (to which we go
- /// back once the group ends).
- prev_mode: LexMode,
-}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum Group {
- /// A curly-braced group: `{...}`.
- Brace,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A parenthesized group: `(...)`.
- Paren,
- /// A group surrounded with stars: `*...*`.
- Strong,
- /// A group surrounded with underscore: `_..._`.
- Emph,
- /// A group surrounded by dollar signs: `$...$`.
- Math,
- /// A group surrounded by math delimiters.
- MathRow(char, char),
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Expr,
-}
-
-impl Group {
- /// Whether the group can only force other weak groups to end.
- fn is_weak(self) -> bool {
- matches!(self, Group::Strong | Group::Emph)
- }
-}
-
-/// Allows parser methods to use the try operator. Never returned top-level
-/// because the parser recovers from all errors.
-type ParseResult<T = ()> = Result<T, ParseError>;
-
-/// The error type for parsing.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct ParseError;
-
-impl Display for ParseError {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- f.pad("failed to parse")
- }
-}
-
-impl std::error::Error for ParseError {}