summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2021-11-08 13:08:15 +0100
committerGitHub <noreply@github.com>2021-11-08 13:08:15 +0100
commitc6f8ad35f45248f1fd36ee00195966f1629c6ca7 (patch)
tree51faa3f6bbc56f75636823adeea135ed76e1b33b /src/parse
parentea6ee3f667e922ed2f21b08719a45d2395787932 (diff)
parent38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 (diff)
Merge pull request #46 from typst/parser-ng
Next Generation Parser
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/mod.rs963
-rw-r--r--src/parse/parser.rs556
-rw-r--r--src/parse/resolve.rs21
-rw-r--r--src/parse/scanner.rs45
-rw-r--r--src/parse/tokens.rs618
5 files changed, 1141 insertions, 1062 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 30787423..f9c0049f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -12,216 +12,162 @@ pub use tokens::*;
use std::rc::Rc;
-use crate::diag::TypResult;
-use crate::source::SourceFile;
-use crate::syntax::*;
-use crate::util::EcoString;
+use crate::syntax::ast::{Associativity, BinOp, UnOp};
+use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
/// Parse a source file.
-pub fn parse(source: &SourceFile) -> TypResult<Markup> {
- let mut p = Parser::new(source);
- let markup = markup(&mut p);
- let errors = p.finish();
- if errors.is_empty() {
- Ok(markup)
- } else {
- Err(Box::new(errors))
+pub fn parse(src: &str) -> Rc<GreenNode> {
+ let mut p = Parser::new(src);
+ markup(&mut p);
+ match p.finish().into_iter().next() {
+ Some(Green::Node(node)) => node,
+ _ => unreachable!(),
}
}
/// Parse markup.
-fn markup(p: &mut Parser) -> Markup {
+fn markup(p: &mut Parser) {
markup_while(p, true, &mut |_| true)
}
-/// Parse markup that stays equal or right of the given column.
-fn markup_indented(p: &mut Parser, column: usize) -> Markup {
+/// Parse markup that stays at or right of the given column.
+fn markup_indented(p: &mut Parser, column: usize) {
p.eat_while(|t| match t {
- Token::Space(n) => n == 0,
- Token::LineComment(_) | Token::BlockComment(_) => true,
+ NodeKind::Space(n) => *n == 0,
+ NodeKind::LineComment | NodeKind::BlockComment => true,
_ => false,
});
markup_while(p, false, &mut |p| match p.peek() {
- Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column,
+ Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
_ => true,
})
}
-/// Parse a syntax tree while the peeked token satisifies a condition.
+/// Parse a syntax tree while the peeked NodeKind satisfies a condition.
///
/// If `at_start` is true, things like headings that may only appear at the
/// beginning of a line or template are allowed.
-fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F) -> Markup
+fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
where
F: FnMut(&mut Parser) -> bool,
{
- let mut tree = vec![];
- while !p.eof() && f(p) {
- if let Some(node) = markup_node(p, &mut at_start) {
- at_start &= matches!(node, MarkupNode::Space | MarkupNode::Parbreak(_));
- tree.push(node);
+ p.perform(NodeKind::Markup, |p| {
+ while !p.eof() && f(p) {
+ markup_node(p, &mut at_start);
}
- }
-
- tree
+ });
}
/// Parse a markup node.
-fn markup_node(p: &mut Parser, at_start: &mut bool) -> Option<MarkupNode> {
- let token = p.peek()?;
- let span = p.peek_span();
- let node = match token {
+fn markup_node(p: &mut Parser, at_start: &mut bool) {
+ let token = match p.peek() {
+ Some(t) => t,
+ None => return,
+ };
+
+ match token {
// Whitespace.
- Token::Space(newlines) => {
- *at_start |= newlines > 0;
- if newlines < 2 {
- MarkupNode::Space
+ NodeKind::Space(newlines) => {
+ *at_start |= *newlines > 0;
+ if *newlines < 2 {
+ p.eat();
} else {
- MarkupNode::Parbreak(span)
+ p.convert(NodeKind::Parbreak);
}
+ return;
}
- // Text.
- Token::Text(text) => MarkupNode::Text(text.into()),
- Token::Tilde => MarkupNode::Text("\u{00A0}".into()),
- Token::HyphHyph => MarkupNode::Text("\u{2013}".into()),
- Token::HyphHyphHyph => MarkupNode::Text("\u{2014}".into()),
- Token::UnicodeEscape(t) => MarkupNode::Text(unicode_escape(p, t)),
+ // Comments.
+ NodeKind::LineComment | NodeKind::BlockComment => {
+ p.eat();
+ return;
+ }
+
+ // Text and markup.
+ NodeKind::Text(_)
+ | NodeKind::EnDash
+ | NodeKind::EmDash
+ | NodeKind::NonBreakingSpace
+ | NodeKind::Emph
+ | NodeKind::Strong
+ | NodeKind::Linebreak
+ | NodeKind::Raw(_)
+ | NodeKind::Math(_)
+ | NodeKind::UnicodeEscape(_) => {
+ p.eat();
+ }
- // Markup.
- Token::Backslash => MarkupNode::Linebreak(span),
- Token::Star => MarkupNode::Strong(span),
- Token::Underscore => MarkupNode::Emph(span),
- Token::Raw(t) => raw(p, t),
- Token::Eq if *at_start => return Some(heading(p)),
- Token::Hyph if *at_start => return Some(list_node(p)),
- Token::Numbering(number) if *at_start => return Some(enum_node(p, number)),
+ NodeKind::Eq if *at_start => heading(p),
+ NodeKind::Minus if *at_start => list_node(p),
+ NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
// Line-based markup that is not currently at the start of the line.
- Token::Eq | Token::Hyph | Token::Numbering(_) => {
- MarkupNode::Text(p.peek_src().into())
+ NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
+ p.convert(NodeKind::Text(p.peek_src().into()));
}
// Hashtag + keyword / identifier.
- Token::Ident(_)
- | Token::Let
- | Token::If
- | Token::While
- | Token::For
- | Token::Import
- | Token::Include => {
- let stmt = matches!(token, Token::Let | Token::Import);
+ NodeKind::Ident(_)
+ | NodeKind::Let
+ | NodeKind::If
+ | NodeKind::While
+ | NodeKind::For
+ | NodeKind::Import
+ | NodeKind::Include => {
+ let stmt = matches!(token, NodeKind::Let | NodeKind::Import);
let group = if stmt { Group::Stmt } else { Group::Expr };
- p.start_group(group, TokenMode::Code);
- let expr = expr_with(p, true, 0);
- if stmt && expr.is_some() && !p.eof() {
- p.expected_at(p.prev_end(), "semicolon or line break");
+ p.start_group(group);
+ let res = expr_prec(p, true, 0);
+ if stmt && res.is_ok() && !p.eof() {
+ p.expected_at("semicolon or line break");
}
p.end_group();
-
- return expr.map(MarkupNode::Expr);
}
// Block and template.
- Token::LeftBrace => return Some(MarkupNode::Expr(block(p))),
- Token::LeftBracket => return Some(MarkupNode::Expr(template(p))),
-
- // Comments.
- Token::LineComment(_) | Token::BlockComment(_) => {
- p.eat();
- return None;
- }
+ NodeKind::LeftBrace => block(p),
+ NodeKind::LeftBracket => template(p),
- _ => {
- *at_start = false;
- p.unexpected();
- return None;
- }
+ NodeKind::Error(_, _) => p.eat(),
+ _ => p.unexpected(),
};
- p.eat();
- Some(node)
-}
-/// Handle a unicode escape sequence.
-fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> EcoString {
- let span = p.peek_span();
- let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
- c.into()
- } else {
- // Print out the escape sequence verbatim if it is invalid.
- p.error(span, "invalid unicode escape sequence");
- p.peek_src().into()
- };
-
- if !token.terminated {
- p.error(span.end, "expected closing brace");
- }
-
- text
-}
-
-/// Handle a raw block.
-fn raw(p: &mut Parser, token: RawToken) -> MarkupNode {
- let column = p.column(p.next_start());
- let span = p.peek_span();
- let raw = resolve::resolve_raw(span, column, token.backticks, token.text);
- if !token.terminated {
- p.error(span.end, "expected backtick(s)");
- }
- MarkupNode::Raw(Box::new(raw))
+ *at_start = false;
}
/// Parse a heading.
-fn heading(p: &mut Parser) -> MarkupNode {
- let start = p.next_start();
- p.eat_assert(Token::Eq);
-
- // Count depth.
- let mut level: usize = 1;
- while p.eat_if(Token::Eq) {
- level += 1;
- }
-
- if level > 6 {
- return MarkupNode::Text(p.get(start .. p.prev_end()).into());
- }
-
- let column = p.column(p.prev_end());
- let body = markup_indented(p, column);
- MarkupNode::Heading(Box::new(HeadingNode {
- span: p.span_from(start),
- level,
- body,
- }))
+fn heading(p: &mut Parser) {
+ p.perform(NodeKind::Heading, |p| {
+ p.eat_assert(&NodeKind::Eq);
+ while p.eat_if(&NodeKind::Eq) {}
+ let column = p.column(p.prev_end());
+ markup_indented(p, column);
+ });
}
/// Parse a single list item.
-fn list_node(p: &mut Parser) -> MarkupNode {
- let start = p.next_start();
- p.eat_assert(Token::Hyph);
- let column = p.column(p.prev_end());
- let body = markup_indented(p, column);
- MarkupNode::List(Box::new(ListNode { span: p.span_from(start), body }))
+fn list_node(p: &mut Parser) {
+ p.perform(NodeKind::List, |p| {
+ p.eat_assert(&NodeKind::Minus);
+ let column = p.column(p.prev_end());
+ markup_indented(p, column);
+ });
}
/// Parse a single enum item.
-fn enum_node(p: &mut Parser, number: Option<usize>) -> MarkupNode {
- let start = p.next_start();
- p.eat_assert(Token::Numbering(number));
- let column = p.column(p.prev_end());
- let body = markup_indented(p, column);
- MarkupNode::Enum(Box::new(EnumNode {
- span: p.span_from(start),
- number,
- body,
- }))
+fn enum_node(p: &mut Parser) {
+ p.perform(NodeKind::Enum, |p| {
+ p.eat();
+ let column = p.column(p.prev_end());
+ markup_indented(p, column);
+ });
}
/// Parse an expression.
-fn expr(p: &mut Parser) -> Option<Expr> {
- expr_with(p, false, 0)
+fn expr(p: &mut Parser) -> ParseResult {
+ expr_prec(p, false, 0)
}
/// Parse an expression with operators having at least the minimum precedence.
@@ -231,13 +177,16 @@ fn expr(p: &mut Parser) -> Option<Expr> {
/// in markup.
///
/// Stops parsing at operations with lower precedence than `min_prec`.
-fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
- let start = p.next_start();
- let mut lhs = match p.eat_map(UnOp::from_token) {
+fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
+ let marker = p.marker();
+
+ // Start the unary expression.
+ match p.peek().and_then(UnOp::from_token) {
Some(op) => {
+ p.eat();
let prec = op.precedence();
- let expr = expr_with(p, atomic, prec)?;
- Expr::Unary(Box::new(UnaryExpr { span: p.span_from(start), op, expr }))
+ expr_prec(p, atomic, prec)?;
+ marker.end(p, NodeKind::Unary);
}
None => primary(p, atomic)?,
};
@@ -245,19 +194,19 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
loop {
 // A directly following parenthesis or bracket means this is a function
 // call.
- if matches!(p.peek_direct(), Some(Token::LeftParen | Token::LeftBracket)) {
- lhs = call(p, lhs)?;
+ if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
+ call(p, marker)?;
continue;
}
- if p.eat_if(Token::With) {
- lhs = with_expr(p, lhs)?;
- }
-
if atomic {
break;
}
+ if p.at(&NodeKind::With) {
+ with_expr(p, marker)?;
+ }
+
let op = match p.peek().and_then(BinOp::from_token) {
Some(binop) => binop,
None => break,
@@ -269,96 +218,94 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
}
p.eat();
+
match op.associativity() {
Associativity::Left => prec += 1,
Associativity::Right => {}
}
- let rhs = match expr_with(p, atomic, prec) {
- Some(rhs) => rhs,
- None => break,
- };
-
- let span = lhs.span().join(rhs.span());
- lhs = Expr::Binary(Box::new(BinaryExpr { span, lhs, op, rhs }));
+ marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, prec))?;
}
- Some(lhs)
+ Ok(())
}
/// Parse a primary expression.
-fn primary(p: &mut Parser, atomic: bool) -> Option<Expr> {
- if let Some(expr) = literal(p) {
- return Some(expr);
+fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
+ if literal(p) {
+ return Ok(());
}
match p.peek() {
// Things that start with an identifier.
- Some(Token::Ident(string)) => {
- let ident = Ident {
- span: p.eat_span(),
- string: string.into(),
- };
+ Some(NodeKind::Ident(_)) => {
+ let marker = p.marker();
+ p.eat();
// Arrow means this is a closure's lone parameter.
- Some(if !atomic && p.eat_if(Token::Arrow) {
- let body = expr(p)?;
- Expr::Closure(Box::new(ClosureExpr {
- span: ident.span.join(body.span()),
- name: None,
- params: vec![ClosureParam::Pos(ident)],
- body: Rc::new(body),
- }))
+ if !atomic && p.at(&NodeKind::Arrow) {
+ marker.end(p, NodeKind::ClosureParams);
+ p.eat_assert(&NodeKind::Arrow);
+ marker.perform(p, NodeKind::Closure, expr)
} else {
- Expr::Ident(Box::new(ident))
- })
+ Ok(())
+ }
}
// Structures.
- Some(Token::LeftParen) => parenthesized(p),
- Some(Token::LeftBracket) => Some(template(p)),
- Some(Token::LeftBrace) => Some(block(p)),
+ Some(NodeKind::LeftParen) => parenthesized(p),
+ Some(NodeKind::LeftBracket) => {
+ template(p);
+ Ok(())
+ }
+ Some(NodeKind::LeftBrace) => {
+ block(p);
+ Ok(())
+ }
// Keywords.
- Some(Token::Let) => let_expr(p),
- Some(Token::If) => if_expr(p),
- Some(Token::While) => while_expr(p),
- Some(Token::For) => for_expr(p),
- Some(Token::Import) => import_expr(p),
- Some(Token::Include) => include_expr(p),
+ Some(NodeKind::Let) => let_expr(p),
+ Some(NodeKind::If) => if_expr(p),
+ Some(NodeKind::While) => while_expr(p),
+ Some(NodeKind::For) => for_expr(p),
+ Some(NodeKind::Import) => import_expr(p),
+ Some(NodeKind::Include) => include_expr(p),
+
+ Some(NodeKind::Error(_, _)) => {
+ p.eat();
+ Err(())
+ }
// Nothing.
_ => {
p.expected("expression");
- None
+ Err(())
}
}
}
/// Parse a literal.
-fn literal(p: &mut Parser) -> Option<Expr> {
- let span = p.peek_span();
- let lit = match p.peek()? {
+fn literal(p: &mut Parser) -> bool {
+ match p.peek() {
// Basic values.
- Token::None => Lit::None(span),
- Token::Auto => Lit::Auto(span),
- Token::Bool(b) => Lit::Bool(span, b),
- Token::Int(i) => Lit::Int(span, i),
- Token::Float(f) => Lit::Float(span, f),
- Token::Length(val, unit) => Lit::Length(span, val, unit),
- Token::Angle(val, unit) => Lit::Angle(span, val, unit),
- Token::Percent(p) => Lit::Percent(span, p),
- Token::Fraction(p) => Lit::Fractional(span, p),
- Token::Str(token) => Lit::Str(span, {
- if !token.terminated {
- p.expected_at(span.end, "quote");
- }
- resolve::resolve_string(token.string)
- }),
- _ => return None,
- };
- p.eat();
- Some(Expr::Lit(Box::new(lit)))
+ Some(
+ NodeKind::None
+ | NodeKind::Auto
+ | NodeKind::Int(_)
+ | NodeKind::Float(_)
+ | NodeKind::Bool(_)
+ | NodeKind::Fraction(_)
+ | NodeKind::Length(_, _)
+ | NodeKind::Angle(_, _)
+ | NodeKind::Percentage(_)
+ | NodeKind::Str(_),
+ ) => {
+ p.eat();
+ true
+ }
+
+ _ => false,
+ }
}
/// Parse something that starts with a parenthesis, which can be either of:
@@ -366,433 +313,395 @@ fn literal(p: &mut Parser) -> Option<Expr> {
/// - Dictionary literal
/// - Parenthesized expression
/// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser) -> Option<Expr> {
- p.start_group(Group::Paren, TokenMode::Code);
- let colon = p.eat_if(Token::Colon);
- let (items, has_comma) = collection(p);
- let span = p.end_group();
+fn parenthesized(p: &mut Parser) -> ParseResult {
+ let marker = p.marker();
- // Leading colon makes this a dictionary.
+ p.start_group(Group::Paren);
+ let colon = p.eat_if(&NodeKind::Colon);
+ let kind = collection(p).0;
+ p.end_group();
+
+ // Leading colon makes this an (empty) dictionary.
if colon {
- return Some(dict(p, items, span));
+ dict(p, marker);
+ return Ok(());
}
// Arrow means this is a closure's parameter list.
- if p.eat_if(Token::Arrow) {
- let params = params(p, items);
- let body = expr(p)?;
- return Some(Expr::Closure(Box::new(ClosureExpr {
- span: span.join(body.span()),
- name: None,
- params,
- body: Rc::new(body),
- })));
+ if p.at(&NodeKind::Arrow) {
+ params(p, marker);
+ p.eat_assert(&NodeKind::Arrow);
+ return marker.perform(p, NodeKind::Closure, expr);
}
- // Find out which kind of collection this is.
- Some(match items.as_slice() {
- [] => array(p, items, span),
- [CallArg::Pos(_)] if !has_comma => match items.into_iter().next() {
- Some(CallArg::Pos(expr)) => Expr::Group(Box::new(GroupExpr { span, expr })),
- _ => unreachable!(),
- },
- [CallArg::Pos(_), ..] => array(p, items, span),
- [CallArg::Named(_), ..] => dict(p, items, span),
- [CallArg::Spread(expr), ..] => {
- p.error(expr.span(), "spreading is not allowed here");
- return None;
- }
- })
+ // Transform into the identified collection.
+ match kind {
+ CollectionKind::Group => marker.end(p, NodeKind::Group),
+ CollectionKind::Positional => array(p, marker),
+ CollectionKind::Named => dict(p, marker),
+ }
+
+ Ok(())
+}
+
+/// The type of a collection.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum CollectionKind {
+ /// The collection is only one item and has no comma.
+ Group,
+ /// The collection starts with a positional and has more items or a trailing
+ /// comma.
+ Positional,
+ /// The collection starts with a named item.
+ Named,
}
/// Parse a collection.
///
-/// Returns whether the literal contained any commas.
-fn collection(p: &mut Parser) -> (Vec<CallArg>, bool) {
- let mut items = vec![];
- let mut has_comma = false;
- let mut missing_coma = None;
+/// Returns the identified kind of the collection and the number of items
+/// that were parsed.
+fn collection(p: &mut Parser) -> (CollectionKind, usize) {
+ let mut kind = CollectionKind::Positional;
+ let mut items = 0;
+ let mut can_group = true;
+ let mut error = false;
+ let mut missing_coma: Option<Marker> = None;
while !p.eof() {
- if let Some(arg) = item(p) {
- items.push(arg);
+ if let Ok(item_kind) = item(p) {
+ if items == 0 && item_kind == NodeKind::Named {
+ kind = CollectionKind::Named;
+ can_group = false;
+ }
- if let Some(pos) = missing_coma.take() {
- p.expected_at(pos, "comma");
+ if item_kind == NodeKind::Spread {
+ can_group = false;
+ }
+
+ items += 1;
+
+ if let Some(marker) = missing_coma.take() {
+ marker.expected(p, "comma");
}
if p.eof() {
break;
}
- let behind = p.prev_end();
- if p.eat_if(Token::Comma) {
- has_comma = true;
+ if p.eat_if(&NodeKind::Comma) {
+ can_group = false;
} else {
- missing_coma = Some(behind);
+ missing_coma = Some(p.trivia_start());
}
+ } else {
+ error = true;
}
}
- (items, has_comma)
+ if error || (can_group && items == 1) {
+ kind = CollectionKind::Group;
+ }
+
+ (kind, items)
}
-/// Parse an expression or a named pair.
-fn item(p: &mut Parser) -> Option<CallArg> {
- if p.eat_if(Token::Dots) {
- return expr(p).map(CallArg::Spread);
+/// Parse an expression or a named pair, returning whether it's a spread or a
+/// named pair.
+fn item(p: &mut Parser) -> ParseResult<NodeKind> {
+ let marker = p.marker();
+ if p.eat_if(&NodeKind::Dots) {
+ marker.perform(p, NodeKind::Spread, expr)?;
+ return Ok(NodeKind::Spread);
}
- let first = expr(p)?;
- if p.eat_if(Token::Colon) {
- if let Expr::Ident(name) = first {
- Some(CallArg::Named(Named { name: *name, expr: expr(p)? }))
- } else {
- p.error(first.span(), "expected identifier");
- expr(p);
- None
- }
+ expr(p)?;
+
+ if p.at(&NodeKind::Colon) {
+ marker.perform(p, NodeKind::Named, |p| {
+ if let Some(NodeKind::Ident(_)) = marker.peek(p).map(|c| c.kind()) {
+ p.eat();
+ expr(p)
+ } else {
+ let error = NodeKind::Error(ErrorPos::Full, "expected identifier".into());
+ marker.end(p, error);
+ p.eat();
+ expr(p).ok();
+ Err(())
+ }
+ })?;
+
+ Ok(NodeKind::Named)
} else {
- Some(CallArg::Pos(first))
+ Ok(NodeKind::None)
}
}
/// Convert a collection into an array, producing errors for anything other than
/// expressions.
-fn array(p: &mut Parser, items: Vec<CallArg>, span: Span) -> Expr {
- let iter = items.into_iter().filter_map(|item| match item {
- CallArg::Pos(expr) => Some(expr),
- CallArg::Named(_) => {
- p.error(item.span(), "expected expression, found named pair");
- None
- }
- CallArg::Spread(_) => {
- p.error(item.span(), "spreading is not allowed here");
- None
- }
+fn array(p: &mut Parser, marker: Marker) {
+ marker.filter_children(p, |x| match x.kind() {
+ NodeKind::Named => Err("expected expression, found named pair"),
+ NodeKind::Spread => Err("spreading is not allowed here"),
+ _ => Ok(()),
});
- Expr::Array(Box::new(ArrayExpr { span, items: iter.collect() }))
+ marker.end(p, NodeKind::Array);
}
/// Convert a collection into a dictionary, producing errors for anything other
/// than named pairs.
-fn dict(p: &mut Parser, items: Vec<CallArg>, span: Span) -> Expr {
- let iter = items.into_iter().filter_map(|item| match item {
- CallArg::Named(named) => Some(named),
- CallArg::Pos(_) => {
- p.error(item.span(), "expected named pair, found expression");
- None
- }
- CallArg::Spread(_) => {
- p.error(item.span(), "spreading is not allowed here");
- None
- }
+fn dict(p: &mut Parser, marker: Marker) {
+ marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
+ NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()),
+ NodeKind::Spread => Err("spreading is not allowed here"),
+ _ => Err("expected named pair, found expression"),
});
- Expr::Dict(Box::new(DictExpr { span, items: iter.collect() }))
+ marker.end(p, NodeKind::Dict);
}
/// Convert a collection into a list of parameters, producing errors for
/// anything other than identifiers, spread operations and named pairs.
-fn params(p: &mut Parser, items: Vec<CallArg>) -> Vec<ClosureParam> {
- let iter = items.into_iter().filter_map(|item| match item {
- CallArg::Pos(Expr::Ident(ident)) => Some(ClosureParam::Pos(*ident)),
- CallArg::Named(named) => Some(ClosureParam::Named(named)),
- CallArg::Spread(Expr::Ident(ident)) => Some(ClosureParam::Sink(*ident)),
- _ => {
- p.error(item.span(), "expected identifier");
- None
- }
+fn params(p: &mut Parser, marker: Marker) {
+ marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
+ NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()),
+ NodeKind::Spread
+ if matches!(
+ x.children().last().map(|child| child.kind()),
+ Some(&NodeKind::Ident(_))
+ ) =>
+ {
+ Ok(())
+ }
+ _ => Err("expected identifier"),
});
- iter.collect()
-}
-
-/// Convert a collection into a list of identifiers, producing errors for
-/// anything other than identifiers.
-fn idents(p: &mut Parser, items: Vec<CallArg>) -> Vec<Ident> {
- let iter = items.into_iter().filter_map(|item| match item {
- CallArg::Pos(Expr::Ident(ident)) => Some(*ident),
- _ => {
- p.error(item.span(), "expected identifier");
- None
- }
- });
- iter.collect()
+ marker.end(p, NodeKind::ClosureParams);
}
/// Parse a template block: `[...]`.
-fn template(p: &mut Parser) -> Expr {
- p.start_group(Group::Bracket, TokenMode::Markup);
- let tree = markup(p);
- let span = p.end_group();
- Expr::Template(Box::new(TemplateExpr { span, body: tree }))
+fn template(p: &mut Parser) {
+ p.perform(NodeKind::Template, |p| {
+ p.start_group(Group::Bracket);
+ markup(p);
+ p.end_group();
+ });
}
/// Parse a code block: `{...}`.
-fn block(p: &mut Parser) -> Expr {
- p.start_group(Group::Brace, TokenMode::Code);
- let mut exprs = vec![];
- while !p.eof() {
- p.start_group(Group::Stmt, TokenMode::Code);
- if let Some(expr) = expr(p) {
- exprs.push(expr);
- if !p.eof() {
- p.expected_at(p.prev_end(), "semicolon or line break");
+fn block(p: &mut Parser) {
+ p.perform(NodeKind::Block, |p| {
+ p.start_group(Group::Brace);
+ while !p.eof() {
+ p.start_group(Group::Stmt);
+ if expr(p).is_ok() && !p.eof() {
+ p.expected_at("semicolon or line break");
}
+ p.end_group();
+
+ // Forcefully skip over newlines since the group's contents can't.
+ p.eat_while(|t| matches!(t, NodeKind::Space(_)));
}
p.end_group();
-
- // Forcefully skip over newlines since the group's contents can't.
- p.eat_while(|t| matches!(t, Token::Space(_)));
- }
- let span = p.end_group();
- Expr::Block(Box::new(BlockExpr { span, exprs }))
+ });
}
/// Parse a function call.
-fn call(p: &mut Parser, callee: Expr) -> Option<Expr> {
- let mut args = match p.peek_direct() {
- Some(Token::LeftParen) => args(p),
- Some(Token::LeftBracket) => CallArgs {
- span: Span::at(p.id(), callee.span().end),
- items: vec![],
- },
+fn call(p: &mut Parser, callee: Marker) -> ParseResult {
+ callee.perform(p, NodeKind::Call, |p| match p.peek_direct() {
+ Some(NodeKind::LeftParen | NodeKind::LeftBracket) => {
+ args(p, true);
+ Ok(())
+ }
_ => {
- p.expected_at(p.prev_end(), "argument list");
- return None;
+ p.expected_at("argument list");
+ Err(())
}
- };
-
- while p.peek_direct() == Some(Token::LeftBracket) {
- let body = template(p);
- args.items.push(CallArg::Pos(body));
- }
-
- Some(Expr::Call(Box::new(CallExpr {
- span: p.span_from(callee.span().start),
- callee,
- args,
- })))
+ })
}
/// Parse the arguments to a function call.
-fn args(p: &mut Parser) -> CallArgs {
- p.start_group(Group::Paren, TokenMode::Code);
- let items = collection(p).0;
- let span = p.end_group();
- CallArgs { span, items }
+fn args(p: &mut Parser, allow_template: bool) {
+ p.perform(NodeKind::CallArgs, |p| {
+ if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) {
+ p.start_group(Group::Paren);
+ collection(p);
+ p.end_group();
+ }
+
+ while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) {
+ template(p);
+ }
+ })
}
/// Parse a with expression.
-fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> {
- if p.peek() == Some(Token::LeftParen) {
- Some(Expr::With(Box::new(WithExpr {
- span: p.span_from(callee.span().start),
- callee,
- args: args(p),
- })))
- } else {
- p.expected("argument list");
- None
- }
+fn with_expr(p: &mut Parser, marker: Marker) -> ParseResult {
+ marker.perform(p, NodeKind::WithExpr, |p| {
+ p.eat_assert(&NodeKind::With);
+
+ if p.at(&NodeKind::LeftParen) {
+ args(p, false);
+ Ok(())
+ } else {
+ p.expected("argument list");
+ Err(())
+ }
+ })
}
/// Parse a let expression.
-fn let_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::Let);
+fn let_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::LetExpr, |p| {
+ p.eat_assert(&NodeKind::Let);
- let mut output = None;
- if let Some(binding) = ident(p) {
- let mut init = None;
+ let marker = p.marker();
+ ident(p)?;
- if p.eat_if(Token::With) {
- init = with_expr(p, Expr::Ident(Box::new(binding.clone())));
+ if p.at(&NodeKind::With) {
+ with_expr(p, marker)?;
} else {
// If a parenthesis follows, this is a function definition.
- let mut maybe_params = None;
- if p.peek_direct() == Some(Token::LeftParen) {
- p.start_group(Group::Paren, TokenMode::Code);
- let items = collection(p).0;
- maybe_params = Some(params(p, items));
+ let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
+ if has_params {
+ let marker = p.marker();
+ p.start_group(Group::Paren);
+ collection(p);
p.end_group();
+ params(p, marker);
}
- if p.eat_if(Token::Eq) {
- init = expr(p);
- } else if maybe_params.is_some() {
+ if p.eat_if(&NodeKind::Eq) {
+ expr(p)?;
+ } else if has_params {
// Function definitions must have a body.
- p.expected_at(p.prev_end(), "body");
+ p.expected_at("body");
}
// Rewrite into a closure expression if it's a function definition.
- if let Some(params) = maybe_params {
- let body = init?;
- init = Some(Expr::Closure(Box::new(ClosureExpr {
- span: binding.span.join(body.span()),
- name: Some(binding.clone()),
- params,
- body: Rc::new(body),
- })));
+ if has_params {
+ marker.end(p, NodeKind::Closure);
}
}
- output = Some(Expr::Let(Box::new(LetExpr {
- span: p.span_from(start),
- binding,
- init,
- })));
- }
-
- output
+ Ok(())
+ })
}
/// Parse an if expression.
-fn if_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::If);
-
- let mut output = None;
- if let Some(condition) = expr(p) {
- if let Some(if_body) = body(p) {
- let mut else_body = None;
- if p.eat_if(Token::Else) {
- if p.peek() == Some(Token::If) {
- else_body = if_expr(p);
- } else {
- else_body = body(p);
- }
- }
+fn if_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::IfExpr, |p| {
+ p.eat_assert(&NodeKind::If);
+
+ expr(p)?;
+ body(p)?;
- output = Some(Expr::If(Box::new(IfExpr {
- span: p.span_from(start),
- condition,
- if_body,
- else_body,
- })));
+ if p.eat_if(&NodeKind::Else) {
+ if p.at(&NodeKind::If) {
+ if_expr(p)?;
+ } else {
+ body(p)?;
+ }
}
- }
- output
+ Ok(())
+ })
}
/// Parse a while expression.
-fn while_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::While);
-
- let mut output = None;
- if let Some(condition) = expr(p) {
- if let Some(body) = body(p) {
- output = Some(Expr::While(Box::new(WhileExpr {
- span: p.span_from(start),
- condition,
- body,
- })));
- }
- }
-
- output
+fn while_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::WhileExpr, |p| {
+ p.eat_assert(&NodeKind::While);
+ expr(p)?;
+ body(p)?;
+ Ok(())
+ })
}
/// Parse a for expression.
-fn for_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::For);
-
- let mut output = None;
- if let Some(pattern) = for_pattern(p) {
- if p.eat_expect(Token::In) {
- if let Some(iter) = expr(p) {
- if let Some(body) = body(p) {
- output = Some(Expr::For(Box::new(ForExpr {
- span: p.span_from(start),
- pattern,
- iter,
- body,
- })));
- }
- }
- }
- }
-
- output
+fn for_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::ForExpr, |p| {
+ p.eat_assert(&NodeKind::For);
+ for_pattern(p)?;
+ p.eat_expect(&NodeKind::In)?;
+ expr(p)?;
+ body(p)?;
+ Ok(())
+ })
}
/// Parse a for loop pattern.
-fn for_pattern(p: &mut Parser) -> Option<ForPattern> {
- let first = ident(p)?;
- if p.eat_if(Token::Comma) {
- if let Some(second) = ident(p) {
- return Some(ForPattern::KeyValue(first, second));
+fn for_pattern(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::ForPattern, |p| {
+ ident(p)?;
+ if p.eat_if(&NodeKind::Comma) {
+ ident(p)?;
}
- }
- Some(ForPattern::Value(first))
+ Ok(())
+ })
}
/// Parse an import expression.
-fn import_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::Import);
+fn import_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::ImportExpr, |p| {
+ p.eat_assert(&NodeKind::Import);
+
+ if !p.eat_if(&NodeKind::Star) {
+ // This is the list of identifiers scenario.
+ p.perform(NodeKind::ImportItems, |p| {
+ p.start_group(Group::Imports);
+ let marker = p.marker();
+ let items = collection(p).1;
+ if items == 0 {
+ p.expected_at("import items");
+ }
+ p.end_group();
- let imports = if p.eat_if(Token::Star) {
- // This is the wildcard scenario.
- Imports::Wildcard
- } else {
- // This is the list of identifiers scenario.
- p.start_group(Group::Imports, TokenMode::Code);
- let items = collection(p).0;
- if items.is_empty() {
- p.expected_at(p.prev_end(), "import items");
- }
- p.end_group();
- Imports::Idents(idents(p, items))
- };
+ marker.filter_children(p, |n| match n.kind() {
+ NodeKind::Ident(_) | NodeKind::Comma => Ok(()),
+ _ => Err("expected identifier"),
+ });
+ });
+ };
- let mut output = None;
- if p.eat_expect(Token::From) {
- if let Some(path) = expr(p) {
- output = Some(Expr::Import(Box::new(ImportExpr {
- span: p.span_from(start),
- imports,
- path,
- })));
- }
- }
+ p.eat_expect(&NodeKind::From)?;
+ expr(p)?;
- output
+ Ok(())
+ })
}
/// Parse an include expression.
-fn include_expr(p: &mut Parser) -> Option<Expr> {
- let start = p.next_start();
- p.eat_assert(Token::Include);
-
- expr(p).map(|path| {
- Expr::Include(Box::new(IncludeExpr { span: p.span_from(start), path }))
+fn include_expr(p: &mut Parser) -> ParseResult {
+ p.perform(NodeKind::IncludeExpr, |p| {
+ p.eat_assert(&NodeKind::Include);
+ expr(p)?;
+ Ok(())
})
}
/// Parse an identifier.
-fn ident(p: &mut Parser) -> Option<Ident> {
- if let Some(Token::Ident(string)) = p.peek() {
- Some(Ident {
- span: p.eat_span(),
- string: string.into(),
- })
- } else {
- p.expected("identifier");
- None
+fn ident(p: &mut Parser) -> ParseResult {
+ match p.peek() {
+ Some(NodeKind::Ident(_)) => {
+ p.eat();
+ Ok(())
+ }
+ _ => {
+ p.expected("identifier");
+ Err(())
+ }
}
}
/// Parse a control flow body.
-fn body(p: &mut Parser) -> Option<Expr> {
+fn body(p: &mut Parser) -> ParseResult {
match p.peek() {
- Some(Token::LeftBracket) => Some(template(p)),
- Some(Token::LeftBrace) => Some(block(p)),
+ Some(NodeKind::LeftBracket) => template(p),
+ Some(NodeKind::LeftBrace) => block(p),
_ => {
- p.expected_at(p.prev_end(), "body");
- None
+ p.expected_at("body");
+ return Err(());
}
}
+ Ok(())
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 347d6f71..1c4c2a5c 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,250 +1,216 @@
-use std::ops::Range;
+use std::mem;
use super::{TokenMode, Tokens};
-use crate::diag::Error;
-use crate::source::{SourceFile, SourceId};
-use crate::syntax::{IntoSpan, Pos, Span, Token};
+use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
+use crate::util::EcoString;
+
+/// Allows parser methods to use the try operator. Not exposed as the parser
+/// recovers from all errors.
+pub(crate) type ParseResult<T = ()> = Result<T, ()>;
/// A convenient token-based parser.
pub struct Parser<'s> {
- /// The parsed file.
- source: &'s SourceFile,
- /// Parsing errors.
- errors: Vec<Error>,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
+ /// Whether we are at the end of the file or of a group.
+ eof: bool,
+ /// The current token.
+ current: Option<NodeKind>,
+ /// The end byte index of the last non-trivia token.
+ prev_end: usize,
+ /// The start byte index of the peeked token.
+ current_start: usize,
/// The stack of open groups.
groups: Vec<GroupEntry>,
- /// The next token.
- next: Option<Token<'s>>,
- /// The peeked token.
- /// (Same as `next` except if we are at the end of group, then `None`).
- peeked: Option<Token<'s>>,
- /// The end index of the last (non-whitespace if in code mode) token.
- prev_end: usize,
- /// The start index of the peeked token.
- next_start: usize,
-}
-
-/// A logical group of tokens, e.g. `[...]`.
-struct GroupEntry {
- /// The kind of group this is. This decides which tokens will end the group.
- /// For example, a [`Group::Paren`] will be ended by
- /// [`Token::RightParen`].
- pub kind: Group,
- /// The start index of the group. Used by `Parser::end_group` to return the
- /// group's full span.
- pub start: usize,
- /// The mode the parser was in _before_ the group started (to which we go
- /// back once the group ends).
- pub prev_mode: TokenMode,
-}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Group {
- /// A parenthesized group: `(...)`.
- Paren,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A curly-braced group: `{...}`.
- Brace,
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Stmt,
- /// A group for a single expression, ended by a line break.
- Expr,
- /// A group for import items, ended by a semicolon, line break or `from`.
- Imports,
+ /// The children of the currently built node.
+ children: Vec<Green>,
}
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(source: &'s SourceFile) -> Self {
- let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
- let next = tokens.next();
+ pub fn new(src: &'s str) -> Self {
+ let mut tokens = Tokens::new(src, TokenMode::Markup);
+ let current = tokens.next();
Self {
- source,
- errors: vec![],
tokens,
- groups: vec![],
- next,
- peeked: next,
+ eof: current.is_none(),
+ current,
prev_end: 0,
- next_start: 0,
+ current_start: 0,
+ groups: vec![],
+ children: vec![],
}
}
- /// Finish parsing and return all errors.
- pub fn finish(self) -> Vec<Error> {
- self.errors
+ /// End the parsing process and return the parsed children.
+ pub fn finish(self) -> Vec<Green> {
+ self.children
}
- /// The id of the parsed source file.
- pub fn id(&self) -> SourceId {
- self.source.id()
+ /// Create a new marker.
+ pub fn marker(&mut self) -> Marker {
+ Marker(self.children.len())
}
- /// Whether the end of the source string or group is reached.
- pub fn eof(&self) -> bool {
- self.peek().is_none()
+ /// Create a marker right before the trailing trivia.
+ pub fn trivia_start(&self) -> Marker {
+ let count = self
+ .children
+ .iter()
+ .rev()
+ .take_while(|node| self.is_trivia(node.kind()))
+ .count();
+ Marker(self.children.len() - count)
}
- /// Consume the next token.
- pub fn eat(&mut self) -> Option<Token<'s>> {
- let token = self.peek()?;
- self.bump();
- Some(token)
+ /// Perform a subparse that wraps its result in a node with the given kind.
+ pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
+ where
+ F: FnOnce(&mut Self) -> T,
+ {
+ let prev = mem::take(&mut self.children);
+ let output = f(self);
+ let until = self.trivia_start();
+ let mut children = mem::replace(&mut self.children, prev);
+
+ if self.tokens.mode() == TokenMode::Code {
+ // Trailing trivia should not be wrapped into the new node.
+ let idx = self.children.len();
+ self.children.push(Green::default());
+ self.children.extend(children.drain(until.0 ..));
+ self.children[idx] = GreenNode::with_children(kind, children).into();
+ } else {
+ self.children.push(GreenNode::with_children(kind, children).into());
+ }
+
+ output
}
- /// Eat the next token and return its source range.
- pub fn eat_span(&mut self) -> Span {
- let start = self.next_start();
- self.eat();
- Span::new(self.id(), start, self.prev_end())
+ /// Whether the end of the source string or group is reached.
+ pub fn eof(&self) -> bool {
+ self.eof
}
- /// Consume the next token if it is the given one.
- pub fn eat_if(&mut self, t: Token) -> bool {
- if self.peek() == Some(t) {
- self.bump();
- true
- } else {
- false
+ /// Consume the current token and also trailing trivia.
+ pub fn eat(&mut self) {
+ self.prev_end = self.tokens.index();
+ self.bump();
+
+ if self.tokens.mode() == TokenMode::Code {
+ // Skip whitespace and comments.
+ while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
+ self.bump();
+ }
}
+
+ self.repeek();
}
- /// Consume the next token if the closure maps it a to `Some`-variant.
- pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
- where
- F: FnOnce(Token<'s>) -> Option<T>,
- {
- let token = self.peek()?;
- let mapped = f(token);
- if mapped.is_some() {
- self.bump();
+ /// Eat if the current token is the given one.
+ pub fn eat_if(&mut self, t: &NodeKind) -> bool {
+ let at = self.at(t);
+ if at {
+ self.eat();
}
- mapped
+ at
}
- /// Consume the next token if it is the given one and produce an error if
- /// not.
- pub fn eat_expect(&mut self, t: Token) -> bool {
+ /// Eat if the current token is the given one and produce an error if not.
+ pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
let eaten = self.eat_if(t);
if !eaten {
- self.expected_at(self.prev_end(), t.name());
+ self.expected_at(t.as_str());
}
- eaten
+ if eaten { Ok(()) } else { Err(()) }
}
- /// Consume the next token, debug-asserting that it is one of the given ones.
- pub fn eat_assert(&mut self, t: Token) {
- let next = self.eat();
- debug_assert_eq!(next, Some(t));
+ /// Eat, debug-asserting that the token is the given one.
+ pub fn eat_assert(&mut self, t: &NodeKind) {
+ debug_assert_eq!(self.peek(), Some(t));
+ self.eat();
}
- /// Consume tokens while the condition is true.
+ /// Eat tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
- F: FnMut(Token<'s>) -> bool,
+ F: FnMut(&NodeKind) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
}
}
- /// Peek at the next token without consuming it.
- pub fn peek(&self) -> Option<Token<'s>> {
- self.peeked
+ /// Eat the current token, but change its type.
+ pub fn convert(&mut self, kind: NodeKind) {
+ let marker = self.marker();
+ self.eat();
+ marker.convert(self, kind);
}
- /// Peek at the next token if it follows immediately after the last one
- /// without any whitespace in between.
- pub fn peek_direct(&self) -> Option<Token<'s>> {
- if self.next_start() == self.prev_end() {
- self.peeked
- } else {
- None
- }
+ /// Whether the current token is of the given type.
+ pub fn at(&self, kind: &NodeKind) -> bool {
+ self.peek() == Some(kind)
}
- /// Peek at the span of the next token.
- ///
- /// Has length zero if `peek()` returns `None`.
- pub fn peek_span(&self) -> Span {
- Span::new(self.id(), self.next_start(), self.next_end())
+ /// Peek at the current token without consuming it.
+ pub fn peek(&self) -> Option<&NodeKind> {
+ if self.eof { None } else { self.current.as_ref() }
}
- /// Peek at the source of the next token.
- pub fn peek_src(&self) -> &'s str {
- self.get(self.next_start() .. self.next_end())
+ /// Peek at the current token, if it follows immediately after the last one
+ /// without any trivia in between.
+ pub fn peek_direct(&self) -> Option<&NodeKind> {
+ if self.prev_end() == self.current_start() {
+ self.peek()
+ } else {
+ None
+ }
}
- /// Checks whether the next token fulfills a condition.
- ///
- /// Returns `false` if there is no next token.
- pub fn check<F>(&self, f: F) -> bool
- where
- F: FnOnce(Token<'s>) -> bool,
- {
- self.peek().map_or(false, f)
+ /// Peek at the source of the current token.
+ pub fn peek_src(&self) -> &'s str {
+ self.tokens.scanner().get(self.current_start() .. self.current_end())
}
- /// The byte index at which the last token ended.
- ///
- /// Refers to the end of the last _non-whitespace_ token in code mode.
+ /// The byte index at which the last non-trivia token ended.
pub fn prev_end(&self) -> usize {
self.prev_end
}
- /// The byte index at which the next token starts.
- pub fn next_start(&self) -> usize {
- self.next_start
+ /// The byte index at which the current token starts.
+ pub fn current_start(&self) -> usize {
+ self.current_start
}
- /// The byte index at which the next token will end.
- ///
- /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
- /// `None`.
- pub fn next_end(&self) -> usize {
+ /// The byte index at which the current token ends.
+ pub fn current_end(&self) -> usize {
self.tokens.index()
}
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.source.byte_to_column(index).unwrap()
- }
-
- /// Slice out part of the source string.
- pub fn get(&self, range: Range<usize>) -> &'s str {
- self.source.get(range).unwrap()
- }
-
- /// The span from `start` to [`self.prev_end()`](Self::prev_end).
- pub fn span_from(&self, start: impl Into<Pos>) -> Span {
- Span::new(self.id(), start, self.prev_end())
+ self.tokens.scanner().column(index)
}
/// Continue parsing in a group.
///
/// When the end delimiter of the group is reached, all subsequent calls to
- /// `eat()` and `peek()` return `None`. Parsing can only continue with
- /// a matching call to `end_group`.
+ /// `peek()` return `None`. Parsing can only continue with a matching call
+ /// to `end_group`.
///
- /// This panics if the next token does not start the given group.
- pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
- self.groups.push(GroupEntry {
- kind,
- start: self.next_start(),
- prev_mode: self.tokens.mode(),
+ /// This panics if the current token does not start the given group.
+ pub fn start_group(&mut self, kind: Group) {
+ self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
+ self.tokens.set_mode(match kind {
+ Group::Bracket => TokenMode::Markup,
+ _ => TokenMode::Code,
});
- self.tokens.set_mode(mode);
self.repeek();
-
match kind {
- Group::Paren => self.eat_assert(Token::LeftParen),
- Group::Bracket => self.eat_assert(Token::LeftBracket),
- Group::Brace => self.eat_assert(Token::LeftBrace),
+ Group::Paren => self.eat_assert(&NodeKind::LeftParen),
+ Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
+ Group::Brace => self.eat_assert(&NodeKind::LeftBrace),
Group::Stmt => {}
Group::Expr => {}
Group::Imports => {}
@@ -254,130 +220,228 @@ impl<'s> Parser<'s> {
/// End the parsing of a group.
///
/// This panics if no group was started.
- pub fn end_group(&mut self) -> Span {
- let prev_mode = self.tokens.mode();
+ pub fn end_group(&mut self) {
+ let group_mode = self.tokens.mode();
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
self.repeek();
- let mut rescan = self.tokens.mode() != prev_mode;
+ let mut rescan = self.tokens.mode() != group_mode;
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
- Group::Paren => Some((Token::RightParen, true)),
- Group::Bracket => Some((Token::RightBracket, true)),
- Group::Brace => Some((Token::RightBrace, true)),
- Group::Stmt => Some((Token::Semicolon, false)),
+ Group::Paren => Some((NodeKind::RightParen, true)),
+ Group::Bracket => Some((NodeKind::RightBracket, true)),
+ Group::Brace => Some((NodeKind::RightBrace, true)),
+ Group::Stmt => Some((NodeKind::Semicolon, false)),
Group::Expr => None,
Group::Imports => None,
} {
- if self.next == Some(end) {
+ if self.current.as_ref() == Some(&end) {
// Bump the delimeter and return. No need to rescan in this case.
- self.bump();
+ self.eat();
rescan = false;
} else if required {
- self.error(
- self.next_start() .. self.next_start(),
- format!("expected {}", end.name()),
- );
+ self.push_error(format!("expected {}", end));
}
}
// Rescan the peeked token if the mode changed.
if rescan {
+ if group_mode == TokenMode::Code {
+ self.children.truncate(self.trivia_start().0);
+ }
+
self.tokens.jump(self.prev_end());
- self.bump();
+ self.prev_end = self.tokens.index();
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
+ self.repeek();
}
+ }
- Span::new(self.id(), group.start, self.prev_end())
+ /// Low-level bump that consumes exactly one token without special trivia
+ /// handling.
+ fn bump(&mut self) {
+ let kind = self.current.take().unwrap();
+ let len = self.tokens.index() - self.current_start;
+ self.children.push(GreenData::new(kind, len).into());
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
}
- /// Add an error with location and message.
- pub fn error(&mut self, span: impl IntoSpan, message: impl Into<String>) {
- self.errors.push(Error::new(span.into_span(self.id()), message));
+ /// Take another look at the current token to recheck whether it ends a
+ /// group.
+ fn repeek(&mut self) {
+ self.eof = match &self.current {
+ Some(NodeKind::RightParen) => self.inside(Group::Paren),
+ Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
+ Some(NodeKind::RightBrace) => self.inside(Group::Brace),
+ Some(NodeKind::Semicolon) => self.inside(Group::Stmt),
+ Some(NodeKind::From) => self.inside(Group::Imports),
+ Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(),
+ Some(_) => false,
+ None => true,
+ };
}
- /// Add an error that `what` was expected at the given span.
- pub fn expected_at(&mut self, span: impl IntoSpan, what: &str) {
- self.error(span, format!("expected {}", what));
+ /// Returns whether the given type can be skipped over.
+ fn is_trivia(&self, token: &NodeKind) -> bool {
+ Self::is_trivia_ext(token, self.stop_at_newline())
}
- /// Eat the next token and add an error that it is not the expected `thing`.
- pub fn expected(&mut self, what: &str) {
- let before = self.next_start();
- if let Some(found) = self.eat() {
- let after = self.prev_end();
- self.error(
- before .. after,
- format!("expected {}, found {}", what, found.name()),
- );
- } else {
- self.expected_at(self.next_start(), what);
+ /// Returns whether the given type can be skipped over given the current
+ /// newline mode.
+ fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
+ match token {
+ NodeKind::Space(n) => *n == 0 || !stop_at_newline,
+ NodeKind::LineComment => true,
+ NodeKind::BlockComment => true,
+ _ => false,
}
}
- /// Eat the next token and add an error that it is unexpected.
+ /// Whether the active group must end at a newline.
+ fn stop_at_newline(&self) -> bool {
+ matches!(
+ self.groups.last().map(|group| group.kind),
+ Some(Group::Stmt | Group::Expr | Group::Imports)
+ )
+ }
+
+ /// Whether we are inside the given group.
+ fn inside(&self, kind: Group) -> bool {
+ self.groups.iter().any(|g| g.kind == kind)
+ }
+}
+
+/// Error handling.
+impl Parser<'_> {
+ /// Push an error into the children list.
+ pub fn push_error(&mut self, msg: impl Into<EcoString>) {
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ self.children.push(GreenData::new(error, 0).into());
+ }
+
+ /// Eat the current token and add an error that it is unexpected.
pub fn unexpected(&mut self) {
- let before = self.next_start();
- if let Some(found) = self.eat() {
- let after = self.prev_end();
- self.error(before .. after, format!("unexpected {}", found.name()));
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("unexpected {}", found);
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ self.perform(error, Self::eat);
+ }
+ None => self.push_error("unexpected end of file"),
}
}
- /// Move to the next token.
- fn bump(&mut self) {
- self.prev_end = self.tokens.index().into();
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
-
- if self.tokens.mode() == TokenMode::Code {
- // Skip whitespace and comments.
- while match self.next {
- Some(Token::Space(n)) => n < 1 || !self.stop_at_newline(),
- Some(Token::LineComment(_)) => true,
- Some(Token::BlockComment(_)) => true,
- _ => false,
- } {
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
+ /// Eat the current token and add an error that it is not the expected `thing`.
+ pub fn expected(&mut self, thing: &str) {
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("expected {}, found {}", thing, found);
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ self.perform(error, Self::eat);
}
+ None => self.expected_at(thing),
}
+ }
- self.repeek();
+ /// Add an error that the `thing` was expected at the end of the last
+ /// non-trivia token.
+ pub fn expected_at(&mut self, thing: &str) {
+ self.trivia_start().expected(self, thing);
}
+}
- /// Take another look at the next token to recheck whether it ends a group.
- fn repeek(&mut self) {
- self.peeked = self.next;
- let token = match self.next {
- Some(token) => token,
- None => return,
- };
+/// A marker that indicates where a node may start.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct Marker(usize);
- if match token {
- Token::RightParen => self.inside(Group::Paren),
- Token::RightBracket => self.inside(Group::Bracket),
- Token::RightBrace => self.inside(Group::Brace),
- Token::Semicolon => self.inside(Group::Stmt),
- Token::From => self.inside(Group::Imports),
- Token::Space(n) => n >= 1 && self.stop_at_newline(),
- _ => false,
- } {
- self.peeked = None;
+impl Marker {
+ /// Perform a subparse that wraps all children after the marker in a node
+ /// with the given kind.
+ pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T
+ where
+ F: FnOnce(&mut Parser) -> T,
+ {
+ let success = f(p);
+ self.end(p, kind);
+ success
+ }
+
+ /// Wrap all children after the marker (excluding trailing trivia) in a node
+ /// with the given `kind`.
+ pub fn end(self, p: &mut Parser, kind: NodeKind) {
+ let until = p.trivia_start();
+ let children = p.children.drain(self.0 .. until.0).collect();
+ p.children
+ .insert(self.0, GreenNode::with_children(kind, children).into());
+ }
+
+ /// Wrap all children that do not fulfill the predicate in error nodes.
+ pub fn filter_children<F>(self, p: &mut Parser, f: F)
+ where
+ F: Fn(&Green) -> Result<(), &'static str>,
+ {
+ for child in &mut p.children[self.0 ..] {
+ if (p.tokens.mode() == TokenMode::Markup
+ || !Parser::is_trivia_ext(child.kind(), false))
+ && !child.kind().is_error()
+ {
+ if let Err(msg) = f(child) {
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ let inner = mem::take(child);
+ *child = GreenNode::with_child(error, inner).into();
+ }
+ }
}
}
- /// Whether the active group ends at a newline.
- fn stop_at_newline(&self) -> bool {
- matches!(
- self.groups.last().map(|group| group.kind),
- Some(Group::Stmt | Group::Expr | Group::Imports)
- )
+ /// Insert an error message that `what` was expected at the marker position.
+ pub fn expected(self, p: &mut Parser, what: &str) {
+ let msg = format!("expected {}", what);
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ p.children.insert(self.0, GreenData::new(error, 0).into());
}
- /// Whether we are inside the given group.
- fn inside(&self, kind: Group) -> bool {
- self.groups.iter().any(|g| g.kind == kind)
+ /// Peek at the child directly after the marker.
+ pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> {
+ p.children.get(self.0)
}
+
+ /// Convert the child directly after the marker.
+ pub fn convert(self, p: &mut Parser, kind: NodeKind) {
+ if let Some(child) = p.children.get_mut(self.0) {
+ child.convert(kind);
+ }
+ }
+}
+
+/// A logical group of tokens, e.g. `[...]`.
+struct GroupEntry {
+ /// The kind of group this is. This decides which tokens will end the group.
+ /// For example, a [`Group::Paren`] will be ended by
+ /// [`NodeKind::RightParen`].
+ pub kind: Group,
+ /// The mode the parser was in _before_ the group started (to which we go
+ /// back once the group ends).
+ pub prev_mode: TokenMode,
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+ /// A bracketed group: `[...]`.
+ Bracket,
+ /// A curly-braced group: `{...}`.
+ Brace,
+ /// A parenthesized group: `(...)`.
+ Paren,
+ /// A group ended by a semicolon or a line break: `;`, `\n`.
+ Stmt,
+ /// A group for a single expression, ended by a line break.
+ Expr,
+ /// A group for import items, ended by a semicolon, line break or `from`.
+ Imports,
}
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 1b323847..e15ae339 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,5 +1,5 @@
-use super::{is_newline, Scanner};
-use crate::syntax::{Ident, RawNode, Span};
+use super::{is_ident, is_newline, Scanner};
+use crate::syntax::ast::RawNode;
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
@@ -25,11 +25,9 @@ pub fn resolve_string(string: &str) -> EcoString {
let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
let _terminated = s.eat_if('}');
- if let Some(c) = resolve_hex(sequence) {
- out.push(c);
- } else {
- // TODO: Feedback that unicode escape sequence is wrong.
- out.push_str(s.eaten_from(start));
+ match resolve_hex(sequence) {
+ Some(c) => out.push(c),
+ None => out.push_str(s.eaten_from(start)),
}
}
@@ -48,19 +46,17 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trims the raw text.
-pub fn resolve_raw(span: Span, column: usize, backticks: usize, text: &str) -> RawNode {
+pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
RawNode {
- span,
- lang: Ident::new(tag, span.with_end(span.start + tag.len())),
+ lang: is_ident(tag).then(|| tag.into()),
text: text.into(),
block,
}
} else {
RawNode {
- span,
lang: None,
text: split_lines(text).join("\n").into(),
block: false,
@@ -140,7 +136,6 @@ fn split_lines(text: &str) -> Vec<&str> {
#[cfg(test)]
#[rustfmt::skip]
mod tests {
- use crate::syntax::Span;
use super::*;
#[test]
@@ -190,7 +185,7 @@ mod tests {
text: &str,
block: bool,
) {
- let node = resolve_raw(Span::detached(), column, backticks, raw);
+ let node = resolve_raw(column, backticks, raw);
assert_eq!(node.lang.as_deref(), lang);
assert_eq!(node.text, text);
assert_eq!(node.block, block);
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 8e3e4278..ea06a2e0 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -1,5 +1,7 @@
use std::slice::SliceIndex;
+use unicode_xid::UnicodeXID;
+
/// A featureful char-based scanner.
#[derive(Copy, Clone)]
pub struct Scanner<'s> {
@@ -114,6 +116,12 @@ impl<'s> Scanner<'s> {
self.index = index;
}
+ /// The full source string.
+ #[inline]
+ pub fn src(&self) -> &'s str {
+ &self.src
+ }
+
/// Slice out part of the source string.
#[inline]
pub fn get<I>(&self, index: I) -> &'s str
@@ -150,6 +158,16 @@ impl<'s> Scanner<'s> {
// optimized away in some cases.
self.src.get(start .. self.index).unwrap_or_default()
}
+
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
+ }
}
/// Whether this character denotes a newline.
@@ -163,3 +181,30 @@ pub fn is_newline(character: char) -> bool {
'\u{0085}' | '\u{2028}' | '\u{2029}'
)
}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+ let mut chars = string.chars();
+ chars
+ .next()
+ .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+ c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+ c.is_xid_continue() || c == '_' || c == '-'
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 5f969452..96dfd9d1 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,6 +1,13 @@
-use super::{is_newline, Scanner};
+use std::rc::Rc;
+
+use super::{
+ is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string,
+ Scanner,
+};
use crate::geom::{AngularUnit, LengthUnit};
-use crate::syntax::*;
+use crate::syntax::ast::{MathNode, RawNode};
+use crate::syntax::{ErrorPos, NodeKind};
+use crate::util::EcoString;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
@@ -59,7 +66,7 @@ impl<'s> Tokens<'s> {
}
impl<'s> Iterator for Tokens<'s> {
- type Item = Token<'s>;
+ type Item = NodeKind;
/// Parse the next token in the source code.
#[inline]
@@ -68,19 +75,21 @@ impl<'s> Iterator for Tokens<'s> {
let c = self.s.eat()?;
Some(match c {
// Blocks and templates.
- '[' => Token::LeftBracket,
- ']' => Token::RightBracket,
- '{' => Token::LeftBrace,
- '}' => Token::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
// Whitespace.
- ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => Token::Space(0),
+ ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0),
c if c.is_whitespace() => self.whitespace(),
// Comments with special case for URLs.
'/' if self.s.eat_if('*') => self.block_comment(),
'/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(),
- '*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)),
+ '*' if self.s.eat_if('/') => {
+ NodeKind::Unknown(self.s.eaten_from(start).into())
+ }
// Other things.
_ => match self.mode {
@@ -93,7 +102,7 @@ impl<'s> Iterator for Tokens<'s> {
impl<'s> Tokens<'s> {
#[inline]
- fn markup(&mut self, start: usize, c: char) -> Token<'s> {
+ fn markup(&mut self, start: usize, c: char) -> NodeKind {
match c {
// Escape sequences.
'\\' => self.backslash(),
@@ -102,13 +111,15 @@ impl<'s> Tokens<'s> {
'#' => self.hash(),
// Markup.
- '~' => Token::Tilde,
- '*' => Token::Star,
- '_' => Token::Underscore,
+ '~' => NodeKind::NonBreakingSpace,
+ '*' => NodeKind::Strong,
+ '_' => NodeKind::Emph,
'`' => self.raw(),
'$' => self.math(),
- '-' => self.hyph(start),
- '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => Token::Eq,
+ '-' => self.hyph(),
+ '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => {
+ NodeKind::Eq
+ }
c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
// Plain text.
@@ -116,35 +127,35 @@ impl<'s> Tokens<'s> {
}
}
- fn code(&mut self, start: usize, c: char) -> Token<'s> {
+ fn code(&mut self, start: usize, c: char) -> NodeKind {
match c {
// Parens.
- '(' => Token::LeftParen,
- ')' => Token::RightParen,
+ '(' => NodeKind::LeftParen,
+ ')' => NodeKind::RightParen,
// Length two.
- '=' if self.s.eat_if('=') => Token::EqEq,
- '!' if self.s.eat_if('=') => Token::ExclEq,
- '<' if self.s.eat_if('=') => Token::LtEq,
- '>' if self.s.eat_if('=') => Token::GtEq,
- '+' if self.s.eat_if('=') => Token::PlusEq,
- '-' if self.s.eat_if('=') => Token::HyphEq,
- '*' if self.s.eat_if('=') => Token::StarEq,
- '/' if self.s.eat_if('=') => Token::SlashEq,
- '.' if self.s.eat_if('.') => Token::Dots,
- '=' if self.s.eat_if('>') => Token::Arrow,
+ '=' if self.s.eat_if('=') => NodeKind::EqEq,
+ '!' if self.s.eat_if('=') => NodeKind::ExclEq,
+ '<' if self.s.eat_if('=') => NodeKind::LtEq,
+ '>' if self.s.eat_if('=') => NodeKind::GtEq,
+ '+' if self.s.eat_if('=') => NodeKind::PlusEq,
+ '-' if self.s.eat_if('=') => NodeKind::HyphEq,
+ '*' if self.s.eat_if('=') => NodeKind::StarEq,
+ '/' if self.s.eat_if('=') => NodeKind::SlashEq,
+ '.' if self.s.eat_if('.') => NodeKind::Dots,
+ '=' if self.s.eat_if('>') => NodeKind::Arrow,
// Length one.
- ',' => Token::Comma,
- ';' => Token::Semicolon,
- ':' => Token::Colon,
- '+' => Token::Plus,
- '-' => Token::Hyph,
- '*' => Token::Star,
- '/' => Token::Slash,
- '=' => Token::Eq,
- '<' => Token::Lt,
- '>' => Token::Gt,
+ ',' => NodeKind::Comma,
+ ';' => NodeKind::Semicolon,
+ ':' => NodeKind::Colon,
+ '+' => NodeKind::Plus,
+ '-' => NodeKind::Minus,
+ '*' => NodeKind::Star,
+ '/' => NodeKind::Slash,
+ '=' => NodeKind::Eq,
+ '<' => NodeKind::Lt,
+ '>' => NodeKind::Gt,
// Identifiers.
c if is_id_start(c) => self.ident(start),
@@ -159,12 +170,12 @@ impl<'s> Tokens<'s> {
// Strings.
'"' => self.string(),
- _ => Token::Invalid(self.s.eaten_from(start)),
+ _ => NodeKind::Unknown(self.s.eaten_from(start).into()),
}
}
#[inline]
- fn text(&mut self, start: usize) -> Token<'s> {
+ fn text(&mut self, start: usize) -> NodeKind {
macro_rules! table {
($($c:literal)|*) => {{
let mut t = [false; 128];
@@ -186,10 +197,10 @@ impl<'s> Tokens<'s> {
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
});
- Token::Text(self.s.eaten_from(start))
+ NodeKind::Text(self.s.eaten_from(start).into())
}
- fn whitespace(&mut self) -> Token<'s> {
+ fn whitespace(&mut self) -> NodeKind {
self.s.uneat();
// Count the number of newlines.
@@ -208,73 +219,81 @@ impl<'s> Tokens<'s> {
}
}
- Token::Space(newlines)
+ NodeKind::Space(newlines)
}
- fn backslash(&mut self) -> Token<'s> {
- if let Some(c) = self.s.peek() {
- match c {
+ fn backslash(&mut self) -> NodeKind {
+ match self.s.peek() {
+ Some(c) => match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
- let start = self.s.index();
self.s.eat_assert(c);
- Token::Text(&self.s.eaten_from(start))
+ NodeKind::Text(c.into())
}
'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u');
self.s.eat_assert('{');
- Token::UnicodeEscape(UnicodeEscapeToken {
- // Allow more than `ascii_hexdigit` for better error recovery.
- sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()),
- terminated: self.s.eat_if('}'),
- })
+ let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
+ if self.s.eat_if('}') {
+ if let Some(c) = resolve_hex(&sequence) {
+ NodeKind::UnicodeEscape(c)
+ } else {
+ NodeKind::Error(
+ ErrorPos::Full,
+ "invalid unicode escape sequence".into(),
+ )
+ }
+ } else {
+ NodeKind::Error(
+ ErrorPos::End,
+ "expected closing brace".into(),
+ )
+ }
}
- c if c.is_whitespace() => Token::Backslash,
- _ => Token::Text("\\"),
- }
- } else {
- Token::Backslash
+ c if c.is_whitespace() => NodeKind::Linebreak,
+ _ => NodeKind::Text('\\'.into()),
+ },
+ None => NodeKind::Linebreak,
}
}
#[inline]
- fn hash(&mut self) -> Token<'s> {
+ fn hash(&mut self) -> NodeKind {
if self.s.check_or(false, is_id_start) {
let read = self.s.eat_while(is_id_continue);
- if let Some(keyword) = keyword(read) {
- keyword
- } else {
- Token::Ident(read)
+ match keyword(read) {
+ Some(keyword) => keyword,
+ None => NodeKind::Ident(read.into()),
}
} else {
- Token::Text("#")
+ NodeKind::Text("#".into())
}
}
- fn hyph(&mut self, start: usize) -> Token<'s> {
+ fn hyph(&mut self) -> NodeKind {
if self.s.eat_if('-') {
if self.s.eat_if('-') {
- Token::HyphHyphHyph
+ NodeKind::EmDash
} else {
- Token::HyphHyph
+ NodeKind::EnDash
}
} else if self.s.check_or(true, char::is_whitespace) {
- Token::Hyph
+ NodeKind::Minus
} else {
- Token::Text(self.s.eaten_from(start))
+ NodeKind::Text("-".into())
}
}
- fn numbering(&mut self, start: usize, c: char) -> Token<'s> {
+ fn numbering(&mut self, start: usize, c: char) -> NodeKind {
let number = if c != '.' {
self.s.eat_while(|c| c.is_ascii_digit());
let read = self.s.eaten_from(start);
if !self.s.eat_if('.') {
- return Token::Text(read);
+ return NodeKind::Text(self.s.eaten_from(start).into());
}
read.parse().ok()
} else {
@@ -282,13 +301,15 @@ impl<'s> Tokens<'s> {
};
if self.s.check_or(true, char::is_whitespace) {
- Token::Numbering(number)
+ NodeKind::EnumNumbering(number)
} else {
- Token::Text(self.s.eaten_from(start))
+ NodeKind::Text(self.s.eaten_from(start).into())
}
}
- fn raw(&mut self) -> Token<'s> {
+ fn raw(&mut self) -> NodeKind {
+ let column = self.s.column(self.s.index() - 1);
+
let mut backticks = 1;
while self.s.eat_if('`') {
backticks += 1;
@@ -296,7 +317,11 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block.
if backticks == 2 {
- return Token::Raw(RawToken { text: "", backticks: 1, terminated: true });
+ return NodeKind::Raw(Rc::new(RawNode {
+ text: EcoString::new(),
+ lang: None,
+ block: false,
+ }));
}
let start = self.s.index();
@@ -310,17 +335,30 @@ impl<'s> Tokens<'s> {
}
}
- let terminated = found == backticks;
- let end = self.s.index() - if terminated { found } else { 0 };
+ if found == backticks {
+ let end = self.s.index() - found as usize;
+ NodeKind::Raw(Rc::new(resolve_raw(
+ column,
+ backticks,
+ self.s.get(start .. end).into(),
+ )))
+ } else {
+ let remaining = backticks - found;
+ let noun = if remaining == 1 { "backtick" } else { "backticks" };
- Token::Raw(RawToken {
- text: self.s.get(start .. end),
- backticks,
- terminated,
- })
+ NodeKind::Error(
+ ErrorPos::End,
+ if found == 0 {
+ format!("expected {} {}", remaining, noun)
+ } else {
+ format!("expected {} more {}", remaining, noun)
+ }
+ .into(),
+ )
+ }
}
- fn math(&mut self) -> Token<'s> {
+ fn math(&mut self) -> NodeKind {
let mut display = false;
if self.s.eat_if('[') {
display = true;
@@ -350,25 +388,36 @@ impl<'s> Tokens<'s> {
(true, true) => 2,
};
- Token::Math(MathToken {
- formula: self.s.get(start .. end),
- display,
- terminated,
- })
+ if terminated {
+ NodeKind::Math(Rc::new(MathNode {
+ formula: self.s.get(start .. end).into(),
+ display,
+ }))
+ } else {
+ NodeKind::Error(
+ ErrorPos::End,
+ if !display || (!escaped && dollar) {
+ "expected closing dollar sign"
+ } else {
+ "expected closing bracket and dollar sign"
+ }
+ .into(),
+ )
+ }
}
- fn ident(&mut self, start: usize) -> Token<'s> {
+ fn ident(&mut self, start: usize) -> NodeKind {
self.s.eat_while(is_id_continue);
match self.s.eaten_from(start) {
- "none" => Token::None,
- "auto" => Token::Auto,
- "true" => Token::Bool(true),
- "false" => Token::Bool(false),
- id => keyword(id).unwrap_or(Token::Ident(id)),
+ "none" => NodeKind::None,
+ "auto" => NodeKind::Auto,
+ "true" => NodeKind::Bool(true),
+ "false" => NodeKind::Bool(false),
+ id => keyword(id).unwrap_or(NodeKind::Ident(id.into())),
}
}
- fn number(&mut self, start: usize, c: char) -> Token<'s> {
+ fn number(&mut self, start: usize, c: char) -> NodeKind {
// Read the first part (integer or fractional depending on `first`).
self.s.eat_while(|c| c.is_ascii_digit());
@@ -396,55 +445,56 @@ impl<'s> Tokens<'s> {
// Find out whether it is a simple number.
if suffix.is_empty() {
- if let Ok(int) = number.parse::<i64>() {
- return Token::Int(int);
- } else if let Ok(float) = number.parse::<f64>() {
- return Token::Float(float);
+ if let Ok(i) = number.parse::<i64>() {
+ return NodeKind::Int(i);
}
}
- // Otherwise parse into the fitting numeric type.
- let build = match suffix {
- "%" => Token::Percent,
- "fr" => Token::Fraction,
- "pt" => |x| Token::Length(x, LengthUnit::Pt),
- "mm" => |x| Token::Length(x, LengthUnit::Mm),
- "cm" => |x| Token::Length(x, LengthUnit::Cm),
- "in" => |x| Token::Length(x, LengthUnit::In),
- "rad" => |x| Token::Angle(x, AngularUnit::Rad),
- "deg" => |x| Token::Angle(x, AngularUnit::Deg),
- _ => return Token::Invalid(all),
- };
-
- if let Ok(float) = number.parse::<f64>() {
- build(float)
+ if let Ok(f) = number.parse::<f64>() {
+ match suffix {
+ "" => NodeKind::Float(f),
+ "%" => NodeKind::Percentage(f),
+ "fr" => NodeKind::Fraction(f),
+ "pt" => NodeKind::Length(f, LengthUnit::Pt),
+ "mm" => NodeKind::Length(f, LengthUnit::Mm),
+ "cm" => NodeKind::Length(f, LengthUnit::Cm),
+ "in" => NodeKind::Length(f, LengthUnit::In),
+ "deg" => NodeKind::Angle(f, AngularUnit::Deg),
+ "rad" => NodeKind::Angle(f, AngularUnit::Rad),
+ _ => {
+ return NodeKind::Unknown(all.into());
+ }
+ }
} else {
- Token::Invalid(all)
+ NodeKind::Unknown(all.into())
}
}
- fn string(&mut self) -> Token<'s> {
+
+ fn string(&mut self) -> NodeKind {
let mut escaped = false;
- Token::Str(StrToken {
- string: self.s.eat_until(|c| {
- if c == '"' && !escaped {
- true
- } else {
- escaped = c == '\\' && !escaped;
- false
- }
- }),
- terminated: self.s.eat_if('"'),
- })
- }
+ let string = resolve_string(self.s.eat_until(|c| {
+ if c == '"' && !escaped {
+ true
+ } else {
+ escaped = c == '\\' && !escaped;
+ false
+ }
+ }));
- fn line_comment(&mut self) -> Token<'s> {
- Token::LineComment(self.s.eat_until(is_newline))
+ if self.s.eat_if('"') {
+ NodeKind::Str(string)
+ } else {
+ NodeKind::Error(ErrorPos::End, "expected quote".into())
+ }
}
- fn block_comment(&mut self) -> Token<'s> {
- let start = self.s.index();
+ fn line_comment(&mut self) -> NodeKind {
+ self.s.eat_until(is_newline);
+ NodeKind::LineComment
+ }
+ fn block_comment(&mut self) -> NodeKind {
let mut state = '_';
let mut depth = 1;
@@ -466,10 +516,7 @@ impl<'s> Tokens<'s> {
}
}
- let terminated = depth == 0;
- let end = self.s.index() - if terminated { 2 } else { 0 };
-
- Token::BlockComment(self.s.get(start .. end))
+ NodeKind::BlockComment
}
fn maybe_in_url(&self) -> bool {
@@ -477,24 +524,24 @@ impl<'s> Tokens<'s> {
}
}
-fn keyword(ident: &str) -> Option<Token<'static>> {
+fn keyword(ident: &str) -> Option<NodeKind> {
Some(match ident {
- "not" => Token::Not,
- "and" => Token::And,
- "or" => Token::Or,
- "with" => Token::With,
- "let" => Token::Let,
- "if" => Token::If,
- "else" => Token::Else,
- "for" => Token::For,
- "in" => Token::In,
- "while" => Token::While,
- "break" => Token::Break,
- "continue" => Token::Continue,
- "return" => Token::Return,
- "import" => Token::Import,
- "include" => Token::Include,
- "from" => Token::From,
+ "not" => NodeKind::Not,
+ "and" => NodeKind::And,
+ "or" => NodeKind::Or,
+ "with" => NodeKind::With,
+ "let" => NodeKind::Let,
+ "if" => NodeKind::If,
+ "else" => NodeKind::Else,
+ "for" => NodeKind::For,
+ "in" => NodeKind::In,
+ "while" => NodeKind::While,
+ "break" => NodeKind::Break,
+ "continue" => NodeKind::Continue,
+ "return" => NodeKind::Return,
+ "import" => NodeKind::Import,
+ "include" => NodeKind::Include,
+ "from" => NodeKind::From,
_ => return None,
})
}
@@ -506,24 +553,45 @@ mod tests {
use super::*;
+ use ErrorPos::*;
+ use NodeKind::*;
use Option::None;
- use Token::{Ident, *};
use TokenMode::{Code, Markup};
- const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
- Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated })
+ fn UnicodeEscape(c: char) -> NodeKind {
+ NodeKind::UnicodeEscape(c)
}
- const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token {
- Token::Raw(RawToken { text, backticks, terminated })
+ fn Error(pos: ErrorPos, message: &str) -> NodeKind {
+ NodeKind::Error(pos, message.into())
}
- const fn Math(formula: &str, display: bool, terminated: bool) -> Token {
- Token::Math(MathToken { formula, display, terminated })
+ fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind {
+ NodeKind::Raw(Rc::new(RawNode {
+ text: text.into(),
+ lang: lang.map(Into::into),
+ block,
+ }))
}
- const fn Str(string: &str, terminated: bool) -> Token {
- Token::Str(StrToken { string, terminated })
+ fn Math(formula: &str, display: bool) -> NodeKind {
+ NodeKind::Math(Rc::new(MathNode { formula: formula.into(), display }))
+ }
+
+ fn Str(string: &str) -> NodeKind {
+ NodeKind::Str(string.into())
+ }
+
+ fn Text(string: &str) -> NodeKind {
+ NodeKind::Text(string.into())
+ }
+
+ fn Ident(ident: &str) -> NodeKind {
+ NodeKind::Ident(ident.into())
+ }
+
+ fn Invalid(invalid: &str) -> NodeKind {
+ NodeKind::Unknown(invalid.into())
}
/// Building blocks for suffix testing.
@@ -541,40 +609,6 @@ mod tests {
/// - '/': symbols
const BLOCKS: &str = " a1/";
- /// Suffixes described by four-tuples of:
- ///
- /// - block the suffix is part of
- /// - mode in which the suffix is applicable
- /// - the suffix string
- /// - the resulting suffix token
- const SUFFIXES: &[(char, Option<TokenMode>, &str, Token)] = &[
- // Whitespace suffixes.
- (' ', None, " ", Space(0)),
- (' ', None, "\n", Space(1)),
- (' ', None, "\r", Space(1)),
- (' ', None, "\r\n", Space(1)),
- // Letter suffixes.
- ('a', Some(Markup), "hello", Text("hello")),
- ('a', Some(Markup), "💚", Text("💚")),
- ('a', Some(Code), "val", Ident("val")),
- ('a', Some(Code), "α", Ident("α")),
- ('a', Some(Code), "_", Ident("_")),
- // Number suffixes.
- ('1', Some(Code), "2", Int(2)),
- ('1', Some(Code), ".2", Float(0.2)),
- // Symbol suffixes.
- ('/', None, "[", LeftBracket),
- ('/', None, "//", LineComment("")),
- ('/', None, "/**/", BlockComment("")),
- ('/', Some(Markup), "*", Star),
- ('/', Some(Markup), "$ $", Math(" ", false, true)),
- ('/', Some(Markup), r"\\", Text(r"\")),
- ('/', Some(Markup), "#let", Let),
- ('/', Some(Code), "(", LeftParen),
- ('/', Some(Code), ":", Colon),
- ('/', Some(Code), "+=", PlusEq),
- ];
-
macro_rules! t {
(Both $($tts:tt)*) => {
t!(Markup $($tts)*);
@@ -584,8 +618,42 @@ mod tests {
// Test without suffix.
t!(@$mode: $src => $($token),*);
+ // Suffixes described by four-tuples of:
+ //
+ // - block the suffix is part of
+ // - mode in which the suffix is applicable
+ // - the suffix string
+ // - the resulting suffix NodeKind
+ let suffixes: &[(char, Option<TokenMode>, &str, NodeKind)] = &[
+ // Whitespace suffixes.
+ (' ', None, " ", Space(0)),
+ (' ', None, "\n", Space(1)),
+ (' ', None, "\r", Space(1)),
+ (' ', None, "\r\n", Space(1)),
+ // Letter suffixes.
+ ('a', Some(Markup), "hello", Text("hello")),
+ ('a', Some(Markup), "💚", Text("💚")),
+ ('a', Some(Code), "val", Ident("val")),
+ ('a', Some(Code), "α", Ident("α")),
+ ('a', Some(Code), "_", Ident("_")),
+ // Number suffixes.
+ ('1', Some(Code), "2", Int(2)),
+ ('1', Some(Code), ".2", Float(0.2)),
+ // Symbol suffixes.
+ ('/', None, "[", LeftBracket),
+ ('/', None, "//", LineComment),
+ ('/', None, "/**/", BlockComment),
+ ('/', Some(Markup), "*", Strong),
+ ('/', Some(Markup), "$ $", Math(" ", false)),
+ ('/', Some(Markup), r"\\", Text("\\")),
+ ('/', Some(Markup), "#let", Let),
+ ('/', Some(Code), "(", LeftParen),
+ ('/', Some(Code), ":", Colon),
+ ('/', Some(Code), "+=", PlusEq),
+ ];
+
// Test with each applicable suffix.
- for &(block, mode, suffix, token) in SUFFIXES {
+ for &(block, mode, suffix, ref token) in suffixes {
let src = $src;
#[allow(unused_variables)]
let blocks = BLOCKS;
@@ -599,7 +667,7 @@ mod tests {
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
- let expected = vec![$($token),*];
+ let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};
}
@@ -671,7 +739,7 @@ mod tests {
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
- t!(Markup[""]: "hello~" => Text("hello"), Tilde);
+ t!(Markup[""]: "hello~" => Text("hello"), NonBreakingSpace);
}
#[test]
@@ -698,31 +766,31 @@ mod tests {
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
- t!(Markup: r"\u{}" => UnicodeEscape("", true));
- t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
- t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
+ t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
+ t!(Markup: r"\u{2603}" => UnicodeEscape('☃'));
+ t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
// Test unclosed unicode escapes.
- t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
- t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
- t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
- t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
- t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
+ t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
+ t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
}
#[test]
fn test_tokenize_markup_symbols() {
// Test markup tokens.
- t!(Markup[" a1"]: "*" => Star);
- t!(Markup: "_" => Underscore);
+ t!(Markup[" a1"]: "*" => Strong);
+ t!(Markup: "_" => Emph);
t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0));
- t!(Markup: "~" => Tilde);
- t!(Markup[" "]: r"\" => Backslash);
- t!(Markup["a "]: r"a--" => Text("a"), HyphHyph);
- t!(Markup["a1/"]: "- " => Hyph, Space(0));
- t!(Markup[" "]: "." => Numbering(None));
- t!(Markup[" "]: "1." => Numbering(Some(1)));
+ t!(Markup: "~" => NonBreakingSpace);
+ t!(Markup[" "]: r"\" => Linebreak);
+ t!(Markup["a "]: r"a--" => Text("a"), EnDash);
+ t!(Markup["a1/"]: "- " => Minus, Space(0));
+ t!(Markup[" "]: "." => EnumNumbering(None));
+ t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
t!(Markup[" "]: "1.a" => Text("1."), Text("a"));
t!(Markup[" /"]: "a1." => Text("a1."));
}
@@ -734,7 +802,7 @@ mod tests {
t!(Code: ";" => Semicolon);
t!(Code: ":" => Colon);
t!(Code: "+" => Plus);
- t!(Code: "-" => Hyph);
+ t!(Code: "-" => Minus);
t!(Code[" a1"]: "*" => Star);
t!(Code[" a1"]: "/" => Slash);
t!(Code: "=" => Eq);
@@ -756,10 +824,10 @@ mod tests {
t!(Code[" a/"]: "..." => Dots, Invalid("."));
// Test hyphen as symbol vs part of identifier.
- t!(Code[" /"]: "-1" => Hyph, Int(1));
- t!(Code[" /"]: "-a" => Hyph, Ident("a"));
- t!(Code[" /"]: "--1" => Hyph, Hyph, Int(1));
- t!(Code[" /"]: "--_a" => Hyph, Hyph, Ident("_a"));
+ t!(Code[" /"]: "-1" => Minus, Int(1));
+ t!(Code[" /"]: "-a" => Minus, Ident("a"));
+ t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
+ t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
t!(Code[" /"]: "a-b" => Ident("a-b"));
}
@@ -776,13 +844,13 @@ mod tests {
("import", Import),
];
- for &(s, t) in &list {
+ for (s, t) in list.clone() {
t!(Markup[" "]: format!("#{}", s) => t);
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
- t!(Markup[" /"]: format!("# {}", s) => Token::Text("#"), Space(0), Text(s));
+ t!(Markup[" /"]: format!("# {}", s) => Text("#"), Space(0), Text(s));
}
- for &(s, t) in &list {
+ for (s, t) in list {
t!(Code[" "]: s => t);
t!(Markup[" /"]: s => Text(s));
}
@@ -796,45 +864,43 @@ mod tests {
#[test]
fn test_tokenize_raw_blocks() {
- let empty = Raw("", 1, true);
-
// Test basic raw block.
- t!(Markup: "``" => empty);
- t!(Markup: "`raw`" => Raw("raw", 1, true));
- t!(Markup[""]: "`]" => Raw("]", 1, false));
+ t!(Markup: "``" => Raw("", None, false));
+ t!(Markup: "`raw`" => Raw("raw", None, false));
+ t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
// Test special symbols in raw block.
- t!(Markup: "`[brackets]`" => Raw("[brackets]", 1, true));
- t!(Markup[""]: r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false));
+ t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
+ t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
// Test separated closing backticks.
- t!(Markup: "```not `y`e`t```" => Raw("not `y`e`t", 3, true));
+ t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
// Test more backticks.
- t!(Markup: "``nope``" => empty, Text("nope"), empty);
- t!(Markup: "````🚀````" => Raw("🚀", 4, true));
- t!(Markup[""]: "`````👩‍🚀````noend" => Raw("👩‍🚀````noend", 5, false));
- t!(Markup[""]: "````raw``````" => Raw("raw", 4, true), empty);
+ t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
+ t!(Markup: "````🚀````" => Raw("", None, false));
+ t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
+ t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
}
#[test]
fn test_tokenize_math_formulas() {
// Test basic formula.
- t!(Markup: "$$" => Math("", false, true));
- t!(Markup: "$x$" => Math("x", false, true));
- t!(Markup: r"$\\$" => Math(r"\\", false, true));
- t!(Markup: "$[x + y]$" => Math("x + y", true, true));
- t!(Markup: r"$[\\]$" => Math(r"\\", true, true));
+ t!(Markup: "$$" => Math("", false));
+ t!(Markup: "$x$" => Math("x", false));
+ t!(Markup: r"$\\$" => Math(r"\\", false));
+ t!(Markup: "$[x + y]$" => Math("x + y", true));
+ t!(Markup: r"$[\\]$" => Math(r"\\", true));
// Test unterminated.
- t!(Markup[""]: "$x" => Math("x", false, false));
- t!(Markup[""]: "$[x" => Math("x", true, false));
- t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, false));
+ t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign"));
+ t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign"));
+ t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign"));
// Test escape sequences.
- t!(Markup: r"$\$x$" => Math(r"\$x", false, true));
- t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, true));
- t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false));
+ t!(Markup: r"$\$x$" => Math(r"\$x", false));
+ t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true));
+ t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign"));
}
#[test]
@@ -896,8 +962,8 @@ mod tests {
let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
let suffixes = [
- ("%", Percent as fn(f64) -> Token<'static>),
- ("fr", Fraction as fn(f64) -> Token<'static>),
+ ("%", Percentage as fn(f64) -> NodeKind),
+ ("fr", Fraction as fn(f64) -> NodeKind),
("mm", |x| Length(x, LengthUnit::Mm)),
("pt", |x| Length(x, LengthUnit::Pt)),
("cm", |x| Length(x, LengthUnit::Cm)),
@@ -922,62 +988,62 @@ mod tests {
#[test]
fn test_tokenize_strings() {
// Test basic strings.
- t!(Code: "\"hi\"" => Str("hi", true));
- t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true));
- t!(Code: "\"🌎\"" => Str("🌎", true));
+ t!(Code: "\"hi\"" => Str("hi"));
+ t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
+ t!(Code: "\"🌎\"" => Str("🌎"));
// Test unterminated.
- t!(Code[""]: "\"hi" => Str("hi", false));
+ t!(Code[""]: "\"hi" => Error(End, "expected quote"));
// Test escaped quote.
- t!(Code: r#""a\"bc""# => Str(r#"a\"bc"#, true));
- t!(Code[""]: r#""\""# => Str(r#"\""#, false));
+ t!(Code: r#""a\"bc""# => Str("a\"bc"));
+ t!(Code[""]: r#""\""# => Error(End, "expected quote"));
}
#[test]
fn test_tokenize_line_comments() {
// Test line comment with no trailing newline.
- t!(Both[""]: "//" => LineComment(""));
+ t!(Both[""]: "//" => LineComment);
// Test line comment ends at newline.
- t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1));
- t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1));
- t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1));
+ t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
+ t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
+ t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
// Test nested line comments.
- t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1));
+ t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
}
#[test]
fn test_tokenize_block_comments() {
// Test basic block comments.
- t!(Both[""]: "/*" => BlockComment(""));
- t!(Both: "/**/" => BlockComment(""));
- t!(Both: "/*🏞*/" => BlockComment("🏞"));
- t!(Both: "/*\n*/" => BlockComment("\n"));
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both: "/**/" => BlockComment);
+ t!(Both: "/*🏞*/" => BlockComment);
+ t!(Both: "/*\n*/" => BlockComment);
// Test depth 1 and 2 nested block comments.
- t!(Both: "/* /* */ */" => BlockComment(" /* */ "));
- t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/"));
+ t!(Both: "/* /* */ */" => BlockComment);
+ t!(Both: "/*/*/**/*/*/" => BlockComment);
// Test two nested, one unclosed block comments.
- t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/"));
+ t!(Both[""]: "/*/*/**/*/" => BlockComment);
// Test all combinations of up to two following slashes and stars.
- t!(Both[""]: "/*" => BlockComment(""));
- t!(Both[""]: "/*/" => BlockComment("/"));
- t!(Both[""]: "/**" => BlockComment("*"));
- t!(Both[""]: "/*//" => BlockComment("//"));
- t!(Both[""]: "/*/*" => BlockComment("/*"));
- t!(Both[""]: "/**/" => BlockComment(""));
- t!(Both[""]: "/***" => BlockComment("**"));
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both[""]: "/*/" => BlockComment);
+ t!(Both[""]: "/**" => BlockComment);
+ t!(Both[""]: "/*//" => BlockComment);
+ t!(Both[""]: "/*/*" => BlockComment);
+ t!(Both[""]: "/**/" => BlockComment);
+ t!(Both[""]: "/***" => BlockComment);
}
#[test]
fn test_tokenize_invalid() {
// Test invalidly closed block comments.
- t!(Both: "*/" => Token::Invalid("*/"));
- t!(Both: "/**/*/" => BlockComment(""), Token::Invalid("*/"));
+ t!(Both: "*/" => Invalid("*/"));
+ t!(Both: "/**/*/" => BlockComment, Invalid("*/"));
// Test invalid expressions.
t!(Code: r"\" => Invalid(r"\"));
@@ -990,6 +1056,6 @@ mod tests {
// Test invalid number suffixes.
t!(Code[" /"]: "1foo" => Invalid("1foo"));
t!(Code: "1p%" => Invalid("1p"), Invalid("%"));
- t!(Code: "1%%" => Percent(1.0), Invalid("%"));
+ t!(Code: "1%%" => Percentage(1.0), Invalid("%"));
}
}