From 4875633acf4701705b9b3b014eb7d94268b897c2 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 23 Oct 2021 19:03:27 +0200 Subject: Change parser --- src/parse/mod.rs | 1088 +++++++++++++++++++++++++++----------------------- src/parse/parser.rs | 415 +++++++++++++------ src/parse/resolve.rs | 40 +- src/parse/tokens.rs | 519 +++++++++++++----------- 4 files changed, 1205 insertions(+), 857 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 30787423..dc769183 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,215 +12,213 @@ pub use tokens::*; use std::rc::Rc; -use crate::diag::TypResult; use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; /// Parse a source file. -pub fn parse(source: &SourceFile) -> TypResult { +pub fn parse(source: &SourceFile) -> Rc { let mut p = Parser::new(source); - let markup = markup(&mut p); - let errors = p.finish(); - if errors.is_empty() { - Ok(markup) - } else { - Err(Box::new(errors)) - } + markup(&mut p); + p.finish() } /// Parse markup. -fn markup(p: &mut Parser) -> Markup { +fn markup(p: &mut Parser) { markup_while(p, true, &mut |_| true) } -/// Parse markup that stays equal or right of the given column. -fn markup_indented(p: &mut Parser, column: usize) -> Markup { +/// Parse markup that stays right of the given column. +fn markup_indented(p: &mut Parser, column: usize) { + // TODO this is broken p.eat_while(|t| match t { - Token::Space(n) => n == 0, - Token::LineComment(_) | Token::BlockComment(_) => true, + NodeKind::Space(n) => n == 0, + NodeKind::LineComment | NodeKind::BlockComment => true, _ => false, }); markup_while(p, false, &mut |p| match p.peek() { - Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } -/// Parse a syntax tree while the peeked token satisifies a condition. +/// Parse a syntax tree while the peeked NodeKind satisifies a condition. /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. -fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) -> Markup +fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - let mut tree = vec![]; + p.start(); while !p.eof() && f(p) { - if let Some(node) = markup_node(p, &mut at_start) { - at_start &= matches!(node, MarkupNode::Space | MarkupNode::Parbreak(_)); - tree.push(node); + markup_node(p, &mut at_start); + if let Some(node) = p.last_child() { + at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment); } } - tree + p.end(NodeKind::Markup); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) -> Option { - let token = p.peek()?; - let span = p.peek_span(); - let node = match token { - // Whitespace. - Token::Space(newlines) => { - *at_start |= newlines > 0; - if newlines < 2 { - MarkupNode::Space - } else { - MarkupNode::Parbreak(span) +fn markup_node(p: &mut Parser, at_start: &mut bool) { + if let Some(token) = p.peek() { + match token { + // Whitespace. + NodeKind::Space(newlines) => { + *at_start |= newlines > 0; + + if newlines < 2 { + p.eat(); + } else { + p.convert(NodeKind::Parbreak); + } } - } - // Text. - Token::Text(text) => MarkupNode::Text(text.into()), - Token::Tilde => MarkupNode::Text("\u{00A0}".into()), - Token::HyphHyph => MarkupNode::Text("\u{2013}".into()), - Token::HyphHyphHyph => MarkupNode::Text("\u{2014}".into()), - Token::UnicodeEscape(t) => MarkupNode::Text(unicode_escape(p, t)), - - // Markup. - Token::Backslash => MarkupNode::Linebreak(span), - Token::Star => MarkupNode::Strong(span), - Token::Underscore => MarkupNode::Emph(span), - Token::Raw(t) => raw(p, t), - Token::Eq if *at_start => return Some(heading(p)), - Token::Hyph if *at_start => return Some(list_node(p)), - Token::Numbering(number) if *at_start => return Some(enum_node(p, number)), - - // Line-based markup that is not currently at the start of the line. - Token::Eq | Token::Hyph | Token::Numbering(_) => { - MarkupNode::Text(p.peek_src().into()) - } + // Text. + NodeKind::UnicodeEscape(u) => { + if !u.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + )); + p.unsuccessful(); + return; + } + + if u.character.is_none() { + let src = p.peek_src(); + p.convert(NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + )); + p.start(); + p.end(NodeKind::Text(src.into())); + return; + } - // Hashtag + keyword / identifier. - Token::Ident(_) - | Token::Let - | Token::If - | Token::While - | Token::For - | Token::Import - | Token::Include => { - let stmt = matches!(token, Token::Let | Token::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group, TokenMode::Code); - let expr = expr_with(p, true, 0); - if stmt && expr.is_some() && !p.eof() { - p.expected_at(p.prev_end(), "semicolon or line break"); + p.eat(); } - p.end_group(); + NodeKind::Raw(r) => { + if !r.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected backtick(s)".into(), + )); + p.unsuccessful(); + return; + } - return expr.map(MarkupNode::Expr); - } + p.eat(); + } + NodeKind::Text(_) + | NodeKind::EnDash + | NodeKind::EmDash + | NodeKind::NonBreakingSpace => { + p.eat(); + } - // Block and template. - Token::LeftBrace => return Some(MarkupNode::Expr(block(p))), - Token::LeftBracket => return Some(MarkupNode::Expr(template(p))), + // Markup. + NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak => { + p.eat(); + } - // Comments. - Token::LineComment(_) | Token::BlockComment(_) => { - p.eat(); - return None; - } + NodeKind::Eq if *at_start => heading(p), + NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - _ => { - *at_start = false; - p.unexpected(); - return None; - } - }; - p.eat(); - Some(node) -} + // Line-based markup that is not currently at the start of the line. + NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + p.convert(NodeKind::Text(p.peek_src().into())) + } -/// Handle a unicode escape sequence. -fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> EcoString { - let span = p.peek_span(); - let text = if let Some(c) = resolve::resolve_hex(token.sequence) { - c.into() - } else { - // Print out the escape sequence verbatim if it is invalid. - p.error(span, "invalid unicode escape sequence"); - p.peek_src().into() - }; + // Hashtag + keyword / identifier. + NodeKind::Ident(_) + | NodeKind::Let + | NodeKind::If + | NodeKind::While + | NodeKind::For + | NodeKind::Import + | NodeKind::Include => { + let stmt = matches!(token, NodeKind::Let | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; + + p.start_group(group, TokenMode::Code); + expr_with(p, true, 0); + if stmt && p.success() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + } - if !token.terminated { - p.error(span.end, "expected closing brace"); - } + // Block and template. + NodeKind::LeftBrace => { + block(p); + } + NodeKind::LeftBracket => { + template(p); + } - text -} + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + } -/// Handle a raw block. -fn raw(p: &mut Parser, token: RawToken) -> MarkupNode { - let column = p.column(p.next_start()); - let span = p.peek_span(); - let raw = resolve::resolve_raw(span, column, token.backticks, token.text); - if !token.terminated { - p.error(span.end, "expected backtick(s)"); + _ => { + *at_start = false; + p.unexpected(); + } + }; } - MarkupNode::Raw(Box::new(raw)) } /// Parse a heading. -fn heading(p: &mut Parser) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Eq); +fn heading(p: &mut Parser) { + p.start(); + p.start(); + p.eat_assert(NodeKind::Eq); // Count depth. let mut level: usize = 1; - while p.eat_if(Token::Eq) { + while p.eat_if(NodeKind::Eq) { level += 1; } if level > 6 { - return MarkupNode::Text(p.get(start .. p.prev_end()).into()); + p.lift(); + p.end(NodeKind::Text(EcoString::from('=').repeat(level))); + } else { + p.end(NodeKind::HeadingLevel(level as u8)); + let column = p.column(p.prev_end()); + markup_indented(p, column); + p.end(NodeKind::Heading); } - - let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::Heading(Box::new(HeadingNode { - span: p.span_from(start), - level, - body, - })) } /// Parse a single list item. -fn list_node(p: &mut Parser) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Hyph); +fn list_node(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::ListBullet); let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::List(Box::new(ListNode { span: p.span_from(start), body })) + markup_indented(p, column); + p.end(NodeKind::List); } /// Parse a single enum item. -fn enum_node(p: &mut Parser, number: Option) -> MarkupNode { - let start = p.next_start(); - p.eat_assert(Token::Numbering(number)); +fn enum_node(p: &mut Parser) { + p.start(); + if !matches!(p.eat(), Some(NodeKind::EnumNumbering(_))) { + panic!("enum item does not start with numbering") + }; let column = p.column(p.prev_end()); - let body = markup_indented(p, column); - MarkupNode::Enum(Box::new(EnumNode { - span: p.span_from(start), - number, - body, - })) + markup_indented(p, column); + p.end(NodeKind::Enum); } /// Parse an expression. -fn expr(p: &mut Parser) -> Option { +fn expr(p: &mut Parser) { expr_with(p, false, 0) } @@ -231,134 +229,167 @@ fn expr(p: &mut Parser) -> Option { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option { - let start = p.next_start(); - let mut lhs = match p.eat_map(UnOp::from_token) { +fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { + p.start(); + let mut offset = p.child_count(); + // Start the unary expression. + match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - let expr = expr_with(p, atomic, prec)?; - Expr::Unary(Box::new(UnaryExpr { span: p.span_from(start), op, expr })) + expr_with(p, atomic, prec); + + if p.may_lift_abort() { + return; + } + + p.end_and_start_with(NodeKind::Unary); + } + None => { + primary(p, atomic); + if p.may_lift_abort() { + return; + } } - None => primary(p, atomic)?, }; loop { // Exclamation mark, parenthesis or bracket means this is a function // call. - if matches!(p.peek_direct(), Some(Token::LeftParen | Token::LeftBracket)) { - lhs = call(p, lhs)?; + if matches!( + p.peek_direct(), + Some(NodeKind::LeftParen | NodeKind::LeftBracket) + ) { + call(p, p.child_count() - offset); continue; } - if p.eat_if(Token::With) { - lhs = with_expr(p, lhs)?; + if p.peek() == Some(NodeKind::With) { + with_expr(p, p.child_count() - offset); + + if p.may_lift_abort() { + return; + } } if atomic { + p.lift(); break; } - let op = match p.peek().and_then(BinOp::from_token) { + let op = match p.peek().as_ref().and_then(BinOp::from_token) { Some(binop) => binop, - None => break, + None => { + p.lift(); + break; + } }; let mut prec = op.precedence(); if prec < min_prec { - break; + { + p.lift(); + break; + }; } p.eat(); + match op.associativity() { Associativity::Left => prec += 1, Associativity::Right => {} } - let rhs = match expr_with(p, atomic, prec) { - Some(rhs) => rhs, - None => break, - }; + expr_with(p, atomic, prec); - let span = lhs.span().join(rhs.span()); - lhs = Expr::Binary(Box::new(BinaryExpr { span, lhs, op, rhs })); - } + if !p.success() { + p.lift(); + break; + } - Some(lhs) + offset = p.end_and_start_with(NodeKind::Binary).0; + } } /// Parse a primary expression. -fn primary(p: &mut Parser, atomic: bool) -> Option { - if let Some(expr) = literal(p) { - return Some(expr); +fn primary(p: &mut Parser, atomic: bool) { + if literal(p) { + return; } match p.peek() { // Things that start with an identifier. - Some(Token::Ident(string)) => { - let ident = Ident { - span: p.eat_span(), - string: string.into(), - }; + Some(NodeKind::Ident(_)) => { + // Start closure params. + p.start(); + p.eat(); // Arrow means this is a closure's lone parameter. - Some(if !atomic && p.eat_if(Token::Arrow) { - let body = expr(p)?; - Expr::Closure(Box::new(ClosureExpr { - span: ident.span.join(body.span()), - name: None, - params: vec![ClosureParam::Pos(ident)], - body: Rc::new(body), - })) + if !atomic && p.peek() == Some(NodeKind::Arrow) { + p.end_and_start_with(NodeKind::ClosureParams); + p.eat(); + + expr(p); + + p.end_or_abort(NodeKind::Closure); } else { - Expr::Ident(Box::new(ident)) - }) + p.lift(); + } } // Structures. - Some(Token::LeftParen) => parenthesized(p), - Some(Token::LeftBracket) => Some(template(p)), - Some(Token::LeftBrace) => Some(block(p)), + Some(NodeKind::LeftParen) => parenthesized(p), + Some(NodeKind::LeftBracket) => template(p), + Some(NodeKind::LeftBrace) => block(p), // Keywords. - Some(Token::Let) => let_expr(p), - Some(Token::If) => if_expr(p), - Some(Token::While) => while_expr(p), - Some(Token::For) => for_expr(p), - Some(Token::Import) => import_expr(p), - Some(Token::Include) => include_expr(p), + Some(NodeKind::Let) => let_expr(p), + Some(NodeKind::If) => if_expr(p), + Some(NodeKind::While) => while_expr(p), + Some(NodeKind::For) => for_expr(p), + Some(NodeKind::Import) => import_expr(p), + Some(NodeKind::Include) => include_expr(p), // Nothing. _ => { p.expected("expression"); - None + p.unsuccessful(); } } } /// Parse a literal. -fn literal(p: &mut Parser) -> Option { - let span = p.peek_span(); - let lit = match p.peek()? { +fn literal(p: &mut Parser) -> bool { + let peeked = if let Some(p) = p.peek() { + p + } else { + return false; + }; + + match peeked { // Basic values. - Token::None => Lit::None(span), - Token::Auto => Lit::Auto(span), - Token::Bool(b) => Lit::Bool(span, b), - Token::Int(i) => Lit::Int(span, i), - Token::Float(f) => Lit::Float(span, f), - Token::Length(val, unit) => Lit::Length(span, val, unit), - Token::Angle(val, unit) => Lit::Angle(span, val, unit), - Token::Percent(p) => Lit::Percent(span, p), - Token::Fraction(p) => Lit::Fractional(span, p), - Token::Str(token) => Lit::Str(span, { - if !token.terminated { - p.expected_at(span.end, "quote"); + NodeKind::None + | NodeKind::Auto + | NodeKind::Int(_) + | NodeKind::Float(_) + | NodeKind::Bool(_) + | NodeKind::Fraction(_) + | NodeKind::Length(_, _) + | NodeKind::Angle(_, _) + | NodeKind::Percentage(_) => { + p.eat(); + } + NodeKind::Str(s) => { + p.eat(); + if !s.terminated { + p.expected_at("quote"); } - resolve::resolve_string(token.string) - }), - _ => return None, - }; - p.eat(); - Some(Expr::Lit(Box::new(lit))) + } + _ => { + return false; + } + } + + true } /// Parse something that starts with a parenthesis, which can be either of: @@ -366,433 +397,508 @@ fn literal(p: &mut Parser) -> Option { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) -> Option { +fn parenthesized(p: &mut Parser) { + let offset = p.child_count(); + p.start(); p.start_group(Group::Paren, TokenMode::Code); - let colon = p.eat_if(Token::Colon); - let (items, has_comma) = collection(p); - let span = p.end_group(); + let colon = p.eat_if(NodeKind::Colon); + let kind = collection(p).0; + p.end_group(); + let token_count = p.child_count() - offset; - // Leading colon makes this a dictionary. + // Leading colon makes this a (empty) dictionary. if colon { - return Some(dict(p, items, span)); + p.lift(); + dict(p, token_count); + return; } // Arrow means this is a closure's parameter list. - if p.eat_if(Token::Arrow) { - let params = params(p, items); - let body = expr(p)?; - return Some(Expr::Closure(Box::new(ClosureExpr { - span: span.join(body.span()), - name: None, - params, - body: Rc::new(body), - }))); + if p.peek() == Some(NodeKind::Arrow) { + p.start_with(token_count); + params(p, 0, true); + p.end(NodeKind::ClosureParams); + + p.eat_assert(NodeKind::Arrow); + + expr(p); + + p.end_or_abort(NodeKind::Closure); + return; } // Find out which kind of collection this is. - Some(match items.as_slice() { - [] => array(p, items, span), - [CallArg::Pos(_)] if !has_comma => match items.into_iter().next() { - Some(CallArg::Pos(expr)) => Expr::Group(Box::new(GroupExpr { span, expr })), - _ => unreachable!(), - }, - [CallArg::Pos(_), ..] => array(p, items, span), - [CallArg::Named(_), ..] => dict(p, items, span), - [CallArg::Spread(expr), ..] => { - p.error(expr.span(), "spreading is not allowed here"); - return None; + match kind { + CollectionKind::Group => p.end(NodeKind::Group), + CollectionKind::PositionalCollection => { + p.lift(); + array(p, token_count); } - }) + CollectionKind::NamedCollection => { + p.lift(); + dict(p, token_count); + } + } +} + +/// The type of a collection. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum CollectionKind { + /// The collection is only one item and has no comma. + Group, + /// The collection starts with a positional and has more items or a trailing + /// comma. + PositionalCollection, + /// The collection starts with a named item. + NamedCollection, } /// Parse a collection. /// -/// Returns whether the literal contained any commas. -fn collection(p: &mut Parser) -> (Vec, bool) { - let mut items = vec![]; +/// Returns the length of the collection and whether the literal contained any +/// commas. +fn collection(p: &mut Parser) -> (CollectionKind, usize) { + let mut items = 0; + let mut kind = CollectionKind::PositionalCollection; + let mut seen_spread = false; let mut has_comma = false; let mut missing_coma = None; while !p.eof() { - if let Some(arg) = item(p) { - items.push(arg); + let item_kind = item(p); + if p.success() { + if items == 0 && item_kind == CollectionItemKind::Named { + kind = CollectionKind::NamedCollection; + } + + if item_kind == CollectionItemKind::ParameterSink { + seen_spread = true; + } + + items += 1; if let Some(pos) = missing_coma.take() { - p.expected_at(pos, "comma"); + p.expected_at_child(pos, "comma"); } if p.eof() { break; } - let behind = p.prev_end(); - if p.eat_if(Token::Comma) { + if p.eat_if(NodeKind::Comma) { has_comma = true; } else { - missing_coma = Some(behind); + missing_coma = Some(p.child_count()); } } } - (items, has_comma) + if !has_comma + && items == 1 + && !seen_spread + && kind == CollectionKind::PositionalCollection + { + kind = CollectionKind::Group; + } + + (kind, items) } -/// Parse an expression or a named pair. -fn item(p: &mut Parser) -> Option { - if p.eat_if(Token::Dots) { - return expr(p).map(CallArg::Spread); +/// What kind of item is this? +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum CollectionItemKind { + /// A named item. + Named, + /// An unnamed item. + Unnamed, + /// A parameter sink. + ParameterSink, +} + +/// Parse an expression or a named pair. Returns if this is a named pair. +fn item(p: &mut Parser) -> CollectionItemKind { + p.start(); + if p.eat_if(NodeKind::Dots) { + expr(p); + + p.end_or_abort(NodeKind::ParameterSink); + return CollectionItemKind::ParameterSink; + } + + expr(p); + + if p.may_lift_abort() { + return CollectionItemKind::Unnamed; } - let first = expr(p)?; - if p.eat_if(Token::Colon) { - if let Expr::Ident(name) = first { - Some(CallArg::Named(Named { name: *name, expr: expr(p)? })) + if p.eat_if(NodeKind::Colon) { + let child = p.child(1).unwrap(); + if matches!(child.kind(), &NodeKind::Ident(_)) { + expr(p); + p.end_or_abort(NodeKind::Named); } else { - p.error(first.span(), "expected identifier"); + p.wrap( + 1, + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), + ); + expr(p); - None + p.end(NodeKind::Named); + p.unsuccessful(); } + + CollectionItemKind::Named } else { - Some(CallArg::Pos(first)) + p.lift(); + CollectionItemKind::Unnamed } } /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, items: Vec, span: Span) -> Expr { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(expr) => Some(expr), - CallArg::Named(_) => { - p.error(item.span(), "expected expression, found named pair"); - None - } - CallArg::Spread(_) => { - p.error(item.span(), "spreading is not allowed here"); - None - } - }); - Expr::Array(Box::new(ArrayExpr { span, items: iter.collect() })) +fn array(p: &mut Parser, items: usize) { + p.start_with(items); + p.filter_children( + 0, + |x| match x.kind() { + NodeKind::Named | NodeKind::ParameterSink => false, + _ => true, + }, + |kind| match kind { + NodeKind::Named => ( + ErrorPosition::Full, + "expected expression, found named pair".into(), + ), + NodeKind::ParameterSink => { + (ErrorPosition::Full, "spreading is not allowed here".into()) + } + _ => unreachable!(), + }, + ); + + p.end(NodeKind::Array) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, items: Vec, span: Span) -> Expr { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Named(named) => Some(named), - CallArg::Pos(_) => { - p.error(item.span(), "expected named pair, found expression"); - None - } - CallArg::Spread(_) => { - p.error(item.span(), "spreading is not allowed here"); - None - } - }); - Expr::Dict(Box::new(DictExpr { span, items: iter.collect() })) +fn dict(p: &mut Parser, items: usize) { + p.start_with(items); + p.filter_children( + 0, + |x| { + x.kind() == &NodeKind::Named + || x.kind().is_parenthesis() + || x.kind() == &NodeKind::Comma + || x.kind() == &NodeKind::Colon + }, + |kind| match kind { + NodeKind::ParameterSink => { + (ErrorPosition::Full, "spreading is not allowed here".into()) + } + _ => ( + ErrorPosition::Full, + "expected named pair, found expression".into(), + ), + }, + ); + p.end(NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, items: Vec) -> Vec { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(Expr::Ident(ident)) => Some(ClosureParam::Pos(*ident)), - CallArg::Named(named) => Some(ClosureParam::Named(named)), - CallArg::Spread(Expr::Ident(ident)) => Some(ClosureParam::Sink(*ident)), - _ => { - p.error(item.span(), "expected identifier"); - None - } - }); - iter.collect() -} - -/// Convert a collection into a list of identifiers, producing errors for -/// anything other than identifiers. -fn idents(p: &mut Parser, items: Vec) -> Vec { - let iter = items.into_iter().filter_map(|item| match item { - CallArg::Pos(Expr::Ident(ident)) => Some(*ident), - _ => { - p.error(item.span(), "expected identifier"); - None - } - }); - iter.collect() +fn params(p: &mut Parser, count: usize, allow_parens: bool) { + p.filter_children( + count, + |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, + NodeKind::ParameterSink => matches!( + x.children().last().map(|x| x.kind()), + Some(&NodeKind::Ident(_)) + ), + _ => false, + } + || (allow_parens && x.kind().is_parenthesis()), + |_| (ErrorPosition::Full, "expected identifier".into()), + ); } // Parse a template block: `[...]`. -fn template(p: &mut Parser) -> Expr { +fn template(p: &mut Parser) { + p.start(); p.start_group(Group::Bracket, TokenMode::Markup); - let tree = markup(p); - let span = p.end_group(); - Expr::Template(Box::new(TemplateExpr { span, body: tree })) + markup(p); + p.end_group(); + p.end(NodeKind::Template); } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) -> Expr { +fn block(p: &mut Parser) { + p.start(); p.start_group(Group::Brace, TokenMode::Code); - let mut exprs = vec![]; while !p.eof() { p.start_group(Group::Stmt, TokenMode::Code); - if let Some(expr) = expr(p) { - exprs.push(expr); + expr(p); + if p.success() { if !p.eof() { - p.expected_at(p.prev_end(), "semicolon or line break"); + p.expected_at("semicolon or line break"); } } p.end_group(); // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, Token::Space(_))); + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } - let span = p.end_group(); - Expr::Block(Box::new(BlockExpr { span, exprs })) + p.end_group(); + p.end(NodeKind::Block); } /// Parse a function call. -fn call(p: &mut Parser, callee: Expr) -> Option { - let mut args = match p.peek_direct() { - Some(Token::LeftParen) => args(p), - Some(Token::LeftBracket) => CallArgs { - span: Span::at(p.id(), callee.span().end), - items: vec![], - }, +fn call(p: &mut Parser, callee: usize) { + p.start_with(callee); + match p.peek_direct() { + Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), _ => { - p.expected_at(p.prev_end(), "argument list"); - return None; + p.expected_at("argument list"); + p.may_end_abort(NodeKind::Call); + return; } }; - while p.peek_direct() == Some(Token::LeftBracket) { - let body = template(p); - args.items.push(CallArg::Pos(body)); - } - - Some(Expr::Call(Box::new(CallExpr { - span: p.span_from(callee.span().start), - callee, - args, - }))) + p.end(NodeKind::Call); } /// Parse the arguments to a function call. -fn args(p: &mut Parser) -> CallArgs { - p.start_group(Group::Paren, TokenMode::Code); - let items = collection(p).0; - let span = p.end_group(); - CallArgs { span, items } +fn args(p: &mut Parser, allow_template: bool) { + p.start(); + if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start_group(Group::Paren, TokenMode::Code); + collection(p); + p.end_group(); + } + + while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { + template(p); + } + + p.end(NodeKind::CallArgs); } /// Parse a with expression. -fn with_expr(p: &mut Parser, callee: Expr) -> Option { - if p.peek() == Some(Token::LeftParen) { - Some(Expr::With(Box::new(WithExpr { - span: p.span_from(callee.span().start), - callee, - args: args(p), - }))) +fn with_expr(p: &mut Parser, preserve: usize) { + p.start_with(preserve); + p.eat_assert(NodeKind::With); + + if p.peek() == Some(NodeKind::LeftParen) { + args(p, false); + p.end(NodeKind::WithExpr); } else { p.expected("argument list"); - None + p.may_end_abort(NodeKind::WithExpr); } } /// Parse a let expression. -fn let_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Let); - - let mut output = None; - if let Some(binding) = ident(p) { - let mut init = None; +fn let_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Let); + + let offset = p.child_count(); + ident(p); + if p.may_end_abort(NodeKind::LetExpr) { + return; + } - if p.eat_if(Token::With) { - init = with_expr(p, Expr::Ident(Box::new(binding.clone()))); + if p.peek() == Some(NodeKind::With) { + with_expr(p, p.child_count() - offset); + } else { + // If a parenthesis follows, this is a function definition. + let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start(); + p.start_group(Group::Paren, TokenMode::Code); + let offset = p.child_count(); + collection(p); + params(p, offset, true); + p.end_group(); + p.end(NodeKind::ClosureParams); + true } else { - // If a parenthesis follows, this is a function definition. - let mut maybe_params = None; - if p.peek_direct() == Some(Token::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); - let items = collection(p).0; - maybe_params = Some(params(p, items)); - p.end_group(); - } + false + }; - if p.eat_if(Token::Eq) { - init = expr(p); - } else if maybe_params.is_some() { - // Function definitions must have a body. - p.expected_at(p.prev_end(), "body"); - } + if p.eat_if(NodeKind::Eq) { + expr(p); + } else if has_params { + // Function definitions must have a body. + p.expected_at("body"); + } - // Rewrite into a closure expression if it's a function definition. - if let Some(params) = maybe_params { - let body = init?; - init = Some(Expr::Closure(Box::new(ClosureExpr { - span: binding.span.join(body.span()), - name: Some(binding.clone()), - params, - body: Rc::new(body), - }))); + // Rewrite into a closure expression if it's a function definition. + if has_params { + if p.may_end_abort(NodeKind::LetExpr) { + return; } - } - output = Some(Expr::Let(Box::new(LetExpr { - span: p.span_from(start), - binding, - init, - }))); + p.start_with(p.child_count() - offset); + p.end(NodeKind::Closure) + } } - output + p.end(NodeKind::LetExpr); } /// Parse an if expresion. -fn if_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::If); - - let mut output = None; - if let Some(condition) = expr(p) { - if let Some(if_body) = body(p) { - let mut else_body = None; - if p.eat_if(Token::Else) { - if p.peek() == Some(Token::If) { - else_body = if_expr(p); - } else { - else_body = body(p); - } - } +fn if_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::If); - output = Some(Expr::If(Box::new(IfExpr { - span: p.span_from(start), - condition, - if_body, - else_body, - }))); + expr(p); + if p.may_end_abort(NodeKind::IfExpr) { + return; + } + + body(p); + if p.may_end_abort(NodeKind::IfExpr) { + // Expected function body. + return; + } + + if p.eat_if(NodeKind::Else) { + if p.peek() == Some(NodeKind::If) { + if_expr(p); + } else { + body(p); } } - output + p.end(NodeKind::IfExpr); } /// Parse a while expresion. -fn while_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::While); - - let mut output = None; - if let Some(condition) = expr(p) { - if let Some(body) = body(p) { - output = Some(Expr::While(Box::new(WhileExpr { - span: p.span_from(start), - condition, - body, - }))); - } +fn while_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::While); + + expr(p); + + if p.may_end_abort(NodeKind::WhileExpr) { + return; } - output + body(p); + if !p.may_end_abort(NodeKind::WhileExpr) { + p.end(NodeKind::WhileExpr); + } } /// Parse a for expression. -fn for_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::For); - - let mut output = None; - if let Some(pattern) = for_pattern(p) { - if p.eat_expect(Token::In) { - if let Some(iter) = expr(p) { - if let Some(body) = body(p) { - output = Some(Expr::For(Box::new(ForExpr { - span: p.span_from(start), - pattern, - iter, - body, - }))); - } - } - } +fn for_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::For); + + for_pattern(p); + + if p.may_end_abort(NodeKind::ForExpr) { + return; } - output + if p.eat_expect(NodeKind::In) { + expr(p); + + if p.may_end_abort(NodeKind::ForExpr) { + return; + } + + body(p); + + if !p.may_end_abort(NodeKind::ForExpr) { + p.end(NodeKind::ForExpr); + } + } else { + p.unsuccessful(); + p.may_end_abort(NodeKind::ForExpr); + } } /// Parse a for loop pattern. -fn for_pattern(p: &mut Parser) -> Option { - let first = ident(p)?; - if p.eat_if(Token::Comma) { - if let Some(second) = ident(p) { - return Some(ForPattern::KeyValue(first, second)); +fn for_pattern(p: &mut Parser) { + p.start(); + ident(p); + + if p.may_end_abort(NodeKind::ForPattern) { + return; + } + + if p.peek() == Some(NodeKind::Comma) { + p.eat(); + + ident(p); + + if p.may_end_abort(NodeKind::ForPattern) { + return; } } - Some(ForPattern::Value(first)) + + p.end(NodeKind::ForPattern); } /// Parse an import expression. -fn import_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Import); +fn import_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Import); - let imports = if p.eat_if(Token::Star) { - // This is the wildcard scenario. - Imports::Wildcard - } else { + if !p.eat_if(NodeKind::Star) { // This is the list of identifiers scenario. + p.start(); p.start_group(Group::Imports, TokenMode::Code); - let items = collection(p).0; - if items.is_empty() { - p.expected_at(p.prev_end(), "import items"); + let offset = p.child_count(); + let items = collection(p).1; + if items == 0 { + p.expected_at("import items"); } p.end_group(); - Imports::Idents(idents(p, items)) + + p.filter_children( + offset, + |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), + |_| (ErrorPosition::Full, "expected identifier".into()), + ); + p.end(NodeKind::ImportItems); }; - let mut output = None; - if p.eat_expect(Token::From) { - if let Some(path) = expr(p) { - output = Some(Expr::Import(Box::new(ImportExpr { - span: p.span_from(start), - imports, - path, - }))); - } + if p.eat_expect(NodeKind::From) { + expr(p); } - output + p.end(NodeKind::ImportExpr); } /// Parse an include expression. -fn include_expr(p: &mut Parser) -> Option { - let start = p.next_start(); - p.eat_assert(Token::Include); +fn include_expr(p: &mut Parser) { + p.start(); + p.eat_assert(NodeKind::Include); - expr(p).map(|path| { - Expr::Include(Box::new(IncludeExpr { span: p.span_from(start), path })) - }) + expr(p); + p.end(NodeKind::IncludeExpr); } /// Parse an identifier. -fn ident(p: &mut Parser) -> Option { - if let Some(Token::Ident(string)) = p.peek() { - Some(Ident { - span: p.eat_span(), - string: string.into(), - }) +fn ident(p: &mut Parser) { + if let Some(NodeKind::Ident(_)) = p.peek() { + p.eat(); } else { p.expected("identifier"); - None + p.unsuccessful(); } } /// Parse a control flow body. -fn body(p: &mut Parser) -> Option { +fn body(p: &mut Parser) { match p.peek() { - Some(Token::LeftBracket) => Some(template(p)), - Some(Token::LeftBrace) => Some(block(p)), + Some(NodeKind::LeftBracket) => template(p), + Some(NodeKind::LeftBrace) => block(p), _ => { - p.expected_at(p.prev_end(), "body"); - None + p.expected_at("body"); + p.unsuccessful(); } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 347d6f71..f62e882a 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,29 +1,34 @@ use std::ops::Range; +use std::rc::Rc; use super::{TokenMode, Tokens}; -use crate::diag::Error; use crate::source::{SourceFile, SourceId}; -use crate::syntax::{IntoSpan, Pos, Span, Token}; +use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. source: &'s SourceFile, - /// Parsing errors. - errors: Vec, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. groups: Vec, /// The next token. - next: Option>, + next: Option, /// The peeked token. /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option>, + peeked: Option, /// The end index of the last (non-whitespace if in code mode) token. prev_end: usize, /// The start index of the peeked token. next_start: usize, + /// A stack of outer children vectors. + stack: Vec>, + /// The children of the currently built node. + children: Vec, + /// Whether the last parsing step was successful. + success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -32,9 +37,6 @@ struct GroupEntry { /// For example, a [`Group::Paren`] will be ended by /// [`Token::RightParen`]. pub kind: Group, - /// The start index of the group. Used by `Parser::end_group` to return the - /// group's full span. - pub start: usize, /// The mode the parser was in _before_ the group started (to which we go /// back once the group ends). pub prev_mode: TokenMode, @@ -60,51 +62,204 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source.src(), TokenMode::Markup); + let mut tokens = Tokens::new(source, TokenMode::Markup); let next = tokens.next(); Self { source, - errors: vec![], tokens, groups: vec![], - next, + next: next.clone(), peeked: next, prev_end: 0, next_start: 0, + stack: vec![], + children: vec![], + success: true, } } - /// Finish parsing and return all errors. - pub fn finish(self) -> Vec { - self.errors - } - /// The id of the parsed source file. pub fn id(&self) -> SourceId { self.source.id() } + /// Start a nested node. + /// + /// Each start call has to be matched with a call to `end`, + /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. + pub fn start(&mut self) { + self.stack.push(std::mem::take(&mut self.children)); + } + + /// Start a nested node, preserving a number of the current children. + pub fn start_with(&mut self, preserve: usize) { + let preserved = self.children.drain(self.children.len() - preserve ..).collect(); + self.stack.push(std::mem::replace(&mut self.children, preserved)); + } + + /// Filter the last children using the given predicate. + pub fn filter_children(&mut self, count: usize, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + for child in &mut self.children[count ..] { + if !((self.tokens.mode() != TokenMode::Code + || Self::skip_type_ext(child.kind(), false)) + || child.kind().is_error() + || f(&child)) + { + let (pos, msg) = error(child.kind()); + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) + .into(); + } + } + } + + pub fn child(&self, child: usize) -> Option<&Green> { + self.node_index_from_back(child).map(|i| &self.children[i]) + } + + fn node_index_from_back(&self, child: usize) -> Option { + let len = self.children.len(); + let code = self.tokens.mode() == TokenMode::Code; + let mut seen = 0; + for x in (0 .. len).rev() { + if self.skip_type(self.children[x].kind()) && code { + continue; + } + if seen == child { + return Some(x); + } + seen += 1; + } + + None + } + + /// End the current node as a node of given `kind`. + pub fn end(&mut self, kind: NodeKind) { + let outer = self.stack.pop().unwrap(); + let mut children = std::mem::replace(&mut self.children, outer); + + // have trailing whitespace continue to sit in self.children in code + // mode. + let mut remains = vec![]; + if self.tokens.mode() == TokenMode::Code { + let len = children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(&children[n].kind()) { + break; + } + + remains.push(children.pop().unwrap()); + } + remains.reverse(); + } + + let len = children.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + self.children.extend(remains); + self.success = true; + } + + /// End the current node as a node of given `kind`, and start a new node + /// with the ended node as a first child. The function returns how many + /// children the stack frame had before and how many were appended (accounts + /// for trivia). + pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { + let stack_offset = self.stack.last().unwrap().len(); + self.end(kind); + let diff = self.children.len() - stack_offset; + self.start_with(diff); + (stack_offset, diff) + } + + pub fn wrap(&mut self, index: usize, kind: NodeKind) { + let index = self.node_index_from_back(index).unwrap(); + let child = std::mem::take(&mut self.children[index]); + let item = GreenNode::with_child(kind, child.len(), child); + self.children[index] = item.into(); + } + + pub fn convert(&mut self, kind: NodeKind) { + self.start(); + self.eat(); + self.end(kind); + } + + /// End the current node and undo its existence, inling all accumulated + /// children into its parent. + pub fn lift(&mut self) { + let outer = self.stack.pop().unwrap(); + let children = std::mem::replace(&mut self.children, outer); + self.children.extend(children); + self.success = true; + } + + /// End the current node and undo its existence, deleting all accumulated + /// children. + pub fn abort(&mut self, msg: impl Into) { + self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); + self.success = false; + } + + pub fn may_lift_abort(&mut self) -> bool { + if !self.success { + self.lift(); + self.success = false; + true + } else { + false + } + } + + pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { + if !self.success { + self.end(kind); + self.success = false; + true + } else { + false + } + } + + /// End the current node as a node of given `kind` if the last parse was + /// successful, otherwise, abort. + pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { + if self.success { + self.end(kind); + true + } else { + self.may_end_abort(kind); + false + } + } + + pub fn finish(&mut self) -> Rc { + if let Green::Node(n) = self.children.pop().unwrap() { + n + } else { + panic!() + } + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.peek().is_none() } - /// Consume the next token. - pub fn eat(&mut self) -> Option> { + pub fn eat(&mut self) -> Option { let token = self.peek()?; self.bump(); Some(token) } - /// Eat the next token and return its source range. - pub fn eat_span(&mut self) -> Span { - let start = self.next_start(); - self.eat(); - Span::new(self.id(), start, self.prev_end()) - } - /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: Token) -> bool { + pub fn eat_if(&mut self, t: NodeKind) -> bool { if self.peek() == Some(t) { self.bump(); true @@ -116,7 +271,7 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(Token<'s>) -> Option, + F: FnOnce(NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); @@ -128,16 +283,16 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: Token) -> bool { - let eaten = self.eat_if(t); + pub fn eat_expect(&mut self, t: NodeKind) -> bool { + let eaten = self.eat_if(t.clone()); if !eaten { - self.expected_at(self.prev_end(), t.name()); + self.expected_at(&t.to_string()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: Token) { + pub fn eat_assert(&mut self, t: NodeKind) { let next = self.eat(); debug_assert_eq!(next, Some(t)); } @@ -145,7 +300,7 @@ impl<'s> Parser<'s> { /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(Token<'s>) -> bool, + F: FnMut(NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -153,42 +308,25 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option> { - self.peeked + pub fn peek(&self) -> Option { + self.peeked.clone() } /// Peek at the next token if it follows immediately after the last one /// without any whitespace in between. - pub fn peek_direct(&self) -> Option> { + pub fn peek_direct(&self) -> Option<&NodeKind> { if self.next_start() == self.prev_end() { - self.peeked + self.peeked.as_ref() } else { None } } - /// Peek at the span of the next token. - /// - /// Has length zero if `peek()` returns `None`. - pub fn peek_span(&self) -> Span { - Span::new(self.id(), self.next_start(), self.next_end()) - } - /// Peek at the source of the next token. pub fn peek_src(&self) -> &'s str { self.get(self.next_start() .. self.next_end()) } - /// Checks whether the next token fulfills a condition. - /// - /// Returns `false` if there is no next token. - pub fn check(&self, f: F) -> bool - where - F: FnOnce(Token<'s>) -> bool, - { - self.peek().map_or(false, f) - } - /// The byte index at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in code mode. @@ -219,11 +357,6 @@ impl<'s> Parser<'s> { self.source.get(range).unwrap() } - /// The span from `start` to [`self.prev_end()`](Self::prev_end). - pub fn span_from(&self, start: impl Into) -> Span { - Span::new(self.id(), start, self.prev_end()) - } - /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to @@ -232,19 +365,15 @@ impl<'s> Parser<'s> { /// /// This panics if the next token does not start the given group. pub fn start_group(&mut self, kind: Group, mode: TokenMode) { - self.groups.push(GroupEntry { - kind, - start: self.next_start(), - prev_mode: self.tokens.mode(), - }); + self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); self.tokens.set_mode(mode); self.repeek(); match kind { - Group::Paren => self.eat_assert(Token::LeftParen), - Group::Bracket => self.eat_assert(Token::LeftBracket), - Group::Brace => self.eat_assert(Token::LeftBrace), + Group::Paren => self.eat_assert(NodeKind::LeftParen), + Group::Bracket => self.eat_assert(NodeKind::LeftBracket), + Group::Brace => self.eat_assert(NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -254,7 +383,7 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> Span { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); @@ -264,83 +393,125 @@ impl<'s> Parser<'s> { // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { - Group::Paren => Some((Token::RightParen, true)), - Group::Bracket => Some((Token::RightBracket, true)), - Group::Brace => Some((Token::RightBrace, true)), - Group::Stmt => Some((Token::Semicolon, false)), + Group::Paren => Some((NodeKind::RightParen, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Stmt => Some((NodeKind::Semicolon, false)), Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end) { + if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. self.bump(); rescan = false; } else if required { - self.error( - self.next_start() .. self.next_start(), - format!("expected {}", end.name()), - ); + self.start(); + self.abort(format!("expected {}", end.to_string())); } } // Rescan the peeked token if the mode changed. if rescan { self.tokens.jump(self.prev_end()); - self.bump(); - } - Span::new(self.id(), group.start, self.prev_end()) - } + if prev_mode == TokenMode::Code { + let len = self.children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(self.children[n].kind()) { + break; + } + + self.children.pop(); + } + } - /// Add an error with location and message. - pub fn error(&mut self, span: impl IntoSpan, message: impl Into) { - self.errors.push(Error::new(span.into_span(self.id()), message)); + self.fast_forward(); + } } /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, span: impl IntoSpan, what: &str) { - self.error(span, format!("expected {}", what)); + pub fn expected_at(&mut self, what: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.skip_type(node.kind()) { + break; + } + found = i; + } + + self.expected_at_child(found, what); + } + + /// Add an error that `what` was expected at the given child index. + pub fn expected_at_child(&mut self, index: usize, what: &str) { + self.children.insert( + index, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error( - before .. after, - format!("expected {}, found {}", what, found.name()), - ); + self.abort(format!("expected {}, found {}", what, found.to_string())) } else { - self.expected_at(self.next_start(), what); + self.lift(); + self.expected_at(what); } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error(before .. after, format!("unexpected {}", found.name())); + self.abort(format!("unexpected {}", found.to_string())) + } else { + self.abort("unexpected end of file") } } + pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + match token { + NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::LineComment => true, + NodeKind::BlockComment => true, + _ => false, + } + } + + fn skip_type(&self, token: &NodeKind) -> bool { + Self::skip_type_ext(token, self.stop_at_newline()) + } + /// Move to the next token. fn bump(&mut self) { - self.prev_end = self.tokens.index().into(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.fast_forward(); + } + + pub fn fast_forward(&mut self) { + if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.prev_end = self.tokens.index().into(); + } self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. - while match self.next { - Some(Token::Space(n)) => n < 1 || !self.stop_at_newline(), - Some(Token::LineComment(_)) => true, - Some(Token::BlockComment(_)) => true, - _ => false, - } { - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.bump(); } } @@ -349,19 +520,19 @@ impl<'s> Parser<'s> { /// Take another look at the next token to recheck whether it ends a group. fn repeek(&mut self) { - self.peeked = self.next; - let token = match self.next { + self.peeked = self.next.clone(); + let token = match self.next.as_ref() { Some(token) => token, None => return, }; if match token { - Token::RightParen => self.inside(Group::Paren), - Token::RightBracket => self.inside(Group::Bracket), - Token::RightBrace => self.inside(Group::Brace), - Token::Semicolon => self.inside(Group::Stmt), - Token::From => self.inside(Group::Imports), - Token::Space(n) => n >= 1 && self.stop_at_newline(), + NodeKind::RightParen => self.inside(Group::Paren), + NodeKind::RightBracket => self.inside(Group::Bracket), + NodeKind::RightBrace => self.inside(Group::Brace), + NodeKind::Semicolon => self.inside(Group::Stmt), + NodeKind::From => self.inside(Group::Imports), + NodeKind::Space(n) => n > &0 && self.stop_at_newline(), _ => false, } { self.peeked = None; @@ -380,4 +551,22 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } + + pub fn last_child(&self) -> Option<&Green> { + self.children.last() + } + + pub fn success(&mut self) -> bool { + let s = self.success; + self.success = true; + s + } + + pub fn unsuccessful(&mut self) { + self.success = false; + } + + pub fn child_count(&self) -> usize { + self.children.len() + } } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 1b323847..c59c3bb1 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_newline, Scanner}; -use crate::syntax::{Ident, RawNode, Span}; +use crate::syntax::RawToken; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -48,21 +48,28 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(span: Span, column: usize, backticks: usize, text: &str) -> RawNode { +pub fn resolve_raw( + column: usize, + backticks: u8, + text: &str, + terminated: bool, +) -> RawToken { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawNode { - span, - lang: Ident::new(tag, span.with_end(span.start + tag.len())), + RawToken { + lang: Some(tag.into()), text: text.into(), + backticks, + terminated, block, } } else { - RawNode { - span, + RawToken { lang: None, text: split_lines(text).join("\n").into(), + backticks, + terminated, block: false, } } @@ -140,7 +147,6 @@ fn split_lines(text: &str) -> Vec<&str> { #[cfg(test)] #[rustfmt::skip] mod tests { - use crate::syntax::Span; use super::*; #[test] @@ -175,8 +181,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] @@ -184,13 +190,13 @@ mod tests { #[track_caller] fn test( column: usize, - backticks: usize, + backticks: u8, raw: &str, lang: Option<&str>, text: &str, block: bool, ) { - let node = resolve_raw(Span::detached(), column, backticks, raw); + let node = resolve_raw(column, backticks, raw, true); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); @@ -204,15 +210,15 @@ mod tests { // More than one backtick with lang tag. test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", None, "", false); + test(0, 2, "♥", Some("♥"), "", false); // Trimming of whitespace (tested more thoroughly in separate test). - test(0, 2, " a", None, "a", false); - test(0, 2, " a", None, " a", false); - test(0, 2, " \na", None, "a", true); + test(0, 2, " a", Some(""), "a", false); + test(0, 2, " a", Some(""), " a", false); + test(0, 2, " \na", Some(""), "a", true); // Dedenting - test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); + test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 5f969452..19d0d77b 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,9 +1,13 @@ -use super::{is_newline, Scanner}; +use super::{is_newline, resolve_raw, Scanner}; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::resolve::{resolve_hex, resolve_string}; +use crate::source::SourceFile; use crate::syntax::*; +use crate::util::EcoString; /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { + source: &'s SourceFile, s: Scanner<'s>, mode: TokenMode, } @@ -20,8 +24,12 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(src), mode } + pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self { + Self { + s: Scanner::new(source.src()), + source, + mode, + } } /// Get the current token mode. @@ -59,7 +67,7 @@ impl<'s> Tokens<'s> { } impl<'s> Iterator for Tokens<'s> { - type Item = Token<'s>; + type Item = NodeKind; /// Parse the next token in the source code. #[inline] @@ -68,19 +76,21 @@ impl<'s> Iterator for Tokens<'s> { let c = self.s.eat()?; Some(match c { // Blocks and templates. - '[' => Token::LeftBracket, - ']' => Token::RightBracket, - '{' => Token::LeftBrace, - '}' => Token::RightBrace, + '[' => NodeKind::LeftBracket, + ']' => NodeKind::RightBracket, + '{' => NodeKind::LeftBrace, + '}' => NodeKind::RightBrace, // Whitespace. - ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => Token::Space(0), + ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0), c if c.is_whitespace() => self.whitespace(), // Comments with special case for URLs. '/' if self.s.eat_if('*') => self.block_comment(), '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(), - '*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)), + '*' if self.s.eat_if('/') => { + NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()) + } // Other things. _ => match self.mode { @@ -93,7 +103,7 @@ impl<'s> Iterator for Tokens<'s> { impl<'s> Tokens<'s> { #[inline] - fn markup(&mut self, start: usize, c: char) -> Token<'s> { + fn markup(&mut self, start: usize, c: char) -> NodeKind { match c { // Escape sequences. '\\' => self.backslash(), @@ -102,13 +112,15 @@ impl<'s> Tokens<'s> { '#' => self.hash(), // Markup. - '~' => Token::Tilde, - '*' => Token::Star, - '_' => Token::Underscore, + '~' => NodeKind::NonBreakingSpace, + '*' => NodeKind::Strong, + '_' => NodeKind::Emph, '`' => self.raw(), '$' => self.math(), - '-' => self.hyph(start), - '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => Token::Eq, + '-' => self.hyph(), + '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => { + NodeKind::Eq + } c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. @@ -116,35 +128,35 @@ impl<'s> Tokens<'s> { } } - fn code(&mut self, start: usize, c: char) -> Token<'s> { + fn code(&mut self, start: usize, c: char) -> NodeKind { match c { // Parens. - '(' => Token::LeftParen, - ')' => Token::RightParen, + '(' => NodeKind::LeftParen, + ')' => NodeKind::RightParen, // Length two. - '=' if self.s.eat_if('=') => Token::EqEq, - '!' if self.s.eat_if('=') => Token::ExclEq, - '<' if self.s.eat_if('=') => Token::LtEq, - '>' if self.s.eat_if('=') => Token::GtEq, - '+' if self.s.eat_if('=') => Token::PlusEq, - '-' if self.s.eat_if('=') => Token::HyphEq, - '*' if self.s.eat_if('=') => Token::StarEq, - '/' if self.s.eat_if('=') => Token::SlashEq, - '.' if self.s.eat_if('.') => Token::Dots, - '=' if self.s.eat_if('>') => Token::Arrow, + '=' if self.s.eat_if('=') => NodeKind::EqEq, + '!' if self.s.eat_if('=') => NodeKind::ExclEq, + '<' if self.s.eat_if('=') => NodeKind::LtEq, + '>' if self.s.eat_if('=') => NodeKind::GtEq, + '+' if self.s.eat_if('=') => NodeKind::PlusEq, + '-' if self.s.eat_if('=') => NodeKind::HyphEq, + '*' if self.s.eat_if('=') => NodeKind::StarEq, + '/' if self.s.eat_if('=') => NodeKind::SlashEq, + '.' if self.s.eat_if('.') => NodeKind::Dots, + '=' if self.s.eat_if('>') => NodeKind::Arrow, // Length one. - ',' => Token::Comma, - ';' => Token::Semicolon, - ':' => Token::Colon, - '+' => Token::Plus, - '-' => Token::Hyph, - '*' => Token::Star, - '/' => Token::Slash, - '=' => Token::Eq, - '<' => Token::Lt, - '>' => Token::Gt, + ',' => NodeKind::Comma, + ';' => NodeKind::Semicolon, + ':' => NodeKind::Colon, + '+' => NodeKind::Plus, + '-' => NodeKind::Minus, + '*' => NodeKind::Star, + '/' => NodeKind::Slash, + '=' => NodeKind::Eq, + '<' => NodeKind::Lt, + '>' => NodeKind::Gt, // Identifiers. c if is_id_start(c) => self.ident(start), @@ -159,12 +171,12 @@ impl<'s> Tokens<'s> { // Strings. '"' => self.string(), - _ => Token::Invalid(self.s.eaten_from(start)), + _ => NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()), } } #[inline] - fn text(&mut self, start: usize) -> Token<'s> { + fn text(&mut self, start: usize) -> NodeKind { macro_rules! table { ($($c:literal)|*) => {{ let mut t = [false; 128]; @@ -186,10 +198,10 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); - Token::Text(self.s.eaten_from(start)) + NodeKind::Text(resolve_string(self.s.eaten_from(start))) } - fn whitespace(&mut self) -> Token<'s> { + fn whitespace(&mut self) -> NodeKind { self.s.uneat(); // Count the number of newlines. @@ -208,10 +220,10 @@ impl<'s> Tokens<'s> { } } - Token::Space(newlines) + NodeKind::Space(newlines) } - fn backslash(&mut self) -> Token<'s> { + fn backslash(&mut self) -> NodeKind { if let Some(c) = self.s.peek() { match c { // Backslash and comments. @@ -220,61 +232,61 @@ impl<'s> Tokens<'s> { '[' | ']' | '{' | '}' | '#' | // Markup. '*' | '_' | '=' | '~' | '`' | '$' => { - let start = self.s.index(); self.s.eat_assert(c); - Token::Text(&self.s.eaten_from(start)) + NodeKind::Text(c.into()) } 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); self.s.eat_assert('{'); - Token::UnicodeEscape(UnicodeEscapeToken { - // Allow more than `ascii_hexdigit` for better error recovery. - sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()), - terminated: self.s.eat_if('}'), + let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character: resolve_hex(&sequence), + sequence, + terminated: self.s.eat_if('}') }) } - c if c.is_whitespace() => Token::Backslash, - _ => Token::Text("\\"), + c if c.is_whitespace() => NodeKind::Linebreak, + _ => NodeKind::Text("\\".into()), } } else { - Token::Backslash + NodeKind::Linebreak } } #[inline] - fn hash(&mut self) -> Token<'s> { + fn hash(&mut self) -> NodeKind { if self.s.check_or(false, is_id_start) { let read = self.s.eat_while(is_id_continue); if let Some(keyword) = keyword(read) { keyword } else { - Token::Ident(read) + NodeKind::Ident(read.into()) } } else { - Token::Text("#") + NodeKind::Text("#".into()) } } - fn hyph(&mut self, start: usize) -> Token<'s> { + fn hyph(&mut self) -> NodeKind { if self.s.eat_if('-') { if self.s.eat_if('-') { - Token::HyphHyphHyph + NodeKind::EmDash } else { - Token::HyphHyph + NodeKind::EnDash } } else if self.s.check_or(true, char::is_whitespace) { - Token::Hyph + NodeKind::ListBullet } else { - Token::Text(self.s.eaten_from(start)) + NodeKind::Text("-".into()) } } - fn numbering(&mut self, start: usize, c: char) -> Token<'s> { + fn numbering(&mut self, start: usize, c: char) -> NodeKind { let number = if c != '.' { self.s.eat_while(|c| c.is_ascii_digit()); let read = self.s.eaten_from(start); if !self.s.eat_if('.') { - return Token::Text(read); + return NodeKind::Text(self.s.eaten_from(start).into()); } read.parse().ok() } else { @@ -282,21 +294,28 @@ impl<'s> Tokens<'s> { }; if self.s.check_or(true, char::is_whitespace) { - Token::Numbering(number) + NodeKind::EnumNumbering(number) } else { - Token::Text(self.s.eaten_from(start)) + NodeKind::Text(self.s.eaten_from(start).into()) } } - fn raw(&mut self) -> Token<'s> { + fn raw(&mut self) -> NodeKind { + let column = self.source.byte_to_column(self.s.index() - 1).unwrap(); let mut backticks = 1; - while self.s.eat_if('`') { + while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; } // Special case for empty inline block. if backticks == 2 { - return Token::Raw(RawToken { text: "", backticks: 1, terminated: true }); + return NodeKind::Raw(RawToken { + text: EcoString::new(), + lang: None, + backticks: 1, + terminated: true, + block: false, + }); } let start = self.s.index(); @@ -311,16 +330,17 @@ impl<'s> Tokens<'s> { } let terminated = found == backticks; - let end = self.s.index() - if terminated { found } else { 0 }; + let end = self.s.index() - if terminated { found as usize } else { 0 }; - Token::Raw(RawToken { - text: self.s.get(start .. end), + NodeKind::Raw(resolve_raw( + column, backticks, + self.s.get(start .. end).into(), terminated, - }) + )) } - fn math(&mut self) -> Token<'s> { + fn math(&mut self) -> NodeKind { let mut display = false; if self.s.eat_if('[') { display = true; @@ -350,25 +370,25 @@ impl<'s> Tokens<'s> { (true, true) => 2, }; - Token::Math(MathToken { - formula: self.s.get(start .. end), + NodeKind::Math(MathToken { + formula: self.s.get(start .. end).into(), display, terminated, }) } - fn ident(&mut self, start: usize) -> Token<'s> { + fn ident(&mut self, start: usize) -> NodeKind { self.s.eat_while(is_id_continue); match self.s.eaten_from(start) { - "none" => Token::None, - "auto" => Token::Auto, - "true" => Token::Bool(true), - "false" => Token::Bool(false), - id => keyword(id).unwrap_or(Token::Ident(id)), + "none" => NodeKind::None, + "auto" => NodeKind::Auto, + "true" => NodeKind::Bool(true), + "false" => NodeKind::Bool(false), + id => keyword(id).unwrap_or(NodeKind::Ident(id.into())), } } - fn number(&mut self, start: usize, c: char) -> Token<'s> { + fn number(&mut self, start: usize, c: char) -> NodeKind { // Read the first part (integer or fractional depending on `first`). self.s.eat_while(|c| c.is_ascii_digit()); @@ -380,7 +400,9 @@ impl<'s> Tokens<'s> { // Read the exponent. if self.s.eat_if('e') || self.s.eat_if('E') { - let _ = self.s.eat_if('+') || self.s.eat_if('-'); + if !self.s.eat_if('+') { + self.s.eat_if('-'); + } self.s.eat_while(|c| c.is_ascii_digit()); } @@ -396,55 +418,53 @@ impl<'s> Tokens<'s> { // Find out whether it is a simple number. if suffix.is_empty() { - if let Ok(int) = number.parse::() { - return Token::Int(int); - } else if let Ok(float) = number.parse::() { - return Token::Float(float); + if let Ok(i) = number.parse::() { + return NodeKind::Int(i); } } - // Otherwise parse into the fitting numeric type. - let build = match suffix { - "%" => Token::Percent, - "fr" => Token::Fraction, - "pt" => |x| Token::Length(x, LengthUnit::Pt), - "mm" => |x| Token::Length(x, LengthUnit::Mm), - "cm" => |x| Token::Length(x, LengthUnit::Cm), - "in" => |x| Token::Length(x, LengthUnit::In), - "rad" => |x| Token::Angle(x, AngularUnit::Rad), - "deg" => |x| Token::Angle(x, AngularUnit::Deg), - _ => return Token::Invalid(all), - }; - - if let Ok(float) = number.parse::() { - build(float) + if let Ok(f) = number.parse::() { + match suffix { + "" => NodeKind::Float(f), + "%" => NodeKind::Percentage(f), + "fr" => NodeKind::Fraction(f), + "pt" => NodeKind::Length(f, LengthUnit::Pt), + "mm" => NodeKind::Length(f, LengthUnit::Mm), + "cm" => NodeKind::Length(f, LengthUnit::Cm), + "in" => NodeKind::Length(f, LengthUnit::In), + "deg" => NodeKind::Angle(f, AngularUnit::Deg), + "rad" => NodeKind::Angle(f, AngularUnit::Rad), + _ => { + return NodeKind::Error(ErrorPosition::Full, all.into()); + } + } } else { - Token::Invalid(all) + NodeKind::Error(ErrorPosition::Full, all.into()) } } - fn string(&mut self) -> Token<'s> { + + fn string(&mut self) -> NodeKind { let mut escaped = false; - Token::Str(StrToken { - string: self.s.eat_until(|c| { + NodeKind::Str(StrToken { + string: resolve_string(self.s.eat_until(|c| { if c == '"' && !escaped { true } else { escaped = c == '\\' && !escaped; false } - }), + })), terminated: self.s.eat_if('"'), }) } - fn line_comment(&mut self) -> Token<'s> { - Token::LineComment(self.s.eat_until(is_newline)) + fn line_comment(&mut self) -> NodeKind { + self.s.eat_until(is_newline); + NodeKind::LineComment } - fn block_comment(&mut self) -> Token<'s> { - let start = self.s.index(); - + fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; @@ -466,10 +486,7 @@ impl<'s> Tokens<'s> { } } - let terminated = depth == 0; - let end = self.s.index() - if terminated { 2 } else { 0 }; - - Token::BlockComment(self.s.get(start .. end)) + NodeKind::BlockComment } fn maybe_in_url(&self) -> bool { @@ -477,24 +494,24 @@ impl<'s> Tokens<'s> { } } -fn keyword(ident: &str) -> Option> { +fn keyword(ident: &str) -> Option { Some(match ident { - "not" => Token::Not, - "and" => Token::And, - "or" => Token::Or, - "with" => Token::With, - "let" => Token::Let, - "if" => Token::If, - "else" => Token::Else, - "for" => Token::For, - "in" => Token::In, - "while" => Token::While, - "break" => Token::Break, - "continue" => Token::Continue, - "return" => Token::Return, - "import" => Token::Import, - "include" => Token::Include, - "from" => Token::From, + "not" => NodeKind::Not, + "and" => NodeKind::And, + "or" => NodeKind::Or, + "with" => NodeKind::With, + "let" => NodeKind::Let, + "if" => NodeKind::If, + "else" => NodeKind::Else, + "for" => NodeKind::For, + "in" => NodeKind::In, + "while" => NodeKind::While, + "break" => NodeKind::Break, + "continue" => NodeKind::Continue, + "return" => NodeKind::Return, + "import" => NodeKind::Import, + "include" => NodeKind::Include, + "from" => NodeKind::From, _ => return None, }) } @@ -506,24 +523,56 @@ mod tests { use super::*; + use NodeKind::*; use Option::None; - use Token::{Ident, *}; use TokenMode::{Code, Markup}; - const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token { - Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated }) + fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character: resolve_hex(sequence), + sequence: sequence.into(), + terminated, + }) } - const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token { - Token::Raw(RawToken { text, backticks, terminated }) + fn Raw( + text: &str, + lang: Option<&str>, + backticks: u8, + terminated: bool, + block: bool, + ) -> NodeKind { + NodeKind::Raw(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks, + terminated, + block, + }) + } + + fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { + NodeKind::Math(MathToken { + formula: formula.into(), + display, + terminated, + }) } - const fn Math(formula: &str, display: bool, terminated: bool) -> Token { - Token::Math(MathToken { formula, display, terminated }) + fn Str(string: &str, terminated: bool) -> NodeKind { + NodeKind::Str(StrToken { string: string.into(), terminated }) } - const fn Str(string: &str, terminated: bool) -> Token { - Token::Str(StrToken { string, terminated }) + fn Text(string: &str) -> NodeKind { + NodeKind::Text(string.into()) + } + + fn Ident(ident: &str) -> NodeKind { + NodeKind::Ident(ident.into()) + } + + fn Invalid(invalid: &str) -> NodeKind { + NodeKind::Error(ErrorPosition::Full, invalid.into()) } /// Building blocks for suffix testing. @@ -541,40 +590,6 @@ mod tests { /// - '/': symbols const BLOCKS: &str = " a1/"; - /// Suffixes described by four-tuples of: - /// - /// - block the suffix is part of - /// - mode in which the suffix is applicable - /// - the suffix string - /// - the resulting suffix token - const SUFFIXES: &[(char, Option, &str, Token)] = &[ - // Whitespace suffixes. - (' ', None, " ", Space(0)), - (' ', None, "\n", Space(1)), - (' ', None, "\r", Space(1)), - (' ', None, "\r\n", Space(1)), - // Letter suffixes. - ('a', Some(Markup), "hello", Text("hello")), - ('a', Some(Markup), "💚", Text("💚")), - ('a', Some(Code), "val", Ident("val")), - ('a', Some(Code), "α", Ident("α")), - ('a', Some(Code), "_", Ident("_")), - // Number suffixes. - ('1', Some(Code), "2", Int(2)), - ('1', Some(Code), ".2", Float(0.2)), - // Symbol suffixes. - ('/', None, "[", LeftBracket), - ('/', None, "//", LineComment("")), - ('/', None, "/**/", BlockComment("")), - ('/', Some(Markup), "*", Star), - ('/', Some(Markup), "$ $", Math(" ", false, true)), - ('/', Some(Markup), r"\\", Text(r"\")), - ('/', Some(Markup), "#let", Let), - ('/', Some(Code), "(", LeftParen), - ('/', Some(Code), ":", Colon), - ('/', Some(Code), "+=", PlusEq), - ]; - macro_rules! t { (Both $($tts:tt)*) => { t!(Markup $($tts)*); @@ -584,22 +599,56 @@ mod tests { // Test without suffix. t!(@$mode: $src => $($token),*); + // Suffixes described by four-tuples of: + // + // - block the suffix is part of + // - mode in which the suffix is applicable + // - the suffix string + // - the resulting suffix NodeKind + let suffixes: &[(char, Option, &str, NodeKind)] = &[ + // Whitespace suffixes. + (' ', None, " ", Space(0)), + (' ', None, "\n", Space(1)), + (' ', None, "\r", Space(1)), + (' ', None, "\r\n", Space(1)), + // Letter suffixes. + ('a', Some(Markup), "hello", Text("hello")), + ('a', Some(Markup), "💚", Text("💚")), + ('a', Some(Code), "val", Ident("val")), + ('a', Some(Code), "α", Ident("α")), + ('a', Some(Code), "_", Ident("_")), + // Number suffixes. + ('1', Some(Code), "2", Int(2)), + ('1', Some(Code), ".2", Float(0.2)), + // Symbol suffixes. + ('/', None, "[", LeftBracket), + ('/', None, "//", LineComment), + ('/', None, "/**/", BlockComment), + ('/', Some(Markup), "*", Strong), + ('/', Some(Markup), "$ $", Math(" ", false, true)), + ('/', Some(Markup), r"\\", Text("\\")), + ('/', Some(Markup), "#let", Let), + ('/', Some(Code), "(", LeftParen), + ('/', Some(Code), ":", Colon), + ('/', Some(Code), "+=", PlusEq), + ]; + // Test with each applicable suffix. - for &(block, mode, suffix, token) in SUFFIXES { + for (block, mode, suffix, token) in suffixes { let src = $src; #[allow(unused_variables)] let blocks = BLOCKS; $(let blocks = $blocks;)? assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { + if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) { t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token); } } }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&src, $mode).collect::>(); - let expected = vec![$($token),*]; + let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::>(); + let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; } @@ -671,7 +720,7 @@ mod tests { // Test text ends. t!(Markup[""]: "hello " => Text("hello"), Space(0)); - t!(Markup[""]: "hello~" => Text("hello"), Tilde); + t!(Markup[""]: "hello~" => Text("hello"), NonBreakingSpace); } #[test] @@ -713,16 +762,16 @@ mod tests { #[test] fn test_tokenize_markup_symbols() { // Test markup tokens. - t!(Markup[" a1"]: "*" => Star); - t!(Markup: "_" => Underscore); + t!(Markup[" a1"]: "*" => Strong); + t!(Markup: "_" => Emph); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); - t!(Markup: "~" => Tilde); - t!(Markup[" "]: r"\" => Backslash); - t!(Markup["a "]: r"a--" => Text("a"), HyphHyph); - t!(Markup["a1/"]: "- " => Hyph, Space(0)); - t!(Markup[" "]: "." => Numbering(None)); - t!(Markup[" "]: "1." => Numbering(Some(1))); + t!(Markup: "~" => NonBreakingSpace); + t!(Markup[" "]: r"\" => Linebreak); + t!(Markup["a "]: r"a--" => Text("a"), EnDash); + t!(Markup["a1/"]: "- " => ListBullet, Space(0)); + t!(Markup[" "]: "." => EnumNumbering(None)); + t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => Text("1."), Text("a")); t!(Markup[" /"]: "a1." => Text("a1.")); } @@ -734,7 +783,7 @@ mod tests { t!(Code: ";" => Semicolon); t!(Code: ":" => Colon); t!(Code: "+" => Plus); - t!(Code: "-" => Hyph); + t!(Code: "-" => Minus); t!(Code[" a1"]: "*" => Star); t!(Code[" a1"]: "/" => Slash); t!(Code: "=" => Eq); @@ -756,10 +805,10 @@ mod tests { t!(Code[" a/"]: "..." => Dots, Invalid(".")); // Test hyphen as symbol vs part of identifier. - t!(Code[" /"]: "-1" => Hyph, Int(1)); - t!(Code[" /"]: "-a" => Hyph, Ident("a")); - t!(Code[" /"]: "--1" => Hyph, Hyph, Int(1)); - t!(Code[" /"]: "--_a" => Hyph, Hyph, Ident("_a")); + t!(Code[" /"]: "-1" => Minus, Int(1)); + t!(Code[" /"]: "-a" => Minus, Ident("a")); + t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); + t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); t!(Code[" /"]: "a-b" => Ident("a-b")); } @@ -776,13 +825,13 @@ mod tests { ("import", Import), ]; - for &(s, t) in &list { + for (s, t) in list.clone() { t!(Markup[" "]: format!("#{}", s) => t); t!(Markup[" "]: format!("#{0}#{0}", s) => t, t); - t!(Markup[" /"]: format!("# {}", s) => Token::Text("#"), Space(0), Text(s)); + t!(Markup[" /"]: format!("# {}", s) => Text("#"), Space(0), Text(s)); } - for &(s, t) in &list { + for (s, t) in list { t!(Code[" "]: s => t); t!(Markup[" /"]: s => Text(s)); } @@ -796,25 +845,23 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { - let empty = Raw("", 1, true); - // Test basic raw block. - t!(Markup: "``" => empty); - t!(Markup: "`raw`" => Raw("raw", 1, true)); - t!(Markup[""]: "`]" => Raw("]", 1, false)); + t!(Markup: "``" => Raw("", None, 1, true, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", 1, true)); - t!(Markup[""]: r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("not `y`e`t", 3, true)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); // Test more backticks. - t!(Markup: "``nope``" => empty, Text("nope"), empty); - t!(Markup: "````🚀````" => Raw("🚀", 4, true)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("👩‍🚀````noend", 5, false)); - t!(Markup[""]: "````raw``````" => Raw("raw", 4, true), empty); + t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); } #[test] @@ -896,8 +943,8 @@ mod tests { let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats); let suffixes = [ - ("%", Percent as fn(f64) -> Token<'static>), - ("fr", Fraction as fn(f64) -> Token<'static>), + ("%", Percentage as fn(f64) -> NodeKind), + ("fr", Fraction as fn(f64) -> NodeKind), ("mm", |x| Length(x, LengthUnit::Mm)), ("pt", |x| Length(x, LengthUnit::Pt)), ("cm", |x| Length(x, LengthUnit::Cm)), @@ -930,54 +977,54 @@ mod tests { t!(Code[""]: "\"hi" => Str("hi", false)); // Test escaped quote. - t!(Code: r#""a\"bc""# => Str(r#"a\"bc"#, true)); - t!(Code[""]: r#""\""# => Str(r#"\""#, false)); + t!(Code: r#""a\"bc""# => Str("a\"bc", true)); + t!(Code[""]: r#""\""# => Str("\"", false)); } #[test] fn test_tokenize_line_comments() { // Test line comment with no trailing newline. - t!(Both[""]: "//" => LineComment("")); + t!(Both[""]: "//" => LineComment); // Test line comment ends at newline. - t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1)); - t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1)); - t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1)); + t!(Both["a1/"]: "//bc\n" => LineComment, Space(1)); + t!(Both["a1/"]: "// bc \n" => LineComment, Space(1)); + t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1)); // Test nested line comments. - t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1)); + t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1)); } #[test] fn test_tokenize_block_comments() { // Test basic block comments. - t!(Both[""]: "/*" => BlockComment("")); - t!(Both: "/**/" => BlockComment("")); - t!(Both: "/*🏞*/" => BlockComment("🏞")); - t!(Both: "/*\n*/" => BlockComment("\n")); + t!(Both[""]: "/*" => BlockComment); + t!(Both: "/**/" => BlockComment); + t!(Both: "/*🏞*/" => BlockComment); + t!(Both: "/*\n*/" => BlockComment); // Test depth 1 and 2 nested block comments. - t!(Both: "/* /* */ */" => BlockComment(" /* */ ")); - t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/")); + t!(Both: "/* /* */ */" => BlockComment); + t!(Both: "/*/*/**/*/*/" => BlockComment); // Test two nested, one unclosed block comments. - t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/")); + t!(Both[""]: "/*/*/**/*/" => BlockComment); // Test all combinations of up to two following slashes and stars. - t!(Both[""]: "/*" => BlockComment("")); - t!(Both[""]: "/*/" => BlockComment("/")); - t!(Both[""]: "/**" => BlockComment("*")); - t!(Both[""]: "/*//" => BlockComment("//")); - t!(Both[""]: "/*/*" => BlockComment("/*")); - t!(Both[""]: "/**/" => BlockComment("")); - t!(Both[""]: "/***" => BlockComment("**")); + t!(Both[""]: "/*" => BlockComment); + t!(Both[""]: "/*/" => BlockComment); + t!(Both[""]: "/**" => BlockComment); + t!(Both[""]: "/*//" => BlockComment); + t!(Both[""]: "/*/*" => BlockComment); + t!(Both[""]: "/**/" => BlockComment); + t!(Both[""]: "/***" => BlockComment); } #[test] fn test_tokenize_invalid() { // Test invalidly closed block comments. - t!(Both: "*/" => Token::Invalid("*/")); - t!(Both: "/**/*/" => BlockComment(""), Token::Invalid("*/")); + t!(Both: "*/" => Invalid("*/")); + t!(Both: "/**/*/" => BlockComment, Invalid("*/")); // Test invalid expressions. t!(Code: r"\" => Invalid(r"\")); @@ -990,6 +1037,6 @@ mod tests { // Test invalid number suffixes. t!(Code[" /"]: "1foo" => Invalid("1foo")); t!(Code: "1p%" => Invalid("1p"), Invalid("%")); - t!(Code: "1%%" => Percent(1.0), Invalid("%")); + t!(Code: "1%%" => Percentage(1.0), Invalid("%")); } } -- cgit v1.2.3 From 84d35efee38d137a77e368c50421ac24327371c6 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 11:46:12 +0100 Subject: Less owning, more iterating --- src/parse/mod.rs | 275 ++++++++++++++++++++++++--------------------------- src/parse/parser.rs | 73 +++++++------- src/parse/resolve.rs | 8 +- src/parse/tokens.rs | 16 ++- 4 files changed, 176 insertions(+), 196 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index dc769183..0425f824 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -30,15 +30,14 @@ fn markup(p: &mut Parser) { /// Parse markup that stays right of the given column. fn markup_indented(p: &mut Parser, column: usize) { - // TODO this is broken p.eat_while(|t| match t { - NodeKind::Space(n) => n == 0, + NodeKind::Space(n) => *n == 0, NodeKind::LineComment | NodeKind::BlockComment => true, _ => false, }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } @@ -64,125 +63,119 @@ where /// Parse a markup node. fn markup_node(p: &mut Parser, at_start: &mut bool) { - if let Some(token) = p.peek() { - match token { - // Whitespace. - NodeKind::Space(newlines) => { - *at_start |= newlines > 0; - - if newlines < 2 { - p.eat(); - } else { - p.convert(NodeKind::Parbreak); - } - } + let token = match p.peek() { + Some(t) => t, + None => return, + }; - // Text. - NodeKind::UnicodeEscape(u) => { - if !u.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected closing brace".into(), - )); - p.unsuccessful(); - return; - } - - if u.character.is_none() { - let src = p.peek_src(); - p.convert(NodeKind::Error( - ErrorPosition::Full, - "invalid unicode escape sequence".into(), - )); - p.start(); - p.end(NodeKind::Text(src.into())); - return; - } + match token { + // Whitespace. + NodeKind::Space(newlines) => { + *at_start |= *newlines > 0; + if *newlines < 2 { p.eat(); + } else { + p.convert(NodeKind::Parbreak); } - NodeKind::Raw(r) => { - if !r.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected backtick(s)".into(), - )); - p.unsuccessful(); - return; - } + } - p.eat(); + // Text and markup. + NodeKind::Text(_) + | NodeKind::EnDash + | NodeKind::EmDash + | NodeKind::NonBreakingSpace + | NodeKind::Emph + | NodeKind::Strong + | NodeKind::Linebreak => p.eat(), + + NodeKind::UnicodeEscape(u) => { + if !u.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + )); + p.unsuccessful(); + return; } - NodeKind::Text(_) - | NodeKind::EnDash - | NodeKind::EmDash - | NodeKind::NonBreakingSpace => { - p.eat(); + + if u.character.is_none() { + let src = p.peek_src(); + p.convert(NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + )); + p.start(); + p.end(NodeKind::Text(src.into())); + return; } - // Markup. - NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak => { - p.eat(); + p.eat(); + } + NodeKind::Raw(r) => { + if !r.terminated { + p.convert(NodeKind::Error( + ErrorPosition::End, + "expected backtick(s)".into(), + )); + p.unsuccessful(); + return; } - NodeKind::Eq if *at_start => heading(p), - NodeKind::ListBullet if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), + p.eat(); + } - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())) - } + NodeKind::Eq if *at_start => heading(p), + NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - // Hashtag + keyword / identifier. - NodeKind::Ident(_) - | NodeKind::Let - | NodeKind::If - | NodeKind::While - | NodeKind::For - | NodeKind::Import - | NodeKind::Include => { - let stmt = matches!(token, NodeKind::Let | NodeKind::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group, TokenMode::Code); - expr_with(p, true, 0); - if stmt && p.success() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - } + // Line-based markup that is not currently at the start of the line. + NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + p.convert(NodeKind::Text(p.peek_src().into())) + } - // Block and template. - NodeKind::LeftBrace => { - block(p); - } - NodeKind::LeftBracket => { - template(p); + // Hashtag + keyword / identifier. + NodeKind::Ident(_) + | NodeKind::Let + | NodeKind::If + | NodeKind::While + | NodeKind::For + | NodeKind::Import + | NodeKind::Include => { + let stmt = matches!(token, NodeKind::Let | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; + + p.start_group(group, TokenMode::Code); + expr_with(p, true, 0); + if stmt && p.success() && !p.eof() { + p.expected_at("semicolon or line break"); } + p.end_group(); + } - // Comments. - NodeKind::LineComment | NodeKind::BlockComment => { - p.eat(); - } + // Block and template. + NodeKind::LeftBrace => block(p), + NodeKind::LeftBracket => template(p), - _ => { - *at_start = false; - p.unexpected(); - } - }; - } + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => p.eat(), + + _ => { + *at_start = false; + p.unexpected(); + } + }; } /// Parse a heading. fn heading(p: &mut Parser) { p.start(); p.start(); - p.eat_assert(NodeKind::Eq); + p.eat_assert(&NodeKind::Eq); // Count depth. let mut level: usize = 1; - while p.eat_if(NodeKind::Eq) { + while p.eat_if(&NodeKind::Eq) { level += 1; } @@ -200,7 +193,7 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::ListBullet); + p.eat_assert(&NodeKind::ListBullet); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::List); @@ -209,9 +202,7 @@ fn list_node(p: &mut Parser) { /// Parse a single enum item. fn enum_node(p: &mut Parser) { p.start(); - if !matches!(p.eat(), Some(NodeKind::EnumNumbering(_))) { - panic!("enum item does not start with numbering") - }; + p.eat(); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Enum); @@ -263,7 +254,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { continue; } - if p.peek() == Some(NodeKind::With) { + if p.peek() == Some(&NodeKind::With) { with_expr(p, p.child_count() - offset); if p.may_lift_abort() { @@ -276,7 +267,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { break; } - let op = match p.peek().as_ref().and_then(BinOp::from_token) { + let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, None => { p.lift(); @@ -286,10 +277,8 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { let mut prec = op.precedence(); if prec < min_prec { - { - p.lift(); - break; - }; + p.lift(); + break; } p.eat(); @@ -324,7 +313,7 @@ fn primary(p: &mut Parser, atomic: bool) { p.eat(); // Arrow means this is a closure's lone parameter. - if !atomic && p.peek() == Some(NodeKind::Arrow) { + if !atomic && p.peek() == Some(&NodeKind::Arrow) { p.end_and_start_with(NodeKind::ClosureParams); p.eat(); @@ -359,10 +348,9 @@ fn primary(p: &mut Parser, atomic: bool) { /// Parse a literal. fn literal(p: &mut Parser) -> bool { - let peeked = if let Some(p) = p.peek() { - p - } else { - return false; + let peeked = match p.peek() { + Some(x) => x.clone(), + None => return false, }; match peeked { @@ -375,18 +363,14 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) => { - p.eat(); - } + | NodeKind::Percentage(_) => p.eat(), NodeKind::Str(s) => { p.eat(); if !s.terminated { p.expected_at("quote"); } } - _ => { - return false; - } + _ => return false, } true @@ -401,7 +385,7 @@ fn parenthesized(p: &mut Parser) { let offset = p.child_count(); p.start(); p.start_group(Group::Paren, TokenMode::Code); - let colon = p.eat_if(NodeKind::Colon); + let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); let token_count = p.child_count() - offset; @@ -414,12 +398,12 @@ fn parenthesized(p: &mut Parser) { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(NodeKind::Arrow) { + if p.peek() == Some(&NodeKind::Arrow) { p.start_with(token_count); params(p, 0, true); p.end(NodeKind::ClosureParams); - p.eat_assert(NodeKind::Arrow); + p.eat_assert(&NodeKind::Arrow); expr(p); @@ -485,7 +469,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { break; } - if p.eat_if(NodeKind::Comma) { + if p.eat_if(&NodeKind::Comma) { has_comma = true; } else { missing_coma = Some(p.child_count()); @@ -518,7 +502,7 @@ enum CollectionItemKind { /// Parse an expression or a named pair. Returns if this is a named pair. fn item(p: &mut Parser) -> CollectionItemKind { p.start(); - if p.eat_if(NodeKind::Dots) { + if p.eat_if(&NodeKind::Dots) { expr(p); p.end_or_abort(NodeKind::ParameterSink); @@ -531,7 +515,7 @@ fn item(p: &mut Parser) -> CollectionItemKind { return CollectionItemKind::Unnamed; } - if p.eat_if(NodeKind::Colon) { + if p.eat_if(&NodeKind::Colon) { let child = p.child(1).unwrap(); if matches!(child.kind(), &NodeKind::Ident(_)) { expr(p); @@ -686,9 +670,9 @@ fn args(p: &mut Parser, allow_template: bool) { /// Parse a with expression. fn with_expr(p: &mut Parser, preserve: usize) { p.start_with(preserve); - p.eat_assert(NodeKind::With); + p.eat_assert(&NodeKind::With); - if p.peek() == Some(NodeKind::LeftParen) { + if p.peek() == Some(&NodeKind::LeftParen) { args(p, false); p.end(NodeKind::WithExpr); } else { @@ -700,7 +684,7 @@ fn with_expr(p: &mut Parser, preserve: usize) { /// Parse a let expression. fn let_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Let); + p.eat_assert(&NodeKind::Let); let offset = p.child_count(); ident(p); @@ -708,7 +692,7 @@ fn let_expr(p: &mut Parser) { return; } - if p.peek() == Some(NodeKind::With) { + if p.peek() == Some(&NodeKind::With) { with_expr(p, p.child_count() - offset); } else { // If a parenthesis follows, this is a function definition. @@ -725,7 +709,7 @@ fn let_expr(p: &mut Parser) { false }; - if p.eat_if(NodeKind::Eq) { + if p.eat_if(&NodeKind::Eq) { expr(p); } else if has_params { // Function definitions must have a body. @@ -749,7 +733,7 @@ fn let_expr(p: &mut Parser) { /// Parse an if expresion. fn if_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::If); + p.eat_assert(&NodeKind::If); expr(p); if p.may_end_abort(NodeKind::IfExpr) { @@ -762,8 +746,8 @@ fn if_expr(p: &mut Parser) { return; } - if p.eat_if(NodeKind::Else) { - if p.peek() == Some(NodeKind::If) { + if p.eat_if(&NodeKind::Else) { + if p.peek() == Some(&NodeKind::If) { if_expr(p); } else { body(p); @@ -776,7 +760,7 @@ fn if_expr(p: &mut Parser) { /// Parse a while expresion. fn while_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::While); + p.eat_assert(&NodeKind::While); expr(p); @@ -793,7 +777,7 @@ fn while_expr(p: &mut Parser) { /// Parse a for expression. fn for_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::For); + p.eat_assert(&NodeKind::For); for_pattern(p); @@ -801,7 +785,7 @@ fn for_expr(p: &mut Parser) { return; } - if p.eat_expect(NodeKind::In) { + if p.eat_expect(&NodeKind::In) { expr(p); if p.may_end_abort(NodeKind::ForExpr) { @@ -828,7 +812,7 @@ fn for_pattern(p: &mut Parser) { return; } - if p.peek() == Some(NodeKind::Comma) { + if p.peek() == Some(&NodeKind::Comma) { p.eat(); ident(p); @@ -844,9 +828,9 @@ fn for_pattern(p: &mut Parser) { /// Parse an import expression. fn import_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Import); + p.eat_assert(&NodeKind::Import); - if !p.eat_if(NodeKind::Star) { + if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. p.start(); p.start_group(Group::Imports, TokenMode::Code); @@ -865,7 +849,7 @@ fn import_expr(p: &mut Parser) { p.end(NodeKind::ImportItems); }; - if p.eat_expect(NodeKind::From) { + if p.eat_expect(&NodeKind::From) { expr(p); } @@ -875,7 +859,7 @@ fn import_expr(p: &mut Parser) { /// Parse an include expression. fn include_expr(p: &mut Parser) { p.start(); - p.eat_assert(NodeKind::Include); + p.eat_assert(&NodeKind::Include); expr(p); p.end(NodeKind::IncludeExpr); @@ -883,11 +867,12 @@ fn include_expr(p: &mut Parser) { /// Parse an identifier. fn ident(p: &mut Parser) { - if let Some(NodeKind::Ident(_)) = p.peek() { - p.eat(); - } else { - p.expected("identifier"); - p.unsuccessful(); + match p.peek() { + Some(NodeKind::Ident(_)) => p.eat(), + _ => { + p.expected("identifier"); + p.unsuccessful(); + } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f62e882a..e6fcc1ae 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -161,7 +161,7 @@ impl<'s> Parser<'s> { let len = children.iter().map(|c| c.len()).sum(); self.children - .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); self.success = true; } @@ -240,10 +240,9 @@ impl<'s> Parser<'s> { } pub fn finish(&mut self) -> Rc { - if let Green::Node(n) = self.children.pop().unwrap() { - n - } else { - panic!() + match self.children.pop().unwrap() { + Green::Node(n) => n, + _ => panic!(), } } @@ -252,16 +251,16 @@ impl<'s> Parser<'s> { self.peek().is_none() } - pub fn eat(&mut self) -> Option { - let token = self.peek()?; - self.bump(); + fn eat_peeked(&mut self) -> Option { + let token = self.peek()?.clone(); + self.eat(); Some(token) } /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: NodeKind) -> bool { + pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { - self.bump(); + self.eat(); true } else { false @@ -271,36 +270,36 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(NodeKind) -> Option, + F: FnOnce(&NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); if mapped.is_some() { - self.bump(); + self.eat(); } mapped } /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: NodeKind) -> bool { - let eaten = self.eat_if(t.clone()); + pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + let eaten = self.eat_if(t); if !eaten { - self.expected_at(&t.to_string()); + self.expected_at(t.as_str()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: NodeKind) { - let next = self.eat(); - debug_assert_eq!(next, Some(t)); + pub fn eat_assert(&mut self, t: &NodeKind) { + let next = self.eat_peeked(); + debug_assert_eq!(next.as_ref(), Some(t)); } /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(NodeKind) -> bool, + F: FnMut(&NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -308,8 +307,8 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option { - self.peeked.clone() + pub fn peek(&self) -> Option<&NodeKind> { + self.peeked.as_ref() } /// Peek at the next token if it follows immediately after the last one @@ -371,9 +370,9 @@ impl<'s> Parser<'s> { self.repeek(); match kind { - Group::Paren => self.eat_assert(NodeKind::LeftParen), - Group::Bracket => self.eat_assert(NodeKind::LeftBracket), - Group::Brace => self.eat_assert(NodeKind::LeftBrace), + Group::Paren => self.eat_assert(&NodeKind::LeftParen), + Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), + Group::Brace => self.eat_assert(&NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -402,11 +401,11 @@ impl<'s> Parser<'s> { } { if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. - self.bump(); + self.eat(); rescan = false; } else if required { self.start(); - self.abort(format!("expected {}", end.to_string())); + self.abort(format!("expected {}", end)); } } @@ -457,21 +456,21 @@ impl<'s> Parser<'s> { /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("expected {}, found {}", what, found.to_string())) - } else { - self.lift(); - self.expected_at(what); + match self.eat_peeked() { + Some(found) => self.abort(format!("expected {}, found {}", what, found)), + None => { + self.lift(); + self.expected_at(what); + } } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("unexpected {}", found.to_string())) - } else { - self.abort("unexpected end of file") + match self.eat_peeked() { + Some(found) => self.abort(format!("unexpected {}", found)), + None => self.abort("unexpected end of file"), } } @@ -489,7 +488,7 @@ impl<'s> Parser<'s> { } /// Move to the next token. - fn bump(&mut self) { + pub fn eat(&mut self) { self.children.push( GreenData::new( self.next.clone().unwrap(), @@ -511,7 +510,7 @@ impl<'s> Parser<'s> { if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.bump(); + self.eat(); } } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index c59c3bb1..1b3089a6 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -25,11 +25,9 @@ pub fn resolve_string(string: &str) -> EcoString { let sequence = s.eat_while(|c| c.is_ascii_hexdigit()); let _terminated = s.eat_if('}'); - if let Some(c) = resolve_hex(sequence) { - out.push(c); - } else { - // TODO: Feedback that unicode escape sequence is wrong. - out.push_str(s.eaten_from(start)); + match resolve_hex(sequence) { + Some(c) => out.push(c), + None => out.push_str(s.eaten_from(start)), } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 19d0d77b..bfd9f3ed 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -224,8 +224,8 @@ impl<'s> Tokens<'s> { } fn backslash(&mut self) -> NodeKind { - if let Some(c) = self.s.peek() { - match c { + match self.s.peek() { + Some(c) => match c { // Backslash and comments. '\\' | '/' | // Parenthesis and hashtag. @@ -247,9 +247,8 @@ impl<'s> Tokens<'s> { } c if c.is_whitespace() => NodeKind::Linebreak, _ => NodeKind::Text("\\".into()), - } - } else { - NodeKind::Linebreak + }, + None => NodeKind::Linebreak, } } @@ -257,10 +256,9 @@ impl<'s> Tokens<'s> { fn hash(&mut self) -> NodeKind { if self.s.check_or(false, is_id_start) { let read = self.s.eat_while(is_id_continue); - if let Some(keyword) = keyword(read) { - keyword - } else { - NodeKind::Ident(read.into()) + match keyword(read) { + Some(keyword) => keyword, + None => NodeKind::Ident(read.into()), } } else { NodeKind::Text("#".into()) -- cgit v1.2.3 From 1c0ac793d2b9c403f1a8fa60a3748f4ff8623acb Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 15:01:39 +0100 Subject: Slim `NodeKind` memory footprint --- src/parse/mod.rs | 39 +++-------- src/parse/resolve.rs | 11 +-- src/parse/tokens.rs | 184 ++++++++++++++++++++++++++++++++++----------------- 3 files changed, 136 insertions(+), 98 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 0425f824..773f642c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::NonBreakingSpace | NodeKind::Emph | NodeKind::Strong - | NodeKind::Linebreak => p.eat(), + | NodeKind::Linebreak + | NodeKind::Raw(_) => p.eat(), NodeKind::UnicodeEscape(u) => { - if !u.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected closing brace".into(), - )); - p.unsuccessful(); - return; - } - if u.character.is_none() { let src = p.peek_src(); p.convert(NodeKind::Error( @@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Raw(r) => { - if !r.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected backtick(s)".into(), - )); - p.unsuccessful(); - return; - } - - p.eat(); - } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Comments. NodeKind::LineComment | NodeKind::BlockComment => p.eat(), + NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(), _ => { *at_start = false; @@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), + Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => { + p.eat(); + } + // Nothing. _ => { p.expected("expression"); @@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) => p.eat(), - NodeKind::Str(s) => { - p.eat(); - if !s.terminated { - p.expected_at("quote"); - } - } + | NodeKind::Percentage(_) + | NodeKind::Str(_) => p.eat(), + _ => return false, } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 1b3089a6..8d4c04d4 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw( - column: usize, - backticks: u8, - text: &str, - terminated: bool, -) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); @@ -59,7 +54,6 @@ pub fn resolve_raw( lang: Some(tag.into()), text: text.into(), backticks, - terminated, block, } } else { @@ -67,7 +61,6 @@ pub fn resolve_raw( lang: None, text: split_lines(text).join("\n").into(), backticks, - terminated, block: false, } } @@ -194,7 +187,7 @@ mod tests { text: &str, block: bool, ) { - let node = resolve_raw(column, backticks, raw, true); + let node = resolve_raw(column, backticks, raw); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index bfd9f3ed..8a480b02 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,6 +5,8 @@ use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; +use std::rc::Rc; + /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { source: &'s SourceFile, @@ -239,11 +241,18 @@ impl<'s> Tokens<'s> { self.s.eat_assert('u'); self.s.eat_assert('{'); let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - terminated: self.s.eat_if('}') - }) + + if self.s.eat_if('}') { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(&sequence), + sequence, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + ) + } } c if c.is_whitespace() => NodeKind::Linebreak, _ => NodeKind::Text("\\".into()), @@ -307,13 +316,12 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(RawToken { + return NodeKind::Raw(Rc::new(RawToken { text: EcoString::new(), lang: None, backticks: 1, - terminated: true, block: false, - }); + })); } let start = self.s.index(); @@ -330,12 +338,26 @@ impl<'s> Tokens<'s> { let terminated = found == backticks; let end = self.s.index() - if terminated { found as usize } else { 0 }; - NodeKind::Raw(resolve_raw( - column, - backticks, - self.s.get(start .. end).into(), - terminated, - )) + if terminated { + NodeKind::Raw(Rc::new(resolve_raw( + column, + backticks, + self.s.get(start .. end).into(), + ))) + } else { + let remaining = backticks - found; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + ErrorPosition::End, + if found == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn math(&mut self) -> NodeKind { @@ -368,11 +390,22 @@ impl<'s> Tokens<'s> { (true, true) => 2, }; - NodeKind::Math(MathToken { - formula: self.s.get(start .. end).into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { + formula: self.s.get(start .. end).into(), + display, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn ident(&mut self, start: usize) -> NodeKind { @@ -444,17 +477,19 @@ impl<'s> Tokens<'s> { fn string(&mut self) -> NodeKind { let mut escaped = false; - NodeKind::Str(StrToken { - string: resolve_string(self.s.eat_until(|c| { - if c == '"' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } - })), - terminated: self.s.eat_if('"'), - }) + let string = resolve_string(self.s.eat_until(|c| { + if c == '"' && !escaped { + true + } else { + escaped = c == '\\' && !escaped; + false + } + })); + if self.s.eat_if('"') { + NodeKind::Str(StrToken { string }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn line_comment(&mut self) -> NodeKind { @@ -526,39 +561,68 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - terminated, - }) + if terminated { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(sequence), + sequence: sequence.into(), + })) + } else { + NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) + } } fn Raw( text: &str, lang: Option<&str>, - backticks: u8, - terminated: bool, + backticks_left: u8, + backticks_right: u8, block: bool, ) -> NodeKind { - NodeKind::Raw(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks, - terminated, - block, - }) + if backticks_left == backticks_right { + NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) + } else { + let remaining = backticks_left - backticks_right; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + ErrorPosition::End, + if backticks_right == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { - NodeKind::Math(MathToken { - formula: formula.into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn Str(string: &str, terminated: bool) -> NodeKind { - NodeKind::Str(StrToken { string: string.into(), terminated }) + if terminated { + NodeKind::Str(StrToken { string: string.into() }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn Text(string: &str) -> NodeKind { @@ -844,22 +908,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, true, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); + t!(Markup: "``" => Raw("", None, 1, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); + t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false)); } #[test] -- cgit v1.2.3 From c569e14c07902b23b7b3e29df4076cea1f4496cf Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 16:22:33 +0100 Subject: Improve error handling --- src/parse/mod.rs | 87 ++++++++++++++++++-------------------------- src/parse/tokens.rs | 102 +++++++++++++++++++++++----------------------------- 2 files changed, 78 insertions(+), 111 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 773f642c..ce992834 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -138,8 +138,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { NodeKind::LeftBracket => template(p), // Comments. - NodeKind::LineComment | NodeKind::BlockComment => p.eat(), - NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(), + NodeKind::LineComment | NodeKind::BlockComment | NodeKind::Error(_, _) => p.eat(), _ => { *at_start = false; @@ -319,7 +318,7 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), - Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => { + Some(NodeKind::Error(_, _)) => { p.eat(); } @@ -333,28 +332,26 @@ fn primary(p: &mut Parser, atomic: bool) { /// Parse a literal. fn literal(p: &mut Parser) -> bool { - let peeked = match p.peek() { - Some(x) => x.clone(), - None => return false, - }; - - match peeked { + match p.peek() { // Basic values. - NodeKind::None - | NodeKind::Auto - | NodeKind::Int(_) - | NodeKind::Float(_) - | NodeKind::Bool(_) - | NodeKind::Fraction(_) - | NodeKind::Length(_, _) - | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) - | NodeKind::Str(_) => p.eat(), + Some( + NodeKind::None + | NodeKind::Auto + | NodeKind::Int(_) + | NodeKind::Float(_) + | NodeKind::Bool(_) + | NodeKind::Fraction(_) + | NodeKind::Length(_, _) + | NodeKind::Angle(_, _) + | NodeKind::Percentage(_) + | NodeKind::Str(_), + ) => { + p.eat(); + true + } - _ => return false, + _ => false, } - - true } /// Parse something that starts with a parenthesis, which can be either of: @@ -395,11 +392,11 @@ fn parenthesized(p: &mut Parser) { // Find out which kind of collection this is. match kind { CollectionKind::Group => p.end(NodeKind::Group), - CollectionKind::PositionalCollection => { + CollectionKind::Positional => { p.lift(); array(p, token_count); } - CollectionKind::NamedCollection => { + CollectionKind::Named => { p.lift(); dict(p, token_count); } @@ -413,9 +410,9 @@ enum CollectionKind { Group, /// The collection starts with a positional and has more items or a trailing /// comma. - PositionalCollection, + Positional, /// The collection starts with a named item. - NamedCollection, + Named, } /// Parse a collection. @@ -424,20 +421,19 @@ enum CollectionKind { /// commas. fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; - let mut kind = CollectionKind::PositionalCollection; - let mut seen_spread = false; + let mut kind = CollectionKind::Positional; let mut has_comma = false; let mut missing_coma = None; while !p.eof() { let item_kind = item(p); if p.success() { - if items == 0 && item_kind == CollectionItemKind::Named { - kind = CollectionKind::NamedCollection; + if items == 0 && item_kind == NodeKind::Named { + kind = CollectionKind::Named; } - if item_kind == CollectionItemKind::ParameterSink { - seen_spread = true; + if item_kind == NodeKind::ParameterSink { + has_comma = true; } items += 1; @@ -458,42 +454,27 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } } - if !has_comma - && items == 1 - && !seen_spread - && kind == CollectionKind::PositionalCollection - { + if !has_comma && items == 1 && kind == CollectionKind::Positional { kind = CollectionKind::Group; } (kind, items) } -/// What kind of item is this? -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum CollectionItemKind { - /// A named item. - Named, - /// An unnamed item. - Unnamed, - /// A parameter sink. - ParameterSink, -} - /// Parse an expression or a named pair. Returns if this is a named pair. -fn item(p: &mut Parser) -> CollectionItemKind { +fn item(p: &mut Parser) -> NodeKind { p.start(); if p.eat_if(&NodeKind::Dots) { expr(p); p.end_or_abort(NodeKind::ParameterSink); - return CollectionItemKind::ParameterSink; + return NodeKind::ParameterSink; } expr(p); if p.may_lift_abort() { - return CollectionItemKind::Unnamed; + return NodeKind::None; } if p.eat_if(&NodeKind::Colon) { @@ -512,10 +493,10 @@ fn item(p: &mut Parser) -> CollectionItemKind { p.unsuccessful(); } - CollectionItemKind::Named + NodeKind::Named } else { p.lift(); - CollectionItemKind::Unnamed + p.last_child().unwrap().kind().clone() } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 8a480b02..7c500ce7 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -91,7 +91,7 @@ impl<'s> Iterator for Tokens<'s> { '/' if self.s.eat_if('*') => self.block_comment(), '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(), '*' if self.s.eat_if('/') => { - NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()) + NodeKind::Unknown(self.s.eaten_from(start).into()) } // Other things. @@ -173,7 +173,7 @@ impl<'s> Tokens<'s> { // Strings. '"' => self.string(), - _ => NodeKind::Error(ErrorPosition::Full, self.s.eaten_from(start).into()), + _ => NodeKind::Unknown(self.s.eaten_from(start).into()), } } @@ -398,10 +398,10 @@ impl<'s> Tokens<'s> { } else { NodeKind::Error( ErrorPosition::End, - if display { + if !display || (!escaped && dollar) { "expected closing dollar sign" } else { - "expected display math closure sequence" + "expected closing bracket and dollar sign" } .into(), ) @@ -466,11 +466,11 @@ impl<'s> Tokens<'s> { "deg" => NodeKind::Angle(f, AngularUnit::Deg), "rad" => NodeKind::Angle(f, AngularUnit::Rad), _ => { - return NodeKind::Error(ErrorPosition::Full, all.into()); + return NodeKind::Unknown(all.into()); } } } else { - NodeKind::Error(ErrorPosition::Full, all.into()) + NodeKind::Unknown(all.into()) } } @@ -575,45 +575,31 @@ mod tests { text: &str, lang: Option<&str>, backticks_left: u8, - backticks_right: u8, + err_msg: Option<&str>, block: bool, ) -> NodeKind { - if backticks_left == backticks_right { - NodeKind::Raw(Rc::new(RawToken { + match err_msg { + None => NodeKind::Raw(Rc::new(RawToken { text: text.into(), lang: lang.map(Into::into), backticks: backticks_left, block, - })) - } else { - let remaining = backticks_left - backticks_right; - let noun = if remaining == 1 { "backtick" } else { "backticks" }; - - NodeKind::Error( - ErrorPosition::End, - if backticks_right == 0 { - format!("expected {} {}", remaining, noun) - } else { - format!("expected {} more {}", remaining, noun) - } - .into(), - ) + })), + Some(msg) => { + NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into()) + } } } - fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) - } else { - NodeKind::Error( + fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { + match err_msg { + None => { + NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + } + Some(msg) => NodeKind::Error( ErrorPosition::End, - if display { - "expected closing dollar sign" - } else { - "expected display math closure sequence" - } - .into(), - ) + format!("expected closing {}", msg).into(), + ), } } @@ -634,7 +620,7 @@ mod tests { } fn Invalid(invalid: &str) -> NodeKind { - NodeKind::Error(ErrorPosition::Full, invalid.into()) + NodeKind::Unknown(invalid.into()) } /// Building blocks for suffix testing. @@ -687,7 +673,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, true)), + ('/', Some(Markup), "$ $", Math(" ", false, None)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -908,42 +894,42 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, 1, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false)); + t!(Markup: "``" => Raw("", None, 1, None, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, None, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false)); + t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, Some("5 backticks"), false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, true)); - t!(Markup: "$x$" => Math("x", false, true)); - t!(Markup: r"$\\$" => Math(r"\\", false, true)); - t!(Markup: "$[x + y]$" => Math("x + y", true, true)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, true)); + t!(Markup: "$$" => Math("", false, None)); + t!(Markup: "$x$" => Math("x", false, None)); + t!(Markup: r"$\\$" => Math(r"\\", false, None)); + t!(Markup: "$[x + y]$" => Math("x + y", true, None)); + t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); // Test unterminated. - t!(Markup[""]: "$x" => Math("x", false, false)); - t!(Markup[""]: "$[x" => Math("x", true, false)); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, false)); + t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); + t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false, true)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, true)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false)); + t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); + t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); } #[test] -- cgit v1.2.3 From 2e7d359e59a45849f53eea6e022ca83295f5a6e7 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 18:52:48 +0100 Subject: Unicode escape error moved to tokenizer --- src/parse/mod.rs | 36 +++++++-------------- src/parse/parser.rs | 24 ++++++++++++-- src/parse/tokens.rs | 92 ++++++++++++++++++++++++----------------------------- 3 files changed, 74 insertions(+), 78 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index ce992834..8775e8a1 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -54,7 +54,10 @@ where while !p.eof() && f(p) { markup_node(p, &mut at_start); if let Some(node) = p.last_child() { - at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment); + at_start &= matches!(node.kind(), + &NodeKind::Space(_) | &NodeKind::Parbreak | + &NodeKind::LineComment | &NodeKind::BlockComment + ); } } @@ -88,22 +91,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak - | NodeKind::Raw(_) => p.eat(), - - NodeKind::UnicodeEscape(u) => { - if u.character.is_none() { - let src = p.peek_src(); - p.convert(NodeKind::Error( - ErrorPosition::Full, - "invalid unicode escape sequence".into(), - )); - p.start(); - p.end(NodeKind::Text(src.into())); - return; - } - - p.eat(); - } + | NodeKind::Raw(_) + | NodeKind::UnicodeEscape(_) => p.eat(), NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -503,9 +492,8 @@ fn item(p: &mut Parser) -> NodeKind { /// Convert a collection into an array, producing errors for anything other than /// expressions. fn array(p: &mut Parser, items: usize) { - p.start_with(items); p.filter_children( - 0, + p.child_count() - items, |x| match x.kind() { NodeKind::Named | NodeKind::ParameterSink => false, _ => true, @@ -522,15 +510,14 @@ fn array(p: &mut Parser, items: usize) { }, ); - p.end(NodeKind::Array) + p.convert_with(items, NodeKind::Array); } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. fn dict(p: &mut Parser, items: usize) { - p.start_with(items); p.filter_children( - 0, + p.child_count() - items, |x| { x.kind() == &NodeKind::Named || x.kind().is_parenthesis() @@ -547,7 +534,7 @@ fn dict(p: &mut Parser, items: usize) { ), }, ); - p.end(NodeKind::Dict); + p.convert_with(items, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for @@ -684,8 +671,7 @@ fn let_expr(p: &mut Parser) { return; } - p.start_with(p.child_count() - offset); - p.end(NodeKind::Closure) + p.convert_with(p.child_count() - offset, NodeKind::Closure); } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index e6fcc1ae..240de43d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -186,9 +186,27 @@ impl<'s> Parser<'s> { } pub fn convert(&mut self, kind: NodeKind) { - self.start(); - self.eat(); - self.end(kind); + let len = self.tokens.index() - self.next_start; + + self.children.push( + GreenNode::with_child( + kind, + len, + GreenData::new(self.next.clone().unwrap(), len), + ) + .into(), + ); + self.fast_forward(); + self.success = true; + } + + pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + let preserved: Vec<_> = + self.children.drain(self.children.len() - preserve ..).collect(); + let len = preserved.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, preserved).into()); + self.success = true; } /// End the current node and undo its existence, inling all accumulated diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7c500ce7..1d2e32ec 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -200,7 +200,7 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); - NodeKind::Text(resolve_string(self.s.eaten_from(start))) + NodeKind::Text(self.s.eaten_from(start).into()) } fn whitespace(&mut self) -> NodeKind { @@ -243,10 +243,16 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - })) + if let Some(character) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character, + }) + } else { + NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + ) + } } else { NodeKind::Error( ErrorPosition::End, @@ -560,35 +566,21 @@ mod tests { use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - })) - } else { - NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) - } + fn UnicodeEscape(character: char) -> NodeKind { + NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) } - fn Raw( - text: &str, - lang: Option<&str>, - backticks_left: u8, - err_msg: Option<&str>, - block: bool, - ) -> NodeKind { - match err_msg { - None => NodeKind::Raw(Rc::new(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks: backticks_left, - block, - })), - Some(msg) => { - NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into()) - } - } + fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + NodeKind::Error(pos, message.into()) + } + + fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { + NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) } fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { @@ -795,16 +787,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => UnicodeEscape("", true)); - t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true)); - t!(Markup: r"\u{P}" => UnicodeEscape("P", true)); + t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); + t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false)); - t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false)); - t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false)); - t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false)); - t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -894,22 +886,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, None, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, None, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false)); + t!(Markup: "``" => Raw("", None, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, Some("5 backticks"), false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false)); + t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] -- cgit v1.2.3 From 7d34a548ccd14debe0668e23454e1ced70e485ec Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 10:57:45 +0100 Subject: Reorganize syntax module --- src/parse/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8775e8a1..22288d01 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -520,7 +520,7 @@ fn dict(p: &mut Parser, items: usize) { p.child_count() - items, |x| { x.kind() == &NodeKind::Named - || x.kind().is_parenthesis() + || x.kind().is_paren() || x.kind() == &NodeKind::Comma || x.kind() == &NodeKind::Colon }, @@ -550,7 +550,7 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) { ), _ => false, } - || (allow_parens && x.kind().is_parenthesis()), + || (allow_parens && x.kind().is_paren()), |_| (ErrorPosition::Full, "expected identifier".into()), ); } -- cgit v1.2.3 From 49fb3cd4e2a5d6997ad4046d3514f154d8c866dd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:03:18 +0100 Subject: Code Review: Life is Like a Box of Iterators --- src/parse/mod.rs | 3 +-- src/parse/parser.rs | 24 +++++++++++------------- src/parse/resolve.rs | 8 ++++---- src/parse/scanner.rs | 10 ++++++++++ src/parse/tokens.rs | 30 ++++++++++++------------------ 5 files changed, 38 insertions(+), 37 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 22288d01..c6def4dc 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,12 +12,11 @@ pub use tokens::*; use std::rc::Rc; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; /// Parse a source file. -pub fn parse(source: &SourceFile) -> Rc { +pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); p.finish() diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 240de43d..374e7c09 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,15 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; -use crate::source::{SourceFile, SourceId}; +use super::{is_newline, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. - source: &'s SourceFile, + src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. @@ -61,11 +60,11 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source, TokenMode::Markup); + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); let next = tokens.next(); Self { - source, + src, tokens, groups: vec![], next: next.clone(), @@ -78,11 +77,6 @@ impl<'s> Parser<'s> { } } - /// The id of the parsed source file. - pub fn id(&self) -> SourceId { - self.source.id() - } - /// Start a nested node. /// /// Each start call has to be matched with a call to `end`, @@ -366,12 +360,16 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.source.byte_to_column(index).unwrap() + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() } /// Slice out part of the source string. pub fn get(&self, range: Range) -> &'s str { - self.source.get(range).unwrap() + self.src.get(range).unwrap() } /// Continue parsing in a group. diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 8d4c04d4..3fab98a4 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_newline, Scanner}; -use crate::syntax::RawToken; +use crate::syntax::RawData; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawToken { + RawData { lang: Some(tag.into()), text: text.into(), backticks, block, } } else { - RawToken { + RawData { lang: None, text: split_lines(text).join("\n").into(), backticks, diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 8e3e4278..edf28e17 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -106,6 +106,16 @@ impl<'s> Scanner<'s> { self.index } + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } + /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1d2e32ec..ef2678d4 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,7 +1,6 @@ use super::{is_newline, resolve_raw, Scanner}; use crate::geom::{AngularUnit, LengthUnit}; use crate::parse::resolve::{resolve_hex, resolve_string}; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; @@ -9,7 +8,6 @@ use std::rc::Rc; /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { - source: &'s SourceFile, s: Scanner<'s>, mode: TokenMode, } @@ -26,12 +24,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self { - Self { - s: Scanner::new(source.src()), - source, - mode, - } + pub fn new(source: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(source), mode } } /// Get the current token mode. @@ -244,7 +238,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('}') { if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeToken { + NodeKind::UnicodeEscape(UnicodeEscapeData { character, }) } else { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.source.byte_to_column(self.s.index() - 1).unwrap(); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -322,7 +316,7 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawToken { + return NodeKind::Raw(Rc::new(RawData { text: EcoString::new(), lang: None, backticks: 1, @@ -397,7 +391,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathToken { + NodeKind::Math(Rc::new(MathData { formula: self.s.get(start .. end).into(), display, })) @@ -492,7 +486,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrToken { string }) + NodeKind::Str(StrData { string }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -567,7 +561,7 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) + NodeKind::UnicodeEscape(UnicodeEscapeData { character }) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -575,7 +569,7 @@ mod tests { } fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawToken { + NodeKind::Raw(Rc::new(RawData { text: text.into(), lang: lang.map(Into::into), backticks: backticks_left, @@ -586,7 +580,7 @@ mod tests { fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { match err_msg { None => { - NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } Some(msg) => NodeKind::Error( ErrorPosition::End, @@ -597,7 +591,7 @@ mod tests { fn Str(string: &str, terminated: bool) -> NodeKind { if terminated { - NodeKind::Str(StrToken { string: string.into() }) + NodeKind::Str(StrData { string: string.into() }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -687,7 +681,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::>(); + let found = Tokens::new(&src, $mode).collect::>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; -- cgit v1.2.3 From 42afb27cef5540535420fb6d8d9d2fcda7300a47 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:45:33 +0100 Subject: Add documentation --- src/parse/parser.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'src/parse') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 374e7c09..8c68d630 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -112,10 +112,14 @@ impl<'s> Parser<'s> { } } + /// Return the a child from the current stack frame specified by its + /// non-trivia index from the back. pub fn child(&self, child: usize) -> Option<&Green> { self.node_index_from_back(child).map(|i| &self.children[i]) } + /// Map a non-trivia index from the back of the current stack frame to a + /// normal index. fn node_index_from_back(&self, child: usize) -> Option { let len = self.children.len(); let code = self.tokens.mode() == TokenMode::Code; @@ -172,6 +176,8 @@ impl<'s> Parser<'s> { (stack_offset, diff) } + /// Wrap a specified node in the current stack frame (indexed from the back, + /// not including trivia). pub fn wrap(&mut self, index: usize, kind: NodeKind) { let index = self.node_index_from_back(index).unwrap(); let child = std::mem::take(&mut self.children[index]); @@ -179,6 +185,7 @@ impl<'s> Parser<'s> { self.children[index] = item.into(); } + /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { let len = self.tokens.index() - self.next_start; @@ -194,9 +201,11 @@ impl<'s> Parser<'s> { self.success = true; } - pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + /// Wrap the last `amount` children in the current stack frame with a new + /// node. + pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { let preserved: Vec<_> = - self.children.drain(self.children.len() - preserve ..).collect(); + self.children.drain(self.children.len() - amount ..).collect(); let len = preserved.iter().map(|c| c.len()).sum(); self.children .push(GreenNode::with_children(kind, len, preserved).into()); @@ -219,6 +228,8 @@ impl<'s> Parser<'s> { self.success = false; } + /// This function [`Self::lift`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_lift_abort(&mut self) -> bool { if !self.success { self.lift(); @@ -229,6 +240,8 @@ impl<'s> Parser<'s> { } } + /// This function [`Self::end`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { if !self.success { self.end(kind); @@ -251,6 +264,7 @@ impl<'s> Parser<'s> { } } + /// End the parsing process and return the last child. pub fn finish(&mut self) -> Rc { match self.children.pop().unwrap() { Green::Node(n) => n, @@ -263,6 +277,7 @@ impl<'s> Parser<'s> { self.peek().is_none() } + /// Consume the next token and return its kind. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -490,6 +505,8 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over given the current + /// newline mode. pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { NodeKind::Space(n) => n < &1 || !stop_at_newline, @@ -499,11 +516,12 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over. fn skip_type(&self, token: &NodeKind) -> bool { Self::skip_type_ext(token, self.stop_at_newline()) } - /// Move to the next token. + /// Consume the next token. pub fn eat(&mut self) { self.children.push( GreenData::new( @@ -516,6 +534,7 @@ impl<'s> Parser<'s> { self.fast_forward(); } + /// Move to the next token. pub fn fast_forward(&mut self) { if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { self.prev_end = self.tokens.index().into(); @@ -567,20 +586,24 @@ impl<'s> Parser<'s> { self.groups.iter().any(|g| g.kind == kind) } + /// Returns the last child of the current stack frame. pub fn last_child(&self) -> Option<&Green> { self.children.last() } + /// Whether the last operation was successful. pub fn success(&mut self) -> bool { let s = self.success; self.success = true; s } + /// Declare the last operation as unsuccessful. pub fn unsuccessful(&mut self) { self.success = false; } + /// Amount of children in the current stack frame. pub fn child_count(&self) -> usize { self.children.len() } -- cgit v1.2.3 From 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 2 Nov 2021 12:13:45 +0100 Subject: Refactoring Co-Authored-By: Martin --- src/parse/mod.rs | 3 +- src/parse/parser.rs | 8 +--- src/parse/scanner.rs | 49 ++++++++++++++++++----- src/parse/tokens.rs | 108 +++++++++++++++++++++++++-------------------------- 4 files changed, 95 insertions(+), 73 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c6def4dc..bfe93896 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,7 +12,8 @@ pub use tokens::*; use std::rc::Rc; -use crate::syntax::*; +use crate::syntax::ast::{Associativity, BinOp, UnOp}; +use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; /// Parse a source file. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8c68d630..5833c724 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{is_newline, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -375,11 +375,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() + self.tokens.column(index) } /// Slice out part of the source string. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index edf28e17..92a2333d 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -1,5 +1,7 @@ use std::slice::SliceIndex; +use unicode_xid::UnicodeXID; + /// A featureful char-based scanner. #[derive(Copy, Clone)] pub struct Scanner<'s> { @@ -106,16 +108,6 @@ impl<'s> Scanner<'s> { self.index } - /// The column index of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() - } - /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { @@ -124,6 +116,12 @@ impl<'s> Scanner<'s> { self.index = index; } + /// The full source string. + #[inline] + pub fn src(&self) -> &'s str { + &self.src + } + /// Slice out part of the source string. #[inline] pub fn get(&self, index: I) -> &'s str @@ -160,6 +158,16 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } + + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } } /// Whether this character denotes a newline. @@ -173,3 +181,24 @@ pub fn is_newline(character: char) -> bool { '\u{0085}' | '\u{2028}' | '\u{2029}' ) } + +/// Whether a string is a valid identifier. +#[inline] +pub fn is_ident(string: &str) -> bool { + let mut chars = string.chars(); + chars + .next() + .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) +} + +/// Whether a character can start an identifier. +#[inline] +pub fn is_id_start(c: char) -> bool { + c.is_xid_start() || c == '_' +} + +/// Whether a character can continue an identifier. +#[inline] +pub fn is_id_continue(c: char) -> bool { + c.is_xid_continue() || c == '_' || c == '-' +} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ef2678d4..aa28e1f5 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,11 +1,13 @@ -use super::{is_newline, resolve_raw, Scanner}; +use std::rc::Rc; + +use super::{ + is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string, + Scanner, +}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::resolve::{resolve_hex, resolve_string}; use crate::syntax::*; use crate::util::EcoString; -use std::rc::Rc; - /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { s: Scanner<'s>, @@ -55,6 +57,12 @@ impl<'s> Tokens<'s> { self.s.jump(index); } + /// The column of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.s.column(index) + } + /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -237,10 +245,8 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeData { - character, - }) + if let Some(c) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(c) } else { NodeKind::Error( ErrorPosition::Full, @@ -308,7 +314,8 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); + let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -486,7 +493,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrData { string }) + NodeKind::Str(string) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -556,12 +563,13 @@ mod tests { use super::*; + use ErrorPosition::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeData { character }) + fn UnicodeEscape(c: char) -> NodeKind { + NodeKind::UnicodeEscape(c) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -577,24 +585,12 @@ mod tests { })) } - fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { - match err_msg { - None => { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) - } - Some(msg) => NodeKind::Error( - ErrorPosition::End, - format!("expected closing {}", msg).into(), - ), - } + fn Math(formula: &str, display: bool) -> NodeKind { + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } - fn Str(string: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Str(StrData { string: string.into() }) - } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) - } + fn Str(string: &str) -> NodeKind { + NodeKind::Str(string.into()) } fn Text(string: &str) -> NodeKind { @@ -659,7 +655,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, None)), + ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -781,16 +777,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); - t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -882,11 +878,11 @@ mod tests { // Test basic raw block. t!(Markup: "``" => Raw("", None, 1, false)); t!(Markup: "`raw`" => Raw("raw", None, 1, false)); - t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); @@ -894,28 +890,28 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, None)); - t!(Markup: "$x$" => Math("x", false, None)); - t!(Markup: r"$\\$" => Math(r"\\", false, None)); - t!(Markup: "$[x + y]$" => Math("x + y", true, None)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); + t!(Markup: "$$" => Math("", false)); + t!(Markup: "$x$" => Math("x", false)); + t!(Markup: r"$\\$" => Math(r"\\", false)); + t!(Markup: "$[x + y]$" => Math("x + y", true)); + t!(Markup: r"$[\\]$" => Math(r"\\", true)); // Test unterminated. - t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); - t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign")); + t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign")); + t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign")); // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); + t!(Markup: r"$\$x$" => Math(r"\$x", false)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true)); + t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign")); } #[test] @@ -1003,16 +999,16 @@ mod tests { #[test] fn test_tokenize_strings() { // Test basic strings. - t!(Code: "\"hi\"" => Str("hi", true)); - t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true)); - t!(Code: "\"🌎\"" => Str("🌎", true)); + t!(Code: "\"hi\"" => Str("hi")); + t!(Code: "\"hi\nthere\"" => Str("hi\nthere")); + t!(Code: "\"🌎\"" => Str("🌎")); // Test unterminated. - t!(Code[""]: "\"hi" => Str("hi", false)); + t!(Code[""]: "\"hi" => Error(End, "expected quote")); // Test escaped quote. - t!(Code: r#""a\"bc""# => Str("a\"bc", true)); - t!(Code[""]: r#""\""# => Str("\"", false)); + t!(Code: r#""a\"bc""# => Str("a\"bc")); + t!(Code[""]: r#""\""# => Error(End, "expected quote")); } #[test] -- cgit v1.2.3 From f0c9635db5efd0c66e01bef1be0a8f140fdbdd84 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 4 Nov 2021 15:16:46 +0100 Subject: Notes --- src/parse/mod.rs | 11 +++++++++++ src/parse/parser.rs | 43 ++++++++++++++++++++----------------------- 2 files changed, 31 insertions(+), 23 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index bfe93896..30e20c0d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -53,6 +53,8 @@ where p.start(); while !p.eof() && f(p) { markup_node(p, &mut at_start); + // NOTE: Just do this at the end of markup_node. Maybe even gives a + // speed boost. Wasn't possible in old parser due to use of ?. if let Some(node) = p.last_child() { at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | @@ -115,6 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); + // NOTE: Return success from expr_with? expr_with(p, true, 0); if stmt && p.success() && !p.eof() { p.expected_at("semicolon or line break"); @@ -138,6 +141,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { /// Parse a heading. fn heading(p: &mut Parser) { + // NOTE: Remove HeadingLevel kind and simply count Eq children in AST. p.start(); p.start(); p.eat_assert(&NodeKind::Eq); @@ -198,6 +202,8 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { let prec = op.precedence(); expr_with(p, atomic, prec); + // NOTE: Lifting not needed if we don't start in the first place. + // Then we could simply do expr_with(p, atomic, prec)?; if p.may_lift_abort() { return; } @@ -264,6 +270,10 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { break; } + // NOTE: All lifts up to here wouldn't be needed. + // Only here we then need to do + // marker.end(p, NodeKind::Binary); + offset = p.end_and_start_with(NodeKind::Binary).0; } } @@ -456,6 +466,7 @@ fn item(p: &mut Parser) -> NodeKind { if p.eat_if(&NodeKind::Dots) { expr(p); + // NOTE: Should be called `Spread`. p.end_or_abort(NodeKind::ParameterSink); return NodeKind::ParameterSink; } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5833c724..5ecb6e9d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -187,17 +187,8 @@ impl<'s> Parser<'s> { /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { - let len = self.tokens.index() - self.next_start; - - self.children.push( - GreenNode::with_child( - kind, - len, - GreenData::new(self.next.clone().unwrap(), len), - ) - .into(), - ); - self.fast_forward(); + self.eat(); + self.children.last_mut().unwrap().set_kind(kind); self.success = true; } @@ -278,6 +269,7 @@ impl<'s> Parser<'s> { } /// Consume the next token and return its kind. + // NOTE: This isn't great. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -319,6 +311,7 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { + // NOTE: assert with peek(), then eat() let next = self.eat_peeked(); debug_assert_eq!(next.as_ref(), Some(t)); } @@ -438,8 +431,6 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - self.tokens.jump(self.prev_end()); - if prev_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. len).rev() { @@ -451,7 +442,11 @@ impl<'s> Parser<'s> { } } - self.fast_forward(); + self.tokens.jump(self.prev_end()); + self.prev_end = self.tokens.index().into(); + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); + self.repeek(); } } @@ -527,21 +522,23 @@ impl<'s> Parser<'s> { .into(), ); - self.fast_forward(); - } - - /// Move to the next token. - pub fn fast_forward(&mut self) { - if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.prev_end = self.tokens.index().into(); - } + self.prev_end = self.tokens.index().into(); self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.eat(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); } } -- cgit v1.2.3 From 5c952d56d0d602a1dbcf85210ae30fa402219fca Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 4 Nov 2021 19:36:32 +0100 Subject: New error handling --- src/parse/mod.rs | 518 +++++++++++++++++++++++----------------------------- src/parse/parser.rs | 228 +++++++++-------------- 2 files changed, 321 insertions(+), 425 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 30e20c0d..92220eaa 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,6 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; +type ParseResult = Result<(), ()>; + /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); @@ -53,24 +55,16 @@ where p.start(); while !p.eof() && f(p) { markup_node(p, &mut at_start); - // NOTE: Just do this at the end of markup_node. Maybe even gives a - // speed boost. Wasn't possible in old parser due to use of ?. - if let Some(node) = p.last_child() { - at_start &= matches!(node.kind(), - &NodeKind::Space(_) | &NodeKind::Parbreak | - &NodeKind::LineComment | &NodeKind::BlockComment - ); - } } p.end(NodeKind::Markup); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) { +fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { let token = match p.peek() { Some(t) => t, - None => return, + None => return Ok(()), }; match token { @@ -83,6 +77,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } else { p.convert(NodeKind::Parbreak); } + return Ok(()); } // Text and markup. @@ -94,7 +89,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) - | NodeKind::UnicodeEscape(_) => p.eat(), + | NodeKind::UnicodeEscape(_) => { + p.eat(); + Ok(()) + } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -102,7 +100,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())) + p.convert(NodeKind::Text(p.peek_src().into())); + Ok(()) } // Hashtag + keyword / identifier. @@ -117,12 +116,11 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); - // NOTE: Return success from expr_with? - expr_with(p, true, 0); - if stmt && p.success() && !p.eof() { + let res = expr_with(p, true, 0); + if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } - p.end_group(); + p.end_group() } // Block and template. @@ -130,19 +128,28 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { NodeKind::LeftBracket => template(p), // Comments. - NodeKind::LineComment | NodeKind::BlockComment | NodeKind::Error(_, _) => p.eat(), + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + return Ok(()); + } + + NodeKind::Error(_, _) => { + p.eat(); + Ok(()) + } _ => { - *at_start = false; p.unexpected(); + Err(()) } - }; + }?; + + *at_start = false; + Ok(()) } /// Parse a heading. -fn heading(p: &mut Parser) { - // NOTE: Remove HeadingLevel kind and simply count Eq children in AST. - p.start(); +fn heading(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Eq); @@ -153,36 +160,37 @@ fn heading(p: &mut Parser) { } if level > 6 { - p.lift(); p.end(NodeKind::Text(EcoString::from('=').repeat(level))); } else { - p.end(NodeKind::HeadingLevel(level as u8)); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Heading); } + Ok(()) } /// Parse a single list item. -fn list_node(p: &mut Parser) { +fn list_node(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::ListBullet); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::List); + Ok(()) } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { +fn enum_node(p: &mut Parser) -> ParseResult { p.start(); p.eat(); let column = p.column(p.prev_end()); markup_indented(p, column); p.end(NodeKind::Enum); + Ok(()) } /// Parse an expression. -fn expr(p: &mut Parser) { +fn expr(p: &mut Parser) -> ParseResult { expr_with(p, false, 0) } @@ -193,28 +201,19 @@ fn expr(p: &mut Parser) { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { - p.start(); - let mut offset = p.child_count(); +fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { + let marker = p.marker(); + // Start the unary expression. match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - expr_with(p, atomic, prec); - - // NOTE: Lifting not needed if we don't start in the first place. - // Then we could simply do expr_with(p, atomic, prec)?; - if p.may_lift_abort() { - return; - } + expr_with(p, atomic, prec)?; - p.end_and_start_with(NodeKind::Unary); + marker.end(p, NodeKind::Unary); } None => { - primary(p, atomic); - if p.may_lift_abort() { - return; - } + primary(p, atomic)?; } }; @@ -225,35 +224,28 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, p.child_count() - offset); + call(p, &marker); continue; } - if p.peek() == Some(&NodeKind::With) { - with_expr(p, p.child_count() - offset); - - if p.may_lift_abort() { - return; - } + if atomic { + break Ok(()); } - if atomic { - p.lift(); - break; + if p.peek() == Some(&NodeKind::With) { + with_expr(p, &marker)?; } let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, None => { - p.lift(); - break; + break Ok(()); } }; let mut prec = op.precedence(); if prec < min_prec { - p.lift(); - break; + break Ok(()); } p.eat(); @@ -263,44 +255,38 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) { Associativity::Right => {} } - expr_with(p, atomic, prec); - - if !p.success() { - p.lift(); - break; + if expr_with(p, atomic, prec).is_err() { + break Ok(()); } - // NOTE: All lifts up to here wouldn't be needed. - // Only here we then need to do - // marker.end(p, NodeKind::Binary); - - offset = p.end_and_start_with(NodeKind::Binary).0; + marker.end(p, NodeKind::Binary); } } /// Parse a primary expression. -fn primary(p: &mut Parser, atomic: bool) { - if literal(p) { - return; +fn primary(p: &mut Parser, atomic: bool) -> ParseResult { + let lit = literal(p); + if lit.is_ok() { + return lit; } match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { // Start closure params. - p.start(); + let marker = p.marker(); p.eat(); // Arrow means this is a closure's lone parameter. if !atomic && p.peek() == Some(&NodeKind::Arrow) { - p.end_and_start_with(NodeKind::ClosureParams); + marker.end(p, NodeKind::ClosureParams); p.eat(); - expr(p); - - p.end_or_abort(NodeKind::Closure); + let e = expr(p); + marker.end(p, NodeKind::Closure); + e } else { - p.lift(); + Ok(()) } } @@ -319,18 +305,19 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Error(_, _)) => { p.eat(); + Ok(()) } // Nothing. _ => { p.expected("expression"); - p.unsuccessful(); + Err(()) } } } /// Parse a literal. -fn literal(p: &mut Parser) -> bool { +fn literal(p: &mut Parser) -> ParseResult { match p.peek() { // Basic values. Some( @@ -346,10 +333,10 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Str(_), ) => { p.eat(); - true + Ok(()) } - _ => false, + _ => Err(()), } } @@ -358,47 +345,39 @@ fn literal(p: &mut Parser) -> bool { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) { - let offset = p.child_count(); - p.start(); +fn parenthesized(p: &mut Parser) -> ParseResult { + let marker = p.marker(); p.start_group(Group::Paren, TokenMode::Code); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); - let token_count = p.child_count() - offset; // Leading colon makes this a (empty) dictionary. if colon { - p.lift(); - dict(p, token_count); - return; + return dict(p, &marker); } // Arrow means this is a closure's parameter list. if p.peek() == Some(&NodeKind::Arrow) { - p.start_with(token_count); - params(p, 0, true); - p.end(NodeKind::ClosureParams); + params(p, &marker, true); + marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); - expr(p); + let r = expr(p); - p.end_or_abort(NodeKind::Closure); - return; + marker.end(p, NodeKind::Closure); + return r; } // Find out which kind of collection this is. match kind { - CollectionKind::Group => p.end(NodeKind::Group), - CollectionKind::Positional => { - p.lift(); - array(p, token_count); - } - CollectionKind::Named => { - p.lift(); - dict(p, token_count); + CollectionKind::Group => { + marker.end(p, NodeKind::Group); + Ok(()) } + CollectionKind::Positional => array(p, &marker), + CollectionKind::Named => dict(p, &marker), } } @@ -422,23 +401,22 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; let mut kind = CollectionKind::Positional; let mut has_comma = false; - let mut missing_coma = None; + let mut missing_coma: Option = None; while !p.eof() { - let item_kind = item(p); - if p.success() { + if let Ok(item_kind) = item(p) { if items == 0 && item_kind == NodeKind::Named { kind = CollectionKind::Named; } - if item_kind == NodeKind::ParameterSink { + if item_kind == NodeKind::Spread { has_comma = true; } items += 1; - if let Some(pos) = missing_coma.take() { - p.expected_at_child(pos, "comma"); + if let Some(marker) = missing_coma.take() { + marker.expected_at(p, "comma"); } if p.eof() { @@ -448,7 +426,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { if p.eat_if(&NodeKind::Comma) { has_comma = true; } else { - missing_coma = Some(p.child_count()); + missing_coma = Some(p.marker()); } } } @@ -461,52 +439,49 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } /// Parse an expression or a named pair. Returns if this is a named pair. -fn item(p: &mut Parser) -> NodeKind { - p.start(); +fn item(p: &mut Parser) -> Result { + let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - expr(p); + let r = expr(p); - // NOTE: Should be called `Spread`. - p.end_or_abort(NodeKind::ParameterSink); - return NodeKind::ParameterSink; + marker.end(p, NodeKind::Spread); + return r.map(|_| NodeKind::Spread); } - expr(p); - - if p.may_lift_abort() { - return NodeKind::None; + let ident_marker = p.marker(); + if expr(p).is_err() { + return Err(()); } - if p.eat_if(&NodeKind::Colon) { - let child = p.child(1).unwrap(); - if matches!(child.kind(), &NodeKind::Ident(_)) { - expr(p); - p.end_or_abort(NodeKind::Named); + if p.peek() == Some(&NodeKind::Colon) { + let r = if matches!(p.child(0).unwrap().kind(), &NodeKind::Ident(_)) { + p.eat(); + expr(p) } else { - p.wrap( - 1, + ident_marker.end( + p, NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), ); + p.eat(); expr(p); - p.end(NodeKind::Named); - p.unsuccessful(); - } + Err(()) + }; - NodeKind::Named + marker.end(p, NodeKind::Named); + r.map(|_| NodeKind::Named) } else { - p.lift(); - p.last_child().unwrap().kind().clone() + Ok(p.last_child().unwrap().kind().clone()) } } /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, items: usize) { - p.filter_children( - p.child_count() - items, +fn array(p: &mut Parser, marker: &Marker) -> ParseResult { + marker.filter_children( + p, |x| match x.kind() { - NodeKind::Named | NodeKind::ParameterSink => false, + NodeKind::Named | NodeKind::Spread => false, _ => true, }, |kind| match kind { @@ -514,21 +489,22 @@ fn array(p: &mut Parser, items: usize) { ErrorPosition::Full, "expected expression, found named pair".into(), ), - NodeKind::ParameterSink => { + NodeKind::Spread => { (ErrorPosition::Full, "spreading is not allowed here".into()) } _ => unreachable!(), }, ); - p.convert_with(items, NodeKind::Array); + marker.end(p, NodeKind::Array); + Ok(()) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, items: usize) { - p.filter_children( - p.child_count() - items, +fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { + marker.filter_children( + p, |x| { x.kind() == &NodeKind::Named || x.kind().is_paren() @@ -536,7 +512,7 @@ fn dict(p: &mut Parser, items: usize) { || x.kind() == &NodeKind::Colon }, |kind| match kind { - NodeKind::ParameterSink => { + NodeKind::Spread => { (ErrorPosition::Full, "spreading is not allowed here".into()) } _ => ( @@ -545,17 +521,19 @@ fn dict(p: &mut Parser, items: usize) { ), }, ); - p.convert_with(items, NodeKind::Dict); + + marker.end(p, NodeKind::Dict); + Ok(()) } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, count: usize, allow_parens: bool) { - p.filter_children( - count, +fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { + marker.filter_children( + p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, - NodeKind::ParameterSink => matches!( + NodeKind::Spread => matches!( x.children().last().map(|x| x.kind()), Some(&NodeKind::Ident(_)) ), @@ -567,22 +545,22 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) { } // Parse a template block: `[...]`. -fn template(p: &mut Parser) { +fn template(p: &mut Parser) -> ParseResult { p.start(); p.start_group(Group::Bracket, TokenMode::Markup); markup(p); p.end_group(); p.end(NodeKind::Template); + Ok(()) } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) { +fn block(p: &mut Parser) -> ParseResult { p.start(); p.start_group(Group::Brace, TokenMode::Code); while !p.eof() { p.start_group(Group::Stmt, TokenMode::Code); - expr(p); - if p.success() { + if expr(p).is_ok() { if !p.eof() { p.expected_at("semicolon or line break"); } @@ -594,25 +572,25 @@ fn block(p: &mut Parser) { } p.end_group(); p.end(NodeKind::Block); + Ok(()) } /// Parse a function call. -fn call(p: &mut Parser, callee: usize) { - p.start_with(callee); - match p.peek_direct() { +fn call(p: &mut Parser, callee: &Marker) -> ParseResult { + let res = match p.peek_direct() { Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), _ => { p.expected_at("argument list"); - p.may_end_abort(NodeKind::Call); - return; + Err(()) } }; - p.end(NodeKind::Call); + callee.end(p, NodeKind::Call); + res } /// Parse the arguments to a function call. -fn args(p: &mut Parser, allow_template: bool) { +fn args(p: &mut Parser, allow_template: bool) -> ParseResult { p.start(); if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { p.start_group(Group::Paren, TokenMode::Code); @@ -625,167 +603,126 @@ fn args(p: &mut Parser, allow_template: bool) { } p.end(NodeKind::CallArgs); + Ok(()) } /// Parse a with expression. -fn with_expr(p: &mut Parser, preserve: usize) { - p.start_with(preserve); +fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { - args(p, false); - p.end(NodeKind::WithExpr); + let res = if p.peek() == Some(&NodeKind::LeftParen) { + args(p, false) } else { p.expected("argument list"); - p.may_end_abort(NodeKind::WithExpr); - } + Err(()) + }; + + marker.end(p, NodeKind::WithExpr); + res } /// Parse a let expression. -fn let_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::Let); +fn let_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::LetExpr, |p| { + p.eat_assert(&NodeKind::Let); - let offset = p.child_count(); - ident(p); - if p.may_end_abort(NodeKind::LetExpr) { - return; - } + let marker = p.marker(); + ident(p)?; - if p.peek() == Some(&NodeKind::With) { - with_expr(p, p.child_count() - offset); - } else { - // If a parenthesis follows, this is a function definition. - let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start(); - p.start_group(Group::Paren, TokenMode::Code); - let offset = p.child_count(); - collection(p); - params(p, offset, true); - p.end_group(); - p.end(NodeKind::ClosureParams); - true + if p.peek() == Some(&NodeKind::With) { + with_expr(p, &marker); } else { - false - }; - - if p.eat_if(&NodeKind::Eq) { - expr(p); - } else if has_params { - // Function definitions must have a body. - p.expected_at("body"); - } - - // Rewrite into a closure expression if it's a function definition. - if has_params { - if p.may_end_abort(NodeKind::LetExpr) { - return; + // If a parenthesis follows, this is a function definition. + let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start(); + p.start_group(Group::Paren, TokenMode::Code); + let marker = p.marker(); + collection(p); + params(p, &marker, true); + p.end_group(); + p.end(NodeKind::ClosureParams); + true + } else { + false + }; + + if p.eat_if(&NodeKind::Eq) { + expr(p)?; + } else if has_params { + // Function definitions must have a body. + p.expected_at("body"); } - p.convert_with(p.child_count() - offset, NodeKind::Closure); + // Rewrite into a closure expression if it's a function definition. + if has_params { + marker.end(p, NodeKind::Closure); + } } - } - p.end(NodeKind::LetExpr); + Ok(()) + }) } /// Parse an if expresion. -fn if_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::If); +fn if_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::IfExpr, |p| { + p.eat_assert(&NodeKind::If); - expr(p); - if p.may_end_abort(NodeKind::IfExpr) { - return; - } + expr(p)?; + body(p)?; - body(p); - if p.may_end_abort(NodeKind::IfExpr) { - // Expected function body. - return; - } - - if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { - if_expr(p); - } else { - body(p); + if p.eat_if(&NodeKind::Else) { + if p.peek() == Some(&NodeKind::If) { + if_expr(p)?; + } else { + body(p)?; + } } - } - p.end(NodeKind::IfExpr); + Ok(()) + }) } /// Parse a while expresion. -fn while_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::While); - - expr(p); - - if p.may_end_abort(NodeKind::WhileExpr) { - return; - } - - body(p); - if !p.may_end_abort(NodeKind::WhileExpr) { - p.end(NodeKind::WhileExpr); - } +fn while_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::WhileExpr, |p| { + p.eat_assert(&NodeKind::While); + expr(p)?; + body(p)?; + Ok(()) + }) } /// Parse a for expression. -fn for_expr(p: &mut Parser) { - p.start(); - p.eat_assert(&NodeKind::For); - - for_pattern(p); - - if p.may_end_abort(NodeKind::ForExpr) { - return; - } - - if p.eat_expect(&NodeKind::In) { - expr(p); - - if p.may_end_abort(NodeKind::ForExpr) { - return; - } - - body(p); - - if !p.may_end_abort(NodeKind::ForExpr) { - p.end(NodeKind::ForExpr); +fn for_expr(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::ForExpr, |p| { + p.eat_assert(&NodeKind::For); + + for_pattern(p)?; + if p.eat_expect(&NodeKind::In) { + expr(p)?; + body(p)?; + Ok(()) + } else { + Err(()) } - } else { - p.unsuccessful(); - p.may_end_abort(NodeKind::ForExpr); - } + }) } /// Parse a for loop pattern. -fn for_pattern(p: &mut Parser) { - p.start(); - ident(p); - - if p.may_end_abort(NodeKind::ForPattern) { - return; - } - - if p.peek() == Some(&NodeKind::Comma) { - p.eat(); - - ident(p); - - if p.may_end_abort(NodeKind::ForPattern) { - return; +fn for_pattern(p: &mut Parser) -> ParseResult { + p.perform(NodeKind::ForPattern, |p| { + ident(p)?; + if p.peek() == Some(&NodeKind::Comma) { + p.eat(); + ident(p)?; } - } - - p.end(NodeKind::ForPattern); + Ok(()) + }) } /// Parse an import expression. -fn import_expr(p: &mut Parser) { +fn import_expr(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Import); @@ -793,15 +730,15 @@ fn import_expr(p: &mut Parser) { // This is the list of identifiers scenario. p.start(); p.start_group(Group::Imports, TokenMode::Code); - let offset = p.child_count(); + let marker = p.marker(); let items = collection(p).1; if items == 0 { p.expected_at("import items"); } p.end_group(); - p.filter_children( - offset, + marker.filter_children( + p, |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), |_| (ErrorPosition::Full, "expected identifier".into()), ); @@ -813,36 +750,41 @@ fn import_expr(p: &mut Parser) { } p.end(NodeKind::ImportExpr); + Ok(()) } /// Parse an include expression. -fn include_expr(p: &mut Parser) { +fn include_expr(p: &mut Parser) -> ParseResult { p.start(); p.eat_assert(&NodeKind::Include); expr(p); p.end(NodeKind::IncludeExpr); + Ok(()) } /// Parse an identifier. -fn ident(p: &mut Parser) { +fn ident(p: &mut Parser) -> ParseResult { match p.peek() { - Some(NodeKind::Ident(_)) => p.eat(), + Some(NodeKind::Ident(_)) => { + p.eat(); + Ok(()) + } _ => { p.expected("identifier"); - p.unsuccessful(); + Err(()) } } } /// Parse a control flow body. -fn body(p: &mut Parser) { +fn body(p: &mut Parser) -> ParseResult { match p.peek() { Some(NodeKind::LeftBracket) => template(p), Some(NodeKind::LeftBrace) => block(p), _ => { p.expected_at("body"); - p.unsuccessful(); + Err(()) } } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ecb6e9d..bc028876 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; +use super::{ParseResult, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -26,8 +26,6 @@ pub struct Parser<'s> { stack: Vec>, /// The children of the currently built node. children: Vec, - /// Whether the last parsing step was successful. - success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -58,6 +56,49 @@ pub enum Group { Imports, } +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. + pub fn end(&self, p: &mut Parser, kind: NodeKind) { + if p.children.len() != self.0 { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let len = children.iter().map(Green::len).sum(); + p.children + .insert(self.0, GreenNode::with_children(kind, len, children).into()); + } + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + p.filter_children(self, f, error) + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } +} + impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -73,7 +114,6 @@ impl<'s> Parser<'s> { next_start: 0, stack: vec![], children: vec![], - success: true, } } @@ -85,19 +125,13 @@ impl<'s> Parser<'s> { self.stack.push(std::mem::take(&mut self.children)); } - /// Start a nested node, preserving a number of the current children. - pub fn start_with(&mut self, preserve: usize) { - let preserved = self.children.drain(self.children.len() - preserve ..).collect(); - self.stack.push(std::mem::replace(&mut self.children, preserved)); - } - /// Filter the last children using the given predicate. - pub fn filter_children(&mut self, count: usize, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F, error: G) where F: Fn(&Green) -> bool, G: Fn(&NodeKind) -> (ErrorPosition, EcoString), { - for child in &mut self.children[count ..] { + for child in &mut self.children[count.0 ..] { if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) || child.kind().is_error() @@ -161,46 +195,22 @@ impl<'s> Parser<'s> { self.children .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); - self.success = true; } - /// End the current node as a node of given `kind`, and start a new node - /// with the ended node as a first child. The function returns how many - /// children the stack frame had before and how many were appended (accounts - /// for trivia). - pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { - let stack_offset = self.stack.last().unwrap().len(); + pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + self.start(); + let success = f(self); self.end(kind); - let diff = self.children.len() - stack_offset; - self.start_with(diff); - (stack_offset, diff) - } - - /// Wrap a specified node in the current stack frame (indexed from the back, - /// not including trivia). - pub fn wrap(&mut self, index: usize, kind: NodeKind) { - let index = self.node_index_from_back(index).unwrap(); - let child = std::mem::take(&mut self.children[index]); - let item = GreenNode::with_child(kind, child.len(), child); - self.children[index] = item.into(); + success } /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { self.eat(); self.children.last_mut().unwrap().set_kind(kind); - self.success = true; - } - - /// Wrap the last `amount` children in the current stack frame with a new - /// node. - pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { - let preserved: Vec<_> = - self.children.drain(self.children.len() - amount ..).collect(); - let len = preserved.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, preserved).into()); - self.success = true; } /// End the current node and undo its existence, inling all accumulated @@ -209,50 +219,14 @@ impl<'s> Parser<'s> { let outer = self.stack.pop().unwrap(); let children = std::mem::replace(&mut self.children, outer); self.children.extend(children); - self.success = true; } - /// End the current node and undo its existence, deleting all accumulated - /// children. - pub fn abort(&mut self, msg: impl Into) { - self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); - self.success = false; - } - - /// This function [`Self::lift`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_lift_abort(&mut self) -> bool { - if !self.success { - self.lift(); - self.success = false; - true - } else { - false - } - } - - /// This function [`Self::end`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { - if !self.success { - self.end(kind); - self.success = false; - true - } else { - false - } - } - - /// End the current node as a node of given `kind` if the last parse was - /// successful, otherwise, abort. - pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { - if self.success { - self.end(kind); - true - } else { - self.may_end_abort(kind); - false - } + /// Add an error to the current children list. + fn push_error(&mut self, msg: impl Into) { + self.children.push( + GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) + .into(), + ); } /// End the parsing process and return the last child. @@ -268,14 +242,6 @@ impl<'s> Parser<'s> { self.peek().is_none() } - /// Consume the next token and return its kind. - // NOTE: This isn't great. - fn eat_peeked(&mut self) -> Option { - let token = self.peek()?.clone(); - self.eat(); - Some(token) - } - /// Consume the next token if it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { @@ -311,9 +277,9 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { - // NOTE: assert with peek(), then eat() - let next = self.eat_peeked(); - debug_assert_eq!(next.as_ref(), Some(t)); + let next = self.peek(); + debug_assert_eq!(next, Some(t)); + self.eat(); } /// Consume tokens while the condition is true. @@ -402,9 +368,10 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) { + pub fn end_group(&mut self) -> ParseResult { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); + let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -424,8 +391,8 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { - self.start(); - self.abort(format!("expected {}", end)); + self.push_error(format!("expected {}", end)); + success = false; } } @@ -448,6 +415,8 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } + + if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. @@ -460,39 +429,36 @@ impl<'s> Parser<'s> { found = i; } - self.expected_at_child(found, what); - } - - /// Add an error that `what` was expected at the given child index. - pub fn expected_at_child(&mut self, index: usize, what: &str) { - self.children.insert( - index, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); + Marker(found).expected_at(self, what); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("expected {}, found {}", what, found)), - None => { - self.lift(); - self.expected_at(what); + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + )); } + None => self.expected_at(what), } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("unexpected {}", found)), - None => self.abort("unexpected end of file"), + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + )); + } + None => self.push_error("unexpected end of file"), } } @@ -584,20 +550,8 @@ impl<'s> Parser<'s> { self.children.last() } - /// Whether the last operation was successful. - pub fn success(&mut self) -> bool { - let s = self.success; - self.success = true; - s - } - - /// Declare the last operation as unsuccessful. - pub fn unsuccessful(&mut self) { - self.success = false; - } - - /// Amount of children in the current stack frame. - pub fn child_count(&self) -> usize { - self.children.len() + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) } } -- cgit v1.2.3 From cf2e527a026e81269ef716b4d6675ae6d981d681 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Fri, 5 Nov 2021 12:53:52 +0100 Subject: Code Review: No Patrick, question marks are not an instrument --- src/parse/mod.rs | 417 +++++++++++++++++++++++----------------------------- src/parse/parser.rs | 135 +++++++---------- 2 files changed, 243 insertions(+), 309 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 92220eaa..21ca303e 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -14,9 +14,8 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; -use crate::util::EcoString; -type ParseResult = Result<(), ()>; +type ParseResult = Result; /// Parse a source file. pub fn parse(source: &str) -> Rc { @@ -52,12 +51,11 @@ fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.start(); - while !p.eof() && f(p) { - markup_node(p, &mut at_start); - } - - p.end(NodeKind::Markup); + p.perform(NodeKind::Markup, |p| { + while !p.eof() && f(p) { + markup_node(p, &mut at_start).ok(); + } + }); } /// Parse a markup node. @@ -91,7 +89,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { | NodeKind::Raw(_) | NodeKind::UnicodeEscape(_) => { p.eat(); - Ok(()) } NodeKind::Eq if *at_start => heading(p), @@ -101,7 +98,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); - Ok(()) } // Hashtag + keyword / identifier. @@ -120,7 +116,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } - p.end_group() + p.end_group(); } // Block and template. @@ -135,58 +131,46 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { NodeKind::Error(_, _) => { p.eat(); - Ok(()) } _ => { p.unexpected(); - Err(()) + return Err(()); } - }?; + }; *at_start = false; Ok(()) } /// Parse a heading. -fn heading(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Eq); - - // Count depth. - let mut level: usize = 1; - while p.eat_if(&NodeKind::Eq) { - level += 1; - } +fn heading(p: &mut Parser) { + p.perform(NodeKind::Heading, |p| { + p.eat_assert(&NodeKind::Eq); + + while p.eat_if(&NodeKind::Eq) {} - if level > 6 { - p.end(NodeKind::Text(EcoString::from('=').repeat(level))); - } else { let column = p.column(p.prev_end()); markup_indented(p, column); - p.end(NodeKind::Heading); - } - Ok(()) + }); } /// Parse a single list item. -fn list_node(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::ListBullet); - let column = p.column(p.prev_end()); - markup_indented(p, column); - p.end(NodeKind::List); - Ok(()) +fn list_node(p: &mut Parser) { + p.perform(NodeKind::List, |p| { + p.eat_assert(&NodeKind::ListBullet); + let column = p.column(p.prev_end()); + markup_indented(p, column); + }); } /// Parse a single enum item. -fn enum_node(p: &mut Parser) -> ParseResult { - p.start(); - p.eat(); - let column = p.column(p.prev_end()); - markup_indented(p, column); - p.end(NodeKind::Enum); - Ok(()) +fn enum_node(p: &mut Parser) { + p.perform(NodeKind::Enum, |p| { + p.eat(); + let column = p.column(p.prev_end()); + markup_indented(p, column); + }); } /// Parse an expression. @@ -224,7 +208,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, &marker); + call(p, &marker)?; continue; } @@ -255,19 +239,14 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { Associativity::Right => {} } - if expr_with(p, atomic, prec).is_err() { - break Ok(()); - } - - marker.end(p, NodeKind::Binary); + marker.perform(p, NodeKind::Binary, |p| expr_with(p, atomic, prec))?; } } /// Parse a primary expression. fn primary(p: &mut Parser, atomic: bool) -> ParseResult { - let lit = literal(p); - if lit.is_ok() { - return lit; + if literal(p) { + return Ok(()); } match p.peek() { @@ -282,9 +261,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { marker.end(p, NodeKind::ClosureParams); p.eat(); - let e = expr(p); - marker.end(p, NodeKind::Closure); - e + marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) } @@ -292,8 +269,14 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { // Structures. Some(NodeKind::LeftParen) => parenthesized(p), - Some(NodeKind::LeftBracket) => template(p), - Some(NodeKind::LeftBrace) => block(p), + Some(NodeKind::LeftBracket) => { + template(p); + Ok(()) + } + Some(NodeKind::LeftBrace) => { + block(p); + Ok(()) + } // Keywords. Some(NodeKind::Let) => let_expr(p), @@ -317,7 +300,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { } /// Parse a literal. -fn literal(p: &mut Parser) -> ParseResult { +fn literal(p: &mut Parser) -> bool { match p.peek() { // Basic values. Some( @@ -333,10 +316,10 @@ fn literal(p: &mut Parser) -> ParseResult { | NodeKind::Str(_), ) => { p.eat(); - Ok(()) + true } - _ => Err(()), + _ => false, } } @@ -364,10 +347,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult { p.eat_assert(&NodeKind::Arrow); - let r = expr(p); - - marker.end(p, NodeKind::Closure); - return r; + return marker.perform(p, NodeKind::Closure, expr); } // Find out which kind of collection this is. @@ -439,37 +419,35 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } /// Parse an expression or a named pair. Returns if this is a named pair. -fn item(p: &mut Parser) -> Result { +fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - let r = expr(p); - - marker.end(p, NodeKind::Spread); - return r.map(|_| NodeKind::Spread); + return marker + .perform(p, NodeKind::Spread, |p| expr(p).map(|_| NodeKind::Spread)); } let ident_marker = p.marker(); - if expr(p).is_err() { - return Err(()); - } + expr(p)?; if p.peek() == Some(&NodeKind::Colon) { - let r = if matches!(p.child(0).unwrap().kind(), &NodeKind::Ident(_)) { - p.eat(); - expr(p) - } else { - ident_marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); - p.eat(); - - expr(p); - Err(()) - }; + marker.perform(p, NodeKind::Named, |p| { + if matches!( + ident_marker.child_at(p).unwrap().kind(), + &NodeKind::Ident(_) + ) { + p.eat(); + expr(p).map(|_| NodeKind::Named) + } else { + ident_marker.end( + p, + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), + ); + p.eat(); - marker.end(p, NodeKind::Named); - r.map(|_| NodeKind::Named) + expr(p).ok(); + Err(()) + } + }) } else { Ok(p.last_child().unwrap().kind().clone()) } @@ -478,23 +456,16 @@ fn item(p: &mut Parser) -> Result { /// Convert a collection into an array, producing errors for anything other than /// expressions. fn array(p: &mut Parser, marker: &Marker) -> ParseResult { - marker.filter_children( - p, - |x| match x.kind() { - NodeKind::Named | NodeKind::Spread => false, - _ => true, - }, - |kind| match kind { - NodeKind::Named => ( - ErrorPosition::Full, - "expected expression, found named pair".into(), - ), - NodeKind::Spread => { - (ErrorPosition::Full, "spreading is not allowed here".into()) - } - _ => unreachable!(), - }, - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named => Err(( + ErrorPosition::Full, + "expected expression, found named pair".into(), + )), + NodeKind::Spread => { + Err((ErrorPosition::Full, "spreading is not allowed here".into())) + } + _ => Ok(()), + }); marker.end(p, NodeKind::Array); Ok(()) @@ -503,24 +474,17 @@ fn array(p: &mut Parser, marker: &Marker) -> ParseResult { /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { - marker.filter_children( - p, - |x| { - x.kind() == &NodeKind::Named - || x.kind().is_paren() - || x.kind() == &NodeKind::Comma - || x.kind() == &NodeKind::Colon - }, - |kind| match kind { - NodeKind::Spread => { - (ErrorPosition::Full, "spreading is not allowed here".into()) - } - _ => ( - ErrorPosition::Full, - "expected named pair, found expression".into(), - ), - }, - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), + NodeKind::Spread => { + Err((ErrorPosition::Full, "spreading is not allowed here".into())) + } + _ if x.kind().is_paren() => Ok(()), + _ => Err(( + ErrorPosition::Full, + "expected named pair, found expression".into(), + )), + }); marker.end(p, NodeKind::Dict); Ok(()) @@ -529,96 +493,90 @@ fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { - marker.filter_children( - p, - |x| match x.kind() { - NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true, - NodeKind::Spread => matches!( - x.children().last().map(|x| x.kind()), - Some(&NodeKind::Ident(_)) - ), - _ => false, - } - || (allow_parens && x.kind().is_paren()), - |_| (ErrorPosition::Full, "expected identifier".into()), - ); + marker.filter_children(p, |x| match x.kind() { + NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), + NodeKind::Spread + if matches!( + x.children().last().map(|x| x.kind()), + Some(&NodeKind::Ident(_)) + ) => + { + Ok(()) + } + _ if allow_parens && x.kind().is_paren() => Ok(()), + _ => Err((ErrorPosition::Full, "expected identifier".into())), + }); } // Parse a template block: `[...]`. -fn template(p: &mut Parser) -> ParseResult { - p.start(); - p.start_group(Group::Bracket, TokenMode::Markup); - markup(p); - p.end_group(); - p.end(NodeKind::Template); - Ok(()) +fn template(p: &mut Parser) { + p.perform(NodeKind::Template, |p| { + p.start_group(Group::Bracket, TokenMode::Markup); + markup(p); + p.end_group(); + }); } /// Parse a code block: `{...}`. -fn block(p: &mut Parser) -> ParseResult { - p.start(); - p.start_group(Group::Brace, TokenMode::Code); - while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); - if expr(p).is_ok() { - if !p.eof() { +fn block(p: &mut Parser) { + p.perform(NodeKind::Block, |p| { + p.start_group(Group::Brace, TokenMode::Code); + while !p.eof() { + p.start_group(Group::Stmt, TokenMode::Code); + if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } + p.end_group(); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } - p.end_group(); - p.end(NodeKind::Block); - Ok(()) + }); } /// Parse a function call. fn call(p: &mut Parser, callee: &Marker) -> ParseResult { - let res = match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => args(p, true), + callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { + Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + args(p, true); + Ok(()) + } _ => { p.expected_at("argument list"); Err(()) } - }; - - callee.end(p, NodeKind::Call); - res + }) } /// Parse the arguments to a function call. -fn args(p: &mut Parser, allow_template: bool) -> ParseResult { - p.start(); - if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); - collection(p); - p.end_group(); - } - - while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { - template(p); - } +fn args(p: &mut Parser, allow_template: bool) { + p.perform(NodeKind::CallArgs, |p| { + if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { + p.start_group(Group::Paren, TokenMode::Code); + collection(p); + p.end_group(); + } - p.end(NodeKind::CallArgs); - Ok(()) + while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { + template(p); + } + }) } /// Parse a with expression. fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { - p.eat_assert(&NodeKind::With); - - let res = if p.peek() == Some(&NodeKind::LeftParen) { - args(p, false) - } else { - p.expected("argument list"); - Err(()) - }; + marker.perform(p, NodeKind::WithExpr, |p| { + p.eat_assert(&NodeKind::With); - marker.end(p, NodeKind::WithExpr); - res + if p.peek() == Some(&NodeKind::LeftParen) { + args(p, false); + Ok(()) + } else { + p.expected("argument list"); + Err(()) + } + }) } /// Parse a let expression. @@ -630,17 +588,17 @@ fn let_expr(p: &mut Parser) -> ParseResult { ident(p)?; if p.peek() == Some(&NodeKind::With) { - with_expr(p, &marker); + with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start(); - p.start_group(Group::Paren, TokenMode::Code); - let marker = p.marker(); - collection(p); - params(p, &marker, true); - p.end_group(); - p.end(NodeKind::ClosureParams); + p.perform(NodeKind::ClosureParams, |p| { + p.start_group(Group::Paren, TokenMode::Code); + let marker = p.marker(); + collection(p); + params(p, &marker, true); + p.end_group(); + }); true } else { false @@ -699,13 +657,10 @@ fn for_expr(p: &mut Parser) -> ParseResult { p.eat_assert(&NodeKind::For); for_pattern(p)?; - if p.eat_expect(&NodeKind::In) { - expr(p)?; - body(p)?; - Ok(()) - } else { - Err(()) - } + p.eat_expect(&NodeKind::In)?; + expr(p)?; + body(p)?; + Ok(()) }) } @@ -723,44 +678,42 @@ fn for_pattern(p: &mut Parser) -> ParseResult { /// Parse an import expression. fn import_expr(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Import); + p.perform(NodeKind::ImportExpr, |p| { + p.eat_assert(&NodeKind::Import); - if !p.eat_if(&NodeKind::Star) { - // This is the list of identifiers scenario. - p.start(); - p.start_group(Group::Imports, TokenMode::Code); - let marker = p.marker(); - let items = collection(p).1; - if items == 0 { - p.expected_at("import items"); - } - p.end_group(); + if !p.eat_if(&NodeKind::Star) { + // This is the list of identifiers scenario. + p.perform(NodeKind::ImportItems, |p| { + p.start_group(Group::Imports, TokenMode::Code); + let marker = p.marker(); + let items = collection(p).1; + if items == 0 { + p.expected_at("import items"); + } + p.end_group(); - marker.filter_children( - p, - |n| matches!(n.kind(), NodeKind::Ident(_) | NodeKind::Comma), - |_| (ErrorPosition::Full, "expected identifier".into()), - ); - p.end(NodeKind::ImportItems); - }; + marker.filter_children(p, |n| match n.kind() { + NodeKind::Ident(_) | NodeKind::Comma => Ok(()), + _ => Err((ErrorPosition::Full, "expected identifier".into())), + }); + }); + }; - if p.eat_expect(&NodeKind::From) { - expr(p); - } + if p.eat_expect(&NodeKind::From).is_ok() { + expr(p)?; + } - p.end(NodeKind::ImportExpr); - Ok(()) + Ok(()) + }) } /// Parse an include expression. fn include_expr(p: &mut Parser) -> ParseResult { - p.start(); - p.eat_assert(&NodeKind::Include); - - expr(p); - p.end(NodeKind::IncludeExpr); - Ok(()) + p.perform(NodeKind::IncludeExpr, |p| { + p.eat_assert(&NodeKind::Include); + expr(p)?; + Ok(()) + }) } /// Parse an identifier. @@ -784,7 +737,9 @@ fn body(p: &mut Parser) -> ParseResult { Some(NodeKind::LeftBrace) => block(p), _ => { p.expected_at("body"); - Err(()) + return Err(()); } } + + Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index bc028876..3813ee84 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -62,28 +62,24 @@ pub struct Marker(usize); impl Marker { /// Wraps all children in front of the marker. pub fn end(&self, p: &mut Parser, kind: NodeKind) { - if p.children.len() != self.0 { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - let len = children.iter().map(Green::len).sum(); - p.children - .insert(self.0, GreenNode::with_children(kind, len, children).into()); - } + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + pub fn filter_children(&self, p: &mut Parser, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { - p.filter_children(self, f, error) + p.filter_children(self, f) } /// Insert an error message that `what` was expected at the marker position. @@ -97,6 +93,20 @@ impl Marker { .into(), ); } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } } impl<'s> Parser<'s> { @@ -121,58 +131,31 @@ impl<'s> Parser<'s> { /// /// Each start call has to be matched with a call to `end`, /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - pub fn start(&mut self) { + fn start(&mut self) { self.stack.push(std::mem::take(&mut self.children)); } /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut self.children[count.0 ..] { if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error() - || f(&child)) + || child.kind().is_error()) { - let (pos, msg) = error(child.kind()); - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) - .into(); - } - } - } - - /// Return the a child from the current stack frame specified by its - /// non-trivia index from the back. - pub fn child(&self, child: usize) -> Option<&Green> { - self.node_index_from_back(child).map(|i| &self.children[i]) - } - - /// Map a non-trivia index from the back of the current stack frame to a - /// normal index. - fn node_index_from_back(&self, child: usize) -> Option { - let len = self.children.len(); - let code = self.tokens.mode() == TokenMode::Code; - let mut seen = 0; - for x in (0 .. len).rev() { - if self.skip_type(self.children[x].kind()) && code { - continue; - } - if seen == child { - return Some(x); + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } } - seen += 1; } - - None } /// End the current node as a node of given `kind`. - pub fn end(&mut self, kind: NodeKind) { + fn end(&mut self, kind: NodeKind) { let outer = self.stack.pop().unwrap(); let mut children = std::mem::replace(&mut self.children, outer); @@ -191,15 +174,13 @@ impl<'s> Parser<'s> { remains.reverse(); } - let len = children.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, children).into()); + self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); } - pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: FnOnce(&mut Self) -> ParseResult, + F: FnOnce(&mut Self) -> T, { self.start(); let success = f(self); @@ -267,12 +248,12 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { self.expected_at(t.as_str()); } - eaten + if eaten { Ok(()) } else { Err(()) } } /// Consume the next token, debug-asserting that it is one of the given ones. @@ -368,10 +349,9 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> ParseResult { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); - let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -392,7 +372,6 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format!("expected {}", end)); - success = false; } } @@ -415,8 +394,6 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } - - if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. @@ -436,12 +413,13 @@ impl<'s> Parser<'s> { pub fn expected(&mut self, what: &str) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + ), + Self::eat, + ); } None => self.expected_at(what), } @@ -451,12 +429,13 @@ impl<'s> Parser<'s> { pub fn unexpected(&mut self) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + ), + Self::eat, + ); } None => self.push_error("unexpected end of file"), } -- cgit v1.2.3 From 515fe89c5ea94e6bcdcfe387d006776d31ad3646 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 5 Nov 2021 13:21:39 +0100 Subject: Style changes Co-Authored-By: Martin --- src/parse/mod.rs | 128 +++++++++++++++----------------------- src/parse/parser.rs | 172 +++++++++++++++++++++++----------------------------- 2 files changed, 127 insertions(+), 173 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 21ca303e..90be73f9 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -15,8 +15,6 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; -type ParseResult = Result; - /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); @@ -53,29 +51,34 @@ where { p.perform(NodeKind::Markup, |p| { while !p.eof() && f(p) { - markup_node(p, &mut at_start).ok(); + markup_node(p, &mut at_start); } }); } /// Parse a markup node. -fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { +fn markup_node(p: &mut Parser, at_start: &mut bool) { let token = match p.peek() { Some(t) => t, - None => return Ok(()), + None => return, }; match token { // Whitespace. NodeKind::Space(newlines) => { *at_start |= *newlines > 0; - if *newlines < 2 { p.eat(); } else { p.convert(NodeKind::Parbreak); } - return Ok(()); + return; + } + + // Comments. + NodeKind::LineComment | NodeKind::BlockComment => { + p.eat(); + return; } // Text and markup. @@ -112,7 +115,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); - let res = expr_with(p, true, 0); + let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -123,33 +126,18 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) -> ParseResult { NodeKind::LeftBrace => block(p), NodeKind::LeftBracket => template(p), - // Comments. - NodeKind::LineComment | NodeKind::BlockComment => { - p.eat(); - return Ok(()); - } - - NodeKind::Error(_, _) => { - p.eat(); - } - - _ => { - p.unexpected(); - return Err(()); - } + NodeKind::Error(_, _) => p.eat(), + _ => p.unexpected(), }; *at_start = false; - Ok(()) } /// Parse a heading. fn heading(p: &mut Parser) { p.perform(NodeKind::Heading, |p| { p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} - let column = p.column(p.prev_end()); markup_indented(p, column); }); @@ -175,7 +163,7 @@ fn enum_node(p: &mut Parser) { /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { - expr_with(p, false, 0) + expr_prec(p, false, 0) } /// Parse an expression with operators having at least the minimum precedence. @@ -185,20 +173,17 @@ fn expr(p: &mut Parser) -> ParseResult { /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, -fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { +fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); - expr_with(p, atomic, prec)?; - + expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => { - primary(p, atomic)?; - } + None => primary(p, atomic)?, }; loop { @@ -213,7 +198,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { } if atomic { - break Ok(()); + break; } if p.peek() == Some(&NodeKind::With) { @@ -222,14 +207,12 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, - None => { - break Ok(()); - } + None => break, }; let mut prec = op.precedence(); if prec < min_prec { - break Ok(()); + break; } p.eat(); @@ -239,8 +222,10 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { Associativity::Right => {} } - marker.perform(p, NodeKind::Binary, |p| expr_with(p, atomic, prec))?; + marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, prec))?; } + + Ok(()) } /// Parse a primary expression. @@ -260,7 +245,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { if !atomic && p.peek() == Some(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); - marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) @@ -288,7 +272,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { Some(NodeKind::Error(_, _)) => { p.eat(); - Ok(()) + Err(()) } // Nothing. @@ -330,6 +314,7 @@ fn literal(p: &mut Parser) -> bool { /// - Parameter list of closure expression fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); + p.start_group(Group::Paren, TokenMode::Code); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; @@ -337,28 +322,26 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Leading colon makes this a (empty) dictionary. if colon { - return dict(p, &marker); + dict(p, &marker); + return Ok(()); } // Arrow means this is a closure's parameter list. if p.peek() == Some(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); - p.eat_assert(&NodeKind::Arrow); - return marker.perform(p, NodeKind::Closure, expr); } // Find out which kind of collection this is. match kind { - CollectionKind::Group => { - marker.end(p, NodeKind::Group); - Ok(()) - } + CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), CollectionKind::Named => dict(p, &marker), } + + Ok(()) } /// The type of a collection. @@ -380,17 +363,18 @@ enum CollectionKind { fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; let mut kind = CollectionKind::Positional; - let mut has_comma = false; + let mut can_group = true; let mut missing_coma: Option = None; while !p.eof() { if let Ok(item_kind) = item(p) { if items == 0 && item_kind == NodeKind::Named { kind = CollectionKind::Named; + can_group = false; } if item_kind == NodeKind::Spread { - has_comma = true; + can_group = false; } items += 1; @@ -404,14 +388,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { } if p.eat_if(&NodeKind::Comma) { - has_comma = true; + can_group = false; } else { missing_coma = Some(p.marker()); } } } - if !has_comma && items == 1 && kind == CollectionKind::Positional { + if can_group && items == 1 { kind = CollectionKind::Group; } @@ -422,23 +406,19 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { - return marker - .perform(p, NodeKind::Spread, |p| expr(p).map(|_| NodeKind::Spread)); + marker.perform(p, NodeKind::Spread, expr)?; + return Ok(NodeKind::Spread); } - let ident_marker = p.marker(); expr(p)?; if p.peek() == Some(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!( - ident_marker.child_at(p).unwrap().kind(), - &NodeKind::Ident(_) - ) { + if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); - expr(p).map(|_| NodeKind::Named) + expr(p) } else { - ident_marker.end( + marker.end( p, NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), ); @@ -447,7 +427,8 @@ fn item(p: &mut Parser) -> ParseResult { expr(p).ok(); Err(()) } - }) + })?; + Ok(NodeKind::Named) } else { Ok(p.last_child().unwrap().kind().clone()) } @@ -455,7 +436,7 @@ fn item(p: &mut Parser) -> ParseResult { /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, marker: &Marker) -> ParseResult { +fn array(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named => Err(( ErrorPosition::Full, @@ -466,14 +447,12 @@ fn array(p: &mut Parser, marker: &Marker) -> ParseResult { } _ => Ok(()), }); - marker.end(p, NodeKind::Array); - Ok(()) } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { +fn dict(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), NodeKind::Spread => { @@ -485,9 +464,7 @@ fn dict(p: &mut Parser, marker: &Marker) -> ParseResult { "expected named pair, found expression".into(), )), }); - marker.end(p, NodeKind::Dict); - Ok(()) } /// Convert a collection into a list of parameters, producing errors for @@ -591,7 +568,8 @@ fn let_expr(p: &mut Parser) -> ParseResult { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. - let has_params = if p.peek_direct() == Some(&NodeKind::LeftParen) { + let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); + if has_params { p.perform(NodeKind::ClosureParams, |p| { p.start_group(Group::Paren, TokenMode::Code); let marker = p.marker(); @@ -599,10 +577,7 @@ fn let_expr(p: &mut Parser) -> ParseResult { params(p, &marker, true); p.end_group(); }); - true - } else { - false - }; + } if p.eat_if(&NodeKind::Eq) { expr(p)?; @@ -655,7 +630,6 @@ fn while_expr(p: &mut Parser) -> ParseResult { fn for_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForExpr, |p| { p.eat_assert(&NodeKind::For); - for_pattern(p)?; p.eat_expect(&NodeKind::In)?; expr(p)?; @@ -668,8 +642,7 @@ fn for_expr(p: &mut Parser) -> ParseResult { fn for_pattern(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForPattern, |p| { ident(p)?; - if p.peek() == Some(&NodeKind::Comma) { - p.eat(); + if p.eat_if(&NodeKind::Comma) { ident(p)?; } Ok(()) @@ -699,9 +672,8 @@ fn import_expr(p: &mut Parser) -> ParseResult { }); }; - if p.eat_expect(&NodeKind::From).is_ok() { - expr(p)?; - } + p.eat_expect(&NodeKind::From)?; + expr(p)?; Ok(()) }) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 3813ee84..4f181821 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,10 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{ParseResult, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; +/// Allows parser methods to use the try operator. Not exposed as the parser +/// recovers from all errors. +pub(crate) type ParseResult = Result; + /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. @@ -56,59 +60,6 @@ pub enum Group { Imports, } -/// A marker that indicates where a child may start. -pub struct Marker(usize); - -impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - p.children - .insert(self.0, GreenNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) - where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, - { - p.filter_children(self, f) - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) - } - - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -127,40 +78,16 @@ impl<'s> Parser<'s> { } } - /// Start a nested node. - /// - /// Each start call has to be matched with a call to `end`, - /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - fn start(&mut self) { - self.stack.push(std::mem::take(&mut self.children)); - } - - /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F) + /// Perform a subparse that wraps its result in a node with the given kind. + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: FnOnce(&mut Self) -> T, { - for child in &mut self.children[count.0 ..] { - if !((self.tokens.mode() != TokenMode::Code - || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) - { - if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); - } - } - } - } + let prev = std::mem::take(&mut self.children); + let output = f(self); + let mut children = std::mem::replace(&mut self.children, prev); - /// End the current node as a node of given `kind`. - fn end(&mut self, kind: NodeKind) { - let outer = self.stack.pop().unwrap(); - let mut children = std::mem::replace(&mut self.children, outer); - - // have trailing whitespace continue to sit in self.children in code - // mode. + // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); @@ -176,16 +103,8 @@ impl<'s> Parser<'s> { self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); - } - pub fn perform(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - self.start(); - let success = f(self); - self.end(kind); - success + output } /// Eat and wrap the next token. @@ -332,7 +251,6 @@ impl<'s> Parser<'s> { /// This panics if the next token does not start the given group. pub fn start_group(&mut self, kind: Group, mode: TokenMode) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); self.repeek(); @@ -534,3 +452,67 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } } + +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. + pub fn end(&self, p: &mut Parser, kind: NodeKind) { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F) + where + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + { + for child in &mut p.children[self.0 ..] { + if !((p.tokens.mode() != TokenMode::Code + || Parser::skip_type_ext(child.kind(), false)) + || child.kind().is_error()) + { + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } + } + } + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } +} -- cgit v1.2.3 From 41bdafb5785dd85d20a3e79900b18e0010f6d71d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 12:12:02 +0100 Subject: Faster parser --- src/parse/mod.rs | 61 ++++--- src/parse/parser.rs | 506 +++++++++++++++++++++++---------------------------- src/parse/resolve.rs | 4 +- src/parse/tokens.rs | 14 +- 4 files changed, 260 insertions(+), 325 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 90be73f9..aa616fdf 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,13 +13,16 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; +use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); - p.finish() + match p.finish().into_iter().next() { + Some(Green::Node(node)) => node, + _ => unreachable!(), + } } /// Parse markup. @@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let stmt = matches!(token, NodeKind::Let | NodeKind::Import); let group = if stmt { Group::Stmt } else { Group::Expr }; - p.start_group(group, TokenMode::Code); + p.start_group(group); let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); @@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match p.eat_map(|x| UnOp::from_token(&x)) { + match p.peek().and_then(UnOp::from_token) { Some(op) => { + p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); @@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { break; } - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } @@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { p.eat(); // Arrow means this is a closure's lone parameter. - if !atomic && p.peek() == Some(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); marker.perform(p, NodeKind::Closure, expr) @@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool { fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); @@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(&NodeKind::Arrow) { + if p.at(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } - // Find out which kind of collection this is. + // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), @@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { (kind, items) } -/// Parse an expression or a named pair. Returns if this is a named pair. +/// Parse an expression or a named pair, returning whether it's a spread or a +/// named pair. fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { @@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult { expr(p)?; - if p.peek() == Some(&NodeKind::Colon) { + if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); + let error = + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + marker.end(p, error); p.eat(); - expr(p).ok(); Err(()) } })?; + Ok(NodeKind::Named) } else { - Ok(p.last_child().unwrap().kind().clone()) + Ok(NodeKind::None) } } @@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { // Parse a template block: `[...]`. fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { - p.start_group(Group::Bracket, TokenMode::Markup); + p.start_group(Group::Bracket); markup(p); p.end_group(); }); @@ -497,9 +501,9 @@ fn template(p: &mut Parser) { /// Parse a code block: `{...}`. fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { - p.start_group(Group::Brace, TokenMode::Code); + p.start_group(Group::Brace); while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); + p.start_group(Group::Stmt); if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -515,7 +519,7 @@ fn block(p: &mut Parser) { /// Parse a function call. fn call(p: &mut Parser, callee: &Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); Ok(()) } @@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult { fn args(p: &mut Parser, allow_template: bool) { p.perform(NodeKind::CallArgs, |p| { if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); collection(p); p.end_group(); } @@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { + if p.at(&NodeKind::LeftParen) { args(p, false); Ok(()) } else { @@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult { let marker = p.marker(); ident(p)?; - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let marker = p.marker(); collection(p); params(p, &marker, true); @@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult { body(p)?; if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { + if p.at(&NodeKind::If) { if_expr(p)?; } else { body(p)?; @@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. p.perform(NodeKind::ImportItems, |p| { - p.start_group(Group::Imports, TokenMode::Code); + p.start_group(Group::Imports); let marker = p.marker(); let items = collection(p).1; if items == 0 { @@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult { return Err(()); } } - Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821..5d26ff63 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,88 +10,63 @@ pub(crate) type ParseResult = Result; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec, - /// The next token. - next: Option, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec>, /// The children of the currently built node. children: Vec, } -/// A logical group of tokens, e.g. `[...]`. -struct GroupEntry { - /// The kind of group this is. This decides which tokens will end the group. - /// For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. - pub kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - pub prev_mode: TokenMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A parenthesized group: `(...)`. - Paren, - /// A bracketed group: `[...]`. - Bracket, - /// A curly-braced group: `{...}`. - Brace, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Stmt, - /// A group for a single expression, ended by a line break. - Expr, - /// A group for import items, ended by a semicolon, line break or `from`. - Imports, -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); + let current = tokens.next(); Self { - src, tokens, - groups: vec![], - next: next.clone(), - peeked: next, + eof: current.is_none(), + current, prev_end: 0, - next_start: 0, - stack: vec![], + current_start: 0, + groups: vec![], children: vec![], } } + /// End the parsing process and return the last child. + pub fn finish(self) -> Vec { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform(&mut self, kind: NodeKind, f: F) -> T + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where F: FnOnce(&mut Self) -> T, { - let prev = std::mem::take(&mut self.children); + let prev = mem::take(&mut self.children); let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); + let mut children = mem::replace(&mut self.children, prev); // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); for n in (0 .. len).rev() { - if !self.skip_type(&children[n].kind()) { + if !self.is_trivia(&children[n].kind()) { break; } @@ -107,66 +81,36 @@ impl<'s> Parser<'s> { output } - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof } - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); - /// End the parsing process and return the last child. - pub fn finish(&mut self) -> Rc { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } } - } - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() + self.repeek(); } - /// Consume the next token if it is the given one. + /// Eat if the current token it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { + let at = self.at(t); + if at { self.eat(); - true - } else { - false } + at } - /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map(&mut self, f: F) -> Option - where - F: FnOnce(&NodeKind) -> Option, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. + /// Eat if the current token is the given one and produce an error if not. pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { @@ -175,14 +119,13 @@ impl<'s> Parser<'s> { if eaten { Ok(()) } else { Err(()) } } - /// Consume the next token, debug-asserting that it is one of the given ones. + /// Eat, debug-asserting that the token is the given one. pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); + debug_assert_eq!(self.peek(), Some(t)); self.eat(); } - /// Consume tokens while the condition is true. + /// Eat tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where F: FnMut(&NodeKind) -> bool, @@ -192,68 +135,77 @@ impl<'s> Parser<'s> { } } - /// Peek at the next token without consuming it. + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() + if self.eof { None } else { self.current.as_ref() } } - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() + if self.prev_end() == self.current_start() { + self.peek() } else { None } } - /// Peek at the source of the next token. + /// Peek at the source of the current token. pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) + self.tokens.scanner().get(self.current_start() .. self.current_end()) } /// The byte index at which the last token ended. /// - /// Refers to the end of the last _non-whitespace_ token in code mode. + /// Refers to the end of the last non-trivia token in code mode. pub fn prev_end(&self) -> usize { self.prev_end } - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start } - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { self.tokens.index() } /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range) -> &'s str { - self.src.get(range).unwrap() + self.tokens.scanner().column(index) } /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. + /// `peek()` return `None`. Parsing can only continue with a matching call + /// to `end_group`. /// - /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, kind: Group, mode: TokenMode) { + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + self.repeek(); match kind { Group::Paren => self.eat_assert(&NodeKind::LeftParen), Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), @@ -268,12 +220,12 @@ impl<'s> Parser<'s> { /// /// This panics if no group was started. pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); + let group_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - let mut rescan = self.tokens.mode() != prev_mode; + let mut rescan = self.tokens.mode() != group_mode; // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { @@ -284,7 +236,7 @@ impl<'s> Parser<'s> { Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end.clone()) { + if self.current.as_ref() == Some(&end) { // Bump the delimeter and return. No need to rescan in this case. self.eat(); rescan = false; @@ -295,10 +247,10 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - if prev_mode == TokenMode::Code { + if group_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. len).rev() { - if !self.skip_type(self.children[n].kind()) { + if !self.is_trivia(self.children[n].kind()) { break; } @@ -307,129 +259,55 @@ impl<'s> Parser<'s> { } self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); self.repeek(); } } - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); } - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } + /// Take another look at the current token to recheck whether it ends a + /// group. + fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; } - /// Eat the next token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) } /// Returns whether the given type can be skipped over given the current /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::Space(n) => *n == 0 || !stop_at_newline, NodeKind::LineComment => true, NodeKind::BlockComment => true, _ => false, } } - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. - while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. - fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( self.groups.last().map(|group| group.kind), @@ -441,28 +319,76 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } +} - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); } - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) + /// Eat the current token and add an error that it is unexpected. + pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); } } -/// A marker that indicates where a child may start. +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Marker(usize); impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. + pub fn end(self, p: &mut Parser, kind: NodeKind) { let end = (self.0 .. p.children.len()) .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .find(|&i| !p.is_trivia(p.children[i].kind())) .unwrap_or(self.0) + 1; @@ -472,47 +398,61 @@ impl Marker { } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) + pub fn filter_children(self, p: &mut Parser, f: F) where F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut p.children[self.0 ..] { - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() { if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); } } } } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); } - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success + /// Return a reference to the child directly after the marker. + pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) } } + +/// A logical group of tokens, e.g. `[...]`. +struct GroupEntry { + /// The kind of group this is. This decides which tokens will end the group. + /// For example, a [`Group::Paren`] will be ended by + /// [`Token::RightParen`]. + pub kind: Group, + /// The mode the parser was in _before_ the group started (to which we go + /// back once the group ends). + pub prev_mode: TokenMode, +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A parenthesized group: `(...)`. + Paren, + /// A group ended by a semicolon or a line break: `;`, `\n`. + Stmt, + /// A group for a single expression, ended by a line break. + Expr, + /// A group for import items, ended by a semicolon, line break or `from`. + Imports, +} diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 3fab98a4..b330dbd6 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -172,8 +172,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index aa28e1f5..494a9f0b 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -57,12 +57,6 @@ impl<'s> Tokens<'s> { self.s.jump(index); } - /// The column of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.s.column(index) - } - /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.column(self.s.index() - 1); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { @@ -342,10 +336,8 @@ impl<'s> Tokens<'s> { } } - let terminated = found == backticks; - let end = self.s.index() - if terminated { found as usize } else { 0 }; - - if terminated { + if found == backticks { + let end = self.s.index() - found as usize; NodeKind::Raw(Rc::new(resolve_raw( column, backticks, -- cgit v1.2.3 From 8117ca9950a2027efae133f811a26a4a7bf86a8e Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 15:30:08 +0100 Subject: Deduplicate trivia search --- src/parse/parser.rs | 72 +++++++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) (limited to 'src/parse') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5d26ff63..a30895ad 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -16,7 +16,7 @@ pub struct Parser<'s> { eof: bool, /// The current token. current: Option, - /// The end byte index of the last (non-whitespace if in code mode) token. + /// The end byte index of the last non-trivia token. prev_end: usize, /// The start byte index of the peeked token. current_start: usize, @@ -59,25 +59,19 @@ impl<'s> Parser<'s> { { let prev = mem::take(&mut self.children); let output = f(self); + let until = self.trivia_start(); let mut children = mem::replace(&mut self.children, prev); - // Trailing trivia should not be wrapped into the new node. - let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { - let len = children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(&children[n].kind()) { - break; - } - - remains.push(children.pop().unwrap()); - } - remains.reverse(); + // Trailing trivia should not be wrapped into the new node. + let idx = self.children.len(); + self.children.push(Green::default()); + self.children.extend(children.drain(until ..)); + self.children[idx] = GreenNode::with_children(kind, children).into(); + } else { + self.children.push(GreenNode::with_children(kind, children).into()); } - self.children.push(GreenNode::with_children(kind, children).into()); - self.children.extend(remains); - output } @@ -86,7 +80,7 @@ impl<'s> Parser<'s> { self.eof } - /// Consume the current token and also trailing trivia if in code mode. + /// Consume the current token and also trailing trivia. pub fn eat(&mut self) { self.prev_end = self.tokens.index(); self.bump(); @@ -169,9 +163,7 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } - /// The byte index at which the last token ended. - /// - /// Refers to the end of the last non-trivia token in code mode. + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end } @@ -248,14 +240,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { if group_mode == TokenMode::Code { - let len = self.children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(self.children[n].kind()) { - break; - } - - self.children.pop(); - } + self.children.truncate(self.trivia_start()); } self.tokens.jump(self.prev_end()); @@ -307,6 +292,17 @@ impl<'s> Parser<'s> { } } + /// Find the index in the children list where trailing trivia starts. + fn trivia_start(&self) -> usize { + self.children.len() + - self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count() + } + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -356,15 +352,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.is_trivia(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, thing); + Marker(self.trivia_start()).expected_at(self, thing); } } @@ -384,15 +372,11 @@ impl Marker { success } - /// Wrap all children after the marker in a node with the given `kind`. + /// Wrap all children after the marker (excluding trailing trivia) in a node + /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !p.is_trivia(p.children[i].kind())) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let until = p.trivia_start(); + let children = p.children.drain(self.0 .. until).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } -- cgit v1.2.3 From 95866d5fc9ae89a23c5754193c7de5d4fe4873b1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 7 Nov 2021 22:05:48 +0100 Subject: Tidy up AST --- src/parse/mod.rs | 75 ++++++++++++++++++++++------------------------------ src/parse/parser.rs | 37 +++++++++++++++----------- src/parse/resolve.rs | 14 +++++----- src/parse/scanner.rs | 8 +++++- src/parse/tokens.rs | 19 +++++++------ 5 files changed, 75 insertions(+), 78 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index aa616fdf..505482ca 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,7 +13,7 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc { @@ -197,7 +197,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, &marker)?; + call(p, marker)?; continue; } @@ -206,7 +206,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { } if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } let op = match p.peek().and_then(BinOp::from_token) { @@ -248,7 +248,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { // Arrow means this is a closure's lone parameter. if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); - p.eat(); + p.eat_assert(&NodeKind::Arrow); marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) @@ -326,14 +326,13 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Leading colon makes this a (empty) dictionary. if colon { - dict(p, &marker); + dict(p, marker); return Ok(()); } // Arrow means this is a closure's parameter list. if p.at(&NodeKind::Arrow) { - params(p, &marker, true); - marker.end(p, NodeKind::ClosureParams); + params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } @@ -341,8 +340,8 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), - CollectionKind::Positional => array(p, &marker), - CollectionKind::Named => dict(p, &marker), + CollectionKind::Positional => array(p, marker), + CollectionKind::Named => dict(p, marker), } Ok(()) @@ -384,7 +383,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { items += 1; if let Some(marker) = missing_coma.take() { - marker.expected_at(p, "comma"); + marker.expected(p, "comma"); } if p.eof() { @@ -419,12 +418,11 @@ fn item(p: &mut Parser) -> ParseResult { if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { + if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - let error = - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + let error = NodeKind::Error(ErrorPos::Full, "expected identifier".into()); marker.end(p, error); p.eat(); expr(p).ok(); @@ -440,15 +438,10 @@ fn item(p: &mut Parser) -> ParseResult { /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, marker: &Marker) { +fn array(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { - NodeKind::Named => Err(( - ErrorPosition::Full, - "expected expression, found named pair".into(), - )), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } + NodeKind::Named => Err("expected expression, found named pair"), + NodeKind::Spread => Err("spreading is not allowed here"), _ => Ok(()), }); marker.end(p, NodeKind::Array); @@ -456,25 +449,21 @@ fn array(p: &mut Parser, marker: &Marker) { /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, marker: &Marker) { +fn dict(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } - _ if x.kind().is_paren() => Ok(()), - _ => Err(( - ErrorPosition::Full, - "expected named pair, found expression".into(), - )), + NodeKind::Spread => Err("spreading is not allowed here"), + _ => Err("expected named pair, found expression"), }); marker.end(p, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { +fn params(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( @@ -484,9 +473,9 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { { Ok(()) } - _ if allow_parens && x.kind().is_paren() => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); + marker.end(p, NodeKind::ClosureParams); } // Parse a template block: `[...]`. @@ -517,7 +506,7 @@ fn block(p: &mut Parser) { } /// Parse a function call. -fn call(p: &mut Parser, callee: &Marker) -> ParseResult { +fn call(p: &mut Parser, callee: Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); @@ -546,7 +535,7 @@ fn args(p: &mut Parser, allow_template: bool) { } /// Parse a with expression. -fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { +fn with_expr(p: &mut Parser, marker: Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); @@ -569,18 +558,16 @@ fn let_expr(p: &mut Parser) -> ParseResult { ident(p)?; if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { - p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren); - let marker = p.marker(); - collection(p); - params(p, &marker, true); - p.end_group(); - }); + let marker = p.marker(); + p.start_group(Group::Paren); + collection(p); + p.end_group(); + params(p, marker); } if p.eat_if(&NodeKind::Eq) { @@ -671,7 +658,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { marker.filter_children(p, |n| match n.kind() { NodeKind::Ident(_) | NodeKind::Comma => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); }); }; diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a30895ad..5ebc2c17 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::mem; use super::{TokenMode, Tokens}; -use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// Allows parser methods to use the try operator. Not exposed as the parser @@ -131,11 +131,9 @@ impl<'s> Parser<'s> { /// Eat the current token, but change its type. pub fn convert(&mut self, kind: NodeKind) { - let idx = self.children.len(); + let marker = self.marker(); self.eat(); - if let Some(child) = self.children.get_mut(idx) { - child.set_kind(kind); - } + marker.convert(self, kind); } /// Whether the current token is of the given type. @@ -321,7 +319,7 @@ impl<'s> Parser<'s> { impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.children.push(GreenData::new(error, 0).into()); } @@ -330,7 +328,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("unexpected {}", found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.push_error("unexpected end of file"), @@ -342,7 +340,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("expected {}, found {}", thing, found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.expected_at(thing), @@ -352,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected_at(self, thing); + Marker(self.trivia_start()).expected(self, thing); } } @@ -384,15 +382,15 @@ impl Marker { /// Wrap all children that do not fulfill the predicate in error nodes. pub fn filter_children(self, p: &mut Parser, f: F) where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: Fn(&Green) -> Result<(), &'static str>, { for child in &mut p.children[self.0 ..] { if (p.tokens.mode() == TokenMode::Markup || !Parser::is_trivia_ext(child.kind(), false)) && !child.kind().is_error() { - if let Err((pos, msg)) = f(child) { - let error = NodeKind::Error(pos, msg); + if let Err(msg) = f(child) { + let error = NodeKind::Error(ErrorPos::Full, msg.into()); let inner = mem::take(child); *child = GreenNode::with_child(error, inner).into(); } @@ -401,16 +399,23 @@ impl Marker { } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(self, p: &mut Parser, what: &str) { + pub fn expected(self, p: &mut Parser, what: &str) { let msg = format!("expected {}", what); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); p.children.insert(self.0, GreenData::new(error, 0).into()); } - /// Return a reference to the child directly after the marker. - pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + /// Peek at the child directly after the marker. + pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> { p.children.get(self.0) } + + /// Convert the child directly after marker. + pub fn convert(self, p: &mut Parser, kind: NodeKind) { + if let Some(child) = p.children.get_mut(self.0) { + child.convert(kind); + } + } } /// A logical group of tokens, e.g. `[...]`. diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index b330dbd6..6719f41d 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,4 +1,4 @@ -use super::{is_newline, Scanner}; +use super::{is_ident, is_newline, Scanner}; use crate::syntax::RawData; use crate::util::EcoString; @@ -51,7 +51,7 @@ pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); RawData { - lang: Some(tag.into()), + lang: is_ident(tag).then(|| tag.into()), text: text.into(), backticks, block, @@ -201,15 +201,15 @@ mod tests { // More than one backtick with lang tag. test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", Some("♥"), "", false); + test(0, 2, "♥", None, "", false); // Trimming of whitespace (tested more thoroughly in separate test). - test(0, 2, " a", Some(""), "a", false); - test(0, 2, " a", Some(""), " a", false); - test(0, 2, " \na", Some(""), "a", true); + test(0, 2, " a", None, "a", false); + test(0, 2, " a", None, " a", false); + test(0, 2, " \na", None, "a", true); // Dedenting - test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true); + test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); } #[test] diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 92a2333d..ea06a2e0 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -182,7 +182,13 @@ pub fn is_newline(character: char) -> bool { ) } -/// Whether a string is a valid identifier. +/// Whether a string is a valid unicode identifier. +/// +/// In addition to what is specified in the [Unicode Standard][uax31], we allow: +/// - `_` as a starting character, +/// - `_` and `-` as continuing characters. +/// +/// [uax31]: http://www.unicode.org/reports/tr31/ #[inline] pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 494a9f0b..1523cd64 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -236,20 +236,19 @@ impl<'s> Tokens<'s> { 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); self.s.eat_assert('{'); - let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - + let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); if self.s.eat_if('}') { if let Some(c) = resolve_hex(&sequence) { NodeKind::UnicodeEscape(c) } else { NodeKind::Error( - ErrorPosition::Full, + ErrorPos::Full, "invalid unicode escape sequence".into(), ) } } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, "expected closing brace".into(), ) } @@ -348,7 +347,7 @@ impl<'s> Tokens<'s> { let noun = if remaining == 1 { "backtick" } else { "backticks" }; NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if found == 0 { format!("expected {} {}", remaining, noun) } else { @@ -396,7 +395,7 @@ impl<'s> Tokens<'s> { })) } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if !display || (!escaped && dollar) { "expected closing dollar sign" } else { @@ -487,7 +486,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) + NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -555,7 +554,7 @@ mod tests { use super::*; - use ErrorPosition::*; + use ErrorPos::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; @@ -564,7 +563,7 @@ mod tests { NodeKind::UnicodeEscape(c) } - fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + fn Error(pos: ErrorPos, message: &str) -> NodeKind { NodeKind::Error(pos, message.into()) } @@ -881,7 +880,7 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup: "````🚀````" => Raw("", None, 4, false)); t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } -- cgit v1.2.3 From 75fffc1f9b6ef8bf258b2b1845a4ba74a0f5f2c1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 7 Nov 2021 23:31:42 +0100 Subject: Fine-grained capturing --- src/parse/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 505482ca..78e4f896 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -418,7 +418,7 @@ fn item(p: &mut Parser) -> ParseResult { if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) { + if let Some(NodeKind::Ident(_)) = marker.peek(p).map(|c| c.kind()) { p.eat(); expr(p) } else { -- cgit v1.2.3 From 38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 8 Nov 2021 12:13:32 +0100 Subject: Final touches --- src/parse/mod.rs | 28 ++++++++++++++-------------- src/parse/parser.rs | 30 +++++++++++++++--------------- src/parse/resolve.rs | 12 +++++------- src/parse/tokens.rs | 52 +++++++++++++++++++++++++--------------------------- 4 files changed, 59 insertions(+), 63 deletions(-) (limited to 'src/parse') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 78e4f896..f9c0049f 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,8 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. -pub fn parse(source: &str) -> Rc { - let mut p = Parser::new(source); +pub fn parse(src: &str) -> Rc { + let mut p = Parser::new(src); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -93,16 +93,17 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) + | NodeKind::Math(_) | NodeKind::UnicodeEscape(_) => { p.eat(); } NodeKind::Eq if *at_start => heading(p), - NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::Minus if *at_start => list_node(p), NodeKind::EnumNumbering(_) if *at_start => enum_node(p), // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); } @@ -149,7 +150,7 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::ListBullet); + p.eat_assert(&NodeKind::Minus); let column = p.column(p.prev_end()); markup_indented(p, column); }); @@ -193,10 +194,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { loop { // Exclamation mark, parenthesis or bracket means this is a function // call. - if matches!( - p.peek_direct(), - Some(NodeKind::LeftParen | NodeKind::LeftBracket) - ) { + if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() { call(p, marker)?; continue; } @@ -241,7 +239,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { - // Start closure params. let marker = p.marker(); p.eat(); @@ -364,9 +361,10 @@ enum CollectionKind { /// Returns the length of the collection and whether the literal contained any /// commas. fn collection(p: &mut Parser) -> (CollectionKind, usize) { - let mut items = 0; let mut kind = CollectionKind::Positional; + let mut items = 0; let mut can_group = true; + let mut error = false; let mut missing_coma: Option = None; while !p.eof() { @@ -393,12 +391,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { if p.eat_if(&NodeKind::Comma) { can_group = false; } else { - missing_coma = Some(p.marker()); + missing_coma = Some(p.trivia_start()); } + } else { + error = true; } } - if can_group && items == 1 { + if error || (can_group && items == 1) { kind = CollectionKind::Group; } @@ -467,7 +467,7 @@ fn params(p: &mut Parser, marker: Marker) { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( - x.children().last().map(|x| x.kind()), + x.children().last().map(|child| child.kind()), Some(&NodeKind::Ident(_)) ) => { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ebc2c17..1c4c2a5c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -52,6 +52,17 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } + /// Create a markup right before the trailing trivia. + pub fn trivia_start(&self) -> Marker { + let count = self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count(); + Marker(self.children.len() - count) + } + /// Perform a subparse that wraps its result in a node with the given kind. pub fn perform(&mut self, kind: NodeKind, f: F) -> T where @@ -66,7 +77,7 @@ impl<'s> Parser<'s> { // Trailing trivia should not be wrapped into the new node. let idx = self.children.len(); self.children.push(Green::default()); - self.children.extend(children.drain(until ..)); + self.children.extend(children.drain(until.0 ..)); self.children[idx] = GreenNode::with_children(kind, children).into(); } else { self.children.push(GreenNode::with_children(kind, children).into()); @@ -238,7 +249,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { if group_mode == TokenMode::Code { - self.children.truncate(self.trivia_start()); + self.children.truncate(self.trivia_start().0); } self.tokens.jump(self.prev_end()); @@ -290,17 +301,6 @@ impl<'s> Parser<'s> { } } - /// Find the index in the children list where trailing trivia starts. - fn trivia_start(&self) -> usize { - self.children.len() - - self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count() - } - /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -350,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected(self, thing); + self.trivia_start().expected(self, thing); } } @@ -374,7 +374,7 @@ impl Marker { /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { let until = p.trivia_start(); - let children = p.children.drain(self.0 .. until).collect(); + let children = p.children.drain(self.0 .. until.0).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 6719f41d..e15ae339 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_ident, is_newline, Scanner}; -use crate::syntax::RawData; +use crate::syntax::ast::RawNode; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,21 +46,19 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { +pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawData { + RawNode { lang: is_ident(tag).then(|| tag.into()), text: text.into(), - backticks, block, } } else { - RawData { + RawNode { lang: None, text: split_lines(text).join("\n").into(), - backticks, block: false, } } @@ -181,7 +179,7 @@ mod tests { #[track_caller] fn test( column: usize, - backticks: u8, + backticks: usize, raw: &str, lang: Option<&str>, text: &str, diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1523cd64..96dfd9d1 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,7 +5,8 @@ use super::{ Scanner, }; use crate::geom::{AngularUnit, LengthUnit}; -use crate::syntax::*; +use crate::syntax::ast::{MathNode, RawNode}; +use crate::syntax::{ErrorPos, NodeKind}; use crate::util::EcoString; /// An iterator over the tokens of a string of source code. @@ -26,8 +27,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(source), mode } + pub fn new(src: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(src), mode } } /// Get the current token mode. @@ -254,7 +255,7 @@ impl<'s> Tokens<'s> { } } c if c.is_whitespace() => NodeKind::Linebreak, - _ => NodeKind::Text("\\".into()), + _ => NodeKind::Text('\\'.into()), }, None => NodeKind::Linebreak, } @@ -281,7 +282,7 @@ impl<'s> Tokens<'s> { NodeKind::EnDash } } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::ListBullet + NodeKind::Minus } else { NodeKind::Text("-".into()) } @@ -310,16 +311,15 @@ impl<'s> Tokens<'s> { let column = self.s.column(self.s.index() - 1); let mut backticks = 1; - while self.s.eat_if('`') && backticks < u8::MAX { + while self.s.eat_if('`') { backticks += 1; } // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawData { + return NodeKind::Raw(Rc::new(RawNode { text: EcoString::new(), lang: None, - backticks: 1, block: false, })); } @@ -389,7 +389,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathData { + NodeKind::Math(Rc::new(MathNode { formula: self.s.get(start .. end).into(), display, })) @@ -429,9 +429,7 @@ impl<'s> Tokens<'s> { // Read the exponent. if self.s.eat_if('e') || self.s.eat_if('E') { - if !self.s.eat_if('+') { - self.s.eat_if('-'); - } + let _ = self.s.eat_if('+') || self.s.eat_if('-'); self.s.eat_while(|c| c.is_ascii_digit()); } @@ -483,6 +481,7 @@ impl<'s> Tokens<'s> { false } })); + if self.s.eat_if('"') { NodeKind::Str(string) } else { @@ -567,17 +566,16 @@ mod tests { NodeKind::Error(pos, message.into()) } - fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawData { + fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind { + NodeKind::Raw(Rc::new(RawNode { text: text.into(), lang: lang.map(Into::into), - backticks: backticks_left, block, })) } fn Math(formula: &str, display: bool) -> NodeKind { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathNode { formula: formula.into(), display })) } fn Str(string: &str) -> NodeKind { @@ -655,13 +653,13 @@ mod tests { ]; // Test with each applicable suffix. - for (block, mode, suffix, token) in suffixes { + for &(block, mode, suffix, ref token) in suffixes { let src = $src; #[allow(unused_variables)] let blocks = BLOCKS; $(let blocks = $blocks;)? assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) { + if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token); } } @@ -790,7 +788,7 @@ mod tests { t!(Markup: "~" => NonBreakingSpace); t!(Markup[" "]: r"\" => Linebreak); t!(Markup["a "]: r"a--" => Text("a"), EnDash); - t!(Markup["a1/"]: "- " => ListBullet, Space(0)); + t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => Text("1."), Text("a")); @@ -867,22 +865,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup: "``" => Raw("", None, false)); + t!(Markup: "`raw`" => Raw("raw", None, false)); t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", None, 4, false)); + t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false)); + t!(Markup: "````🚀````" => Raw("", None, false)); t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false)); } #[test] -- cgit v1.2.3