From 84cdc85ca7494368e7ce2039fcef06ac2d3bd2ed Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 17 Feb 2021 23:07:28 +0100 Subject: =?UTF-8?q?Refresh=20parser=20=F0=9F=8C=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parse/collection.rs | 2 +- src/parse/mod.rs | 231 +++++++++++++++++++----------------------------- src/parse/parser.rs | 132 +++++++++++++++------------ src/pretty.rs | 107 ++++++++-------------- src/syntax/expr.rs | 11 +++ src/syntax/token.rs | 3 - 6 files changed, 219 insertions(+), 267 deletions(-) (limited to 'src') diff --git a/src/parse/collection.rs b/src/parse/collection.rs index 7ffc4539..ab358f76 100644 --- a/src/parse/collection.rs +++ b/src/parse/collection.rs @@ -4,7 +4,7 @@ use super::*; pub fn args(p: &mut Parser) -> ExprArgs { let start = p.start(); let items = collection(p, vec![]); - ExprArgs { span: p.span_from(start), items } + ExprArgs { span: p.span(start), items } } /// Parse a parenthesized group, which can be either of: diff --git a/src/parse/mod.rs b/src/parse/mod.rs index e9cf2a60..1d3b8be7 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -70,8 +70,8 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option { Token::Raw(t) => raw(p, t), Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)), - // Keywords. - Token::Let | Token::If | Token::For => { + // Hashtag + keyword / identifier. + Token::Ident(_) | Token::Let | Token::If | Token::For => { *at_start = false; let stmt = token == Token::Let; let group = if stmt { Group::Stmt } else { Group::Expr }; @@ -100,12 +100,6 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option { return Some(Node::Expr(template(p))); } - // Bracket function. - Token::HashBracket => { - *at_start = false; - return Some(Node::Expr(bracket_call(p)?)); - } - // Comments. Token::LineComment(_) | Token::BlockComment(_) => { p.eat(); @@ -125,7 +119,7 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option { /// Parse a heading. 
fn heading(p: &mut Parser) -> Node { let start = p.start(); - p.assert(&[Token::Eq]); + p.assert(Token::Eq); // Count depth. let mut level: usize = 0; @@ -174,108 +168,6 @@ fn unicode_escape(p: &mut Parser, token: TokenUnicodeEscape) -> String { text } -/// Parse a bracketed function call. -fn bracket_call(p: &mut Parser) -> Option { - p.start_group(Group::Bracket, TokenMode::Code); - - // One header is guaranteed, but there may be more (through chaining). - let mut outer = vec![]; - let mut inner = bracket_subheader(p); - while p.eat_if(Token::Pipe) { - if let Some(new) = bracket_subheader(p) { - outer.extend(inner); - inner = Some(new); - } - } - - p.end_group(); - - let body = match p.peek() { - Some(Token::LeftBracket) => Some(bracket_body(p)), - _ => None, - }; - - let mut inner = inner?; - if let Some(body) = body { - inner.span.expand(body.span()); - inner.args.items.push(ExprArg::Pos(body)); - } - - while let Some(mut top) = outer.pop() { - top.args.items.push(ExprArg::Pos(Expr::Call(inner))); - inner = top; - } - - Some(Expr::Call(inner)) -} - -/// Parse one subheader of a bracketed function call. -fn bracket_subheader(p: &mut Parser) -> Option { - p.start_group(Group::Subheader, TokenMode::Code); - let name = ident(p); - let args = args(p); - let span = p.end_group(); - Some(ExprCall { - span, - callee: Box::new(Expr::Ident(name?)), - args, - }) -} - -/// Parse the body of a bracketed function call. -fn bracket_body(p: &mut Parser) -> Expr { - p.start_group(Group::Bracket, TokenMode::Markup); - let tree = Rc::new(tree(p)); - let span = p.end_group(); - Expr::Template(ExprTemplate { span, tree }) -} - -/// Parse an expression. -fn expr(p: &mut Parser) -> Option { - expr_with(p, 0) -} - -/// Parse an expression with operators having at least the minimum precedence. 
-fn expr_with(p: &mut Parser, min_prec: usize) -> Option { - let start = p.start(); - let mut lhs = match p.eat_map(UnOp::from_token) { - Some(op) => { - let prec = op.precedence(); - let expr = Box::new(expr_with(p, prec)?); - Expr::Unary(ExprUnary { span: p.span_from(start), op, expr }) - } - None => primary(p)?, - }; - - loop { - let op = match p.peek().and_then(BinOp::from_token) { - Some(binop) => binop, - None => break, - }; - - let mut prec = op.precedence(); - if prec < min_prec { - break; - } - - p.eat(); - match op.associativity() { - Associativity::Left => prec += 1, - Associativity::Right => {} - } - - let rhs = match expr_with(p, prec) { - Some(rhs) => Box::new(rhs), - None => break, - }; - - let span = lhs.span().join(rhs.span()); - lhs = Expr::Binary(ExprBinary { span, lhs: Box::new(lhs), op, rhs }); - } - - Some(lhs) -} - /// Parse a primary expression. fn primary(p: &mut Parser) -> Option { if let Some(expr) = literal(p) { @@ -289,10 +181,10 @@ fn primary(p: &mut Parser) -> Option { span: p.eat_span(), string: string.into(), }; - if p.peek() == Some(Token::LeftParen) { - Some(paren_call(p, ident)) - } else { - Some(Expr::Ident(ident)) + + match p.peek_direct() { + Some(Token::LeftParen) | Some(Token::LeftBracket) => Some(call(p, ident)), + _ => Some(Expr::Ident(ident)), } } @@ -304,7 +196,6 @@ fn primary(p: &mut Parser) -> Option { // Structures. Some(Token::LeftBrace) => block(p, true), Some(Token::LeftBracket) => Some(template(p)), - Some(Token::HashBracket) => bracket_call(p), Some(Token::LeftParen) => Some(parenthesized(p)), // Nothing. 
@@ -327,7 +218,12 @@ fn literal(p: &mut Parser) -> Option { Token::Angle(val, unit) => LitKind::Angle(val, unit), Token::Percent(p) => LitKind::Percent(p), Token::Color(color) => LitKind::Color(color), - Token::Str(token) => LitKind::Str(string(p, token)), + Token::Str(token) => LitKind::Str({ + if !token.terminated { + p.expected_at("quote", p.peek_span().end); + } + resolve::resolve_string(token.string) + }), _ => return None, }; Some(Expr::Lit(Lit { span: p.eat_span(), kind })) @@ -360,30 +256,83 @@ fn block(p: &mut Parser, scopes: bool) -> Option { Some(Expr::Block(ExprBlock { span, exprs, scoping: scopes })) } -/// Parse a parenthesized function call. -fn paren_call(p: &mut Parser, name: Ident) -> Expr { - p.start_group(Group::Paren, TokenMode::Code); - let args = args(p); - p.end_group(); +/// Parse an expression. +fn expr(p: &mut Parser) -> Option { + expr_with(p, 0) +} + +/// Parse an expression with operators having at least the minimum precedence. +fn expr_with(p: &mut Parser, min_prec: usize) -> Option { + let start = p.start(); + let mut lhs = match p.eat_map(UnOp::from_token) { + Some(op) => { + let prec = op.precedence(); + let expr = Box::new(expr_with(p, prec)?); + Expr::Unary(ExprUnary { span: p.span(start), op, expr }) + } + None => primary(p)?, + }; + + loop { + let op = match p.peek().and_then(BinOp::from_token) { + Some(binop) => binop, + None => break, + }; + + let mut prec = op.precedence(); + if prec < min_prec { + break; + } + + p.eat(); + match op.associativity() { + Associativity::Left => prec += 1, + Associativity::Right => {} + } + + let rhs = match expr_with(p, prec) { + Some(rhs) => Box::new(rhs), + None => break, + }; + + let span = lhs.span().join(rhs.span()); + lhs = Expr::Binary(ExprBinary { span, lhs: Box::new(lhs), op, rhs }); + } + + Some(lhs) +} + +/// Parse a function call. 
+fn call(p: &mut Parser, name: Ident) -> Expr { + let mut args = match p.peek_direct() { + Some(Token::LeftParen) => { + p.start_group(Group::Paren, TokenMode::Code); + let args = args(p); + p.end_group(); + args + } + _ => ExprArgs { + span: Span::at(name.span.end), + items: vec![], + }, + }; + + if p.peek_direct() == Some(Token::LeftBracket) { + let body = template(p); + args.items.push(ExprArg::Pos(body)); + } + Expr::Call(ExprCall { - span: p.span_from(name.span.start), + span: p.span(name.span.start), callee: Box::new(Expr::Ident(name)), args, }) } -/// Parse a string. -fn string(p: &mut Parser, token: TokenStr) -> String { - if !token.terminated { - p.expected_at("quote", p.peek_span().end); - } - resolve::resolve_string(token.string) -} - /// Parse a let expression. fn expr_let(p: &mut Parser) -> Option { let start = p.start(); - p.assert(&[Token::Let]); + p.assert(Token::Let); let mut expr_let = None; if let Some(binding) = ident(p) { @@ -393,7 +342,7 @@ fn expr_let(p: &mut Parser) -> Option { } expr_let = Some(Expr::Let(ExprLet { - span: p.span_from(start), + span: p.span(start), binding, init: init.map(Box::new), })) @@ -405,18 +354,24 @@ fn expr_let(p: &mut Parser) -> Option { /// Parse an if expresion. fn expr_if(p: &mut Parser) -> Option { let start = p.start(); - p.assert(&[Token::If]); + p.assert(Token::If); let mut expr_if = None; if let Some(condition) = expr(p) { if let Some(if_body) = body(p) { let mut else_body = None; - if p.eat_if(Token::Else) { + + // We are in code mode but still want to react to `#else` if the + // outer mode is markup. 
+ if match p.outer_mode() { + TokenMode::Markup => p.eat_if(Token::Invalid("#else")), + TokenMode::Code => p.eat_if(Token::Else), + } { else_body = body(p); } expr_if = Some(Expr::If(ExprIf { - span: p.span_from(start), + span: p.span(start), condition: Box::new(condition), if_body: Box::new(if_body), else_body: else_body.map(Box::new), @@ -430,7 +385,7 @@ fn expr_if(p: &mut Parser) -> Option { /// Parse a for expression. fn expr_for(p: &mut Parser) -> Option { let start = p.start(); - p.assert(&[Token::For]); + p.assert(Token::For); let mut expr_for = None; if let Some(pattern) = for_pattern(p) { @@ -438,7 +393,7 @@ fn expr_for(p: &mut Parser) -> Option { if let Some(iter) = expr(p) { if let Some(body) = body(p) { expr_for = Some(Expr::For(ExprFor { - span: p.span_from(start), + span: p.span(start), pattern, iter: Box::new(iter), body: Box::new(body), diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 7c660182..5d390bc1 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -10,6 +10,8 @@ pub struct Parser<'s> { pub diags: DiagSet, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// The stack of open groups. + groups: Vec, /// The next token. next: Option>, /// The peeked token. @@ -19,21 +21,20 @@ pub struct Parser<'s> { next_start: Pos, /// The end position of the last (non-whitespace if in code mode) token. last_end: Pos, - /// The stack of open groups. - groups: Vec, } /// A logical group of tokens, e.g. `[...]`. +#[derive(Debug, Copy, Clone)] struct GroupEntry { /// The start position of the group. Used by `Parser::end_group` to return /// The group's full span. - start: Pos, + pub start: Pos, /// The kind of group this is. This decides which tokens will end the group. - /// For example, a [`GroupKind::Paren`] will be ended by + /// For example, a [`Group::Paren`] will be ended by /// [`Token::RightParen`]. - kind: Group, + pub kind: Group, /// The mode the parser was in _before_ the group started. 
- prev_mode: TokenMode, + pub outer_mode: TokenMode, } /// A group, confined by optional start and end delimiters. @@ -60,13 +61,13 @@ impl<'s> Parser<'s> { let mut tokens = Tokens::new(src, TokenMode::Markup); let next = tokens.next(); Self { - tokens, + diags: DiagSet::new(), next, + tokens, + last_end: Pos::ZERO, peeked: next, next_start: Pos::ZERO, - last_end: Pos::ZERO, groups: vec![], - diags: DiagSet::new(), } } @@ -118,16 +119,16 @@ impl<'s> Parser<'s> { self.groups.push(GroupEntry { start: self.next_start, kind, - prev_mode: self.tokens.mode(), + outer_mode: self.tokens.mode(), }); self.tokens.set_mode(mode); self.repeek(); match kind { - Group::Paren => self.assert(&[Token::LeftParen]), - Group::Bracket => self.assert(&[Token::HashBracket, Token::LeftBracket]), - Group::Brace => self.assert(&[Token::LeftBrace]), + Group::Paren => self.assert(Token::LeftParen), + Group::Bracket => self.assert(Token::LeftBracket), + Group::Brace => self.assert(Token::LeftBrace), Group::Subheader => {} Group::Stmt => {} Group::Expr => {} @@ -141,7 +142,7 @@ impl<'s> Parser<'s> { pub fn end_group(&mut self) -> Span { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); - self.tokens.set_mode(group.prev_mode); + self.tokens.set_mode(group.outer_mode); self.repeek(); let mut rescan = self.tokens.mode() != prev_mode; @@ -173,6 +174,62 @@ impl<'s> Parser<'s> { Span::new(group.start, self.last_end) } + /// The tokenization mode outside of the current group. + /// + /// For example, this would be [`Markup`] if we are in a [`Code`] group that + /// is embedded in a [`Markup`] group. + /// + /// [`Markup`]: TokenMode::Markup + /// [`Code`]: TokenMode::Code + pub fn outer_mode(&mut self) -> TokenMode { + self.groups.last().map_or(TokenMode::Markup, |group| group.outer_mode) + } + + /// Whether the end of the source string or group is reached. 
+ pub fn eof(&self) -> bool { + self.peek().is_none() + } + + /// Peek at the next token without consuming it. + pub fn peek(&self) -> Option> { + self.peeked + } + + /// Peek at the next token if it follows immediately after the last one + /// without any whitespace in between. + pub fn peek_direct(&self) -> Option> { + if self.next_start == self.last_end { + self.peeked + } else { + None + } + } + + /// Peek at the span of the next token. + /// + /// Has length zero if `peek()` returns `None`. + pub fn peek_span(&self) -> Span { + Span::new( + self.next_start, + if self.eof() { self.next_start } else { self.tokens.pos() }, + ) + } + + /// Peek at the source of the next token. + pub fn peek_src(&self) -> &'s str { + self.get(self.peek_span()) + } + + /// Checks whether the next token fulfills a condition. + /// + /// Returns `false` if there is no next token. + pub fn check(&self, f: F) -> bool + where + F: FnOnce(Token<'s>) -> bool, + { + self.peek().map_or(false, f) + } + /// Consume the next token. pub fn eat(&mut self) -> Option> { let token = self.peek()?; @@ -210,8 +267,8 @@ impl<'s> Parser<'s> { Span::new(start, self.last_end) } - /// Consume the next token if it is the given one and produce an error if - /// not. + /// Consume the next token if it is the given one and produce a diagnostic + /// if not. pub fn expect(&mut self, t: Token) -> bool { let eaten = self.eat_if(t); if !eaten { @@ -221,9 +278,9 @@ impl<'s> Parser<'s> { } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn assert(&mut self, ts: &[Token]) { + pub fn assert(&mut self, t: Token) { let next = self.eat(); - debug_assert!(next.map_or(false, |n| ts.contains(&n))); + debug_assert_eq!(next, Some(t)); } /// Skip whitespace and comment tokens. @@ -238,41 +295,6 @@ impl<'s> Parser<'s> { } } - /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option> { - self.peeked - } - - /// Peek at the span of the next token. 
- /// - /// Has length zero if `peek()` returns `None`. - pub fn peek_span(&self) -> Span { - Span::new( - self.next_start, - if self.eof() { self.next_start } else { self.tokens.pos() }, - ) - } - - /// Peek at the source of the next token. - pub fn peek_src(&self) -> &'s str { - self.get(self.peek_span()) - } - - /// Checks whether the next token fulfills a condition. - /// - /// Returns `false` if there is no next token. - pub fn check(&self, f: F) -> bool - where - F: FnOnce(Token<'s>) -> bool, - { - self.peek().map_or(false, f) - } - - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() - } - /// The position at which the next token starts. pub fn start(&self) -> Pos { self.next_start @@ -285,8 +307,8 @@ impl<'s> Parser<'s> { self.last_end } - /// The span from - pub fn span_from(&self, start: Pos) -> Span { + /// The span from `start` to the end of the last token. + pub fn span(&self, start: Pos) -> Span { Span::new(start, self.last_end) } diff --git a/src/pretty.rs b/src/pretty.rs index e040d3ae..2ed6e80d 100644 --- a/src/pretty.rs +++ b/src/pretty.rs @@ -127,10 +127,10 @@ impl PrettyWithMap for Node { if let Some(map) = map { let value = &map[&(expr as *const _)]; value.pretty(p); - } else if let Expr::Call(call) = expr { - // Format bracket functions appropriately. 
- pretty_bracketed(call, p, false) } else { + if expr.has_short_form() { + p.push('#'); + } expr.pretty(p); } } @@ -287,13 +287,9 @@ impl Pretty for Named { impl Pretty for ExprTemplate { fn pretty(&self, p: &mut Printer) { - if let [Node::Expr(Expr::Call(call))] = self.tree.as_slice() { - pretty_bracketed(call, p, false); - } else { - p.push('['); - self.tree.pretty_with_map(p, None); - p.push(']'); - } + p.push('['); + self.tree.pretty_with_map(p, None); + p.push(']'); } } @@ -354,51 +350,25 @@ impl Pretty for BinOp { impl Pretty for ExprCall { fn pretty(&self, p: &mut Printer) { self.callee.pretty(p); - p.push('('); - self.args.pretty(p); - p.push(')'); - } -} - -/// Pretty print a bracket function, with body or chaining when possible. -pub fn pretty_bracketed(call: &ExprCall, p: &mut Printer, chained: bool) { - if chained { - p.push_str(" | "); - } else { - p.push_str("#["); - } - - // Function name. - call.callee.pretty(p); - let mut write_args = |items: &[ExprArg]| { - if !items.is_empty() { - p.push(' '); + let mut write_args = |items: &[ExprArg]| { + p.push('('); p.join(items, ", ", |item, p| item.pretty(p)); - } - }; - - match call.args.items.as_slice() { - // This can written as a chain. - // - // Example: Transforms "#[v][[f]]" => "#[v | f]". - [head @ .., ExprArg::Pos(Expr::Call(call))] => { - write_args(head); - pretty_bracketed(call, p, true); - } - - // This can be written with a body. - // - // Example: Transforms "#[v [Hi]]" => "#[v][Hi]". - [head @ .., ExprArg::Pos(Expr::Template(template))] => { - write_args(head); - p.push(']'); - template.pretty(p); - } + p.push(')'); + }; + + match self.args.items.as_slice() { + // This can be moved behind the arguments. + // + // Example: Transforms "#v(a, [b])" => "#v(a)[b]". 
+ [head @ .., ExprArg::Pos(Expr::Template(template))] => { + if !head.is_empty() { + write_args(head); + } + template.pretty(p); + } - items => { - write_args(items); - p.push(']'); + items => write_args(items), } } } @@ -420,7 +390,7 @@ impl Pretty for ExprArg { impl Pretty for ExprLet { fn pretty(&self, p: &mut Printer) { - p.push_str("#let "); + p.push_str("let "); self.binding.pretty(p); if let Some(init) = &self.init { p.push_str(" = "); @@ -431,12 +401,13 @@ impl Pretty for ExprLet { impl Pretty for ExprIf { fn pretty(&self, p: &mut Printer) { - p.push_str("#if "); + p.push_str("if "); self.condition.pretty(p); p.push(' '); self.if_body.pretty(p); if let Some(expr) = &self.else_body { - p.push_str(" #else "); + // FIXME: Hashtag in markup. + p.push_str(" else "); expr.pretty(p); } } @@ -444,9 +415,9 @@ impl Pretty for ExprIf { impl Pretty for ExprFor { fn pretty(&self, p: &mut Printer) { - p.push_str("#for "); + p.push_str("for "); self.pattern.pretty(p); - p.push_str(" #in "); + p.push_str(" in "); self.iter.pretty(p); p.push(' '); self.body.pretty(p); @@ -728,7 +699,7 @@ mod tests { // Blocks. roundtrip("{}"); roundtrip("{1}"); - roundtrip("{ #let x = 1; x += 2; x + 1 }"); + roundtrip("{ let x = 1; x += 2; x + 1 }"); roundtrip("[{}]"); // Operators. @@ -736,24 +707,20 @@ mod tests { roundtrip("{not true}"); roundtrip("{1 + 3}"); - // Parenthesized calls. + // Function calls. roundtrip("{v()}"); roundtrip("{v(1)}"); roundtrip("{v(a: 1, b)}"); - - // Bracket functions. - roundtrip("#[v]"); - roundtrip("#[v 1]"); - roundtrip("#[v 1, 2][*Ok*]"); - roundtrip("#[v 1 | f 2]"); - test_parse("{#[v]}", "{v()}"); - test_parse("#[v 1, #[f 2]]", "#[v 1 | f 2]"); + roundtrip("#v()"); + roundtrip("#v(1)"); + roundtrip("#v(1, 2)[*Ok*]"); + roundtrip("#v(1, f[2])"); // Keywords. 
roundtrip("#let x = 1 + 2"); - roundtrip("#if x [y] #else [z]"); - roundtrip("#for x #in y {z}"); - roundtrip("#for k, x #in y {z}"); + roundtrip("#for x in y {z}"); + roundtrip("#for k, x in y {z}"); + test_parse("#if x [y] #else [z]", "#if x [y] else [z]"); } #[test] diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index d18d3404..5b37bb56 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -54,6 +54,17 @@ impl Expr { Self::For(v) => v.span, } } + + /// Whether the expression can be shortened in markup with a hashtag. + pub fn has_short_form(&self) -> bool { + matches!(self, + Expr::Ident(_) + | Expr::Call(_) + | Expr::Let(_) + | Expr::If(_) + | Expr::For(_) + ) + } } /// A literal. diff --git a/src/syntax/token.rs b/src/syntax/token.rs index fe429e24..e57620af 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -6,8 +6,6 @@ use crate::geom::{AngularUnit, LengthUnit}; pub enum Token<'s> { /// A left square bracket: `[`. LeftBracket, - /// A hashtag followed by a left square bracket: `#[`. - HashBracket, /// A right square bracket: `]`. RightBracket, /// A left curly brace: `{`. @@ -191,7 +189,6 @@ impl<'s> Token<'s> { pub fn name(self) -> &'static str { match self { Self::LeftBracket => "opening bracket", - Self::HashBracket => "start of bracket function", Self::RightBracket => "closing bracket", Self::LeftBrace => "opening brace", Self::RightBrace => "closing brace", -- cgit v1.2.3