diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-11-06 12:12:02 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2021-11-06 15:49:39 +0100 |
| commit | 41bdafb5785dd85d20a3e79900b18e0010f6d71d (patch) | |
| tree | b2466b87e7da2b99256d7d1e08a644185e5879f3 /src | |
| parent | 515fe89c5ea94e6bcdcfe387d006776d31ad3646 (diff) | |
Faster parser
Diffstat (limited to 'src')
| -rw-r--r-- | src/parse/mod.rs | 61 | ||||
| -rw-r--r-- | src/parse/parser.rs | 506 | ||||
| -rw-r--r-- | src/parse/resolve.rs | 4 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 14 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 75 |
5 files changed, 292 insertions, 368 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 90be73f9..aa616fdf 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,13 +13,16 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; +use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc<GreenNode> { let mut p = Parser::new(source); markup(&mut p); - p.finish() + match p.finish().into_iter().next() { + Some(Green::Node(node)) => node, + _ => unreachable!(), + } } /// Parse markup. @@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let stmt = matches!(token, NodeKind::Let | NodeKind::Import); let group = if stmt { Group::Stmt } else { Group::Expr }; - p.start_group(group, TokenMode::Code); + p.start_group(group); let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); @@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match p.eat_map(|x| UnOp::from_token(&x)) { + match p.peek().and_then(UnOp::from_token) { Some(op) => { + p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); @@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { break; } - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } @@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { p.eat(); // Arrow means this is a closure's lone parameter. 
- if !atomic && p.peek() == Some(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); marker.perform(p, NodeKind::Closure, expr) @@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool { fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); @@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(&NodeKind::Arrow) { + if p.at(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } - // Find out which kind of collection this is. + // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), @@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { (kind, items) } -/// Parse an expression or a named pair. Returns if this is a named pair. +/// Parse an expression or a named pair, returning whether it's a spread or a +/// named pair. 
fn item(p: &mut Parser) -> ParseResult<NodeKind> { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { @@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> { expr(p)?; - if p.peek() == Some(&NodeKind::Colon) { + if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); + let error = + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + marker.end(p, error); p.eat(); - expr(p).ok(); Err(()) } })?; + Ok(NodeKind::Named) } else { - Ok(p.last_child().unwrap().kind().clone()) + Ok(NodeKind::None) } } @@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { // Parse a template block: `[...]`. fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { - p.start_group(Group::Bracket, TokenMode::Markup); + p.start_group(Group::Bracket); markup(p); p.end_group(); }); @@ -497,9 +501,9 @@ fn template(p: &mut Parser) { /// Parse a code block: `{...}`. fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { - p.start_group(Group::Brace, TokenMode::Code); + p.start_group(Group::Brace); while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); + p.start_group(Group::Stmt); if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -515,7 +519,7 @@ fn block(p: &mut Parser) { /// Parse a function call. 
fn call(p: &mut Parser, callee: &Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); Ok(()) } @@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult { fn args(p: &mut Parser, allow_template: bool) { p.perform(NodeKind::CallArgs, |p| { if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); collection(p); p.end_group(); } @@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { + if p.at(&NodeKind::LeftParen) { args(p, false); Ok(()) } else { @@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult { let marker = p.marker(); ident(p)?; - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let marker = p.marker(); collection(p); params(p, &marker, true); @@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult { body(p)?; if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { + if p.at(&NodeKind::If) { if_expr(p)?; } else { body(p)?; @@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. 
p.perform(NodeKind::ImportItems, |p| { - p.start_group(Group::Imports, TokenMode::Code); + p.start_group(Group::Imports); let marker = p.marker(); let items = collection(p).1; if items == 0 { @@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult { return Err(()); } } - Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821..5d26ff63 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,88 +10,63 @@ pub(crate) type ParseResult<T = ()> = Result<T, ()>; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option<NodeKind>, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec<GroupEntry>, - /// The next token. - next: Option<NodeKind>, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option<NodeKind>, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec<Vec<Green>>, /// The children of the currently built node. children: Vec<Green>, } -/// A logical group of tokens, e.g. `[...]`. -struct GroupEntry { - /// The kind of group this is. This decides which tokens will end the group. - /// For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. 
- pub kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - pub prev_mode: TokenMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A parenthesized group: `(...)`. - Paren, - /// A bracketed group: `[...]`. - Bracket, - /// A curly-braced group: `{...}`. - Brace, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Stmt, - /// A group for a single expression, ended by a line break. - Expr, - /// A group for import items, ended by a semicolon, line break or `from`. - Imports, -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); + let current = tokens.next(); Self { - src, tokens, - groups: vec![], - next: next.clone(), - peeked: next, + eof: current.is_none(), + current, prev_end: 0, - next_start: 0, - stack: vec![], + current_start: 0, + groups: vec![], children: vec![], } } + /// End the parsing process and return the last child. + pub fn finish(self) -> Vec<Green> { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform<T, F>(&mut self, kind: NodeKind, f: F) -> T + pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T where F: FnOnce(&mut Self) -> T, { - let prev = std::mem::take(&mut self.children); + let prev = mem::take(&mut self.children); let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); + let mut children = mem::replace(&mut self.children, prev); // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); for n in (0 .. 
len).rev() { - if !self.skip_type(&children[n].kind()) { + if !self.is_trivia(&children[n].kind()) { break; } @@ -107,66 +81,36 @@ impl<'s> Parser<'s> { output } - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof } - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into<String>) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); - /// End the parsing process and return the last child. - pub fn finish(&mut self) -> Rc<GreenNode> { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } } - } - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() + self.repeek(); } - /// Consume the next token if it is the given one. + /// Eat if the current token is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { + let at = self.at(t); + if at { self.eat(); - true - } else { - false } + at } - /// Consume the next token if the closure maps it a to `Some`-variant. 
- pub fn eat_map<T, F>(&mut self, f: F) -> Option<T> - where - F: FnOnce(&NodeKind) -> Option<T>, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. + /// Eat if the current token is the given one and produce an error if not. pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { @@ -175,14 +119,13 @@ impl<'s> Parser<'s> { if eaten { Ok(()) } else { Err(()) } } - /// Consume the next token, debug-asserting that it is one of the given ones. + /// Eat, debug-asserting that the token is the given one. pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); + debug_assert_eq!(self.peek(), Some(t)); self.eat(); } - /// Consume tokens while the condition is true. + /// Eat tokens while the condition is true. pub fn eat_while<F>(&mut self, mut f: F) where F: FnMut(&NodeKind) -> bool, @@ -192,68 +135,77 @@ impl<'s> Parser<'s> { } } - /// Peek at the next token without consuming it. + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() + if self.eof { None } else { self.current.as_ref() } } - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. 
pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() + if self.prev_end() == self.current_start() { + self.peek() } else { None } } - /// Peek at the source of the next token. + /// Peek at the source of the current token. pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) + self.tokens.scanner().get(self.current_start() .. self.current_end()) } /// The byte index at which the last token ended. /// - /// Refers to the end of the last _non-whitespace_ token in code mode. + /// Refers to the end of the last non-trivia token in code mode. pub fn prev_end(&self) -> usize { self.prev_end } - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start } - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { self.tokens.index() } /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range<usize>) -> &'s str { - self.src.get(range).unwrap() + self.tokens.scanner().column(index) } /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. + /// `peek()` return `None`. Parsing can only continue with a matching call + /// to `end_group`. /// - /// This panics if the next token does not start the given group. 
- pub fn start_group(&mut self, kind: Group, mode: TokenMode) { + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + self.repeek(); match kind { Group::Paren => self.eat_assert(&NodeKind::LeftParen), Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), @@ -268,12 +220,12 @@ impl<'s> Parser<'s> { /// /// This panics if no group was started. pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); + let group_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - let mut rescan = self.tokens.mode() != prev_mode; + let mut rescan = self.tokens.mode() != group_mode; // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { @@ -284,7 +236,7 @@ impl<'s> Parser<'s> { Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end.clone()) { + if self.current.as_ref() == Some(&end) { // Bump the delimiter and return. No need to rescan in this case. self.eat(); rescan = false; @@ -295,10 +247,10 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - if prev_mode == TokenMode::Code { + if group_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. 
len).rev() { - if !self.skip_type(self.children[n].kind()) { + if !self.is_trivia(self.children[n].kind()) { break; } @@ -307,129 +259,55 @@ impl<'s> Parser<'s> { } self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); self.repeek(); } } - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); } - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } + /// Take another look at the current token to recheck whether it ends a + /// group. 
+ fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; } - /// Eat the next token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) } /// Returns whether the given type can be skipped over given the current /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::Space(n) => *n == 0 || !stop_at_newline, NodeKind::LineComment => true, NodeKind::BlockComment => true, _ => false, } } - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. 
- while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. - fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( self.groups.last().map(|group| group.kind), @@ -441,28 +319,76 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } +} - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into<EcoString>) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); } - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) + /// Eat the current token and add an error that it is unexpected. 
+ pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); } } -/// A marker that indicates where a child may start. +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Marker(usize); impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. + pub fn end(self, p: &mut Parser, kind: NodeKind) { let end = (self.0 .. 
p.children.len()) .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .find(|&i| !p.is_trivia(p.children[i].kind())) .unwrap_or(self.0) + 1; @@ -472,47 +398,61 @@ impl Marker { } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children<F>(&self, p: &mut Parser, f: F) + pub fn filter_children<F>(self, p: &mut Parser, f: F) where F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut p.children[self.0 ..] { - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() { if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); } } } } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); } - pub fn perform<T, F>(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success + /// Return a reference to the child directly after the marker. 
+ pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) } } + +/// A logical group of tokens, e.g. `[...]`. +struct GroupEntry { + /// The kind of group this is. This decides which tokens will end the group. + /// For example, a [`Group::Paren`] will be ended by + /// [`Token::RightParen`]. + pub kind: Group, + /// The mode the parser was in _before_ the group started (to which we go + /// back once the group ends). + pub prev_mode: TokenMode, +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A parenthesized group: `(...)`. + Paren, + /// A group ended by a semicolon or a line break: `;`, `\n`. + Stmt, + /// A group for a single expression, ended by a line break. + Expr, + /// A group for import items, ended by a semicolon, line break or `from`. + Imports, +} diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 3fab98a4..b330dbd6 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -172,8 +172,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index aa28e1f5..494a9f0b 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -57,12 +57,6 @@ impl<'s> Tokens<'s> { self.s.jump(index); } - /// The column of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.s.column(index) - } - /// The underlying scanner. 
#[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.column(self.s.index() - 1); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { @@ -342,10 +336,8 @@ impl<'s> Tokens<'s> { } } - let terminated = found == backticks; - let end = self.s.index() - if terminated { found as usize } else { 0 }; - - if terminated { + if found == backticks { + let end = self.s.index() - found as usize; NodeKind::Raw(Rc::new(resolve_raw( column, backticks, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 363cbe6e..022b51de 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -42,11 +42,10 @@ impl Green { /// Set the type of the node. pub fn set_kind(&mut self, kind: NodeKind) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_kind(kind); + match self { + Self::Node(node) => Rc::make_mut(node).data.set_kind(kind), + Self::Token(data) => data.set_kind(kind), + } } /// The length of the node. @@ -56,7 +55,10 @@ impl Green { /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { - self.data().erroneous() + match self { + Self::Node(node) => node.erroneous, + Self::Token(data) => data.kind.is_error(), + } } /// The node's children. @@ -94,24 +96,30 @@ pub struct GreenNode { data: GreenData, /// This node's children, losslessly make up this node. children: Vec<Green>, + /// Whether this node or any of its children are erroneous. + erroneous: bool, } impl GreenNode { + /// Creates a new node with the given kind and a single child. + pub fn with_child(kind: NodeKind, child: impl Into<Green>) -> Self { + Self::with_children(kind, vec![child.into()]) + } + /// Creates a new node with the given kind and children. 
pub fn with_children(kind: NodeKind, children: Vec<Green>) -> Self { - let mut data = GreenData::new(kind, 0); + let mut erroneous = kind.is_error(); let len = children .iter() - .inspect(|c| data.erroneous |= c.erroneous()) + .inspect(|c| erroneous |= c.erroneous()) .map(Green::len) .sum(); - data.len = len; - Self { data, children } - } - /// Creates a new node with the given kind and a single child. - pub fn with_child(kind: NodeKind, child: impl Into<Green>) -> Self { - Self::with_children(kind, vec![child.into()]) + Self { + data: GreenData::new(kind, len), + children, + erroneous, + } } /// The node's children. @@ -140,14 +148,12 @@ pub struct GreenData { kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// Whether this node or any of its children contain an error. - erroneous: bool, } impl GreenData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, erroneous: kind.is_error(), kind } + Self { len, kind } } /// The type of the node. @@ -164,11 +170,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { - self.erroneous - } } impl From<GreenData> for Green { @@ -219,7 +220,7 @@ impl<'a> RedRef<'a> { /// The error messages for this node and its descendants. 
pub fn errors(self) -> Vec<Error> { - if !self.green.erroneous() { + if !self.erroneous() { return vec![]; } @@ -235,7 +236,7 @@ impl<'a> RedRef<'a> { } _ => self .children() - .filter(|red| red.green.erroneous()) + .filter(|red| red.erroneous()) .flat_map(|red| red.errors()) .collect(), } @@ -256,11 +257,11 @@ impl<'a> RedRef<'a> { Green::Token(_) => &[], }; - let mut offset = self.offset; + let mut cursor = self.offset; children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } + let offset = cursor; + cursor += green.len(); + RedRef { id: self.id, offset, green } }) } @@ -623,29 +624,17 @@ pub enum ErrorPosition { impl NodeKind { /// Whether this is some kind of parenthesis. pub fn is_paren(&self) -> bool { - match self { - Self::LeftParen => true, - Self::RightParen => true, - _ => false, - } + matches!(self, Self::LeftParen | Self::RightParen) } /// Whether this is some kind of bracket. pub fn is_bracket(&self) -> bool { - match self { - Self::LeftBracket => true, - Self::RightBracket => true, - _ => false, - } + matches!(self, Self::LeftBracket | Self::RightBracket) } /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { - match self { - Self::LeftBrace => true, - Self::RightBrace => true, - _ => false, - } + matches!(self, Self::LeftBrace | Self::RightBrace) } /// Whether this is some kind of error. |
