From 4875633acf4701705b9b3b014eb7d94268b897c2 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 23 Oct 2021 19:03:27 +0200 Subject: Change parser --- src/parse/parser.rs | 415 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 302 insertions(+), 113 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 347d6f71..f62e882a 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,29 +1,34 @@ use std::ops::Range; +use std::rc::Rc; use super::{TokenMode, Tokens}; -use crate::diag::Error; use crate::source::{SourceFile, SourceId}; -use crate::syntax::{IntoSpan, Pos, Span, Token}; +use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. source: &'s SourceFile, - /// Parsing errors. - errors: Vec, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. groups: Vec, /// The next token. - next: Option>, + next: Option, /// The peeked token. /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option>, + peeked: Option, /// The end index of the last (non-whitespace if in code mode) token. prev_end: usize, /// The start index of the peeked token. next_start: usize, + /// A stack of outer children vectors. + stack: Vec>, + /// The children of the currently built node. + children: Vec, + /// Whether the last parsing step was successful. + success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -32,9 +37,6 @@ struct GroupEntry { /// For example, a [`Group::Paren`] will be ended by /// [`Token::RightParen`]. pub kind: Group, - /// The start index of the group. Used by `Parser::end_group` to return the - /// group's full span. - pub start: usize, /// The mode the parser was in _before_ the group started (to which we go /// back once the group ends). pub prev_mode: TokenMode, @@ -60,51 +62,204 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source.src(), TokenMode::Markup); + let mut tokens = Tokens::new(source, TokenMode::Markup); let next = tokens.next(); Self { source, - errors: vec![], tokens, groups: vec![], - next, + next: next.clone(), peeked: next, prev_end: 0, next_start: 0, + stack: vec![], + children: vec![], + success: true, } } - /// Finish parsing and return all errors. - pub fn finish(self) -> Vec { - self.errors - } - /// The id of the parsed source file. pub fn id(&self) -> SourceId { self.source.id() } + /// Start a nested node. + /// + /// Each start call has to be matched with a call to `end`, + /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. + pub fn start(&mut self) { + self.stack.push(std::mem::take(&mut self.children)); + } + + /// Start a nested node, preserving a number of the current children. + pub fn start_with(&mut self, preserve: usize) { + let preserved = self.children.drain(self.children.len() - preserve ..).collect(); + self.stack.push(std::mem::replace(&mut self.children, preserved)); + } + + /// Filter the last children using the given predicate. + pub fn filter_children(&mut self, count: usize, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + for child in &mut self.children[count ..] { + if !((self.tokens.mode() != TokenMode::Code + || Self::skip_type_ext(child.kind(), false)) + || child.kind().is_error() + || f(&child)) + { + let (pos, msg) = error(child.kind()); + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) + .into(); + } + } + } + + pub fn child(&self, child: usize) -> Option<&Green> { + self.node_index_from_back(child).map(|i| &self.children[i]) + } + + fn node_index_from_back(&self, child: usize) -> Option { + let len = self.children.len(); + let code = self.tokens.mode() == TokenMode::Code; + let mut seen = 0; + for x in (0 .. len).rev() { + if self.skip_type(self.children[x].kind()) && code { + continue; + } + if seen == child { + return Some(x); + } + seen += 1; + } + + None + } + + /// End the current node as a node of given `kind`. + pub fn end(&mut self, kind: NodeKind) { + let outer = self.stack.pop().unwrap(); + let mut children = std::mem::replace(&mut self.children, outer); + + // have trailing whitespace continue to sit in self.children in code + // mode. + let mut remains = vec![]; + if self.tokens.mode() == TokenMode::Code { + let len = children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(&children[n].kind()) { + break; + } + + remains.push(children.pop().unwrap()); + } + remains.reverse(); + } + + let len = children.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + self.children.extend(remains); + self.success = true; + } + + /// End the current node as a node of given `kind`, and start a new node + /// with the ended node as a first child. The function returns how many + /// children the stack frame had before and how many were appended (accounts + /// for trivia). + pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { + let stack_offset = self.stack.last().unwrap().len(); + self.end(kind); + let diff = self.children.len() - stack_offset; + self.start_with(diff); + (stack_offset, diff) + } + + pub fn wrap(&mut self, index: usize, kind: NodeKind) { + let index = self.node_index_from_back(index).unwrap(); + let child = std::mem::take(&mut self.children[index]); + let item = GreenNode::with_child(kind, child.len(), child); + self.children[index] = item.into(); + } + + pub fn convert(&mut self, kind: NodeKind) { + self.start(); + self.eat(); + self.end(kind); + } + + /// End the current node and undo its existence, inling all accumulated + /// children into its parent. + pub fn lift(&mut self) { + let outer = self.stack.pop().unwrap(); + let children = std::mem::replace(&mut self.children, outer); + self.children.extend(children); + self.success = true; + } + + /// End the current node and undo its existence, deleting all accumulated + /// children. + pub fn abort(&mut self, msg: impl Into) { + self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); + self.success = false; + } + + pub fn may_lift_abort(&mut self) -> bool { + if !self.success { + self.lift(); + self.success = false; + true + } else { + false + } + } + + pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { + if !self.success { + self.end(kind); + self.success = false; + true + } else { + false + } + } + + /// End the current node as a node of given `kind` if the last parse was + /// successful, otherwise, abort. + pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { + if self.success { + self.end(kind); + true + } else { + self.may_end_abort(kind); + false + } + } + + pub fn finish(&mut self) -> Rc { + if let Green::Node(n) = self.children.pop().unwrap() { + n + } else { + panic!() + } + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.peek().is_none() } - /// Consume the next token. - pub fn eat(&mut self) -> Option> { + pub fn eat(&mut self) -> Option { let token = self.peek()?; self.bump(); Some(token) } - /// Eat the next token and return its source range. - pub fn eat_span(&mut self) -> Span { - let start = self.next_start(); - self.eat(); - Span::new(self.id(), start, self.prev_end()) - } - /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: Token) -> bool { + pub fn eat_if(&mut self, t: NodeKind) -> bool { if self.peek() == Some(t) { self.bump(); true @@ -116,7 +271,7 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(Token<'s>) -> Option, + F: FnOnce(NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); @@ -128,16 +283,16 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: Token) -> bool { - let eaten = self.eat_if(t); + pub fn eat_expect(&mut self, t: NodeKind) -> bool { + let eaten = self.eat_if(t.clone()); if !eaten { - self.expected_at(self.prev_end(), t.name()); + self.expected_at(&t.to_string()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: Token) { + pub fn eat_assert(&mut self, t: NodeKind) { let next = self.eat(); debug_assert_eq!(next, Some(t)); } @@ -145,7 +300,7 @@ impl<'s> Parser<'s> { /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(Token<'s>) -> bool, + F: FnMut(NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -153,42 +308,25 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option> { - self.peeked + pub fn peek(&self) -> Option { + self.peeked.clone() } /// Peek at the next token if it follows immediately after the last one /// without any whitespace in between. - pub fn peek_direct(&self) -> Option> { + pub fn peek_direct(&self) -> Option<&NodeKind> { if self.next_start() == self.prev_end() { - self.peeked + self.peeked.as_ref() } else { None } } - /// Peek at the span of the next token. - /// - /// Has length zero if `peek()` returns `None`. - pub fn peek_span(&self) -> Span { - Span::new(self.id(), self.next_start(), self.next_end()) - } - /// Peek at the source of the next token. pub fn peek_src(&self) -> &'s str { self.get(self.next_start() .. self.next_end()) } - /// Checks whether the next token fulfills a condition. - /// - /// Returns `false` if there is no next token. - pub fn check(&self, f: F) -> bool - where - F: FnOnce(Token<'s>) -> bool, - { - self.peek().map_or(false, f) - } - /// The byte index at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in code mode. @@ -219,11 +357,6 @@ impl<'s> Parser<'s> { self.source.get(range).unwrap() } - /// The span from `start` to [`self.prev_end()`](Self::prev_end). - pub fn span_from(&self, start: impl Into) -> Span { - Span::new(self.id(), start, self.prev_end()) - } - /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to @@ -232,19 +365,15 @@ impl<'s> Parser<'s> { /// /// This panics if the next token does not start the given group. pub fn start_group(&mut self, kind: Group, mode: TokenMode) { - self.groups.push(GroupEntry { - kind, - start: self.next_start(), - prev_mode: self.tokens.mode(), - }); + self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); self.tokens.set_mode(mode); self.repeek(); match kind { - Group::Paren => self.eat_assert(Token::LeftParen), - Group::Bracket => self.eat_assert(Token::LeftBracket), - Group::Brace => self.eat_assert(Token::LeftBrace), + Group::Paren => self.eat_assert(NodeKind::LeftParen), + Group::Bracket => self.eat_assert(NodeKind::LeftBracket), + Group::Brace => self.eat_assert(NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -254,7 +383,7 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> Span { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); @@ -264,83 +393,125 @@ impl<'s> Parser<'s> { // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { - Group::Paren => Some((Token::RightParen, true)), - Group::Bracket => Some((Token::RightBracket, true)), - Group::Brace => Some((Token::RightBrace, true)), - Group::Stmt => Some((Token::Semicolon, false)), + Group::Paren => Some((NodeKind::RightParen, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Stmt => Some((NodeKind::Semicolon, false)), Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end) { + if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. self.bump(); rescan = false; } else if required { - self.error( - self.next_start() .. self.next_start(), - format!("expected {}", end.name()), - ); + self.start(); + self.abort(format!("expected {}", end.to_string())); } } // Rescan the peeked token if the mode changed. if rescan { self.tokens.jump(self.prev_end()); - self.bump(); - } - Span::new(self.id(), group.start, self.prev_end()) - } + if prev_mode == TokenMode::Code { + let len = self.children.len(); + for n in (0 .. len).rev() { + if !self.skip_type(self.children[n].kind()) { + break; + } + + self.children.pop(); + } + } - /// Add an error with location and message. - pub fn error(&mut self, span: impl IntoSpan, message: impl Into) { - self.errors.push(Error::new(span.into_span(self.id()), message)); + self.fast_forward(); + } } /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, span: impl IntoSpan, what: &str) { - self.error(span, format!("expected {}", what)); + pub fn expected_at(&mut self, what: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.skip_type(node.kind()) { + break; + } + found = i; + } + + self.expected_at_child(found, what); + } + + /// Add an error that `what` was expected at the given child index. + pub fn expected_at_child(&mut self, index: usize, what: &str) { + self.children.insert( + index, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error( - before .. after, - format!("expected {}, found {}", what, found.name()), - ); + self.abort(format!("expected {}, found {}", what, found.to_string())) } else { - self.expected_at(self.next_start(), what); + self.lift(); + self.expected_at(what); } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - let before = self.next_start(); + self.start(); if let Some(found) = self.eat() { - let after = self.prev_end(); - self.error(before .. after, format!("unexpected {}", found.name())); + self.abort(format!("unexpected {}", found.to_string())) + } else { + self.abort("unexpected end of file") } } + pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + match token { + NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::LineComment => true, + NodeKind::BlockComment => true, + _ => false, + } + } + + fn skip_type(&self, token: &NodeKind) -> bool { + Self::skip_type_ext(token, self.stop_at_newline()) + } + /// Move to the next token. fn bump(&mut self) { - self.prev_end = self.tokens.index().into(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.fast_forward(); + } + + pub fn fast_forward(&mut self) { + if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.prev_end = self.tokens.index().into(); + } self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. - while match self.next { - Some(Token::Space(n)) => n < 1 || !self.stop_at_newline(), - Some(Token::LineComment(_)) => true, - Some(Token::BlockComment(_)) => true, - _ => false, - } { - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { + self.bump(); } } @@ -349,19 +520,19 @@ impl<'s> Parser<'s> { /// Take another look at the next token to recheck whether it ends a group. fn repeek(&mut self) { - self.peeked = self.next; - let token = match self.next { + self.peeked = self.next.clone(); + let token = match self.next.as_ref() { Some(token) => token, None => return, }; if match token { - Token::RightParen => self.inside(Group::Paren), - Token::RightBracket => self.inside(Group::Bracket), - Token::RightBrace => self.inside(Group::Brace), - Token::Semicolon => self.inside(Group::Stmt), - Token::From => self.inside(Group::Imports), - Token::Space(n) => n >= 1 && self.stop_at_newline(), + NodeKind::RightParen => self.inside(Group::Paren), + NodeKind::RightBracket => self.inside(Group::Bracket), + NodeKind::RightBrace => self.inside(Group::Brace), + NodeKind::Semicolon => self.inside(Group::Stmt), + NodeKind::From => self.inside(Group::Imports), + NodeKind::Space(n) => n > &0 && self.stop_at_newline(), _ => false, } { self.peeked = None; @@ -380,4 +551,22 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } + + pub fn last_child(&self) -> Option<&Green> { + self.children.last() + } + + pub fn success(&mut self) -> bool { + let s = self.success; + self.success = true; + s + } + + pub fn unsuccessful(&mut self) { + self.success = false; + } + + pub fn child_count(&self) -> usize { + self.children.len() + } } -- cgit v1.2.3 From 84d35efee38d137a77e368c50421ac24327371c6 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 11:46:12 +0100 Subject: Less owning, more iterating --- src/parse/parser.rs | 73 ++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f62e882a..e6fcc1ae 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -161,7 +161,7 @@ impl<'s> Parser<'s> { let len = children.iter().map(|c| c.len()).sum(); self.children - .push(GreenNode::with_children(kind, len, children.into_iter()).into()); + .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); self.success = true; } @@ -240,10 +240,9 @@ impl<'s> Parser<'s> { } pub fn finish(&mut self) -> Rc { - if let Green::Node(n) = self.children.pop().unwrap() { - n - } else { - panic!() + match self.children.pop().unwrap() { + Green::Node(n) => n, + _ => panic!(), } } @@ -252,16 +251,16 @@ impl<'s> Parser<'s> { self.peek().is_none() } - pub fn eat(&mut self) -> Option { - let token = self.peek()?; - self.bump(); + fn eat_peeked(&mut self) -> Option { + let token = self.peek()?.clone(); + self.eat(); Some(token) } /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: NodeKind) -> bool { + pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { - self.bump(); + self.eat(); true } else { false @@ -271,36 +270,36 @@ impl<'s> Parser<'s> { /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where - F: FnOnce(NodeKind) -> Option, + F: FnOnce(&NodeKind) -> Option, { let token = self.peek()?; let mapped = f(token); if mapped.is_some() { - self.bump(); + self.eat(); } mapped } /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: NodeKind) -> bool { - let eaten = self.eat_if(t.clone()); + pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + let eaten = self.eat_if(t); if !eaten { - self.expected_at(&t.to_string()); + self.expected_at(t.as_str()); } eaten } /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: NodeKind) { - let next = self.eat(); - debug_assert_eq!(next, Some(t)); + pub fn eat_assert(&mut self, t: &NodeKind) { + let next = self.eat_peeked(); + debug_assert_eq!(next.as_ref(), Some(t)); } /// Consume tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where - F: FnMut(NodeKind) -> bool, + F: FnMut(&NodeKind) -> bool, { while self.peek().map_or(false, |t| f(t)) { self.eat(); @@ -308,8 +307,8 @@ impl<'s> Parser<'s> { } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option { - self.peeked.clone() + pub fn peek(&self) -> Option<&NodeKind> { + self.peeked.as_ref() } /// Peek at the next token if it follows immediately after the last one @@ -371,9 +370,9 @@ impl<'s> Parser<'s> { self.repeek(); match kind { - Group::Paren => self.eat_assert(NodeKind::LeftParen), - Group::Bracket => self.eat_assert(NodeKind::LeftBracket), - Group::Brace => self.eat_assert(NodeKind::LeftBrace), + Group::Paren => self.eat_assert(&NodeKind::LeftParen), + Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), + Group::Brace => self.eat_assert(&NodeKind::LeftBrace), Group::Stmt => {} Group::Expr => {} Group::Imports => {} @@ -402,11 +401,11 @@ impl<'s> Parser<'s> { } { if self.next == Some(end.clone()) { // Bump the delimeter and return. No need to rescan in this case. - self.bump(); + self.eat(); rescan = false; } else if required { self.start(); - self.abort(format!("expected {}", end.to_string())); + self.abort(format!("expected {}", end)); } } @@ -457,21 +456,21 @@ impl<'s> Parser<'s> { /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("expected {}, found {}", what, found.to_string())) - } else { - self.lift(); - self.expected_at(what); + match self.eat_peeked() { + Some(found) => self.abort(format!("expected {}, found {}", what, found)), + None => { + self.lift(); + self.expected_at(what); + } } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { self.start(); - if let Some(found) = self.eat() { - self.abort(format!("unexpected {}", found.to_string())) - } else { - self.abort("unexpected end of file") + match self.eat_peeked() { + Some(found) => self.abort(format!("unexpected {}", found)), + None => self.abort("unexpected end of file"), } } @@ -489,7 +488,7 @@ impl<'s> Parser<'s> { } /// Move to the next token. - fn bump(&mut self) { + pub fn eat(&mut self) { self.children.push( GreenData::new( self.next.clone().unwrap(), @@ -511,7 +510,7 @@ impl<'s> Parser<'s> { if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.bump(); + self.eat(); } } -- cgit v1.2.3 From 2e7d359e59a45849f53eea6e022ca83295f5a6e7 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 18:52:48 +0100 Subject: Unicode escape error moved to tokenizer --- src/parse/parser.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index e6fcc1ae..240de43d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -186,9 +186,27 @@ impl<'s> Parser<'s> { } pub fn convert(&mut self, kind: NodeKind) { - self.start(); - self.eat(); - self.end(kind); + let len = self.tokens.index() - self.next_start; + + self.children.push( + GreenNode::with_child( + kind, + len, + GreenData::new(self.next.clone().unwrap(), len), + ) + .into(), + ); + self.fast_forward(); + self.success = true; + } + + pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + let preserved: Vec<_> = + self.children.drain(self.children.len() - preserve ..).collect(); + let len = preserved.iter().map(|c| c.len()).sum(); + self.children + .push(GreenNode::with_children(kind, len, preserved).into()); + self.success = true; } /// End the current node and undo its existence, inling all accumulated -- cgit v1.2.3 From 49fb3cd4e2a5d6997ad4046d3514f154d8c866dd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:03:18 +0100 Subject: Code Review: Life is Like a Box of Iterators --- src/parse/parser.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 240de43d..374e7c09 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,15 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; -use crate::source::{SourceFile, SourceId}; +use super::{is_newline, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. - source: &'s SourceFile, + src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. @@ -61,11 +60,11 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source, TokenMode::Markup); + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); let next = tokens.next(); Self { - source, + src, tokens, groups: vec![], next: next.clone(), @@ -78,11 +77,6 @@ impl<'s> Parser<'s> { } } - /// The id of the parsed source file. - pub fn id(&self) -> SourceId { - self.source.id() - } - /// Start a nested node. /// /// Each start call has to be matched with a call to `end`, @@ -366,12 +360,16 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.source.byte_to_column(index).unwrap() + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() } /// Slice out part of the source string. pub fn get(&self, range: Range) -> &'s str { - self.source.get(range).unwrap() + self.src.get(range).unwrap() } /// Continue parsing in a group. -- cgit v1.2.3 From 42afb27cef5540535420fb6d8d9d2fcda7300a47 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 1 Nov 2021 13:45:33 +0100 Subject: Add documentation --- src/parse/parser.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 374e7c09..8c68d630 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -112,10 +112,14 @@ impl<'s> Parser<'s> { } } + /// Return the a child from the current stack frame specified by its + /// non-trivia index from the back. pub fn child(&self, child: usize) -> Option<&Green> { self.node_index_from_back(child).map(|i| &self.children[i]) } + /// Map a non-trivia index from the back of the current stack frame to a + /// normal index. fn node_index_from_back(&self, child: usize) -> Option { let len = self.children.len(); let code = self.tokens.mode() == TokenMode::Code; @@ -172,6 +176,8 @@ impl<'s> Parser<'s> { (stack_offset, diff) } + /// Wrap a specified node in the current stack frame (indexed from the back, + /// not including trivia). pub fn wrap(&mut self, index: usize, kind: NodeKind) { let index = self.node_index_from_back(index).unwrap(); let child = std::mem::take(&mut self.children[index]); @@ -179,6 +185,7 @@ impl<'s> Parser<'s> { self.children[index] = item.into(); } + /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { let len = self.tokens.index() - self.next_start; @@ -194,9 +201,11 @@ impl<'s> Parser<'s> { self.success = true; } - pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) { + /// Wrap the last `amount` children in the current stack frame with a new + /// node. + pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { let preserved: Vec<_> = - self.children.drain(self.children.len() - preserve ..).collect(); + self.children.drain(self.children.len() - amount ..).collect(); let len = preserved.iter().map(|c| c.len()).sum(); self.children .push(GreenNode::with_children(kind, len, preserved).into()); @@ -219,6 +228,8 @@ impl<'s> Parser<'s> { self.success = false; } + /// This function [`Self::lift`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_lift_abort(&mut self) -> bool { if !self.success { self.lift(); @@ -229,6 +240,8 @@ impl<'s> Parser<'s> { } } + /// This function [`Self::end`]s if the last operation was unsuccessful and + /// returns whether it did. pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { if !self.success { self.end(kind); @@ -251,6 +264,7 @@ impl<'s> Parser<'s> { } } + /// End the parsing process and return the last child. pub fn finish(&mut self) -> Rc { match self.children.pop().unwrap() { Green::Node(n) => n, @@ -263,6 +277,7 @@ impl<'s> Parser<'s> { self.peek().is_none() } + /// Consume the next token and return its kind. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -490,6 +505,8 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over given the current + /// newline mode. pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { NodeKind::Space(n) => n < &1 || !stop_at_newline, @@ -499,11 +516,12 @@ impl<'s> Parser<'s> { } } + /// Returns whether the given type can be skipped over. fn skip_type(&self, token: &NodeKind) -> bool { Self::skip_type_ext(token, self.stop_at_newline()) } - /// Move to the next token. + /// Consume the next token. pub fn eat(&mut self) { self.children.push( GreenData::new( @@ -516,6 +534,7 @@ impl<'s> Parser<'s> { self.fast_forward(); } + /// Move to the next token. pub fn fast_forward(&mut self) { if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { self.prev_end = self.tokens.index().into(); @@ -567,20 +586,24 @@ impl<'s> Parser<'s> { self.groups.iter().any(|g| g.kind == kind) } + /// Returns the last child of the current stack frame. pub fn last_child(&self) -> Option<&Green> { self.children.last() } + /// Whether the last operation was successful. pub fn success(&mut self) -> bool { let s = self.success; self.success = true; s } + /// Declare the last operation as unsuccessful. pub fn unsuccessful(&mut self) { self.success = false; } + /// Amount of children in the current stack frame. pub fn child_count(&self) -> usize { self.children.len() } -- cgit v1.2.3 From 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 2 Nov 2021 12:13:45 +0100 Subject: Refactoring Co-Authored-By: Martin --- src/parse/parser.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8c68d630..5833c724 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{is_newline, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -375,11 +375,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() + self.tokens.column(index) } /// Slice out part of the source string. -- cgit v1.2.3 From f0c9635db5efd0c66e01bef1be0a8f140fdbdd84 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 4 Nov 2021 15:16:46 +0100 Subject: Notes --- src/parse/parser.rs | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5833c724..5ecb6e9d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -187,17 +187,8 @@ impl<'s> Parser<'s> { /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { - let len = self.tokens.index() - self.next_start; - - self.children.push( - GreenNode::with_child( - kind, - len, - GreenData::new(self.next.clone().unwrap(), len), - ) - .into(), - ); - self.fast_forward(); + self.eat(); + self.children.last_mut().unwrap().set_kind(kind); self.success = true; } @@ -278,6 +269,7 @@ impl<'s> Parser<'s> { } /// Consume the next token and return its kind. + // NOTE: This isn't great. fn eat_peeked(&mut self) -> Option { let token = self.peek()?.clone(); self.eat(); @@ -319,6 +311,7 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { + // NOTE: assert with peek(), then eat() let next = self.eat_peeked(); debug_assert_eq!(next.as_ref(), Some(t)); } @@ -438,8 +431,6 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - self.tokens.jump(self.prev_end()); - if prev_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. len).rev() { @@ -451,7 +442,11 @@ impl<'s> Parser<'s> { } } - self.fast_forward(); + self.tokens.jump(self.prev_end()); + self.prev_end = self.tokens.index().into(); + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); + self.repeek(); } } @@ -527,21 +522,23 @@ impl<'s> Parser<'s> { .into(), ); - self.fast_forward(); - } - - /// Move to the next token. - pub fn fast_forward(&mut self) { - if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.prev_end = self.tokens.index().into(); - } + self.prev_end = self.tokens.index().into(); self.next_start = self.tokens.index().into(); self.next = self.tokens.next(); if self.tokens.mode() == TokenMode::Code { // Skip whitespace and comments. while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.eat(); + self.children.push( + GreenData::new( + self.next.clone().unwrap(), + self.tokens.index() - self.next_start, + ) + .into(), + ); + + self.next_start = self.tokens.index().into(); + self.next = self.tokens.next(); } } -- cgit v1.2.3 From 5c952d56d0d602a1dbcf85210ae30fa402219fca Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 4 Nov 2021 19:36:32 +0100 Subject: New error handling --- src/parse/parser.rs | 228 +++++++++++++++++++++------------------------------- 1 file changed, 91 insertions(+), 137 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ecb6e9d..bc028876 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; +use super::{ParseResult, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -26,8 +26,6 @@ pub struct Parser<'s> { stack: Vec>, /// The children of the currently built node. children: Vec, - /// Whether the last parsing step was successful. - success: bool, } /// A logical group of tokens, e.g. `[...]`. @@ -58,6 +56,49 @@ pub enum Group { Imports, } +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. + pub fn end(&self, p: &mut Parser, kind: NodeKind) { + if p.children.len() != self.0 { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let len = children.iter().map(Green::len).sum(); + p.children + .insert(self.0, GreenNode::with_children(kind, len, children).into()); + } + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + where + F: Fn(&Green) -> bool, + G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + { + p.filter_children(self, f, error) + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } +} + impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -73,7 +114,6 @@ impl<'s> Parser<'s> { next_start: 0, stack: vec![], children: vec![], - success: true, } } @@ -85,19 +125,13 @@ impl<'s> Parser<'s> { self.stack.push(std::mem::take(&mut self.children)); } - /// Start a nested node, preserving a number of the current children. - pub fn start_with(&mut self, preserve: usize) { - let preserved = self.children.drain(self.children.len() - preserve ..).collect(); - self.stack.push(std::mem::replace(&mut self.children, preserved)); - } - /// Filter the last children using the given predicate. - pub fn filter_children(&mut self, count: usize, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F, error: G) where F: Fn(&Green) -> bool, G: Fn(&NodeKind) -> (ErrorPosition, EcoString), { - for child in &mut self.children[count ..] { + for child in &mut self.children[count.0 ..] { if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) || child.kind().is_error() @@ -161,46 +195,22 @@ impl<'s> Parser<'s> { self.children .push(GreenNode::with_children(kind, len, children).into()); self.children.extend(remains); - self.success = true; } - /// End the current node as a node of given `kind`, and start a new node - /// with the ended node as a first child. The function returns how many - /// children the stack frame had before and how many were appended (accounts - /// for trivia). - pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) { - let stack_offset = self.stack.last().unwrap().len(); + pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + where + F: FnOnce(&mut Self) -> ParseResult, + { + self.start(); + let success = f(self); self.end(kind); - let diff = self.children.len() - stack_offset; - self.start_with(diff); - (stack_offset, diff) - } - - /// Wrap a specified node in the current stack frame (indexed from the back, - /// not including trivia). - pub fn wrap(&mut self, index: usize, kind: NodeKind) { - let index = self.node_index_from_back(index).unwrap(); - let child = std::mem::take(&mut self.children[index]); - let item = GreenNode::with_child(kind, child.len(), child); - self.children[index] = item.into(); + success } /// Eat and wrap the next token. pub fn convert(&mut self, kind: NodeKind) { self.eat(); self.children.last_mut().unwrap().set_kind(kind); - self.success = true; - } - - /// Wrap the last `amount` children in the current stack frame with a new - /// node. - pub fn convert_with(&mut self, amount: usize, kind: NodeKind) { - let preserved: Vec<_> = - self.children.drain(self.children.len() - amount ..).collect(); - let len = preserved.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, preserved).into()); - self.success = true; } /// End the current node and undo its existence, inling all accumulated @@ -209,50 +219,14 @@ impl<'s> Parser<'s> { let outer = self.stack.pop().unwrap(); let children = std::mem::replace(&mut self.children, outer); self.children.extend(children); - self.success = true; } - /// End the current node and undo its existence, deleting all accumulated - /// children. - pub fn abort(&mut self, msg: impl Into) { - self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into())); - self.success = false; - } - - /// This function [`Self::lift`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_lift_abort(&mut self) -> bool { - if !self.success { - self.lift(); - self.success = false; - true - } else { - false - } - } - - /// This function [`Self::end`]s if the last operation was unsuccessful and - /// returns whether it did. - pub fn may_end_abort(&mut self, kind: NodeKind) -> bool { - if !self.success { - self.end(kind); - self.success = false; - true - } else { - false - } - } - - /// End the current node as a node of given `kind` if the last parse was - /// successful, otherwise, abort. - pub fn end_or_abort(&mut self, kind: NodeKind) -> bool { - if self.success { - self.end(kind); - true - } else { - self.may_end_abort(kind); - false - } + /// Add an error to the current children list. + fn push_error(&mut self, msg: impl Into) { + self.children.push( + GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) + .into(), + ); } /// End the parsing process and return the last child. @@ -268,14 +242,6 @@ impl<'s> Parser<'s> { self.peek().is_none() } - /// Consume the next token and return its kind. - // NOTE: This isn't great. - fn eat_peeked(&mut self) -> Option { - let token = self.peek()?.clone(); - self.eat(); - Some(token) - } - /// Consume the next token if it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { if self.peek() == Some(t) { @@ -311,9 +277,9 @@ impl<'s> Parser<'s> { /// Consume the next token, debug-asserting that it is one of the given ones. pub fn eat_assert(&mut self, t: &NodeKind) { - // NOTE: assert with peek(), then eat() - let next = self.eat_peeked(); - debug_assert_eq!(next.as_ref(), Some(t)); + let next = self.peek(); + debug_assert_eq!(next, Some(t)); + self.eat(); } /// Consume tokens while the condition is true. @@ -402,9 +368,10 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) { + pub fn end_group(&mut self) -> ParseResult { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); + let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -424,8 +391,8 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { - self.start(); - self.abort(format!("expected {}", end)); + self.push_error(format!("expected {}", end)); + success = false; } } @@ -448,6 +415,8 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } + + if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. @@ -460,39 +429,36 @@ impl<'s> Parser<'s> { found = i; } - self.expected_at_child(found, what); - } - - /// Add an error that `what` was expected at the given child index. - pub fn expected_at_child(&mut self, index: usize, what: &str) { - self.children.insert( - index, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); + Marker(found).expected_at(self, what); } /// Eat the next token and add an error that it is not the expected `thing`. pub fn expected(&mut self, what: &str) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("expected {}, found {}", what, found)), - None => { - self.lift(); - self.expected_at(what); + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + )); } + None => self.expected_at(what), } } /// Eat the next token and add an error that it is unexpected. pub fn unexpected(&mut self) { - self.start(); - match self.eat_peeked() { - Some(found) => self.abort(format!("unexpected {}", found)), - None => self.abort("unexpected end of file"), + match self.peek().cloned() { + Some(found) => { + self.start(); + self.eat(); + self.end(NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + )); + } + None => self.push_error("unexpected end of file"), } } @@ -584,20 +550,8 @@ impl<'s> Parser<'s> { self.children.last() } - /// Whether the last operation was successful. - pub fn success(&mut self) -> bool { - let s = self.success; - self.success = true; - s - } - - /// Declare the last operation as unsuccessful. - pub fn unsuccessful(&mut self) { - self.success = false; - } - - /// Amount of children in the current stack frame. - pub fn child_count(&self) -> usize { - self.children.len() + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) } } -- cgit v1.2.3 From cf2e527a026e81269ef716b4d6675ae6d981d681 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Fri, 5 Nov 2021 12:53:52 +0100 Subject: Code Review: No Patrick, question marks are not an instrument --- src/parse/parser.rs | 135 ++++++++++++++++++++++------------------------------ 1 file changed, 57 insertions(+), 78 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index bc028876..3813ee84 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -62,28 +62,24 @@ pub struct Marker(usize); impl Marker { /// Wraps all children in front of the marker. pub fn end(&self, p: &mut Parser, kind: NodeKind) { - if p.children.len() != self.0 { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - let len = children.iter().map(Green::len).sum(); - p.children - .insert(self.0, GreenNode::with_children(kind, len, children).into()); - } + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F, error: G) + pub fn filter_children(&self, p: &mut Parser, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { - p.filter_children(self, f, error) + p.filter_children(self, f) } /// Insert an error message that `what` was expected at the marker position. @@ -97,6 +93,20 @@ impl Marker { .into(), ); } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } } impl<'s> Parser<'s> { @@ -121,58 +131,31 @@ impl<'s> Parser<'s> { /// /// Each start call has to be matched with a call to `end`, /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - pub fn start(&mut self) { + fn start(&mut self) { self.stack.push(std::mem::take(&mut self.children)); } /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F, error: G) + fn filter_children(&mut self, count: &Marker, f: F) where - F: Fn(&Green) -> bool, - G: Fn(&NodeKind) -> (ErrorPosition, EcoString), + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut self.children[count.0 ..] { if !((self.tokens.mode() != TokenMode::Code || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error() - || f(&child)) + || child.kind().is_error()) { - let (pos, msg) = error(child.kind()); - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner) - .into(); - } - } - } - - /// Return the a child from the current stack frame specified by its - /// non-trivia index from the back. - pub fn child(&self, child: usize) -> Option<&Green> { - self.node_index_from_back(child).map(|i| &self.children[i]) - } - - /// Map a non-trivia index from the back of the current stack frame to a - /// normal index. - fn node_index_from_back(&self, child: usize) -> Option { - let len = self.children.len(); - let code = self.tokens.mode() == TokenMode::Code; - let mut seen = 0; - for x in (0 .. len).rev() { - if self.skip_type(self.children[x].kind()) && code { - continue; - } - if seen == child { - return Some(x); + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } } - seen += 1; } - - None } /// End the current node as a node of given `kind`. - pub fn end(&mut self, kind: NodeKind) { + fn end(&mut self, kind: NodeKind) { let outer = self.stack.pop().unwrap(); let mut children = std::mem::replace(&mut self.children, outer); @@ -191,15 +174,13 @@ impl<'s> Parser<'s> { remains.reverse(); } - let len = children.iter().map(|c| c.len()).sum(); - self.children - .push(GreenNode::with_children(kind, len, children).into()); + self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); } - pub fn perform(&mut self, kind: NodeKind, f: F) -> ParseResult + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: FnOnce(&mut Self) -> ParseResult, + F: FnOnce(&mut Self) -> T, { self.start(); let success = f(self); @@ -267,12 +248,12 @@ impl<'s> Parser<'s> { /// Consume the next token if it is the given one and produce an error if /// not. - pub fn eat_expect(&mut self, t: &NodeKind) -> bool { + pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { self.expected_at(t.as_str()); } - eaten + if eaten { Ok(()) } else { Err(()) } } /// Consume the next token, debug-asserting that it is one of the given ones. @@ -368,10 +349,9 @@ impl<'s> Parser<'s> { /// End the parsing of a group. /// /// This panics if no group was started. - pub fn end_group(&mut self) -> ParseResult { + pub fn end_group(&mut self) { let prev_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); - let mut success = true; self.tokens.set_mode(group.prev_mode); self.repeek(); @@ -392,7 +372,6 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format!("expected {}", end)); - success = false; } } @@ -415,8 +394,6 @@ impl<'s> Parser<'s> { self.next = self.tokens.next(); self.repeek(); } - - if success { Ok(()) } else { Err(()) } } /// Add an error that `what` was expected at the given span. @@ -436,12 +413,13 @@ impl<'s> Parser<'s> { pub fn expected(&mut self, what: &str) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("expected {}, found {}", what, found).into(), + ), + Self::eat, + ); } None => self.expected_at(what), } @@ -451,12 +429,13 @@ impl<'s> Parser<'s> { pub fn unexpected(&mut self) { match self.peek().cloned() { Some(found) => { - self.start(); - self.eat(); - self.end(NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - )); + self.perform( + NodeKind::Error( + ErrorPosition::Full, + format!("unexpected {}", found).into(), + ), + Self::eat, + ); } None => self.push_error("unexpected end of file"), } -- cgit v1.2.3 From 515fe89c5ea94e6bcdcfe387d006776d31ad3646 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 5 Nov 2021 13:21:39 +0100 Subject: Style changes Co-Authored-By: Martin --- src/parse/parser.rs | 172 +++++++++++++++++++++++----------------------------- 1 file changed, 77 insertions(+), 95 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 3813ee84..4f181821 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,10 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{ParseResult, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; +/// Allows parser methods to use the try operator. Not exposed as the parser +/// recovers from all errors. +pub(crate) type ParseResult = Result; + /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. @@ -56,59 +60,6 @@ pub enum Group { Imports, } -/// A marker that indicates where a child may start. -pub struct Marker(usize); - -impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - p.children - .insert(self.0, GreenNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) - where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, - { - p.filter_children(self, f) - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) - } - - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -127,40 +78,16 @@ impl<'s> Parser<'s> { } } - /// Start a nested node. - /// - /// Each start call has to be matched with a call to `end`, - /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`. - fn start(&mut self) { - self.stack.push(std::mem::take(&mut self.children)); - } - - /// Filter the last children using the given predicate. - fn filter_children(&mut self, count: &Marker, f: F) + /// Perform a subparse that wraps its result in a node with the given kind. + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: FnOnce(&mut Self) -> T, { - for child in &mut self.children[count.0 ..] { - if !((self.tokens.mode() != TokenMode::Code - || Self::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) - { - if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); - } - } - } - } + let prev = std::mem::take(&mut self.children); + let output = f(self); + let mut children = std::mem::replace(&mut self.children, prev); - /// End the current node as a node of given `kind`. - fn end(&mut self, kind: NodeKind) { - let outer = self.stack.pop().unwrap(); - let mut children = std::mem::replace(&mut self.children, outer); - - // have trailing whitespace continue to sit in self.children in code - // mode. + // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); @@ -176,16 +103,8 @@ impl<'s> Parser<'s> { self.children.push(GreenNode::with_children(kind, children).into()); self.children.extend(remains); - } - pub fn perform(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - self.start(); - let success = f(self); - self.end(kind); - success + output } /// Eat and wrap the next token. @@ -332,7 +251,6 @@ impl<'s> Parser<'s> { /// This panics if the next token does not start the given group. pub fn start_group(&mut self, kind: Group, mode: TokenMode) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); self.repeek(); @@ -534,3 +452,67 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } } + +/// A marker that indicates where a child may start. +pub struct Marker(usize); + +impl Marker { + /// Wraps all children in front of the marker. + pub fn end(&self, p: &mut Parser, kind: NodeKind) { + let stop_nl = p.stop_at_newline(); + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(&self, p: &mut Parser, f: F) + where + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + { + for child in &mut p.children[self.0 ..] { + if !((p.tokens.mode() != TokenMode::Code + || Parser::skip_type_ext(child.kind(), false)) + || child.kind().is_error()) + { + if let Err((pos, msg)) = f(child) { + let inner = std::mem::take(child); + *child = + GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + } + } + } + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(&self, p: &mut Parser, what: &str) { + p.children.insert( + self.0, + GreenData::new( + NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), + 0, + ) + .into(), + ); + } + + /// Return a reference to the child after the marker. + pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } + + pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } +} -- cgit v1.2.3 From 41bdafb5785dd85d20a3e79900b18e0010f6d71d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 12:12:02 +0100 Subject: Faster parser --- src/parse/parser.rs | 506 +++++++++++++++++++++++----------------------------- 1 file changed, 223 insertions(+), 283 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821..5d26ff63 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,88 +10,63 @@ pub(crate) type ParseResult = Result; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec, - /// The next token. - next: Option, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec>, /// The children of the currently built node. children: Vec, } -/// A logical group of tokens, e.g. `[...]`. -struct GroupEntry { - /// The kind of group this is. This decides which tokens will end the group. - /// For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. - pub kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - pub prev_mode: TokenMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A parenthesized group: `(...)`. - Paren, - /// A bracketed group: `[...]`. - Bracket, - /// A curly-braced group: `{...}`. - Brace, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Stmt, - /// A group for a single expression, ended by a line break. - Expr, - /// A group for import items, ended by a semicolon, line break or `from`. - Imports, -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); + let current = tokens.next(); Self { - src, tokens, - groups: vec![], - next: next.clone(), - peeked: next, + eof: current.is_none(), + current, prev_end: 0, - next_start: 0, - stack: vec![], + current_start: 0, + groups: vec![], children: vec![], } } + /// End the parsing process and return the last child. + pub fn finish(self) -> Vec { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform(&mut self, kind: NodeKind, f: F) -> T + pub fn perform(&mut self, kind: NodeKind, f: F) -> T where F: FnOnce(&mut Self) -> T, { - let prev = std::mem::take(&mut self.children); + let prev = mem::take(&mut self.children); let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); + let mut children = mem::replace(&mut self.children, prev); // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); for n in (0 .. len).rev() { - if !self.skip_type(&children[n].kind()) { + if !self.is_trivia(&children[n].kind()) { break; } @@ -107,66 +81,36 @@ impl<'s> Parser<'s> { output } - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof } - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); - /// End the parsing process and return the last child. - pub fn finish(&mut self) -> Rc { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } } - } - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() + self.repeek(); } - /// Consume the next token if it is the given one. + /// Eat if the current token it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { + let at = self.at(t); + if at { self.eat(); - true - } else { - false } + at } - /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map(&mut self, f: F) -> Option - where - F: FnOnce(&NodeKind) -> Option, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. + /// Eat if the current token is the given one and produce an error if not. pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { @@ -175,14 +119,13 @@ impl<'s> Parser<'s> { if eaten { Ok(()) } else { Err(()) } } - /// Consume the next token, debug-asserting that it is one of the given ones. + /// Eat, debug-asserting that the token is the given one. pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); + debug_assert_eq!(self.peek(), Some(t)); self.eat(); } - /// Consume tokens while the condition is true. + /// Eat tokens while the condition is true. pub fn eat_while(&mut self, mut f: F) where F: FnMut(&NodeKind) -> bool, @@ -192,68 +135,77 @@ impl<'s> Parser<'s> { } } - /// Peek at the next token without consuming it. + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() + if self.eof { None } else { self.current.as_ref() } } - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() + if self.prev_end() == self.current_start() { + self.peek() } else { None } } - /// Peek at the source of the next token. + /// Peek at the source of the current token. pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) + self.tokens.scanner().get(self.current_start() .. self.current_end()) } /// The byte index at which the last token ended. /// - /// Refers to the end of the last _non-whitespace_ token in code mode. + /// Refers to the end of the last non-trivia token in code mode. pub fn prev_end(&self) -> usize { self.prev_end } - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start } - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { self.tokens.index() } /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range) -> &'s str { - self.src.get(range).unwrap() + self.tokens.scanner().column(index) } /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. + /// `peek()` return `None`. Parsing can only continue with a matching call + /// to `end_group`. /// - /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, kind: Group, mode: TokenMode) { + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + self.repeek(); match kind { Group::Paren => self.eat_assert(&NodeKind::LeftParen), Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), @@ -268,12 +220,12 @@ impl<'s> Parser<'s> { /// /// This panics if no group was started. pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); + let group_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - let mut rescan = self.tokens.mode() != prev_mode; + let mut rescan = self.tokens.mode() != group_mode; // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { @@ -284,7 +236,7 @@ impl<'s> Parser<'s> { Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end.clone()) { + if self.current.as_ref() == Some(&end) { // Bump the delimeter and return. No need to rescan in this case. self.eat(); rescan = false; @@ -295,10 +247,10 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - if prev_mode == TokenMode::Code { + if group_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. len).rev() { - if !self.skip_type(self.children[n].kind()) { + if !self.is_trivia(self.children[n].kind()) { break; } @@ -307,129 +259,55 @@ impl<'s> Parser<'s> { } self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); self.repeek(); } } - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); } - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } + /// Take another look at the current token to recheck whether it ends a + /// group. + fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; } - /// Eat the next token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) } /// Returns whether the given type can be skipped over given the current /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::Space(n) => *n == 0 || !stop_at_newline, NodeKind::LineComment => true, NodeKind::BlockComment => true, _ => false, } } - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. - while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. - fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( self.groups.last().map(|group| group.kind), @@ -441,28 +319,76 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } +} - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); } - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) + /// Eat the current token and add an error that it is unexpected. + pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); } } -/// A marker that indicates where a child may start. +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Marker(usize); impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. + pub fn end(self, p: &mut Parser, kind: NodeKind) { let end = (self.0 .. p.children.len()) .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .find(|&i| !p.is_trivia(p.children[i].kind())) .unwrap_or(self.0) + 1; @@ -472,47 +398,61 @@ impl Marker { } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) + pub fn filter_children(self, p: &mut Parser, f: F) where F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut p.children[self.0 ..] { - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() { if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); } } } } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); } - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success + /// Return a reference to the child directly after the marker. + pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) } } + +/// A logical group of tokens, e.g. `[...]`. +struct GroupEntry { + /// The kind of group this is. This decides which tokens will end the group. + /// For example, a [`Group::Paren`] will be ended by + /// [`Token::RightParen`]. + pub kind: Group, + /// The mode the parser was in _before_ the group started (to which we go + /// back once the group ends). + pub prev_mode: TokenMode, +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A parenthesized group: `(...)`. + Paren, + /// A group ended by a semicolon or a line break: `;`, `\n`. + Stmt, + /// A group for a single expression, ended by a line break. + Expr, + /// A group for import items, ended by a semicolon, line break or `from`. + Imports, +} -- cgit v1.2.3 From 8117ca9950a2027efae133f811a26a4a7bf86a8e Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 15:30:08 +0100 Subject: Deduplicate trivia search --- src/parse/parser.rs | 72 +++++++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5d26ff63..a30895ad 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -16,7 +16,7 @@ pub struct Parser<'s> { eof: bool, /// The current token. current: Option, - /// The end byte index of the last (non-whitespace if in code mode) token. + /// The end byte index of the last non-trivia token. prev_end: usize, /// The start byte index of the peeked token. current_start: usize, @@ -59,25 +59,19 @@ impl<'s> Parser<'s> { { let prev = mem::take(&mut self.children); let output = f(self); + let until = self.trivia_start(); let mut children = mem::replace(&mut self.children, prev); - // Trailing trivia should not be wrapped into the new node. - let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { - let len = children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(&children[n].kind()) { - break; - } - - remains.push(children.pop().unwrap()); - } - remains.reverse(); + // Trailing trivia should not be wrapped into the new node. + let idx = self.children.len(); + self.children.push(Green::default()); + self.children.extend(children.drain(until ..)); + self.children[idx] = GreenNode::with_children(kind, children).into(); + } else { + self.children.push(GreenNode::with_children(kind, children).into()); } - self.children.push(GreenNode::with_children(kind, children).into()); - self.children.extend(remains); - output } @@ -86,7 +80,7 @@ impl<'s> Parser<'s> { self.eof } - /// Consume the current token and also trailing trivia if in code mode. + /// Consume the current token and also trailing trivia. pub fn eat(&mut self) { self.prev_end = self.tokens.index(); self.bump(); @@ -169,9 +163,7 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } - /// The byte index at which the last token ended. - /// - /// Refers to the end of the last non-trivia token in code mode. + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end } @@ -248,14 +240,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { if group_mode == TokenMode::Code { - let len = self.children.len(); - for n in (0 .. len).rev() { - if !self.is_trivia(self.children[n].kind()) { - break; - } - - self.children.pop(); - } + self.children.truncate(self.trivia_start()); } self.tokens.jump(self.prev_end()); @@ -307,6 +292,17 @@ impl<'s> Parser<'s> { } } + /// Find the index in the children list where trailing trivia starts. + fn trivia_start(&self) -> usize { + self.children.len() + - self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count() + } + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -356,15 +352,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.is_trivia(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, thing); + Marker(self.trivia_start()).expected_at(self, thing); } } @@ -384,15 +372,11 @@ impl Marker { success } - /// Wrap all children after the marker in a node with the given `kind`. + /// Wrap all children after the marker (excluding trailing trivia) in a node + /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !p.is_trivia(p.children[i].kind())) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + let until = p.trivia_start(); + let children = p.children.drain(self.0 .. until).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } -- cgit v1.2.3 From 95866d5fc9ae89a23c5754193c7de5d4fe4873b1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 7 Nov 2021 22:05:48 +0100 Subject: Tidy up AST --- src/parse/parser.rs | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a30895ad..5ebc2c17 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::mem; use super::{TokenMode, Tokens}; -use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// Allows parser methods to use the try operator. Not exposed as the parser @@ -131,11 +131,9 @@ impl<'s> Parser<'s> { /// Eat the current token, but change its type. pub fn convert(&mut self, kind: NodeKind) { - let idx = self.children.len(); + let marker = self.marker(); self.eat(); - if let Some(child) = self.children.get_mut(idx) { - child.set_kind(kind); - } + marker.convert(self, kind); } /// Whether the current token is of the given type. @@ -321,7 +319,7 @@ impl<'s> Parser<'s> { impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.children.push(GreenData::new(error, 0).into()); } @@ -330,7 +328,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("unexpected {}", found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.push_error("unexpected end of file"), @@ -342,7 +340,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("expected {}, found {}", thing, found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.expected_at(thing), @@ -352,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected_at(self, thing); + Marker(self.trivia_start()).expected(self, thing); } } @@ -384,15 +382,15 @@ impl Marker { /// Wrap all children that do not fulfill the predicate in error nodes. pub fn filter_children(self, p: &mut Parser, f: F) where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: Fn(&Green) -> Result<(), &'static str>, { for child in &mut p.children[self.0 ..] { if (p.tokens.mode() == TokenMode::Markup || !Parser::is_trivia_ext(child.kind(), false)) && !child.kind().is_error() { - if let Err((pos, msg)) = f(child) { - let error = NodeKind::Error(pos, msg); + if let Err(msg) = f(child) { + let error = NodeKind::Error(ErrorPos::Full, msg.into()); let inner = mem::take(child); *child = GreenNode::with_child(error, inner).into(); } @@ -401,16 +399,23 @@ impl Marker { } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(self, p: &mut Parser, what: &str) { + pub fn expected(self, p: &mut Parser, what: &str) { let msg = format!("expected {}", what); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); p.children.insert(self.0, GreenData::new(error, 0).into()); } - /// Return a reference to the child directly after the marker. - pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + /// Peek at the child directly after the marker. + pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> { p.children.get(self.0) } + + /// Convert the child directly after marker. + pub fn convert(self, p: &mut Parser, kind: NodeKind) { + if let Some(child) = p.children.get_mut(self.0) { + child.convert(kind); + } + } } /// A logical group of tokens, e.g. `[...]`. -- cgit v1.2.3 From 38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 8 Nov 2021 12:13:32 +0100 Subject: Final touches --- src/parse/parser.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/parse/parser.rs') diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ebc2c17..1c4c2a5c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -52,6 +52,17 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } + /// Create a markup right before the trailing trivia. + pub fn trivia_start(&self) -> Marker { + let count = self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count(); + Marker(self.children.len() - count) + } + /// Perform a subparse that wraps its result in a node with the given kind. pub fn perform(&mut self, kind: NodeKind, f: F) -> T where @@ -66,7 +77,7 @@ impl<'s> Parser<'s> { // Trailing trivia should not be wrapped into the new node. let idx = self.children.len(); self.children.push(Green::default()); - self.children.extend(children.drain(until ..)); + self.children.extend(children.drain(until.0 ..)); self.children[idx] = GreenNode::with_children(kind, children).into(); } else { self.children.push(GreenNode::with_children(kind, children).into()); @@ -238,7 +249,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { if group_mode == TokenMode::Code { - self.children.truncate(self.trivia_start()); + self.children.truncate(self.trivia_start().0); } self.tokens.jump(self.prev_end()); @@ -290,17 +301,6 @@ impl<'s> Parser<'s> { } } - /// Find the index in the children list where trailing trivia starts. - fn trivia_start(&self) -> usize { - self.children.len() - - self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count() - } - /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -350,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected(self, thing); + self.trivia_start().expected(self, thing); } } @@ -374,7 +374,7 @@ impl Marker { /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { let until = p.trivia_start(); - let children = p.children.drain(self.0 .. until).collect(); + let children = p.children.drain(self.0 .. until.0).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } -- cgit v1.2.3