diff options
Diffstat (limited to 'src/parse/parser.rs')
| -rw-r--r-- | src/parse/parser.rs | 506 |
1 files changed, 223 insertions, 283 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821..5d26ff63 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,88 +10,63 @@ pub(crate) type ParseResult<T = ()> = Result<T, ()>; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option<NodeKind>, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec<GroupEntry>, - /// The next token. - next: Option<NodeKind>, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option<NodeKind>, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec<Vec<Green>>, /// The children of the currently built node. children: Vec<Green>, } -/// A logical group of tokens, e.g. `[...]`. -struct GroupEntry { - /// The kind of group this is. This decides which tokens will end the group. - /// For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. - pub kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - pub prev_mode: TokenMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A parenthesized group: `(...)`. - Paren, - /// A bracketed group: `[...]`. - Bracket, - /// A curly-braced group: `{...}`. - Brace, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Stmt, - /// A group for a single expression, ended by a line break. - Expr, - /// A group for import items, ended by a semicolon, line break or `from`. - Imports, -} - impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); + let current = tokens.next(); Self { - src, tokens, - groups: vec![], - next: next.clone(), - peeked: next, + eof: current.is_none(), + current, prev_end: 0, - next_start: 0, - stack: vec![], + current_start: 0, + groups: vec![], children: vec![], } } + /// End the parsing process and return the last child. + pub fn finish(self) -> Vec<Green> { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform<T, F>(&mut self, kind: NodeKind, f: F) -> T + pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T where F: FnOnce(&mut Self) -> T, { - let prev = std::mem::take(&mut self.children); + let prev = mem::take(&mut self.children); let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); + let mut children = mem::replace(&mut self.children, prev); // Trailing trivia should not be wrapped into the new node. let mut remains = vec![]; if self.tokens.mode() == TokenMode::Code { let len = children.len(); for n in (0 .. len).rev() { - if !self.skip_type(&children[n].kind()) { + if !self.is_trivia(&children[n].kind()) { break; } @@ -107,66 +81,36 @@ impl<'s> Parser<'s> { output } - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof } - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into<String>) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); - /// End the parsing process and return the last child. - pub fn finish(&mut self) -> Rc<GreenNode> { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } } - } - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() + self.repeek(); } - /// Consume the next token if it is the given one. + /// Eat if the current token it is the given one. pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { + let at = self.at(t); + if at { self.eat(); - true - } else { - false } + at } - /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map<T, F>(&mut self, f: F) -> Option<T> - where - F: FnOnce(&NodeKind) -> Option<T>, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. + /// Eat if the current token is the given one and produce an error if not. pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { let eaten = self.eat_if(t); if !eaten { @@ -175,14 +119,13 @@ impl<'s> Parser<'s> { if eaten { Ok(()) } else { Err(()) } } - /// Consume the next token, debug-asserting that it is one of the given ones. + /// Eat, debug-asserting that the token is the given one. pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); + debug_assert_eq!(self.peek(), Some(t)); self.eat(); } - /// Consume tokens while the condition is true. + /// Eat tokens while the condition is true. pub fn eat_while<F>(&mut self, mut f: F) where F: FnMut(&NodeKind) -> bool, @@ -192,68 +135,77 @@ impl<'s> Parser<'s> { } } - /// Peek at the next token without consuming it. + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() + if self.eof { None } else { self.current.as_ref() } } - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() + if self.prev_end() == self.current_start() { + self.peek() } else { None } } - /// Peek at the source of the next token. + /// Peek at the source of the current token. pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) + self.tokens.scanner().get(self.current_start() .. self.current_end()) } /// The byte index at which the last token ended. /// - /// Refers to the end of the last _non-whitespace_ token in code mode. + /// Refers to the end of the last non-trivia token in code mode. pub fn prev_end(&self) -> usize { self.prev_end } - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start } - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { self.tokens.index() } /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range<usize>) -> &'s str { - self.src.get(range).unwrap() + self.tokens.scanner().column(index) } /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. + /// `peek()` return `None`. Parsing can only continue with a matching call + /// to `end_group`. /// - /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, kind: Group, mode: TokenMode) { + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + self.repeek(); match kind { Group::Paren => self.eat_assert(&NodeKind::LeftParen), Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), @@ -268,12 +220,12 @@ impl<'s> Parser<'s> { /// /// This panics if no group was started. pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); + let group_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - let mut rescan = self.tokens.mode() != prev_mode; + let mut rescan = self.tokens.mode() != group_mode; // Eat the end delimiter if there is one. if let Some((end, required)) = match group.kind { @@ -284,7 +236,7 @@ impl<'s> Parser<'s> { Group::Expr => None, Group::Imports => None, } { - if self.next == Some(end.clone()) { + if self.current.as_ref() == Some(&end) { // Bump the delimeter and return. No need to rescan in this case. self.eat(); rescan = false; @@ -295,10 +247,10 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { - if prev_mode == TokenMode::Code { + if group_mode == TokenMode::Code { let len = self.children.len(); for n in (0 .. len).rev() { - if !self.skip_type(self.children[n].kind()) { + if !self.is_trivia(self.children[n].kind()) { break; } @@ -307,129 +259,55 @@ impl<'s> Parser<'s> { } self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); self.repeek(); } } - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); } - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } + /// Take another look at the current token to recheck whether it ends a + /// group. + fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; } - /// Eat the next token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) } /// Returns whether the given type can be skipped over given the current /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, + NodeKind::Space(n) => *n == 0 || !stop_at_newline, NodeKind::LineComment => true, NodeKind::BlockComment => true, _ => false, } } - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. - while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. - fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. + /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( self.groups.last().map(|group| group.kind), @@ -441,28 +319,76 @@ impl<'s> Parser<'s> { fn inside(&self, kind: Group) -> bool { self.groups.iter().any(|g| g.kind == kind) } +} - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into<EcoString>) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); } - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) + /// Eat the current token and add an error that it is unexpected. + pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); } } -/// A marker that indicates where a child may start. +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Marker(usize); impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. + pub fn end(self, p: &mut Parser, kind: NodeKind) { let end = (self.0 .. p.children.len()) .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) + .find(|&i| !p.is_trivia(p.children[i].kind())) .unwrap_or(self.0) + 1; @@ -472,47 +398,61 @@ impl Marker { } /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children<F>(&self, p: &mut Parser, f: F) + pub fn filter_children<F>(self, p: &mut Parser, f: F) where F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, { for child in &mut p.children[self.0 ..] { - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() { if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); } } } } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); } - pub fn perform<T, F>(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success + /// Return a reference to the child directly after the marker. + pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) } } + +/// A logical group of tokens, e.g. `[...]`. +struct GroupEntry { + /// The kind of group this is. This decides which tokens will end the group. + /// For example, a [`Group::Paren`] will be ended by + /// [`Token::RightParen`]. + pub kind: Group, + /// The mode the parser was in _before_ the group started (to which we go + /// back once the group ends). + pub prev_mode: TokenMode, +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A parenthesized group: `(...)`. + Paren, + /// A group ended by a semicolon or a line break: `;`, `\n`. + Stmt, + /// A group for a single expression, ended by a line break. + Expr, + /// A group for import items, ended by a semicolon, line break or `from`. + Imports, +} |
