summaryrefslogtreecommitdiff
path: root/src/parse/parser.rs
diff options
context:
space:
mode:
authorMartin Haug <mhaug@live.de>2021-10-23 19:03:27 +0200
committerMartin Haug <mhaug@live.de>2021-11-05 13:44:49 +0100
commit4875633acf4701705b9b3b014eb7d94268b897c2 (patch)
tree0aedda87c8c2dc65316e2455c35e72054d9bae0e /src/parse/parser.rs
parentea6ee3f667e922ed2f21b08719a45d2395787932 (diff)
Change parser
Diffstat (limited to 'src/parse/parser.rs')
-rw-r--r--src/parse/parser.rs415
1 files changed, 302 insertions, 113 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 347d6f71..f62e882a 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,29 +1,34 @@
use std::ops::Range;
+use std::rc::Rc;
use super::{TokenMode, Tokens};
-use crate::diag::Error;
use crate::source::{SourceFile, SourceId};
-use crate::syntax::{IntoSpan, Pos, Span, Token};
+use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
+use crate::util::EcoString;
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The parsed file.
source: &'s SourceFile,
- /// Parsing errors.
- errors: Vec<Error>,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// The stack of open groups.
groups: Vec<GroupEntry>,
/// The next token.
- next: Option<Token<'s>>,
+ next: Option<NodeKind>,
/// The peeked token.
/// (Same as `next` except if we are at the end of group, then `None`).
- peeked: Option<Token<'s>>,
+ peeked: Option<NodeKind>,
/// The end index of the last (non-whitespace if in code mode) token.
prev_end: usize,
/// The start index of the peeked token.
next_start: usize,
+ /// A stack of outer children vectors.
+ stack: Vec<Vec<Green>>,
+ /// The children of the currently built node.
+ children: Vec<Green>,
+ /// Whether the last parsing step was successful.
+ success: bool,
}
/// A logical group of tokens, e.g. `[...]`.
@@ -32,9 +37,6 @@ struct GroupEntry {
/// For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`].
pub kind: Group,
- /// The start index of the group. Used by `Parser::end_group` to return the
- /// group's full span.
- pub start: usize,
/// The mode the parser was in _before_ the group started (to which we go
/// back once the group ends).
pub prev_mode: TokenMode,
@@ -60,51 +62,204 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(source: &'s SourceFile) -> Self {
- let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
+ let mut tokens = Tokens::new(source, TokenMode::Markup);
let next = tokens.next();
Self {
source,
- errors: vec![],
tokens,
groups: vec![],
- next,
+ next: next.clone(),
peeked: next,
prev_end: 0,
next_start: 0,
+ stack: vec![],
+ children: vec![],
+ success: true,
}
}
- /// Finish parsing and return all errors.
- pub fn finish(self) -> Vec<Error> {
- self.errors
- }
-
/// The id of the parsed source file.
pub fn id(&self) -> SourceId {
self.source.id()
}
+ /// Start a nested node.
+ ///
+ /// Each start call has to be matched with a call to `end`,
+ /// `end_with_custom_children`, `lift`, `abort`, or `end_or_abort`.
+ pub fn start(&mut self) {
+ self.stack.push(std::mem::take(&mut self.children));
+ }
+
+ /// Start a nested node, preserving a number of the current children.
+ pub fn start_with(&mut self, preserve: usize) {
+ let preserved = self.children.drain(self.children.len() - preserve ..).collect();
+ self.stack.push(std::mem::replace(&mut self.children, preserved));
+ }
+
+ /// Filter the last children using the given predicate.
+ pub fn filter_children<F, G>(&mut self, count: usize, f: F, error: G)
+ where
+ F: Fn(&Green) -> bool,
+ G: Fn(&NodeKind) -> (ErrorPosition, EcoString),
+ {
+ for child in &mut self.children[count ..] {
+ if !((self.tokens.mode() != TokenMode::Code
+ || Self::skip_type_ext(child.kind(), false))
+ || child.kind().is_error()
+ || f(&child))
+ {
+ let (pos, msg) = error(child.kind());
+ let inner = std::mem::take(child);
+ *child =
+ GreenNode::with_child(NodeKind::Error(pos, msg), inner.len(), inner)
+ .into();
+ }
+ }
+ }
+
+ pub fn child(&self, child: usize) -> Option<&Green> {
+ self.node_index_from_back(child).map(|i| &self.children[i])
+ }
+
+ fn node_index_from_back(&self, child: usize) -> Option<usize> {
+ let len = self.children.len();
+ let code = self.tokens.mode() == TokenMode::Code;
+ let mut seen = 0;
+ for x in (0 .. len).rev() {
+ if self.skip_type(self.children[x].kind()) && code {
+ continue;
+ }
+ if seen == child {
+ return Some(x);
+ }
+ seen += 1;
+ }
+
+ None
+ }
+
+ /// End the current node as a node of given `kind`.
+ pub fn end(&mut self, kind: NodeKind) {
+ let outer = self.stack.pop().unwrap();
+ let mut children = std::mem::replace(&mut self.children, outer);
+
+ // have trailing whitespace continue to sit in self.children in code
+ // mode.
+ let mut remains = vec![];
+ if self.tokens.mode() == TokenMode::Code {
+ let len = children.len();
+ for n in (0 .. len).rev() {
+ if !self.skip_type(&children[n].kind()) {
+ break;
+ }
+
+ remains.push(children.pop().unwrap());
+ }
+ remains.reverse();
+ }
+
+ let len = children.iter().map(|c| c.len()).sum();
+ self.children
+ .push(GreenNode::with_children(kind, len, children.into_iter()).into());
+ self.children.extend(remains);
+ self.success = true;
+ }
+
+ /// End the current node as a node of given `kind`, and start a new node
+ /// with the ended node as a first child. The function returns how many
+ /// children the stack frame had before and how many were appended (accounts
+ /// for trivia).
+ pub fn end_and_start_with(&mut self, kind: NodeKind) -> (usize, usize) {
+ let stack_offset = self.stack.last().unwrap().len();
+ self.end(kind);
+ let diff = self.children.len() - stack_offset;
+ self.start_with(diff);
+ (stack_offset, diff)
+ }
+
+ pub fn wrap(&mut self, index: usize, kind: NodeKind) {
+ let index = self.node_index_from_back(index).unwrap();
+ let child = std::mem::take(&mut self.children[index]);
+ let item = GreenNode::with_child(kind, child.len(), child);
+ self.children[index] = item.into();
+ }
+
+ pub fn convert(&mut self, kind: NodeKind) {
+ self.start();
+ self.eat();
+ self.end(kind);
+ }
+
+    /// End the current node and undo its existence, inlining all accumulated
+ /// children into its parent.
+ pub fn lift(&mut self) {
+ let outer = self.stack.pop().unwrap();
+ let children = std::mem::replace(&mut self.children, outer);
+ self.children.extend(children);
+ self.success = true;
+ }
+
+ /// End the current node and undo its existence, deleting all accumulated
+ /// children.
+ pub fn abort(&mut self, msg: impl Into<String>) {
+ self.end(NodeKind::Error(ErrorPosition::Full, msg.into().into()));
+ self.success = false;
+ }
+
+ pub fn may_lift_abort(&mut self) -> bool {
+ if !self.success {
+ self.lift();
+ self.success = false;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub fn may_end_abort(&mut self, kind: NodeKind) -> bool {
+ if !self.success {
+ self.end(kind);
+ self.success = false;
+ true
+ } else {
+ false
+ }
+ }
+
+ /// End the current node as a node of given `kind` if the last parse was
+ /// successful, otherwise, abort.
+ pub fn end_or_abort(&mut self, kind: NodeKind) -> bool {
+ if self.success {
+ self.end(kind);
+ true
+ } else {
+ self.may_end_abort(kind);
+ false
+ }
+ }
+
+ pub fn finish(&mut self) -> Rc<GreenNode> {
+ if let Green::Node(n) = self.children.pop().unwrap() {
+ n
+ } else {
+ panic!()
+ }
+ }
+
/// Whether the end of the source string or group is reached.
pub fn eof(&self) -> bool {
self.peek().is_none()
}
- /// Consume the next token.
- pub fn eat(&mut self) -> Option<Token<'s>> {
+ pub fn eat(&mut self) -> Option<NodeKind> {
let token = self.peek()?;
self.bump();
Some(token)
}
- /// Eat the next token and return its source range.
- pub fn eat_span(&mut self) -> Span {
- let start = self.next_start();
- self.eat();
- Span::new(self.id(), start, self.prev_end())
- }
-
/// Consume the next token if it is the given one.
- pub fn eat_if(&mut self, t: Token) -> bool {
+ pub fn eat_if(&mut self, t: NodeKind) -> bool {
if self.peek() == Some(t) {
self.bump();
true
@@ -116,7 +271,7 @@ impl<'s> Parser<'s> {
    /// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
where
- F: FnOnce(Token<'s>) -> Option<T>,
+ F: FnOnce(NodeKind) -> Option<T>,
{
let token = self.peek()?;
let mapped = f(token);
@@ -128,16 +283,16 @@ impl<'s> Parser<'s> {
/// Consume the next token if it is the given one and produce an error if
/// not.
- pub fn eat_expect(&mut self, t: Token) -> bool {
- let eaten = self.eat_if(t);
+ pub fn eat_expect(&mut self, t: NodeKind) -> bool {
+ let eaten = self.eat_if(t.clone());
if !eaten {
- self.expected_at(self.prev_end(), t.name());
+ self.expected_at(&t.to_string());
}
eaten
}
/// Consume the next token, debug-asserting that it is one of the given ones.
- pub fn eat_assert(&mut self, t: Token) {
+ pub fn eat_assert(&mut self, t: NodeKind) {
let next = self.eat();
debug_assert_eq!(next, Some(t));
}
@@ -145,7 +300,7 @@ impl<'s> Parser<'s> {
/// Consume tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
- F: FnMut(Token<'s>) -> bool,
+ F: FnMut(NodeKind) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
@@ -153,42 +308,25 @@ impl<'s> Parser<'s> {
}
/// Peek at the next token without consuming it.
- pub fn peek(&self) -> Option<Token<'s>> {
- self.peeked
+ pub fn peek(&self) -> Option<NodeKind> {
+ self.peeked.clone()
}
/// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between.
- pub fn peek_direct(&self) -> Option<Token<'s>> {
+ pub fn peek_direct(&self) -> Option<&NodeKind> {
if self.next_start() == self.prev_end() {
- self.peeked
+ self.peeked.as_ref()
} else {
None
}
}
- /// Peek at the span of the next token.
- ///
- /// Has length zero if `peek()` returns `None`.
- pub fn peek_span(&self) -> Span {
- Span::new(self.id(), self.next_start(), self.next_end())
- }
-
/// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str {
self.get(self.next_start() .. self.next_end())
}
- /// Checks whether the next token fulfills a condition.
- ///
- /// Returns `false` if there is no next token.
- pub fn check<F>(&self, f: F) -> bool
- where
- F: FnOnce(Token<'s>) -> bool,
- {
- self.peek().map_or(false, f)
- }
-
/// The byte index at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
@@ -219,11 +357,6 @@ impl<'s> Parser<'s> {
self.source.get(range).unwrap()
}
- /// The span from `start` to [`self.prev_end()`](Self::prev_end).
- pub fn span_from(&self, start: impl Into<Pos>) -> Span {
- Span::new(self.id(), start, self.prev_end())
- }
-
/// Continue parsing in a group.
///
/// When the end delimiter of the group is reached, all subsequent calls to
@@ -232,19 +365,15 @@ impl<'s> Parser<'s> {
///
/// This panics if the next token does not start the given group.
pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
- self.groups.push(GroupEntry {
- kind,
- start: self.next_start(),
- prev_mode: self.tokens.mode(),
- });
+ self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
self.tokens.set_mode(mode);
self.repeek();
match kind {
- Group::Paren => self.eat_assert(Token::LeftParen),
- Group::Bracket => self.eat_assert(Token::LeftBracket),
- Group::Brace => self.eat_assert(Token::LeftBrace),
+ Group::Paren => self.eat_assert(NodeKind::LeftParen),
+ Group::Bracket => self.eat_assert(NodeKind::LeftBracket),
+ Group::Brace => self.eat_assert(NodeKind::LeftBrace),
Group::Stmt => {}
Group::Expr => {}
Group::Imports => {}
@@ -254,7 +383,7 @@ impl<'s> Parser<'s> {
/// End the parsing of a group.
///
/// This panics if no group was started.
- pub fn end_group(&mut self) -> Span {
+ pub fn end_group(&mut self) {
let prev_mode = self.tokens.mode();
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
@@ -264,83 +393,125 @@ impl<'s> Parser<'s> {
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
- Group::Paren => Some((Token::RightParen, true)),
- Group::Bracket => Some((Token::RightBracket, true)),
- Group::Brace => Some((Token::RightBrace, true)),
- Group::Stmt => Some((Token::Semicolon, false)),
+ Group::Paren => Some((NodeKind::RightParen, true)),
+ Group::Bracket => Some((NodeKind::RightBracket, true)),
+ Group::Brace => Some((NodeKind::RightBrace, true)),
+ Group::Stmt => Some((NodeKind::Semicolon, false)),
Group::Expr => None,
Group::Imports => None,
} {
- if self.next == Some(end) {
+ if self.next == Some(end.clone()) {
            // Bump the delimiter and return. No need to rescan in this case.
self.bump();
rescan = false;
} else if required {
- self.error(
- self.next_start() .. self.next_start(),
- format!("expected {}", end.name()),
- );
+ self.start();
+ self.abort(format!("expected {}", end.to_string()));
}
}
// Rescan the peeked token if the mode changed.
if rescan {
self.tokens.jump(self.prev_end());
- self.bump();
- }
- Span::new(self.id(), group.start, self.prev_end())
- }
+ if prev_mode == TokenMode::Code {
+ let len = self.children.len();
+ for n in (0 .. len).rev() {
+ if !self.skip_type(self.children[n].kind()) {
+ break;
+ }
+
+ self.children.pop();
+ }
+ }
- /// Add an error with location and message.
- pub fn error(&mut self, span: impl IntoSpan, message: impl Into<String>) {
- self.errors.push(Error::new(span.into_span(self.id()), message));
+ self.fast_forward();
+ }
}
/// Add an error that `what` was expected at the given span.
- pub fn expected_at(&mut self, span: impl IntoSpan, what: &str) {
- self.error(span, format!("expected {}", what));
+ pub fn expected_at(&mut self, what: &str) {
+ let mut found = self.children.len();
+ for (i, node) in self.children.iter().enumerate().rev() {
+ if !self.skip_type(node.kind()) {
+ break;
+ }
+ found = i;
+ }
+
+ self.expected_at_child(found, what);
+ }
+
+ /// Add an error that `what` was expected at the given child index.
+ pub fn expected_at_child(&mut self, index: usize, what: &str) {
+ self.children.insert(
+ index,
+ GreenData::new(
+ NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()),
+ 0,
+ )
+ .into(),
+ );
}
/// Eat the next token and add an error that it is not the expected `thing`.
pub fn expected(&mut self, what: &str) {
- let before = self.next_start();
+ self.start();
if let Some(found) = self.eat() {
- let after = self.prev_end();
- self.error(
- before .. after,
- format!("expected {}, found {}", what, found.name()),
- );
+ self.abort(format!("expected {}, found {}", what, found.to_string()))
} else {
- self.expected_at(self.next_start(), what);
+ self.lift();
+ self.expected_at(what);
}
}
/// Eat the next token and add an error that it is unexpected.
pub fn unexpected(&mut self) {
- let before = self.next_start();
+ self.start();
if let Some(found) = self.eat() {
- let after = self.prev_end();
- self.error(before .. after, format!("unexpected {}", found.name()));
+ self.abort(format!("unexpected {}", found.to_string()))
+ } else {
+ self.abort("unexpected end of file")
}
}
+ pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
+ match token {
+ NodeKind::Space(n) => n < &1 || !stop_at_newline,
+ NodeKind::LineComment => true,
+ NodeKind::BlockComment => true,
+ _ => false,
+ }
+ }
+
+ fn skip_type(&self, token: &NodeKind) -> bool {
+ Self::skip_type_ext(token, self.stop_at_newline())
+ }
+
/// Move to the next token.
fn bump(&mut self) {
- self.prev_end = self.tokens.index().into();
+ self.children.push(
+ GreenData::new(
+ self.next.clone().unwrap(),
+ self.tokens.index() - self.next_start,
+ )
+ .into(),
+ );
+
+ self.fast_forward();
+ }
+
+ pub fn fast_forward(&mut self) {
+ if !self.next.as_ref().map_or(false, |x| self.skip_type(x)) {
+ self.prev_end = self.tokens.index().into();
+ }
self.next_start = self.tokens.index().into();
self.next = self.tokens.next();
if self.tokens.mode() == TokenMode::Code {
// Skip whitespace and comments.
- while match self.next {
- Some(Token::Space(n)) => n < 1 || !self.stop_at_newline(),
- Some(Token::LineComment(_)) => true,
- Some(Token::BlockComment(_)) => true,
- _ => false,
- } {
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
+ while self.next.as_ref().map_or(false, |x| self.skip_type(x)) {
+ self.bump();
}
}
@@ -349,19 +520,19 @@ impl<'s> Parser<'s> {
/// Take another look at the next token to recheck whether it ends a group.
fn repeek(&mut self) {
- self.peeked = self.next;
- let token = match self.next {
+ self.peeked = self.next.clone();
+ let token = match self.next.as_ref() {
Some(token) => token,
None => return,
};
if match token {
- Token::RightParen => self.inside(Group::Paren),
- Token::RightBracket => self.inside(Group::Bracket),
- Token::RightBrace => self.inside(Group::Brace),
- Token::Semicolon => self.inside(Group::Stmt),
- Token::From => self.inside(Group::Imports),
- Token::Space(n) => n >= 1 && self.stop_at_newline(),
+ NodeKind::RightParen => self.inside(Group::Paren),
+ NodeKind::RightBracket => self.inside(Group::Bracket),
+ NodeKind::RightBrace => self.inside(Group::Brace),
+ NodeKind::Semicolon => self.inside(Group::Stmt),
+ NodeKind::From => self.inside(Group::Imports),
+ NodeKind::Space(n) => n > &0 && self.stop_at_newline(),
_ => false,
} {
self.peeked = None;
@@ -380,4 +551,22 @@ impl<'s> Parser<'s> {
fn inside(&self, kind: Group) -> bool {
self.groups.iter().any(|g| g.kind == kind)
}
+
+ pub fn last_child(&self) -> Option<&Green> {
+ self.children.last()
+ }
+
+ pub fn success(&mut self) -> bool {
+ let s = self.success;
+ self.success = true;
+ s
+ }
+
+ pub fn unsuccessful(&mut self) {
+ self.success = false;
+ }
+
+ pub fn child_count(&self) -> usize {
+ self.children.len()
+ }
}