summaryrefslogtreecommitdiff
path: root/src/parse/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/parser.rs')
-rw-r--r--src/parse/parser.rs506
1 files changed, 223 insertions, 283 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 4f181821..5d26ff63 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,5 +1,4 @@
-use std::ops::Range;
-use std::rc::Rc;
+use std::mem;
use super::{TokenMode, Tokens};
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
@@ -11,88 +10,63 @@ pub(crate) type ParseResult<T = ()> = Result<T, ()>;
/// A convenient token-based parser.
pub struct Parser<'s> {
- /// The parsed file.
- src: &'s str,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
+ /// Whether we are at the end of the file or of a group.
+ eof: bool,
+ /// The current token.
+ current: Option<NodeKind>,
+ /// The end byte index of the last (non-whitespace if in code mode) token.
+ prev_end: usize,
+ /// The start byte index of the peeked token.
+ current_start: usize,
/// The stack of open groups.
groups: Vec<GroupEntry>,
- /// The next token.
- next: Option<NodeKind>,
- /// The peeked token.
- /// (Same as `next` except if we are at the end of group, then `None`).
- peeked: Option<NodeKind>,
- /// The end index of the last (non-whitespace if in code mode) token.
- prev_end: usize,
- /// The start index of the peeked token.
- next_start: usize,
- /// A stack of outer children vectors.
- stack: Vec<Vec<Green>>,
/// The children of the currently built node.
children: Vec<Green>,
}
-/// A logical group of tokens, e.g. `[...]`.
-struct GroupEntry {
- /// The kind of group this is. This decides which tokens will end the group.
- /// For example, a [`Group::Paren`] will be ended by
- /// [`Token::RightParen`].
- pub kind: Group,
- /// The mode the parser was in _before_ the group started (to which we go
- /// back once the group ends).
- pub prev_mode: TokenMode,
-}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Group {
- /// A parenthesized group: `(...)`.
- Paren,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A curly-braced group: `{...}`.
- Brace,
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Stmt,
- /// A group for a single expression, ended by a line break.
- Expr,
- /// A group for import items, ended by a semicolon, line break or `from`.
- Imports,
-}
-
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup);
- let next = tokens.next();
+ let current = tokens.next();
Self {
- src,
tokens,
- groups: vec![],
- next: next.clone(),
- peeked: next,
+ eof: current.is_none(),
+ current,
prev_end: 0,
- next_start: 0,
- stack: vec![],
+ current_start: 0,
+ groups: vec![],
children: vec![],
}
}
+ /// End the parsing process and return the last child.
+ pub fn finish(self) -> Vec<Green> {
+ self.children
+ }
+
+ /// Create a new marker.
+ pub fn marker(&mut self) -> Marker {
+ Marker(self.children.len())
+ }
+
/// Perform a subparse that wraps its result in a node with the given kind.
- pub fn perform<T, F>(&mut self, kind: NodeKind, f: F) -> T
+ pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
where
F: FnOnce(&mut Self) -> T,
{
- let prev = std::mem::take(&mut self.children);
+ let prev = mem::take(&mut self.children);
let output = f(self);
- let mut children = std::mem::replace(&mut self.children, prev);
+ let mut children = mem::replace(&mut self.children, prev);
// Trailing trivia should not be wrapped into the new node.
let mut remains = vec![];
if self.tokens.mode() == TokenMode::Code {
let len = children.len();
for n in (0 .. len).rev() {
- if !self.skip_type(&children[n].kind()) {
+ if !self.is_trivia(&children[n].kind()) {
break;
}
@@ -107,66 +81,36 @@ impl<'s> Parser<'s> {
output
}
- /// Eat and wrap the next token.
- pub fn convert(&mut self, kind: NodeKind) {
- self.eat();
- self.children.last_mut().unwrap().set_kind(kind);
- }
-
- /// End the current node and undo its existence, inling all accumulated
- /// children into its parent.
- pub fn lift(&mut self) {
- let outer = self.stack.pop().unwrap();
- let children = std::mem::replace(&mut self.children, outer);
- self.children.extend(children);
+ /// Whether the end of the source string or group is reached.
+ pub fn eof(&self) -> bool {
+ self.eof
}
- /// Add an error to the current children list.
- fn push_error(&mut self, msg: impl Into<String>) {
- self.children.push(
- GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0)
- .into(),
- );
- }
+ /// Consume the current token and also trailing trivia if in code mode.
+ pub fn eat(&mut self) {
+ self.prev_end = self.tokens.index();
+ self.bump();
- /// End the parsing process and return the last child.
- pub fn finish(&mut self) -> Rc<GreenNode> {
- match self.children.pop().unwrap() {
- Green::Node(n) => n,
- _ => panic!(),
+ if self.tokens.mode() == TokenMode::Code {
+ // Skip whitespace and comments.
+ while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
+ self.bump();
+ }
}
- }
- /// Whether the end of the source string or group is reached.
- pub fn eof(&self) -> bool {
- self.peek().is_none()
+ self.repeek();
}
- /// Consume the next token if it is the given one.
+ /// Eat if the current token it is the given one.
pub fn eat_if(&mut self, t: &NodeKind) -> bool {
- if self.peek() == Some(t) {
+ let at = self.at(t);
+ if at {
self.eat();
- true
- } else {
- false
}
+ at
}
- /// Consume the next token if the closure maps it a to `Some`-variant.
- pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
- where
- F: FnOnce(&NodeKind) -> Option<T>,
- {
- let token = self.peek()?;
- let mapped = f(token);
- if mapped.is_some() {
- self.eat();
- }
- mapped
- }
-
- /// Consume the next token if it is the given one and produce an error if
- /// not.
+ /// Eat if the current token is the given one and produce an error if not.
pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
let eaten = self.eat_if(t);
if !eaten {
@@ -175,14 +119,13 @@ impl<'s> Parser<'s> {
if eaten { Ok(()) } else { Err(()) }
}
- /// Consume the next token, debug-asserting that it is one of the given ones.
+ /// Eat, debug-asserting that the token is the given one.
pub fn eat_assert(&mut self, t: &NodeKind) {
- let next = self.peek();
- debug_assert_eq!(next, Some(t));
+ debug_assert_eq!(self.peek(), Some(t));
self.eat();
}
- /// Consume tokens while the condition is true.
+ /// Eat tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(&NodeKind) -> bool,
@@ -192,68 +135,77 @@ impl<'s> Parser<'s> {
}
}
- /// Peek at the next token without consuming it.
+ /// Eat the current token, but change its type.
+ pub fn convert(&mut self, kind: NodeKind) {
+ let idx = self.children.len();
+ self.eat();
+ if let Some(child) = self.children.get_mut(idx) {
+ child.set_kind(kind);
+ }
+ }
+
+ /// Whether the current token is of the given type.
+ pub fn at(&self, kind: &NodeKind) -> bool {
+ self.peek() == Some(kind)
+ }
+
+ /// Peek at the current token without consuming it.
pub fn peek(&self) -> Option<&NodeKind> {
- self.peeked.as_ref()
+ if self.eof { None } else { self.current.as_ref() }
}
- /// Peek at the next token if it follows immediately after the last one
- /// without any whitespace in between.
+ /// Peek at the current token, if it follows immediately after the last one
+ /// without any trivia in between.
pub fn peek_direct(&self) -> Option<&NodeKind> {
- if self.next_start() == self.prev_end() {
- self.peeked.as_ref()
+ if self.prev_end() == self.current_start() {
+ self.peek()
} else {
None
}
}
- /// Peek at the source of the next token.
+ /// Peek at the source of the current token.
pub fn peek_src(&self) -> &'s str {
- self.get(self.next_start() .. self.next_end())
+ self.tokens.scanner().get(self.current_start() .. self.current_end())
}
/// The byte index at which the last token ended.
///
- /// Refers to the end of the last _non-whitespace_ token in code mode.
+ /// Refers to the end of the last non-trivia token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
- /// The byte index at which the next token starts.
- pub fn next_start(&self) -> usize {
- self.next_start
+ /// The byte index at which the current token starts.
+ pub fn current_start(&self) -> usize {
+ self.current_start
}
- /// The byte index at which the next token will end.
- ///
- /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
- /// `None`.
- pub fn next_end(&self) -> usize {
+ /// The byte index at which the current token ends.
+ pub fn current_end(&self) -> usize {
self.tokens.index()
}
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.tokens.column(index)
- }
-
- /// Slice out part of the source string.
- pub fn get(&self, range: Range<usize>) -> &'s str {
- self.src.get(range).unwrap()
+ self.tokens.scanner().column(index)
}
/// Continue parsing in a group.
///
/// When the end delimiter of the group is reached, all subsequent calls to
- /// `eat()` and `peek()` return `None`. Parsing can only continue with
- /// a matching call to `end_group`.
+ /// `peek()` return `None`. Parsing can only continue with a matching call
+ /// to `end_group`.
///
- /// This panics if the next token does not start the given group.
- pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
+ /// This panics if the current token does not start the given group.
+ pub fn start_group(&mut self, kind: Group) {
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
- self.tokens.set_mode(mode);
- self.repeek();
+ self.tokens.set_mode(match kind {
+ Group::Bracket => TokenMode::Markup,
+ _ => TokenMode::Code,
+ });
+ self.repeek();
match kind {
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
@@ -268,12 +220,12 @@ impl<'s> Parser<'s> {
///
/// This panics if no group was started.
pub fn end_group(&mut self) {
- let prev_mode = self.tokens.mode();
+ let group_mode = self.tokens.mode();
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
self.repeek();
- let mut rescan = self.tokens.mode() != prev_mode;
+ let mut rescan = self.tokens.mode() != group_mode;
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
@@ -284,7 +236,7 @@ impl<'s> Parser<'s> {
Group::Expr => None,
Group::Imports => None,
} {
- if self.next == Some(end.clone()) {
+ if self.current.as_ref() == Some(&end) {
// Bump the delimeter and return. No need to rescan in this case.
self.eat();
rescan = false;
@@ -295,10 +247,10 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed.
if rescan {
- if prev_mode == TokenMode::Code {
+ if group_mode == TokenMode::Code {
let len = self.children.len();
for n in (0 .. len).rev() {
- if !self.skip_type(self.children[n].kind()) {
+ if !self.is_trivia(self.children[n].kind()) {
break;
}
@@ -307,129 +259,55 @@ impl<'s> Parser<'s> {
}
self.tokens.jump(self.prev_end());
- self.prev_end = self.tokens.index().into();
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
+ self.prev_end = self.tokens.index();
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
self.repeek();
}
}
- /// Add an error that `what` was expected at the given span.
- pub fn expected_at(&mut self, what: &str) {
- let mut found = self.children.len();
- for (i, node) in self.children.iter().enumerate().rev() {
- if !self.skip_type(node.kind()) {
- break;
- }
- found = i;
- }
-
- Marker(found).expected_at(self, what);
+ /// Low-level bump that consumes exactly one token without special trivia
+ /// handling.
+ fn bump(&mut self) {
+ let kind = self.current.take().unwrap();
+ let len = self.tokens.index() - self.current_start;
+ self.children.push(GreenData::new(kind, len).into());
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
}
- /// Eat the next token and add an error that it is not the expected `thing`.
- pub fn expected(&mut self, what: &str) {
- match self.peek().cloned() {
- Some(found) => {
- self.perform(
- NodeKind::Error(
- ErrorPosition::Full,
- format!("expected {}, found {}", what, found).into(),
- ),
- Self::eat,
- );
- }
- None => self.expected_at(what),
- }
+ /// Take another look at the current token to recheck whether it ends a
+ /// group.
+ fn repeek(&mut self) {
+ self.eof = match &self.current {
+ Some(NodeKind::RightParen) => self.inside(Group::Paren),
+ Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
+ Some(NodeKind::RightBrace) => self.inside(Group::Brace),
+ Some(NodeKind::Semicolon) => self.inside(Group::Stmt),
+ Some(NodeKind::From) => self.inside(Group::Imports),
+ Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(),
+ Some(_) => false,
+ None => true,
+ };
}
- /// Eat the next token and add an error that it is unexpected.
- pub fn unexpected(&mut self) {
- match self.peek().cloned() {
- Some(found) => {
- self.perform(
- NodeKind::Error(
- ErrorPosition::Full,
- format!("unexpected {}", found).into(),
- ),
- Self::eat,
- );
- }
- None => self.push_error("unexpected end of file"),
- }
+ /// Returns whether the given type can be skipped over.
+ fn is_trivia(&self, token: &NodeKind) -> bool {
+ Self::is_trivia_ext(token, self.stop_at_newline())
}
/// Returns whether the given type can be skipped over given the current
/// newline mode.
- pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
+ fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
match token {
- NodeKind::Space(n) => n < &1 || !stop_at_newline,
+ NodeKind::Space(n) => *n == 0 || !stop_at_newline,
NodeKind::LineComment => true,
NodeKind::BlockComment => true,
_ => false,
}
}
- /// Returns whether the given type can be skipped over.
- fn skip_type(&self, token: &NodeKind) -> bool {
- Self::skip_type_ext(token, self.stop_at_newline())
- }
-
- /// Consume the next token.
- pub fn eat(&mut self) {
- self.children.push(
- GreenData::new(
- self.next.clone().unwrap(),
- self.tokens.index() - self.next_start,
- )
- .into(),
- );
-
- self.prev_end = self.tokens.index().into();
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
-
- if self.tokens.mode() == TokenMode::Code {
- // Skip whitespace and comments.
- while self.next.as_ref().map_or(false, |x| self.skip_type(x)) {
- self.children.push(
- GreenData::new(
- self.next.clone().unwrap(),
- self.tokens.index() - self.next_start,
- )
- .into(),
- );
-
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
- }
- }
-
- self.repeek();
- }
-
- /// Take another look at the next token to recheck whether it ends a group.
- fn repeek(&mut self) {
- self.peeked = self.next.clone();
- let token = match self.next.as_ref() {
- Some(token) => token,
- None => return,
- };
-
- if match token {
- NodeKind::RightParen => self.inside(Group::Paren),
- NodeKind::RightBracket => self.inside(Group::Bracket),
- NodeKind::RightBrace => self.inside(Group::Brace),
- NodeKind::Semicolon => self.inside(Group::Stmt),
- NodeKind::From => self.inside(Group::Imports),
- NodeKind::Space(n) => n > &0 && self.stop_at_newline(),
- _ => false,
- } {
- self.peeked = None;
- }
- }
-
- /// Whether the active group ends at a newline.
+ /// Whether the active group must end at a newline.
fn stop_at_newline(&self) -> bool {
matches!(
self.groups.last().map(|group| group.kind),
@@ -441,28 +319,76 @@ impl<'s> Parser<'s> {
fn inside(&self, kind: Group) -> bool {
self.groups.iter().any(|g| g.kind == kind)
}
+}
- /// Returns the last child of the current stack frame.
- pub fn last_child(&self) -> Option<&Green> {
- self.children.last()
+/// Error handling.
+impl Parser<'_> {
+ /// Push an error into the children list.
+ pub fn push_error(&mut self, msg: impl Into<EcoString>) {
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.children.push(GreenData::new(error, 0).into());
}
- /// Create a new marker.
- pub fn marker(&mut self) -> Marker {
- Marker(self.children.len())
+ /// Eat the current token and add an error that it is unexpected.
+ pub fn unexpected(&mut self) {
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("unexpected {}", found);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.perform(error, Self::eat);
+ }
+ None => self.push_error("unexpected end of file"),
+ }
+ }
+
+ /// Eat the current token and add an error that it is not the expected `thing`.
+ pub fn expected(&mut self, thing: &str) {
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("expected {}, found {}", thing, found);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.perform(error, Self::eat);
+ }
+ None => self.expected_at(thing),
+ }
+ }
+
+ /// Add an error that the `thing` was expected at the end of the last
+ /// non-trivia token.
+ pub fn expected_at(&mut self, thing: &str) {
+ let mut found = self.children.len();
+ for (i, node) in self.children.iter().enumerate().rev() {
+ if !self.is_trivia(node.kind()) {
+ break;
+ }
+ found = i;
+ }
+
+ Marker(found).expected_at(self, thing);
}
}
-/// A marker that indicates where a child may start.
+/// A marker that indicates where a node may start.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Marker(usize);
impl Marker {
- /// Wraps all children in front of the marker.
- pub fn end(&self, p: &mut Parser, kind: NodeKind) {
- let stop_nl = p.stop_at_newline();
+ /// Perform a subparse that wraps all children after the marker in a node
+ /// with the given kind.
+ pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T
+ where
+ F: FnOnce(&mut Parser) -> T,
+ {
+ let success = f(p);
+ self.end(p, kind);
+ success
+ }
+
+ /// Wrap all children after the marker in a node with the given `kind`.
+ pub fn end(self, p: &mut Parser, kind: NodeKind) {
let end = (self.0 .. p.children.len())
.rev()
- .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl))
+ .find(|&i| !p.is_trivia(p.children[i].kind()))
.unwrap_or(self.0)
+ 1;
@@ -472,47 +398,61 @@ impl Marker {
}
/// Wrap all children that do not fulfill the predicate in error nodes.
- pub fn filter_children<F>(&self, p: &mut Parser, f: F)
+ pub fn filter_children<F>(self, p: &mut Parser, f: F)
where
F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>,
{
for child in &mut p.children[self.0 ..] {
- if !((p.tokens.mode() != TokenMode::Code
- || Parser::skip_type_ext(child.kind(), false))
- || child.kind().is_error())
+ if (p.tokens.mode() == TokenMode::Markup
+ || !Parser::is_trivia_ext(child.kind(), false))
+ && !child.kind().is_error()
{
if let Err((pos, msg)) = f(child) {
- let inner = std::mem::take(child);
- *child =
- GreenNode::with_child(NodeKind::Error(pos, msg), inner).into();
+ let error = NodeKind::Error(pos, msg);
+ let inner = mem::take(child);
+ *child = GreenNode::with_child(error, inner).into();
}
}
}
}
/// Insert an error message that `what` was expected at the marker position.
- pub fn expected_at(&self, p: &mut Parser, what: &str) {
- p.children.insert(
- self.0,
- GreenData::new(
- NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()),
- 0,
- )
- .into(),
- );
- }
-
- /// Return a reference to the child after the marker.
- pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> {
- p.children.get(self.0)
+ pub fn expected_at(self, p: &mut Parser, what: &str) {
+ let msg = format!("expected {}", what);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ p.children.insert(self.0, GreenData::new(error, 0).into());
}
- pub fn perform<T, F>(&self, p: &mut Parser, kind: NodeKind, f: F) -> T
- where
- F: FnOnce(&mut Parser) -> T,
- {
- let success = f(p);
- self.end(p, kind);
- success
+ /// Return a reference to the child directly after the marker.
+ pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> {
+ p.children.get(self.0)
}
}
+
+/// A logical group of tokens, e.g. `[...]`.
+struct GroupEntry {
+ /// The kind of group this is. This decides which tokens will end the group.
+ /// For example, a [`Group::Paren`] will be ended by
+ /// [`Token::RightParen`].
+ pub kind: Group,
+ /// The mode the parser was in _before_ the group started (to which we go
+ /// back once the group ends).
+ pub prev_mode: TokenMode,
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+ /// A bracketed group: `[...]`.
+ Bracket,
+ /// A curly-braced group: `{...}`.
+ Brace,
+ /// A parenthesized group: `(...)`.
+ Paren,
+ /// A group ended by a semicolon or a line break: `;`, `\n`.
+ Stmt,
+ /// A group for a single expression, ended by a line break.
+ Expr,
+ /// A group for import items, ended by a semicolon, line break or `from`.
+ Imports,
+}