summaryrefslogtreecommitdiff
path: root/src/parse/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/parser.rs')
-rw-r--r--src/parse/parser.rs139
1 files changed, 78 insertions, 61 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 986a36b0..a64e39dd 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -3,7 +3,7 @@ use std::fmt::{self, Debug, Formatter};
use super::{Scanner, TokenMode, Tokens};
use crate::diag::Diag;
use crate::diag::{Deco, Feedback};
-use crate::syntax::{Pos, Span, Spanned, Token, WithSpan};
+use crate::syntax::{Pos, Span, Spanned, Token};
/// A convenient token-based parser.
pub struct Parser<'s> {
@@ -18,14 +18,43 @@ pub struct Parser<'s> {
next_start: Pos,
/// The end position of the last (non-whitespace if in code mode) token.
last_end: Pos,
- /// The stack of modes we were in.
- modes: Vec<TokenMode>,
/// The stack of open groups.
- groups: Vec<Group>,
+ groups: Vec<GroupEntry>,
/// Accumulated feedback.
feedback: Feedback,
}
+/// A logical group of tokens, e.g. `[...]`.
+struct GroupEntry {
+ /// The start position of the group. Used by `Parser::end_group` to return
+ /// The group's full span.
+ start: Pos,
+ /// The kind of group this is. This decides which tokens will end the group.
+ /// For example, a [`GroupKind::Paren`] will be ended by
+ /// [`Token::RightParen`].
+ kind: Group,
+ /// The mode the parser was in _before_ the group started.
+ prev_mode: TokenMode,
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+ /// A parenthesized group: `(...)`.
+ Paren,
+ /// A bracketed group: `[...]`.
+ Bracket,
+ /// A curly-braced group: `{...}`.
+ Brace,
+ /// A group ended by a chained subheader or a closing bracket:
+ /// `... >>`, `...]`.
+ Subheader,
+ /// A group ended by a semicolon or a line break: `;`, `\n`.
+ Stmt,
+ /// A group for a single expression. Not ended by something specific.
+ Expr,
+}
+
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str) -> Self {
@@ -37,7 +66,6 @@ impl<'s> Parser<'s> {
peeked: next,
next_start: Pos::ZERO,
last_end: Pos::ZERO,
- modes: vec![],
groups: vec![],
feedback: Feedback::new(),
}
@@ -97,14 +125,17 @@ impl<'s> Parser<'s> {
///
/// # Panics
/// This panics if the next token does not start the given group.
- pub fn start_group(&mut self, group: Group, mode: TokenMode) {
- self.modes.push(self.tokens.mode());
- self.tokens.set_mode(mode);
+ pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
+ self.groups.push(GroupEntry {
+ start: self.next_start,
+ kind,
+ prev_mode: self.tokens.mode(),
+ });
- self.groups.push(group);
+ self.tokens.set_mode(mode);
self.repeek();
- match group {
+ match kind {
Group::Paren => self.assert(&[Token::LeftParen]),
Group::Bracket => self.assert(&[Token::HashBracket, Token::LeftBracket]),
Group::Brace => self.assert(&[Token::LeftBrace]),
@@ -118,15 +149,16 @@ impl<'s> Parser<'s> {
///
/// # Panics
/// This panics if no group was started.
- pub fn end_group(&mut self) {
+ pub fn end_group(&mut self) -> Span {
let prev_mode = self.tokens.mode();
- self.tokens.set_mode(self.modes.pop().expect("no pushed mode"));
-
let group = self.groups.pop().expect("no started group");
+ self.tokens.set_mode(group.prev_mode);
self.repeek();
+ let mut rescan = self.tokens.mode() != prev_mode;
+
// Eat the end delimiter if there is one.
- if let Some((end, required)) = match group {
+ if let Some((end, required)) = match group.kind {
Group::Paren => Some((Token::RightParen, true)),
Group::Bracket => Some((Token::RightBracket, true)),
Group::Brace => Some((Token::RightBrace, true)),
@@ -137,37 +169,19 @@ impl<'s> Parser<'s> {
if self.next == Some(end) {
// Bump the delimeter and return. No need to rescan in this case.
self.bump();
- return;
+ rescan = false;
} else if required {
self.diag(error!(self.next_start, "expected {}", end.name()));
}
}
// Rescan the peeked token if the mode changed.
- if self.tokens.mode() != prev_mode {
+ if rescan {
self.tokens.jump(self.last_end);
self.bump();
}
- }
-
- /// Execute `f` and return the result alongside the span of everything `f`
- /// ate. Excludes leading and trailing whitespace in code mode.
- pub fn span<T, F>(&mut self, f: F) -> Spanned<T>
- where
- F: FnOnce(&mut Self) -> T,
- {
- let start = self.next_start;
- let output = f(self);
- let end = self.last_end;
- output.with_span(start .. end)
- }
- /// A version of [`span`](Self::span) that works better with options.
- pub fn span_if<T, F>(&mut self, f: F) -> Option<Spanned<T>>
- where
- F: FnOnce(&mut Self) -> Option<T>,
- {
- self.span(f).transpose()
+ Span::new(group.start, self.last_end)
}
/// Consume the next token.
@@ -200,6 +214,13 @@ impl<'s> Parser<'s> {
mapped
}
+ /// Eat the next token and return its span.
+ pub fn eat_span(&mut self) -> Span {
+ let start = self.next_start;
+ self.eat();
+ Span::new(start, self.last_end)
+ }
+
/// Consume the next token if it is the given one and produce an error if
/// not.
pub fn expect(&mut self, t: Token) -> bool {
@@ -264,17 +285,22 @@ impl<'s> Parser<'s> {
}
/// The position at which the next token starts.
- pub fn next_start(&self) -> Pos {
+ pub fn start(&self) -> Pos {
self.next_start
}
/// The position at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
- pub fn last_end(&self) -> Pos {
+ pub fn end(&self) -> Pos {
self.last_end
}
+ /// The span from
+ pub fn span_from(&self, start: Pos) -> Span {
+ Span::new(start, self.last_end)
+ }
+
/// Jump to a position in the source string.
pub fn jump(&mut self, pos: Pos) {
self.tokens.jump(pos);
@@ -325,13 +351,14 @@ impl<'s> Parser<'s> {
None => return,
};
+ let inside = |x| self.kinds().any(|k| k == x);
match token {
- Token::RightParen if self.groups.contains(&Group::Paren) => {}
- Token::RightBracket if self.groups.contains(&Group::Bracket) => {}
- Token::RightBrace if self.groups.contains(&Group::Brace) => {}
- Token::Semicolon if self.groups.contains(&Group::Stmt) => {}
+ Token::RightParen if inside(Group::Paren) => {}
+ Token::RightBracket if inside(Group::Bracket) => {}
+ Token::RightBrace if inside(Group::Brace) => {}
+ Token::Semicolon if inside(Group::Stmt) => {}
+ Token::Pipe if inside(Group::Subheader) => {}
Token::Space(n) if n >= 1 && self.in_line_group() => {}
- Token::Pipe if self.groups.contains(&Group::Subheader) => {}
_ => return,
}
@@ -340,7 +367,15 @@ impl<'s> Parser<'s> {
/// Whether the active group ends at a newline.
fn in_line_group(&self) -> bool {
- matches!(self.groups.last(), Some(&Group::Stmt) | Some(&Group::Expr))
+ matches!(
+ self.kinds().next_back(),
+ Some(Group::Stmt) | Some(Group::Expr)
+ )
+ }
+
+ /// The outer groups.
+ fn kinds(&self) -> impl DoubleEndedIterator<Item = Group> + '_ {
+ self.groups.iter().map(|group| group.kind)
}
}
@@ -350,21 +385,3 @@ impl Debug for Parser<'_> {
write!(f, "Parser({}|{})", s.eaten(), s.rest())
}
}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Group {
- /// A parenthesized group: `(...)`.
- Paren,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A curly-braced group: `{...}`.
- Brace,
- /// A group ended by a chained subheader or a closing bracket:
- /// `... >>`, `...]`.
- Subheader,
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Stmt,
- /// A group for a single expression. Not ended by something specific.
- Expr,
-}