diff options
Diffstat (limited to 'src/parse/parser.rs')
| -rw-r--r-- | src/parse/parser.rs | 139 |
1 files changed, 78 insertions, 61 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 986a36b0..a64e39dd 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -3,7 +3,7 @@ use std::fmt::{self, Debug, Formatter}; use super::{Scanner, TokenMode, Tokens}; use crate::diag::Diag; use crate::diag::{Deco, Feedback}; -use crate::syntax::{Pos, Span, Spanned, Token, WithSpan}; +use crate::syntax::{Pos, Span, Spanned, Token}; /// A convenient token-based parser. pub struct Parser<'s> { @@ -18,14 +18,43 @@ pub struct Parser<'s> { next_start: Pos, /// The end position of the last (non-whitespace if in code mode) token. last_end: Pos, - /// The stack of modes we were in. - modes: Vec<TokenMode>, /// The stack of open groups. - groups: Vec<Group>, + groups: Vec<GroupEntry>, /// Accumulated feedback. feedback: Feedback, } +/// A logical group of tokens, e.g. `[...]`. +struct GroupEntry { + /// The start position of the group. Used by `Parser::end_group` to return + /// The group's full span. + start: Pos, + /// The kind of group this is. This decides which tokens will end the group. + /// For example, a [`GroupKind::Paren`] will be ended by + /// [`Token::RightParen`]. + kind: Group, + /// The mode the parser was in _before_ the group started. + prev_mode: TokenMode, +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A parenthesized group: `(...)`. + Paren, + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A group ended by a chained subheader or a closing bracket: + /// `... >>`, `...]`. + Subheader, + /// A group ended by a semicolon or a line break: `;`, `\n`. + Stmt, + /// A group for a single expression. Not ended by something specific. + Expr, +} + impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { @@ -37,7 +66,6 @@ impl<'s> Parser<'s> { peeked: next, next_start: Pos::ZERO, last_end: Pos::ZERO, - modes: vec![], groups: vec![], feedback: Feedback::new(), } @@ -97,14 +125,17 @@ impl<'s> Parser<'s> { /// /// # Panics /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, group: Group, mode: TokenMode) { - self.modes.push(self.tokens.mode()); - self.tokens.set_mode(mode); + pub fn start_group(&mut self, kind: Group, mode: TokenMode) { + self.groups.push(GroupEntry { + start: self.next_start, + kind, + prev_mode: self.tokens.mode(), + }); - self.groups.push(group); + self.tokens.set_mode(mode); self.repeek(); - match group { + match kind { Group::Paren => self.assert(&[Token::LeftParen]), Group::Bracket => self.assert(&[Token::HashBracket, Token::LeftBracket]), Group::Brace => self.assert(&[Token::LeftBrace]), @@ -118,15 +149,16 @@ impl<'s> Parser<'s> { /// /// # Panics /// This panics if no group was started. - pub fn end_group(&mut self) { + pub fn end_group(&mut self) -> Span { let prev_mode = self.tokens.mode(); - self.tokens.set_mode(self.modes.pop().expect("no pushed mode")); - let group = self.groups.pop().expect("no started group"); + self.tokens.set_mode(group.prev_mode); self.repeek(); + let mut rescan = self.tokens.mode() != prev_mode; + // Eat the end delimiter if there is one. - if let Some((end, required)) = match group { + if let Some((end, required)) = match group.kind { Group::Paren => Some((Token::RightParen, true)), Group::Bracket => Some((Token::RightBracket, true)), Group::Brace => Some((Token::RightBrace, true)), @@ -137,37 +169,19 @@ impl<'s> Parser<'s> { if self.next == Some(end) { // Bump the delimeter and return. No need to rescan in this case. self.bump(); - return; + rescan = false; } else if required { self.diag(error!(self.next_start, "expected {}", end.name())); } } // Rescan the peeked token if the mode changed. - if self.tokens.mode() != prev_mode { + if rescan { self.tokens.jump(self.last_end); self.bump(); } - } - - /// Execute `f` and return the result alongside the span of everything `f` - /// ate. Excludes leading and trailing whitespace in code mode. - pub fn span<T, F>(&mut self, f: F) -> Spanned<T> - where - F: FnOnce(&mut Self) -> T, - { - let start = self.next_start; - let output = f(self); - let end = self.last_end; - output.with_span(start .. end) - } - /// A version of [`span`](Self::span) that works better with options. - pub fn span_if<T, F>(&mut self, f: F) -> Option<Spanned<T>> - where - F: FnOnce(&mut Self) -> Option<T>, - { - self.span(f).transpose() + Span::new(group.start, self.last_end) } /// Consume the next token. @@ -200,6 +214,13 @@ impl<'s> Parser<'s> { mapped } + /// Eat the next token and return its span. + pub fn eat_span(&mut self) -> Span { + let start = self.next_start; + self.eat(); + Span::new(start, self.last_end) + } + /// Consume the next token if it is the given one and produce an error if /// not. pub fn expect(&mut self, t: Token) -> bool { @@ -264,17 +285,22 @@ impl<'s> Parser<'s> { } /// The position at which the next token starts. - pub fn next_start(&self) -> Pos { + pub fn start(&self) -> Pos { self.next_start } /// The position at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in code mode. - pub fn last_end(&self) -> Pos { + pub fn end(&self) -> Pos { self.last_end } + /// The span from + pub fn span_from(&self, start: Pos) -> Span { + Span::new(start, self.last_end) + } + /// Jump to a position in the source string. pub fn jump(&mut self, pos: Pos) { self.tokens.jump(pos); @@ -325,13 +351,14 @@ impl<'s> Parser<'s> { None => return, }; + let inside = |x| self.kinds().any(|k| k == x); match token { - Token::RightParen if self.groups.contains(&Group::Paren) => {} - Token::RightBracket if self.groups.contains(&Group::Bracket) => {} - Token::RightBrace if self.groups.contains(&Group::Brace) => {} - Token::Semicolon if self.groups.contains(&Group::Stmt) => {} + Token::RightParen if inside(Group::Paren) => {} + Token::RightBracket if inside(Group::Bracket) => {} + Token::RightBrace if inside(Group::Brace) => {} + Token::Semicolon if inside(Group::Stmt) => {} + Token::Pipe if inside(Group::Subheader) => {} Token::Space(n) if n >= 1 && self.in_line_group() => {} - Token::Pipe if self.groups.contains(&Group::Subheader) => {} _ => return, } @@ -340,7 +367,15 @@ impl<'s> Parser<'s> { /// Whether the active group ends at a newline. fn in_line_group(&self) -> bool { - matches!(self.groups.last(), Some(&Group::Stmt) | Some(&Group::Expr)) + matches!( + self.kinds().next_back(), + Some(Group::Stmt) | Some(Group::Expr) + ) + } + + /// The outer groups. + fn kinds(&self) -> impl DoubleEndedIterator<Item = Group> + '_ { + self.groups.iter().map(|group| group.kind) } } @@ -350,21 +385,3 @@ impl Debug for Parser<'_> { write!(f, "Parser({}|{})", s.eaten(), s.rest()) } } - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A parenthesized group: `(...)`. - Paren, - /// A bracketed group: `[...]`. - Bracket, - /// A curly-braced group: `{...}`. - Brace, - /// A group ended by a chained subheader or a closing bracket: - /// `... >>`, `...]`. - Subheader, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Stmt, - /// A group for a single expression. Not ended by something specific. - Expr, -} |
