diff options
| author | Laurenz <laurmaedje@gmail.com> | 2020-10-02 15:43:29 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2020-10-02 15:43:29 +0200 |
| commit | 3533268b1f7a31581e7b8f44dff6d4f553ef348f (patch) | |
| tree | 3fee21d2df7ce173131f75f46a1ef040f272ed29 /src/parse/parser.rs | |
| parent | f8770d2b2a8ac389704897f92f2753398352835b (diff) | |
Refactor parser 🏞
Diffstat (limited to 'src/parse/parser.rs')
| -rw-r--r-- | src/parse/parser.rs | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs new file mode 100644 index 00000000..d0735931 --- /dev/null +++ b/src/parse/parser.rs @@ -0,0 +1,292 @@ +use std::fmt::{self, Debug, Formatter}; + +use super::{Scanner, TokenMode, Tokens}; +use crate::diagnostic::Diagnostic; +use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token}; +use crate::Feedback; + +/// A convenient token-based parser. +pub struct Parser<'s> { + tokens: Tokens<'s>, + modes: Vec<TokenMode>, + groups: Vec<(Pos, Group)>, + f: Feedback, +} + +impl<'s> Parser<'s> { + /// Create a new parser for the source string. + pub fn new(src: &'s str) -> Self { + Self { + tokens: Tokens::new(src, TokenMode::Body), + modes: vec![], + groups: vec![], + f: Feedback::new(), + } + } + + /// Finish parsing and return the accumulated feedback. + pub fn finish(self) -> Feedback { + self.f + } + + /// Add a diagnostic to the feedback. + pub fn diag(&mut self, diag: Spanned<Diagnostic>) { + self.f.diagnostics.push(diag); + } + + /// Eat the next token and add a diagnostic that it was not expected thing. + pub fn diag_expected(&mut self, thing: &str) { + if let Some(found) = self.eat() { + self.diag(error!( + found.span, + "expected {}, found {}", + thing, + found.v.name(), + )); + } else { + self.diag_expected_at(thing, self.pos()); + } + } + + /// Add a diagnostic that the thing was expected at the given position. + pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) { + self.diag(error!(pos, "expected {}", thing)); + } + + /// Add a diagnostic that the given token was unexpected. + pub fn diag_unexpected(&mut self, token: Spanned<Token>) { + self.diag(error!(token.span, "unexpected {}", token.v.name())); + } + + /// Add a decoration to the feedback. + pub fn deco(&mut self, deco: Spanned<Decoration>) { + self.f.decorations.push(deco); + } + + /// Update the token mode and push the previous mode onto a stack. + pub fn push_mode(&mut self, mode: TokenMode) { + self.modes.push(self.tokens.mode()); + self.tokens.set_mode(mode); + } + + /// Pop the topmost token mode from the stack. + /// + /// # Panics + /// This panics if there is no mode on the stack. + pub fn pop_mode(&mut self) { + self.tokens.set_mode(self.modes.pop().expect("no pushed mode")); + } + + /// Continues parsing in a group. + /// + /// When the end delimiter of the group is reached, all subsequent calls to + /// `eat()` and `peek()` return `None`. Parsing can only continue with + /// a matching call to `end_group`. + /// + /// # Panics + /// This panics if the next token does not start the given group. + pub fn start_group(&mut self, group: Group) { + let start = self.pos(); + match group { + Group::Paren => self.eat_assert(Token::LeftParen), + Group::Bracket => self.eat_assert(Token::LeftBracket), + Group::Brace => self.eat_assert(Token::LeftBrace), + Group::Subheader => {} + } + self.groups.push((start, group)); + } + + /// Ends the parsing of a group and returns the span of the whole group. + /// + /// # Panics + /// This panics if no group was started. + pub fn end_group(&mut self) -> Span { + debug_assert_eq!(self.peek(), None, "unfinished group"); + + let (start, group) = self.groups.pop().expect("unstarted group"); + let end = match group { + Group::Paren => Some(Token::RightParen), + Group::Bracket => Some(Token::RightBracket), + Group::Brace => Some(Token::RightBrace), + Group::Subheader => None, + }; + + if let Some(token) = end { + let next = self.tokens.clone().next().map(|s| s.v); + if next == Some(token) { + self.tokens.next(); + } else { + self.diag(error!(self.pos(), "expected {}", token.name())); + } + } + + Span::new(start, self.pos()) + } + + /// Consume the next token. + pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> { + next_group_aware(&mut self.tokens, &self.groups) + } + + /// Consume the next token if it is the given one. + pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> { + // Don't call eat() twice if it suceeds. + // + // TODO: Benchmark this vs. the naive version. + let before = self.pos(); + let token = self.eat()?; + if token.v == t { + Some(token) + } else { + self.jump(before); + None + } + } + + /// Consume the next token if the closure maps to `Some`. + pub fn eat_map<T>( + &mut self, + mut f: impl FnMut(Token<'s>) -> Option<T>, + ) -> Option<Spanned<T>> { + let before = self.pos(); + let token = self.eat()?; + if let Some(t) = f(token.v) { + Some(t.span_with(token.span)) + } else { + self.jump(before); + None + } + } + + /// Consume the next token, debug-asserting that it is the given one. + pub fn eat_assert(&mut self, t: Token) { + let next = self.eat(); + debug_assert_eq!(next.map(|s| s.v), Some(t)); + } + + /// Consume tokens while the condition is true. + /// + /// Returns how many tokens were eaten. + pub fn eat_while(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { + self.eat_until(|t| !f(t)) + } + + /// Consume tokens until the condition is true. + /// + /// Returns how many tokens were eaten. + pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { + let mut count = 0; + let mut before = self.pos(); + while let Some(t) = self.eat() { + if f(t.v) { + // Undo the last eat by jumping. This prevents + // double-tokenization by not peeking all the time. + // + // TODO: Benchmark this vs. the naive peeking version. + self.jump(before); + break; + } + before = self.pos(); + count += 1; + } + count + } + + /// Peek at the next token without consuming it. + pub fn peek(&self) -> Option<Token<'s>> { + next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v) + } + + /// Checks whether the next token fulfills a condition. + /// + /// Returns `false` if there is no next token. + pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool { + self.peek().map(f).unwrap_or(false) + } + + /// Whether the there is no next token. + pub fn eof(&self) -> bool { + self.peek().is_none() + } + + /// Skip whitespace tokens. + pub fn skip_white(&mut self) { + self.eat_while(|t| { + matches!(t, + Token::Space(_) | + Token::LineComment(_) | + Token::BlockComment(_)) + }); + } + + /// The position in the string at which the last token ends and next token + /// will start. + pub fn pos(&self) -> Pos { + self.tokens.pos() + } + + /// Jump to a position in the source string. + pub fn jump(&mut self, pos: Pos) { + self.tokens.jump(pos); + } + + /// The full source string. + pub fn src(&self) -> &'s str { + self.scanner().src() + } + + /// The part of the source string that is spanned by the given span. + pub fn get(&self, span: Span) -> &'s str { + self.scanner().get(span.start.to_usize() .. span.end.to_usize()) + } + + /// The underlying scanner. + pub fn scanner(&self) -> &Scanner<'s> { + self.tokens.scanner() + } +} + +/// Wraps `tokens.next()`, but is group-aware. +fn next_group_aware<'s>( + tokens: &mut Tokens<'s>, + groups: &[(Pos, Group)], +) -> Option<Spanned<Token<'s>>> { + let pos = tokens.pos(); + let token = tokens.next(); + + let group = match token?.v { + Token::RightParen => Group::Paren, + Token::RightBracket => Group::Bracket, + Token::RightBrace => Group::Brace, + Token::Chain => Group::Subheader, + _ => return token, + }; + + if groups.iter().rev().any(|&(_, g)| g == group) { + tokens.jump(pos); + None + } else { + token + } +} + +impl Debug for Parser<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let s = self.scanner(); + write!(f, "Parser({}|{})", s.eaten(), s.rest()) + } +} + +/// A group, confined by optional start and end delimiters. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A parenthesized group: `(...)`. + Paren, + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A group ended by a chained subheader or a closing bracket: + /// `... >>`, `...]`. + Subheader, +} |
