diff options
Diffstat (limited to 'src/parse/parser.rs')
| -rw-r--r-- | src/parse/parser.rs | 559 |
1 files changed, 0 insertions, 559 deletions
diff --git a/src/parse/parser.rs b/src/parse/parser.rs deleted file mode 100644 index 3dbb7d50..00000000 --- a/src/parse/parser.rs +++ /dev/null @@ -1,559 +0,0 @@ -use std::fmt::{self, Display, Formatter}; -use std::mem; -use std::ops::Range; - -use super::{TokenMode, Tokens}; -use crate::syntax::{ErrorPos, InnerNode, NodeData, NodeKind, SyntaxNode}; -use crate::util::EcoString; - -/// A convenient token-based parser. -pub struct Parser<'s> { - /// An iterator over the source tokens. - tokens: Tokens<'s>, - /// Whether we are at the end of the file or of a group. - eof: bool, - /// The current token. - current: Option<NodeKind>, - /// The end byte index of the last non-trivia token. - prev_end: usize, - /// The start byte index of the peeked token. - current_start: usize, - /// The stack of open groups. - groups: Vec<GroupEntry>, - /// The children of the currently built node. - children: Vec<SyntaxNode>, - /// Whether the last group was not correctly terminated. - unterminated_group: bool, - /// Whether a group terminator was found that did not close a group. - stray_terminator: bool, -} - -impl<'s> Parser<'s> { - /// Create a new parser for the source string. - pub fn new(text: &'s str, mode: TokenMode) -> Self { - Self::with_prefix("", text, mode) - } - - /// Create a new parser for the source string that is prefixed by some text - /// that does not need to be parsed but taken into account for column - /// calculation. - pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self { - let mut tokens = Tokens::with_prefix(prefix, text, mode); - let current = tokens.next(); - Self { - tokens, - eof: current.is_none(), - current, - prev_end: 0, - current_start: 0, - groups: vec![], - children: vec![], - unterminated_group: false, - stray_terminator: false, - } - } - - /// End the parsing process and return the parsed children. - pub fn finish(self) -> Vec<SyntaxNode> { - self.children - } - - /// End the parsing process and return - /// - the parsed children and whether the last token was terminated, if all - /// groups were terminated correctly, or - /// - `None` otherwise. - pub fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> { - self.terminated().then(|| (self.children, self.tokens.terminated())) - } - - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) - } - - /// Create a marker right before the trailing trivia. - pub fn trivia_start(&self) -> Marker { - let count = self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count(); - Marker(self.children.len() - count) - } - - /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - let prev = mem::take(&mut self.children); - let output = f(self); - let until = self.trivia_start(); - let mut children = mem::replace(&mut self.children, prev); - - if self.tokens.mode() == TokenMode::Markup { - self.children.push(InnerNode::with_children(kind, children).into()); - } else { - // Trailing trivia should not be wrapped into the new node. - let idx = self.children.len(); - self.children.push(SyntaxNode::default()); - self.children.extend(children.drain(until.0 ..)); - self.children[idx] = InnerNode::with_children(kind, children).into(); - } - - output - } - - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.eof - } - - /// Consume the current token and also trailing trivia. - pub fn eat(&mut self) { - self.stray_terminator |= match self.current { - Some(NodeKind::RightParen) => !self.inside(Group::Paren), - Some(NodeKind::RightBracket) => !self.inside(Group::Bracket), - Some(NodeKind::RightBrace) => !self.inside(Group::Brace), - _ => false, - }; - - self.prev_end = self.tokens.cursor(); - self.bump(); - - if self.tokens.mode() != TokenMode::Markup { - // Skip whitespace and comments. - while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { - self.bump(); - } - } - - self.repeek(); - } - - /// Consume the current token if it is the given one. - pub fn eat_if(&mut self, kind: NodeKind) -> bool { - let at = self.at(kind); - if at { - self.eat(); - } - at - } - - /// Eat tokens while the condition is true. - pub fn eat_while<F>(&mut self, mut f: F) - where - F: FnMut(&NodeKind) -> bool, - { - while self.peek().map_or(false, |t| f(t)) { - self.eat(); - } - } - - /// Consume the current token if it is the given one and produce an error if - /// not. - pub fn expect(&mut self, kind: NodeKind) -> ParseResult { - let at = self.peek() == Some(&kind); - if at { - self.eat(); - Ok(()) - } else { - self.expected(kind.name()); - Err(ParseError) - } - } - - /// Consume the current token, debug-asserting that it is the given one. - #[track_caller] - pub fn assert(&mut self, kind: NodeKind) { - debug_assert_eq!(self.peek(), Some(&kind)); - self.eat(); - } - - /// Whether the current token is of the given type. - pub fn at(&self, kind: NodeKind) -> bool { - self.peek() == Some(&kind) - } - - /// Peek at the current token without consuming it. - pub fn peek(&self) -> Option<&NodeKind> { - if self.eof { None } else { self.current.as_ref() } - } - - /// Peek at the current token, but only if it follows immediately after the - /// last one without any trivia in between. - pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.prev_end() == self.current_start() { - self.peek() - } else { - None - } - } - - /// Peek at the source of the current token. - pub fn peek_src(&self) -> &'s str { - self.get(self.current_start() .. self.current_end()) - } - - /// Obtain a range of the source code. - pub fn get(&self, range: Range<usize>) -> &'s str { - self.tokens.scanner().get(range) - } - - /// The byte index at which the last non-trivia token ended. - pub fn prev_end(&self) -> usize { - self.prev_end - } - - /// The byte index at which the current token starts. - pub fn current_start(&self) -> usize { - self.current_start - } - - /// The byte index at which the current token ends. - pub fn current_end(&self) -> usize { - self.tokens.cursor() - } - - /// Determine the column index for the given byte index. - pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Continue parsing in a group. - /// - /// When the end delimiter of the group is reached, all subsequent calls to - /// `peek()` return `None`. Parsing can only continue with a matching call - /// to `end_group`. - /// - /// This panics if the current token does not start the given group. - #[track_caller] - pub fn start_group(&mut self, kind: Group) { - self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(match kind { - Group::Strong | Group::Emph => TokenMode::Markup, - Group::Bracket => match self.tokens.mode() { - TokenMode::Math => TokenMode::Math, - _ => TokenMode::Markup, - }, - Group::Brace | Group::Paren => match self.tokens.mode() { - TokenMode::Math => TokenMode::Math, - _ => TokenMode::Code, - }, - Group::Math => TokenMode::Math, - Group::Expr | Group::Imports => TokenMode::Code, - }); - - match kind { - Group::Brace => self.assert(NodeKind::LeftBrace), - Group::Bracket => self.assert(NodeKind::LeftBracket), - Group::Paren => self.assert(NodeKind::LeftParen), - Group::Strong => self.assert(NodeKind::Star), - Group::Emph => self.assert(NodeKind::Underscore), - Group::Math => self.assert(NodeKind::Dollar), - Group::Expr => self.repeek(), - Group::Imports => self.repeek(), - } - } - - /// End the parsing of a group. - /// - /// This panics if no group was started. - #[track_caller] - pub fn end_group(&mut self) { - let group_mode = self.tokens.mode(); - let group = self.groups.pop().expect("no started group"); - self.tokens.set_mode(group.prev_mode); - - let mut rescan = self.tokens.mode() != group_mode; - - // Eat the end delimiter if there is one. - if let Some((end, required)) = match group.kind { - Group::Brace => Some((NodeKind::RightBrace, true)), - Group::Bracket => Some((NodeKind::RightBracket, true)), - Group::Paren => Some((NodeKind::RightParen, true)), - Group::Strong => Some((NodeKind::Star, true)), - Group::Emph => Some((NodeKind::Underscore, true)), - Group::Math => Some((NodeKind::Dollar, true)), - Group::Expr => Some((NodeKind::Semicolon, false)), - Group::Imports => None, - } { - if self.current.as_ref() == Some(&end) { - // If another group closes after a group with the missing - // terminator, its scope of influence ends here and no longer - // taints the rest of the reparse. - self.unterminated_group = false; - - // Bump the delimeter and return. No need to rescan in this - // case. Also, we know that the delimiter is not stray even - // though we already removed the group. - let s = self.stray_terminator; - self.eat(); - self.stray_terminator = s; - rescan = false; - } else if required { - self.expected(end.name()); - self.unterminated_group = true; - } - } - - // Rescan the peeked token if the mode changed. - if rescan { - let mut target = self.prev_end(); - if group_mode != TokenMode::Markup { - let start = self.trivia_start().0; - target = self.current_start - - self.children[start ..].iter().map(SyntaxNode::len).sum::<usize>(); - self.children.truncate(start); - } - - self.tokens.jump(target); - self.prev_end = self.tokens.cursor(); - self.current_start = self.tokens.cursor(); - self.current = self.tokens.next(); - } - - self.repeek(); - } - - /// Checks if all groups were correctly terminated. - fn terminated(&self) -> bool { - self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator - } - - /// Low-level bump that consumes exactly one token without special trivia - /// handling. - fn bump(&mut self) { - let kind = self.current.take().unwrap(); - let len = self.tokens.cursor() - self.current_start; - self.children.push(NodeData::new(kind, len).into()); - self.current_start = self.tokens.cursor(); - self.current = self.tokens.next(); - } - - /// Take another look at the current token to recheck whether it ends a - /// group. - fn repeek(&mut self) { - self.eof = match &self.current { - Some(NodeKind::RightBrace) => self.inside(Group::Brace), - Some(NodeKind::RightBracket) => self.inside(Group::Bracket), - Some(NodeKind::RightParen) => self.inside(Group::Paren), - Some(NodeKind::Star) => self.inside(Group::Strong), - Some(NodeKind::Underscore) => self.inside(Group::Emph), - Some(NodeKind::Dollar) => self.inside(Group::Math), - Some(NodeKind::Semicolon) => self.inside(Group::Expr), - Some(NodeKind::From) => self.inside(Group::Imports), - Some(NodeKind::Space { newlines }) => self.space_ends_group(*newlines), - Some(_) => false, - None => true, - }; - } - - /// Returns whether the given type can be skipped over. - fn is_trivia(&self, token: &NodeKind) -> bool { - match token { - NodeKind::Space { newlines } => !self.space_ends_group(*newlines), - NodeKind::LineComment => true, - NodeKind::BlockComment => true, - _ => false, - } - } - - /// Whether a space with the given number of newlines ends the current group. - fn space_ends_group(&self, n: usize) -> bool { - if n == 0 { - return false; - } - - match self.groups.last().map(|group| group.kind) { - Some(Group::Strong | Group::Emph) => n >= 2, - Some(Group::Imports) => n >= 1, - Some(Group::Expr) if n >= 1 => { - // Allow else and method call to continue on next line. - self.groups.iter().nth_back(1).map(|group| group.kind) - != Some(Group::Brace) - || !matches!( - self.tokens.clone().next(), - Some(NodeKind::Else | NodeKind::Dot) - ) - } - _ => false, - } - } - - /// Whether we are inside the given group (can be nested). - fn inside(&self, kind: Group) -> bool { - self.groups - .iter() - .rev() - .take_while(|g| !kind.is_weak() || g.kind.is_weak()) - .any(|g| g.kind == kind) - } -} - -/// Error handling. -impl Parser<'_> { - /// Eat the current token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - if let Some(found) = self.peek() { - let msg = format_eco!("unexpected {}", found.name()); - let error = NodeKind::Error(ErrorPos::Full, msg); - self.perform(error, Self::eat); - } - } - - /// Add an error that the `thing` was expected at the end of the last - /// non-trivia token. - pub fn expected(&mut self, thing: &str) { - self.expected_at(self.trivia_start(), thing); - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&mut self, marker: Marker, what: &str) { - let msg = format_eco!("expected {}", what); - let error = NodeKind::Error(ErrorPos::Full, msg); - self.children.insert(marker.0, NodeData::new(error, 0).into()); - } - - /// Eat the current token and add an error that it is not the expected - /// `thing`. - pub fn expected_found(&mut self, thing: &str) { - match self.peek() { - Some(found) => { - let msg = format_eco!("expected {}, found {}", thing, found.name()); - let error = NodeKind::Error(ErrorPos::Full, msg); - self.perform(error, Self::eat); - } - None => self.expected(thing), - } - } -} - -/// Marks a location in a parser's child list. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Marker(usize); - -impl Marker { - /// Peek at the child directly before the marker. - pub fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { - p.children.get(self.0.checked_sub(1)?) - } - - /// Peek at the child directly after the marker. - pub fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> { - p.children.get(self.0) - } - - /// Convert the child directly after marker. - pub fn convert(self, p: &mut Parser, kind: NodeKind) { - if let Some(child) = p.children.get_mut(self.0) { - child.convert(kind); - } - } - - /// Perform a subparse that wraps all children after the marker in a node - /// with the given kind. - pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } - - /// Wrap all children after the marker (excluding trailing trivia) in a node - /// with the given `kind`. - pub fn end(self, p: &mut Parser, kind: NodeKind) { - let until = p.trivia_start().0.max(self.0); - let children = p.children.drain(self.0 .. until).collect(); - p.children - .insert(self.0, InnerNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children<F>(self, p: &mut Parser, mut f: F) - where - F: FnMut(&SyntaxNode) -> Result<(), &'static str>, - { - for child in &mut p.children[self.0 ..] { - // Don't expose errors. - if child.kind().is_error() { - continue; - } - - // Don't expose trivia in code. - if p.tokens.mode() != TokenMode::Markup && child.kind().is_trivia() { - continue; - } - - if let Err(msg) = f(child) { - let mut msg = EcoString::from(msg); - if msg.starts_with("expected") { - msg.push_str(", found "); - msg.push_str(child.kind().name()); - } - let error = NodeKind::Error(ErrorPos::Full, msg); - let inner = mem::take(child); - *child = InnerNode::with_child(error, inner).into(); - } - } - } -} - -/// A logical group of tokens, e.g. `[...]`. -#[derive(Debug)] -struct GroupEntry { - /// The kind of group this is. This decides which token(s) will end the - /// group. For example, a [`Group::Paren`] will be ended by - /// [`Token::RightParen`]. - pub kind: Group, - /// The mode the parser was in _before_ the group started (to which we go - /// back once the group ends). - pub prev_mode: TokenMode, -} - -/// A group, confined by optional start and end delimiters. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Group { - /// A curly-braced group: `{...}`. - Brace, - /// A bracketed group: `[...]`. - Bracket, - /// A parenthesized group: `(...)`. - Paren, - /// A group surrounded with stars: `*...*`. - Strong, - /// A group surrounded with underscore: `_..._`. - Emph, - /// A group surrounded by dollar signs: `$...$`. - Math, - /// A group ended by a semicolon or a line break: `;`, `\n`. - Expr, - /// A group for import items, ended by a semicolon, line break or `from`. - Imports, -} - -impl Group { - /// Whether the group can only force other weak groups to end. - fn is_weak(self) -> bool { - matches!(self, Group::Strong | Group::Emph) - } -} - -/// Allows parser methods to use the try operator. Never returned top-level -/// because the parser recovers from all errors. -pub type ParseResult<T = ()> = Result<T, ParseError>; - -/// The error type for parsing. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ParseError; - -impl Display for ParseError { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad("failed to parse") - } -} - -impl std::error::Error for ParseError {} |
