summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/mod.rs61
-rw-r--r--src/parse/parser.rs506
-rw-r--r--src/parse/resolve.rs4
-rw-r--r--src/parse/tokens.rs14
4 files changed, 260 insertions, 325 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 90be73f9..aa616fdf 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -13,13 +13,16 @@ pub use tokens::*;
use std::rc::Rc;
use crate::syntax::ast::{Associativity, BinOp, UnOp};
-use crate::syntax::{ErrorPosition, GreenNode, NodeKind};
+use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind};
/// Parse a source file.
pub fn parse(source: &str) -> Rc<GreenNode> {
let mut p = Parser::new(source);
markup(&mut p);
- p.finish()
+ match p.finish().into_iter().next() {
+ Some(Green::Node(node)) => node,
+ _ => unreachable!(),
+ }
}
/// Parse markup.
@@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) {
});
markup_while(p, false, &mut |p| match p.peek() {
- Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column,
+ Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
_ => true,
})
}
@@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
let stmt = matches!(token, NodeKind::Let | NodeKind::Import);
let group = if stmt { Group::Stmt } else { Group::Expr };
- p.start_group(group, TokenMode::Code);
+ p.start_group(group);
let res = expr_prec(p, true, 0);
if stmt && res.is_ok() && !p.eof() {
p.expected_at("semicolon or line break");
@@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
let marker = p.marker();
// Start the unary expression.
- match p.eat_map(|x| UnOp::from_token(&x)) {
+ match p.peek().and_then(UnOp::from_token) {
Some(op) => {
+ p.eat();
let prec = op.precedence();
expr_prec(p, atomic, prec)?;
marker.end(p, NodeKind::Unary);
@@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
break;
}
- if p.peek() == Some(&NodeKind::With) {
+ if p.at(&NodeKind::With) {
with_expr(p, &marker)?;
}
@@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
p.eat();
// Arrow means this is a closure's lone parameter.
- if !atomic && p.peek() == Some(&NodeKind::Arrow) {
+ if !atomic && p.at(&NodeKind::Arrow) {
marker.end(p, NodeKind::ClosureParams);
p.eat();
marker.perform(p, NodeKind::Closure, expr)
@@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool {
fn parenthesized(p: &mut Parser) -> ParseResult {
let marker = p.marker();
- p.start_group(Group::Paren, TokenMode::Code);
+ p.start_group(Group::Paren);
let colon = p.eat_if(&NodeKind::Colon);
let kind = collection(p).0;
p.end_group();
@@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
}
// Arrow means this is a closure's parameter list.
- if p.peek() == Some(&NodeKind::Arrow) {
+ if p.at(&NodeKind::Arrow) {
params(p, &marker, true);
marker.end(p, NodeKind::ClosureParams);
p.eat_assert(&NodeKind::Arrow);
return marker.perform(p, NodeKind::Closure, expr);
}
- // Find out which kind of collection this is.
+ // Transform into the identified collection.
match kind {
CollectionKind::Group => marker.end(p, NodeKind::Group),
CollectionKind::Positional => array(p, &marker),
@@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
(kind, items)
}
-/// Parse an expression or a named pair. Returns if this is a named pair.
+/// Parse an expression or a named pair, returning whether it's a spread or a
+/// named pair.
fn item(p: &mut Parser) -> ParseResult<NodeKind> {
let marker = p.marker();
if p.eat_if(&NodeKind::Dots) {
@@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> {
expr(p)?;
- if p.peek() == Some(&NodeKind::Colon) {
+ if p.at(&NodeKind::Colon) {
marker.perform(p, NodeKind::Named, |p| {
if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) {
p.eat();
expr(p)
} else {
- marker.end(
- p,
- NodeKind::Error(ErrorPosition::Full, "expected identifier".into()),
- );
+ let error =
+ NodeKind::Error(ErrorPosition::Full, "expected identifier".into());
+ marker.end(p, error);
p.eat();
-
expr(p).ok();
Err(())
}
})?;
+
Ok(NodeKind::Named)
} else {
- Ok(p.last_child().unwrap().kind().clone())
+ Ok(NodeKind::None)
}
}
@@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) {
// Parse a template block: `[...]`.
fn template(p: &mut Parser) {
p.perform(NodeKind::Template, |p| {
- p.start_group(Group::Bracket, TokenMode::Markup);
+ p.start_group(Group::Bracket);
markup(p);
p.end_group();
});
@@ -497,9 +501,9 @@ fn template(p: &mut Parser) {
/// Parse a code block: `{...}`.
fn block(p: &mut Parser) {
p.perform(NodeKind::Block, |p| {
- p.start_group(Group::Brace, TokenMode::Code);
+ p.start_group(Group::Brace);
while !p.eof() {
- p.start_group(Group::Stmt, TokenMode::Code);
+ p.start_group(Group::Stmt);
if expr(p).is_ok() && !p.eof() {
p.expected_at("semicolon or line break");
}
@@ -515,7 +519,7 @@ fn block(p: &mut Parser) {
/// Parse a function call.
fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
callee.perform(p, NodeKind::Call, |p| match p.peek_direct() {
- Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => {
+ Some(NodeKind::LeftParen | NodeKind::LeftBracket) => {
args(p, true);
Ok(())
}
@@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
fn args(p: &mut Parser, allow_template: bool) {
p.perform(NodeKind::CallArgs, |p| {
if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) {
- p.start_group(Group::Paren, TokenMode::Code);
+ p.start_group(Group::Paren);
collection(p);
p.end_group();
}
@@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult {
marker.perform(p, NodeKind::WithExpr, |p| {
p.eat_assert(&NodeKind::With);
- if p.peek() == Some(&NodeKind::LeftParen) {
+ if p.at(&NodeKind::LeftParen) {
args(p, false);
Ok(())
} else {
@@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult {
let marker = p.marker();
ident(p)?;
- if p.peek() == Some(&NodeKind::With) {
+ if p.at(&NodeKind::With) {
with_expr(p, &marker)?;
} else {
// If a parenthesis follows, this is a function definition.
let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
if has_params {
p.perform(NodeKind::ClosureParams, |p| {
- p.start_group(Group::Paren, TokenMode::Code);
+ p.start_group(Group::Paren);
let marker = p.marker();
collection(p);
params(p, &marker, true);
@@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult {
body(p)?;
if p.eat_if(&NodeKind::Else) {
- if p.peek() == Some(&NodeKind::If) {
+ if p.at(&NodeKind::If) {
if_expr(p)?;
} else {
body(p)?;
@@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult {
if !p.eat_if(&NodeKind::Star) {
// This is the list of identifiers scenario.
p.perform(NodeKind::ImportItems, |p| {
- p.start_group(Group::Imports, TokenMode::Code);
+ p.start_group(Group::Imports);
let marker = p.marker();
let items = collection(p).1;
if items == 0 {
@@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult {
return Err(());
}
}
-
Ok(())
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 4f181821..5d26ff63 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,5 +1,4 @@
-use std::ops::Range;
-use std::rc::Rc;
+use std::mem;
use super::{TokenMode, Tokens};
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
@@ -11,88 +10,63 @@ pub(crate) type ParseResult<T = ()> = Result<T, ()>;
/// A convenient token-based parser.
pub struct Parser<'s> {
- /// The parsed file.
- src: &'s str,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
+ /// Whether we are at the end of the file or of a group.
+ eof: bool,
+ /// The current token.
+ current: Option<NodeKind>,
+ /// The end byte index of the last (non-whitespace if in code mode) token.
+ prev_end: usize,
+ /// The start byte index of the peeked token.
+ current_start: usize,
/// The stack of open groups.
groups: Vec<GroupEntry>,
- /// The next token.
- next: Option<NodeKind>,
- /// The peeked token.
- /// (Same as `next` except if we are at the end of group, then `None`).
- peeked: Option<NodeKind>,
- /// The end index of the last (non-whitespace if in code mode) token.
- prev_end: usize,
- /// The start index of the peeked token.
- next_start: usize,
- /// A stack of outer children vectors.
- stack: Vec<Vec<Green>>,
/// The children of the currently built node.
children: Vec<Green>,
}
-/// A logical group of tokens, e.g. `[...]`.
-struct GroupEntry {
- /// The kind of group this is. This decides which tokens will end the group.
- /// For example, a [`Group::Paren`] will be ended by
- /// [`Token::RightParen`].
- pub kind: Group,
- /// The mode the parser was in _before_ the group started (to which we go
- /// back once the group ends).
- pub prev_mode: TokenMode,
-}
-
-/// A group, confined by optional start and end delimiters.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Group {
- /// A parenthesized group: `(...)`.
- Paren,
- /// A bracketed group: `[...]`.
- Bracket,
- /// A curly-braced group: `{...}`.
- Brace,
- /// A group ended by a semicolon or a line break: `;`, `\n`.
- Stmt,
- /// A group for a single expression, ended by a line break.
- Expr,
- /// A group for import items, ended by a semicolon, line break or `from`.
- Imports,
-}
-
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup);
- let next = tokens.next();
+ let current = tokens.next();
Self {
- src,
tokens,
- groups: vec![],
- next: next.clone(),
- peeked: next,
+ eof: current.is_none(),
+ current,
prev_end: 0,
- next_start: 0,
- stack: vec![],
+ current_start: 0,
+ groups: vec![],
children: vec![],
}
}
+ /// End the parsing process and return the last child.
+ pub fn finish(self) -> Vec<Green> {
+ self.children
+ }
+
+ /// Create a new marker.
+ pub fn marker(&mut self) -> Marker {
+ Marker(self.children.len())
+ }
+
/// Perform a subparse that wraps its result in a node with the given kind.
- pub fn perform<T, F>(&mut self, kind: NodeKind, f: F) -> T
+ pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
where
F: FnOnce(&mut Self) -> T,
{
- let prev = std::mem::take(&mut self.children);
+ let prev = mem::take(&mut self.children);
let output = f(self);
- let mut children = std::mem::replace(&mut self.children, prev);
+ let mut children = mem::replace(&mut self.children, prev);
// Trailing trivia should not be wrapped into the new node.
let mut remains = vec![];
if self.tokens.mode() == TokenMode::Code {
let len = children.len();
for n in (0 .. len).rev() {
- if !self.skip_type(&children[n].kind()) {
+ if !self.is_trivia(&children[n].kind()) {
break;
}
@@ -107,66 +81,36 @@ impl<'s> Parser<'s> {
output
}
- /// Eat and wrap the next token.
- pub fn convert(&mut self, kind: NodeKind) {
- self.eat();
- self.children.last_mut().unwrap().set_kind(kind);
- }
-
- /// End the current node and undo its existence, inling all accumulated
- /// children into its parent.
- pub fn lift(&mut self) {
- let outer = self.stack.pop().unwrap();
- let children = std::mem::replace(&mut self.children, outer);
- self.children.extend(children);
+ /// Whether the end of the source string or group is reached.
+ pub fn eof(&self) -> bool {
+ self.eof
}
- /// Add an error to the current children list.
- fn push_error(&mut self, msg: impl Into<String>) {
- self.children.push(
- GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0)
- .into(),
- );
- }
+ /// Consume the current token and also trailing trivia if in code mode.
+ pub fn eat(&mut self) {
+ self.prev_end = self.tokens.index();
+ self.bump();
- /// End the parsing process and return the last child.
- pub fn finish(&mut self) -> Rc<GreenNode> {
- match self.children.pop().unwrap() {
- Green::Node(n) => n,
- _ => panic!(),
+ if self.tokens.mode() == TokenMode::Code {
+ // Skip whitespace and comments.
+ while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
+ self.bump();
+ }
}
- }
- /// Whether the end of the source string or group is reached.
- pub fn eof(&self) -> bool {
- self.peek().is_none()
+ self.repeek();
}
- /// Consume the next token if it is the given one.
+ /// Eat if the current token it is the given one.
pub fn eat_if(&mut self, t: &NodeKind) -> bool {
- if self.peek() == Some(t) {
+ let at = self.at(t);
+ if at {
self.eat();
- true
- } else {
- false
}
+ at
}
- /// Consume the next token if the closure maps it a to `Some`-variant.
- pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
- where
- F: FnOnce(&NodeKind) -> Option<T>,
- {
- let token = self.peek()?;
- let mapped = f(token);
- if mapped.is_some() {
- self.eat();
- }
- mapped
- }
-
- /// Consume the next token if it is the given one and produce an error if
- /// not.
+ /// Eat if the current token is the given one and produce an error if not.
pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
let eaten = self.eat_if(t);
if !eaten {
@@ -175,14 +119,13 @@ impl<'s> Parser<'s> {
if eaten { Ok(()) } else { Err(()) }
}
- /// Consume the next token, debug-asserting that it is one of the given ones.
+ /// Eat, debug-asserting that the token is the given one.
pub fn eat_assert(&mut self, t: &NodeKind) {
- let next = self.peek();
- debug_assert_eq!(next, Some(t));
+ debug_assert_eq!(self.peek(), Some(t));
self.eat();
}
- /// Consume tokens while the condition is true.
+ /// Eat tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(&NodeKind) -> bool,
@@ -192,68 +135,77 @@ impl<'s> Parser<'s> {
}
}
- /// Peek at the next token without consuming it.
+ /// Eat the current token, but change its type.
+ pub fn convert(&mut self, kind: NodeKind) {
+ let idx = self.children.len();
+ self.eat();
+ if let Some(child) = self.children.get_mut(idx) {
+ child.set_kind(kind);
+ }
+ }
+
+ /// Whether the current token is of the given type.
+ pub fn at(&self, kind: &NodeKind) -> bool {
+ self.peek() == Some(kind)
+ }
+
+ /// Peek at the current token without consuming it.
pub fn peek(&self) -> Option<&NodeKind> {
- self.peeked.as_ref()
+ if self.eof { None } else { self.current.as_ref() }
}
- /// Peek at the next token if it follows immediately after the last one
- /// without any whitespace in between.
+ /// Peek at the current token, if it follows immediately after the last one
+ /// without any trivia in between.
pub fn peek_direct(&self) -> Option<&NodeKind> {
- if self.next_start() == self.prev_end() {
- self.peeked.as_ref()
+ if self.prev_end() == self.current_start() {
+ self.peek()
} else {
None
}
}
- /// Peek at the source of the next token.
+ /// Peek at the source of the current token.
pub fn peek_src(&self) -> &'s str {
- self.get(self.next_start() .. self.next_end())
+ self.tokens.scanner().get(self.current_start() .. self.current_end())
}
/// The byte index at which the last token ended.
///
- /// Refers to the end of the last _non-whitespace_ token in code mode.
+ /// Refers to the end of the last non-trivia token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
- /// The byte index at which the next token starts.
- pub fn next_start(&self) -> usize {
- self.next_start
+ /// The byte index at which the current token starts.
+ pub fn current_start(&self) -> usize {
+ self.current_start
}
- /// The byte index at which the next token will end.
- ///
- /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
- /// `None`.
- pub fn next_end(&self) -> usize {
+ /// The byte index at which the current token ends.
+ pub fn current_end(&self) -> usize {
self.tokens.index()
}
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.tokens.column(index)
- }
-
- /// Slice out part of the source string.
- pub fn get(&self, range: Range<usize>) -> &'s str {
- self.src.get(range).unwrap()
+ self.tokens.scanner().column(index)
}
/// Continue parsing in a group.
///
/// When the end delimiter of the group is reached, all subsequent calls to
- /// `eat()` and `peek()` return `None`. Parsing can only continue with
- /// a matching call to `end_group`.
+ /// `peek()` return `None`. Parsing can only continue with a matching call
+ /// to `end_group`.
///
- /// This panics if the next token does not start the given group.
- pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
+ /// This panics if the current token does not start the given group.
+ pub fn start_group(&mut self, kind: Group) {
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
- self.tokens.set_mode(mode);
- self.repeek();
+ self.tokens.set_mode(match kind {
+ Group::Bracket => TokenMode::Markup,
+ _ => TokenMode::Code,
+ });
+ self.repeek();
match kind {
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
@@ -268,12 +220,12 @@ impl<'s> Parser<'s> {
///
/// This panics if no group was started.
pub fn end_group(&mut self) {
- let prev_mode = self.tokens.mode();
+ let group_mode = self.tokens.mode();
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
self.repeek();
- let mut rescan = self.tokens.mode() != prev_mode;
+ let mut rescan = self.tokens.mode() != group_mode;
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
@@ -284,7 +236,7 @@ impl<'s> Parser<'s> {
Group::Expr => None,
Group::Imports => None,
} {
- if self.next == Some(end.clone()) {
+ if self.current.as_ref() == Some(&end) {
// Bump the delimeter and return. No need to rescan in this case.
self.eat();
rescan = false;
@@ -295,10 +247,10 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed.
if rescan {
- if prev_mode == TokenMode::Code {
+ if group_mode == TokenMode::Code {
let len = self.children.len();
for n in (0 .. len).rev() {
- if !self.skip_type(self.children[n].kind()) {
+ if !self.is_trivia(self.children[n].kind()) {
break;
}
@@ -307,129 +259,55 @@ impl<'s> Parser<'s> {
}
self.tokens.jump(self.prev_end());
- self.prev_end = self.tokens.index().into();
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
+ self.prev_end = self.tokens.index();
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
self.repeek();
}
}
- /// Add an error that `what` was expected at the given span.
- pub fn expected_at(&mut self, what: &str) {
- let mut found = self.children.len();
- for (i, node) in self.children.iter().enumerate().rev() {
- if !self.skip_type(node.kind()) {
- break;
- }
- found = i;
- }
-
- Marker(found).expected_at(self, what);
+ /// Low-level bump that consumes exactly one token without special trivia
+ /// handling.
+ fn bump(&mut self) {
+ let kind = self.current.take().unwrap();
+ let len = self.tokens.index() - self.current_start;
+ self.children.push(GreenData::new(kind, len).into());
+ self.current_start = self.tokens.index();
+ self.current = self.tokens.next();
}
- /// Eat the next token and add an error that it is not the expected `thing`.
- pub fn expected(&mut self, what: &str) {
- match self.peek().cloned() {
- Some(found) => {
- self.perform(
- NodeKind::Error(
- ErrorPosition::Full,
- format!("expected {}, found {}", what, found).into(),
- ),
- Self::eat,
- );
- }
- None => self.expected_at(what),
- }
+ /// Take another look at the current token to recheck whether it ends a
+ /// group.
+ fn repeek(&mut self) {
+ self.eof = match &self.current {
+ Some(NodeKind::RightParen) => self.inside(Group::Paren),
+ Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
+ Some(NodeKind::RightBrace) => self.inside(Group::Brace),
+ Some(NodeKind::Semicolon) => self.inside(Group::Stmt),
+ Some(NodeKind::From) => self.inside(Group::Imports),
+ Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(),
+ Some(_) => false,
+ None => true,
+ };
}
- /// Eat the next token and add an error that it is unexpected.
- pub fn unexpected(&mut self) {
- match self.peek().cloned() {
- Some(found) => {
- self.perform(
- NodeKind::Error(
- ErrorPosition::Full,
- format!("unexpected {}", found).into(),
- ),
- Self::eat,
- );
- }
- None => self.push_error("unexpected end of file"),
- }
+ /// Returns whether the given type can be skipped over.
+ fn is_trivia(&self, token: &NodeKind) -> bool {
+ Self::is_trivia_ext(token, self.stop_at_newline())
}
/// Returns whether the given type can be skipped over given the current
/// newline mode.
- pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
+ fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
match token {
- NodeKind::Space(n) => n < &1 || !stop_at_newline,
+ NodeKind::Space(n) => *n == 0 || !stop_at_newline,
NodeKind::LineComment => true,
NodeKind::BlockComment => true,
_ => false,
}
}
- /// Returns whether the given type can be skipped over.
- fn skip_type(&self, token: &NodeKind) -> bool {
- Self::skip_type_ext(token, self.stop_at_newline())
- }
-
- /// Consume the next token.
- pub fn eat(&mut self) {
- self.children.push(
- GreenData::new(
- self.next.clone().unwrap(),
- self.tokens.index() - self.next_start,
- )
- .into(),
- );
-
- self.prev_end = self.tokens.index().into();
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
-
- if self.tokens.mode() == TokenMode::Code {
- // Skip whitespace and comments.
- while self.next.as_ref().map_or(false, |x| self.skip_type(x)) {
- self.children.push(
- GreenData::new(
- self.next.clone().unwrap(),
- self.tokens.index() - self.next_start,
- )
- .into(),
- );
-
- self.next_start = self.tokens.index().into();
- self.next = self.tokens.next();
- }
- }
-
- self.repeek();
- }
-
- /// Take another look at the next token to recheck whether it ends a group.
- fn repeek(&mut self) {
- self.peeked = self.next.clone();
- let token = match self.next.as_ref() {
- Some(token) => token,
- None => return,
- };
-
- if match token {
- NodeKind::RightParen => self.inside(Group::Paren),
- NodeKind::RightBracket => self.inside(Group::Bracket),
- NodeKind::RightBrace => self.inside(Group::Brace),
- NodeKind::Semicolon => self.inside(Group::Stmt),
- NodeKind::From => self.inside(Group::Imports),
- NodeKind::Space(n) => n > &0 && self.stop_at_newline(),
- _ => false,
- } {
- self.peeked = None;
- }
- }
-
- /// Whether the active group ends at a newline.
+ /// Whether the active group must end at a newline.
fn stop_at_newline(&self) -> bool {
matches!(
self.groups.last().map(|group| group.kind),
@@ -441,28 +319,76 @@ impl<'s> Parser<'s> {
fn inside(&self, kind: Group) -> bool {
self.groups.iter().any(|g| g.kind == kind)
}
+}
- /// Returns the last child of the current stack frame.
- pub fn last_child(&self) -> Option<&Green> {
- self.children.last()
+/// Error handling.
+impl Parser<'_> {
+ /// Push an error into the children list.
+ pub fn push_error(&mut self, msg: impl Into<EcoString>) {
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.children.push(GreenData::new(error, 0).into());
}
- /// Create a new marker.
- pub fn marker(&mut self) -> Marker {
- Marker(self.children.len())
+ /// Eat the current token and add an error that it is unexpected.
+ pub fn unexpected(&mut self) {
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("unexpected {}", found);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.perform(error, Self::eat);
+ }
+ None => self.push_error("unexpected end of file"),
+ }
+ }
+
+ /// Eat the current token and add an error that it is not the expected `thing`.
+ pub fn expected(&mut self, thing: &str) {
+ match self.peek() {
+ Some(found) => {
+ let msg = format!("expected {}, found {}", thing, found);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ self.perform(error, Self::eat);
+ }
+ None => self.expected_at(thing),
+ }
+ }
+
+ /// Add an error that the `thing` was expected at the end of the last
+ /// non-trivia token.
+ pub fn expected_at(&mut self, thing: &str) {
+ let mut found = self.children.len();
+ for (i, node) in self.children.iter().enumerate().rev() {
+ if !self.is_trivia(node.kind()) {
+ break;
+ }
+ found = i;
+ }
+
+ Marker(found).expected_at(self, thing);
}
}
-/// A marker that indicates where a child may start.
+/// A marker that indicates where a node may start.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Marker(usize);
impl Marker {
- /// Wraps all children in front of the marker.
- pub fn end(&self, p: &mut Parser, kind: NodeKind) {
- let stop_nl = p.stop_at_newline();
+ /// Perform a subparse that wraps all children after the marker in a node
+ /// with the given kind.
+ pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T
+ where
+ F: FnOnce(&mut Parser) -> T,
+ {
+ let success = f(p);
+ self.end(p, kind);
+ success
+ }
+
+ /// Wrap all children after the marker in a node with the given `kind`.
+ pub fn end(self, p: &mut Parser, kind: NodeKind) {
let end = (self.0 .. p.children.len())
.rev()
- .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl))
+ .find(|&i| !p.is_trivia(p.children[i].kind()))
.unwrap_or(self.0)
+ 1;
@@ -472,47 +398,61 @@ impl Marker {
}
/// Wrap all children that do not fulfill the predicate in error nodes.
- pub fn filter_children<F>(&self, p: &mut Parser, f: F)
+ pub fn filter_children<F>(self, p: &mut Parser, f: F)
where
F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>,
{
for child in &mut p.children[self.0 ..] {
- if !((p.tokens.mode() != TokenMode::Code
- || Parser::skip_type_ext(child.kind(), false))
- || child.kind().is_error())
+ if (p.tokens.mode() == TokenMode::Markup
+ || !Parser::is_trivia_ext(child.kind(), false))
+ && !child.kind().is_error()
{
if let Err((pos, msg)) = f(child) {
- let inner = std::mem::take(child);
- *child =
- GreenNode::with_child(NodeKind::Error(pos, msg), inner).into();
+ let error = NodeKind::Error(pos, msg);
+ let inner = mem::take(child);
+ *child = GreenNode::with_child(error, inner).into();
}
}
}
}
/// Insert an error message that `what` was expected at the marker position.
- pub fn expected_at(&self, p: &mut Parser, what: &str) {
- p.children.insert(
- self.0,
- GreenData::new(
- NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()),
- 0,
- )
- .into(),
- );
- }
-
- /// Return a reference to the child after the marker.
- pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> {
- p.children.get(self.0)
+ pub fn expected_at(self, p: &mut Parser, what: &str) {
+ let msg = format!("expected {}", what);
+ let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ p.children.insert(self.0, GreenData::new(error, 0).into());
}
- pub fn perform<T, F>(&self, p: &mut Parser, kind: NodeKind, f: F) -> T
- where
- F: FnOnce(&mut Parser) -> T,
- {
- let success = f(p);
- self.end(p, kind);
- success
+ /// Return a reference to the child directly after the marker.
+ pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> {
+ p.children.get(self.0)
}
}
+
+/// A logical group of tokens, e.g. `[...]`.
+struct GroupEntry {
+ /// The kind of group this is. This decides which tokens will end the group.
+ /// For example, a [`Group::Paren`] will be ended by
+ /// [`Token::RightParen`].
+ pub kind: Group,
+ /// The mode the parser was in _before_ the group started (to which we go
+ /// back once the group ends).
+ pub prev_mode: TokenMode,
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+ /// A bracketed group: `[...]`.
+ Bracket,
+ /// A curly-braced group: `{...}`.
+ Brace,
+ /// A parenthesized group: `(...)`.
+ Paren,
+ /// A group ended by a semicolon or a line break: `;`, `\n`.
+ Stmt,
+ /// A group for a single expression, ended by a line break.
+ Expr,
+ /// A group for import items, ended by a semicolon, line break or `from`.
+ Imports,
+}
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 3fab98a4..b330dbd6 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -172,8 +172,8 @@ mod tests {
test("typst\n it!", "typst", "\n it!");
test("typst\n it!", "typst", "\n it!");
test("abc`", "abc", "`");
- test(" hi", "", " hi");
- test("`", "", "`");
+ test(" hi", "", " hi");
+ test("`", "", "`");
}
#[test]
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index aa28e1f5..494a9f0b 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -57,12 +57,6 @@ impl<'s> Tokens<'s> {
self.s.jump(index);
}
- /// The column of a given index in the source string.
- #[inline]
- pub fn column(&self, index: usize) -> usize {
- self.s.column(index)
- }
-
/// The underlying scanner.
#[inline]
pub fn scanner(&self) -> Scanner<'s> {
@@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
- let column = self.column(self.s.index() - 1);
+ let column = self.s.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') && backticks < u8::MAX {
@@ -342,10 +336,8 @@ impl<'s> Tokens<'s> {
}
}
- let terminated = found == backticks;
- let end = self.s.index() - if terminated { found as usize } else { 0 };
-
- if terminated {
+ if found == backticks {
+ let end = self.s.index() - found as usize;
NodeKind::Raw(Rc::new(resolve_raw(
column,
backticks,