diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-11-07 22:05:48 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2021-11-08 01:37:49 +0100 |
| commit | 95866d5fc9ae89a23c5754193c7de5d4fe4873b1 (patch) | |
| tree | ae408006c29ba31aa62dab7e48e9326316f89fed /src/parse | |
| parent | 8117ca9950a2027efae133f811a26a4a7bf86a8e (diff) | |
Tidy up AST
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 75 | ||||
| -rw-r--r-- | src/parse/parser.rs | 37 | ||||
| -rw-r--r-- | src/parse/resolve.rs | 14 | ||||
| -rw-r--r-- | src/parse/scanner.rs | 8 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 19 |
5 files changed, 75 insertions, 78 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index aa616fdf..505482ca 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,7 +13,7 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc<GreenNode> { @@ -197,7 +197,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { - call(p, &marker)?; + call(p, marker)?; continue; } @@ -206,7 +206,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { } if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } let op = match p.peek().and_then(BinOp::from_token) { @@ -248,7 +248,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { // Arrow means this is a closure's lone parameter. if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); - p.eat(); + p.eat_assert(&NodeKind::Arrow); marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) @@ -326,14 +326,13 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Leading colon makes this a (empty) dictionary. if colon { - dict(p, &marker); + dict(p, marker); return Ok(()); } // Arrow means this is a closure's parameter list. if p.at(&NodeKind::Arrow) { - params(p, &marker, true); - marker.end(p, NodeKind::ClosureParams); + params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } @@ -341,8 +340,8 @@ fn parenthesized(p: &mut Parser) -> ParseResult { // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), - CollectionKind::Positional => array(p, &marker), - CollectionKind::Named => dict(p, &marker), + CollectionKind::Positional => array(p, marker), + CollectionKind::Named => dict(p, marker), } Ok(()) @@ -384,7 +383,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { items += 1; if let Some(marker) = missing_coma.take() { - marker.expected_at(p, "comma"); + marker.expected(p, "comma"); } if p.eof() { @@ -419,12 +418,11 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> { if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { - if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { + if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - let error = - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + let error = NodeKind::Error(ErrorPos::Full, "expected identifier".into()); marker.end(p, error); p.eat(); expr(p).ok(); @@ -440,15 +438,10 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> { /// Convert a collection into an array, producing errors for anything other than /// expressions. -fn array(p: &mut Parser, marker: &Marker) { +fn array(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { - NodeKind::Named => Err(( - ErrorPosition::Full, - "expected expression, found named pair".into(), - )), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } + NodeKind::Named => Err("expected expression, found named pair"), + NodeKind::Spread => Err("spreading is not allowed here"), _ => Ok(()), }); marker.end(p, NodeKind::Array); @@ -456,25 +449,21 @@ fn array(p: &mut Parser, marker: &Marker) { /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. -fn dict(p: &mut Parser, marker: &Marker) { +fn dict(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), - NodeKind::Spread => { - Err((ErrorPosition::Full, "spreading is not allowed here".into())) - } - _ if x.kind().is_paren() => Ok(()), - _ => Err(( - ErrorPosition::Full, - "expected named pair, found expression".into(), - )), + NodeKind::Spread => Err("spreading is not allowed here"), + _ => Err("expected named pair, found expression"), }); marker.end(p, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. -fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { +fn params(p: &mut Parser, marker: Marker) { marker.filter_children(p, |x| match x.kind() { + kind if kind.is_paren() => Ok(()), NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( @@ -484,9 +473,9 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { { Ok(()) } - _ if allow_parens && x.kind().is_paren() => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); + marker.end(p, NodeKind::ClosureParams); } // Parse a template block: `[...]`. @@ -517,7 +506,7 @@ fn block(p: &mut Parser) { } /// Parse a function call. -fn call(p: &mut Parser, callee: &Marker) -> ParseResult { +fn call(p: &mut Parser, callee: Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); @@ -546,7 +535,7 @@ fn args(p: &mut Parser, allow_template: bool) { } /// Parse a with expression. -fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { +fn with_expr(p: &mut Parser, marker: Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); @@ -569,18 +558,16 @@ fn let_expr(p: &mut Parser) -> ParseResult { ident(p)?; if p.at(&NodeKind::With) { - with_expr(p, &marker)?; + with_expr(p, marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { - p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren); - let marker = p.marker(); - collection(p); - params(p, &marker, true); - p.end_group(); - }); + let marker = p.marker(); + p.start_group(Group::Paren); + collection(p); + p.end_group(); + params(p, marker); } if p.eat_if(&NodeKind::Eq) { @@ -671,7 +658,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { marker.filter_children(p, |n| match n.kind() { NodeKind::Ident(_) | NodeKind::Comma => Ok(()), - _ => Err((ErrorPosition::Full, "expected identifier".into())), + _ => Err("expected identifier"), }); }); }; diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a30895ad..5ebc2c17 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::mem; use super::{TokenMode, Tokens}; -use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; +use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// Allows parser methods to use the try operator. Not exposed as the parser @@ -131,11 +131,9 @@ impl<'s> Parser<'s> { /// Eat the current token, but change its type. pub fn convert(&mut self, kind: NodeKind) { - let idx = self.children.len(); + let marker = self.marker(); self.eat(); - if let Some(child) = self.children.get_mut(idx) { - child.set_kind(kind); - } + marker.convert(self, kind); } /// Whether the current token is of the given type. @@ -321,7 +319,7 @@ impl<'s> Parser<'s> { impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into<EcoString>) { - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.children.push(GreenData::new(error, 0).into()); } @@ -330,7 +328,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("unexpected {}", found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.push_error("unexpected end of file"), @@ -342,7 +340,7 @@ impl Parser<'_> { match self.peek() { Some(found) => { let msg = format!("expected {}, found {}", thing, found); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); self.perform(error, Self::eat); } None => self.expected_at(thing), @@ -352,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected_at(self, thing); + Marker(self.trivia_start()).expected(self, thing); } } @@ -384,15 +382,15 @@ impl Marker { /// Wrap all children that do not fulfill the predicate in error nodes. pub fn filter_children<F>(self, p: &mut Parser, f: F) where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + F: Fn(&Green) -> Result<(), &'static str>, { for child in &mut p.children[self.0 ..] { if (p.tokens.mode() == TokenMode::Markup || !Parser::is_trivia_ext(child.kind(), false)) && !child.kind().is_error() { - if let Err((pos, msg)) = f(child) { - let error = NodeKind::Error(pos, msg); + if let Err(msg) = f(child) { + let error = NodeKind::Error(ErrorPos::Full, msg.into()); let inner = mem::take(child); *child = GreenNode::with_child(error, inner).into(); } @@ -401,16 +399,23 @@ impl Marker { } /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(self, p: &mut Parser, what: &str) { + pub fn expected(self, p: &mut Parser, what: &str) { let msg = format!("expected {}", what); - let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + let error = NodeKind::Error(ErrorPos::Full, msg.into()); p.children.insert(self.0, GreenData::new(error, 0).into()); } - /// Return a reference to the child directly after the marker. - pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + /// Peek at the child directly after the marker. + pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> { p.children.get(self.0) } + + /// Convert the child directly after marker. + pub fn convert(self, p: &mut Parser, kind: NodeKind) { + if let Some(child) = p.children.get_mut(self.0) { + child.convert(kind); + } + } } /// A logical group of tokens, e.g. `[...]`. diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index b330dbd6..6719f41d 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,4 +1,4 @@ -use super::{is_newline, Scanner}; +use super::{is_ident, is_newline, Scanner}; use crate::syntax::RawData; use crate::util::EcoString; @@ -51,7 +51,7 @@ pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); RawData { - lang: Some(tag.into()), + lang: is_ident(tag).then(|| tag.into()), text: text.into(), backticks, block, @@ -201,15 +201,15 @@ mod tests { // More than one backtick with lang tag. test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); - test(0, 2, "♥", Some("♥"), "", false); + test(0, 2, "♥", None, "", false); // Trimming of whitespace (tested more thoroughly in separate test). - test(0, 2, " a", Some(""), "a", false); - test(0, 2, " a", Some(""), " a", false); - test(0, 2, " \na", Some(""), "a", true); + test(0, 2, " a", None, "a", false); + test(0, 2, " a", None, " a", false); + test(0, 2, " \na", None, "a", true); // Dedenting - test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true); + test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); } #[test] diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 92a2333d..ea06a2e0 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -182,7 +182,13 @@ pub fn is_newline(character: char) -> bool { ) } -/// Whether a string is a valid identifier. +/// Whether a string is a valid unicode identifier. +/// +/// In addition to what is specified in the [Unicode Standard][uax31], we allow: +/// - `_` as a starting character, +/// - `_` and `-` as continuing characters. +/// +/// [uax31]: http://www.unicode.org/reports/tr31/ #[inline] pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 494a9f0b..1523cd64 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -236,20 +236,19 @@ impl<'s> Tokens<'s> { 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); self.s.eat_assert('{'); - let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - + let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); if self.s.eat_if('}') { if let Some(c) = resolve_hex(&sequence) { NodeKind::UnicodeEscape(c) } else { NodeKind::Error( - ErrorPosition::Full, + ErrorPos::Full, "invalid unicode escape sequence".into(), ) } } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, "expected closing brace".into(), ) } @@ -348,7 +347,7 @@ impl<'s> Tokens<'s> { let noun = if remaining == 1 { "backtick" } else { "backticks" }; NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if found == 0 { format!("expected {} {}", remaining, noun) } else { @@ -396,7 +395,7 @@ impl<'s> Tokens<'s> { })) } else { NodeKind::Error( - ErrorPosition::End, + ErrorPos::End, if !display || (!escaped && dollar) { "expected closing dollar sign" } else { @@ -487,7 +486,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) + NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -555,7 +554,7 @@ mod tests { use super::*; - use ErrorPosition::*; + use ErrorPos::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; @@ -564,7 +563,7 @@ mod tests { NodeKind::UnicodeEscape(c) } - fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + fn Error(pos: ErrorPos, message: &str) -> NodeKind { NodeKind::Error(pos, message.into()) } @@ -881,7 +880,7 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup: "````🚀````" => Raw("", None, 4, false)); t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } |
