summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
author: Laurenz <laurmaedje@gmail.com> 2021-11-07 22:05:48 +0100
committer: Laurenz <laurmaedje@gmail.com> 2021-11-08 01:37:49 +0100
commit: 95866d5fc9ae89a23c5754193c7de5d4fe4873b1 (patch)
tree: ae408006c29ba31aa62dab7e48e9326316f89fed /src/parse
parent: 8117ca9950a2027efae133f811a26a4a7bf86a8e (diff)
Tidy up AST
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 75
-rw-r--r-- src/parse/parser.rs 37
-rw-r--r-- src/parse/resolve.rs 14
-rw-r--r-- src/parse/scanner.rs 8
-rw-r--r-- src/parse/tokens.rs 19
5 files changed, 75 insertions, 78 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index aa616fdf..505482ca 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -13,7 +13,7 @@ pub use tokens::*;
use std::rc::Rc;
use crate::syntax::ast::{Associativity, BinOp, UnOp};
-use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind};
+use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
/// Parse a source file.
pub fn parse(source: &str) -> Rc<GreenNode> {
@@ -197,7 +197,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
p.peek_direct(),
Some(NodeKind::LeftParen | NodeKind::LeftBracket)
) {
- call(p, &marker)?;
+ call(p, marker)?;
continue;
}
@@ -206,7 +206,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
}
if p.at(&NodeKind::With) {
- with_expr(p, &marker)?;
+ with_expr(p, marker)?;
}
let op = match p.peek().and_then(BinOp::from_token) {
@@ -248,7 +248,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
// Arrow means this is a closure's lone parameter.
if !atomic && p.at(&NodeKind::Arrow) {
marker.end(p, NodeKind::ClosureParams);
- p.eat();
+ p.eat_assert(&NodeKind::Arrow);
marker.perform(p, NodeKind::Closure, expr)
} else {
Ok(())
@@ -326,14 +326,13 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
// Leading colon makes this a (empty) dictionary.
if colon {
- dict(p, &marker);
+ dict(p, marker);
return Ok(());
}
// Arrow means this is a closure's parameter list.
if p.at(&NodeKind::Arrow) {
- params(p, &marker, true);
- marker.end(p, NodeKind::ClosureParams);
+ params(p, marker);
p.eat_assert(&NodeKind::Arrow);
return marker.perform(p, NodeKind::Closure, expr);
}
@@ -341,8 +340,8 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
// Transform into the identified collection.
match kind {
CollectionKind::Group => marker.end(p, NodeKind::Group),
- CollectionKind::Positional => array(p, &marker),
- CollectionKind::Named => dict(p, &marker),
+ CollectionKind::Positional => array(p, marker),
+ CollectionKind::Named => dict(p, marker),
}
Ok(())
@@ -384,7 +383,7 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
items += 1;
if let Some(marker) = missing_coma.take() {
- marker.expected_at(p, "comma");
+ marker.expected(p, "comma");
}
if p.eof() {
@@ -419,12 +418,11 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> {
if p.at(&NodeKind::Colon) {
marker.perform(p, NodeKind::Named, |p| {
- if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) {
+ if matches!(marker.peek(p).unwrap().kind(), &NodeKind::Ident(_)) {
p.eat();
expr(p)
} else {
- let error =
- NodeKind::Error(ErrorPosition::Full, "expected identifier".into());
+ let error = NodeKind::Error(ErrorPos::Full, "expected identifier".into());
marker.end(p, error);
p.eat();
expr(p).ok();
@@ -440,15 +438,10 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> {
/// Convert a collection into an array, producing errors for anything other than
/// expressions.
-fn array(p: &mut Parser, marker: &Marker) {
+fn array(p: &mut Parser, marker: Marker) {
marker.filter_children(p, |x| match x.kind() {
- NodeKind::Named => Err((
- ErrorPosition::Full,
- "expected expression, found named pair".into(),
- )),
- NodeKind::Spread => {
- Err((ErrorPosition::Full, "spreading is not allowed here".into()))
- }
+ NodeKind::Named => Err("expected expression, found named pair"),
+ NodeKind::Spread => Err("spreading is not allowed here"),
_ => Ok(()),
});
marker.end(p, NodeKind::Array);
@@ -456,25 +449,21 @@ fn array(p: &mut Parser, marker: &Marker) {
/// Convert a collection into a dictionary, producing errors for anything other
/// than named pairs.
-fn dict(p: &mut Parser, marker: &Marker) {
+fn dict(p: &mut Parser, marker: Marker) {
marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()),
- NodeKind::Spread => {
- Err((ErrorPosition::Full, "spreading is not allowed here".into()))
- }
- _ if x.kind().is_paren() => Ok(()),
- _ => Err((
- ErrorPosition::Full,
- "expected named pair, found expression".into(),
- )),
+ NodeKind::Spread => Err("spreading is not allowed here"),
+ _ => Err("expected named pair, found expression"),
});
marker.end(p, NodeKind::Dict);
}
/// Convert a collection into a list of parameters, producing errors for
/// anything other than identifiers, spread operations and named pairs.
-fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) {
+fn params(p: &mut Parser, marker: Marker) {
marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()),
NodeKind::Spread
if matches!(
@@ -484,9 +473,9 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) {
{
Ok(())
}
- _ if allow_parens && x.kind().is_paren() => Ok(()),
- _ => Err((ErrorPosition::Full, "expected identifier".into())),
+ _ => Err("expected identifier"),
});
+ marker.end(p, NodeKind::ClosureParams);
}
// Parse a template block: `[...]`.
@@ -517,7 +506,7 @@ fn block(p: &mut Parser) {
}
/// Parse a function call.
-fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
+fn call(p: &mut Parser, callee: Marker) -> ParseResult {
callee.perform(p, NodeKind::Call, |p| match p.peek_direct() {
Some(NodeKind::LeftParen | NodeKind::LeftBracket) => {
args(p, true);
@@ -546,7 +535,7 @@ fn args(p: &mut Parser, allow_template: bool) {
}
/// Parse a with expression.
-fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult {
+fn with_expr(p: &mut Parser, marker: Marker) -> ParseResult {
marker.perform(p, NodeKind::WithExpr, |p| {
p.eat_assert(&NodeKind::With);
@@ -569,18 +558,16 @@ fn let_expr(p: &mut Parser) -> ParseResult {
ident(p)?;
if p.at(&NodeKind::With) {
- with_expr(p, &marker)?;
+ with_expr(p, marker)?;
} else {
// If a parenthesis follows, this is a function definition.
let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
if has_params {
- p.perform(NodeKind::ClosureParams, |p| {
- p.start_group(Group::Paren);
- let marker = p.marker();
- collection(p);
- params(p, &marker, true);
- p.end_group();
- });
+ let marker = p.marker();
+ p.start_group(Group::Paren);
+ collection(p);
+ p.end_group();
+ params(p, marker);
}
if p.eat_if(&NodeKind::Eq) {
@@ -671,7 +658,7 @@ fn import_expr(p: &mut Parser) -> ParseResult {
marker.filter_children(p, |n| match n.kind() {
NodeKind::Ident(_) | NodeKind::Comma => Ok(()),
- _ => Err((ErrorPosition::Full, "expected identifier".into())),
+ _ => Err("expected identifier"),
});
});
};
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index a30895ad..5ebc2c17 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,7 @@
use std::mem;
use super::{TokenMode, Tokens};
-use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
+use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
use crate::util::EcoString;
/// Allows parser methods to use the try operator. Not exposed as the parser
@@ -131,11 +131,9 @@ impl<'s> Parser<'s> {
/// Eat the current token, but change its type.
pub fn convert(&mut self, kind: NodeKind) {
- let idx = self.children.len();
+ let marker = self.marker();
self.eat();
- if let Some(child) = self.children.get_mut(idx) {
- child.set_kind(kind);
- }
+ marker.convert(self, kind);
}
/// Whether the current token is of the given type.
@@ -321,7 +319,7 @@ impl<'s> Parser<'s> {
impl Parser<'_> {
/// Push an error into the children list.
pub fn push_error(&mut self, msg: impl Into<EcoString>) {
- let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
self.children.push(GreenData::new(error, 0).into());
}
@@ -330,7 +328,7 @@ impl Parser<'_> {
match self.peek() {
Some(found) => {
let msg = format!("unexpected {}", found);
- let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
self.perform(error, Self::eat);
}
None => self.push_error("unexpected end of file"),
@@ -342,7 +340,7 @@ impl Parser<'_> {
match self.peek() {
Some(found) => {
let msg = format!("expected {}, found {}", thing, found);
- let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
self.perform(error, Self::eat);
}
None => self.expected_at(thing),
@@ -352,7 +350,7 @@ impl Parser<'_> {
/// Add an error that the `thing` was expected at the end of the last
/// non-trivia token.
pub fn expected_at(&mut self, thing: &str) {
- Marker(self.trivia_start()).expected_at(self, thing);
+ Marker(self.trivia_start()).expected(self, thing);
}
}
@@ -384,15 +382,15 @@ impl Marker {
/// Wrap all children that do not fulfill the predicate in error nodes.
pub fn filter_children<F>(self, p: &mut Parser, f: F)
where
- F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>,
+ F: Fn(&Green) -> Result<(), &'static str>,
{
for child in &mut p.children[self.0 ..] {
if (p.tokens.mode() == TokenMode::Markup
|| !Parser::is_trivia_ext(child.kind(), false))
&& !child.kind().is_error()
{
- if let Err((pos, msg)) = f(child) {
- let error = NodeKind::Error(pos, msg);
+ if let Err(msg) = f(child) {
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
let inner = mem::take(child);
*child = GreenNode::with_child(error, inner).into();
}
@@ -401,16 +399,23 @@ impl Marker {
}
/// Insert an error message that `what` was expected at the marker position.
- pub fn expected_at(self, p: &mut Parser, what: &str) {
+ pub fn expected(self, p: &mut Parser, what: &str) {
let msg = format!("expected {}", what);
- let error = NodeKind::Error(ErrorPosition::Full, msg.into());
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
p.children.insert(self.0, GreenData::new(error, 0).into());
}
- /// Return a reference to the child directly after the marker.
- pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> {
+ /// Peek at the child directly after the marker.
+ pub fn peek<'a>(self, p: &'a Parser) -> Option<&'a Green> {
p.children.get(self.0)
}
+
+ /// Convert the child directly after marker.
+ pub fn convert(self, p: &mut Parser, kind: NodeKind) {
+ if let Some(child) = p.children.get_mut(self.0) {
+ child.convert(kind);
+ }
+ }
}
/// A logical group of tokens, e.g. `[...]`.
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index b330dbd6..6719f41d 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,4 +1,4 @@
-use super::{is_newline, Scanner};
+use super::{is_ident, is_newline, Scanner};
use crate::syntax::RawData;
use crate::util::EcoString;
@@ -51,7 +51,7 @@ pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
RawData {
- lang: Some(tag.into()),
+ lang: is_ident(tag).then(|| tag.into()),
text: text.into(),
backticks,
block,
@@ -201,15 +201,15 @@ mod tests {
// More than one backtick with lang tag.
test(0, 2, "js alert()", Some("js"), "alert()", false);
test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true);
- test(0, 2, "♥", Some("♥"), "", false);
+ test(0, 2, "♥", None, "", false);
// Trimming of whitespace (tested more thoroughly in separate test).
- test(0, 2, " a", Some(""), "a", false);
- test(0, 2, " a", Some(""), " a", false);
- test(0, 2, " \na", Some(""), "a", true);
+ test(0, 2, " a", None, "a", false);
+ test(0, 2, " a", None, " a", false);
+ test(0, 2, " \na", None, "a", true);
// Dedenting
- test(2, 3, " def foo():\n bar()", Some(""), "def foo():\n bar()", true);
+ test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true);
}
#[test]
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 92a2333d..ea06a2e0 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -182,7 +182,13 @@ pub fn is_newline(character: char) -> bool {
)
}
-/// Whether a string is a valid identifier.
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
#[inline]
pub fn is_ident(string: &str) -> bool {
let mut chars = string.chars();
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 494a9f0b..1523cd64 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -236,20 +236,19 @@ impl<'s> Tokens<'s> {
'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u');
self.s.eat_assert('{');
- let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
-
+ let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(&sequence) {
NodeKind::UnicodeEscape(c)
} else {
NodeKind::Error(
- ErrorPosition::Full,
+ ErrorPos::Full,
"invalid unicode escape sequence".into(),
)
}
} else {
NodeKind::Error(
- ErrorPosition::End,
+ ErrorPos::End,
"expected closing brace".into(),
)
}
@@ -348,7 +347,7 @@ impl<'s> Tokens<'s> {
let noun = if remaining == 1 { "backtick" } else { "backticks" };
NodeKind::Error(
- ErrorPosition::End,
+ ErrorPos::End,
if found == 0 {
format!("expected {} {}", remaining, noun)
} else {
@@ -396,7 +395,7 @@ impl<'s> Tokens<'s> {
}))
} else {
NodeKind::Error(
- ErrorPosition::End,
+ ErrorPos::End,
if !display || (!escaped && dollar) {
"expected closing dollar sign"
} else {
@@ -487,7 +486,7 @@ impl<'s> Tokens<'s> {
if self.s.eat_if('"') {
NodeKind::Str(string)
} else {
- NodeKind::Error(ErrorPosition::End, "expected quote".into())
+ NodeKind::Error(ErrorPos::End, "expected quote".into())
}
}
@@ -555,7 +554,7 @@ mod tests {
use super::*;
- use ErrorPosition::*;
+ use ErrorPos::*;
use NodeKind::*;
use Option::None;
use TokenMode::{Code, Markup};
@@ -564,7 +563,7 @@ mod tests {
NodeKind::UnicodeEscape(c)
}
- fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
+ fn Error(pos: ErrorPos, message: &str) -> NodeKind {
NodeKind::Error(pos, message.into())
}
@@ -881,7 +880,7 @@ mod tests {
// Test more backticks.
t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false));
- t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false));
+ t!(Markup: "````🚀````" => Raw("", None, 4, false));
t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false));
}