Dynamic values, Types, Arrays, and Dictionaries 🚀

- Identifiers are now evaluated as variables instead of being plain values
- Constants like `left` or `bold` are stored as dynamic values containing the respective Rust types
- We now distinguish between arrays and dictionaries to make things more intuitive (at the cost of somewhat more complex parsing)
- Spans were removed from collections (arrays, dictionaries); function arguments still have spans for the top-level values to enable good diagnostics
author: Laurenz <laurmaedje@gmail.com> 2021-01-02 19:37:10 +0100
committer: Laurenz <laurmaedje@gmail.com> 2021-01-02 19:37:10 +0100
commit: 1c40dc42e7bc7b799b77f06d25414aca59a044ba (patch)
tree: ea8bdedaebf59f5bc601346b0108236c7264a29d /src/parse
parent: 8cad78481cd52680317032c3bb84cacda5666489 (diff)
4 files changed, 293 insertions, 154 deletions
diff --git a/src/parse/collection.rs b/src/parse/collection.rs
new file mode 100644
index 00000000..db267dbe
--- /dev/null
+++ b/src/parse/collection.rs
@@ -0,0 +1,142 @@
+use super::*;
+use crate::diag::Deco;
+
+/// Parse the arguments to a function call.
+pub fn arguments(p: &mut Parser) -> Arguments {
+    collection(p, vec![])
+}
+
+/// Parse a parenthesized group, which can be either of:
+/// - Array literal
+/// - Dictionary literal
+/// - Parenthesized expression
+pub fn parenthesized(p: &mut Parser) -> Expr {
+    p.start_group(Group::Paren);
+    let state = if p.eat_if(Token::Colon) {
+        collection(p, State::Dict(vec![]))
+    } else {
+        collection(p, State::Unknown)
+    };
+    p.end_group();
+    state.into_expr()
+}
+
+/// Parse a collection.
+fn collection<T: Collection>(p: &mut Parser, mut collection: T) -> T {
+    let mut missing_coma = None;
+
+    while !p.eof() {
+        if let Some(arg) = p.span_if(argument) {
+            collection.push_arg(p, arg);
+
+            if let Some(pos) = missing_coma.take() {
+                p.diag_expected_at("comma", pos);
+            }
+
+            if p.eof() {
+                break;
+            }
+
+            let behind = p.last_end();
+            if p.eat_if(Token::Comma) {
+                collection.push_comma();
+            } else {
+                missing_coma = Some(behind);
+            }
+        }
+    }
+
+    collection
+}
+
+/// Parse an expression or a named pair.
+fn argument(p: &mut Parser) -> Option<Argument> {
+    let first = p.span_if(expr)?;
+    if p.eat_if(Token::Colon) {
+        if let Expr::Lit(Lit::Ident(ident)) = first.v {
+            let expr = p.span_if(expr)?;
+            let name = ident.with_span(first.span);
+            p.deco(Deco::Name.with_span(name.span));
+            Some(Argument::Named(Named { name, expr }))
+        } else {
+            p.diag(error!(first.span, "name must be identifier"));
+            expr(p);
+            None
+        }
+    } else {
+        Some(Argument::Pos(first))
+    }
+}
+
+/// Abstraction for comma-separated list of expression / named pairs.
+trait Collection {
+    fn push_arg(&mut self, p: &mut Parser, arg: Spanned<Argument>);
+    fn push_comma(&mut self) {}
+}
+
+impl Collection for Arguments {
+    fn push_arg(&mut self, _: &mut Parser, arg: Spanned<Argument>) {
+        self.push(arg.v);
+    }
+}
+
+/// State of collection parsing.
+#[derive(Debug)]
+enum State {
+    Unknown,
+    Expr(Spanned<Expr>),
+    Array(Array),
+    Dict(Dict),
+}
+
+impl State {
+    fn into_expr(self) -> Expr {
+        match self {
+            Self::Unknown => Expr::Lit(Lit::Array(vec![])),
+            Self::Expr(expr) => expr.v,
+            Self::Array(array) => Expr::Lit(Lit::Array(array)),
+            Self::Dict(dict) => Expr::Lit(Lit::Dict(dict)),
+        }
+    }
+}
+
+impl Collection for State {
+    fn push_arg(&mut self, p: &mut Parser, arg: Spanned<Argument>) {
+        match self {
+            Self::Unknown => match arg.v {
+                Argument::Pos(expr) => *self = Self::Expr(expr),
+                Argument::Named(named) => *self = Self::Dict(vec![named]),
+            },
+            Self::Expr(prev) => match arg.v {
+                Argument::Pos(expr) => *self = Self::Array(vec![take(prev), expr]),
+                Argument::Named(_) => diag(p, arg),
+            },
+            Self::Array(array) => match arg.v {
+                Argument::Pos(expr) => array.push(expr),
+                Argument::Named(_) => diag(p, arg),
+            },
+            Self::Dict(dict) => match arg.v {
+                Argument::Pos(_) => diag(p, arg),
+                Argument::Named(named) => dict.push(named),
+            },
+        }
+    }
+
+    fn push_comma(&mut self) {
+        if let Self::Expr(expr) = self {
+            *self = Self::Array(vec![take(expr)]);
+        }
+    }
+}
+
+fn take(expr: &mut Spanned<Expr>) -> Spanned<Expr> {
+    // Replace with anything, it's overwritten anyway.
+    std::mem::replace(expr, Spanned::zero(Expr::Lit(Lit::Bool(false))))
+}
+
+fn diag(p: &mut Parser, arg: Spanned<Argument>) {
+    p.diag(error!(arg.span, "{}", match arg.v {
+        Argument::Pos(_) => "expected named pair, found expression",
+        Argument::Named(_) => "expected expression, found named pair",
+    }));
+}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 7880dd7a..912a34d0 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,5 +1,6 @@
 //! Parsing and tokenization.
 
+mod collection;
 mod lines;
 mod parser;
 mod resolve;
@@ -15,10 +16,11 @@ pub use tokens::*;
 use std::str::FromStr;
 
 use crate::color::RgbaColor;
-use crate::diag::{Deco, Pass};
-use crate::eval::DictKey;
+use crate::diag::Pass;
 use crate::syntax::*;
 
+use collection::{arguments, parenthesized};
+
 /// Parse a string of source code.
 pub fn parse(src: &str) -> Pass<SynTree> {
     let mut p = Parser::new(src);
@@ -153,6 +155,9 @@ fn block_expr(p: &mut Parser) -> Option<Expr> {
     p.push_mode(TokenMode::Header);
     p.start_group(Group::Brace);
     let expr = expr(p);
+    while !p.eof() {
+        p.diag_unexpected();
+    }
     p.pop_mode();
     p.end_group();
     expr
@@ -161,7 +166,7 @@ fn block_expr(p: &mut Parser) -> Option<Expr> {
 /// Parse a parenthesized function call.
 fn paren_call(p: &mut Parser, name: Spanned<Ident>) -> ExprCall {
     p.start_group(Group::Paren);
-    let args = p.span(|p| dict_contents(p).0);
+    let args = p.span(arguments);
     p.end_group();
     ExprCall { name, args }
 }
@@ -184,16 +189,16 @@ fn bracket_call(p: &mut Parser) -> ExprCall {
     p.end_group();
 
     if p.peek() == Some(Token::LeftBracket) {
-        let expr = p.span(|p| Expr::Lit(Lit::Content(bracket_body(p))));
-        inner.span.expand(expr.span);
-        inner.v.args.v.0.push(LitDictEntry { key: None, expr });
+        let body = p.span(|p| Expr::Lit(Lit::Content(bracket_body(p))));
+        inner.span.expand(body.span);
+        inner.v.args.v.push(Argument::Pos(body));
     }
 
     while let Some(mut top) = outer.pop() {
         let span = inner.span;
         let node = inner.map(|c| SynNode::Expr(Expr::Call(c)));
         let expr = Expr::Lit(Lit::Content(vec![node])).with_span(span);
-        top.v.args.v.0.push(LitDictEntry { key: None, expr });
+        top.v.args.v.push(Argument::Pos(expr));
         inner = top;
     }
 
@@ -215,9 +220,9 @@ fn bracket_subheader(p: &mut Parser) -> ExprCall {
         Ident(String::new()).with_span(start)
     });
 
-    let args = p.span(|p| dict_contents(p).0);
-
+    let args = p.span(arguments);
     p.end_group();
+
     ExprCall { name, args }
 }
 
@@ -231,75 +236,6 @@ fn bracket_body(p: &mut Parser) -> SynTree {
     tree
 }
 
-/// Parse the contents of a dictionary.
-fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
-    let mut dict = LitDict::new();
-    let mut missing_coma = None;
-    let mut comma_and_keyless = true;
-
-    while !p.eof() {
-        if let Some(entry) = dict_entry(p) {
-            let behind = entry.expr.span.end;
-            if let Some(pos) = missing_coma.take() {
-                p.diag_expected_at("comma", pos);
-            }
-
-            if let Some(key) = &entry.key {
-                comma_and_keyless = false;
-                p.deco(Deco::Name.with_span(key.span));
-            }
-
-            dict.0.push(entry);
-            if p.eof() {
-                break;
-            }
-
-            if p.eat_if(Token::Comma) {
-                comma_and_keyless = false;
-            } else {
-                missing_coma = Some(behind);
-            }
-        }
-    }
-
-    let coercible = comma_and_keyless && !dict.0.is_empty();
-    (dict, coercible)
-}
-
-/// Parse a single entry in a dictionary.
-fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
-    if let Some(ident) = p.span_if(ident) {
-        match p.peek() {
-            // Key-value pair.
-            Some(Token::Colon) => {
-                p.eat_assert(Token::Colon);
-                p.span_if(expr).map(|expr| LitDictEntry {
-                    key: Some(ident.map(|id| DictKey::Str(id.0))),
-                    expr,
-                })
-            }
-
-            // Function call.
-            Some(Token::LeftParen) => Some(LitDictEntry {
-                key: None,
-                expr: {
-                    let start = ident.span.start;
-                    let call = paren_call(p, ident);
-                    Expr::Call(call).with_span(start .. p.last_end())
-                },
-            }),
-
-            // Just an identifier.
-            _ => Some(LitDictEntry {
-                key: None,
-                expr: ident.map(|id| Expr::Lit(Lit::Ident(id))),
-            }),
-        }
-    } else {
-        p.span_if(expr).map(|expr| LitDictEntry { key: None, expr })
-    }
-}
-
 /// Parse an expression: `term (+ term)*`.
 fn expr(p: &mut Parser) -> Option<Expr> {
     binops(p, term, |token| match token {
@@ -418,19 +354,6 @@ fn content(p: &mut Parser) -> SynTree {
     tree
 }
 
-/// Parse a parenthesized expression: `(a + b)`, `(1, name: "value").
-fn parenthesized(p: &mut Parser) -> Expr {
-    p.start_group(Group::Paren);
-    let (dict, coercible) = dict_contents(p);
-    let expr = if coercible {
-        dict.0.into_iter().next().expect("dict is coercible").expr.v
-    } else {
-        Expr::Lit(Lit::Dict(dict))
-    };
-    p.end_group();
-    expr
-}
-
 /// Parse an identifier.
 fn ident(p: &mut Parser) -> Option<Ident> {
     p.eat_map(|token| match token {
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
index 230a5dba..0c8998b5 100644
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@@ -5,7 +5,6 @@ use std::fmt::Debug;
 use super::parse;
 use crate::color::RgbaColor;
 use crate::diag::{Diag, Level, Pass};
-use crate::eval::DictKey;
 use crate::geom::Unit;
 use crate::syntax::*;
 
@@ -154,21 +153,38 @@ fn Unary(op: impl Into<Spanned<UnOp>>, expr: impl Into<Spanned<Expr>>) -> Expr {
     })
 }
 
+macro_rules! Array {
+    (@$($expr:expr),* $(,)?) => {
+        vec![$(into!($expr)),*]
+    };
+    ($($tts:tt)*) => (Expr::Lit(Lit::Array(Array![@$($tts)*])));
+}
+
 macro_rules! Dict {
-    (@$($a:expr $(=> $b:expr)?),* $(,)?) => {
-        LitDict(vec![$(#[allow(unused)] {
-            let key: Option<Spanned<DictKey>> = None;
-            let expr = $a;
-            $(
-                let key = Some(into!($a).map(|s: &str| s.into()));
-                let expr = $b;
-            )?
-            LitDictEntry { key, expr: into!(expr) }
-        }),*])
+    (@$($name:expr => $expr:expr),* $(,)?) => {
+        vec![$(Named {
+            name: into!($name).map(|s: &str| Ident(s.into())),
+            expr: into!($expr)
+        }),*]
     };
     ($($tts:tt)*) => (Expr::Lit(Lit::Dict(Dict![@$($tts)*])));
 }
 
+macro_rules! Args {
+    (@$a:expr) => {
+        Argument::Pos(into!($a))
+    };
+    (@$a:expr => $b:expr) => {
+        Argument::Named(Named {
+            name: into!($a).map(|s: &str| Ident(s.into())),
+            expr: into!($b)
+        })
+    };
+    ($($a:expr $(=> $b:expr)?),* $(,)?) => {
+        vec![$(Args!(@$a $(=> $b)?)),*]
+    };
+}
+
 macro_rules! Content {
     (@$($node:expr),* $(,)?) => (vec![$(into!($node)),*]);
     ($($tts:tt)*) => (Expr::Lit(Lit::Content(Content![@$($tts)*])));
@@ -188,10 +204,6 @@ macro_rules! Call {
     ($($tts:tt)*) => (SynNode::Expr(Call!(@$($tts)*)));
 }
 
-macro_rules! Args {
-    ($($tts:tt)*) => (Dict![@$($tts)*]);
-}
-
 #[test]
 fn test_parse_comments() {
     // In body.
@@ -316,10 +328,9 @@ fn test_parse_groups() {
         errors: [S(1..2, "expected function name, found closing paren"),
                  S(2..2, "expected closing bracket")]);
 
-    t!("[v {]}"
-        nodes: [Call!("v", Args![Content![]])],
-        errors: [S(4..4, "expected closing brace"),
-                 S(5..6, "unexpected closing brace")]);
+    t!("[v {*]_"
+        nodes: [Call!("v", Args![Content![Strong]]), Emph],
+        errors: [S(5..5, "expected closing brace")]);
 
     // Test brace group.
     t!("{1 + [}"
@@ -329,7 +340,7 @@ fn test_parse_groups() {
 
     // Test subheader group.
     t!("[v (|u )]"
-        nodes: [Call!("v", Args![Dict![], Content![Call!("u")]])],
+        nodes: [Call!("v", Args![Array![], Content![Call!("u")]])],
         errors: [S(4..4, "expected closing paren"),
                  S(7..8, "expected expression, found closing paren")]);
 }
@@ -348,6 +359,12 @@ fn test_parse_blocks() {
         nodes: [],
         errors: [S(1..1, "expected expression"),
                  S(3..5, "expected expression, found invalid token")]);
+
+    // Too much stuff.
+    t!("{1 #{} end"
+        nodes: [Block(Int(1)), Space, Text("end")],
+        errors: [S(3..4, "unexpected hex value"),
+                 S(4..5, "unexpected opening brace")]);
 }
 
 #[test]
@@ -385,7 +402,7 @@ fn test_parse_bracket_funcs() {
         nodes: [Call!("", Args![Int(1)])],
         errors: [S(1..2, "expected function name, found hex value")]);
 
-    // String header eats closing bracket.
+    // String in header eats closing bracket.
     t!(r#"[v "]"#
         nodes: [Call!("v", Args![Str("]")])],
         errors: [S(5..5, "expected quote"),
@@ -400,8 +417,8 @@ fn test_parse_bracket_funcs() {
 #[test]
 fn test_parse_chaining() {
     // Basic.
-    t!("[a | b]"     Call!("a", Args![Content![Call!("b")]]));
-    t!("[a | b | c]" Call!("a", Args![Content![
+    t!("[a | b]" Call!("a", Args![Content![Call!("b")]]));
+    t!("[a|b|c]" Call!("a", Args![Content![
         Call!("b", Args![Content![Call!("c")]])
     ]]));
 
@@ -428,16 +445,14 @@ fn test_parse_chaining() {
 #[test]
 fn test_parse_arguments() {
     // Bracket functions.
-    t!("[v 1]"   Call!("v", Args![Int(1)]));
-    t!("[v 1,]"  Call!("v", Args![Int(1)]));
     t!("[v a]"   Call!("v", Args![Id("a")]));
-    t!("[v a,]"  Call!("v", Args![Id("a")]));
+    t!("[v 1,]"  Call!("v", Args![Int(1)]));
     t!("[v a:2]" Call!("v", Args!["a" => Int(2)]));
 
-    // Parenthesized function with nested dictionary literal.
+    // Parenthesized function with nested array literal.
     t!(r#"{f(1, a: (2, 3), #004, b: "five")}"# Block(Call!(@"f", Args![
         Int(1),
-        "a" => Dict![Int(2), Int(3)],
+        "a" => Array![Int(2), Int(3)],
         Color(RgbaColor::new(0, 0, 0x44, 0xff)),
         "b" => Str("five"),
     ])));
@@ -447,56 +462,111 @@ fn test_parse_arguments() {
         nodes: [Call!("v", Args![])],
         errors: [S(3..5, "expected expression, found end of block comment")]);
 
+    // Bad expression.
+    t!("[v a:1:]"
+        nodes: [Call!("v", Args!["a" => Int(1)])],
+        errors: [S(6..7, "expected expression, found colon")]);
+
     // Missing comma between arguments.
     t!("[v 1 2]"
         nodes: [Call!("v", Args![Int(1), Int(2)])],
         errors: [S(4..4, "expected comma")]);
 
-    // Missing expression after name.
-    t!("[v a:]"
-        nodes: [Call!("v", Args![])],
-        errors: [S(5..5, "expected expression")]);
-
-    // Bad expression after name.
-    t!("[v a:1:]"
-        nodes: [Call!("v", Args!["a" => Int(1)])],
-        errors: [S(6..7, "expected expression, found colon")]);
-
-    // Name has to be identifier. Number parsed as positional argument.
+    // Name has to be identifier.
     t!("[v 1:]"
-        nodes: [Call!("v", Args![Int(1)])],
-        errors: [S(4..5, "expected expression, found colon")]);
+        nodes: [Call!("v", Args![])],
+        errors: [S(3..4, "name must be identifier"),
+                 S(5..5, "expected expression")]);
 
-    // Parsed as two positional arguments.
+    // Name has to be identifier.
     t!("[v 1:2]"
-        nodes: [Call!("v", Args![Int(1), Int(2)])],
-        errors: [S(4..5, "expected expression, found colon"),
-                 S(4..4, "expected comma")]);
+        nodes: [Call!("v", Args![])],
+        errors: [S(3..4, "name must be identifier")]);
 }
 
 #[test]
-fn test_parse_dict_literals() {
-    // Basic.
-    t!("{()}" Block(Dict![]));
-
-    // With spans.
-    t!("{(1, two: 2)}"
-        nodes: [S(0..13, Block(Dict![
-            S(2..3, Int(1)),
-            S(5..8, "two") => S(10..11, Int(2)),
-        ]))],
+fn test_parse_arrays() {
+    // Empty array.
+    t!("{()}" Block(Array![]));
+
+    // Array with one item and trailing comma + spans.
+    t!("{-(1,)}"
+        nodes: [S(0..7, Block(Unary(
+            S(1..2, Neg),
+            S(2..6, Array![S(3..4, Int(1))])
+        )))],
         spans: true);
 
+    // Array with three items and trailing comma.
+    t!(r#"{("one", 2, #003,)}"# Block(Array![
+        Str("one"),
+        Int(2),
+        Color(RgbaColor::new(0, 0, 0x33, 0xff))
+    ]));
+
     // Unclosed.
     t!("{(}"
-        nodes: [Block(Dict![])],
+        nodes: [Block(Array![])],
         errors: [S(2..2, "expected closing paren")]);
+
+    // Missing comma + invalid token.
+    t!("{(1*/2)}"
+        nodes: [Block(Array![Int(1), Int(2)])],
+        errors: [S(3..5, "expected expression, found end of block comment"),
+                 S(3..3, "expected comma")]);
+
+    // Invalid token.
+    t!("{(1, 1u 2)}"
+        nodes: [Block(Array![Int(1), Int(2)])],
+        errors: [S(5..7, "expected expression, found invalid token")]);
+
+    // Coerced to expression with leading comma.
+    t!("{(,1)}"
+        nodes: [Block(Int(1))],
+        errors: [S(2..3, "expected expression, found comma")]);
+
+    // Missing expression after name makes this an array.
+    t!("{(a:)}"
+        nodes: [Block(Array![])],
+        errors: [S(4..4, "expected expression")]);
+
+    // Expected expression, found named pair.
+    t!("{(1, b: 2)}"
+        nodes: [Block(Array![Int(1)])],
+        errors: [S(5..9, "expected expression, found named pair")]);
+}
+
+#[test]
+fn test_parse_dictionaries() {
+    // Empty dictionary.
+    t!("{(:)}" Block(Dict![]));
+
+    // Dictionary with two pairs + spans.
+    t!("{(one: 1, two: 2)}"
+        nodes: [S(0..18, Block(Dict![
+            S(2..5, "one") => S(7..8, Int(1)),
+            S(10..13, "two") => S(15..16, Int(2)),
+        ]))],
+        spans: true);
+
+    // Expected named pair, found expression.
+    t!("{(a: 1, b)}"
+        nodes: [Block(Dict!["a" => Int(1)])],
+        errors: [S(8..9, "expected named pair, found expression")]);
+
+    // Dictionary marker followed by more stuff.
+    t!("{(:1 b:2, true::)}"
+        nodes: [Block(Dict!["b" => Int(2)])],
+        errors: [S(3..4, "expected named pair, found expression"),
+                 S(4..4, "expected comma"),
+                 S(10..14, "name must be identifier"),
+                 S(15..16, "expected expression, found colon")]);
 }
 
 #[test]
 fn test_parse_expressions() {
-    // Parenthesis.
-    t!("{(x)}" Block(Id("x")));
+    // Parentheses.
+    t!("{(x)}{(1)}" Block(Id("x")), Block(Int(1)));
 
     // Unary operations.
     t!("{-1}"  Block(Unary(Neg, Int(1))));
@@ -561,4 +631,12 @@ fn test_parse_values() {
     t!("{#a5}"
         nodes: [Block(Color(RgbaColor::new(0, 0, 0, 0xff)))],
         errors: [S(1..4, "invalid color")]);
+
+    // Content.
+    t!("{{*Hi*}}" Block(Content![Strong, Text("Hi"), Strong]));
+
+    // Invalid tokens.
+    t!("{1u}"
+        nodes: [],
+        errors: [S(1..3, "expected expression, found invalid token")]);
 }
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index d7919763..a9692a58 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -477,13 +477,9 @@ mod tests {
     }
 
     #[test]
-    fn test_length_from_str_parses_correct_value_and_unit() {
+    fn test_length_from_str() {
         assert_eq!(parse_length("2.5cm"), Some((2.5, Cm)));
         assert_eq!(parse_length("1.e+2cm"), Some((100.0, Cm)));
-    }
-
-    #[test]
-    fn test_length_from_str_works_with_non_ascii_chars() {
         assert_eq!(parse_length("123🚚"), None);
     }
author	Laurenz <laurmaedje@gmail.com>	2021-01-02 19:37:10 +0100
committer	Laurenz <laurmaedje@gmail.com>	2021-01-02 19:37:10 +0100
commit	1c40dc42e7bc7b799b77f06d25414aca59a044ba (patch)
tree	ea8bdedaebf59f5bc601346b0108236c7264a29d /src/parse
parent	8cad78481cd52680317032c3bb84cacda5666489 (diff)