Reorganize syntax types into two modules 📦

author: Laurenz <laurmaedje@gmail.com> 2020-09-30 12:38:02 +0200
committer: Laurenz <laurmaedje@gmail.com> 2020-09-30 12:45:33 +0200
commit: bc1b4216a802d09e8d00dd277a0e204d49bcaa7f (patch)
tree: 31dabd48d5062fdd684797ed6053bf279ba67490 /src/parse
parent: fee5170a68a6ef97108d731a4873787894f65a06 (diff)
5 files changed, 2096 insertions, 0 deletions
diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs
new file mode 100644
index 00000000..55b1fe67
--- /dev/null
+++ b/src/parse/escaping.rs
@@ -0,0 +1,243 @@
+use super::is_newline_char;
+
+/// Resolves all escape sequences in a string.
+pub fn unescape_string(string: &str) -> String {
+    let mut iter = string.chars().peekable();
+    let mut out = String::with_capacity(string.len());
+
+    while let Some(c) = iter.next() {
+        if c == '\\' {
+            match iter.next() {
+                Some('\\') => out.push('\\'),
+                Some('"') => out.push('"'),
+                Some('u') if iter.peek() == Some(&'{') => {
+                    iter.next();
+
+                    let mut sequence = String::new();
+                    let terminated = loop {
+                        match iter.peek() {
+                            // TODO: Feedback that closing brace is missing.
+                            Some('}') => {
+                                iter.next();
+                                break true;
+                            }
+                            Some(&c) if c.is_ascii_hexdigit() => {
+                                iter.next();
+                                sequence.push(c);
+                            }
+                            _ => break false,
+                        }
+                    };
+
+                    // TODO: Feedback that escape sequence is wrong.
+                    if let Some(c) = hex_to_char(&sequence) {
+                        out.push(c);
+                    } else {
+                        out.push_str("\\u{");
+                        out.push_str(&sequence);
+                        if terminated {
+                            out.push('}');
+                        }
+                    }
+                }
+                Some('n') => out.push('\n'),
+                Some('t') => out.push('\t'),
+                Some(c) => {
+                    out.push('\\');
+                    out.push(c);
+                }
+                None => out.push('\\'),
+            }
+        } else {
+            out.push(c);
+        }
+    }
+
+    out
+}
+
+/// Resolves all escape sequences in raw markup (between backticks) and splits it
+/// into lines.
+pub fn unescape_raw(raw: &str) -> Vec<String> {
+    let mut iter = raw.chars();
+    let mut text = String::new();
+
+    while let Some(c) = iter.next() {
+        if c == '\\' {
+            if let Some(c) = iter.next() {
+                if c != '\\' && c != '`' {
+                    text.push('\\');
+                }
+
+                text.push(c);
+            } else {
+                text.push('\\');
+            }
+        } else {
+            text.push(c);
+        }
+    }
+
+    split_lines(&text)
+}
+
+/// Resolves all escape sequences in code markup (between triple backticks) and splits
+/// it into lines.
+pub fn unescape_code(raw: &str) -> Vec<String> {
+    let mut iter = raw.chars().peekable();
+    let mut text = String::new();
+    let mut backticks = 0u32;
+    let mut update_backtick_count;
+
+    while let Some(c) = iter.next() {
+        update_backtick_count = true;
+
+        if c == '\\' && backticks > 0 {
+            let mut tail = String::new();
+            let mut escape_success = false;
+            let mut backticks_after_slash = 0u32;
+
+            while let Some(&s) = iter.peek() {
+                match s {
+                    '\\' => {
+                        if backticks_after_slash == 0 {
+                            tail.push('\\');
+                        } else {
+                            // A pattern like `\`\` should fail to
+                            // escape and just be printed verbatim.
+                            break;
+                        }
+                    }
+                    '`' => {
+                        tail.push(s);
+                        backticks_after_slash += 1;
+                        if backticks_after_slash == 2 {
+                            escape_success = true;
+                            iter.next();
+                            break;
+                        }
+                    }
+                    _ => break,
+                }
+
+                iter.next();
+            }
+
+            if !escape_success {
+                text.push(c);
+                backticks = backticks_after_slash;
+                update_backtick_count = false;
+            } else {
+                backticks = 0;
+            }
+
+            text.push_str(&tail);
+        } else {
+            text.push(c);
+        }
+
+        if update_backtick_count {
+            if c == '`' {
+                backticks += 1;
+            } else {
+                backticks = 0;
+            }
+        }
+    }
+
+    split_lines(&text)
+}
+
+/// Converts a hexadecimal sequence (without braces or "\u") into a character.
+pub fn hex_to_char(sequence: &str) -> Option<char> {
+    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+}
+
+/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
+pub fn split_lines(text: &str) -> Vec<String> {
+    let mut iter = text.chars().peekable();
+    let mut line = String::new();
+    let mut lines = Vec::new();
+
+    while let Some(c) = iter.next() {
+        if is_newline_char(c) {
+            if c == '\r' && iter.peek() == Some(&'\n') {
+                iter.next();
+            }
+
+            lines.push(std::mem::take(&mut line));
+        } else {
+            line.push(c);
+        }
+    }
+
+    lines.push(line);
+    lines
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    #[rustfmt::skip]
+    fn test_unescape_strings() {
+        fn test(string: &str, expected: &str) {
+            assert_eq!(unescape_string(string), expected.to_string());
+        }
+
+        test(r#"hello world"#,  "hello world");
+        test(r#"hello\nworld"#, "hello\nworld");
+        test(r#"a\"bc"#,        "a\"bc");
+        test(r#"a\u{2603}bc"#,  "a☃bc");
+        test(r#"a\u{26c3bg"#,   "a𦰻g");
+        test(r#"av\u{6797"#,    "av林");
+        test(r#"a\\"#,          "a\\");
+        test(r#"a\\\nbc"#,      "a\\\nbc");
+        test(r#"a\tbc"#,        "a\tbc");
+        test(r"🌎",             "🌎");
+        test(r"🌎\",            r"🌎\");
+        test(r"\🌎",            r"\🌎");
+    }
+
+    #[test]
+    #[rustfmt::skip]
+    fn test_unescape_raws() {
+        fn test(raw: &str, expected: Vec<&str>) {
+            assert_eq!(unescape_raw(raw), expected);
+        }
+
+        test("raw\\`",     vec!["raw`"]);
+        test("raw\\\\`",   vec!["raw\\`"]);
+        test("raw\ntext",  vec!["raw", "text"]);
+        test("a\r\nb",     vec!["a", "b"]);
+        test("a\n\nb",     vec!["a", "", "b"]);
+        test("a\r\x0Bb",   vec!["a", "", "b"]);
+        test("a\r\n\r\nb", vec!["a", "", "b"]);
+        test("raw\\a",     vec!["raw\\a"]);
+        test("raw\\",      vec!["raw\\"]);
+    }
+
+    #[test]
+    #[rustfmt::skip]
+    fn test_unescape_code() {
+        fn test(raw: &str, expected: Vec<&str>) {
+            assert_eq!(unescape_code(raw), expected);
+        }
+
+        test("code\\`",       vec!["code\\`"]);
+        test("code`\\``",     vec!["code```"]);
+        test("code`\\`a",     vec!["code`\\`a"]);
+        test("code``hi`\\``", vec!["code``hi```"]);
+        test("code`\\\\``",   vec!["code`\\``"]);
+        test("code`\\`\\`go", vec!["code`\\`\\`go"]);
+        test("code`\\`\\``",  vec!["code`\\```"]);
+        test("code\ntext",    vec!["code", "text"]);
+        test("a\r\nb",        vec!["a", "b"]);
+        test("a\n\nb",        vec!["a", "", "b"]);
+        test("a\r\x0Bb",      vec!["a", "", "b"]);
+        test("a\r\n\r\nb",    vec!["a", "", "b"]);
+        test("code\\a",       vec!["code\\a"]);
+        test("code\\",        vec!["code\\"]);
+    }
+}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
new file mode 100644
index 00000000..340e89ea
--- /dev/null
+++ b/src/parse/mod.rs
@@ -0,0 +1,11 @@
+//! Parsing and tokenization.
+
+mod escaping;
+mod parser;
+mod tokenizer;
+
+pub use parser::*;
+pub use tokenizer::*;
+
+#[cfg(test)]
+mod tests;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
new file mode 100644
index 00000000..2ff30397
--- /dev/null
+++ b/src/parse/parser.rs
@@ -0,0 +1,658 @@
+use std::str::FromStr;
+
+use super::escaping::*;
+use super::*;
+use crate::color::RgbaColor;
+use crate::compute::table::SpannedEntry;
+use crate::syntax::*;
+use crate::{Feedback, Pass};
+
+/// Parse a string of source code.
+pub fn parse(src: &str) -> Pass<SyntaxTree> {
+    Parser::new(src).parse()
+}
+
+struct Parser<'s> {
+    tokens: Tokens<'s>,
+    peeked: Option<Option<Spanned<Token<'s>>>>,
+    delimiters: Vec<(Pos, Token<'static>)>,
+    at_block_or_line_start: bool,
+    feedback: Feedback,
+}
+
+impl<'s> Parser<'s> {
+    fn new(src: &'s str) -> Self {
+        Self {
+            tokens: Tokens::new(src, TokenMode::Body),
+            peeked: None,
+            delimiters: vec![],
+            at_block_or_line_start: true,
+            feedback: Feedback::new(),
+        }
+    }
+
+    fn parse(mut self) -> Pass<SyntaxTree> {
+        let tree = self.parse_body_contents();
+        Pass::new(tree, self.feedback)
+    }
+}
+
+// Typesetting content.
+impl Parser<'_> {
+    fn parse_body_contents(&mut self) -> SyntaxTree {
+        let mut tree = SyntaxTree::new();
+
+        self.at_block_or_line_start = true;
+        while !self.eof() {
+            if let Some(node) = self.parse_node() {
+                tree.push(node);
+            }
+        }
+
+        tree
+    }
+
+    fn parse_node(&mut self) -> Option<Spanned<SyntaxNode>> {
+        let token = self.peek()?;
+        let end = Span::at(token.span.end);
+
+        // Set block or line start to false because most nodes have that effect, but
+        // remember the old value to actually check it for hashtags and because comments
+        // and spaces want to retain it.
+        let was_at_block_or_line_start = self.at_block_or_line_start;
+        self.at_block_or_line_start = false;
+
+        Some(match token.v {
+            // Starting from two newlines counts as a paragraph break, a single
+            // newline does not.
+            Token::Space(n) => {
+                if n == 0 {
+                    self.at_block_or_line_start = was_at_block_or_line_start;
+                } else if n >= 1 {
+                    self.at_block_or_line_start = true;
+                }
+
+                self.with_span(if n >= 2 {
+                    SyntaxNode::Parbreak
+                } else {
+                    SyntaxNode::Spacing
+                })
+            }
+
+            Token::LineComment(_) | Token::BlockComment(_) => {
+                self.at_block_or_line_start = was_at_block_or_line_start;
+                self.eat();
+                return None;
+            }
+
+            Token::LeftBracket => {
+                let call = self.parse_bracket_call(false);
+                self.at_block_or_line_start = false;
+                call.map(SyntaxNode::Call)
+            }
+
+            Token::Star => self.with_span(SyntaxNode::ToggleBolder),
+            Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
+            Token::Backslash => self.with_span(SyntaxNode::Linebreak),
+
+            Token::Hashtag if was_at_block_or_line_start => {
+                self.parse_heading().map(SyntaxNode::Heading)
+            }
+
+            Token::Raw { raw, terminated } => {
+                if !terminated {
+                    error!(@self.feedback, end, "expected backtick");
+                }
+                self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
+            }
+
+            Token::Code { lang, raw, terminated } => {
+                if !terminated {
+                    error!(@self.feedback, end, "expected backticks");
+                }
+
+                let lang = lang.and_then(|lang| {
+                    if let Some(ident) = Ident::new(lang.v) {
+                        Some(Spanned::new(ident, lang.span))
+                    } else {
+                        error!(@self.feedback, lang.span, "invalid identifier");
+                        None
+                    }
+                });
+
+                let mut lines = unescape_code(raw);
+                let block = lines.len() > 1;
+
+                if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
+                    lines.pop();
+                }
+
+                self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
+            }
+
+            Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
+            Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())),
+
+            Token::UnicodeEscape { sequence, terminated } => {
+                if !terminated {
+                    error!(@self.feedback, end, "expected closing brace");
+                }
+
+                if let Some(c) = hex_to_char(sequence) {
+                    self.with_span(SyntaxNode::Text(c.to_string()))
+                } else {
+                    error!(@self.feedback, token.span, "invalid unicode escape sequence");
+                    self.eat();
+                    return None;
+                }
+            }
+
+            unexpected => {
+                error!(@self.feedback, token.span, "unexpected {}", unexpected.name());
+                self.eat();
+                return None;
+            }
+        })
+    }
+
+    fn parse_heading(&mut self) -> Spanned<Heading> {
+        let start = self.pos();
+        self.assert(Token::Hashtag);
+
+        let mut level = 0;
+        while self.peekv() == Some(Token::Hashtag) {
+            level += 1;
+            self.eat();
+        }
+
+        let span = Span::new(start, self.pos());
+        let level = Spanned::new(level, span);
+
+        if level.v > 5 {
+            warning!(
+                @self.feedback, level.span,
+                "section depth larger than 6 has no effect",
+            );
+        }
+
+        self.skip_ws();
+
+        let mut tree = SyntaxTree::new();
+        while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) {
+            if let Some(node) = self.parse_node() {
+                tree.push(node);
+            }
+        }
+
+        let span = Span::new(start, self.pos());
+        Spanned::new(Heading { level, tree }, span)
+    }
+}
+
+// Function calls.
+impl Parser<'_> {
+    fn parse_bracket_call(&mut self, chained: bool) -> Spanned<CallExpr> {
+        let before_bracket = self.pos();
+        if !chained {
+            self.start_group(Group::Bracket);
+            self.tokens.push_mode(TokenMode::Header);
+        }
+
+        let before_name = self.pos();
+        self.start_group(Group::Subheader);
+        self.skip_ws();
+        let name = self.parse_ident().unwrap_or_else(|| {
+            self.expected_found_or_at("function name", before_name);
+            Spanned::new(Ident(String::new()), Span::at(before_name))
+        });
+
+        self.skip_ws();
+
+        let mut args = match self.eatv() {
+            Some(Token::Colon) => self.parse_table_contents().0,
+            Some(_) => {
+                self.expected_at("colon", name.span.end);
+                while self.eat().is_some() {}
+                TableExpr::new()
+            }
+            None => TableExpr::new(),
+        };
+
+        self.end_group();
+        self.skip_ws();
+        let (has_chained_child, end) = if self.peek().is_some() {
+            let item = self.parse_bracket_call(true);
+            let span = item.span;
+            let t = vec![item.map(SyntaxNode::Call)];
+            args.push(SpannedEntry::val(Spanned::new(Expr::Tree(t), span)));
+            (true, span.end)
+        } else {
+            self.tokens.pop_mode();
+            (false, self.end_group().end)
+        };
+
+        let start = if chained { before_name } else { before_bracket };
+        let mut span = Span::new(start, end);
+
+        if self.check(Token::LeftBracket) && !has_chained_child {
+            self.start_group(Group::Bracket);
+            self.tokens.push_mode(TokenMode::Body);
+
+            let body = self.parse_body_contents();
+
+            self.tokens.pop_mode();
+            let body_span = self.end_group();
+
+            let expr = Expr::Tree(body);
+            args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
+            span.expand(body_span);
+        }
+
+        Spanned::new(CallExpr { name, args }, span)
+    }
+
+    fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
+        self.start_group(Group::Paren);
+        let args = self.parse_table_contents().0;
+        let args_span = self.end_group();
+        let span = Span::merge(name.span, args_span);
+        Spanned::new(CallExpr { name, args }, span)
+    }
+}
+
+// Tables.
+impl Parser<'_> {
+    fn parse_table_contents(&mut self) -> (TableExpr, bool) {
+        let mut table = TableExpr::new();
+        let mut comma_and_keyless = true;
+
+        while {
+            self.skip_ws();
+            !self.eof()
+        } {
+            let (key, val) = if let Some(ident) = self.parse_ident() {
+                self.skip_ws();
+
+                match self.peekv() {
+                    Some(Token::Equals) => {
+                        self.eat();
+                        self.skip_ws();
+                        if let Some(value) = self.parse_expr() {
+                            (Some(ident), value)
+                        } else {
+                            self.expected("value");
+                            continue;
+                        }
+                    }
+
+                    Some(Token::LeftParen) => {
+                        let call = self.parse_paren_call(ident);
+                        (None, call.map(Expr::Call))
+                    }
+
+                    _ => (None, ident.map(Expr::Ident)),
+                }
+            } else if let Some(value) = self.parse_expr() {
+                (None, value)
+            } else {
+                self.expected("value");
+                continue;
+            };
+
+            let behind = val.span.end;
+            if let Some(key) = key {
+                comma_and_keyless = false;
+                table.insert(key.v.0, SpannedEntry::new(key.span, val));
+                self.feedback
+                    .decorations
+                    .push(Spanned::new(Decoration::TableKey, key.span));
+            } else {
+                table.push(SpannedEntry::val(val));
+            }
+
+            if {
+                self.skip_ws();
+                self.eof()
+            } {
+                break;
+            }
+
+            self.expect_at(Token::Comma, behind);
+            comma_and_keyless = false;
+        }
+
+        let coercable = comma_and_keyless && !table.is_empty();
+        (table, coercable)
+    }
+}
+
+type Binop = fn(Box<Spanned<Expr>>, Box<Spanned<Expr>>) -> Expr;
+
+// Expressions and values.
+impl Parser<'_> {
+    fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
+        self.parse_binops("summand", Self::parse_term, |token| match token {
+            Token::Plus => Some(Expr::Add),
+            Token::Hyphen => Some(Expr::Sub),
+            _ => None,
+        })
+    }
+
+    fn parse_term(&mut self) -> Option<Spanned<Expr>> {
+        self.parse_binops("factor", Self::parse_factor, |token| match token {
+            Token::Star => Some(Expr::Mul),
+            Token::Slash => Some(Expr::Div),
+            _ => None,
+        })
+    }
+
+    /// Parse expression of the form `<operand> (<op> <operand>)*`.
+    fn parse_binops(
+        &mut self,
+        operand_name: &str,
+        mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>,
+        mut parse_op: impl FnMut(Token) -> Option<Binop>,
+    ) -> Option<Spanned<Expr>> {
+        let mut left = parse_operand(self)?;
+
+        self.skip_ws();
+        while let Some(token) = self.peek() {
+            if let Some(op) = parse_op(token.v) {
+                self.eat();
+                self.skip_ws();
+
+                if let Some(right) = parse_operand(self) {
+                    let span = Span::merge(left.span, right.span);
+                    let v = op(Box::new(left), Box::new(right));
+                    left = Spanned::new(v, span);
+                    self.skip_ws();
+                    continue;
+                }
+
+                error!(
+                    @self.feedback, Span::merge(left.span, token.span),
+                    "missing right {}", operand_name,
+                );
+            }
+            break;
+        }
+
+        Some(left)
+    }
+
+    fn parse_factor(&mut self) -> Option<Spanned<Expr>> {
+        if let Some(hyph) = self.check_eat(Token::Hyphen) {
+            self.skip_ws();
+            if let Some(factor) = self.parse_factor() {
+                let span = Span::merge(hyph.span, factor.span);
+                Some(Spanned::new(Expr::Neg(Box::new(factor)), span))
+            } else {
+                error!(@self.feedback, hyph.span, "dangling minus");
+                None
+            }
+        } else {
+            self.parse_value()
+        }
+    }
+
+    fn parse_value(&mut self) -> Option<Spanned<Expr>> {
+        let Spanned { v: token, span } = self.peek()?;
+        Some(match token {
+            // This could be a function call or an identifier.
+            Token::Ident(id) => {
+                let name = Spanned::new(Ident(id.to_string()), span);
+                self.eat();
+                self.skip_ws();
+                if self.check(Token::LeftParen) {
+                    self.parse_paren_call(name).map(Expr::Call)
+                } else {
+                    name.map(Expr::Ident)
+                }
+            }
+
+            Token::Str { string, terminated } => {
+                if !terminated {
+                    self.expected_at("quote", span.end);
+                }
+                self.with_span(Expr::Str(unescape_string(string)))
+            }
+
+            Token::Bool(b) => self.with_span(Expr::Bool(b)),
+            Token::Number(n) => self.with_span(Expr::Number(n)),
+            Token::Length(s) => self.with_span(Expr::Length(s)),
+            Token::Hex(s) => {
+                if let Ok(color) = RgbaColor::from_str(s) {
+                    self.with_span(Expr::Color(color))
+                } else {
+                    // Heal color by assuming black.
+                    error!(@self.feedback, span, "invalid color");
+                    let healed = RgbaColor::new_healed(0, 0, 0, 255);
+                    self.with_span(Expr::Color(healed))
+                }
+            }
+
+            // This could be a table or a parenthesized expression. We parse as
+            // a table in any case and coerce the table into a value if it is
+            // coercable (length 1 and no trailing comma).
+            Token::LeftParen => {
+                self.start_group(Group::Paren);
+                let (table, coercable) = self.parse_table_contents();
+                let span = self.end_group();
+
+                let expr = if coercable {
+                    table.into_values().next().expect("table is coercable").val.v
+                } else {
+                    Expr::Table(table)
+                };
+
+                Spanned::new(expr, span)
+            }
+
+            // This is a content expression.
+            Token::LeftBrace => {
+                self.start_group(Group::Brace);
+                self.tokens.push_mode(TokenMode::Body);
+
+                let tree = self.parse_body_contents();
+
+                self.tokens.pop_mode();
+                let span = self.end_group();
+                Spanned::new(Expr::Tree(tree), span)
+            }
+
+            // This is a bracketed function call.
+            Token::LeftBracket => {
+                let call = self.parse_bracket_call(false);
+                let tree = vec![call.map(SyntaxNode::Call)];
+                Spanned::new(Expr::Tree(tree), span)
+            }
+
+            _ => return None,
+        })
+    }
+
+    fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
+        self.peek().and_then(|token| match token.v {
+            Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
+            _ => None,
+        })
+    }
+}
+
+// Error handling.
+impl Parser<'_> {
+    fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
+        if self.check(token) {
+            self.eat();
+            true
+        } else {
+            self.expected_at(token.name(), pos);
+            false
+        }
+    }
+
+    fn expected(&mut self, thing: &str) {
+        if let Some(found) = self.eat() {
+            error!(
+                @self.feedback, found.span,
+                "expected {}, found {}", thing, found.v.name(),
+            );
+        } else {
+            error!(@self.feedback, Span::at(self.pos()), "expected {}", thing);
+        }
+    }
+
+    fn expected_at(&mut self, thing: &str, pos: Pos) {
+        error!(@self.feedback, Span::at(pos), "expected {}", thing);
+    }
+
+    fn expected_found_or_at(&mut self, thing: &str, pos: Pos) {
+        if self.eof() {
+            self.expected_at(thing, pos)
+        } else {
+            self.expected(thing);
+        }
+    }
+}
+
+// Parsing primitives.
+impl<'s> Parser<'s> {
+    fn start_group(&mut self, group: Group) {
+        let start = self.pos();
+        if let Some(start_token) = group.start() {
+            self.assert(start_token);
+        }
+        self.delimiters.push((start, group.end()));
+    }
+
+    fn end_group(&mut self) -> Span {
+        let peeked = self.peek();
+
+        let (start, end_token) = self.delimiters.pop().expect("group was not started");
+
+        if end_token != Token::Chain && peeked != None {
+            self.delimiters.push((start, end_token));
+            assert_eq!(peeked, None, "unfinished group");
+        }
+
+        match self.peeked.unwrap() {
+            Some(token) if token.v == end_token => {
+                self.peeked = None;
+                Span::new(start, token.span.end)
+            }
+            _ => {
+                let end = self.pos();
+                if end_token != Token::Chain {
+                    error!(
+                        @self.feedback, Span::at(end),
+                        "expected {}", end_token.name(),
+                    );
+                }
+                Span::new(start, end)
+            }
+        }
+    }
+
+    fn skip_ws(&mut self) {
+        while matches!(
+            self.peekv(),
+            Some(Token::Space(_)) |
+            Some(Token::LineComment(_)) |
+            Some(Token::BlockComment(_))
+        ) {
+            self.eat();
+        }
+    }
+
+    fn eatv(&mut self) -> Option<Token<'s>> {
+        self.eat().map(Spanned::value)
+    }
+
+    fn peekv(&mut self) -> Option<Token<'s>> {
+        self.peek().map(Spanned::value)
+    }
+
+    fn assert(&mut self, token: Token<'_>) {
+        assert!(self.check_eat(token).is_some());
+    }
+
+    fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
+        if self.check(token) { self.eat() } else { None }
+    }
+
+    /// Checks whether the next token is equal to the given token.
+    fn check(&mut self, token: Token<'_>) -> bool {
+        self.peekv() == Some(token)
+    }
+
+    fn with_span<T>(&mut self, v: T) -> Spanned<T> {
+        let span = self.eat().expect("expected token").span;
+        Spanned::new(v, span)
+    }
+
+    fn eof(&mut self) -> bool {
+        self.peek().is_none()
+    }
+
+    fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
+        let token = self.peek()?;
+        self.peeked = None;
+        Some(token)
+    }
+
+    fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
+        let tokens = &mut self.tokens;
+        let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
+
+        // Check for unclosed groups.
+        if Group::is_delimiter(token.v) {
+            if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
+                return None;
+            }
+        }
+
+        Some(token)
+    }
+
+    fn pos(&self) -> Pos {
+        self.peeked
+            .flatten()
+            .map(|s| s.span.start)
+            .unwrap_or_else(|| self.tokens.pos())
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum Group {
+    Paren,
+    Bracket,
+    Brace,
+    Subheader,
+}
+
+impl Group {
+    fn is_delimiter(token: Token<'_>) -> bool {
+        matches!(
+            token,
+            Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain
+        )
+    }
+
+    fn start(self) -> Option<Token<'static>> {
+        match self {
+            Self::Paren => Some(Token::LeftParen),
+            Self::Bracket => Some(Token::LeftBracket),
+            Self::Brace => Some(Token::LeftBrace),
+            Self::Subheader => None,
+        }
+    }
+
+    fn end(self) -> Token<'static> {
+        match self {
+            Self::Paren => Token::RightParen,
+            Self::Bracket => Token::RightBracket,
+            Self::Brace => Token::RightBrace,
+            Self::Subheader => Token::Chain,
+        }
+    }
+}
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
new file mode 100644
index 00000000..70517bd8
--- /dev/null
+++ b/src/parse/tests.rs
@@ -0,0 +1,545 @@
+#![allow(non_snake_case)]
+
+use std::fmt::Debug;
+
+use super::parse;
+use crate::color::RgbaColor;
+use crate::compute::table::SpannedEntry;
+use crate::length::Length;
+use crate::syntax::*;
+
+// ------------------------------ Construct Syntax Nodes ------------------------------ //
+
+use Decoration::*;
+use SyntaxNode::{
+    Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I,
+};
+
+fn T(text: &str) -> SyntaxNode {
+    SyntaxNode::Text(text.to_string())
+}
+
+macro_rules! H {
+    ($level:expr, $($tts:tt)*) => {
+        SyntaxNode::Heading(Heading {
+            level: Spanned::zero($level),
+            tree: Tree![@$($tts)*],
+        })
+    };
+}
+
+macro_rules! R {
+    ($($line:expr),* $(,)?) => {
+        SyntaxNode::Raw(vec![$($line.to_string()),*])
+    };
+}
+
+macro_rules! C {
+    ($lang:expr, $($line:expr),* $(,)?) => {{
+        let lines = vec![$($line.to_string()) ,*];
+        SyntaxNode::Code(Code {
+            lang: $lang,
+            block: lines.len() > 1,
+            lines,
+        })
+    }};
+}
+
+fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> {
+    Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string())))
+}
+
+macro_rules! F {
+    ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) }
+}
+
+// ------------------------------- Construct Expressions ------------------------------ //
+
+use Expr::{Bool, Color, Length as Len, Number as Num};
+
+fn Id(ident: &str) -> Expr {
+    Expr::Ident(Ident(ident.to_string()))
+}
+fn Str(string: &str) -> Expr {
+    Expr::Str(string.to_string())
+}
+
+macro_rules! Table {
+    (@table=$table:expr,) => {};
+    (@table=$table:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{
+        let key = Into::<Spanned<&str>>::into($key);
+        let val = Into::<Spanned<Expr>>::into($value);
+        $table.insert(key.v, SpannedEntry::new(key.span, val));
+        Table![@table=$table, $($($tts)*)?];
+    }};
+    (@table=$table:expr, $value:expr $(, $($tts:tt)*)?) => {
+        let val = Into::<Spanned<Expr>>::into($value);
+        $table.push(SpannedEntry::val(val));
+        Table![@table=$table, $($($tts)*)?];
+    };
+    (@$($tts:tt)*) => {{
+        #[allow(unused_mut)]
+        let mut table = TableExpr::new();
+        Table![@table=table, $($tts)*];
+        table
+    }};
+    ($($tts:tt)*) => { Expr::Table(Table![@$($tts)*]) };
+}
+
+macro_rules! Tree {
+    (@$($node:expr),* $(,)?) => {
+        vec![$(Into::<Spanned<SyntaxNode>>::into($node)),*]
+    };
+    ($($tts:tt)*) => { Expr::Tree(Tree![@$($tts)*]) };
+}
+
+macro_rules! Call {
+    (@$name:expr $(; $($tts:tt)*)?) => {{
+        let name = Into::<Spanned<&str>>::into($name);
+        CallExpr {
+            name: name.map(|n| Ident(n.to_string())),
+            args: Table![@$($($tts)*)?],
+        }
+    }};
+    ($($tts:tt)*) => { Expr::Call(Call![@$($tts)*]) };
+}
+
+fn Neg<T: Into<Spanned<Expr>>>(e1: T) -> Expr {
+    Expr::Neg(Box::new(e1.into()))
+}
+fn Add<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
+    Expr::Add(Box::new(e1.into()), Box::new(e2.into()))
+}
+fn Sub<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
+    Expr::Sub(Box::new(e1.into()), Box::new(e2.into()))
+}
+fn Mul<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
+    Expr::Mul(Box::new(e1.into()), Box::new(e2.into()))
+}
+fn Div<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
+    Expr::Div(Box::new(e1.into()), Box::new(e2.into()))
+}
+
+// ------------------------------------ Test Macros ----------------------------------- //
+
+// Test syntax trees with or without spans.
+macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
+macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
+macro_rules! test {
+    (@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
+        let exp = Tree![@$($tts)*];
+        let pass = parse($src);
+        check($src, exp, pass.output, $spans);
+    };
+}
+
+// Test expressions.
+macro_rules! v {
+    ($src:expr => $($tts:tt)*) => {
+        t!(concat!("[val: ", $src, "]") => F!("val"; $($tts)*));
+    }
+}
+
+// Test error messages.
+macro_rules! e {
+    ($src:expr => $($tts:tt)*) => {
+        let exp = vec![$($tts)*];
+        let pass = parse($src);
+        let found = pass.feedback.diagnostics.iter()
+            .map(|s| s.as_ref().map(|e| e.message.as_str()))
+            .collect::<Vec<_>>();
+        check($src, exp, found, true);
+    };
+}
+
+// Test decorations.
+macro_rules! d {
+    ($src:expr => $($tts:tt)*) => {
+        let exp = vec![$($tts)*];
+        let pass = parse($src);
+        check($src, exp, pass.feedback.decorations, true);
+    };
+}
+
+/// Assert that expected and found are equal. If they aren't, print both
+/// together with the source of their test case and panic.
+///
+/// When `cmp_spans` is false, spans are ignored.
+pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
+where
+    T: Debug + PartialEq,
+{
+    Span::set_cmp(cmp_spans);
+    let equal = exp == found;
+    Span::set_cmp(true);
+
+    if !equal {
+        println!("source:   {:?}", src);
+        if cmp_spans {
+            println!("expected: {:#?}", exp);
+            println!("found:    {:#?}", found);
+        } else {
+            println!("expected: {:?}", exp);
+            println!("found:    {:?}", found);
+        }
+        panic!("test failed");
+    }
+}
+
+pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
+    Spanned::new(v, Span::new(Pos::new(sl, sc), Pos::new(el, ec)))
+}
+
+// Enables tests to optionally specify spans.
+impl<T> From<T> for Spanned<T> {
+    fn from(t: T) -> Self {
+        Spanned::zero(t)
+    }
+}
+
+// --------------------------------------- Tests -------------------------------------- //
+
+#[test]
+fn test_parse_groups() {
+    e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
+               s(0,2, 0,2, "expected closing bracket"));
+
+    e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
+                   s(0,5, 0,6, "unexpected closing brace"));
+}
+
+#[test]
+fn test_parse_simple_nodes() {
+    t!(""               => );
+    t!("hi"             => T("hi"));
+    t!("*hi"            => B, T("hi"));
+    t!("hi_"            => T("hi"), I);
+    t!("hi you"         => T("hi"), S, T("you"));
+    t!("special~name"   => T("special"), T("\u{00A0}"), T("name"));
+    t!("special\\~name" => T("special"), T("~"), T("name"));
+    t!("\\u{1f303}"     => T("🌃"));
+    t!("\n\n\nhello"    => P, T("hello"));
+    t!(r"a\ b"          => T("a"), L, S, T("b"));
+    t!("`py`"           => R!["py"]);
+    t!("`hi\nyou"       => R!["hi", "you"]);
+    e!("`hi\nyou"       => s(1,3, 1,3, "expected backtick"));
+    t!("`hi\\`du`"      => R!["hi`du"]);
+
+    ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"]));
+    t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]);
+    t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
+        Lang("typst"), " Typst uses ``` to indicate code blocks"
+    ]);
+
+    e!("``` hi\nyou"      => s(1,3, 1,3,  "expected backticks"));
+    e!("```🌍 hi\nyou```" => s(0,3, 0,4,  "invalid identifier"));
+    e!("\\u{d421c809}"    => s(0,0, 0,12, "invalid unicode escape sequence"));
+    e!("\\u{abc"          => s(0,6, 0,6, "expected closing brace"));
+    t!("💜\n\n 🌍"       => T("💜"), P, T("🌍"));
+
+    ts!("hi"   => s(0,0, 0,2, T("hi")));
+    ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B));
+    ts!("💜\n\n 🌍" => s(0,0, 0,1, T("💜")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍")));
+}
+
+#[test]
+fn test_parse_comments() {
+    // In body.
+    t!("hi// you\nw"          => T("hi"), S, T("w"));
+    t!("first//\n//\nsecond"  => T("first"), S, S, T("second"));
+    t!("first//\n \nsecond"   => T("first"), P, T("second"));
+    t!("first/*\n \n*/second" => T("first"), T("second"));
+    e!("🌎\n*/n" => s(1,0, 1,2, "unexpected end of block comment"));
+
+    // In header.
+    t!("[val:/*12pt*/]"          => F!("val"));
+    t!("[val \n /* \n */:]"      => F!("val"));
+    e!("[val \n /* \n */:]"      => );
+    e!("[val : 12, /* \n */ 14]" => );
+}
+
+#[test]
+fn test_parse_headings() {
+    t!("## Hello world!" => H![1, T("Hello"), S, T("world!")]);
+
+    // Handle various whitespace usages.
+    t!("####Simple"                         => H![3, T("Simple")]);
+    t!("  #    Whitespace!"                 => S, H![0, T("Whitespace!")]);
+    t!("  /* TODO: Improve */  ## Analysis" => S, S, H!(1, T("Analysis")));
+
+    // Complex heading contents.
+    t!("Some text [box][### Valuable facts]" => T("Some"), S, T("text"), S,
+        F!("box"; Tree![H!(2, T("Valuable"), S, T("facts"))])
+    );
+    t!("### Grandiose stuff [box][Get it \n\n straight]" => H![2,
+        T("Grandiose"), S, T("stuff"), S,
+        F!("box"; Tree![T("Get"), S, T("it"), P, T("straight")])
+    ]);
+    t!("###### Multiline \\ headings" => H![5, T("Multiline"), S, L, S, T("headings")]);
+
+    // Things that should not become headings.
+    t!("\\## Text"      => T("#"), T("#"), S, T("Text"));
+    t!(" ###### # Text" => S, H!(5, T("#"), S, T("Text")));
+    t!("I am #1"        => T("I"), S, T("am"), S, T("#"), T("1"));
+    t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi"));
+
+    // Depth warnings.
+    e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect"));
+}
+
+#[test]
+fn test_parse_function_names() {
+    // No closing bracket.
+    t!("[" => F!(""));
+    e!("[" => s(0,1, 0,1, "expected function name"),
+                s(0,1, 0,1, "expected closing bracket"));
+
+    // No name.
+    e!("[]"   => s(0,1, 0,1, "expected function name"));
+    e!("[\"]" => s(0,1, 0,3, "expected function name, found string"),
+                    s(0,3, 0,3, "expected closing bracket"));
+
+    // A valid name.
+    t!("[hi]"  => F!("hi"));
+    t!("[  f]" => F!("f"));
+
+    // An invalid name.
+    e!("[12]"   => s(0,1, 0,3, "expected function name, found number"));
+    e!("[  🌎]" => s(0,3, 0,4, "expected function name, found invalid token"));
+}
+
+#[test]
+fn test_parse_chaining() {
+    // Things the parser has to make sense of
+    t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Table![Num(5.0), Num(2.1)], Tree![F!("you")]));
+    t!("[box >>][Hi]"           => F!("box"; Tree![T("Hi")]));
+    t!("[box >> pad: 1pt][Hi]"  => F!("box"; Tree![
+        F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi")))
+    ]));
+    t!("[bold: 400, >> emph >> sub: 1cm]" => F!("bold"; Num(400.0), Tree![
+        F!("emph"; Tree!(F!("sub"; Len(Length::cm(1.0)))))
+    ]));
+
+    // Errors for unclosed / empty predecessor groups
+    e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren"));
+    e!("[>> abc]" => s(0, 1, 0, 1, "expected function name"));
+}
+
+#[test]
+fn test_parse_colon_starting_func_args() {
+    // Just colon without args.
+    e!("[val:]" => );
+
+    // Wrong token.
+    t!("[val=]"     => F!("val"));
+    e!("[val=]"     => s(0,4, 0,4, "expected colon"));
+    e!("[val/🌎:$]" => s(0,4, 0,4, "expected colon"));
+
+    // String in invalid header without colon still parsed as string
+    // Note: No "expected quote" error because not even the string was
+    //       expected.
+    e!("[val/\"]" => s(0,4, 0,4, "expected colon"),
+                        s(0,7, 0,7, "expected closing bracket"));
+}
+
+#[test]
+fn test_parse_function_bodies() {
+    t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B]));
+    e!(" [val][ */ ]"   => s(0,8, 0,10, "unexpected end of block comment"));
+
+    // Raw in body.
+    t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]]));
+    e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
+
+    // Crazy.
+    t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")]));
+
+    // Spanned.
+    ts!(" [box][Oh my]" =>
+        s(0,0, 0,1, S),
+        s(0,1, 0,13, F!(s(0,2, 0,5, "box");
+            s(0,6, 0,13, Tree![
+                s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
+            ])
+        ))
+    );
+}
+
+#[test]
+fn test_parse_values() {
+    // Simple.
+    v!("_"         => Id("_"));
+    v!("name"      => Id("name"));
+    v!("α"         => Id("α"));
+    v!("\"hi\""    => Str("hi"));
+    v!("true"      => Bool(true));
+    v!("false"     => Bool(false));
+    v!("1.0e-4"    => Num(1e-4));
+    v!("3.14"      => Num(3.14));
+    v!("50%"       => Num(0.5));
+    v!("4.5cm"     => Len(Length::cm(4.5)));
+    v!("12e1pt"    => Len(Length::pt(12e1)));
+    v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
+    v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
+
+    // Content.
+    v!("{_hi_}"        => Tree![I, T("hi"), I]);
+    e!("[val: {_hi_}]" => );
+    v!("[hi]"          => Tree![F!("hi")]);
+    e!("[val: [hi]]"   => );
+
+    // Healed colors.
+    v!("#12345"            => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
+    e!("[val: #12345]"     => s(0,6, 0,12, "invalid color"));
+    e!("[val: #a5]"        => s(0,6, 0,9,  "invalid color"));
+    e!("[val: #14b2ah]"    => s(0,6, 0,13, "invalid color"));
+    e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color"));
+
+    // Unclosed string.
+    v!("\"hello"        => Str("hello]"));
+    e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"),
+                            s(0,13, 0,13, "expected closing bracket"));
+
+    // Spanned.
+    ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4)))));
+}
+
+#[test]
+fn test_parse_expressions() {
+    // Coerced table.
+    v!("(hi)" => Id("hi"));
+
+    // Operations.
+    v!("-1"          => Neg(Num(1.0)));
+    v!("-- 1"        => Neg(Neg(Num(1.0))));
+    v!("3.2in + 6pt" => Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))));
+    v!("5 - 0.01"    => Sub(Num(5.0), Num(0.01)));
+    v!("(3mm * 2)"   => Mul(Len(Length::mm(3.0)), Num(2.0)));
+    v!("12e-3cm/1pt" => Div(Len(Length::cm(12e-3)), Len(Length::pt(1.0))));
+
+    // More complex.
+    v!("(3.2in + 6pt)*(5/2-1)" => Mul(
+        Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))),
+        Sub(Div(Num(5.0), Num(2.0)), Num(1.0))
+    ));
+    v!("(6.3E+2+4* - 3.2pt)/2" => Div(
+        Add(Num(6.3e2), Mul(Num(4.0), Neg(Len(Length::pt(3.2))))),
+        Num(2.0)
+    ));
+
+    // Associativity of multiplication and division.
+    v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0)));
+
+    // Spanned.
+    ts!("[val: 1 + 3]" => s(0,0, 0,12, F!(
+        s(0,1, 0,4, "val"); s(0,6, 0,11, Add(
+            s(0,6, 0,7, Num(1.0)),
+            s(0,10, 0,11, Num(3.0)),
+        ))
+    )));
+
+    // Span of parenthesized expression contains parens.
+    ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0)))));
+
+    // Invalid expressions.
+    v!("4pt--"        => Len(Length::pt(4.0)));
+    e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"),
+                            s(0,6, 0,10, "missing right summand"));
+
+    v!("3mm+4pt*"        => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0))));
+    e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor"));
+}
+
+#[test]
+fn test_parse_tables() {
+    // Okay.
+    v!("()"                 => Table![]);
+    v!("(false)"            => Bool(false));
+    v!("(true,)"            => Table![Bool(true)]);
+    v!("(key=val)"          => Table!["key" => Id("val")]);
+    v!("(1, 2)"             => Table![Num(1.0), Num(2.0)]);
+    v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]);
+
+    // Decorations.
+    d!("[val: key=hi]"    => s(0,6, 0,9, TableKey));
+    d!("[val: (key=hi)]"  => s(0,7, 0,10, TableKey));
+    d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey));
+
+    // Spanned with spacing around keyword arguments.
+    ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!(
+        s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n"))
+    )));
+    e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => );
+}
+
+#[test]
+fn test_parse_tables_compute_func_calls() {
+    v!("empty()"                  => Call!("empty"));
+    v!("add ( 1 , 2 )"            => Call!("add"; Num(1.0), Num(2.0)));
+    v!("items(\"fire\", #f93a6d)" => Call!("items";
+        Str("fire"), Color(RgbaColor::new(0xf9, 0x3a, 0x6d, 0xff))
+    ));
+
+    // More complex.
+    v!("css(1pt, rgb(90, 102, 254), \"solid\")" => Call!(
+        "css";
+        Len(Length::pt(1.0)),
+        Call!("rgb"; Num(90.0), Num(102.0), Num(254.0)),
+        Str("solid"),
+    ));
+
+    // Unclosed.
+    v!("lang(中文]"       => Call!("lang"; Id("中文")));
+    e!("[val: lang(中文]" => s(0,13, 0,13, "expected closing paren"));
+
+    // Invalid name.
+    v!("👠(\"abc\", 13e-5)"        => Table!(Str("abc"), Num(13.0e-5)));
+    e!("[val: 👠(\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token"));
+}
+
+#[test]
+fn test_parse_tables_nested() {
+    v!("(1, ( ab=(), d = (3, 14pt) )), false" =>
+        Table![
+            Num(1.0),
+            Table!(
+                "ab" => Table![],
+                "d"  => Table!(Num(3.0), Len(Length::pt(14.0))),
+            ),
+        ],
+        Bool(false),
+    );
+}
+
+#[test]
+fn test_parse_tables_errors() {
+    // Expected value.
+    e!("[val: (=)]"         => s(0,7, 0,8, "expected value, found equals sign"));
+    e!("[val: (,)]"         => s(0,7, 0,8, "expected value, found comma"));
+    v!("(\x07 abc,)"        => Table![Id("abc")]);
+    e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
+    e!("[val: (key=,)]"     => s(0,11, 0,12, "expected value, found comma"));
+    e!("[val: hi,)]"        => s(0,9, 0,10, "expected value, found closing paren"));
+
+    // Expected comma.
+    v!("(true false)"        => Table![Bool(true), Bool(false)]);
+    e!("[val: (true false)]" => s(0,11, 0,11, "expected comma"));
+
+    // Expected closing paren.
+    e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren"));
+    e!("[val: (key]"  => s(0,10, 0,10, "expected closing paren"));
+    e!("[val: (key=]" => s(0,11, 0,11, "expected value"),
+                            s(0,11, 0,11, "expected closing paren"));
+
+    // Bad key.
+    v!("true=you"        => Bool(true), Id("you"));
+    e!("[val: true=you]" =>
+        s(0,10, 0,10, "expected comma"),
+        s(0,10, 0,11, "expected value, found equals sign"));
+
+    // Unexpected equals sign.
+    v!("z=y=4"        => Num(4.0), "z" => Id("y"));
+    e!("[val: z=y=4]" =>
+        s(0,9, 0,9, "expected comma"),
+        s(0,9, 0,10, "expected value, found equals sign"));
+}
diff --git a/src/parse/tokenizer.rs b/src/parse/tokenizer.rs
new file mode 100644
index 00000000..a251d960
--- /dev/null
+++ b/src/parse/tokenizer.rs
@@ -0,0 +1,639 @@
+//! Tokenization.
+
+use std::iter::Peekable;
+use std::str::Chars;
+use unicode_xid::UnicodeXID;
+
+use crate::length::Length;
+use crate::syntax::{Pos, Span, Spanned, Token};
+
+use Token::*;
+use TokenMode::*;
+
+/// An iterator over the tokens of a string of source code.
+#[derive(Debug)]
+pub struct Tokens<'s> {
+    src: &'s str,
+    iter: Peekable<Chars<'s>>,
+    mode: TokenMode,
+    stack: Vec<TokenMode>,
+    pos: Pos,
+    index: usize,
+}
+
+/// Whether to tokenize in header mode which yields expression, comma and
+/// similar tokens or in body mode which yields text and star, underscore,
+/// backtick tokens.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum TokenMode {
+    Header,
+    Body,
+}
+
+impl<'s> Tokens<'s> {
+    /// Create a new token iterator with the given mode.
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
+        Self {
+            src,
+            iter: src.chars().peekable(),
+            mode,
+            stack: vec![],
+            pos: Pos::ZERO,
+            index: 0,
+        }
+    }
+
+    /// Change the token mode and push the old one on a stack.
+    pub fn push_mode(&mut self, mode: TokenMode) {
+        self.stack.push(self.mode);
+        self.mode = mode;
+    }
+
+    /// Pop the old token mode from the stack. This panics if there is no mode
+    /// on the stack.
+    pub fn pop_mode(&mut self) {
+        self.mode = self.stack.pop().expect("no pushed mode");
+    }
+
+    /// The index in the string at which the last token ends and the next token
+    /// will start.
+    pub fn index(&self) -> usize {
+        self.index
+    }
+
+    /// The line-column position in the source at which the last token ends and
+    /// the next token will start.
+    pub fn pos(&self) -> Pos {
+        self.pos
+    }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+    type Item = Spanned<Token<'s>>;
+
+    /// Parse the next token in the source code.
+    fn next(&mut self) -> Option<Self::Item> {
+        let start = self.pos();
+        let first = self.eat()?;
+
+        let token = match first {
+            // Comments.
+            '/' if self.peek() == Some('/') => self.read_line_comment(),
+            '/' if self.peek() == Some('*') => self.read_block_comment(),
+            '*' if self.peek() == Some('/') => {
+                self.eat();
+                Invalid("*/")
+            }
+
+            // Whitespace.
+            c if c.is_whitespace() => self.read_whitespace(start),
+
+            // Functions and blocks.
+            '[' => LeftBracket,
+            ']' => RightBracket,
+            '{' => LeftBrace,
+            '}' => RightBrace,
+
+            // Syntactic elements in function headers.
+            '(' if self.mode == Header => LeftParen,
+            ')' if self.mode == Header => RightParen,
+            ':' if self.mode == Header => Colon,
+            ',' if self.mode == Header => Comma,
+            '=' if self.mode == Header => Equals,
+            '>' if self.mode == Header && self.peek() == Some('>') => self.read_chain(),
+
+            // Expression operators.
+            '+' if self.mode == Header => Plus,
+            '-' if self.mode == Header => Hyphen,
+            '/' if self.mode == Header => Slash,
+
+            // Star serves a double purpose as a style modifier
+            // and an expression operator in the header.
+            '*' => Star,
+
+            // A hex expression.
+            '#' if self.mode == Header => self.read_hex(),
+
+            // String values.
+            '"' if self.mode == Header => self.read_string(),
+
+            // Style toggles.
+            '_' if self.mode == Body => Underscore,
+            '`' if self.mode == Body => self.read_raw_or_code(),
+
+            // Sections.
+            '#' if self.mode == Body => Hashtag,
+
+            // Non-breaking spaces.
+            '~' if self.mode == Body => Text("\u{00A0}"),
+
+            // An escaped thing.
+            '\\' if self.mode == Body => self.read_escaped(),
+
+            // Expressions or just strings.
+            c => {
+                let body = self.mode == Body;
+
+                let start_offset = -(c.len_utf8() as isize);
+                let mut last_was_e = false;
+
+                let (text, _) = self.read_string_until(false, start_offset, 0, |n| {
+                    let val = match n {
+                        c if c.is_whitespace() => true,
+                        '[' | ']' | '{' | '}' | '/' | '*' => true,
+                        '\\' | '_' | '`' | '#' | '~' if body => true,
+                        ':' | '=' | ',' | '"' | '(' | ')' if !body => true,
+                        '+' | '-' if !body && !last_was_e => true,
+                        _ => false,
+                    };
+
+                    last_was_e = n == 'e' || n == 'E';
+                    val
+                });
+
+                if self.mode == Header {
+                    self.read_expr(text)
+                } else {
+                    Text(text)
+                }
+            }
+        };
+
+        let end = self.pos();
+        let span = Span { start, end };
+
+        Some(Spanned { v: token, span })
+    }
+}
+
+impl<'s> Tokens<'s> {
+    fn read_line_comment(&mut self) -> Token<'s> {
+        self.eat();
+        LineComment(self.read_string_until(false, 0, 0, is_newline_char).0)
+    }
+
+    /// Reads a block comment whose opening `/` was already consumed (the `*`
+    /// is eaten here). Nested `/* */` pairs are balanced, so the comment ends
+    /// at the first `*/` without a matching inner `/*`, or at the end of input.
+    fn read_block_comment(&mut self) -> Token<'s> {
+        let mut depth = 0;
+        // The previous character, or `'_'` directly after a complete `/*` or
+        // `*/` pair: a character that was part of one pair must not start
+        // another, e.g. the second star in `/**/` does not open a comment.
+        let mut last = '_';
+
+        // Find the first `*/` that does not correspond to a nested `/*`.
+        // Remove the last two bytes to obtain the raw inner text without `*/`.
+        self.eat();
+        let (content, _) = self.read_string_until(true, 0, -2, |c| {
+            last = match (last, c) {
+                ('*', '/') if depth == 0 => return true,
+                ('*', '/') => {
+                    depth -= 1;
+                    '_'
+                }
+                ('/', '*') => {
+                    depth += 1;
+                    '_'
+                }
+                _ => c,
+            };
+
+            false
+        });
+
+        BlockComment(content)
+    }
+
+    fn read_chain(&mut self) -> Token<'s> {
+        assert!(self.eat() == Some('>'));
+        Chain
+    }
+
+    fn read_whitespace(&mut self, start: Pos) -> Token<'s> {
+        self.read_string_until(false, 0, 0, |n| !n.is_whitespace());
+        let end = self.pos();
+
+        Space(end.line - start.line)
+    }
+
+    fn read_string(&mut self) -> Token<'s> {
+        let (string, terminated) = self.read_until_unescaped('"');
+        Str { string, terminated }
+    }
+
+    fn read_raw_or_code(&mut self) -> Token<'s> {
+        let (raw, terminated) = self.read_until_unescaped('`');
+        if raw.is_empty() && terminated && self.peek() == Some('`') {
+            // Third tick found; this is a code block.
+            self.eat();
+
+            // Reads the lang tag (until a backtick, newline or whitespace).
+            let start = self.pos();
+            let (lang, _) = self.read_string_until(false, 0, 0, |c| {
+                c == '`' || c.is_whitespace() || is_newline_char(c)
+            });
+            let end = self.pos();
+
+            let lang = if !lang.is_empty() {
+                Some(Spanned::new(lang, Span::new(start, end)))
+            } else {
+                None
+            };
+
+            // Skip to start of raw contents.
+            while let Some(c) = self.peek() {
+                if is_newline_char(c) {
+                    self.eat();
+                    if c == '\r' && self.peek() == Some('\n') {
+                        self.eat();
+                    }
+
+                    break;
+                } else if c.is_whitespace() {
+                    self.eat();
+                } else {
+                    break;
+                }
+            }
+
+            let start = self.index();
+            let mut backticks = 0u32;
+
+            while backticks < 3 {
+                match self.eat() {
+                    Some('`') => backticks += 1,
+                    // Escaping of triple backticks.
+                    Some('\\') if backticks == 1 && self.peek() == Some('`') => {
+                        backticks = 0;
+                    }
+                    Some(_) => {}
+                    None => break,
+                }
+            }
+
+            let terminated = backticks == 3;
+            let end = self.index() - if terminated { 3 } else { 0 };
+
+            Code {
+                lang,
+                raw: &self.src[start .. end],
+                terminated,
+            }
+        } else {
+            Raw { raw, terminated }
+        }
+    }
+
+    fn read_until_unescaped(&mut self, end: char) -> (&'s str, bool) {
+        let mut escaped = false;
+        self.read_string_until(true, 0, -1, |c| {
+            match c {
+                c if c == end && !escaped => return true,
+                '\\' => escaped = !escaped,
+                _ => escaped = false,
+            }
+
+            false
+        })
+    }
+
+    /// Reads what follows a backslash in body mode: a `\u{...}` escape, an escaped
+    /// symbol, a `Backslash` token (before whitespace or at the end) or a plain `\`.
+    fn read_escaped(&mut self) -> Token<'s> {
+        fn is_escapable(c: char) -> bool {
+            matches!(c, '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~')
+        }
+
+        let next = match self.peek() {
+            None => return Backslash,
+            Some(c) if c.is_whitespace() => return Backslash,
+            Some(c) => c,
+        };
+
+        if next == 'u' {
+            self.eat();
+            if self.peek() != Some('{') {
+                return Text("\\u");
+            }
+            self.eat();
+            let is_not_hex = |c: char| !c.is_ascii_hexdigit();
+            let (sequence, _) = self.read_string_until(false, 0, 0, is_not_hex);
+
+            let terminated = self.peek() == Some('}');
+            if terminated {
+                self.eat();
+            }
+
+            UnicodeEscape { sequence, terminated }
+        } else if is_escapable(next) {
+            let start = self.index();
+            self.eat();
+            Text(&self.src[start .. start + next.len_utf8()])
+        } else {
+            Text("\\")
+        }
+    }
+
+    fn read_hex(&mut self) -> Token<'s> {
+        // This will parse more than the permissible 0-9, a-f, A-F character
+        // ranges to provide nicer error messages later.
+        Hex(self.read_string_until(false, 0, 0, |n| !n.is_ascii_alphanumeric()).0)
+    }
+
+    fn read_expr(&mut self, text: &'s str) -> Token<'s> {
+        if let Ok(b) = text.parse::<bool>() {
+            Bool(b)
+        } else if let Ok(num) = text.parse::<f64>() {
+            Number(num)
+        } else if let Some(num) = parse_percentage(text) {
+            Number(num / 100.0)
+        } else if let Ok(length) = text.parse::<Length>() {
+            Length(length)
+        } else if is_identifier(text) {
+            Ident(text)
+        } else {
+            Invalid(text)
+        }
+    }
+
+    /// Reads the input until `f` returns `true` for a character or the input
+    /// ends. When `eat_match` is true, the matching character is consumed too.
+    /// Returns the source slice from the index at call time plus `offset_start`
+    /// up to the final index (plus `offset_end`, applied only when a match was
+    /// found), together with whether a match was found.
+    fn read_string_until(
+        &mut self,
+        eat_match: bool,
+        offset_start: isize,
+        offset_end: isize,
+        mut f: impl FnMut(char) -> bool,
+    ) -> (&'s str, bool) {
+        let start = ((self.index() as isize) + offset_start) as usize;
+        let mut matched = false;
+
+        while let Some(c) = self.peek() {
+            if f(c) {
+                matched = true;
+                if eat_match {
+                    self.eat();
+                }
+                break;
+            }
+
+            self.eat();
+        }
+
+        let mut end = self.index();
+        if matched {
+            end = ((end as isize) + offset_end) as usize;
+        }
+
+        (&self.src[start .. end], matched)
+    }
+
+    fn eat(&mut self) -> Option<char> {
+        let c = self.iter.next()?;
+        self.index += c.len_utf8();
+
+        if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
+            self.pos.line += 1;
+            self.pos.column = 0;
+        } else {
+            self.pos.column += 1;
+        }
+
+        Some(c)
+    }
+
+    fn peek(&mut self) -> Option<char> {
+        self.iter.peek().copied()
+    }
+}
+
+/// Parses text of the form `<number>%` into the number before the percent sign.
+fn parse_percentage(text: &str) -> Option<f64> {
+    if !text.ends_with('%') {
+        return None;
+    }
+    text[.. text.len() - 1].parse().ok()
+}
+
+/// Whether this character denotes a newline.
+pub fn is_newline_char(character: char) -> bool {
+    matches!(
+        character,
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\x0A' ..= '\x0D'
+        // Next Line, Line Separator, Paragraph Separator.
+        | '\u{0085}' | '\u{2028}' | '\u{2029}'
+    )
+}
+
+/// Returns `true` if the string is a valid identifier.
+///
+/// The first character must be an XID start character and every following
+/// character an XID continue character; in addition `.`, `-` and `_` are
+/// accepted at any position. The empty string is not an identifier.
+pub fn is_identifier(string: &str) -> bool {
+    let is_extra_allowed = |c: char| matches!(c, '.' | '-' | '_');
+
+    let mut chars = string.chars();
+    let starts_ok = chars
+        .next()
+        .map_or(false, |c| UnicodeXID::is_xid_start(c) || is_extra_allowed(c));
+
+    starts_ok && chars.all(|c| UnicodeXID::is_xid_continue(c) || is_extra_allowed(c))
+}
+
+// Tests for the tokenizer: each case tokenizes a source string in a given
+// mode and compares the produced tokens against an expected list.
+#[cfg(test)]
+#[allow(non_snake_case)]
+mod tests {
+    use super::*;
+    use crate::length::Length;
+    use crate::parse::tests::{check, s};
+
+    // Short aliases for token variants keep the expectation tables compact.
+    use Token::{
+        BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
+        LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,
+        LineComment as LC, Number as Num, Plus, RightBrace as RB, RightBracket as R,
+        RightParen as RP, Slash, Space as S, Star, Text as T,
+    };
+
+    // Function-style constructors for the struct-like token variants, so that
+    // they can be written like the tuple variants above.
+    fn Str(string: &str, terminated: bool) -> Token {
+        Token::Str { string, terminated }
+    }
+    fn Raw(raw: &str, terminated: bool) -> Token {
+        Token::Raw { raw, terminated }
+    }
+    fn Code<'a>(
+        lang: Option<Spanned<&'a str>>,
+        raw: &'a str,
+        terminated: bool,
+    ) -> Token<'a> {
+        Token::Code { lang, raw, terminated }
+    }
+    // Wraps anything convertible into a spanned string in `Some`, for use as
+    // the `lang` argument of `Code`.
+    fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> {
+        Some(Into::<Spanned<&str>>::into(lang))
+    }
+    fn UE(sequence: &str, terminated: bool) -> Token {
+        Token::UnicodeEscape { sequence, terminated }
+    }
+
+    // `t!` runs a case with `spans=false`, `ts!` with `spans=true`. The flag
+    // is forwarded unchanged to `check`; presumably it decides whether spans
+    // are compared -- confirm against `crate::parse::tests::check`.
+    macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
+    macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
+    // Converts every expected token into a `Spanned<Token>`, tokenizes `$src`
+    // in `$mode` and hands both lists to `check`.
+    macro_rules! test {
+        (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
+            let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
+            let found = Tokens::new($src, $mode).collect::<Vec<_>>();
+            check($src, exp, found, $spans);
+        }
+    }
+
+    // A run of whitespace becomes a single `Space` token. Judging by the
+    // cases below, its number is the count of line breaks in the run, with
+    // `\r\n` counting as one. The last case expects `~` to produce a
+    // non-breaking space as text.
+    #[test]
+    fn tokenize_whitespace() {
+        t!(Body, ""             => );
+        t!(Body, " "            => S(0));
+        t!(Body, "    "         => S(0));
+        t!(Body, "\t"           => S(0));
+        t!(Body, "  \t"         => S(0));
+        t!(Body, "\n"           => S(1));
+        t!(Body, "\n "          => S(1));
+        t!(Body, "  \n"         => S(1));
+        t!(Body, "  \n   "      => S(1));
+        t!(Body, "\r\n"         => S(1));
+        t!(Body, "  \n\t \n  "  => S(2));
+        t!(Body, "\n\r"         => S(2));
+        t!(Body, " \r\r\n \x0D" => S(3));
+        t!(Body, "a~b"          => T("a"), T("\u{00A0}"), T("b"));
+    }
+
+    // Line comments end before the line break; block comments may nest and
+    // may be unterminated. A `*/` without an open comment is invalid.
+    #[test]
+    fn tokenize_comments() {
+        t!(Body, "a // bc\n "        => T("a"), S(0), LC(" bc"),  S(1));
+        t!(Body, "a //a//b\n "       => T("a"), S(0), LC("a//b"), S(1));
+        t!(Body, "a //a//b\r\n"      => T("a"), S(0), LC("a//b"), S(1));
+        t!(Body, "a //a//b\n\nhello" => T("a"), S(0), LC("a//b"), S(2), T("hello"));
+        t!(Body, "/**/"              => BC(""));
+        t!(Body, "_/*_/*a*/*/"       => Underscore, BC("_/*a*/"));
+        t!(Body, "/*/*/"             => BC("/*/"));
+        t!(Body, "abc*/"             => T("abc"), Invalid("*/"));
+        t!(Body, "/***/"             => BC("*"));
+        t!(Body, "/**\\****/*/*/"    => BC("*\\***"), Invalid("*/"), Invalid("*/"));
+        t!(Body, "/*abc"             => BC("abc"));
+    }
+
+    // Markup that is only recognized in body mode: stars, underscores,
+    // hashtags, backslashes, raw text and code blocks. The final case shows
+    // the same kind of input being invalid in header mode.
+    #[test]
+    fn tokenize_body_only_tokens() {
+        t!(Body, "_*"            => Underscore, Star);
+        t!(Body, "***"           => Star, Star, Star);
+        t!(Body, "[func]*bold*"  => L, T("func"), R, Star, T("bold"), Star);
+        t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
+        t!(Body, "`raw`"         => Raw("raw", true));
+        t!(Body, "# hi"          => Hashtag, S(0), T("hi"));
+        t!(Body, "#()"           => Hashtag, T("()"));
+        t!(Body, "`[func]`"      => Raw("[func]", true));
+        t!(Body, "`]"            => Raw("]", false));
+        t!(Body, "\\ "           => Backslash, S(0));
+        t!(Body, "`\\``"         => Raw("\\`", true));
+        t!(Body, "``not code`"   => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
+        t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true));
+        t!(Body, "``` hi`\\``"   => Code(None, "hi`\\``", false));
+        t!(Body, "```js   \r\n  document.write(\"go\")" => Code(Lang("js"), "  document.write(\"go\")", false));
+        t!(Header, "_`"          => Invalid("_`"));
+    }
+
+    // Tokens that are only recognized in header mode: punctuation,
+    // identifiers, numbers, lengths, hex values and operators. The first two
+    // cases show that the same characters are plain text in body mode.
+    #[test]
+    fn tokenize_header_only_tokens() {
+        t!(Body, "a: b"                => T("a:"), S(0), T("b"));
+        t!(Body, "c=d, "               => T("c=d,"), S(0));
+        t!(Header, "(){}:=,"           => LP, RP, LB, RB, Colon, Equals, Comma);
+        t!(Header, "a:b"               => Id("a"), Colon, Id("b"));
+        t!(Header, "#6ae6dd"           => Hex("6ae6dd"));
+        t!(Header, "#8A083c"           => Hex("8A083c"));
+        t!(Header, "a: true, x=1"      => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
+                                          Id("x"), Equals, Num(1.0));
+        t!(Header, "=3.14"             => Equals, Num(3.14));
+        t!(Header, "12.3e5"            => Num(12.3e5));
+        t!(Header, "120%"              => Num(1.2));
+        t!(Header, "12e4%"             => Num(1200.0));
+        t!(Header, "__main__"          => Id("__main__"));
+        t!(Header, ">main"             => Invalid(">main"));
+        t!(Header, ".func.box"         => Id(".func.box"));
+        t!(Header, "arg, _b, _1"       => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
+        t!(Header, "f: arg >> g"       => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
+        t!(Header, "12_pt, 12pt"       => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
+        t!(Header, "1e5in"             => Len(Length::inches(100000.0)));
+        t!(Header, "2.3cm"             => Len(Length::cm(2.3)));
+        t!(Header, "12e-3in"           => Len(Length::inches(12e-3)));
+        t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)),
+                                          Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0));
+        t!(Header, "(5 - 1) / 2.1"     => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP,
+                                          S(0), Slash, S(0), Num(2.1));
+        t!(Header, "-1"                => Min, Num(1.0));
+        t!(Header, "--1"               => Min, Min, Num(1.0));
+        t!(Header, "- 1"               => Min, S(0), Num(1.0));
+        t!(Header, "02.4mm"            => Len(Length::mm(2.4)));
+        t!(Header, "2.4.cm"            => Invalid("2.4.cm"));
+        t!(Header, "(1,2)"             => LP, Num(1.0), Comma, Num(2.0), RP);
+        t!(Header, "{abc}"             => LB, Id("abc"), RB);
+        t!(Header, "🌓, 🌍,"          => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
+    }
+
+    // Quoted strings are tokens in header mode only; in body mode the quotes
+    // are ordinary text. Escape sequences are kept unresolved in the token,
+    // and `terminated` records whether the closing quote was found.
+    #[test]
+    fn tokenize_strings() {
+        t!(Body, "a \"hi\" string"           => T("a"), S(0), T("\"hi\""), S(0), T("string"));
+        t!(Header, "\"hello"                 => Str("hello", false));
+        t!(Header, "\"hello world\""         => Str("hello world", true));
+        t!(Header, "\"hello\nworld\""        => Str("hello\nworld", true));
+        t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false));
+        t!(Header, r#""a\"bc""#              => Str(r#"a\"bc"#, true));
+        t!(Header, r#""a\\"bc""#             => Str(r#"a\\"#, true), Id("bc"), Str("", false));
+        t!(Header, r#""a\tbc"#               => Str("a\\tbc", false));
+        t!(Header, "\"🌎\""                  => Str("🌎", true));
+    }
+
+    // In body mode a backslash in front of a markup character yields that
+    // character as text; `\u{...}` yields a unicode escape token.
+    #[test]
+    fn tokenize_escaped_symbols() {
+        t!(Body, r"\\"       => T(r"\"));
+        t!(Body, r"\["       => T("["));
+        t!(Body, r"\]"       => T("]"));
+        t!(Body, r"\*"       => T("*"));
+        t!(Body, r"\_"       => T("_"));
+        t!(Body, r"\`"       => T("`"));
+        t!(Body, r"\/"       => T("/"));
+        t!(Body, r"\u{2603}" => UE("2603", true));
+        t!(Body, r"\u{26A4"  => UE("26A4", false));
+        t!(Body, r#"\""#     => T("\""));
+    }
+
+    // A backslash in front of a character that cannot be escaped stays a
+    // literal backslash in body mode and is invalid in header mode. Malformed
+    // unicode escapes stop at the first character that is not a hex digit.
+    #[test]
+    fn tokenize_unescapable_symbols() {
+        t!(Body, r"\a"     => T("\\"), T("a"));
+        t!(Body, r"\:"     => T(r"\"), T(":"));
+        t!(Body, r"\="     => T(r"\"), T("="));
+        t!(Body, r"\u{2GA4"=> UE("2", false), T("GA4"));
+        t!(Body, r"\u{ "   => UE("", false), Space(0));
+        t!(Body, r"\u"     => T(r"\u"));
+        t!(Header, r"\\\\" => Invalid(r"\\\\"));
+        t!(Header, r"\a"   => Invalid(r"\a"));
+        t!(Header, r"\:"   => Invalid(r"\"), Colon);
+        t!(Header, r"\="   => Invalid(r"\"), Equals);
+        t!(Header, r"\,"   => Invalid(r"\"), Comma);
+    }
+
+    // These cases run with `spans=true`. The four numbers given to `s` are
+    // presumably start line, start column, end line and end column -- confirm
+    // against `crate::parse::tests::s`.
+    #[test]
+    fn tokenize_with_spans() {
+        ts!(Body, "hello"          => s(0,0, 0,5, T("hello")));
+        ts!(Body, "ab\r\nc"        => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
+        ts!(Body, "// ab\r\n\nf"   => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
+        ts!(Body, "/*b*/_"         => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
+        ts!(Header, "a=10"         => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));
+    }
+}
author	Laurenz <laurmaedje@gmail.com>	2020-09-30 12:38:02 +0200
committer	Laurenz <laurmaedje@gmail.com>	2020-09-30 12:45:33 +0200
commit	bc1b4216a802d09e8d00dd277a0e204d49bcaa7f (patch)
tree	31dabd48d5062fdd684797ed6053bf279ba67490 /src/parse
parent	fee5170a68a6ef97108d731a4873787894f65a06 (diff)