From 01405902ba18726ccae2f71da9dfef26fac9c357 Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Wed, 30 Sep 2020 19:13:55 +0200
Subject: =?UTF-8?q?Restructure=20parser=20files=20=F0=9F=8D=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parse/escaping.rs    | 215 ----------------
 src/parse/mod.rs         | 653 ++++++++++++++++++++++++++++++++++++++++++++++-
 src/parse/parser.rs      | 645 ----------------------------------------------
 src/parse/postprocess.rs | 217 ++++++++++++++++
 src/parse/tests.rs       |   2 +
 src/parse/tokenizer.rs   | 606 -------------------------------------------
 src/parse/tokens.rs      | 606 +++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 1473 insertions(+), 1471 deletions(-)
 delete mode 100644 src/parse/escaping.rs
 delete mode 100644 src/parse/parser.rs
 create mode 100644 src/parse/postprocess.rs
 delete mode 100644 src/parse/tokenizer.rs
 create mode 100644 src/parse/tokens.rs

(limited to 'src/parse')
diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs
deleted file mode 100644
index 2e556d0c..00000000
--- a/src/parse/escaping.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-use super::is_newline_char;
-use crate::syntax::{Ident, Raw};
-
-/// Resolves all escape sequences in a string.
-pub fn unescape_string(string: &str) -> String {
-    let mut iter = string.chars().peekable();
-    let mut out = String::with_capacity(string.len());
-
-    while let Some(c) = iter.next() {
-        if c != '\\' {
-            out.push(c);
-            continue;
-        }
-
-        match iter.next() {
-            Some('\\') => out.push('\\'),
-            Some('"') => out.push('"'),
-
-            Some('n') => out.push('\n'),
-            Some('t') => out.push('\t'),
-            Some('u') if iter.peek() == Some(&'{') => {
-                iter.next();
-
-                // TODO: Feedback if closing brace is missing.
-                let mut sequence = String::new();
-                let terminated = loop {
-                    match iter.peek() {
-                        Some('}') => {
-                            iter.next();
-                            break true;
-                        }
-                        Some(&c) if c.is_ascii_hexdigit() => {
-                            iter.next();
-                            sequence.push(c);
-                        }
-                        _ => break false,
-                    }
-                };
-
-                if let Some(c) = hex_to_char(&sequence) {
-                    out.push(c);
-                } else {
-                    // TODO: Feedback that escape sequence is wrong.
-                    out.push_str("\\u{");
-                    out.push_str(&sequence);
-                    if terminated {
-                        out.push('}');
-                    }
-                }
-            }
-
-            other => {
-                out.push('\\');
-                out.extend(other);
-            }
-        }
-    }
-
-    out
-}
-
-/// Resolves the language tag and trims the raw text.
-///
-/// Returns:
-/// - The language tag
-/// - The raw lines
-/// - Whether at least one newline was present in the untrimmed text.
-pub fn process_raw(raw: &str) -> Raw {
-    let (lang, inner) = split_after_lang_tag(raw);
-    let (lines, had_newline) = trim_and_split_raw(inner);
-    Raw { lang, lines, inline: !had_newline }
-}
-
-/// Parse the lang tag and return it alongside the remaining inner raw text.
-fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
-    let mut lang = String::new();
-
-    let mut inner = raw;
-    let mut iter = raw.chars();
-
-    while let Some(c) = iter.next() {
-        if c == '`' || c.is_whitespace() || is_newline_char(c) {
-            break;
-        }
-
-        inner = iter.as_str();
-        lang.push(c);
-    }
-
-    (Ident::new(lang), inner)
-}
-
-/// Trims raw text and splits it into lines.
-///
-/// Returns whether at least one newline was contained in `raw`.
-fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
-    // Trims one whitespace at end and start.
-    let raw = raw.strip_prefix(' ').unwrap_or(raw);
-    let raw = raw.strip_suffix(' ').unwrap_or(raw);
-
-    let mut lines = split_lines(raw);
-    let had_newline = lines.len() > 1;
-    let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
-
-    // Trims a sequence of whitespace followed by a newline at the start.
-    if lines.first().map(is_whitespace).unwrap_or(false) {
-        lines.remove(0);
-    }
-
-    // Trims a newline followed by a sequence of whitespace at the end.
-    if lines.last().map(is_whitespace).unwrap_or(false) {
-        lines.pop();
-    }
-
-    (lines, had_newline)
-}
-
-/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
-pub fn split_lines(text: &str) -> Vec<String> {
-    let mut iter = text.chars().peekable();
-    let mut line = String::new();
-    let mut lines = Vec::new();
-
-    while let Some(c) = iter.next() {
-        if is_newline_char(c) {
-            if c == '\r' && iter.peek() == Some(&'\n') {
-                iter.next();
-            }
-
-            lines.push(std::mem::take(&mut line));
-        } else {
-            line.push(c);
-        }
-    }
-
-    lines.push(line);
-    lines
-}
-
-/// Converts a hexademical sequence (without braces or "\u") into a character.
-pub fn hex_to_char(sequence: &str) -> Option<char> {
-    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
-}
-
-#[cfg(test)]
-#[rustfmt::skip]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_unescape_strings() {
-        fn test(string: &str, expected: &str) {
-            assert_eq!(unescape_string(string), expected.to_string());
-        }
-
-        test(r#"hello world"#,  "hello world");
-        test(r#"hello\nworld"#, "hello\nworld");
-        test(r#"a\"bc"#,        "a\"bc");
-        test(r#"a\u{2603}bc"#,  "a☃bc");
-        test(r#"a\u{26c3bg"#,   "a𦰻g");
-        test(r#"av\u{6797"#,    "av林");
-        test(r#"a\\"#,          "a\\");
-        test(r#"a\\\nbc"#,      "a\\\nbc");
-        test(r#"a\tbc"#,        "a\tbc");
-        test(r"🌎",             "🌎");
-        test(r"🌎\",            r"🌎\");
-        test(r"\🌎",            r"\🌎");
-    }
-
-    #[test]
-    fn test_split_after_lang_tag() {
-        fn test(raw: &str, lang: Option<&str>, inner: &str) {
-            let (found_lang, found_inner) = split_after_lang_tag(raw);
-            assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
-            assert_eq!(found_inner, inner);
-        }
-
-        test("typst it!",   Some("typst"), " it!");
-        test("typst\n it!", Some("typst"), "\n it!");
-        test("typst\n it!", Some("typst"), "\n it!");
-        test("abc`",        Some("abc"),   "`");
-        test(" hi",         None,          " hi");
-        test("`",           None,          "`");
-    }
-
-    #[test]
-    fn test_trim_raw() {
-        fn test(raw: &str, expected: Vec<&str>) {
-            assert_eq!(trim_and_split_raw(raw).0, expected);
-        }
-
-        test(" hi",          vec!["hi"]);
-        test("  hi",         vec![" hi"]);
-        test("\nhi",         vec!["hi"]);
-        test("    \n hi",    vec![" hi"]);
-        test("hi ",          vec!["hi"]);
-        test("hi  ",         vec!["hi "]);
-        test("hi\n",         vec!["hi"]);
-        test("hi \n   ",     vec!["hi "]);
-        test("  \n hi \n  ", vec![" hi "]);
-    }
-
-    #[test]
-    fn test_split_lines() {
-        fn test(raw: &str, expected: Vec<&str>) {
-            assert_eq!(split_lines(raw), expected);
-        }
-
-        test("raw\ntext",  vec!["raw", "text"]);
-        test("a\r\nb",     vec!["a", "b"]);
-        test("a\n\nb",     vec!["a", "", "b"]);
-        test("a\r\x0Bb",   vec!["a", "", "b"]);
-        test("a\r\n\r\nb", vec!["a", "", "b"]);
-    }
-}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 340e89ea..e7ab89f1 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,11 +1,654 @@
 //! Parsing and tokenization.
 
-mod escaping;
-mod parser;
-mod tokenizer;
+mod postprocess;
+mod tokens;
 
-pub use parser::*;
-pub use tokenizer::*;
+pub use tokens::*;
+
+use std::str::FromStr;
+
+use super::*;
+use crate::color::RgbaColor;
+use crate::compute::table::SpannedEntry;
+use crate::syntax::*;
+use crate::{Feedback, Pass};
+
+/// Parse a string of source code.
+pub fn parse(src: &str) -> Pass<SyntaxTree> {
+    Parser::new(src).parse()
+}
+
+struct Parser<'s> {
+    tokens: Tokens<'s>,
+    peeked: Option<Option<Spanned<Token<'s>>>>,
+    delimiters: Vec<(Pos, Token<'static>)>,
+    at_block_or_line_start: bool,
+    feedback: Feedback,
+}
+
+impl<'s> Parser<'s> {
+    fn new(src: &'s str) -> Self {
+        Self {
+            tokens: Tokens::new(src, TokenMode::Body),
+            peeked: None,
+            delimiters: vec![],
+            at_block_or_line_start: true,
+            feedback: Feedback::new(),
+        }
+    }
+
+    fn parse(mut self) -> Pass<SyntaxTree> {
+        let tree = self.parse_body_contents();
+        Pass::new(tree, self.feedback)
+    }
+}
+
+// Typesetting content.
+impl Parser<'_> {
+    fn parse_body_contents(&mut self) -> SyntaxTree {
+        let mut tree = SyntaxTree::new();
+
+        self.at_block_or_line_start = true;
+        while !self.eof() {
+            if let Some(node) = self.parse_node() {
+                tree.push(node);
+            }
+        }
+
+        tree
+    }
+
+    fn parse_node(&mut self) -> Option<Spanned<SyntaxNode>> {
+        let token = self.peek()?;
+        let end = Span::at(token.span.end);
+
+        // Set block or line start to false because most nodes have that effect, but
+        // remember the old value to actually check it for hashtags and because comments
+        // and spaces want to retain it.
+        let was_at_block_or_line_start = self.at_block_or_line_start;
+        self.at_block_or_line_start = false;
+
+        Some(match token.v {
+            // Starting from two newlines counts as a paragraph break, a single
+            // newline does not.
+            Token::Space(n) => {
+                if n == 0 {
+                    self.at_block_or_line_start = was_at_block_or_line_start;
+                } else if n >= 1 {
+                    self.at_block_or_line_start = true;
+                }
+
+                self.with_span(if n >= 2 {
+                    SyntaxNode::Parbreak
+                } else {
+                    SyntaxNode::Spacing
+                })
+            }
+
+            Token::LineComment(_) | Token::BlockComment(_) => {
+                self.at_block_or_line_start = was_at_block_or_line_start;
+                self.eat();
+                return None;
+            }
+
+            Token::LeftBracket => {
+                let call = self.parse_bracket_call(false);
+                self.at_block_or_line_start = false;
+                call.map(SyntaxNode::Call)
+            }
+
+            Token::Star => self.with_span(SyntaxNode::ToggleBolder),
+            Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
+            Token::Backslash => self.with_span(SyntaxNode::Linebreak),
+
+            Token::Hashtag if was_at_block_or_line_start => {
+                self.parse_heading().map(SyntaxNode::Heading)
+            }
+
+            Token::Raw { raw, backticks, terminated } => {
+                if !terminated {
+                    error!(@self.feedback, end, "expected backtick(s)");
+                }
+
+                let raw = if backticks > 1 {
+                    postprocess::process_raw(raw)
+                } else {
+                    Raw {
+                        lang: None,
+                        lines: postprocess::split_lines(raw),
+                        inline: true,
+                    }
+                };
+
+                self.with_span(SyntaxNode::Raw(raw))
+            }
+
+            Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
+            Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())),
+
+            Token::UnicodeEscape { sequence, terminated } => {
+                if !terminated {
+                    error!(@self.feedback, end, "expected closing brace");
+                }
+
+                if let Some(c) = postprocess::hex_to_char(sequence) {
+                    self.with_span(SyntaxNode::Text(c.to_string()))
+                } else {
+                    error!(@self.feedback, token.span, "invalid unicode escape sequence");
+                    self.eat();
+                    return None;
+                }
+            }
+
+            unexpected => {
+                error!(@self.feedback, token.span, "unexpected {}", unexpected.name());
+                self.eat();
+                return None;
+            }
+        })
+    }
+
+    fn parse_heading(&mut self) -> Spanned<Heading> {
+        let start = self.pos();
+        self.assert(Token::Hashtag);
+
+        let mut level = 0;
+        while self.peekv() == Some(Token::Hashtag) {
+            level += 1;
+            self.eat();
+        }
+
+        let span = Span::new(start, self.pos());
+        let level = level.span_with(span);
+
+        if level.v > 5 {
+            warning!(
+                @self.feedback, level.span,
+                "section depth larger than 6 has no effect",
+            );
+        }
+
+        self.skip_ws();
+
+        let mut tree = SyntaxTree::new();
+        while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) {
+            if let Some(node) = self.parse_node() {
+                tree.push(node);
+            }
+        }
+
+        let span = Span::new(start, self.pos());
+        Heading { level, tree }.span_with(span)
+    }
+}
+
+// Function calls.
+impl Parser<'_> {
+    fn parse_bracket_call(&mut self, chained: bool) -> Spanned<CallExpr> {
+        let before_bracket = self.pos();
+        if !chained {
+            self.start_group(Group::Bracket);
+            self.tokens.push_mode(TokenMode::Header);
+        }
+
+        let before_name = self.pos();
+        self.start_group(Group::Subheader);
+        self.skip_ws();
+        let name = self.parse_ident().unwrap_or_else(|| {
+            self.expected_found_or_at("function name", before_name);
+            Ident(String::new()).span_with(Span::at(before_name))
+        });
+
+        self.skip_ws();
+
+        let mut args = match self.eatv() {
+            Some(Token::Colon) => self.parse_table_contents().0,
+            Some(_) => {
+                self.expected_at("colon", name.span.end);
+                while self.eat().is_some() {}
+                TableExpr::new()
+            }
+            None => TableExpr::new(),
+        };
+
+        self.end_group();
+        self.skip_ws();
+        let (has_chained_child, end) = if self.peek().is_some() {
+            let item = self.parse_bracket_call(true);
+            let span = item.span;
+            let t = vec![item.map(SyntaxNode::Call)];
+            args.push(SpannedEntry::val(Expr::Tree(t).span_with(span)));
+            (true, span.end)
+        } else {
+            self.tokens.pop_mode();
+            (false, self.end_group().end)
+        };
+
+        let start = if chained { before_name } else { before_bracket };
+        let mut span = Span::new(start, end);
+
+        if self.check(Token::LeftBracket) && !has_chained_child {
+            self.start_group(Group::Bracket);
+            self.tokens.push_mode(TokenMode::Body);
+
+            let body = self.parse_body_contents();
+
+            self.tokens.pop_mode();
+            let body_span = self.end_group();
+
+            let expr = Expr::Tree(body);
+            args.push(SpannedEntry::val(expr.span_with(body_span)));
+            span.expand(body_span);
+        }
+
+        CallExpr { name, args }.span_with(span)
+    }
+
+    fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
+        self.start_group(Group::Paren);
+        let args = self.parse_table_contents().0;
+        let args_span = self.end_group();
+        let span = Span::merge(name.span, args_span);
+        CallExpr { name, args }.span_with(span)
+    }
+}
+
+// Tables.
+impl Parser<'_> {
+    fn parse_table_contents(&mut self) -> (TableExpr, bool) {
+        let mut table = TableExpr::new();
+        let mut comma_and_keyless = true;
+
+        while {
+            self.skip_ws();
+            !self.eof()
+        } {
+            let (key, val) = if let Some(ident) = self.parse_ident() {
+                self.skip_ws();
+
+                match self.peekv() {
+                    Some(Token::Equals) => {
+                        self.eat();
+                        self.skip_ws();
+                        if let Some(value) = self.parse_expr() {
+                            (Some(ident), value)
+                        } else {
+                            self.expected("value");
+                            continue;
+                        }
+                    }
+
+                    Some(Token::LeftParen) => {
+                        let call = self.parse_paren_call(ident);
+                        (None, call.map(Expr::Call))
+                    }
+
+                    _ => (None, ident.map(Expr::Ident)),
+                }
+            } else if let Some(value) = self.parse_expr() {
+                (None, value)
+            } else {
+                self.expected("value");
+                continue;
+            };
+
+            let behind = val.span.end;
+            if let Some(key) = key {
+                comma_and_keyless = false;
+                table.insert(key.v.0, SpannedEntry::new(key.span, val));
+                self.feedback
+                    .decorations
+                    .push(Decoration::TableKey.span_with(key.span));
+            } else {
+                table.push(SpannedEntry::val(val));
+            }
+
+            if {
+                self.skip_ws();
+                self.eof()
+            } {
+                break;
+            }
+
+            self.expect_at(Token::Comma, behind);
+            comma_and_keyless = false;
+        }
+
+        let coercable = comma_and_keyless && !table.is_empty();
+        (table, coercable)
+    }
+}
+
+type Binop = fn(Box<Spanned<Expr>>, Box<Spanned<Expr>>) -> Expr;
+
+// Expressions and values.
+impl Parser<'_> {
+    fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
+        self.parse_binops("summand", Self::parse_term, |token| match token {
+            Token::Plus => Some(Expr::Add),
+            Token::Hyphen => Some(Expr::Sub),
+            _ => None,
+        })
+    }
+
+    fn parse_term(&mut self) -> Option<Spanned<Expr>> {
+        self.parse_binops("factor", Self::parse_factor, |token| match token {
+            Token::Star => Some(Expr::Mul),
+            Token::Slash => Some(Expr::Div),
+            _ => None,
+        })
+    }
+
+    /// Parses an expression of the form `<operand> (<op> <operand>)*`.
+    fn parse_binops(
+        &mut self,
+        operand_name: &str,
+        mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>,
+        mut parse_op: impl FnMut(Token) -> Option<Binop>,
+    ) -> Option<Spanned<Expr>> {
+        let mut left = parse_operand(self)?;
+
+        self.skip_ws();
+        while let Some(token) = self.peek() {
+            if let Some(op) = parse_op(token.v) {
+                self.eat();
+                self.skip_ws();
+
+                if let Some(right) = parse_operand(self) {
+                    let span = Span::merge(left.span, right.span);
+                    let v = op(Box::new(left), Box::new(right));
+                    left = v.span_with(span);
+                    self.skip_ws();
+                    continue;
+                }
+
+                error!(
+                    @self.feedback, Span::merge(left.span, token.span),
+                    "missing right {}", operand_name,
+                );
+            }
+            break;
+        }
+
+        Some(left)
+    }
+
+    fn parse_factor(&mut self) -> Option<Spanned<Expr>> {
+        if let Some(hyph) = self.check_eat(Token::Hyphen) {
+            self.skip_ws();
+            if let Some(factor) = self.parse_factor() {
+                let span = Span::merge(hyph.span, factor.span);
+                Some(Expr::Neg(Box::new(factor)).span_with(span))
+            } else {
+                error!(@self.feedback, hyph.span, "dangling minus");
+                None
+            }
+        } else {
+            self.parse_value()
+        }
+    }
+
+    fn parse_value(&mut self) -> Option<Spanned<Expr>> {
+        let Spanned { v: token, span } = self.peek()?;
+        Some(match token {
+            // This could be a function call or an identifier.
+            Token::Ident(id) => {
+                let name = Ident(id.to_string()).span_with(span);
+                self.eat();
+                self.skip_ws();
+                if self.check(Token::LeftParen) {
+                    self.parse_paren_call(name).map(Expr::Call)
+                } else {
+                    name.map(Expr::Ident)
+                }
+            }
+
+            Token::Str { string, terminated } => {
+                if !terminated {
+                    self.expected_at("quote", span.end);
+                }
+                self.with_span(Expr::Str(postprocess::unescape_string(string)))
+            }
+
+            Token::Bool(b) => self.with_span(Expr::Bool(b)),
+            Token::Number(n) => self.with_span(Expr::Number(n)),
+            Token::Length(s) => self.with_span(Expr::Length(s)),
+            Token::Hex(s) => {
+                if let Ok(color) = RgbaColor::from_str(s) {
+                    self.with_span(Expr::Color(color))
+                } else {
+                    // Heal color by assuming black.
+                    error!(@self.feedback, span, "invalid color");
+                    let healed = RgbaColor::new_healed(0, 0, 0, 255);
+                    self.with_span(Expr::Color(healed))
+                }
+            }
+
+            // This could be a table or a parenthesized expression. We parse as
+            // a table in any case and coerce the table into a value if it is
+            // coercable (a single keyless entry and no trailing comma).
+            Token::LeftParen => {
+                self.start_group(Group::Paren);
+                let (table, coercable) = self.parse_table_contents();
+                let span = self.end_group();
+
+                let expr = if coercable {
+                    table.into_values().next().expect("table is coercable").val.v
+                } else {
+                    Expr::Table(table)
+                };
+
+                expr.span_with(span)
+            }
+
+            // This is a content expression.
+            Token::LeftBrace => {
+                self.start_group(Group::Brace);
+                self.tokens.push_mode(TokenMode::Body);
+
+                let tree = self.parse_body_contents();
+
+                self.tokens.pop_mode();
+                let span = self.end_group();
+                Expr::Tree(tree).span_with(span)
+            }
+
+            // This is a bracketed function call.
+            Token::LeftBracket => {
+                let call = self.parse_bracket_call(false);
+                let tree = vec![call.map(SyntaxNode::Call)];
+                Expr::Tree(tree).span_with(span)
+            }
+
+            _ => return None,
+        })
+    }
+
+    fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
+        self.peek().and_then(|token| match token.v {
+            Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
+            _ => None,
+        })
+    }
+}
+
+// Error handling.
+impl Parser<'_> {
+    fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
+        if self.check(token) {
+            self.eat();
+            true
+        } else {
+            self.expected_at(token.name(), pos);
+            false
+        }
+    }
+
+    fn expected(&mut self, thing: &str) {
+        if let Some(found) = self.eat() {
+            error!(
+                @self.feedback, found.span,
+                "expected {}, found {}", thing, found.v.name(),
+            );
+        } else {
+            error!(@self.feedback, Span::at(self.pos()), "expected {}", thing);
+        }
+    }
+
+    fn expected_at(&mut self, thing: &str, pos: Pos) {
+        error!(@self.feedback, Span::at(pos), "expected {}", thing);
+    }
+
+    fn expected_found_or_at(&mut self, thing: &str, pos: Pos) {
+        if self.eof() {
+            self.expected_at(thing, pos)
+        } else {
+            self.expected(thing);
+        }
+    }
+}
+
+// Parsing primitives.
+impl<'s> Parser<'s> {
+    fn start_group(&mut self, group: Group) {
+        let start = self.pos();
+        if let Some(start_token) = group.start() {
+            self.assert(start_token);
+        }
+        self.delimiters.push((start, group.end()));
+    }
+
+    fn end_group(&mut self) -> Span {
+        let peeked = self.peek();
+
+        let (start, end_token) = self.delimiters.pop().expect("group was not started");
+
+        if end_token != Token::Chain && peeked != None {
+            self.delimiters.push((start, end_token));
+            assert_eq!(peeked, None, "unfinished group");
+        }
+
+        match self.peeked.unwrap() {
+            Some(token) if token.v == end_token => {
+                self.peeked = None;
+                Span::new(start, token.span.end)
+            }
+            _ => {
+                let end = self.pos();
+                if end_token != Token::Chain {
+                    error!(
+                        @self.feedback, Span::at(end),
+                        "expected {}", end_token.name(),
+                    );
+                }
+                Span::new(start, end)
+            }
+        }
+    }
+
+    fn skip_ws(&mut self) {
+        while matches!(
+            self.peekv(),
+            Some(Token::Space(_)) |
+            Some(Token::LineComment(_)) |
+            Some(Token::BlockComment(_))
+        ) {
+            self.eat();
+        }
+    }
+
+    fn eatv(&mut self) -> Option<Token<'s>> {
+        self.eat().map(Spanned::value)
+    }
+
+    fn peekv(&mut self) -> Option<Token<'s>> {
+        self.peek().map(Spanned::value)
+    }
+
+    fn assert(&mut self, token: Token<'_>) {
+        assert!(self.check_eat(token).is_some());
+    }
+
+    fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
+        if self.check(token) { self.eat() } else { None }
+    }
+
+    /// Checks whether the next token is equal to the given token.
+    fn check(&mut self, token: Token<'_>) -> bool {
+        self.peekv() == Some(token)
+    }
+
+    fn with_span<T>(&mut self, v: T) -> Spanned<T> {
+        let span = self.eat().expect("expected token").span;
+        v.span_with(span)
+    }
+
+    fn eof(&mut self) -> bool {
+        self.peek().is_none()
+    }
+
+    fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
+        let token = self.peek()?;
+        self.peeked = None;
+        Some(token)
+    }
+
+    fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
+        let tokens = &mut self.tokens;
+        let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
+
+        // Check for unclosed groups.
+        if Group::is_delimiter(token.v) {
+            if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
+                return None;
+            }
+        }
+
+        Some(token)
+    }
+
+    fn pos(&self) -> Pos {
+        self.peeked
+            .flatten()
+            .map(|s| s.span.start)
+            .unwrap_or_else(|| self.tokens.pos())
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum Group {
+    Paren,
+    Bracket,
+    Brace,
+    Subheader,
+}
+
+impl Group {
+    fn is_delimiter(token: Token<'_>) -> bool {
+        matches!(
+            token,
+            Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain
+        )
+    }
+
+    fn start(self) -> Option<Token<'static>> {
+        match self {
+            Self::Paren => Some(Token::LeftParen),
+            Self::Bracket => Some(Token::LeftBracket),
+            Self::Brace => Some(Token::LeftBrace),
+            Self::Subheader => None,
+        }
+    }
+
+    fn end(self) -> Token<'static> {
+        match self {
+            Self::Paren => Token::RightParen,
+            Self::Bracket => Token::RightBracket,
+            Self::Brace => Token::RightBrace,
+            Self::Subheader => Token::Chain,
+        }
+    }
+}
 
 #[cfg(test)]
 mod tests;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
deleted file mode 100644
index 3446af83..00000000
--- a/src/parse/parser.rs
+++ /dev/null
@@ -1,645 +0,0 @@
-use std::str::FromStr;
-
-use super::escaping::*;
-use super::*;
-use crate::color::RgbaColor;
-use crate::compute::table::SpannedEntry;
-use crate::syntax::*;
-use crate::{Feedback, Pass};
-
-/// Parse a string of source code.
-pub fn parse(src: &str) -> Pass<SyntaxTree> {
-    Parser::new(src).parse()
-}
-
-struct Parser<'s> {
-    tokens: Tokens<'s>,
-    peeked: Option<Option<Spanned<Token<'s>>>>,
-    delimiters: Vec<(Pos, Token<'static>)>,
-    at_block_or_line_start: bool,
-    feedback: Feedback,
-}
-
-impl<'s> Parser<'s> {
-    fn new(src: &'s str) -> Self {
-        Self {
-            tokens: Tokens::new(src, TokenMode::Body),
-            peeked: None,
-            delimiters: vec![],
-            at_block_or_line_start: true,
-            feedback: Feedback::new(),
-        }
-    }
-
-    fn parse(mut self) -> Pass<SyntaxTree> {
-        let tree = self.parse_body_contents();
-        Pass::new(tree, self.feedback)
-    }
-}
-
-// Typesetting content.
-impl Parser<'_> {
-    fn parse_body_contents(&mut self) -> SyntaxTree {
-        let mut tree = SyntaxTree::new();
-
-        self.at_block_or_line_start = true;
-        while !self.eof() {
-            if let Some(node) = self.parse_node() {
-                tree.push(node);
-            }
-        }
-
-        tree
-    }
-
-    fn parse_node(&mut self) -> Option<Spanned<SyntaxNode>> {
-        let token = self.peek()?;
-        let end = Span::at(token.span.end);
-
-        // Set block or line start to false because most nodes have that effect, but
-        // remember the old value to actually check it for hashtags and because comments
-        // and spaces want to retain it.
-        let was_at_block_or_line_start = self.at_block_or_line_start;
-        self.at_block_or_line_start = false;
-
-        Some(match token.v {
-            // Starting from two newlines counts as a paragraph break, a single
-            // newline does not.
-            Token::Space(n) => {
-                if n == 0 {
-                    self.at_block_or_line_start = was_at_block_or_line_start;
-                } else if n >= 1 {
-                    self.at_block_or_line_start = true;
-                }
-
-                self.with_span(if n >= 2 {
-                    SyntaxNode::Parbreak
-                } else {
-                    SyntaxNode::Spacing
-                })
-            }
-
-            Token::LineComment(_) | Token::BlockComment(_) => {
-                self.at_block_or_line_start = was_at_block_or_line_start;
-                self.eat();
-                return None;
-            }
-
-            Token::LeftBracket => {
-                let call = self.parse_bracket_call(false);
-                self.at_block_or_line_start = false;
-                call.map(SyntaxNode::Call)
-            }
-
-            Token::Star => self.with_span(SyntaxNode::ToggleBolder),
-            Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
-            Token::Backslash => self.with_span(SyntaxNode::Linebreak),
-
-            Token::Hashtag if was_at_block_or_line_start => {
-                self.parse_heading().map(SyntaxNode::Heading)
-            }
-
-            Token::Raw { raw, backticks, terminated } => {
-                if !terminated {
-                    error!(@self.feedback, end, "expected backtick(s)");
-                }
-
-                let raw = if backticks > 1 {
-                    process_raw(raw)
-                } else {
-                    Raw {
-                        lang: None,
-                        lines: split_lines(raw),
-                        inline: true,
-                    }
-                };
-
-                self.with_span(SyntaxNode::Raw(raw))
-            }
-
-            Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
-            Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())),
-
-            Token::UnicodeEscape { sequence, terminated } => {
-                if !terminated {
-                    error!(@self.feedback, end, "expected closing brace");
-                }
-
-                if let Some(c) = hex_to_char(sequence) {
-                    self.with_span(SyntaxNode::Text(c.to_string()))
-                } else {
-                    error!(@self.feedback, token.span, "invalid unicode escape sequence");
-                    self.eat();
-                    return None;
-                }
-            }
-
-            unexpected => {
-                error!(@self.feedback, token.span, "unexpected {}", unexpected.name());
-                self.eat();
-                return None;
-            }
-        })
-    }
-
-    fn parse_heading(&mut self) -> Spanned<Heading> {
-        let start = self.pos();
-        self.assert(Token::Hashtag);
-
-        let mut level = 0;
-        while self.peekv() == Some(Token::Hashtag) {
-            level += 1;
-            self.eat();
-        }
-
-        let span = Span::new(start, self.pos());
-        let level = level.span_with(span);
-
-        if level.v > 5 {
-            warning!(
-                @self.feedback, level.span,
-                "section depth larger than 6 has no effect",
-            );
-        }
-
-        self.skip_ws();
-
-        let mut tree = SyntaxTree::new();
-        while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) {
-            if let Some(node) = self.parse_node() {
-                tree.push(node);
-            }
-        }
-
-        let span = Span::new(start, self.pos());
-        Heading { level, tree }.span_with(span)
-    }
-}
-
-// Function calls.
-impl Parser<'_> {
-    fn parse_bracket_call(&mut self, chained: bool) -> Spanned<CallExpr> {
-        let before_bracket = self.pos();
-        if !chained {
-            self.start_group(Group::Bracket);
-            self.tokens.push_mode(TokenMode::Header);
-        }
-
-        let before_name = self.pos();
-        self.start_group(Group::Subheader);
-        self.skip_ws();
-        let name = self.parse_ident().unwrap_or_else(|| {
-            self.expected_found_or_at("function name", before_name);
-            Ident(String::new()).span_with(Span::at(before_name))
-        });
-
-        self.skip_ws();
-
-        let mut args = match self.eatv() {
-            Some(Token::Colon) => self.parse_table_contents().0,
-            Some(_) => {
-                self.expected_at("colon", name.span.end);
-                while self.eat().is_some() {}
-                TableExpr::new()
-            }
-            None => TableExpr::new(),
-        };
-
-        self.end_group();
-        self.skip_ws();
-        let (has_chained_child, end) = if self.peek().is_some() {
-            let item = self.parse_bracket_call(true);
-            let span = item.span;
-            let t = vec![item.map(SyntaxNode::Call)];
-            args.push(SpannedEntry::val(Expr::Tree(t).span_with(span)));
-            (true, span.end)
-        } else {
-            self.tokens.pop_mode();
-            (false, self.end_group().end)
-        };
-
-        let start = if chained { before_name } else { before_bracket };
-        let mut span = Span::new(start, end);
-
-        if self.check(Token::LeftBracket) && !has_chained_child {
-            self.start_group(Group::Bracket);
-            self.tokens.push_mode(TokenMode::Body);
-
-            let body = self.parse_body_contents();
-
-            self.tokens.pop_mode();
-            let body_span = self.end_group();
-
-            let expr = Expr::Tree(body);
-            args.push(SpannedEntry::val(expr.span_with(body_span)));
-            span.expand(body_span);
-        }
-
-        CallExpr { name, args }.span_with(span)
-    }
-
-    fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
-        self.start_group(Group::Paren);
-        let args = self.parse_table_contents().0;
-        let args_span = self.end_group();
-        let span = Span::merge(name.span, args_span);
-        CallExpr { name, args }.span_with(span)
-    }
-}
-
-// Tables.
-impl Parser<'_> {
-    fn parse_table_contents(&mut self) -> (TableExpr, bool) {
-        let mut table = TableExpr::new();
-        let mut comma_and_keyless = true;
-
-        while {
-            self.skip_ws();
-            !self.eof()
-        } {
-            let (key, val) = if let Some(ident) = self.parse_ident() {
-                self.skip_ws();
-
-                match self.peekv() {
-                    Some(Token::Equals) => {
-                        self.eat();
-                        self.skip_ws();
-                        if let Some(value) = self.parse_expr() {
-                            (Some(ident), value)
-                        } else {
-                            self.expected("value");
-                            continue;
-                        }
-                    }
-
-                    Some(Token::LeftParen) => {
-                        let call = self.parse_paren_call(ident);
-                        (None, call.map(Expr::Call))
-                    }
-
-                    _ => (None, ident.map(Expr::Ident)),
-                }
-            } else if let Some(value) = self.parse_expr() {
-                (None, value)
-            } else {
-                self.expected("value");
-                continue;
-            };
-
-            let behind = val.span.end;
-            if let Some(key) = key {
-                comma_and_keyless = false;
-                table.insert(key.v.0, SpannedEntry::new(key.span, val));
-                self.feedback
-                    .decorations
-                    .push(Decoration::TableKey.span_with(key.span));
-            } else {
-                table.push(SpannedEntry::val(val));
-            }
-
-            if {
-                self.skip_ws();
-                self.eof()
-            } {
-                break;
-            }
-
-            self.expect_at(Token::Comma, behind);
-            comma_and_keyless = false;
-        }
-
-        let coercable = comma_and_keyless && !table.is_empty();
-        (table, coercable)
-    }
-}
-
-type Binop = fn(Box<Spanned<Expr>>, Box<Spanned<Expr>>) -> Expr;
-
-// Expressions and values.
-impl Parser<'_> {
-    fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
-        self.parse_binops("summand", Self::parse_term, |token| match token {
-            Token::Plus => Some(Expr::Add),
-            Token::Hyphen => Some(Expr::Sub),
-            _ => None,
-        })
-    }
-
-    fn parse_term(&mut self) -> Option<Spanned<Expr>> {
-        self.parse_binops("factor", Self::parse_factor, |token| match token {
-            Token::Star => Some(Expr::Mul),
-            Token::Slash => Some(Expr::Div),
-            _ => None,
-        })
-    }
-
-    /// Parse expression of the form `<operand> (<op> <operand>)*`.
-    fn parse_binops(
-        &mut self,
-        operand_name: &str,
-        mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>,
-        mut parse_op: impl FnMut(Token) -> Option<Binop>,
-    ) -> Option<Spanned<Expr>> {
-        let mut left = parse_operand(self)?;
-
-        self.skip_ws();
-        while let Some(token) = self.peek() {
-            if let Some(op) = parse_op(token.v) {
-                self.eat();
-                self.skip_ws();
-
-                if let Some(right) = parse_operand(self) {
-                    let span = Span::merge(left.span, right.span);
-                    let v = op(Box::new(left), Box::new(right));
-                    left = v.span_with(span);
-                    self.skip_ws();
-                    continue;
-                }
-
-                error!(
-                    @self.feedback, Span::merge(left.span, token.span),
-                    "missing right {}", operand_name,
-                );
-            }
-            break;
-        }
-
-        Some(left)
-    }
-
-    fn parse_factor(&mut self) -> Option<Spanned<Expr>> {
-        if let Some(hyph) = self.check_eat(Token::Hyphen) {
-            self.skip_ws();
-            if let Some(factor) = self.parse_factor() {
-                let span = Span::merge(hyph.span, factor.span);
-                Some(Expr::Neg(Box::new(factor)).span_with(span))
-            } else {
-                error!(@self.feedback, hyph.span, "dangling minus");
-                None
-            }
-        } else {
-            self.parse_value()
-        }
-    }
-
-    fn parse_value(&mut self) -> Option<Spanned<Expr>> {
-        let Spanned { v: token, span } = self.peek()?;
-        Some(match token {
-            // This could be a function call or an identifier.
-            Token::Ident(id) => {
-                let name = Ident(id.to_string()).span_with(span);
-                self.eat();
-                self.skip_ws();
-                if self.check(Token::LeftParen) {
-                    self.parse_paren_call(name).map(Expr::Call)
-                } else {
-                    name.map(Expr::Ident)
-                }
-            }
-
-            Token::Str { string, terminated } => {
-                if !terminated {
-                    self.expected_at("quote", span.end);
-                }
-                self.with_span(Expr::Str(unescape_string(string)))
-            }
-
-            Token::Bool(b) => self.with_span(Expr::Bool(b)),
-            Token::Number(n) => self.with_span(Expr::Number(n)),
-            Token::Length(s) => self.with_span(Expr::Length(s)),
-            Token::Hex(s) => {
-                if let Ok(color) = RgbaColor::from_str(s) {
-                    self.with_span(Expr::Color(color))
-                } else {
-                    // Heal color by assuming black.
-                    error!(@self.feedback, span, "invalid color");
-                    let healed = RgbaColor::new_healed(0, 0, 0, 255);
-                    self.with_span(Expr::Color(healed))
-                }
-            }
-
-            // This could be a table or a parenthesized expression. We parse as
-            // a table in any case and coerce the table into a value if it is
-            // coercable (length 1 and no trailing comma).
-            Token::LeftParen => {
-                self.start_group(Group::Paren);
-                let (table, coercable) = self.parse_table_contents();
-                let span = self.end_group();
-
-                let expr = if coercable {
-                    table.into_values().next().expect("table is coercable").val.v
-                } else {
-                    Expr::Table(table)
-                };
-
-                expr.span_with(span)
-            }
-
-            // This is a content expression.
-            Token::LeftBrace => {
-                self.start_group(Group::Brace);
-                self.tokens.push_mode(TokenMode::Body);
-
-                let tree = self.parse_body_contents();
-
-                self.tokens.pop_mode();
-                let span = self.end_group();
-                Expr::Tree(tree).span_with(span)
-            }
-
-            // This is a bracketed function call.
-            Token::LeftBracket => {
-                let call = self.parse_bracket_call(false);
-                let tree = vec![call.map(SyntaxNode::Call)];
-                Expr::Tree(tree).span_with(span)
-            }
-
-            _ => return None,
-        })
-    }
-
-    fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
-        self.peek().and_then(|token| match token.v {
-            Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
-            _ => None,
-        })
-    }
-}
-
-// Error handling.
-impl Parser<'_> {
-    fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
-        if self.check(token) {
-            self.eat();
-            true
-        } else {
-            self.expected_at(token.name(), pos);
-            false
-        }
-    }
-
-    fn expected(&mut self, thing: &str) {
-        if let Some(found) = self.eat() {
-            error!(
-                @self.feedback, found.span,
-                "expected {}, found {}", thing, found.v.name(),
-            );
-        } else {
-            error!(@self.feedback, Span::at(self.pos()), "expected {}", thing);
-        }
-    }
-
-    fn expected_at(&mut self, thing: &str, pos: Pos) {
-        error!(@self.feedback, Span::at(pos), "expected {}", thing);
-    }
-
-    fn expected_found_or_at(&mut self, thing: &str, pos: Pos) {
-        if self.eof() {
-            self.expected_at(thing, pos)
-        } else {
-            self.expected(thing);
-        }
-    }
-}
-
-// Parsing primitives.
-impl<'s> Parser<'s> {
-    fn start_group(&mut self, group: Group) {
-        let start = self.pos();
-        if let Some(start_token) = group.start() {
-            self.assert(start_token);
-        }
-        self.delimiters.push((start, group.end()));
-    }
-
-    fn end_group(&mut self) -> Span {
-        let peeked = self.peek();
-
-        let (start, end_token) = self.delimiters.pop().expect("group was not started");
-
-        if end_token != Token::Chain && peeked != None {
-            self.delimiters.push((start, end_token));
-            assert_eq!(peeked, None, "unfinished group");
-        }
-
-        match self.peeked.unwrap() {
-            Some(token) if token.v == end_token => {
-                self.peeked = None;
-                Span::new(start, token.span.end)
-            }
-            _ => {
-                let end = self.pos();
-                if end_token != Token::Chain {
-                    error!(
-                        @self.feedback, Span::at(end),
-                        "expected {}", end_token.name(),
-                    );
-                }
-                Span::new(start, end)
-            }
-        }
-    }
-
-    fn skip_ws(&mut self) {
-        while matches!(
-            self.peekv(),
-            Some(Token::Space(_)) |
-            Some(Token::LineComment(_)) |
-            Some(Token::BlockComment(_))
-        ) {
-            self.eat();
-        }
-    }
-
-    fn eatv(&mut self) -> Option<Token<'s>> {
-        self.eat().map(Spanned::value)
-    }
-
-    fn peekv(&mut self) -> Option<Token<'s>> {
-        self.peek().map(Spanned::value)
-    }
-
-    fn assert(&mut self, token: Token<'_>) {
-        assert!(self.check_eat(token).is_some());
-    }
-
-    fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
-        if self.check(token) { self.eat() } else { None }
-    }
-
-    /// Checks if the next token is of some kind
-    fn check(&mut self, token: Token<'_>) -> bool {
-        self.peekv() == Some(token)
-    }
-
-    fn with_span<T>(&mut self, v: T) -> Spanned<T> {
-        let span = self.eat().expect("expected token").span;
-        v.span_with(span)
-    }
-
-    fn eof(&mut self) -> bool {
-        self.peek().is_none()
-    }
-
-    fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
-        let token = self.peek()?;
-        self.peeked = None;
-        Some(token)
-    }
-
-    fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
-        let tokens = &mut self.tokens;
-        let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
-
-        // Check for unclosed groups.
-        if Group::is_delimiter(token.v) {
-            if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
-                return None;
-            }
-        }
-
-        Some(token)
-    }
-
-    fn pos(&self) -> Pos {
-        self.peeked
-            .flatten()
-            .map(|s| s.span.start)
-            .unwrap_or_else(|| self.tokens.pos())
-    }
-}
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum Group {
-    Paren,
-    Bracket,
-    Brace,
-    Subheader,
-}
-
-impl Group {
-    fn is_delimiter(token: Token<'_>) -> bool {
-        matches!(
-            token,
-            Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain
-        )
-    }
-
-    fn start(self) -> Option<Token<'static>> {
-        match self {
-            Self::Paren => Some(Token::LeftParen),
-            Self::Bracket => Some(Token::LeftBracket),
-            Self::Brace => Some(Token::LeftBrace),
-            Self::Subheader => None,
-        }
-    }
-
-    fn end(self) -> Token<'static> {
-        match self {
-            Self::Paren => Token::RightParen,
-            Self::Bracket => Token::RightBracket,
-            Self::Brace => Token::RightBrace,
-            Self::Subheader => Token::Chain,
-        }
-    }
-}
diff --git a/src/parse/postprocess.rs b/src/parse/postprocess.rs
new file mode 100644
index 00000000..ad4a9057
--- /dev/null
+++ b/src/parse/postprocess.rs
@@ -0,0 +1,217 @@
+//! Post-processing of strings and raw blocks.
+
+use super::is_newline_char;
+use crate::syntax::{Ident, Raw};
+
+/// Resolves all escape sequences in a string.
+pub fn unescape_string(string: &str) -> String {
+    let mut iter = string.chars().peekable();
+    let mut out = String::with_capacity(string.len());
+
+    while let Some(c) = iter.next() {
+        if c != '\\' {
+            out.push(c);
+            continue;
+        }
+
+        match iter.next() {
+            Some('\\') => out.push('\\'),
+            Some('"') => out.push('"'),
+
+            Some('n') => out.push('\n'),
+            Some('t') => out.push('\t'),
+            Some('u') if iter.peek() == Some(&'{') => {
+                iter.next();
+
+                // TODO: Feedback if closing brace is missing.
+                let mut sequence = String::new();
+                let terminated = loop {
+                    match iter.peek() {
+                        Some('}') => {
+                            iter.next();
+                            break true;
+                        }
+                        Some(&c) if c.is_ascii_hexdigit() => {
+                            iter.next();
+                            sequence.push(c);
+                        }
+                        _ => break false,
+                    }
+                };
+
+                if let Some(c) = hex_to_char(&sequence) {
+                    out.push(c);
+                } else {
+                    // TODO: Feedback that escape sequence is wrong.
+                    out.push_str("\\u{");
+                    out.push_str(&sequence);
+                    if terminated {
+                        out.push('}');
+                    }
+                }
+            }
+
+            other => {
+                out.push('\\');
+                out.extend(other);
+            }
+        }
+    }
+
+    out
+}
+
+/// Resolves the language tag and trims the raw text.
+///
+/// Returns a `Raw` consisting of:
+/// - `lang`: the language tag, if one was found
+/// - `lines`: the trimmed raw text, split into lines
+/// - `inline`: `true` if the text after the language tag contained no newline.
+pub fn process_raw(raw: &str) -> Raw {
+    let (lang, inner) = split_after_lang_tag(raw);
+    let (lines, had_newline) = trim_and_split_raw(inner);
+    Raw { lang, lines, inline: !had_newline }
+}
+
+/// Parse the lang tag and return it alongside the remaining inner raw text.
+fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
+    let mut lang = String::new();
+
+    let mut inner = raw;
+    let mut iter = raw.chars();
+
+    while let Some(c) = iter.next() {
+        if c == '`' || c.is_whitespace() || is_newline_char(c) {
+            break;
+        }
+
+        inner = iter.as_str();
+        lang.push(c);
+    }
+
+    (Ident::new(lang), inner)
+}
+
+/// Trims raw text and splits it into lines.
+///
+/// Returns the remaining lines and whether `raw` contained at least one newline.
+fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
+    // Trims a single space character at the start and at the end.
+    let raw = raw.strip_prefix(' ').unwrap_or(raw);
+    let raw = raw.strip_suffix(' ').unwrap_or(raw);
+
+    let mut lines = split_lines(raw);
+    let had_newline = lines.len() > 1;
+    let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
+
+    // Trims a sequence of whitespace followed by a newline at the start.
+    if lines.first().map(is_whitespace).unwrap_or(false) {
+        lines.remove(0);
+    }
+
+    // Trims a newline followed by a sequence of whitespace at the end.
+    if lines.last().map(is_whitespace).unwrap_or(false) {
+        lines.pop();
+    }
+
+    (lines, had_newline)
+}
+
+/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
+pub fn split_lines(text: &str) -> Vec<String> {
+    let mut iter = text.chars().peekable();
+    let mut line = String::new();
+    let mut lines = Vec::new();
+
+    while let Some(c) = iter.next() {
+        if is_newline_char(c) {
+            if c == '\r' && iter.peek() == Some(&'\n') {
+                iter.next();
+            }
+
+            lines.push(std::mem::take(&mut line));
+        } else {
+            line.push(c);
+        }
+    }
+
+    lines.push(line);
+    lines
+}
+
+/// Converts a hexadecimal sequence (without braces or "\u") into a character.
+pub fn hex_to_char(sequence: &str) -> Option<char> {
+    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_unescape_strings() {
+        fn test(string: &str, expected: &str) {
+            assert_eq!(unescape_string(string), expected.to_string());
+        }
+
+        test(r#"hello world"#,  "hello world");
+        test(r#"hello\nworld"#, "hello\nworld");
+        test(r#"a\"bc"#,        "a\"bc");
+        test(r#"a\u{2603}bc"#,  "a☃bc");
+        test(r#"a\u{26c3bg"#,   "a𦰻g");
+        test(r#"av\u{6797"#,    "av林");
+        test(r#"a\\"#,          "a\\");
+        test(r#"a\\\nbc"#,      "a\\\nbc");
+        test(r#"a\tbc"#,        "a\tbc");
+        test(r"🌎",             "🌎");
+        test(r"🌎\",            r"🌎\");
+        test(r"\🌎",            r"\🌎");
+    }
+
+    #[test]
+    fn test_split_after_lang_tag() {
+        fn test(raw: &str, lang: Option<&str>, inner: &str) {
+            let (found_lang, found_inner) = split_after_lang_tag(raw);
+            assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
+            assert_eq!(found_inner, inner);
+        }
+
+        test("typst it!",   Some("typst"), " it!");
+        test("typst\n it!", Some("typst"), "\n it!");
+        test("typst\n it!", Some("typst"), "\n it!");
+        test("abc`",        Some("abc"),   "`");
+        test(" hi",         None,          " hi");
+        test("`",           None,          "`");
+    }
+
+    #[test]
+    fn test_trim_raw() {
+        fn test(raw: &str, expected: Vec<&str>) {
+            assert_eq!(trim_and_split_raw(raw).0, expected);
+        }
+
+        test(" hi",          vec!["hi"]);
+        test("  hi",         vec![" hi"]);
+        test("\nhi",         vec!["hi"]);
+        test("    \n hi",    vec![" hi"]);
+        test("hi ",          vec!["hi"]);
+        test("hi  ",         vec!["hi "]);
+        test("hi\n",         vec!["hi"]);
+        test("hi \n   ",     vec!["hi "]);
+        test("  \n hi \n  ", vec![" hi "]);
+    }
+
+    #[test]
+    fn test_split_lines() {
+        fn test(raw: &str, expected: Vec<&str>) {
+            assert_eq!(split_lines(raw), expected);
+        }
+
+        test("raw\ntext",  vec!["raw", "text"]);
+        test("a\r\nb",     vec!["a", "b"]);
+        test("a\n\nb",     vec!["a", "", "b"]);
+        test("a\r\x0Bb",   vec!["a", "", "b"]);
+        test("a\r\n\r\nb", vec!["a", "", "b"]);
+    }
+}
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
index 8ddf013d..a753378e 100644
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@@ -1,3 +1,5 @@
+//! Parser tests.
+
 #![allow(non_snake_case)]
 
 use std::fmt::Debug;
diff --git a/src/parse/tokenizer.rs b/src/parse/tokenizer.rs
deleted file mode 100644
index 720bec43..00000000
--- a/src/parse/tokenizer.rs
+++ /dev/null
@@ -1,606 +0,0 @@
-//! Tokenization.
-
-use std::iter::Peekable;
-use std::str::Chars;
-use unicode_xid::UnicodeXID;
-
-use crate::length::Length;
-use crate::syntax::{Pos, Span, SpanWith, Spanned, Token};
-
-use Token::*;
-use TokenMode::*;
-
-/// An iterator over the tokens of a string of source code.
-#[derive(Debug)]
-pub struct Tokens<'s> {
-    src: &'s str,
-    iter: Peekable<Chars<'s>>,
-    mode: TokenMode,
-    stack: Vec<TokenMode>,
-    index: usize,
-}
-
-/// Whether to tokenize in header mode which yields expression, comma and
-/// similar tokens or in body mode which yields text and star, underscore,
-/// backtick tokens.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum TokenMode {
-    Header,
-    Body,
-}
-
-impl<'s> Tokens<'s> {
-    /// Create a new token iterator with the given mode.
-    pub fn new(src: &'s str, mode: TokenMode) -> Self {
-        Self {
-            src,
-            iter: src.chars().peekable(),
-            mode,
-            stack: vec![],
-            index: 0,
-        }
-    }
-
-    /// Change the token mode and push the old one on a stack.
-    pub fn push_mode(&mut self, mode: TokenMode) {
-        self.stack.push(self.mode);
-        self.mode = mode;
-    }
-
-    /// Pop the old token mode from the stack. This panics if there is no mode
-    /// on the stack.
-    pub fn pop_mode(&mut self) {
-        self.mode = self.stack.pop().expect("no pushed mode");
-    }
-
-    /// The position in the string at which the last token ends and next token
-    /// will start.
-    pub fn pos(&self) -> Pos {
-        self.index.into()
-    }
-}
-
-impl<'s> Iterator for Tokens<'s> {
-    type Item = Spanned<Token<'s>>;
-
-    /// Parse the next token in the source code.
-    fn next(&mut self) -> Option<Self::Item> {
-        let start = self.pos();
-        let first = self.eat()?;
-
-        let token = match first {
-            // Comments.
-            '/' if self.peek() == Some('/') => self.read_line_comment(),
-            '/' if self.peek() == Some('*') => self.read_block_comment(),
-            '*' if self.peek() == Some('/') => {
-                self.eat();
-                Invalid("*/")
-            }
-
-            // Whitespace.
-            c if c.is_whitespace() => self.read_whitespace(c),
-
-            // Functions and blocks.
-            '[' => LeftBracket,
-            ']' => RightBracket,
-            '{' => LeftBrace,
-            '}' => RightBrace,
-
-            // Syntactic elements in function headers.
-            '(' if self.mode == Header => LeftParen,
-            ')' if self.mode == Header => RightParen,
-            ':' if self.mode == Header => Colon,
-            ',' if self.mode == Header => Comma,
-            '=' if self.mode == Header => Equals,
-            '>' if self.mode == Header && self.peek() == Some('>') => self.read_chain(),
-
-            // Expression operators.
-            '+' if self.mode == Header => Plus,
-            '-' if self.mode == Header => Hyphen,
-            '/' if self.mode == Header => Slash,
-
-            // Star serves a double purpose as a style modifier
-            // and a expression operator in the header.
-            '*' => Star,
-
-            // A hex expression.
-            '#' if self.mode == Header => self.read_hex(),
-
-            // String values.
-            '"' if self.mode == Header => self.read_string(),
-
-            // Style toggles.
-            '_' if self.mode == Body => Underscore,
-            '`' if self.mode == Body => self.read_raw(),
-
-            // Sections.
-            '#' if self.mode == Body => Hashtag,
-
-            // Non-breaking spaces.
-            '~' if self.mode == Body => Text("\u{00A0}"),
-
-            // An escaped thing.
-            '\\' if self.mode == Body => self.read_escaped(),
-
-            // Expressions or just strings.
-            c => {
-                let body = self.mode == Body;
-
-                let start_offset = -(c.len_utf8() as isize);
-                let mut last_was_e = false;
-
-                let (text, _) = self.read_string_until(false, start_offset, 0, |n| {
-                    let val = match n {
-                        c if c.is_whitespace() => true,
-                        '[' | ']' | '{' | '}' | '/' | '*' => true,
-                        '\\' | '_' | '`' | '#' | '~' if body => true,
-                        ':' | '=' | ',' | '"' | '(' | ')' if !body => true,
-                        '+' | '-' if !body && !last_was_e => true,
-                        _ => false,
-                    };
-
-                    last_was_e = n == 'e' || n == 'E';
-                    val
-                });
-
-                if self.mode == Header {
-                    self.read_expr(text)
-                } else {
-                    Text(text)
-                }
-            }
-        };
-
-        let end = self.pos();
-
-        Some(token.span_with(Span::new(start, end)))
-    }
-}
-
-impl<'s> Tokens<'s> {
-    fn read_line_comment(&mut self) -> Token<'s> {
-        self.eat();
-        LineComment(self.read_string_until(false, 0, 0, is_newline_char).0)
-    }
-
-    fn read_block_comment(&mut self) -> Token<'s> {
-        enum Last {
-            Slash,
-            Star,
-            Other,
-        }
-
-        let mut depth = 0;
-        let mut last = Last::Other;
-
-        // Find the first `*/` that does not correspond to a nested `/*`.
-        // Remove the last two bytes to obtain the raw inner text without `*/`.
-        self.eat();
-        let (content, _) = self.read_string_until(true, 0, -2, |c| {
-            match c {
-                '/' => match last {
-                    Last::Star if depth == 0 => return true,
-                    Last::Star => depth -= 1,
-                    _ => last = Last::Slash,
-                },
-                '*' => match last {
-                    Last::Slash => depth += 1,
-                    _ => last = Last::Star,
-                },
-                _ => last = Last::Other,
-            }
-
-            false
-        });
-
-        BlockComment(content)
-    }
-
-    fn read_chain(&mut self) -> Token<'s> {
-        assert!(self.eat() == Some('>'));
-        Chain
-    }
-
-    fn read_whitespace(&mut self, mut c: char) -> Token<'s> {
-        let mut newlines = 0;
-
-        loop {
-            if is_newline_char(c) {
-                if c == '\r' && self.peek() == Some('\n') {
-                    self.eat();
-                }
-
-                newlines += 1;
-            }
-
-            match self.peek() {
-                Some(n) if n.is_whitespace() => {
-                    self.eat();
-                    c = n;
-                }
-                _ => break,
-            }
-        }
-
-        Space(newlines)
-    }
-
-    fn read_string(&mut self) -> Token<'s> {
-        let (string, terminated) = self.read_until_unescaped('"');
-        Str { string, terminated }
-    }
-
-    fn read_raw(&mut self) -> Token<'s> {
-        let mut backticks = 1;
-        while self.peek() == Some('`') {
-            self.eat();
-            backticks += 1;
-        }
-
-        let start = self.index;
-
-        let mut found = 0;
-        while found < backticks {
-            match self.eat() {
-                Some('`') => found += 1,
-                Some(_) => found = 0,
-                None => break,
-            }
-        }
-
-        let terminated = found == backticks;
-        let end = self.index - if terminated { found } else { 0 };
-
-        Raw {
-            raw: &self.src[start .. end],
-            backticks,
-            terminated,
-        }
-    }
-
-    fn read_until_unescaped(&mut self, end: char) -> (&'s str, bool) {
-        let mut escaped = false;
-        self.read_string_until(true, 0, -1, |c| {
-            match c {
-                c if c == end && !escaped => return true,
-                '\\' => escaped = !escaped,
-                _ => escaped = false,
-            }
-
-            false
-        })
-    }
-
-    fn read_escaped(&mut self) -> Token<'s> {
-        fn is_escapable(c: char) -> bool {
-            match c {
-                '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => true,
-                _ => false,
-            }
-        }
-
-        match self.peek() {
-            Some('u') => {
-                self.eat();
-                if self.peek() == Some('{') {
-                    self.eat();
-                    let (sequence, _) =
-                        self.read_string_until(false, 0, 0, |c| !c.is_ascii_hexdigit());
-
-                    let terminated = self.peek() == Some('}');
-                    if terminated {
-                        self.eat();
-                    }
-
-                    UnicodeEscape { sequence, terminated }
-                } else {
-                    Text("\\u")
-                }
-            }
-            Some(c) if is_escapable(c) => {
-                let index = self.index;
-                self.eat();
-                Text(&self.src[index .. index + c.len_utf8()])
-            }
-            Some(c) if c.is_whitespace() => Backslash,
-            Some(_) => Text("\\"),
-            None => Backslash,
-        }
-    }
-
-    fn read_hex(&mut self) -> Token<'s> {
-        // This will parse more than the permissable 0-9, a-f, A-F character
-        // ranges to provide nicer error messages later.
-        Hex(self.read_string_until(false, 0, 0, |n| !n.is_ascii_alphanumeric()).0)
-    }
-
-    fn read_expr(&mut self, text: &'s str) -> Token<'s> {
-        if let Ok(b) = text.parse::<bool>() {
-            Bool(b)
-        } else if let Ok(num) = text.parse::<f64>() {
-            Number(num)
-        } else if let Some(num) = parse_percentage(text) {
-            Number(num / 100.0)
-        } else if let Ok(length) = text.parse::<Length>() {
-            Length(length)
-        } else if is_identifier(text) {
-            Ident(text)
-        } else {
-            Invalid(text)
-        }
-    }
-
-    /// Will read the input stream until `f` evaluates to `true`. When
-    /// `eat_match` is true, the token for which `f` was true is consumed.
-    /// Returns the string from the index where this was called offset by
-    /// `offset_start` to the end offset by `offset_end`. The end is before or
-    /// after the match depending on `eat_match`.
-    fn read_string_until(
-        &mut self,
-        eat_match: bool,
-        offset_start: isize,
-        offset_end: isize,
-        mut f: impl FnMut(char) -> bool,
-    ) -> (&'s str, bool) {
-        let start = ((self.index as isize) + offset_start) as usize;
-        let mut matched = false;
-
-        while let Some(c) = self.peek() {
-            if f(c) {
-                matched = true;
-                if eat_match {
-                    self.eat();
-                }
-                break;
-            }
-
-            self.eat();
-        }
-
-        let mut end = self.index;
-        if matched {
-            end = ((end as isize) + offset_end) as usize;
-        }
-
-        (&self.src[start .. end], matched)
-    }
-
-    fn eat(&mut self) -> Option<char> {
-        let c = self.iter.next()?;
-        self.index += c.len_utf8();
-        Some(c)
-    }
-
-    fn peek(&mut self) -> Option<char> {
-        self.iter.peek().copied()
-    }
-}
-
-fn parse_percentage(text: &str) -> Option<f64> {
-    if text.ends_with('%') {
-        text[.. text.len() - 1].parse::<f64>().ok()
-    } else {
-        None
-    }
-}
-
-/// Whether this character denotes a newline.
-pub fn is_newline_char(character: char) -> bool {
-    match character {
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\x0A' ..= '\x0D' => true,
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
-        _ => false,
-    }
-}
-
-/// Whether this word is a valid identifier.
-pub fn is_identifier(string: &str) -> bool {
-    fn is_extra_allowed(c: char) -> bool {
-        c == '.' || c == '-' || c == '_'
-    }
-
-    let mut chars = string.chars();
-    match chars.next() {
-        Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {}
-        _ => return false,
-    }
-
-    for c in chars {
-        match c {
-            c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {}
-            _ => return false,
-        }
-    }
-
-    true
-}
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
-    use super::*;
-    use crate::length::Length;
-    use crate::parse::tests::{check, s};
-
-    use Token::{
-        BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
-        LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,
-        LineComment as LC, Number as Num, Plus, RightBrace as RB, RightBracket as R,
-        RightParen as RP, Slash, Space as S, Star, Text as T,
-    };
-
-    fn Str(string: &str, terminated: bool) -> Token {
-        Token::Str { string, terminated }
-    }
-    fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
-        Token::Raw { raw, backticks, terminated }
-    }
-    fn UE(sequence: &str, terminated: bool) -> Token {
-        Token::UnicodeEscape { sequence, terminated }
-    }
-
-    macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
-    macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
-    macro_rules! test {
-        (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
-            let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
-            let found = Tokens::new($src, $mode).collect::<Vec<_>>();
-            check($src, exp, found, $spans);
-        }
-    }
-
-    #[test]
-    fn tokenize_whitespace() {
-        t!(Body, ""             => );
-        t!(Body, " "            => S(0));
-        t!(Body, "    "         => S(0));
-        t!(Body, "\t"           => S(0));
-        t!(Body, "  \t"         => S(0));
-        t!(Body, "\n"           => S(1));
-        t!(Body, "\n "          => S(1));
-        t!(Body, "  \n"         => S(1));
-        t!(Body, "  \n   "      => S(1));
-        t!(Body, "\r\n"         => S(1));
-        t!(Body, "  \n\t \n  "  => S(2));
-        t!(Body, "\n\r"         => S(2));
-        t!(Body, " \r\r\n \x0D" => S(3));
-        t!(Body, "a~b"          => T("a"), T("\u{00A0}"), T("b"));
-    }
-
-    #[test]
-    fn tokenize_comments() {
-        t!(Body, "a // bc\n "        => T("a"), S(0), LC(" bc"),  S(1));
-        t!(Body, "a //a//b\n "       => T("a"), S(0), LC("a//b"), S(1));
-        t!(Body, "a //a//b\r\n"      => T("a"), S(0), LC("a//b"), S(1));
-        t!(Body, "a //a//b\n\nhello" => T("a"), S(0), LC("a//b"), S(2), T("hello"));
-        t!(Body, "/**/"              => BC(""));
-        t!(Body, "_/*_/*a*/*/"       => Underscore, BC("_/*a*/"));
-        t!(Body, "/*/*/"             => BC("/*/"));
-        t!(Body, "abc*/"             => T("abc"), Invalid("*/"));
-        t!(Body, "/***/"             => BC("*"));
-        t!(Body, "/**\\****/*/*/"    => BC("*\\***"), Invalid("*/"), Invalid("*/"));
-        t!(Body, "/*abc"             => BC("abc"));
-    }
-
-    #[test]
-    fn tokenize_body_only_tokens() {
-        t!(Body, "_*"            => Underscore, Star);
-        t!(Body, "***"           => Star, Star, Star);
-        t!(Body, "[func]*bold*"  => L, T("func"), R, Star, T("bold"), Star);
-        t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
-        t!(Body, "# hi"          => Hashtag, S(0), T("hi"));
-        t!(Body, "#()"           => Hashtag, T("()"));
-        t!(Header, "_`"          => Invalid("_`"));
-    }
-
-    #[test]
-    fn test_tokenize_raw() {
-        // Basics.
-        t!(Body, "`raw`"    => Raw("raw", 1, true));
-        t!(Body, "`[func]`" => Raw("[func]", 1, true));
-        t!(Body, "`]"       => Raw("]", 1, false));
-        t!(Body, r"`\`` "   => Raw(r"\", 1, true), Raw(" ", 1, false));
-
-        // Language tag.
-        t!(Body, "``` hi```"     => Raw(" hi", 3, true));
-        t!(Body, "```rust hi```" => Raw("rust hi", 3, true));
-        t!(Body, r"``` hi\````"  => Raw(r" hi\", 3, true), Raw("", 1, false));
-        t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true));
-        t!(Body, "```js   \r\n  document.write(\"go\")`"
-            => Raw("js   \r\n  document.write(\"go\")`", 3, false));
-
-        // More backticks.
-        t!(Body, "`````` ``````hi"  => Raw(" ", 6, true), T("hi"));
-        t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true));
-    }
-
-    #[test]
-    fn tokenize_header_only_tokens() {
-        t!(Body, "a: b"                => T("a:"), S(0), T("b"));
-        t!(Body, "c=d, "               => T("c=d,"), S(0));
-        t!(Header, "(){}:=,"           => LP, RP, LB, RB, Colon, Equals, Comma);
-        t!(Header, "a:b"               => Id("a"), Colon, Id("b"));
-        t!(Header, "#6ae6dd"           => Hex("6ae6dd"));
-        t!(Header, "#8A083c"           => Hex("8A083c"));
-        t!(Header, "a: true, x=1"      => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
-                                          Id("x"), Equals, Num(1.0));
-        t!(Header, "=3.14"             => Equals, Num(3.14));
-        t!(Header, "12.3e5"            => Num(12.3e5));
-        t!(Header, "120%"              => Num(1.2));
-        t!(Header, "12e4%"             => Num(1200.0));
-        t!(Header, "__main__"          => Id("__main__"));
-        t!(Header, ">main"             => Invalid(">main"));
-        t!(Header, ".func.box"         => Id(".func.box"));
-        t!(Header, "arg, _b, _1"       => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
-        t!(Header, "f: arg >> g"       => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
-        t!(Header, "12_pt, 12pt"       => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
-        t!(Header, "1e5in"             => Len(Length::inches(100000.0)));
-        t!(Header, "2.3cm"             => Len(Length::cm(2.3)));
-        t!(Header, "12e-3in"           => Len(Length::inches(12e-3)));
-        t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)),
-                                          Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0));
-        t!(Header, "(5 - 1) / 2.1"     => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP,
-                                          S(0), Slash, S(0), Num(2.1));
-        t!(Header, "-1"                => Min, Num(1.0));
-        t!(Header, "--1"               => Min, Min, Num(1.0));
-        t!(Header, "- 1"               => Min, S(0), Num(1.0));
-        t!(Header, "02.4mm"            => Len(Length::mm(2.4)));
-        t!(Header, "2.4.cm"            => Invalid("2.4.cm"));
-        t!(Header, "(1,2)"             => LP, Num(1.0), Comma, Num(2.0), RP);
-        t!(Header, "{abc}"             => LB, Id("abc"), RB);
-        t!(Header, "🌓, 🌍,"          => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
-    }
-
-    #[test]
-    fn tokenize_strings() {
-        t!(Body, "a \"hi\" string"           => T("a"), S(0), T("\"hi\""), S(0), T("string"));
-        t!(Header, "\"hello"                 => Str("hello", false));
-        t!(Header, "\"hello world\""         => Str("hello world", true));
-        t!(Header, "\"hello\nworld\""        => Str("hello\nworld", true));
-        t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false));
-        t!(Header, r#""a\"bc""#              => Str(r#"a\"bc"#, true));
-        t!(Header, r#""a\\"bc""#             => Str(r#"a\\"#, true), Id("bc"), Str("", false));
-        t!(Header, r#""a\tbc"#               => Str("a\\tbc", false));
-        t!(Header, "\"🌎\""                  => Str("🌎", true));
-    }
-
-    #[test]
-    fn tokenize_escaped_symbols() {
-        t!(Body, r"\\"       => T(r"\"));
-        t!(Body, r"\["       => T("["));
-        t!(Body, r"\]"       => T("]"));
-        t!(Body, r"\*"       => T("*"));
-        t!(Body, r"\_"       => T("_"));
-        t!(Body, r"\`"       => T("`"));
-        t!(Body, r"\/"       => T("/"));
-        t!(Body, r"\u{2603}" => UE("2603", true));
-        t!(Body, r"\u{26A4"  => UE("26A4", false));
-        t!(Body, r#"\""#     => T("\""));
-    }
-
-    #[test]
-    fn tokenize_unescapable_symbols() {
-        t!(Body, r"\a"      => T("\\"), T("a"));
-        t!(Body, r"\:"      => T(r"\"), T(":"));
-        t!(Body, r"\="      => T(r"\"), T("="));
-        t!(Body, r"\u{2GA4" => UE("2", false), T("GA4"));
-        t!(Body, r"\u{ "    => UE("", false), Space(0));
-        t!(Body, r"\u"      => T(r"\u"));
-        t!(Header, r"\\\\"  => Invalid(r"\\\\"));
-        t!(Header, r"\a"    => Invalid(r"\a"));
-        t!(Header, r"\:"    => Invalid(r"\"), Colon);
-        t!(Header, r"\="    => Invalid(r"\"), Equals);
-        t!(Header, r"\,"    => Invalid(r"\"), Comma);
-    }
-
-    #[test]
-    fn tokenize_with_spans() {
-        ts!(Body, "hello"        => s(0, 5, T("hello")));
-        ts!(Body, "ab\r\nc"      => s(0, 2, T("ab")), s(2, 4, S(1)), s(4, 5, T("c")));
-        ts!(Body, "// ab\r\n\nf" => s(0, 5, LC(" ab")), s(5, 8, S(2)), s(8, 9, T("f")));
-        ts!(Body, "/*b*/_"       => s(0, 5, BC("b")), s(5, 6, Underscore));
-        ts!(Header, "a=10"       => s(0, 1, Id("a")), s(1, 2, Equals), s(2, 4, Num(10.0)));
-    }
-}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
new file mode 100644
index 00000000..720bec43
--- /dev/null
+++ b/src/parse/tokens.rs
@@ -0,0 +1,606 @@
+//! Tokenization.
+
+use std::iter::Peekable;
+use std::str::Chars;
+use unicode_xid::UnicodeXID;
+
+use crate::length::Length;
+use crate::syntax::{Pos, Span, SpanWith, Spanned, Token};
+
+use Token::*;
+use TokenMode::*;
+
+/// An iterator over the tokens of a string of source code.
+#[derive(Debug)]
+pub struct Tokens<'s> {
+    src: &'s str,
+    iter: Peekable<Chars<'s>>,
+    mode: TokenMode,
+    stack: Vec<TokenMode>,
+    index: usize,
+}
+
+/// Whether to tokenize in header mode which yields expression, comma and
+/// similar tokens or in body mode which yields text and star, underscore,
+/// backtick tokens.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum TokenMode {
+    Header,
+    Body,
+}
+
+impl<'s> Tokens<'s> {
+    /// Create a new token iterator with the given mode.
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
+        Self {
+            src,
+            iter: src.chars().peekable(),
+            mode,
+            stack: vec![],
+            index: 0,
+        }
+    }
+
+    /// Change the token mode and push the old one on a stack.
+    pub fn push_mode(&mut self, mode: TokenMode) {
+        self.stack.push(self.mode);
+        self.mode = mode;
+    }
+
+    /// Pop the old token mode from the stack. This panics if there is no mode
+    /// on the stack.
+    pub fn pop_mode(&mut self) {
+        self.mode = self.stack.pop().expect("no pushed mode");
+    }
+
+    /// The position in the string at which the last token ends and next token
+    /// will start.
+    pub fn pos(&self) -> Pos {
+        self.index.into()
+    }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+    type Item = Spanned<Token<'s>>;
+
+    /// Parse the next token in the source code.
+    fn next(&mut self) -> Option<Self::Item> {
+        let start = self.pos();
+        let first = self.eat()?;
+
+        let token = match first {
+            // Comments.
+            '/' if self.peek() == Some('/') => self.read_line_comment(),
+            '/' if self.peek() == Some('*') => self.read_block_comment(),
+            '*' if self.peek() == Some('/') => {
+                self.eat();
+                Invalid("*/")
+            }
+
+            // Whitespace.
+            c if c.is_whitespace() => self.read_whitespace(c),
+
+            // Functions and blocks.
+            '[' => LeftBracket,
+            ']' => RightBracket,
+            '{' => LeftBrace,
+            '}' => RightBrace,
+
+            // Syntactic elements in function headers.
+            '(' if self.mode == Header => LeftParen,
+            ')' if self.mode == Header => RightParen,
+            ':' if self.mode == Header => Colon,
+            ',' if self.mode == Header => Comma,
+            '=' if self.mode == Header => Equals,
+            '>' if self.mode == Header && self.peek() == Some('>') => self.read_chain(),
+
+            // Expression operators.
+            '+' if self.mode == Header => Plus,
+            '-' if self.mode == Header => Hyphen,
+            '/' if self.mode == Header => Slash,
+
+            // Star serves a double purpose as a style modifier
+            // and an expression operator in the header.
+            '*' => Star,
+
+            // A hex expression.
+            '#' if self.mode == Header => self.read_hex(),
+
+            // String values.
+            '"' if self.mode == Header => self.read_string(),
+
+            // Style toggles.
+            '_' if self.mode == Body => Underscore,
+            '`' if self.mode == Body => self.read_raw(),
+
+            // Sections.
+            '#' if self.mode == Body => Hashtag,
+
+            // Non-breaking spaces.
+            '~' if self.mode == Body => Text("\u{00A0}"),
+
+            // An escaped thing.
+            '\\' if self.mode == Body => self.read_escaped(),
+
+            // Expressions or just strings.
+            c => {
+                let body = self.mode == Body;
+
+                let start_offset = -(c.len_utf8() as isize);
+                let mut last_was_e = false;
+
+                let (text, _) = self.read_string_until(false, start_offset, 0, |n| {
+                    let val = match n {
+                        c if c.is_whitespace() => true,
+                        '[' | ']' | '{' | '}' | '/' | '*' => true,
+                        '\\' | '_' | '`' | '#' | '~' if body => true,
+                        ':' | '=' | ',' | '"' | '(' | ')' if !body => true,
+                        '+' | '-' if !body && !last_was_e => true,
+                        _ => false,
+                    };
+
+                    last_was_e = n == 'e' || n == 'E';
+                    val
+                });
+
+                if self.mode == Header {
+                    self.read_expr(text)
+                } else {
+                    Text(text)
+                }
+            }
+        };
+
+        let end = self.pos();
+
+        Some(token.span_with(Span::new(start, end)))
+    }
+}
+
+impl<'s> Tokens<'s> {
+    fn read_line_comment(&mut self) -> Token<'s> {
+        self.eat();
+        LineComment(self.read_string_until(false, 0, 0, is_newline_char).0)
+    }
+
+    fn read_block_comment(&mut self) -> Token<'s> {
+        enum Last {
+            Slash,
+            Star,
+            Other,
+        }
+
+        let mut depth = 0;
+        let mut last = Last::Other;
+
+        // Find the first `*/` that does not correspond to a nested `/*`.
+        // Remove the last two bytes to obtain the raw inner text without `*/`.
+        self.eat();
+        let (content, _) = self.read_string_until(true, 0, -2, |c| {
+            match c {
+                '/' => match last {
+                    Last::Star if depth == 0 => return true,
+                    Last::Star => depth -= 1,
+                    _ => last = Last::Slash,
+                },
+                '*' => match last {
+                    Last::Slash => depth += 1,
+                    _ => last = Last::Star,
+                },
+                _ => last = Last::Other,
+            }
+
+            false
+        });
+
+        BlockComment(content)
+    }
+
+    fn read_chain(&mut self) -> Token<'s> {
+        assert!(self.eat() == Some('>'));
+        Chain
+    }
+
+    fn read_whitespace(&mut self, mut c: char) -> Token<'s> {
+        let mut newlines = 0;
+
+        loop {
+            if is_newline_char(c) {
+                if c == '\r' && self.peek() == Some('\n') {
+                    self.eat();
+                }
+
+                newlines += 1;
+            }
+
+            match self.peek() {
+                Some(n) if n.is_whitespace() => {
+                    self.eat();
+                    c = n;
+                }
+                _ => break,
+            }
+        }
+
+        Space(newlines)
+    }
+
+    fn read_string(&mut self) -> Token<'s> {
+        let (string, terminated) = self.read_until_unescaped('"');
+        Str { string, terminated }
+    }
+
+    fn read_raw(&mut self) -> Token<'s> {
+        let mut backticks = 1;
+        while self.peek() == Some('`') {
+            self.eat();
+            backticks += 1;
+        }
+
+        let start = self.index;
+
+        let mut found = 0;
+        while found < backticks {
+            match self.eat() {
+                Some('`') => found += 1,
+                Some(_) => found = 0,
+                None => break,
+            }
+        }
+
+        let terminated = found == backticks;
+        let end = self.index - if terminated { found } else { 0 };
+
+        Raw {
+            raw: &self.src[start .. end],
+            backticks,
+            terminated,
+        }
+    }
+
+    fn read_until_unescaped(&mut self, end: char) -> (&'s str, bool) {
+        let mut escaped = false;
+        self.read_string_until(true, 0, -1, |c| {
+            match c {
+                c if c == end && !escaped => return true,
+                '\\' => escaped = !escaped,
+                _ => escaped = false,
+            }
+
+            false
+        })
+    }
+
+    fn read_escaped(&mut self) -> Token<'s> {
+        fn is_escapable(c: char) -> bool {
+            match c {
+                '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => true,
+                _ => false,
+            }
+        }
+
+        match self.peek() {
+            Some('u') => {
+                self.eat();
+                if self.peek() == Some('{') {
+                    self.eat();
+                    let (sequence, _) =
+                        self.read_string_until(false, 0, 0, |c| !c.is_ascii_hexdigit());
+
+                    let terminated = self.peek() == Some('}');
+                    if terminated {
+                        self.eat();
+                    }
+
+                    UnicodeEscape { sequence, terminated }
+                } else {
+                    Text("\\u")
+                }
+            }
+            Some(c) if is_escapable(c) => {
+                let index = self.index;
+                self.eat();
+                Text(&self.src[index .. index + c.len_utf8()])
+            }
+            Some(c) if c.is_whitespace() => Backslash,
+            Some(_) => Text("\\"),
+            None => Backslash,
+        }
+    }
+
+    fn read_hex(&mut self) -> Token<'s> {
+        // This will parse more than the permissible 0-9, a-f, A-F character
+        // ranges to provide nicer error messages later.
+        Hex(self.read_string_until(false, 0, 0, |n| !n.is_ascii_alphanumeric()).0)
+    }
+
+    fn read_expr(&mut self, text: &'s str) -> Token<'s> {
+        if let Ok(b) = text.parse::<bool>() {
+            Bool(b)
+        } else if let Ok(num) = text.parse::<f64>() {
+            Number(num)
+        } else if let Some(num) = parse_percentage(text) {
+            Number(num / 100.0)
+        } else if let Ok(length) = text.parse::<Length>() {
+            Length(length)
+        } else if is_identifier(text) {
+            Ident(text)
+        } else {
+            Invalid(text)
+        }
+    }
+
+    /// Will read the input stream until `f` evaluates to `true`. When
+    /// `eat_match` is true, the character for which `f` was true is consumed.
+    /// Returns the string from the index where this was called offset by
+    /// `offset_start` to the end offset by `offset_end`. The end is before or
+    /// after the match depending on `eat_match`.
+    fn read_string_until(
+        &mut self,
+        eat_match: bool,
+        offset_start: isize,
+        offset_end: isize,
+        mut f: impl FnMut(char) -> bool,
+    ) -> (&'s str, bool) {
+        let start = ((self.index as isize) + offset_start) as usize;
+        let mut matched = false;
+
+        while let Some(c) = self.peek() {
+            if f(c) {
+                matched = true;
+                if eat_match {
+                    self.eat();
+                }
+                break;
+            }
+
+            self.eat();
+        }
+
+        let mut end = self.index;
+        if matched {
+            end = ((end as isize) + offset_end) as usize;
+        }
+
+        (&self.src[start .. end], matched)
+    }
+
+    /// Consumes the next character, if there is one, and advances the byte
+    /// index by its UTF-8 length.
+    fn eat(&mut self) -> Option<char> {
+        let eaten = self.iter.next();
+        if let Some(c) = eaten {
+            self.index += c.len_utf8();
+        }
+        eaten
+    }
+
+    /// Returns the next character without consuming it.
+    fn peek(&mut self) -> Option<char> {
+        let upcoming = self.iter.peek()?;
+        Some(*upcoming)
+    }
+}
+
+/// Parses text of the form `<number>%` and returns the number in front of the
+/// percent sign as written, i.e. not yet divided by 100.
+///
+/// Returns `None` if the text does not end with `%` or if the part before it
+/// is not a valid `f64`.
+fn parse_percentage(text: &str) -> Option<f64> {
+    let number = text.strip_suffix('%')?;
+    number.parse().ok()
+}
+
+/// Whether this character denotes a newline.
+///
+/// Recognized are the four consecutive ASCII control characters U+000A
+/// through U+000D and the Unicode characters U+0085, U+2028 and U+2029.
+pub fn is_newline_char(character: char) -> bool {
+    const NEWLINES: [char; 7] = [
+        '\x0A',     // Line Feed
+        '\x0B',     // Vertical Tab
+        '\x0C',     // Form Feed
+        '\x0D',     // Carriage Return
+        '\u{0085}', // Next Line
+        '\u{2028}', // Line Separator
+        '\u{2029}', // Paragraph Separator
+    ];
+
+    NEWLINES.contains(&character)
+}
+
+/// Whether this word is a valid identifier.
+///
+/// The first character must be an XID start character and all following
+/// characters must be XID continue characters. In addition, `.`, `-` and `_`
+/// are accepted at every position. The empty string is not an identifier.
+pub fn is_identifier(string: &str) -> bool {
+    let is_extra = |c: char| c == '.' || c == '-' || c == '_';
+    let is_start = |c: char| UnicodeXID::is_xid_start(c) || is_extra(c);
+    let is_continue = |c: char| UnicodeXID::is_xid_continue(c) || is_extra(c);
+
+    let mut chars = string.chars();
+    chars.next().map_or(false, is_start) && chars.all(is_continue)
+}
+
+/// Tests for the tokenizer: each case tokenizes a source string in a given
+/// mode and compares the result with a list of expected tokens.
+#[cfg(test)]
+// The helpers `Str`, `Raw` and `UE` are functions named like the token
+// variants they build, so that they read like variants in the expectations.
+#[allow(non_snake_case)]
+mod tests {
+    use super::*;
+    use crate::length::Length;
+    use crate::parse::tests::{check, s};
+
+    // Short aliases that keep the expectation lists compact.
+    use Token::{
+        BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
+        LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,
+        LineComment as LC, Number as Num, Plus, RightBrace as RB, RightBracket as R,
+        RightParen as RP, Slash, Space as S, Star, Text as T,
+    };
+
+    /// Builds a `Token::Str` from positional arguments.
+    fn Str(string: &str, terminated: bool) -> Token {
+        Token::Str { string, terminated }
+    }
+    /// Builds a `Token::Raw` from positional arguments.
+    fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
+        Token::Raw { raw, backticks, terminated }
+    }
+    /// Builds a `Token::UnicodeEscape` from positional arguments.
+    fn UE(sequence: &str, terminated: bool) -> Token {
+        Token::UnicodeEscape { sequence, terminated }
+    }
+
+    // `t!` and `ts!` forward to `test!` with the span flag set to `false` and
+    // `true` respectively. The flag is handed to `check` as its last argument,
+    // which presumably decides whether spans are compared.
+    macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
+    macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
+    // Tokenizes `$src` in `$mode` and checks the result against the expected
+    // tokens, each of which is converted into a `Spanned<Token>` first.
+    macro_rules! test {
+        (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
+            let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
+            let found = Tokens::new($src, $mode).collect::<Vec<_>>();
+            check($src, exp, found, $spans);
+        }
+    }
+
+    // Whitespace collapses into one `Space` token whose number grows with the
+    // line breaks it contains; `\r\n` counts as a single line break.
+    #[test]
+    fn tokenize_whitespace() {
+        t!(Body, ""             => );
+        t!(Body, " "            => S(0));
+        t!(Body, "    "         => S(0));
+        t!(Body, "\t"           => S(0));
+        t!(Body, "  \t"         => S(0));
+        t!(Body, "\n"           => S(1));
+        t!(Body, "\n "          => S(1));
+        t!(Body, "  \n"         => S(1));
+        t!(Body, "  \n   "      => S(1));
+        t!(Body, "\r\n"         => S(1));
+        t!(Body, "  \n\t \n  "  => S(2));
+        t!(Body, "\n\r"         => S(2));
+        t!(Body, " \r\r\n \x0D" => S(3));
+        t!(Body, "a~b"          => T("a"), T("\u{00A0}"), T("b"));
+    }
+
+    // Line comments, nested and unterminated block comments, and stray `*/`.
+    #[test]
+    fn tokenize_comments() {
+        t!(Body, "a // bc\n "        => T("a"), S(0), LC(" bc"),  S(1));
+        t!(Body, "a //a//b\n "       => T("a"), S(0), LC("a//b"), S(1));
+        t!(Body, "a //a//b\r\n"      => T("a"), S(0), LC("a//b"), S(1));
+        t!(Body, "a //a//b\n\nhello" => T("a"), S(0), LC("a//b"), S(2), T("hello"));
+        t!(Body, "/**/"              => BC(""));
+        t!(Body, "_/*_/*a*/*/"       => Underscore, BC("_/*a*/"));
+        t!(Body, "/*/*/"             => BC("/*/"));
+        t!(Body, "abc*/"             => T("abc"), Invalid("*/"));
+        t!(Body, "/***/"             => BC("*"));
+        t!(Body, "/**\\****/*/*/"    => BC("*\\***"), Invalid("*/"), Invalid("*/"));
+        t!(Body, "/*abc"             => BC("abc"));
+    }
+
+    // Markup tokens of body mode; the last case shows them being invalid in
+    // header mode.
+    #[test]
+    fn tokenize_body_only_tokens() {
+        t!(Body, "_*"            => Underscore, Star);
+        t!(Body, "***"           => Star, Star, Star);
+        t!(Body, "[func]*bold*"  => L, T("func"), R, Star, T("bold"), Star);
+        t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
+        t!(Body, "# hi"          => Hashtag, S(0), T("hi"));
+        t!(Body, "#()"           => Hashtag, T("()"));
+        t!(Header, "_`"          => Invalid("_`"));
+    }
+
+    #[test]
+    fn tokenize_raw() {
+        // Basics.
+        t!(Body, "`raw`"    => Raw("raw", 1, true));
+        t!(Body, "`[func]`" => Raw("[func]", 1, true));
+        t!(Body, "`]"       => Raw("]", 1, false));
+        t!(Body, r"`\`` "   => Raw(r"\", 1, true), Raw(" ", 1, false));
+
+        // Language tag.
+        t!(Body, "``` hi```"     => Raw(" hi", 3, true));
+        t!(Body, "```rust hi```" => Raw("rust hi", 3, true));
+        t!(Body, r"``` hi\````"  => Raw(r" hi\", 3, true), Raw("", 1, false));
+        t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true));
+        t!(Body, "```js   \r\n  document.write(\"go\")`"
+            => Raw("js   \r\n  document.write(\"go\")`", 3, false));
+
+        // More backticks.
+        t!(Body, "`````` ``````hi"  => Raw(" ", 6, true), T("hi"));
+        t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true));
+    }
+
+    // Expression tokens of header mode; the first two cases show the same
+    // characters being plain text in body mode.
+    #[test]
+    fn tokenize_header_only_tokens() {
+        t!(Body, "a: b"                => T("a:"), S(0), T("b"));
+        t!(Body, "c=d, "               => T("c=d,"), S(0));
+        t!(Header, "(){}:=,"           => LP, RP, LB, RB, Colon, Equals, Comma);
+        t!(Header, "a:b"               => Id("a"), Colon, Id("b"));
+        t!(Header, "#6ae6dd"           => Hex("6ae6dd"));
+        t!(Header, "#8A083c"           => Hex("8A083c"));
+        t!(Header, "a: true, x=1"      => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
+                                          Id("x"), Equals, Num(1.0));
+        t!(Header, "=3.14"             => Equals, Num(3.14));
+        t!(Header, "12.3e5"            => Num(12.3e5));
+        t!(Header, "120%"              => Num(1.2));
+        t!(Header, "12e4%"             => Num(1200.0));
+        t!(Header, "__main__"          => Id("__main__"));
+        t!(Header, ">main"             => Invalid(">main"));
+        t!(Header, ".func.box"         => Id(".func.box"));
+        t!(Header, "arg, _b, _1"       => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
+        t!(Header, "f: arg >> g"       => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
+        t!(Header, "12_pt, 12pt"       => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
+        t!(Header, "1e5in"             => Len(Length::inches(100000.0)));
+        t!(Header, "2.3cm"             => Len(Length::cm(2.3)));
+        t!(Header, "12e-3in"           => Len(Length::inches(12e-3)));
+        t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)),
+                                          Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0));
+        t!(Header, "(5 - 1) / 2.1"     => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP,
+                                          S(0), Slash, S(0), Num(2.1));
+        t!(Header, "-1"                => Min, Num(1.0));
+        t!(Header, "--1"               => Min, Min, Num(1.0));
+        t!(Header, "- 1"               => Min, S(0), Num(1.0));
+        t!(Header, "02.4mm"            => Len(Length::mm(2.4)));
+        t!(Header, "2.4.cm"            => Invalid("2.4.cm"));
+        t!(Header, "(1,2)"             => LP, Num(1.0), Comma, Num(2.0), RP);
+        t!(Header, "{abc}"             => LB, Id("abc"), RB);
+        t!(Header, "🌓, 🌍,"          => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
+    }
+
+    // String tokens keep their escape sequences unresolved and record whether
+    // the closing quote was found.
+    #[test]
+    fn tokenize_strings() {
+        t!(Body, "a \"hi\" string"           => T("a"), S(0), T("\"hi\""), S(0), T("string"));
+        t!(Header, "\"hello"                 => Str("hello", false));
+        t!(Header, "\"hello world\""         => Str("hello world", true));
+        t!(Header, "\"hello\nworld\""        => Str("hello\nworld", true));
+        t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false));
+        t!(Header, r#""a\"bc""#              => Str(r#"a\"bc"#, true));
+        t!(Header, r#""a\\"bc""#             => Str(r#"a\\"#, true), Id("bc"), Str("", false));
+        t!(Header, r#""a\tbc"#               => Str("a\\tbc", false));
+        t!(Header, "\"🌎\""                  => Str("🌎", true));
+    }
+
+    // A backslash in front of an escapable symbol yields the bare symbol.
+    #[test]
+    fn tokenize_escaped_symbols() {
+        t!(Body, r"\\"       => T(r"\"));
+        t!(Body, r"\["       => T("["));
+        t!(Body, r"\]"       => T("]"));
+        t!(Body, r"\*"       => T("*"));
+        t!(Body, r"\_"       => T("_"));
+        t!(Body, r"\`"       => T("`"));
+        t!(Body, r"\/"       => T("/"));
+        t!(Body, r"\u{2603}" => UE("2603", true));
+        t!(Body, r"\u{26A4"  => UE("26A4", false));
+        t!(Body, r#"\""#     => T("\""));
+    }
+
+    // A backslash in front of anything else stays a literal backslash in body
+    // mode and is invalid in header mode.
+    #[test]
+    fn tokenize_unescapable_symbols() {
+        t!(Body, r"\a"      => T("\\"), T("a"));
+        t!(Body, r"\:"      => T(r"\"), T(":"));
+        t!(Body, r"\="      => T(r"\"), T("="));
+        t!(Body, r"\u{2GA4" => UE("2", false), T("GA4"));
+        t!(Body, r"\u{ "    => UE("", false), S(0));
+        t!(Body, r"\u"      => T(r"\u"));
+        t!(Header, r"\\\\"  => Invalid(r"\\\\"));
+        t!(Header, r"\a"    => Invalid(r"\a"));
+        t!(Header, r"\:"    => Invalid(r"\"), Colon);
+        t!(Header, r"\="    => Invalid(r"\"), Equals);
+        t!(Header, r"\,"    => Invalid(r"\"), Comma);
+    }
+
+    // Same kind of checks as above, but with the expected spans spelled out.
+    #[test]
+    fn tokenize_with_spans() {
+        ts!(Body, "hello"        => s(0, 5, T("hello")));
+        ts!(Body, "ab\r\nc"      => s(0, 2, T("ab")), s(2, 4, S(1)), s(4, 5, T("c")));
+        ts!(Body, "// ab\r\n\nf" => s(0, 5, LC(" ab")), s(5, 8, S(2)), s(8, 9, T("f")));
+        ts!(Body, "/*b*/_"       => s(0, 5, BC("b")), s(5, 6, Underscore));
+        ts!(Header, "a=10"       => s(0, 1, Id("a")), s(1, 2, Equals), s(2, 4, Num(10.0)));
+    }
+}
-- 
cgit v1.2.3