summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-10-11 17:53:28 +0200
committerLaurenz <laurmaedje@gmail.com>2019-10-11 17:53:28 +0200
commitc0e4fd55e6fa738cfc5dcc851d0fc3ee2d0f2cd2 (patch)
treee5531e605d0ab9e06dc950b4cd9b7a8caa116d34
parent8f788f9a4f5e970bbe6147987b711470d57aca8d (diff)
Create test runner which renders layouts to images 🗺
-rw-r--r--.gitignore1
-rw-r--r--src/doc.rs13
-rw-r--r--src/layout/boxed.rs11
-rw-r--r--src/layout/flex.rs3
-rw-r--r--src/lib.rs53
-rw-r--r--src/parsing/mod.rs (renamed from src/parsing.rs)470
-rw-r--r--src/parsing/tokens.rs465
-rw-r--r--tests/layouting.rs82
-rw-r--r--tests/layouts/shakespeare-right.tps88
-rw-r--r--tests/layouts/shakespeare.tps (renamed from test/shakespeare.tps)0
-rw-r--r--tests/layouts/styles.tps13
-rw-r--r--tests/render.py73
12 files changed, 753 insertions, 519 deletions
diff --git a/.gitignore b/.gitignore
index 4f0a4452..83240860 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
**/*.rs.bk
Cargo.lock
things
+test-cache
diff --git a/src/doc.rs b/src/doc.rs
index d6a6096e..d83ae635 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -1,5 +1,6 @@
//! Representation of typesetted documents.
+use std::io::{self, Write};
use crate::size::{Size, Size2D};
@@ -31,3 +32,15 @@ pub enum LayoutAction {
/// Write text starting at the current position.
WriteText(String),
}
+
+impl LayoutAction {
+ /// Serialize this layout action into a string representation.
+ pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
+ use LayoutAction::*;
+ match self {
+ MoveAbsolute(s) => write!(f, "m {:.4} {:.4}", s.x.to_pt(), s.y.to_pt()),
+ SetFont(i, s) => write!(f, "f {} {}", i, s),
+ WriteText(s) => write!(f, "w {}", s),
+ }
+ }
+}
diff --git a/src/layout/boxed.rs b/src/layout/boxed.rs
index afcd5278..5bd909d4 100644
--- a/src/layout/boxed.rs
+++ b/src/layout/boxed.rs
@@ -1,5 +1,6 @@
//! Block-style layouting of boxes.
+use std::io::{self, Write};
use crate::doc::{Document, Page, LayoutAction};
use crate::size::{Size, Size2D};
use super::{ActionList, LayoutSpace, Alignment, LayoutResult, LayoutError};
@@ -25,6 +26,16 @@ impl BoxLayout {
}],
}
}
+
+ /// Serialize this layout into a string representation.
+ pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
+ writeln!(f, "{:.4} {:.4}", self.dimensions.x.to_pt(), self.dimensions.y.to_pt())?;
+ for action in &self.actions {
+ action.serialize(f)?;
+ writeln!(f)?;
+ }
+ Ok(())
+ }
}
/// The context for layouting boxes.
diff --git a/src/layout/flex.rs b/src/layout/flex.rs
index 8b692691..8c099553 100644
--- a/src/layout/flex.rs
+++ b/src/layout/flex.rs
@@ -157,6 +157,9 @@ impl FlexFinisher {
/// Layout the glue.
fn glue(&mut self, glue: BoxLayout) {
+ if let Some(glue) = self.glue.take() {
+ self.append(glue);
+ }
self.glue = Some(glue);
}
diff --git a/src/lib.rs b/src/lib.rs
index cb4be8b4..26543b1d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -136,56 +136,3 @@ error_type! {
from: (ParseError, TypesetError::Parse(err)),
from: (LayoutError, TypesetError::Layout(err)),
}
-
-
-#[cfg(test)]
-mod test {
- use std::fs::File;
- use std::io::BufWriter;
- use crate::Typesetter;
- use crate::export::pdf::PdfExporter;
- use toddle::query::FileSystemFontProvider;
-
- /// Create a _PDF_ with a name from the source code.
- fn test(name: &str, src: &str) {
- let mut typesetter = Typesetter::new();
- let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
- typesetter.add_font_provider(provider);
-
- // Typeset into document.
- let document = typesetter.typeset(src).unwrap();
-
- // Write to file.
- let path = format!("../target/typeset-unit-{}.pdf", name);
- let file = BufWriter::new(File::create(path).unwrap());
- let exporter = PdfExporter::new();
- exporter.export(&document, typesetter.loader(), file).unwrap();
- }
-
- #[test]
- fn features() {
- test("features", r"
- *Features Test Page*
-
- _Multiline:_
- Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
- eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
- voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
- clita kasd gubergren, no sea takimata sanctus est.
-
- _Emoji:_ Hello World! 🌍
-
- _Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
- built-in syntax!
-
- _Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
- using the standard library functions [mono][bold] and `italic`!
- ");
- }
-
- #[test]
- fn shakespeare() {
- test("shakespeare", include_str!("../test/shakespeare.tps"));
- test("shakespeare-right", &format!("[align:right][{}]", include_str!("../test/shakespeare.tps")));
- }
-}
diff --git a/src/parsing.rs b/src/parsing/mod.rs
index 2ccf5f4a..344f3577 100644
--- a/src/parsing.rs
+++ b/src/parsing/mod.rs
@@ -1,346 +1,15 @@
-//! Tokenization and parsing of source code into syntax trees.
+//! Parsing of source code into token streams and syntax trees.
use std::collections::HashMap;
-use std::str::CharIndices;
-
-use smallvec::SmallVec;
use unicode_xid::UnicodeXID;
use crate::func::{Function, Scope};
use crate::syntax::*;
use crate::size::Size;
+mod tokens;
+pub use tokens::{tokenize, Tokens};
-/// Builds an iterator over the tokens of the source code.
-#[inline]
-pub fn tokenize(src: &str) -> Tokens {
- Tokens::new(src)
-}
-
-/// An iterator over the tokens of source code.
-#[derive(Debug, Clone)]
-pub struct Tokens<'s> {
- src: &'s str,
- chars: PeekableChars<'s>,
- state: TokensState,
- stack: SmallVec<[TokensState; 1]>,
-}
-
-/// The state the tokenizer is in.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum TokensState {
- /// The base state if there is nothing special we are in.
- Body,
- /// Inside a function header. Here colons and equal signs get parsed
- /// as distinct tokens rather than text.
- Function,
- /// We expect either the end of the function or the beginning of the body.
- MaybeBody,
-}
-
-impl<'s> Tokens<'s> {
- /// Create a new token stream from source code.
- fn new(src: &'s str) -> Tokens<'s> {
- Tokens {
- src,
- chars: PeekableChars::new(src),
- state: TokensState::Body,
- stack: SmallVec::new(),
- }
- }
-
- /// Advance the iterator by one step.
- fn advance(&mut self) {
- self.chars.next();
- }
-
- /// Switch to the given state.
- fn switch(&mut self, state: TokensState) {
- self.stack.push(self.state);
- self.state = state;
- }
-
- /// Go back to the top-of-stack state.
- fn unswitch(&mut self) {
- self.state = self.stack.pop().unwrap_or(TokensState::Body);
- }
-
- /// Advance and return the given token.
- fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
- self.advance();
- token
- }
-
- /// Returns a word containing the string bounded by the given indices.
- fn text(&self, start: usize, end: usize) -> Token<'s> {
- Token::Text(&self.src[start .. end])
- }
-}
-
-impl<'s> Iterator for Tokens<'s> {
- type Item = Token<'s>;
-
- /// Advance the iterator, return the next token or nothing.
- fn next(&mut self) -> Option<Token<'s>> {
- use TokensState as TU;
-
- // Go to the body state if the function has a body or return to the top-of-stack state.
- if self.state == TU::MaybeBody {
- if self.chars.peek()?.1 == '[' {
- self.state = TU::Body;
- return Some(self.consumed(Token::LeftBracket));
- } else {
- self.unswitch();
- }
- }
-
- // Take the next char and peek at the one behind.
- let (next_pos, next) = self.chars.next()?;
- let afterwards = self.chars.peek().map(|p| p.1);
-
- Some(match next {
- // Functions
- '[' => {
- self.switch(TU::Function);
- Token::LeftBracket
- },
- ']' => {
- if self.state == TU::Function {
- self.state = TU::MaybeBody;
- } else {
- self.unswitch();
- }
- Token::RightBracket
- },
-
- // Line comment
- '/' if afterwards == Some('/') => {
- let mut end = self.chars.next().unwrap();
- let start = end.0 + end.1.len_utf8();
-
- while let Some((index, c)) = self.chars.peek() {
- if is_newline_char(c) {
- break;
- }
- self.advance();
- end = (index, c);
- }
-
- let end = end.0 + end.1.len_utf8();
- Token::LineComment(&self.src[start .. end])
- },
-
- // Block comment
- '/' if afterwards == Some('*') => {
- let mut end = self.chars.next().unwrap();
- let start = end.0 + end.1.len_utf8();
-
- let mut nested = 0;
- while let Some((index, c)) = self.chars.next() {
- let after = self.chars.peek().map(|p| p.1);
- match (c, after) {
- ('*', Some('/')) if nested == 0 => { self.advance(); break },
- ('/', Some('*')) => { self.advance(); nested += 1 },
- ('*', Some('/')) => { self.advance(); nested -= 1 },
- _ => {},
- }
- end = (index, c);
- }
-
- let end = end.0 + end.1.len_utf8();
- Token::BlockComment(&self.src[start .. end])
- },
-
- // Unexpected end of block comment
- '*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
-
- // Whitespace
- ' ' | '\t' => {
- while let Some((_, c)) = self.chars.peek() {
- match c {
- ' ' | '\t' => self.advance(),
- _ => break,
- }
- }
- Token::Space
- }
-
- // Newlines
- '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
- c if is_newline_char(c) => Token::Newline,
-
- // Star/Underscore/Backtick in bodies
- '*' if self.state == TU::Body => Token::Star,
- '_' if self.state == TU::Body => Token::Underscore,
- '`' if self.state == TU::Body => Token::Backtick,
-
- // Context sensitive operators in headers
- ':' if self.state == TU::Function => Token::Colon,
- '=' if self.state == TU::Function => Token::Equals,
- ',' if self.state == TU::Function => Token::Comma,
-
- // A string value.
- '"' if self.state == TU::Function => {
- // Find out when the word ends.
- let mut escaped = false;
- let mut end = (next_pos, next);
-
- while let Some((index, c)) = self.chars.next() {
- if c == '"' && !escaped {
- break;
- }
-
- escaped = c == '\\';
- end = (index, c);
- }
-
- let end_pos = end.0 + end.1.len_utf8();
- Token::Quoted(&self.src[next_pos + 1 .. end_pos])
- }
-
- // Escaping
- '\\' => {
- if let Some((index, c)) = self.chars.peek() {
- let escapable = match c {
- '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
- _ => false,
- };
-
- if escapable {
- self.advance();
- return Some(self.text(index, index + c.len_utf8()));
- }
- }
-
- Token::Text("\\")
- },
-
- // Normal text
- _ => {
- // Find out when the word ends.
- let mut end = (next_pos, next);
- while let Some((index, c)) = self.chars.peek() {
- let second = self.chars.peek_second().map(|p| p.1);
-
- // Whether the next token is still from the text or not.
- let continues = match c {
- '[' | ']' | '\\' => false,
- '*' | '_' | '`' if self.state == TU::Body => false,
- ':' | '=' | ',' | '"' if self.state == TU::Function => false,
-
- '/' => second != Some('/') && second != Some('*'),
- '*' => second != Some('/'),
-
- ' ' | '\t' => false,
- c if is_newline_char(c) => false,
-
- _ => true,
- };
-
- if !continues {
- break;
- }
-
- end = (index, c);
- self.advance();
- }
-
- let end_pos = end.0 + end.1.len_utf8();
- self.text(next_pos, end_pos)
- },
- })
- }
-}
-
-/// Whether this character is a newline (or starts one).
-fn is_newline_char(character: char) -> bool {
- match character {
- '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
- _ => false,
- }
-}
-
-/// A (index, char) iterator with double lookahead.
-#[derive(Debug, Clone)]
-struct PeekableChars<'s> {
- offset: usize,
- string: &'s str,
- chars: CharIndices<'s>,
- peek1: Option<Option<(usize, char)>>,
- peek2: Option<Option<(usize, char)>>,
-}
-
-impl<'s> PeekableChars<'s> {
- /// Create a new iterator from a string.
- fn new(string: &'s str) -> PeekableChars<'s> {
- PeekableChars {
- offset: 0,
- string,
- chars: string.char_indices(),
- peek1: None,
- peek2: None,
- }
- }
-
- /// Peek at the next element.
- fn peek(&mut self) -> Option<(usize, char)> {
- match self.peek1 {
- Some(peeked) => peeked,
- None => {
- let next = self.next_inner();
- self.peek1 = Some(next);
- next
- }
- }
- }
-
- /// Peek at the element after the next element.
- fn peek_second(&mut self) -> Option<(usize, char)> {
- match self.peek2 {
- Some(peeked) => peeked,
- None => {
- self.peek();
- let next = self.next_inner();
- self.peek2 = Some(next);
- next
- }
- }
- }
-
- /// Return the next value of the inner iterator mapped with the offset.
- fn next_inner(&mut self) -> Option<(usize, char)> {
- self.chars.next().map(|(i, c)| (i + self.offset, c))
- }
-
- /// The index of the first character of the next token in the source string.
- fn current_index(&mut self) -> Option<usize> {
- self.peek().map(|p| p.0)
- }
-
- /// Go to a new position in the underlying string.
- fn goto(&mut self, index: usize) {
- self.offset = index;
- self.chars = self.string[index..].char_indices();
- self.peek1 = None;
- self.peek2 = None;
- }
-}
-
-impl Iterator for PeekableChars<'_> {
- type Item = (usize, char);
-
- fn next(&mut self) -> Option<(usize, char)> {
- match self.peek1.take() {
- Some(value) => {
- self.peek1 = self.peek2.take();
- value
- },
- None => self.next_inner(),
- }
- }
-}
-
-//------------------------------------------------------------------------------------------------//
/// Parses source code into a syntax tree given a context.
#[inline]
@@ -740,7 +409,6 @@ fn is_identifier(string: &str) -> bool {
true
}
-//------------------------------------------------------------------------------------------------//
/// The error type for parsing.
pub struct ParseError(String);
@@ -762,137 +430,7 @@ error_type! {
#[cfg(test)]
-mod token_tests {
- use super::*;
- use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
- Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
- Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
- StarSlash as SS};
-
- /// Test if the source code tokenizes to the tokens.
- fn test(src: &str, tokens: Vec<Token>) {
- assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
- }
-
- /// Tokenizes the basic building blocks.
- #[test]
- fn tokenize_base() {
- test("", vec![]);
- test("Hallo", vec![T("Hallo")]);
- test("[", vec![L]);
- test("]", vec![R]);
- test("*", vec![TS]);
- test("_", vec![TU]);
- test("`", vec![TB]);
- test("\n", vec![N]);
- }
-
- /// This test looks if LF- and CRLF-style newlines get both identified correctly.
- #[test]
- fn tokenize_whitespace_newlines() {
- test(" \t", vec![S]);
- test("First line\r\nSecond line\nThird line\n",
- vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
- T("Third"), S, T("line"), N]);
- test("Hello \n ", vec![T("Hello"), S, N, S]);
- test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
- }
-
- /// Tests if escaping with backslash works as it should.
- #[test]
- fn tokenize_escape() {
- test(r"\[", vec![T("[")]);
- test(r"\]", vec![T("]")]);
- test(r"\**", vec![T("*"), TS]);
- test(r"\*", vec![T("*")]);
- test(r"\__", vec![T("_"), TU]);
- test(r"\_", vec![T("_")]);
- test(r"\hello", vec![T("\\"), T("hello")]);
- }
-
- /// Tests if escaped strings work.
- #[test]
- fn tokenize_quoted() {
- test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
- }
-
- /// Tokenizes some more realistic examples.
- #[test]
- fn tokenize_examples() {
- test(r"
- [function][
- Test [italic][example]!
- ]
- ", vec![
- N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
- T("example"), R, T("!"), N, S, R, N, S
- ]);
-
- test(r"
- [page: size=A4]
- [font: size=12pt]
-
- Das ist ein Beispielsatz mit *fetter* Schrift.
- ", vec![
- N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
- L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
- T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
- TS, T("fetter"), TS, S, T("Schrift."), N, S
- ]);
- }
-
- /// This test checks whether the colon and equals symbols get parsed correctly depending on the
- /// context: Either in a function header or in a body.
- #[test]
- fn tokenize_symbols_context() {
- test("[func: key=value][Answer: 7]",
- vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
- T("Answer:"), S, T("7"), R]);
- test("[[n: k=v]:x][:[=]]:=",
- vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
- L, T(":"), L, E, R, R, T(":=")]);
- test("[hi: k=[func][body] v=1][hello]",
- vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
- T("v"), E, T("1"), R, L, T("hello"), R]);
- test("[func: __key__=value]",
- vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
- test("The /*[*/ answer: 7.",
- vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
- }
-
- /// Test if block and line comments get tokenized as expected.
- #[test]
- fn tokenize_comments() {
- test("These // Line comments.",
- vec![T("These"), S, LC(" Line comments.")]);
- test("This /* is */ a comment.",
- vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
- test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
- test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
- test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
- test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
- }
-
- /// This test has a special look at the underscore syntax.
- #[test]
- fn tokenize_underscores() {
- test("he_llo_world_ __ Now this_ is_ special!",
- vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
- T("this"), TU, S, T("is"), TU, S, T("special!")]);
- }
-
- /// This test is for checking if non-ASCII characters get parsed correctly.
- #[test]
- fn tokenize_unicode() {
- test("[document][Hello 🌍!]",
- vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
- test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
- }
-}
-
-
-#[cfg(test)]
-mod parse_tests {
+mod tests {
use super::*;
use crate::func::{Function, Scope};
use crate::layout::{LayoutContext, LayoutResult, Layout};
diff --git a/src/parsing/tokens.rs b/src/parsing/tokens.rs
new file mode 100644
index 00000000..74b9c11c
--- /dev/null
+++ b/src/parsing/tokens.rs
@@ -0,0 +1,465 @@
+//! Tokenization of text.
+
+use std::str::CharIndices;
+use smallvec::SmallVec;
+use crate::syntax::*;
+
+
+/// Builds an iterator over the tokens of the source code.
+#[inline]
+pub fn tokenize(src: &str) -> Tokens {
+ Tokens::new(src)
+}
+
+/// An iterator over the tokens of source code.
+#[derive(Debug, Clone)]
+pub struct Tokens<'s> {
+ src: &'s str,
+ pub(in super) chars: PeekableChars<'s>,
+ state: TokensState,
+ stack: SmallVec<[TokensState; 1]>,
+}
+
+/// The state the tokenizer is in.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum TokensState {
+ /// The base state if there is nothing special we are in.
+ Body,
+ /// Inside a function header. Here colons and equal signs get parsed
+ /// as distinct tokens rather than text.
+ Function,
+ /// We expect either the end of the function or the beginning of the body.
+ MaybeBody,
+}
+
+impl<'s> Tokens<'s> {
+ /// Create a new token stream from source code.
+ fn new(src: &'s str) -> Tokens<'s> {
+ Tokens {
+ src,
+ chars: PeekableChars::new(src),
+ state: TokensState::Body,
+ stack: SmallVec::new(),
+ }
+ }
+
+ /// Advance the iterator by one step.
+ fn advance(&mut self) {
+ self.chars.next();
+ }
+
+ /// Switch to the given state.
+ fn switch(&mut self, state: TokensState) {
+ self.stack.push(self.state);
+ self.state = state;
+ }
+
+ /// Go back to the top-of-stack state.
+ fn unswitch(&mut self) {
+ self.state = self.stack.pop().unwrap_or(TokensState::Body);
+ }
+
+ /// Advance and return the given token.
+ fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
+ self.advance();
+ token
+ }
+
+ /// Returns a word containing the string bounded by the given indices.
+ fn text(&self, start: usize, end: usize) -> Token<'s> {
+ Token::Text(&self.src[start .. end])
+ }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+ type Item = Token<'s>;
+
+ /// Advance the iterator, return the next token or nothing.
+ fn next(&mut self) -> Option<Token<'s>> {
+ use TokensState as TU;
+
+ // Go to the body state if the function has a body or return to the top-of-stack state.
+ if self.state == TU::MaybeBody {
+ if self.chars.peek()?.1 == '[' {
+ self.state = TU::Body;
+ return Some(self.consumed(Token::LeftBracket));
+ } else {
+ self.unswitch();
+ }
+ }
+
+ // Take the next char and peek at the one behind.
+ let (next_pos, next) = self.chars.next()?;
+ let afterwards = self.chars.peek().map(|p| p.1);
+
+ Some(match next {
+ // Functions
+ '[' => {
+ self.switch(TU::Function);
+ Token::LeftBracket
+ },
+ ']' => {
+ if self.state == TU::Function {
+ self.state = TU::MaybeBody;
+ } else {
+ self.unswitch();
+ }
+ Token::RightBracket
+ },
+
+ // Line comment
+ '/' if afterwards == Some('/') => {
+ let mut end = self.chars.next().unwrap();
+ let start = end.0 + end.1.len_utf8();
+
+ while let Some((index, c)) = self.chars.peek() {
+ if is_newline_char(c) {
+ break;
+ }
+ self.advance();
+ end = (index, c);
+ }
+
+ let end = end.0 + end.1.len_utf8();
+ Token::LineComment(&self.src[start .. end])
+ },
+
+ // Block comment
+ '/' if afterwards == Some('*') => {
+ let mut end = self.chars.next().unwrap();
+ let start = end.0 + end.1.len_utf8();
+
+ let mut nested = 0;
+ while let Some((index, c)) = self.chars.next() {
+ let after = self.chars.peek().map(|p| p.1);
+ match (c, after) {
+ ('*', Some('/')) if nested == 0 => { self.advance(); break },
+ ('/', Some('*')) => { self.advance(); nested += 1 },
+ ('*', Some('/')) => { self.advance(); nested -= 1 },
+ _ => {},
+ }
+ end = (index, c);
+ }
+
+ let end = end.0 + end.1.len_utf8();
+ Token::BlockComment(&self.src[start .. end])
+ },
+
+ // Unexpected end of block comment
+ '*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
+
+ // Whitespace
+ ' ' | '\t' => {
+ while let Some((_, c)) = self.chars.peek() {
+ match c {
+ ' ' | '\t' => self.advance(),
+ _ => break,
+ }
+ }
+ Token::Space
+ }
+
+ // Newlines
+ '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
+ c if is_newline_char(c) => Token::Newline,
+
+ // Star/Underscore/Backtick in bodies
+ '*' if self.state == TU::Body => Token::Star,
+ '_' if self.state == TU::Body => Token::Underscore,
+ '`' if self.state == TU::Body => Token::Backtick,
+
+ // Context sensitive operators in headers
+ ':' if self.state == TU::Function => Token::Colon,
+ '=' if self.state == TU::Function => Token::Equals,
+ ',' if self.state == TU::Function => Token::Comma,
+
+ // A string value.
+ '"' if self.state == TU::Function => {
+ // Find out when the word ends.
+ let mut escaped = false;
+ let mut end = (next_pos, next);
+
+ while let Some((index, c)) = self.chars.next() {
+ if c == '"' && !escaped {
+ break;
+ }
+
+ escaped = c == '\\';
+ end = (index, c);
+ }
+
+ let end_pos = end.0 + end.1.len_utf8();
+ Token::Quoted(&self.src[next_pos + 1 .. end_pos])
+ }
+
+ // Escaping
+ '\\' => {
+ if let Some((index, c)) = self.chars.peek() {
+ let escapable = match c {
+ '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
+ _ => false,
+ };
+
+ if escapable {
+ self.advance();
+ return Some(self.text(index, index + c.len_utf8()));
+ }
+ }
+
+ Token::Text("\\")
+ },
+
+ // Normal text
+ _ => {
+ // Find out when the word ends.
+ let mut end = (next_pos, next);
+ while let Some((index, c)) = self.chars.peek() {
+ let second = self.chars.peek_second().map(|p| p.1);
+
+ // Whether the next token is still from the text or not.
+ let continues = match c {
+ '[' | ']' | '\\' => false,
+ '*' | '_' | '`' if self.state == TU::Body => false,
+ ':' | '=' | ',' | '"' if self.state == TU::Function => false,
+
+ '/' => second != Some('/') && second != Some('*'),
+ '*' => second != Some('/'),
+
+ ' ' | '\t' => false,
+ c if is_newline_char(c) => false,
+
+ _ => true,
+ };
+
+ if !continues {
+ break;
+ }
+
+ end = (index, c);
+ self.advance();
+ }
+
+ let end_pos = end.0 + end.1.len_utf8();
+ self.text(next_pos, end_pos)
+ },
+ })
+ }
+}
+
+/// Whether this character is a newline (or starts one).
+fn is_newline_char(character: char) -> bool {
+ match character {
+ '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+ _ => false,
+ }
+}
+
+/// A (index, char) iterator with double lookahead.
+#[derive(Debug, Clone)]
+pub struct PeekableChars<'s> {
+ offset: usize,
+ string: &'s str,
+ chars: CharIndices<'s>,
+ peek1: Option<Option<(usize, char)>>,
+ peek2: Option<Option<(usize, char)>>,
+}
+
+impl<'s> PeekableChars<'s> {
+ /// Create a new iterator from a string.
+ pub fn new(string: &'s str) -> PeekableChars<'s> {
+ PeekableChars {
+ offset: 0,
+ string,
+ chars: string.char_indices(),
+ peek1: None,
+ peek2: None,
+ }
+ }
+
+ /// Peek at the next element.
+ pub fn peek(&mut self) -> Option<(usize, char)> {
+ match self.peek1 {
+ Some(peeked) => peeked,
+ None => {
+ let next = self.next_inner();
+ self.peek1 = Some(next);
+ next
+ }
+ }
+ }
+
+ /// Peek at the element after the next element.
+ pub fn peek_second(&mut self) -> Option<(usize, char)> {
+ match self.peek2 {
+ Some(peeked) => peeked,
+ None => {
+ self.peek();
+ let next = self.next_inner();
+ self.peek2 = Some(next);
+ next
+ }
+ }
+ }
+
+ /// Return the next value of the inner iterator mapped with the offset.
+ pub fn next_inner(&mut self) -> Option<(usize, char)> {
+ self.chars.next().map(|(i, c)| (i + self.offset, c))
+ }
+
+ /// The index of the first character of the next token in the source string.
+ pub fn current_index(&mut self) -> Option<usize> {
+ self.peek().map(|p| p.0)
+ }
+
+ /// Go to a new position in the underlying string.
+ pub fn goto(&mut self, index: usize) {
+ self.offset = index;
+ self.chars = self.string[index..].char_indices();
+ self.peek1 = None;
+ self.peek2 = None;
+ }
+}
+
+impl Iterator for PeekableChars<'_> {
+ type Item = (usize, char);
+
+ fn next(&mut self) -> Option<(usize, char)> {
+ match self.peek1.take() {
+ Some(value) => {
+ self.peek1 = self.peek2.take();
+ value
+ },
+ None => self.next_inner(),
+ }
+ }
+}
+
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
+ Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
+ Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
+ StarSlash as SS};
+
+ /// Test if the source code tokenizes to the tokens.
+ fn test(src: &str, tokens: Vec<Token>) {
+ assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
+ }
+
+ /// Tokenizes the basic building blocks.
+ #[test]
+ fn tokenize_base() {
+ test("", vec![]);
+ test("Hallo", vec![T("Hallo")]);
+ test("[", vec![L]);
+ test("]", vec![R]);
+ test("*", vec![TS]);
+ test("_", vec![TU]);
+ test("`", vec![TB]);
+ test("\n", vec![N]);
+ }
+
+ /// This test looks if LF- and CRLF-style newlines get both identified correctly.
+ #[test]
+ fn tokenize_whitespace_newlines() {
+ test(" \t", vec![S]);
+ test("First line\r\nSecond line\nThird line\n",
+ vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
+ T("Third"), S, T("line"), N]);
+ test("Hello \n ", vec![T("Hello"), S, N, S]);
+ test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
+ }
+
+ /// Tests if escaping with backslash works as it should.
+ #[test]
+ fn tokenize_escape() {
+ test(r"\[", vec![T("[")]);
+ test(r"\]", vec![T("]")]);
+ test(r"\**", vec![T("*"), TS]);
+ test(r"\*", vec![T("*")]);
+ test(r"\__", vec![T("_"), TU]);
+ test(r"\_", vec![T("_")]);
+ test(r"\hello", vec![T("\\"), T("hello")]);
+ }
+
+ /// Tests if escaped strings work.
+ #[test]
+ fn tokenize_quoted() {
+ test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
+ }
+
+ /// Tokenizes some more realistic examples.
+ #[test]
+ fn tokenize_examples() {
+ test(r"
+ [function][
+ Test [italic][example]!
+ ]
+ ", vec![
+ N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
+ T("example"), R, T("!"), N, S, R, N, S
+ ]);
+
+ test(r"
+ [page: size=A4]
+ [font: size=12pt]
+
+ Das ist ein Beispielsatz mit *fetter* Schrift.
+ ", vec![
+ N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
+ L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
+ T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
+ TS, T("fetter"), TS, S, T("Schrift."), N, S
+ ]);
+ }
+
+ /// This test checks whether the colon and equals symbols get parsed correctly depending on the
+ /// context: Either in a function header or in a body.
+ #[test]
+ fn tokenize_symbols_context() {
+ test("[func: key=value][Answer: 7]",
+ vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
+ T("Answer:"), S, T("7"), R]);
+ test("[[n: k=v]:x][:[=]]:=",
+ vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
+ L, T(":"), L, E, R, R, T(":=")]);
+ test("[hi: k=[func][body] v=1][hello]",
+ vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
+ T("v"), E, T("1"), R, L, T("hello"), R]);
+ test("[func: __key__=value]",
+ vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
+ test("The /*[*/ answer: 7.",
+ vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
+ }
+
+ /// Test if block and line comments get tokenized as expected.
+ #[test]
+ fn tokenize_comments() {
+ test("These // Line comments.",
+ vec![T("These"), S, LC(" Line comments.")]);
+ test("This /* is */ a comment.",
+ vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
+ test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
+ test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
+ test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
+ test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
+ }
+
+ /// This test has a special look at the underscore syntax.
+ #[test]
+ fn tokenize_underscores() {
+ test("he_llo_world_ __ Now this_ is_ special!",
+ vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
+ T("this"), TU, S, T("is"), TU, S, T("special!")]);
+ }
+
+ /// This test is for checking if non-ASCII characters get parsed correctly.
+ #[test]
+ fn tokenize_unicode() {
+ test("[document][Hello 🌍!]",
+ vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
+ test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
+ }
+}
diff --git a/tests/layouting.rs b/tests/layouting.rs
new file mode 100644
index 00000000..b748748f
--- /dev/null
+++ b/tests/layouting.rs
@@ -0,0 +1,82 @@
+use std::fs::{self, File};
+use std::io::{Write, Read, BufWriter};
+use std::process::Command;
+
+use typst::Typesetter;
+use typst::toddle::query::FileSystemFontProvider;
+use typst::export::pdf::PdfExporter;
+use typst::doc::LayoutAction;
+
+const CACHE_DIR: &str = "test-cache";
+
+
+#[test]
+fn layouting() {
+ fs::create_dir_all(format!("{}/serialized", CACHE_DIR)).unwrap();
+ fs::create_dir_all(format!("{}/rendered", CACHE_DIR)).unwrap();
+ fs::create_dir_all(format!("{}/pdf", CACHE_DIR)).unwrap();
+
+ for entry in fs::read_dir("tests/layouts/").unwrap() {
+ let path = entry.unwrap().path();
+
+ let mut file = File::open(&path).unwrap();
+ let mut src = String::new();
+ file.read_to_string(&mut src).unwrap();
+
+ let name = path
+ .file_stem().unwrap()
+ .to_str().unwrap();
+
+ test(name, &src);
+ }
+}
+
+/// Create a _PDF_ with a name from the source code.
+fn test(name: &str, src: &str) {
+ let mut typesetter = Typesetter::new();
+ let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
+ typesetter.add_font_provider(provider.clone());
+
+ // Layout into box layout.
+ let tree = typesetter.parse(src).unwrap();
+ let layout = typesetter.layout(&tree).unwrap();
+
+ // Write the serialized layout file.
+ let path = format!("{}/serialized/{}.box", CACHE_DIR, name);
+ let mut file = File::create(path).unwrap();
+
+ // Find all used fonts and their filenames.
+ let mut map = Vec::new();
+ let mut loader = typesetter.loader().borrow_mut();
+ for action in &layout.actions {
+ if let LayoutAction::SetFont(index, _) = action {
+ if map.iter().find(|(i, _)| i == index).is_none() {
+ let (_, provider_index) = loader.get_provider_and_index(*index);
+ let filename = provider.get_path(provider_index).to_str().unwrap();
+ map.push((*index, filename));
+ }
+ }
+ }
+ drop(loader);
+
+ // Write the font mapping into the serialization file.
+ writeln!(file, "{}", map.len()).unwrap();
+ for (index, path) in map {
+ writeln!(file, "{} {}", index, path).unwrap();
+ }
+ layout.serialize(&mut file).unwrap();
+
+ // Render the layout into a PNG.
+ Command::new("python")
+ .arg("tests/render.py")
+ .arg(name)
+ .spawn()
+ .expect("failed to run python-based renderer");
+
+ // Write the PDF file.
+ let path = format!("{}/pdf/{}.pdf", CACHE_DIR, name);
+ let file = BufWriter::new(File::create(path).unwrap());
+ let document = layout.into_doc();
+ let exporter = PdfExporter::new();
+ exporter.export(&document, typesetter.loader(), file).unwrap();
+}
diff --git a/tests/layouts/shakespeare-right.tps b/tests/layouts/shakespeare-right.tps
new file mode 100644
index 00000000..db670fdf
--- /dev/null
+++ b/tests/layouts/shakespeare-right.tps
@@ -0,0 +1,88 @@
+[align: right][
+ [bold][Scene 5: _The Tower of London_]
+
+ [italic][Enter Mortimer, brought in a chair, and Gaolers.]
+
+ *Mortimer.* Kind keepers of my weak decaying age,
+ Let dying Mortimer here rest himself.
+ Even like a man new haled from the rack,
+ So fare my limbs with long imprisonment;
+ And these grey locks, the pursuivants of death,
+ Nestor-like aged in an age of care,
+ Argue the end of Edmund Mortimer.
+ These eyes, like lamps whose wasting oil is spent,
+ Wax dim, as drawing to their exigent;
+ Weak shoulders, overborne with burdening grief,
+ And pithless arms, like to a withered vine
+ That droops his sapless branches to the ground.
+ Yet are these feet, whose strengthless stay is numb,
+ Unable to support this lump of clay,
+ Swift-winged with desire to get a grave,
+ As witting I no other comfort have.
+ But tell me, keeper, will my nephew come?
+
+ *First Keeper.* Richard Plantagenet, my lord, will come.
+ We sent unto the Temple, unto his chamber;
+ And answer was return'd that he will come.
+
+ *Mortimer.* Enough; my soul shall then be satisfied.
+ Poor gentleman! his wrong doth equal mine.
+ Since Henry Monmouth first began to reign,
+ Before whose glory I was great in arms,
+ This loathsome sequestration have I had;
+ And even since then hath Richard been obscur'd,
+ Depriv'd of honour and inheritance.
+ But now the arbitrator of despairs,
+ Just Death, kind umpire of men's miseries,
+ With sweet enlargement doth dismiss me hence.
+ I would his troubles likewise were expir'd,
+ That so he might recover what was lost.
+
+
+ [italic][Enter Richard Plantagenet]
+
+ *First Keeper.* My lord, your loving nephew now is come.
+
+ *Mortimer.* Richard Plantagenet, my friend, is he come?
+
+ *Plantagenet.* Ay, noble uncle, thus ignobly us'd,
+ Your nephew, late despised Richard, comes.
+
+ *Mortimer.* Direct mine arms I may embrace his neck
+ And in his bosom spend my latter gasp.
+ O, tell me when my lips do touch his cheeks,
+ That I may kindly give one fainting kiss.
+ And now declare, sweet stem from York's great stock,
+ Why didst thou say of late thou wert despis'd?
+
+ *Plantagenet.* First, lean thine aged back against mine arm;
+ And, in that ease, I'll tell thee my disease.
+ This day, in argument upon a case,
+ Some words there grew 'twixt Somerset and me;
+ Among which terms he us'd his lavish tongue
+ And did upbraid me with my father's death;
+ Which obloquy set bars before my tongue,
+ Else with the like I had requited him.
+ Therefore, good uncle, for my father's sake,
+ In honour of a true Plantagenet,
+ And for alliance sake, declare the cause
+ My father, Earl of Cambridge, lost his head.
+
+ *Mortimer.* That cause, fair nephew, that imprison'd me
+ And hath detain'd me all my flow'ring youth
+ Within a loathsome dungeon, there to pine,
+ Was cursed instrument of his decease.
+
+ *Plantagenet.* Discover more at large what cause that was,
+ For I am ignorant and cannot guess.
+
+ *Mortimer.* I will, if that my fading breath permit
+ And death approach not ere my tale be done.
+ Henry the Fourth, grandfather to this king,
+ Depos'd his nephew Richard, Edward's son,
+ The first-begotten and the lawful heir
+ Of Edward king, the third of that descent;
+ During whose reign the Percies of the north,
+ Finding his usurpation most unjust,
+ Endeavour'd my advancement to the throne ...
+]
diff --git a/test/shakespeare.tps b/tests/layouts/shakespeare.tps
index e0839302..e0839302 100644
--- a/test/shakespeare.tps
+++ b/tests/layouts/shakespeare.tps
diff --git a/tests/layouts/styles.tps b/tests/layouts/styles.tps
new file mode 100644
index 00000000..790d2b38
--- /dev/null
+++ b/tests/layouts/styles.tps
@@ -0,0 +1,13 @@
+_Multiline:_
+Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
+eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
+voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
+clita kasd gubergren, no sea takimata sanctus est.
+
+_Emoji:_ Hello World! 🌍
+
+_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
+built-in syntax!
+
+_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
+using the standard library functions [mono][bold] and `italic`!
diff --git a/tests/render.py b/tests/render.py
new file mode 100644
index 00000000..02c2693f
--- /dev/null
+++ b/tests/render.py
@@ -0,0 +1,73 @@
+import sys
+import os
+import pathlib
+from PIL import Image, ImageDraw, ImageFont
+
+
+# Directory containing this script; paths below are resolved relative to it.
+BASE = os.path.dirname(__file__)
+# Cache directory the Rust test runner writes serialized layouts into and
+# this script writes rendered PNGs to.
+# NOTE(review): the trailing semicolon is redundant in Python.
+CACHE_DIR = os.path.join(BASE, "../test-cache/");
+
+
+def main():
+ assert len(sys.argv) == 2, "usage: python render.py <name>"
+ name = sys.argv[1]
+
+ filename = os.path.join(CACHE_DIR, f"serialized/{name}.box")
+ with open(filename, encoding="utf-8") as file:
+ lines = [line[:-1] for line in file.readlines()]
+
+ fonts = {}
+ font_count = int(lines[0])
+ for i in range(font_count):
+ parts = lines[1 + i].split(' ', 1)
+ index = int(parts[0])
+ path = parts[1]
+ fonts[index] = os.path.join(BASE, "../fonts", path)
+
+ width, height = (float(s) for s in lines[font_count + 1].split())
+
+ renderer = Renderer(fonts, width, height)
+ for command in lines[font_count + 2:]:
+ renderer.execute(command)
+
+ pathlib.Path(os.path.join(CACHE_DIR, "rendered")).mkdir(parents=True, exist_ok=True)
+ renderer.export(name)
+
+
+class Renderer:
+ def __init__(self, fonts, width, height):
+ self.fonts = fonts
+ self.img = Image.new("RGBA", (pix(width), pix(height)), (255, 255, 255))
+ self.draw = ImageDraw.Draw(self.img)
+ self.cursor = (0, 0)
+
+ def execute(self, command):
+ cmd = command[0]
+ parts = command.split()[1:]
+
+ if cmd == 'm':
+ x, y = (pix(float(s)) for s in parts)
+ self.cursor = (x, y)
+
+ elif cmd == 'f':
+ index = int(parts[0])
+ size = pix(float(parts[1]))
+ self.font = ImageFont.truetype(self.fonts[index], size)
+
+ elif cmd == 'w':
+ text = command[2:]
+ self.draw.text(self.cursor, text, (0, 0, 0), font=self.font)
+
+ else:
+ raise Exception("invalid command")
+
+ def export(self, name):
+ self.img.save(CACHE_DIR + "rendered/" + name + ".png")
+
+
+# Convert a size in points to pixels at a fixed scale of two pixels per
+# point, truncating to an integer.
+def pix(points):
+    return int(2 * points)
+
+
+# Entry point when invoked as a script (as the Rust test runner does).
+if __name__ == "__main__":
+    main()