summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-10-11 17:53:28 +0200
committerLaurenz <laurmaedje@gmail.com>2019-10-11 17:53:28 +0200
commitc0e4fd55e6fa738cfc5dcc851d0fc3ee2d0f2cd2 (patch)
treee5531e605d0ab9e06dc950b4cd9b7a8caa116d34
parent8f788f9a4f5e970bbe6147987b711470d57aca8d (diff)
Create test runner which renders layouts to images 🗺
-rw-r--r--.gitignore1
-rw-r--r--src/doc.rs13
-rw-r--r--src/layout/boxed.rs11
-rw-r--r--src/layout/flex.rs3
-rw-r--r--src/lib.rs53
-rw-r--r--src/parsing/mod.rs (renamed from src/parsing.rs)470
-rw-r--r--src/parsing/tokens.rs465
-rw-r--r--tests/layouting.rs82
-rw-r--r--tests/layouts/shakespeare-right.tps88
-rw-r--r--tests/layouts/shakespeare.tps (renamed from test/shakespeare.tps)0
-rw-r--r--tests/layouts/styles.tps13
-rw-r--r--tests/render.py73
12 files changed, 753 insertions, 519 deletions
diff --git a/.gitignore b/.gitignore
index 4f0a4452..83240860 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
**/*.rs.bk
Cargo.lock
things
+test-cache
diff --git a/src/doc.rs b/src/doc.rs
index d6a6096e..d83ae635 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -1,5 +1,6 @@
//! Representation of typesetted documents.
+use std::io::{self, Write};
use crate::size::{Size, Size2D};
@@ -31,3 +32,15 @@ pub enum LayoutAction {
/// Write text starting at the current position.
WriteText(String),
}
+
+impl LayoutAction {
+ /// Serialize this layout action into a string representation.
+ pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
+ use LayoutAction::*;
+ match self {
+ MoveAbsolute(s) => write!(f, "m {:.4} {:.4}", s.x.to_pt(), s.y.to_pt()),
+ SetFont(i, s) => write!(f, "f {} {}", i, s),
+ WriteText(s) => write!(f, "w {}", s),
+ }
+ }
+}
diff --git a/src/layout/boxed.rs b/src/layout/boxed.rs
index afcd5278..5bd909d4 100644
--- a/src/layout/boxed.rs
+++ b/src/layout/boxed.rs
@@ -1,5 +1,6 @@
//! Block-style layouting of boxes.
+use std::io::{self, Write};
use crate::doc::{Document, Page, LayoutAction};
use crate::size::{Size, Size2D};
use super::{ActionList, LayoutSpace, Alignment, LayoutResult, LayoutError};
@@ -25,6 +26,16 @@ impl BoxLayout {
}],
}
}
+
+ /// Serialize this layout into a string representation.
+ pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
+ writeln!(f, "{:.4} {:.4}", self.dimensions.x.to_pt(), self.dimensions.y.to_pt())?;
+ for action in &self.actions {
+ action.serialize(f)?;
+ writeln!(f)?;
+ }
+ Ok(())
+ }
}
/// The context for layouting boxes.
diff --git a/src/layout/flex.rs b/src/layout/flex.rs
index 8b692691..8c099553 100644
--- a/src/layout/flex.rs
+++ b/src/layout/flex.rs
@@ -157,6 +157,9 @@ impl FlexFinisher {
/// Layout the glue.
fn glue(&mut self, glue: BoxLayout) {
+ if let Some(glue) = self.glue.take() {
+ self.append(glue);
+ }
self.glue = Some(glue);
}
diff --git a/src/lib.rs b/src/lib.rs
index cb4be8b4..26543b1d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -136,56 +136,3 @@ error_type! {
from: (ParseError, TypesetError::Parse(err)),
from: (LayoutError, TypesetError::Layout(err)),
}
-
-
-#[cfg(test)]
-mod test {
- use std::fs::File;
- use std::io::BufWriter;
- use crate::Typesetter;
- use crate::export::pdf::PdfExporter;
- use toddle::query::FileSystemFontProvider;
-
- /// Create a _PDF_ with a name from the source code.
- fn test(name: &str, src: &str) {
- let mut typesetter = Typesetter::new();
- let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
- typesetter.add_font_provider(provider);
-
- // Typeset into document.
- let document = typesetter.typeset(src).unwrap();
-
- // Write to file.
- let path = format!("../target/typeset-unit-{}.pdf", name);
- let file = BufWriter::new(File::create(path).unwrap());
- let exporter = PdfExporter::new();
- exporter.export(&document, typesetter.loader(), file).unwrap();
- }
-
- #[test]
- fn features() {
- test("features", r"
- *Features Test Page*
-
- _Multiline:_
- Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
- eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
- voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
- clita kasd gubergren, no sea takimata sanctus est.
-
- _Emoji:_ Hello World! 🌍
-
- _Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
- built-in syntax!
-
- _Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
- using the standard library functions [mono][bold] and `italic`!
- ");
- }
-
- #[test]
- fn shakespeare() {
- test("shakespeare", include_str!("../test/shakespeare.tps"));
- test("shakespeare-right", &format!("[align:right][{}]", include_str!("../test/shakespeare.tps")));
- }
-}
diff --git a/src/parsing.rs b/src/parsing/mod.rs
index 2ccf5f4a..344f3577 100644
--- a/src/parsing.rs
+++ b/src/parsing/mod.rs
@@ -1,346 +1,15 @@
-//! Tokenization and parsing of source code into syntax trees.
+//! Parsing of source code into token streams and syntax trees.
use std::collections::HashMap;
-use std::str::CharIndices;
-
-use smallvec::SmallVec;
use unicode_xid::UnicodeXID;
use crate::func::{Function, Scope};
use crate::syntax::*;
use crate::size::Size;
+mod tokens;
+pub use tokens::{tokenize, Tokens};
-/// Builds an iterator over the tokens of the source code.
-#[inline]
-pub fn tokenize(src: &str) -> Tokens {
- Tokens::new(src)
-}
-
-/// An iterator over the tokens of source code.
-#[derive(Debug, Clone)]
-pub struct Tokens<'s> {
- src: &'s str,
- chars: PeekableChars<'s>,
- state: TokensState,
- stack: SmallVec<[TokensState; 1]>,
-}
-
-/// The state the tokenizer is in.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum TokensState {
- /// The base state if there is nothing special we are in.
- Body,
- /// Inside a function header. Here colons and equal signs get parsed
- /// as distinct tokens rather than text.
- Function,
- /// We expect either the end of the function or the beginning of the body.
- MaybeBody,
-}
-
-impl<'s> Tokens<'s> {
- /// Create a new token stream from source code.
- fn new(src: &'s str) -> Tokens<'s> {
- Tokens {
- src,
- chars: PeekableChars::new(src),
- state: TokensState::Body,
- stack: SmallVec::new(),
- }
- }
-
- /// Advance the iterator by one step.
- fn advance(&mut self) {
- self.chars.next();
- }
-
- /// Switch to the given state.
- fn switch(&mut self, state: TokensState) {
- self.stack.push(self.state);
- self.state = state;
- }
-
- /// Go back to the top-of-stack state.
- fn unswitch(&mut self) {
- self.state = self.stack.pop().unwrap_or(TokensState::Body);
- }
-
- /// Advance and return the given token.
- fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
- self.advance();
- token
- }
-
- /// Returns a word containing the string bounded by the given indices.
- fn text(&self, start: usize, end: usize) -> Token<'s> {
- Token::Text(&self.src[start .. end])
- }
-}
-
-impl<'s> Iterator for Tokens<'s> {
- type Item = Token<'s>;
-
- /// Advance the iterator, return the next token or nothing.
- fn next(&mut self) -> Option<Token<'s>> {
- use TokensState as TU;
-
- // Go to the body state if the function has a body or return to the top-of-stack state.
- if self.state == TU::MaybeBody {
- if self.chars.peek()?.1 == '[' {
- self.state = TU::Body;
- return Some(self.consumed(Token::LeftBracket));
- } else {
- self.unswitch();
- }
- }
-
- // Take the next char and peek at the one behind.
- let (next_pos, next) = self.chars.next()?;
- let afterwards = self.chars.peek().map(|p| p.1);
-
- Some(match next {
- // Functions
- '[' => {
- self.switch(TU::Function);
- Token::LeftBracket
- },
- ']' => {
- if self.state == TU::Function {
- self.state = TU::MaybeBody;
- } else {
- self.unswitch();
- }
- Token::RightBracket
- },
-
- // Line comment
- '/' if afterwards == Some('/') => {
- let mut end = self.chars.next().unwrap();
- let start = end.0 + end.1.len_utf8();
-
- while let Some((index, c)) = self.chars.peek() {
- if is_newline_char(c) {
- break;
- }
- self.advance();
- end = (index, c);
- }
-
- let end = end.0 + end.1.len_utf8();
- Token::LineComment(&self.src[start .. end])
- },
-
- // Block comment
- '/' if afterwards == Some('*') => {
- let mut end = self.chars.next().unwrap();
- let start = end.0 + end.1.len_utf8();
-
- let mut nested = 0;
- while let Some((index, c)) = self.chars.next() {
- let after = self.chars.peek().map(|p| p.1);
- match (c, after) {
- ('*', Some('/')) if nested == 0 => { self.advance(); break },
- ('/', Some('*')) => { self.advance(); nested += 1 },
- ('*', Some('/')) => { self.advance(); nested -= 1 },
- _ => {},
- }
- end = (index, c);
- }
-
- let end = end.0 + end.1.len_utf8();
- Token::BlockComment(&self.src[start .. end])
- },
-
- // Unexpected end of block comment
- '*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
-
- // Whitespace
- ' ' | '\t' => {
- while let Some((_, c)) = self.chars.peek() {
- match c {
- ' ' | '\t' => self.advance(),
- _ => break,
- }
- }
- Token::Space
- }
-
- // Newlines
- '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
- c if is_newline_char(c) => Token::Newline,
-
- // Star/Underscore/Backtick in bodies
- '*' if self.state == TU::Body => Token::Star,
- '_' if self.state == TU::Body => Token::Underscore,
- '`' if self.state == TU::Body => Token::Backtick,
-
- // Context sensitive operators in headers
- ':' if self.state == TU::Function => Token::Colon,
- '=' if self.state == TU::Function => Token::Equals,
- ',' if self.state == TU::Function => Token::Comma,
-
- // A string value.
- '"' if self.state == TU::Function => {
- // Find out when the word ends.
- let mut escaped = false;
- let mut end = (next_pos, next);
-
- while let Some((index, c)) = self.chars.next() {
- if c == '"' && !escaped {
- break;
- }
-
- escaped = c == '\\';
- end = (index, c);
- }
-
- let end_pos = end.0 + end.1.len_utf8();
- Token::Quoted(&self.src[next_pos + 1 .. end_pos])
- }
-
- // Escaping
- '\\' => {
- if let Some((index, c)) = self.chars.peek() {
- let escapable = match c {
- '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
- _ => false,
- };
-
- if escapable {
- self.advance();
- return Some(self.text(index, index + c.len_utf8()));
- }
- }
-
- Token::Text("\\")
- },
-
- // Normal text
- _ => {
- // Find out when the word ends.
- let mut end = (next_pos, next);
- while let Some((index, c)) = self.chars.peek() {
- let second = self.chars.peek_second().map(|p| p.1);
-
- // Whether the next token is still from the text or not.
- let continues = match c {
- '[' | ']' | '\\' => false,
- '*' | '_' | '`' if self.state == TU::Body => false,
- ':' | '=' | ',' | '"' if self.state == TU::Function => false,
-
- '/' => second != Some('/') && second != Some('*'),
- '*' => second != Some('/'),
-
- ' ' | '\t' => false,
- c if is_newline_char(c) => false,
-
- _ => true,
- };
-
- if !continues {
- break;
- }
-
- end = (index, c);
- self.advance();
- }
-
- let end_pos = end.0 + end.1.len_utf8();
- self.text(next_pos, end_pos)
- },
- })
- }
-}
-
-/// Whether this character is a newline (or starts one).
-fn is_newline_char(character: char) -> bool {
- match character {
- '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
- _ => false,
- }
-}
-
-/// A (index, char) iterator with double lookahead.
-#[derive(Debug, Clone)]
-struct PeekableChars<'s> {
- offset: usize,
- string: &'s str,
- chars: CharIndices<'s>,
- peek1: Option<Option<(usize, char)>>,
- peek2: Option<Option<(usize, char)>>,
-}
-
-impl<'s> PeekableChars<'s> {
- /// Create a new iterator from a string.
- fn new(string: &'s str) -> PeekableChars<'s> {
- PeekableChars {
- offset: 0,
- string,
- chars: string.char_indices(),
- peek1: None,
- peek2: None,
- }
- }
-
- /// Peek at the next element.
- fn peek(&mut self) -> Option<(usize, char)> {
- match self.peek1 {
- Some(peeked) => peeked,
- None => {
- let next = self.next_inner();
- self.peek1 = Some(next);
- next
- }
- }
- }
-
- /// Peek at the element after the next element.
- fn peek_second(&mut self) -> Option<(usize, char)> {
- match self.peek2 {
- Some(peeked) => peeked,
- None => {
- self.peek();
- let next = self.next_inner();
- self.peek2 = Some(next);
- next
- }
- }
- }
-
- /// Return the next value of the inner iterator mapped with the offset.
- fn next_inner(&mut self) -> Option<(usize, char)> {
- self.chars.next().map(|(i, c)| (i + self.offset, c))
- }
-
- /// The index of the first character of the next token in the source string.
- fn current_index(&mut self) -> Option<usize> {
- self.peek().map(|p| p.0)
- }
-
- /// Go to a new position in the underlying string.
- fn goto(&mut self, index: usize) {
- self.offset = index;
- self.chars = self.string[index..].char_indices();
- self.peek1 = None;
- self.peek2 = None;
- }
-}
-
-impl Iterator for PeekableChars<'_> {
- type Item = (usize, char);
-
- fn next(&mut self) -> Option<(usize, char)> {
- match self.peek1.take() {
- Some(value) => {
- self.peek1 = self.peek2.take();
- value
- },
- None => self.next_inner(),
- }
- }
-}
-
-//------------------------------------------------------------------------------------------------//
/// Parses source code into a syntax tree given a context.
#[inline]
@@ -740,7 +409,6 @@ fn is_identifier(string: &str) -> bool {
true
}
-//------------------------------------------------------------------------------------------------//
/// The error type for parsing.
pub struct ParseError(String);
@@ -762,137 +430,7 @@ error_type! {
#[cfg(test)]
-mod token_tests {
- use super::*;
- use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
- Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
- Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
- StarSlash as SS};
-
- /// Test if the source code tokenizes to the tokens.
- fn test(src: &str, tokens: Vec<Token>) {
- assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
- }
-
- /// Tokenizes the basic building blocks.
- #[test]
- fn tokenize_base() {
- test("", vec![]);
- test("Hallo", vec![T("Hallo")]);
- test("[", vec![L]);
- test("]", vec![R]);
- test("*", vec![TS]);
- test("_", vec![TU]);
- test("`", vec![TB]);
- test("\n", vec![N]);
- }
-
- /// This test looks if LF- and CRLF-style newlines get both identified correctly.
- #[test]
- fn tokenize_whitespace_newlines() {
- test(" \t", vec![S]);
- test("First line\r\nSecond line\nThird line\n",
- vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
- T("Third"), S, T("line"), N]);
- test("Hello \n ", vec![T("Hello"), S, N, S]);
- test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
- }
-
- /// Tests if escaping with backslash works as it should.
- #[test]
- fn tokenize_escape() {
- test(r"\[", vec![T("[")]);
- test(r"\]", vec![T("]")]);
- test(r"\**", vec![T("*"), TS]);
- test(r"\*", vec![T("*")]);
- test(r"\__", vec![T("_"), TU]);
- test(r"\_", vec![T("_")]);
- test(r"\hello", vec![T("\\"), T("hello")]);
- }
-
- /// Tests if escaped strings work.
- #[test]
- fn tokenize_quoted() {
- test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
- }
-
- /// Tokenizes some more realistic examples.
- #[test]
- fn tokenize_examples() {
- test(r"
- [function][
- Test [italic][example]!
- ]
- ", vec![
- N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
- T("example"), R, T("!"), N, S, R, N, S
- ]);
-
- test(r"
- [page: size=A4]
- [font: size=12pt]
-
- Das ist ein Beispielsatz mit *fetter* Schrift.
- ", vec![
- N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
- L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
- T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
- TS, T("fetter"), TS, S, T("Schrift."), N, S
- ]);
- }
-
- /// This test checks whether the colon and equals symbols get parsed correctly depending on the
- /// context: Either in a function header or in a body.
- #[test]
- fn tokenize_symbols_context() {
- test("[func: key=value][Answer: 7]",
- vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
- T("Answer:"), S, T("7"), R]);
- test("[[n: k=v]:x][:[=]]:=",
- vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
- L, T(":"), L, E, R, R, T(":=")]);
- test("[hi: k=[func][body] v=1][hello]",
- vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
- T("v"), E, T("1"), R, L, T("hello"), R]);
- test("[func: __key__=value]",
- vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
- test("The /*[*/ answer: 7.",
- vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
- }
-
- /// Test if block and line comments get tokenized as expected.
- #[test]
- fn tokenize_comments() {
- test("These // Line comments.",
- vec![T("These"), S, LC(" Line comments.")]);
- test("This /* is */ a comment.",
- vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
- test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
- test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
- test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
- test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
- }
-
- /// This test has a special look at the underscore syntax.
- #[test]
- fn tokenize_underscores() {
- test("he_llo_world_ __ Now this_ is_ special!",
- vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
- T("this"), TU, S, T("is"), TU, S, T("special!")]);
- }
-
- /// This test is for checking if non-ASCII characters get parsed correctly.
- #[test]
- fn tokenize_unicode() {
- test("[document][Hello 🌍!]",
- vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
- test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
- }
-}
-
-
-#[cfg(test)]
-mod parse_tests {
+mod tests {
use super::*;
use crate::func::{Function, Scope};
use crate::layout::{LayoutContext, LayoutResult, Layout};
diff --git a/src/parsing/tokens.rs b/src/parsing/tokens.rs
new file mode 100644
index 00000000..74b9c11c
--- /dev/null
+++ b/src/parsing/tokens.rs
@@ -0,0 +1,465 @@
+//! Tokenization of text.
+
+use std::str::CharIndices;
+use smallvec::SmallVec;
+use crate::syntax::*;
+
+
+/// Builds an iterator over the tokens of the source code.
+#[inline]
+pub fn tokenize(src: &str) -> Tokens {
+ Tokens::new(src)
+}
+
+/// An iterator over the tokens of source code.
+#[derive(Debug, Clone)]
+pub struct Tokens<'s> {
+ src: &'s str,
+ pub(in super) chars: PeekableChars<'s>,
+ state: TokensState,
+ stack: SmallVec<[TokensState; 1]>,
+}
+
+/// The state the tokenizer is in.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum TokensState {
+ /// The base state if there is nothing special we are in.
+ Body,
+ /// Inside a function header. Here colons and equal signs get parsed
+ /// as distinct tokens rather than text.
+ Function,
+ /// We expect either the end of the function or the beginning of the body.
+ MaybeBody,
+}
+
+impl<'s> Tokens<'s> {
+ /// Create a new token stream from source code.
+ fn new(src: &'s str) -> Tokens<'s> {
+ Tokens {
+ src,
+ chars: PeekableChars::new(src),
+ state: TokensState::Body,
+ stack: SmallVec::new(),
+ }
+ }
+
+ /// Advance the iterator by one step.
+ fn advance(&mut self) {
+ self.chars.next();
+ }
+
+ /// Switch to the given state.
+ fn switch(&mut self, state: TokensState) {
+ self.stack.push(self.state);
+ self.state = state;
+ }
+
+ /// Go back to the top-of-stack state.
+ fn unswitch(&mut self) {
+ self.state = self.stack.pop().unwrap_or(TokensState::Body);
+ }
+
+ /// Advance and return the given token.
+ fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
+ self.advance();
+ token
+ }
+
+ /// Returns a word containing the string bounded by the given indices.
+ fn text(&self, start: usize, end: usize) -> Token<'s> {
+ Token::Text(&self.src[start .. end])
+ }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+ type Item = Token<'s>;
+
+ /// Advance the iterator, return the next token or nothing.
+ fn next(&mut self) -> Option<Token<'s>> {
+ use TokensState as TU;
+
+ // Go to the body state if the function has a body or return to the top-of-stack state.
+ if self.state == TU::MaybeBody {
+ if self.chars.peek()?.1 == '[' {
+ self.state = TU::Body;
+ return Some(self.consumed(Token::LeftBracket));
+ } else {
+ self.unswitch();
+ }
+ }
+
+ // Take the next char and peek at the one behind.
+ let (next_pos, next) = self.chars.next()?;
+ let afterwards = self.chars.peek().map(|p| p.1);
+
+ Some(match next {
+ // Functions
+ '[' => {
+ self.switch(TU::Function);
+ Token::LeftBracket
+ },
+ ']' => {
+ if self.state == TU::Function {
+ self.state = TU::MaybeBody;
+ } else {
+ self.unswitch();
+ }
+ Token::RightBracket
+ },
+
+ // Line comment
+ '/' if afterwards == Some('/') => {
+ let mut end = self.chars.next().unwrap();
+ let start = end.0 + end.1.len_utf8();
+
+ while let Some((index, c)) = self.chars.peek() {
+ if is_newline_char(c) {
+ break;
+ }
+ self.advance();
+ end = (index, c);
+ }
+
+ let end = end.0 + end.1.len_utf8();
+ Token::LineComment(&self.src[start .. end])
+ },
+
+ // Block comment
+ '/' if afterwards == Some('*') => {
+ let mut end = self.chars.next().unwrap();
+ let start = end.0 + end.1.len_utf8();
+
+ let mut nested = 0;
+ while let Some((index, c)) = self.chars.next() {
+ let after = self.chars.peek().map(|p| p.1);
+ match (c, after) {
+ ('*', Some('/')) if nested == 0 => { self.advance(); break },
+ ('/', Some('*')) => { self.advance(); nested += 1 },
+ ('*', Some('/')) => { self.advance(); nested -= 1 },
+ _ => {},
+ }
+ end = (index, c);
+ }
+
+ let end = end.0 + end.1.len_utf8();
+ Token::BlockComment(&self.src[start .. end])
+ },
+
+ // Unexpected end of block comment
+ '*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
+
+ // Whitespace
+ ' ' | '\t' => {
+ while let Some((_, c)) = self.chars.peek() {
+ match c {
+ ' ' | '\t' => self.advance(),
+ _ => break,
+ }
+ }
+ Token::Space
+ }
+
+ // Newlines
+ '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
+ c if is_newline_char(c) => Token::Newline,
+
+ // Star/Underscore/Backtick in bodies
+ '*' if self.state == TU::Body => Token::Star,
+ '_' if self.state == TU::Body => Token::Underscore,
+ '`' if self.state == TU::Body => Token::Backtick,
+
+ // Context sensitive operators in headers
+ ':' if self.state == TU::Function => Token::Colon,
+ '=' if self.state == TU::Function => Token::Equals,
+ ',' if self.state == TU::Function => Token::Comma,
+
+ // A string value.
+ '"' if self.state == TU::Function => {
+ // Find out when the word ends.
+ let mut escaped = false;
+ let mut end = (next_pos, next);
+
+ while let Some((index, c)) = self.chars.next() {
+ if c == '"' && !escaped {
+ break;
+ }
+
+ escaped = c == '\\';
+ end = (index, c);
+ }
+
+ let end_pos = end.0 + end.1.len_utf8();
+ Token::Quoted(&self.src[next_pos + 1 .. end_pos])
+ }
+
+ // Escaping
+ '\\' => {
+ if let Some((index, c)) = self.chars.peek() {
+ let escapable = match c {
+ '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
+ _ => false,
+ };
+
+ if escapable {
+ self.advance();
+ return Some(self.text(index, index + c.len_utf8()));
+ }
+ }
+
+ Token::Text("\\")
+ },
+
+ // Normal text
+ _ => {
+ // Find out when the word ends.
+ let mut end = (next_pos, next);
+ while let Some((index, c)) = self.chars.peek() {
+ let second = self.chars.peek_second().map(|p| p.1);
+
+ // Whether the next token is still from the text or not.
+ let continues = match c {
+ '[' | ']' | '\\' => false,
+ '*' | '_' | '`' if self.state == TU::Body => false,
+ ':' | '=' | ',' | '"' if self.state == TU::Function => false,
+
+ '/' => second != Some('/') && second != Some('*'),
+ '*' => second != Some('/'),
+
+ ' ' | '\t' => false,
+ c if is_newline_char(c) => false,
+
+ _ => true,
+ };
+
+ if !continues {
+ break;
+ }
+
+ end = (index, c);
+ self.advance();
+ }
+
+ let end_pos = end.0 + end.1.len_utf8();
+ self.text(next_pos, end_pos)
+ },
+ })
+ }
+}
+
+/// Whether this character is a newline (or starts one).
+fn is_newline_char(character: char) -> bool {
+ match character {
+ '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+ _ => false,
+ }
+}
+
+/// A (index, char) iterator with double lookahead.
+#[derive(Debug, Clone)]
+pub struct PeekableChars<'s> {
+ offset: usize,
+ string: &'s str,
+ chars: CharIndices<'s>,
+ peek1: Option<Option<(usize, char)>>,
+ peek2: Option<Option<(usize, char)>>,
+}
+
+impl<'s> PeekableChars<'s> {
+ /// Create a new iterator from a string.
+ pub fn new(string: &'s str) -> PeekableChars<'s> {
+ PeekableChars {
+ offset: 0,
+ string,
+ chars: string.char_indices(),
+ peek1: None,
+ peek2: None,
+ }
+ }
+
+ /// Peek at the next element.
+ pub fn peek(&mut self) -> Option<(usize, char)> {
+ match self.peek1 {
+ Some(peeked) => peeked,
+ None => {
+ let next = self.next_inner();
+ self.peek1 = Some(next);
+ next
+ }
+ }
+ }
+
+ /// Peek at the element after the next element.
+ pub fn peek_second(&mut self) -> Option<(usize, char)> {
+ match self.peek2 {
+ Some(peeked) => peeked,
+ None => {
+ self.peek();
+ let next = self.next_inner();
+ self.peek2 = Some(next);
+ next
+ }
+ }
+ }
+
+ /// Return the next value of the inner iterator mapped with the offset.
+ pub fn next_inner(&mut self) -> Option<(usize, char)> {
+ self.chars.next().map(|(i, c)| (i + self.offset, c))
+ }
+
+ /// The index of the first character of the next token in the source string.
+ pub fn current_index(&mut self) -> Option<usize> {
+ self.peek().map(|p| p.0)
+ }
+
+ /// Go to a new position in the underlying string.
+ pub fn goto(&mut self, index: usize) {
+ self.offset = index;
+ self.chars = self.string[index..].char_indices();
+ self.peek1 = None;
+ self.peek2 = None;
+ }
+}
+
+impl Iterator for PeekableChars<'_> {
+ type Item = (usize, char);
+
+ fn next(&mut self) -> Option<(usize, char)> {
+ match self.peek1.take() {
+ Some(value) => {
+ self.peek1 = self.peek2.take();
+ value
+ },
+ None => self.next_inner(),
+ }
+ }
+}
+
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
+ Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
+ Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
+ StarSlash as SS};
+
+ /// Test if the source code tokenizes to the tokens.
+ fn test(src: &str, tokens: Vec<Token>) {
+ assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
+ }
+
+ /// Tokenizes the basic building blocks.
+ #[test]
+ fn tokenize_base() {
+ test("", vec![]);
+ test("Hallo", vec![T("Hallo")]);
+ test("[", vec![L]);
+ test("]", vec![R]);
+ test("*", vec![TS]);
+ test("_", vec![TU]);
+ test("`", vec![TB]);
+ test("\n", vec![N]);
+ }
+
+ /// This test looks if LF- and CRLF-style newlines get both identified correctly.
+ #[test]
+ fn tokenize_whitespace_newlines() {
+ test(" \t", vec![S]);
+ test("First line\r\nSecond line\nThird line\n",
+ vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
+ T("Third"), S, T("line"), N]);
+ test("Hello \n ", vec![T("Hello"), S, N, S]);
+ test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
+ }
+
+ /// Tests if escaping with backslash works as it should.
+ #[test]
+ fn tokenize_escape() {
+ test(r"\[", vec![T("[")]);
+ test(r"\]", vec![T("]")]);
+ test(r"\**", vec![T("*"), TS]);
+ test(r"\*", vec![T("*")]);
+ test(r"\__", vec![T("_"), TU]);
+ test(r"\_", vec![T("_")]);
+ test(r"\hello", vec![T("\\"), T("hello")]);
+ }
+
+ /// Tests if escaped strings work.
+ #[test]
+ fn tokenize_quoted() {
+ test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
+ }
+
+ /// Tokenizes some more realistic examples.
+ #[test]
+ fn tokenize_examples() {
+ test(r"
+ [function][
+ Test [italic][example]!
+ ]
+ ", vec![
+ N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
+ T("example"), R, T("!"), N, S, R, N, S
+ ]);
+
+ test(r"
+ [page: size=A4]
+ [font: size=12pt]
+
+ Das ist ein Beispielsatz mit *fetter* Schrift.
+ ", vec![
+ N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
+ L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
+ T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
+ TS, T("fetter"), TS, S, T("Schrift."), N, S
+ ]);
+ }
+
+ /// This test checks whether the colon and equals symbols get parsed correctly depending on the
+ /// context: Either in a function header or in a body.
+ #[test]
+ fn tokenize_symbols_context() {
+ test("[func: key=value][Answer: 7]",
+ vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
+ T("Answer:"), S, T("7"), R]);
+ test("[[n: k=v]:x][:[=]]:=",
+ vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
+ L, T(":"), L, E, R, R, T(":=")]);
+ test("[hi: k=[func][body] v=1][hello]",
+ vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
+ T("v"), E, T("1"), R, L, T("hello"), R]);
+ test("[func: __key__=value]",
+ vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
+ test("The /*[*/ answer: 7.",
+ vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
+ }
+
+ /// Test if block and line comments get tokenized as expected.
+ #[test]
+ fn tokenize_comments() {
+ test("These // Line comments.",
+ vec![T("These"), S, LC(" Line comments.")]);
+ test("This /* is */ a comment.",
+ vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
+ test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
+ test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
+ test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
+ test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
+ }
+
+ /// This test has a special look at the underscore syntax.
+ #[test]
+ fn tokenize_underscores() {
+ test("he_llo_world_ __ Now this_ is_ special!",
+ vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
+ T("this"), TU, S, T("is"), TU, S, T("special!")]);
+ }
+
+ /// This test is for checking if non-ASCII characters get parsed correctly.
+ #[test]
+ fn tokenize_unicode() {
+ test("[document][Hello 🌍!]",
+ vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
+ test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
+ }
+}
diff --git a/tests/layouting.rs b/tests/layouting.rs
new file mode 100644
index 00000000..b748748f
--- /dev/null
+++ b/tests/layouting.rs
@@ -0,0 +1,82 @@
+use std::fs::{self, File};
+use std::io::{Write, Read, BufWriter};
+use std::process::Command;
+
+use typst::Typesetter;
+use typst::toddle::query::FileSystemFontProvider;
+use typst::export::pdf::PdfExporter;
+use typst::doc::LayoutAction;
+
+const CACHE_DIR: &str = "test-cache";
+
+
+#[test]
+fn layouting() {
+ fs::create_dir_all(format!("{}/serialized", CACHE_DIR)).unwrap();
+ fs::create_dir_all(format!("{}/rendered", CACHE_DIR)).unwrap();
+ fs::create_dir_all(format!("{}/pdf", CACHE_DIR)).unwrap();
+
+ for entry in fs::read_dir("tests/layouts/").unwrap() {
+ let path = entry.unwrap().path();
+
+ let mut file = File::open(&path).unwrap();
+ let mut src = String::new();
+ file.read_to_string(&mut src).unwrap();
+
+ let name = path
+ .file_stem().unwrap()
+ .to_str().unwrap();
+
+ test(name, &src);
+ }
+}
+
+/// Create a _PDF_ with a name from the source code.
+fn test(name: &str, src: &str) {
+ let mut typesetter = Typesetter::new();
+ let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
+ typesetter.add_font_provider(provider.clone());
+
+ // Layout into box layout.
+ let tree = typesetter.parse(src).unwrap();
+ let layout = typesetter.layout(&tree).unwrap();
+
+ // Write the serialized layout file.
+ let path = format!("{}/serialized/{}.box", CACHE_DIR, name);
+ let mut file = File::create(path).unwrap();
+
+ // Find all used fonts and their filenames.
+ let mut map = Vec::new();
+ let mut loader = typesetter.loader().borrow_mut();
+ for action in &layout.actions {
+ if let LayoutAction::SetFont(index, _) = action {
+ if map.iter().find(|(i, _)| i == index).is_none() {
+ let (_, provider_index) = loader.get_provider_and_index(*index);
+ let filename = provider.get_path(provider_index).to_str().unwrap();
+ map.push((*index, filename));
+ }
+ }
+ }
+ drop(loader);
+
+ // Write the font mapping into the serialization file.
+ writeln!(file, "{}", map.len()).unwrap();
+ for (index, path) in map {
+ writeln!(file, "{} {}", index, path).unwrap();
+ }
+ layout.serialize(&mut file).unwrap();
+
+ // Render the layout into a PNG.
+ Command::new("python")
+ .arg("tests/render.py")
+ .arg(name)
+ .spawn()
+ .expect("failed to run python-based renderer");
+
+ // Write the PDF file.
+ let path = format!("{}/pdf/{}.pdf", CACHE_DIR, name);
+ let file = BufWriter::new(File::create(path).unwrap());
+ let document = layout.into_doc();
+ let exporter = PdfExporter::new();
+ exporter.export(&document, typesetter.loader(), file).unwrap();
+}
diff --git a/tests/layouts/shakespeare-right.tps b/tests/layouts/shakespeare-right.tps
new file mode 100644
index 00000000..db670fdf
--- /dev/null
+++ b/tests/layouts/shakespeare-right.tps
@@ -0,0 +1,88 @@
+[align: right][
+ [bold][Scene 5: _The Tower of London_]
+
+ [italic][Enter Mortimer, brought in a chair, and Gaolers.]
+
+ *Mortimer.* Kind keepers of my weak decaying age,
+ Let dying Mortimer here rest himself.
+ Even like a man new haled from the rack,
+ So fare my limbs with long imprisonment;
+ And these grey locks, the pursuivants of death,
+ Nestor-like aged in an age of care,
+ Argue the end of Edmund Mortimer.
+ These eyes, like lamps whose wasting oil is spent,
+ Wax dim, as drawing to their exigent;
+ Weak shoulders, overborne with burdening grief,
+ And pithless arms, like to a withered vine
+ That droops his sapless branches to the ground.
+ Yet are these feet, whose strengthless stay is numb,
+ Unable to support this lump of clay,
+ Swift-winged with desire to get a grave,
+ As witting I no other comfort have.
+ But tell me, keeper, will my nephew come?
+
+ *First Keeper.* Richard Plantagenet, my lord, will come.
+ We sent unto the Temple, unto his chamber;
+ And answer was return'd that he will come.
+
+ *Mortimer.* Enough; my soul shall then be satisfied.
+ Poor gentleman! his wrong doth equal mine.
+ Since Henry Monmouth first began to reign,
+ Before whose glory I was great in arms,
+ This loathsome sequestration have I had;
+ And even since then hath Richard been obscur'd,
+ Depriv'd of honour and inheritance.
+ But now the arbitrator of despairs,
+ Just Death, kind umpire of men's miseries,
+ With sweet enlargement doth dismiss me hence.
+ I would his troubles likewise were expir'd,
+ That so he might recover what was lost.
+
+
+ [italic][Enter Richard Plantagenet]
+
+ *First Keeper.* My lord, your loving nephew now is come.
+
+ *Mortimer.* Richard Plantagenet, my friend, is he come?
+
+ *Plantagenet.* Ay, noble uncle, thus ignobly us'd,
+ Your nephew, late despised Richard, comes.
+
+ *Mortimer.* Direct mine arms I may embrace his neck
+ And in his bosom spend my latter gasp.
+ O, tell me when my lips do touch his cheeks,
+ That I may kindly give one fainting kiss.
+ And now declare, sweet stem from York's great stock,
+ Why didst thou say of late thou wert despis'd?
+
+ *Plantagenet.* First, lean thine aged back against mine arm;
+ And, in that ease, I'll tell thee my disease.
+ This day, in argument upon a case,
+ Some words there grew 'twixt Somerset and me;
+ Among which terms he us'd his lavish tongue
+ And did upbraid me with my father's death;
+ Which obloquy set bars before my tongue,
+ Else with the like I had requited him.
+ Therefore, good uncle, for my father's sake,
+ In honour of a true Plantagenet,
+ And for alliance sake, declare the cause
+ My father, Earl of Cambridge, lost his head.
+
+ *Mortimer.* That cause, fair nephew, that imprison'd me
+ And hath detain'd me all my flow'ring youth
+ Within a loathsome dungeon, there to pine,
+ Was cursed instrument of his decease.
+
+ *Plantagenet.* Discover more at large what cause that was,
+ For I am ignorant and cannot guess.
+
+ *Mortimer.* I will, if that my fading breath permit
+ And death approach not ere my tale be done.
+ Henry the Fourth, grandfather to this king,
+ Depos'd his nephew Richard, Edward's son,
+ The first-begotten and the lawful heir
+ Of Edward king, the third of that descent;
+ During whose reign the Percies of the north,
+ Finding his usurpation most unjust,
+ Endeavour'd my advancement to the throne ...
+]
diff --git a/test/shakespeare.tps b/tests/layouts/shakespeare.tps
index e0839302..e0839302 100644
--- a/test/shakespeare.tps
+++ b/tests/layouts/shakespeare.tps
diff --git a/tests/layouts/styles.tps b/tests/layouts/styles.tps
new file mode 100644
index 00000000..790d2b38
--- /dev/null
+++ b/tests/layouts/styles.tps
@@ -0,0 +1,13 @@
+_Multiline:_
+Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
+eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
+voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
+clita kasd gubergren, no sea takimata sanctus est.
+
+_Emoji:_ Hello World! 🌍
+
+_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
+built-in syntax!
+
+_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
+using the standard library functions [mono][bold] and `italic`!
diff --git a/tests/render.py b/tests/render.py
new file mode 100644
index 00000000..02c2693f
--- /dev/null
+++ b/tests/render.py
@@ -0,0 +1,73 @@
+import sys
+import os
+import pathlib
+from PIL import Image, ImageDraw, ImageFont
+
+
+# Directory containing this script; paths below are resolved relative to it.
+BASE = os.path.dirname(__file__)
+# Cache directory the Rust test runner writes serialized layouts into and
+# this script writes rendered PNGs to.
+# NOTE(review): the trailing semicolon is redundant in Python.
+CACHE_DIR = os.path.join(BASE, "../test-cache/");
+
+
+def main():
+ assert len(sys.argv) == 2, "usage: python render.py <name>"
+ name = sys.argv[1]
+
+ filename = os.path.join(CACHE_DIR, f"serialized/{name}.box")
+ with open(filename, encoding="utf-8") as file:
+ lines = [line[:-1] for line in file.readlines()]
+
+ fonts = {}
+ font_count = int(lines[0])
+ for i in range(font_count):
+ parts = lines[1 + i].split(' ', 1)
+ index = int(parts[0])
+ path = parts[1]
+ fonts[index] = os.path.join(BASE, "../fonts", path)
+
+ width, height = (float(s) for s in lines[font_count + 1].split())
+
+ renderer = Renderer(fonts, width, height)
+ for command in lines[font_count + 2:]:
+ renderer.execute(command)
+
+ pathlib.Path(os.path.join(CACHE_DIR, "rendered")).mkdir(parents=True, exist_ok=True)
+ renderer.export(name)
+
+
+class Renderer:
+ def __init__(self, fonts, width, height):
+ self.fonts = fonts
+ self.img = Image.new("RGBA", (pix(width), pix(height)), (255, 255, 255))
+ self.draw = ImageDraw.Draw(self.img)
+ self.cursor = (0, 0)
+
+ def execute(self, command):
+ cmd = command[0]
+ parts = command.split()[1:]
+
+ if cmd == 'm':
+ x, y = (pix(float(s)) for s in parts)
+ self.cursor = (x, y)
+
+ elif cmd == 'f':
+ index = int(parts[0])
+ size = pix(float(parts[1]))
+ self.font = ImageFont.truetype(self.fonts[index], size)
+
+ elif cmd == 'w':
+ text = command[2:]
+ self.draw.text(self.cursor, text, (0, 0, 0), font=self.font)
+
+ else:
+ raise Exception("invalid command")
+
+ def export(self, name):
+ self.img.save(CACHE_DIR + "rendered/" + name + ".png")
+
+
+# Convert a size in points to pixels at a fixed scale of two pixels per
+# point, truncating to an integer.
+def pix(points):
+    return int(2 * points)
+
+
+# Entry point when invoked as a script (as the Rust test runner does).
+if __name__ == "__main__":
+    main()