summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-02-12 21:31:35 +0100
committerLaurenz <laurmaedje@gmail.com>2019-02-12 21:31:35 +0100
commit5a600eb354c65ec008cbf020e45705c2f401d669 (patch)
treeb61d6ae3168716ba198c5631934520053c4a57c4 /src
Move crate into workspace subfolder
Diffstat (limited to 'src')
-rw-r--r--src/doc.rs187
-rw-r--r--src/font.rs270
-rw-r--r--src/lib.rs11
-rw-r--r--src/parsing.rs696
-rw-r--r--src/pdf.rs375
-rw-r--r--src/utility.rs138
6 files changed, 1677 insertions, 0 deletions
diff --git a/src/doc.rs b/src/doc.rs
new file mode 100644
index 00000000..04e214a3
--- /dev/null
+++ b/src/doc.rs
@@ -0,0 +1,187 @@
+//! Generation of abstract documents from syntax trees.
+
+use std::fmt;
+use crate::parsing::{SyntaxTree, Node};
+use crate::font::{Font, BuiltinFont};
+
+
+/// Abstract representation of a complete typesetted document.
+///
+/// This abstract thing can then be serialized into a specific format like PDF.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Document {
+ /// The pages of the document.
+ pub pages: Vec<Page>,
+ /// The fonts used by the document.
+ pub fonts: Vec<DocumentFont>,
+}
+
+impl Document {
+ /// Create a new document without content.
+ pub fn new() -> Document {
+ Document {
+ pages: vec![],
+ fonts: vec![],
+ }
+ }
+}
+
+/// A page of a document.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Page {
+ /// The width and height of the page.
+ pub size: [Size; 2],
+ /// The contents of the page.
+ pub contents: Vec<Text>,
+}
+
+/// Plain text.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct Text(pub String);
+
+/// A font (either built-in or external).
+#[derive(Debug, Clone, PartialEq)]
+pub enum DocumentFont {
+ /// One of the 14 built-in fonts.
+ Builtin(BuiltinFont),
+ /// An externally loaded font.
+ Loaded(Font),
+}
+
/// A distance that can be created from different units of length.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Size {
    /// The size in typographic points (1/72 inches).
    pub points: f32,
}

impl Size {
    /// Create a size from a number of points.
    pub fn from_points(points: f32) -> Size {
        Size { points }
    }

    /// Create a size from a number of inches.
    ///
    /// One inch equals exactly 72 typographic points.
    pub fn from_inches(inches: f32) -> Size {
        // Was `inches / 72.0`, which inverted the conversion
        // (it turned 1 inch into ~0.014 pt instead of 72 pt).
        Size { points: 72.0 * inches }
    }

    /// Create a size from a number of millimeters (1 in = 25.4 mm).
    pub fn from_mm(mm: f32) -> Size {
        // 72 pt / 25.4 mm ≈ 2.8346 pt per mm; derive the factor instead of
        // hard-coding a rounded constant.
        Size { points: 72.0 * mm / 25.4 }
    }

    /// Create a size from a number of centimeters (1 in = 2.54 cm).
    pub fn from_cm(cm: f32) -> Size {
        // Was `0.028345 * cm`, off by a factor of 1000:
        // 1 cm ≈ 28.35 pt, not 0.028 pt.
        Size { points: 72.0 * cm / 2.54 }
    }
}
+
+
+/// A type that can be generated into a document.
+pub trait Generate {
+ /// Generate a document from self.
+ fn generate(self) -> GenResult<Document>;
+}
+
+impl Generate for SyntaxTree<'_> {
+ fn generate(self) -> GenResult<Document> {
+ Generator::new(self).generate()
+ }
+}
+
+/// Result type used for parsing.
+type GenResult<T> = std::result::Result<T, GenerationError>;
+
/// An error that occurred while generating a document.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GenerationError {
    /// A human-readable description of what went wrong.
    pub message: String,
}

impl fmt::Display for GenerationError {
    /// Formats as `generation error: <message>`.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("generation error: ")?;
        formatter.write_str(&self.message)
    }
}
+
+
+/// Transforms an abstract syntax tree into a document.
+#[derive(Debug, Clone)]
+struct Generator<'s> {
+ tree: SyntaxTree<'s>,
+}
+
impl<'s> Generator<'s> {
    /// Create a new generator from a syntax tree.
    fn new(tree: SyntaxTree<'s>) -> Generator<'s> {
        Generator { tree }
    }

    /// Generate the abstract document.
    ///
    /// Currently flattens the whole tree into a single run of text placed on
    /// one A4 page set in Helvetica. Styling toggles and function calls are
    /// not implemented yet and will panic via `unimplemented!`.
    fn generate(&mut self) -> GenResult<Document> {
        // The only font emitted for now: built-in Helvetica.
        let fonts = vec![DocumentFont::Builtin(BuiltinFont::Helvetica)];

        let mut text = String::new();
        for node in &self.tree.nodes {
            match node {
                // Collapse whitespace: only emit a space once there is text,
                // which drops leading whitespace entirely.
                Node::Space if !text.is_empty() => text.push(' '),
                Node::Space | Node::Newline => (),
                Node::Word(word) => text.push_str(word),

                // Not supported by the generator yet.
                Node::ToggleItalics | Node::ToggleBold | Node::ToggleMath => unimplemented!(),
                Node::Func(_) => unimplemented!(),

            }
        }

        // A single page in A4 portrait (210 mm x 297 mm).
        let page = Page {
            size: [Size::from_mm(210.0), Size::from_mm(297.0)],
            contents: vec![ Text(text) ],
        };

        Ok(Document {
            pages: vec![page],
            fonts,
        })
    }

    /// Gives a generation error with a message.
    ///
    /// NOTE(review): currently unused; presumably kept for upcoming
    /// error paths in `generate`.
    #[inline]
    fn err<R, S: Into<String>>(&self, message: S) -> GenResult<R> {
        Err(GenerationError { message: message.into() })
    }
}
+
+
+#[cfg(test)]
+mod generator_tests {
+ use super::*;
+ use crate::parsing::{Tokenize, Parse};
+
+ /// Test if the source gets generated into the document.
+ fn test(src: &str, doc: Document) {
+ assert_eq!(src.tokenize().parse().unwrap().generate(), Ok(doc));
+ }
+
+ /// Test if generation gives this error for the source code.
+ fn test_err(src: &str, err: GenerationError) {
+ assert_eq!(src.tokenize().parse().unwrap().generate(), Err(err));
+ }
+
+ #[test]
+ fn generator_simple() {
+ test("This is an example of a sentence.", Document {
+ pages: vec![
+ Page {
+ size: [Size::from_mm(210.0), Size::from_mm(297.0)],
+ contents: vec![
+ Text("This is an example of a sentence.".to_owned()),
+ ]
+ }
+ ],
+ fonts: vec![DocumentFont::Builtin(BuiltinFont::Helvetica)],
+ });
+ }
+}
diff --git a/src/font.rs b/src/font.rs
new file mode 100644
index 00000000..1280aec3
--- /dev/null
+++ b/src/font.rs
@@ -0,0 +1,270 @@
+//! Reading of metrics and font data from _OpenType_ and _TrueType_ font files.
+
+#![allow(unused_variables)]
+
+use std::fmt;
+use std::io::{self, Read, Seek, SeekFrom};
+use byteorder::{BE, ReadBytesExt};
+
+
+/// A loaded opentype (or truetype) font.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Font {
+ /// The PostScript name of this font.
+ pub name: String,
+}
+
+impl Font {
+ /// Create a new font from a byte source.
+ pub fn new<R>(data: &mut R) -> FontResult<Font> where R: Read + Seek {
+ OpenTypeReader::new(data).read()
+ }
+}
+
/// The 14 standard fonts that every conforming PDF viewer provides built-in.
#[derive(Debug, Copy, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum BuiltinFont {
    Courier,
    CourierBold,
    CourierOblique,
    CourierBoldOblique,
    Helvetica,
    HelveticaBold,
    HelveticaOblique,
    HelveticaBoldOblique,
    TimesRoman,
    TimesBold,
    TimeItalic,
    TimeBoldItalic,
    Symbol,
    ZapfDingbats,
}

impl BuiltinFont {
    /// The PostScript base-font name of the font, exactly as it must appear
    /// in a PDF font dictionary.
    pub fn name(&self) -> &'static str {
        use BuiltinFont::*;
        match self {
            Courier => "Courier",
            CourierBold => "Courier-Bold",
            CourierOblique => "Courier-Oblique",
            CourierBoldOblique => "Courier-BoldOblique",
            Helvetica => "Helvetica",
            HelveticaBold => "Helvetica-Bold",
            HelveticaOblique => "Helvetica-Oblique",
            HelveticaBoldOblique => "Helvetica-BoldOblique",
            TimesRoman => "Times-Roman",
            TimesBold => "Times-Bold",
            // The variant identifiers keep their existing (misspelled) names
            // so callers stay source-compatible, but the returned strings are
            // the standard base-font names: previously these were
            // "Time-Italic"/"Time-BoldItalic", which no viewer recognizes.
            TimeItalic => "Times-Italic",
            TimeBoldItalic => "Times-BoldItalic",
            Symbol => "Symbol",
            ZapfDingbats => "ZapfDingbats",
        }
    }
}
+
+
+/// Result type used for tokenization.
+type FontResult<T> = std::result::Result<T, LoadingError>;
+
+/// A failure when loading a font.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct LoadingError {
+ /// A message describing the error.
+ pub message: String,
+}
+
+impl From<io::Error> for LoadingError {
+ fn from(err: io::Error) -> LoadingError {
+ LoadingError { message: format!("io error: {}", err) }
+ }
+}
+
+impl fmt::Display for LoadingError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "font loading error: {}", self.message)
+ }
+}
+
+
+/// Reads a font from a _OpenType_ or _TrueType_ font file.
+struct OpenTypeReader<'r, R> where R: Read + Seek {
+ data: &'r mut R,
+ font: Font,
+ table_records: Vec<TableRecord>,
+}
+
+/// Used to identify a table, design-variation axis, script,
+/// language system, feature, or baseline.
+#[derive(Clone, PartialEq)]
+struct Tag(pub [u8; 4]);
+
+impl PartialEq<&str> for Tag {
+ fn eq(&self, other: &&str) -> bool {
+ other.as_bytes() == &self.0
+ }
+}
+
+impl fmt::Debug for Tag {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "\"{}\"", self)
+ }
+}
+
+impl fmt::Display for Tag {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let a = self.0;
+ write!(f, "{}{}{}{}", a[0] as char, a[1] as char, a[2] as char, a[3] as char)
+ }
+}
+
+/// Stores information about one table.
+#[derive(Debug, Clone, PartialEq)]
+struct TableRecord {
+ table: Tag,
+ check_sum: u32,
+ offset: u32,
+ length: u32,
+}
+
+impl<'r, R> OpenTypeReader<'r, R> where R: Read + Seek {
+ /// Create a new reader from a byte source.
+ pub fn new(data: &'r mut R) -> OpenTypeReader<'r, R> {
+ OpenTypeReader {
+ data,
+ font: Font {
+ name: String::new(),
+ },
+ table_records: vec![],
+ }
+ }
+
+ /// Read the font from the byte source.
+ pub fn read(mut self) -> FontResult<Font> {
+ self.read_table_records()?;
+ self.read_name_table()?;
+
+ Ok(self.font)
+ }
+
+ /// Read the offset table.
+ fn read_table_records(&mut self) -> FontResult<()> {
+ let sfnt_version = self.data.read_u32::<BE>()?;
+ let num_tables = self.data.read_u16::<BE>()?;
+ let search_range = self.data.read_u16::<BE>()?;
+ let entry_selector = self.data.read_u16::<BE>()?;
+ let range_shift = self.data.read_u16::<BE>()?;
+
+ let outlines = match sfnt_version {
+ 0x00010000 => "truetype",
+ 0x4F54544F => "cff",
+ _ => return self.err("unsuported font outlines"),
+ };
+
+ for _ in 0 .. num_tables {
+ let table = self.read_tag()?;
+ let check_sum = self.data.read_u32::<BE>()?;
+ let offset = self.data.read_u32::<BE>()?;
+ let length = self.data.read_u32::<BE>()?;
+
+ self.table_records.push(TableRecord {
+ table,
+ check_sum,
+ offset,
+ length,
+ });
+ }
+
+ Ok(())
+ }
+
+ /// Read the name table (gives general information about the font).
+ fn read_name_table(&mut self) -> FontResult<()> {
+ let table = match self.table_records.iter().find(|record| record.table == "name") {
+ Some(table) => table,
+ None => return self.err("missing 'name' table"),
+ };
+
+ self.data.seek(SeekFrom::Start(table.offset as u64))?;
+
+ let format = self.data.read_u16::<BE>()?;
+ let count = self.data.read_u16::<BE>()?;
+ let string_offset = self.data.read_u16::<BE>()?;
+
+ let storage = (table.offset + string_offset as u32) as u64;
+
+ let mut name = None;
+
+ for _ in 0 .. count {
+ let platform_id = self.data.read_u16::<BE>()?;
+ let encoding_id = self.data.read_u16::<BE>()?;
+ let language_id = self.data.read_u16::<BE>()?;
+ let name_id = self.data.read_u16::<BE>()?;
+ let length = self.data.read_u16::<BE>()?;
+ let offset = self.data.read_u16::<BE>()?;
+
+ // Postscript name is what we are interested in
+ if name_id == 6 && platform_id == 3 && encoding_id == 1 {
+ if length % 2 != 0 {
+ return self.err("invalid encoded name");
+ }
+
+ self.data.seek(SeekFrom::Start(storage + offset as u64))?;
+ let mut buffer = Vec::with_capacity(length as usize / 2);
+
+ for _ in 0 .. length / 2 {
+ buffer.push(self.data.read_u16::<BE>()?);
+ }
+
+ name = match String::from_utf16(&buffer) {
+ Ok(string) => Some(string),
+ Err(_) => return self.err("invalid encoded name"),
+ };
+
+ break;
+ }
+ }
+
+ self.font.name = match name {
+ Some(name) => name,
+ None => return self.err("missing postscript font name"),
+ };
+
+ Ok(())
+ }
+
+ /// Read a tag (array of four u8's).
+ fn read_tag(&mut self) -> FontResult<Tag> {
+ let mut tag = [0u8; 4];
+ self.data.read(&mut tag)?;
+ Ok(Tag(tag))
+ }
+
+ /// Gives a font loading error with a message.
+ fn err<T, S: Into<String>>(&self, message: S) -> FontResult<T> {
+ Err(LoadingError { message: message.into() })
+ }
+}
+
+
+#[cfg(test)]
+mod font_tests {
+ use super::*;
+
+ /// Test if the loaded font is the same as the expected font.
+ fn test(path: &str, font: Font) {
+ let mut file = std::fs::File::open(path).unwrap();
+ assert_eq!(Font::new(&mut file), Ok(font));
+ }
+
+ #[test]
+ fn opentype() {
+ test("../fonts/NotoSerif-Regular.ttf", Font {
+ name: "NotoSerif".to_owned(),
+ });
+ test("../fonts/NotoSansMath-Regular.ttf", Font {
+ name: "NotoSansMath-Regular".to_owned(),
+ });
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 00000000..2959925e
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,11 @@
+//! Typeset is a library for compiling _plain-text_ strings written in the
+//! corresponding typesetting language into a typesetted document in a
+//! file format like _PDF_.
+
+#![allow(unused)]
+
+pub mod parsing;
+pub mod doc;
+pub mod font;
+pub mod pdf;
+pub mod utility;
diff --git a/src/parsing.rs b/src/parsing.rs
new file mode 100644
index 00000000..5efa69e5
--- /dev/null
+++ b/src/parsing.rs
@@ -0,0 +1,696 @@
+//! Parsing of source code into tokens and syntax trees.
+
+use std::fmt;
+use std::iter::Peekable;
+use std::mem::swap;
+use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
+use crate::utility::{Splinor, Spline, Splined, StrExt};
+
+
+/// A logical unit of the incoming text stream.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum Token<'s> {
+ /// One or more whitespace (non-newline) codepoints.
+ Space,
+ /// A line feed (either `\n` or `\r\n`).
+ Newline,
+ /// A left bracket: `[`.
+ LeftBracket,
+ /// A right bracket: `]`.
+ RightBracket,
+ /// A colon (`:`) indicating the beginning of function arguments.
+ ///
+ /// If a colon occurs outside of the function header, it will be
+ /// tokenized as a `Word`.
+ Colon,
+ /// Same as with `Colon`.
+ Equals,
+ /// Two underscores, indicating text in _italics_.
+ DoubleUnderscore,
+ /// Two stars, indicating **bold** text.
+ DoubleStar,
+ /// A dollar sign, indicating mathematical content.
+ Dollar,
+ /// A hashtag starting a comment.
+ Hashtag,
+ /// Everything else just is a literal word.
+ Word(&'s str),
+}
+
+
+/// A type that is seperable into logical units (tokens).
+pub trait Tokenize {
+ /// Tokenize self into logical units.
+ fn tokenize<'s>(&'s self) -> Tokens<'s>;
+}
+
+impl Tokenize for str {
+ fn tokenize<'s>(&'s self) -> Tokens<'s> {
+ Tokens::new(self)
+ }
+}
+
+
+/// An iterator over the tokens of a text.
+#[derive(Clone)]
+pub struct Tokens<'s> {
+ source: &'s str,
+ words: Peekable<UWordBounds<'s>>,
+ state: TokensState<'s>,
+ stack: Vec<TokensState<'s>>,
+}
+
+impl fmt::Debug for Tokens<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("Tokens")
+ .field("source", &self.source)
+ .field("words", &"Peekable<UWordBounds>")
+ .field("state", &self.state)
+ .field("stack", &self.stack)
+ .finish()
+ }
+}
+
+/// The state the tokenizer is in.
+#[derive(Debug, Clone)]
+enum TokensState<'s> {
+ /// The base state if there is nothing special we are in.
+ Body,
+ /// Inside a function header. Here colons and equal signs get parsed
+ /// as distinct tokens rather than text.
+ Function,
+ /// We expect either the end of the function or the beginning of the body.
+ MaybeBody,
+ /// We are inside one unicode word that consists of multiple tokens,
+ /// because it contains double underscores.
+ DoubleUnderscore(Spline<'s, Token<'s>>),
+}
+
+impl PartialEq for TokensState<'_> {
+ fn eq(&self, other: &TokensState) -> bool {
+ use TokensState as TS;
+
+ match (self, other) {
+ (TS::Body, TS::Body) => true,
+ (TS::Function, TS::Function) => true,
+ (TS::MaybeBody, TS::MaybeBody) => true,
+ // They are not necessarily different, but we don't care
+ _ => false,
+ }
+ }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+ type Item = Token<'s>;
+
+ /// Advance the iterator, return the next token or nothing.
+ fn next(&mut self) -> Option<Token<'s>> {
+ use TokensState as TS;
+
+ // Return the remaining words and double underscores.
+ if let TS::DoubleUnderscore(ref mut splinor) = self.state {
+ loop {
+ if let Some(splined) = splinor.next() {
+ return Some(match splined {
+ Splined::Value(word) if word != "" => Token::Word(word),
+ Splined::Splinor(s) => s,
+ _ => continue,
+ });
+ } else {
+ self.unswitch();
+ break;
+ }
+ }
+ }
+
+ // Skip whitespace, but if at least one whitespace word existed,
+ // remember that, because we return a space token.
+ let mut whitespace = false;
+ while let Some(word) = self.words.peek() {
+ if !word.is_whitespace() {
+ break;
+ }
+ whitespace = true;
+ self.advance();
+ }
+ if whitespace {
+ return Some(Token::Space);
+ }
+
+ // Function maybe has a body
+ if self.state == TS::MaybeBody {
+ match *self.words.peek()? {
+ "[" => {
+ self.state = TS::Body;
+ return Some(self.consumed(Token::LeftBracket));
+ },
+ _ => self.unswitch(),
+ }
+ }
+
+ // Now all special cases are handled and we can finally look at the
+ // next words.
+ let next = self.words.next()?;
+ let afterwards = self.words.peek();
+
+ Some(match next {
+ // Special characters
+ "[" => {
+ self.switch(TS::Function);
+ Token::LeftBracket
+ },
+ "]" => {
+ if self.state == TS::Function {
+ self.state = TS::MaybeBody;
+ }
+ Token::RightBracket
+ },
+ "$" => Token::Dollar,
+ "#" => Token::Hashtag,
+
+ // Context sensitive operators
+ ":" if self.state == TS::Function => Token::Colon,
+ "=" if self.state == TS::Function => Token::Equals,
+
+ // Double star/underscore
+ "*" if afterwards == Some(&"*") => {
+ self.consumed(Token::DoubleStar)
+ },
+ "__" => Token::DoubleUnderscore,
+
+ // Newlines
+ "\n" | "\r\n" => Token::Newline,
+
+ // Escaping
+ r"\" => {
+ if let Some(next) = afterwards {
+ let escapable = match *next {
+ "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
+ w if w.starts_with("__") => true,
+ _ => false,
+ };
+
+ if escapable {
+ let next = *next;
+ self.advance();
+ return Some(Token::Word(next));
+ }
+ }
+
+ Token::Word(r"\")
+ },
+
+ // Double underscores hidden in words.
+ word if word.contains("__") => {
+ let spline = word.spline("__", Token::DoubleUnderscore);
+ self.switch(TS::DoubleUnderscore(spline));
+ return self.next();
+ },
+
+ // Now it seems like it's just a normal word.
+ word => Token::Word(word),
+ })
+ }
+}
+
+impl<'s> Tokens<'s> {
+ /// Create a new token stream from text.
+ #[inline]
+ pub fn new(source: &'s str) -> Tokens<'s> {
+ Tokens {
+ source,
+ words: source.split_word_bounds().peekable(),
+ state: TokensState::Body,
+ stack: vec![],
+ }
+ }
+
+ /// Advance the iterator by one step.
+ #[inline]
+ fn advance(&mut self) {
+ self.words.next();
+ }
+
+ /// Switch to the given state.
+ #[inline]
+ fn switch(&mut self, mut state: TokensState<'s>) {
+ swap(&mut state, &mut self.state);
+ self.stack.push(state);
+ }
+
+ /// Go back to the top-of-stack state.
+ #[inline]
+ fn unswitch(&mut self) {
+ self.state = self.stack.pop().unwrap_or(TokensState::Body);
+ }
+
+ /// Advance and return the given token.
+ #[inline]
+ fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
+ self.advance();
+ token
+ }
+}
+
+
+/// A tree representation of the source.
+#[derive(Debug, Clone, PartialEq)]
+pub struct SyntaxTree<'s> {
+ /// The children.
+ pub nodes: Vec<Node<'s>>,
+}
+
+impl<'s> SyntaxTree<'s> {
+ /// Create an empty syntax tree.
+ pub fn new() -> SyntaxTree<'s> {
+ SyntaxTree { nodes: vec![] }
+ }
+}
+
+/// A node in the abstract syntax tree.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Node<'s> {
+ /// Whitespace between other nodes.
+ Space,
+ /// A line feed.
+ Newline,
+ /// Indicates that italics were enabled/disabled.
+ ToggleItalics,
+ /// Indicates that boldface was enabled/disabled.
+ ToggleBold,
+ /// Indicates that math mode was enabled/disabled.
+ ToggleMath,
+ /// A literal word.
+ Word(&'s str),
+ /// A function invocation.
+ Func(Function<'s>),
+}
+
+/// A node representing a function invocation.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Function<'s> {
+ /// The name of the function.
+ pub name: &'s str,
+ /// Some syntax tree if the function had a body (second set of brackets),
+ /// otherwise nothing.
+ pub body: Option<SyntaxTree<'s>>,
+}
+
+
+/// A type that is parseable into a syntax tree.
+pub trait Parse<'s> {
+ /// Parse self into a syntax tree.
+ fn parse(self) -> ParseResult<SyntaxTree<'s>>;
+}
+
+impl<'s> Parse<'s> for Tokens<'s> {
+ fn parse(self) -> ParseResult<SyntaxTree<'s>> {
+ Parser::new(self).parse()
+ }
+}
+
+impl<'s> Parse<'s> for Vec<Token<'s>> {
+ fn parse(self) -> ParseResult<SyntaxTree<'s>> {
+ Parser::new(self.into_iter()).parse()
+ }
+}
+
+/// Result type used for parsing.
+type ParseResult<T> = std::result::Result<T, ParseError>;
+
+/// A failure when parsing.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct ParseError {
+ /// A message describing the error.
+ pub message: String,
+}
+
+impl fmt::Display for ParseError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "parse error: {}", self.message)
+ }
+}
+
+
+/// Parses a token stream into an abstract syntax tree.
+#[derive(Debug, Clone)]
+struct Parser<'s, T> where T: Iterator<Item = Token<'s>> {
+ tokens: Peekable<T>,
+ state: ParserState,
+ stack: Vec<Function<'s>>,
+ tree: SyntaxTree<'s>,
+}
+
+/// The state the parser is in.
+#[derive(Debug, Clone, PartialEq)]
+enum ParserState {
+ /// The base state of the parser.
+ Body,
+ /// Inside a function header.
+ Function,
+}
+
+impl<'s, T> Parser<'s, T> where T: Iterator<Item = Token<'s>> {
+ /// Create a new parser from a type that emits results of tokens.
+ fn new(tokens: T) -> Parser<'s, T> {
+ Parser {
+ tokens: tokens.peekable(),
+ state: ParserState::Body,
+ stack: vec![],
+ tree: SyntaxTree::new(),
+ }
+ }
+
+ /// Parse into an abstract syntax tree.
+ fn parse(mut self) -> ParseResult<SyntaxTree<'s>> {
+ use ParserState as PS;
+
+ while let Some(token) = self.tokens.next() {
+ // Comment
+ if token == Token::Hashtag {
+ self.skip_while(|t| *t != Token::Newline);
+ self.advance();
+ }
+
+ match self.state {
+ PS::Body => match token {
+ // Whitespace
+ Token::Space => self.append(Node::Space),
+ Token::Newline => self.append(Node::Newline),
+
+ // Words
+ Token::Word(word) => self.append(Node::Word(word)),
+
+ // Functions
+ Token::LeftBracket => self.switch(PS::Function),
+ Token::RightBracket => {
+ match self.stack.pop() {
+ Some(func) => self.append(Node::Func(func)),
+ None => return self.err("unexpected closing bracket"),
+ }
+ },
+
+ // Modifiers
+ Token::DoubleUnderscore => self.append(Node::ToggleItalics),
+ Token::DoubleStar => self.append(Node::ToggleBold),
+ Token::Dollar => self.append(Node::ToggleMath),
+
+ // Should not happen
+ Token::Colon | Token::Equals | Token::Hashtag => unreachable!(),
+ },
+
+ PS::Function => {
+ let name = match token {
+ Token::Word(word) if word.is_identifier() => word,
+ _ => return self.err("expected identifier"),
+ };
+
+ if self.tokens.next() != Some(Token::RightBracket) {
+ return self.err("expected closing bracket");
+ }
+
+ let mut func = Function {
+ name,
+ body: None,
+ };
+
+ // This function has a body.
+ if let Some(Token::LeftBracket) = self.tokens.peek() {
+ self.advance();
+ func.body = Some(SyntaxTree::new());
+ self.stack.push(func);
+ } else {
+ self.append(Node::Func(func));
+ }
+
+ self.switch(PS::Body);
+ },
+ }
+ }
+
+ if !self.stack.is_empty() {
+ return self.err("expected closing bracket");
+ }
+
+ Ok(self.tree)
+ }
+
+ /// Advance the iterator by one step.
+ #[inline]
+ fn advance(&mut self) {
+ self.tokens.next();
+ }
+
+ /// Skip tokens until the condition is met.
+ #[inline]
+ fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
+ while let Some(token) = self.tokens.peek() {
+ if !f(token) {
+ break;
+ }
+ self.advance();
+ }
+ }
+
+ /// Switch the state.
+ #[inline]
+ fn switch(&mut self, state: ParserState) {
+ self.state = state;
+ }
+
+ /// Append a node to the top-of-stack function or the main tree itself.
+ #[inline]
+ fn append(&mut self, node: Node<'s>) {
+ let tree = match self.stack.last_mut() {
+ Some(func) => func.body.get_or_insert_with(|| SyntaxTree::new()),
+ None => &mut self.tree,
+ };
+
+ tree.nodes.push(node);
+ }
+
+ /// Gives a parsing error with a message.
+ #[inline]
+ fn err<R, S: Into<String>>(&self, message: S) -> ParseResult<R> {
+ Err(ParseError { message: message.into() })
+ }
+}
+
+
+#[cfg(test)]
+mod token_tests {
+ use super::*;
+ use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
+ Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
+ Dollar as D, Hashtag as H, Word as W};
+
+ /// Test if the source code tokenizes to the tokens.
+ fn test(src: &str, tokens: Vec<Token>) {
+ assert_eq!(src.tokenize().collect::<Vec<_>>(), tokens);
+ }
+
+ /// Tokenizes the basic building blocks.
+ #[test]
+ fn tokenize_base() {
+ test("", vec![]);
+ test("Hallo", vec![W("Hallo")]);
+ test("[", vec![L]);
+ test("]", vec![R]);
+ test("$", vec![D]);
+ test("#", vec![H]);
+ test("**", vec![DS]);
+ test("__", vec![DU]);
+ test("\n", vec![N]);
+ }
+
+ /// Tests if escaping with backslash works as it should.
+ #[test]
+ fn tokenize_escape() {
+ test(r"\[", vec![W("[")]);
+ test(r"\]", vec![W("]")]);
+ test(r"\#", vec![W("#")]);
+ test(r"\$", vec![W("$")]);
+ test(r"\:", vec![W(":")]);
+ test(r"\=", vec![W("=")]);
+ test(r"\**", vec![W("*"), W("*")]);
+ test(r"\*", vec![W("*")]);
+ test(r"\__", vec![W("__")]);
+ test(r"\_", vec![W("_")]);
+ test(r"\hello", vec![W(r"\"), W("hello")]);
+ }
+
+ /// Tokenizes some more realistic examples.
+ #[test]
+ fn tokenize_examples() {
+ test(r"
+ [function][
+ Test [italic][example]!
+ ]
+ ", vec![
+ N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
+ W("example"), R, W("!"), N, S, R, N, S
+ ]);
+
+ test(r"
+ [page: size=A4]
+ [font: size=12pt]
+
+ Das ist ein Beispielsatz mit **fetter** Schrift.
+ ", vec![
+ N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
+ L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
+ W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
+ DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
+ ]);
+ }
+
+ /// This test checks whether the colon and equals symbols get parsed correctly
+ /// depending on the context: Either in a function header or in a body.
+ #[test]
+ fn tokenize_symbols_context() {
+ test("[func: key=value][Answer: 7]",
+ vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
+ W("Answer"), W(":"), S, W("7"), R]);
+ test("[[n: k=v]:x][:[=]]:=",
+ vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
+ L, W(":"), L, E, R, R, W(":"), W("=")]);
+ test("[func: __key__=value]",
+ vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
+ }
+
+ /// This test has a special look at the double underscore syntax, because
+ /// per Unicode standard they are not seperate words and thus harder to parse
+ /// than the stars.
+ #[test]
+ fn tokenize_double_underscore() {
+ test("he__llo__world_ _ __ Now this_ is__ special!",
+ vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
+ W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
+ }
+
+ /// This test is for checking if non-ASCII characters get parsed correctly.
+ #[test]
+ fn tokenize_unicode() {
+ test("[document][Hello 🌍!]",
+ vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
+ test("[f]⺐.", vec![L, W("f"), R, W("⺐"), W(".")]);
+ }
+
+ /// This test looks if LF- and CRLF-style newlines get both identified correctly.
+ #[test]
+ fn tokenize_whitespace_newlines() {
+ test(" \t", vec![S]);
+ test("First line\r\nSecond line\nThird line\n",
+ vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
+ W("Third"), S, W("line"), N]);
+ }
+}
+
+
+#[cfg(test)]
+mod parse_tests {
+ use super::*;
+ use Node::{Space as S, Newline as N, Word as W, Func as F};
+
+ /// Test if the source code parses into the syntax tree.
+ fn test(src: &str, tree: SyntaxTree) {
+ assert_eq!(src.tokenize().parse(), Ok(tree));
+ }
+
+ /// Test if the source parses into the error.
+ fn test_err(src: &str, err: ParseError) {
+ assert_eq!(src.tokenize().parse(), Err(err));
+ }
+
+ /// Short cut macro to create a syntax tree.
+ /// Is `vec`-like and the elements are the nodes.
+ macro_rules! tree {
+ ($($x:expr),*) => (
+ SyntaxTree { nodes: vec![$($x),*] }
+ );
+ ($($x:expr,)*) => (tree![$($x),*])
+ }
+
+ /// Parse the basic cases.
+ #[test]
+ fn parse_base() {
+ test("", tree! {});
+ test("Hello World!", tree! { W("Hello"), S, W("World"), W("!")});
+ }
+
+ /// Parse things dealing with functions.
+ #[test]
+ fn parse_functions() {
+ test("[test]", tree! { F(Function { name: "test", body: None }) });
+ test("This is an [modifier][example] of a function invocation.", tree! {
+ W("This"), S, W("is"), S, W("an"), S,
+ F(Function { name: "modifier", body: Some(tree! { W("example") }) }), S,
+ W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
+ });
+ test("[func][Hello][links][Here][end]", tree! {
+ F(Function {
+ name: "func",
+ body: Some(tree! { W("Hello") }),
+ }),
+ F(Function {
+ name: "links",
+ body: Some(tree! { W("Here") }),
+ }),
+ F(Function {
+ name: "end",
+ body: None,
+ }),
+ });
+ test("[bodyempty][]", tree! {
+ F(Function {
+ name: "bodyempty",
+ body: Some(tree! {})
+ })
+ });
+ test("[nested][[func][call]] outside", tree! {
+ F(Function {
+ name: "nested",
+ body: Some(tree! { F(Function {
+ name: "func",
+ body: Some(tree! { W("call") }),
+ }), }),
+ }),
+ S, W("outside")
+ });
+ }
+
+ /// Tests if the parser handles non-ASCII stuff correctly.
+ #[test]
+ fn parse_unicode() {
+ test("[lib_parse] ⺐.", tree! {
+ F(Function {
+ name: "lib_parse",
+ body: None
+ }),
+ S, W("⺐"), W(".")
+ });
+ test("[func123][Hello 🌍!]", tree! {
+ F(Function {
+ name: "func123",
+ body: Some(tree! { W("Hello"), S, W("🌍"), W("!") }),
+ })
+ });
+ }
+
+ /// Tests whether errors get reported correctly.
+ #[test]
+ fn parse_errors() {
+ test_err("No functions here]", ParseError {
+ message: "unexpected closing bracket".to_owned(),
+ });
+ test_err("[hello][world", ParseError {
+ message: "expected closing bracket".to_owned(),
+ });
+ test_err("[hello world", ParseError {
+ message: "expected closing bracket".to_owned(),
+ });
+ test_err("[ no-name][Why?]", ParseError {
+ message: "expected identifier".to_owned(),
+ });
+ }
+}
diff --git a/src/pdf.rs b/src/pdf.rs
new file mode 100644
index 00000000..5cdf335c
--- /dev/null
+++ b/src/pdf.rs
@@ -0,0 +1,375 @@
+//! Writing of documents in the _PDF_ format.
+
+use std::io::{self, Write};
+use crate::doc::{Document, Text, DocumentFont, Size};
+
+
/// A type that is a sink for types that can be written conforming
/// to the _PDF_ format (that may be things like sizes, other objects
/// or whole documents).
pub trait WritePdf<T> {
    /// Write self into a byte sink, returning how many bytes were written.
    ///
    /// Implementations are expected to write the *complete* PDF
    /// representation of `object` and return its total byte length.
    fn write_pdf(&mut self, object: &T) -> io::Result<usize>;
}
+
impl<W: Write> WritePdf<Document> for W {
    /// Serialize a whole document by delegating to a fresh `PdfWriter`,
    /// which tracks object ids and xref offsets across the write.
    fn write_pdf(&mut self, document: &Document) -> io::Result<usize> {
        PdfWriter::new(document).write(self)
    }
}
+
impl<W: Write> WritePdf<Size> for W {
    /// Write a size as its bare numeric value.
    fn write_pdf(&mut self, size: &Size) -> io::Result<usize> {
        // NOTE(review): relies on the `ToString` output of `Size::points`
        // being a valid PDF number (PDF user-space units are points) —
        // confirm against the `Size` definition in doc.rs.
        self.write_str(size.points)
    }
}
+
/// A type that is a sink for types that can be converted to strings
/// and thus can be written string-like into a byte sink.
pub trait WriteByteString {
    /// Write the string-like type into self, returning how many
    /// bytes were written.
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize>;
}

impl<W: Write> WriteByteString for W {
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize> {
        // Use `write_all` instead of a single `write` call: `write` is
        // allowed to perform a partial write, which would silently corrupt
        // the running byte counts of the PDF writer — and with them every
        // offset in the cross-reference table.
        let string = string_like.to_string();
        self.write_all(string.as_bytes())?;
        Ok(string.len())
    }
}
+
+
/// Writes an abstract document into a byte sink in the _PDF_ format.
#[derive(Debug, Clone)]
struct PdfWriter<'d> {
    /// The document being serialized.
    doc: &'d Document,
    /// Running count of bytes written so far (used for xref offsets).
    w: usize,
    /// Object id of the document catalog (the PDF root object).
    catalog_id: u32,
    /// Object id of the page tree.
    page_tree_id: u32,
    /// First object id of the font resource objects (one per font).
    resources_start: u32,
    /// First object id of the page objects (one per page).
    pages_start: u32,
    /// First object id of the content stream objects.
    content_start: u32,
    /// Byte offset of each written object, in object-id order.
    xref_table: Vec<u32>,
    /// Byte offset at which the xref table itself starts.
    offset_xref: u32,
}
+
+impl<'d> PdfWriter<'d> {
+ /// Create a new pdf writer from a document.
+ fn new(doc: &'d Document) -> PdfWriter<'d> {
+ // Calculate unique ids for each object
+ let catalog_id: u32 = 1;
+ let page_tree_id = catalog_id + 1;
+ let pages_start = page_tree_id + 1;
+ let resources_start = pages_start + doc.pages.len() as u32;
+ let content_start = resources_start + doc.fonts.len() as u32;
+
+ PdfWriter {
+ doc,
+ catalog_id,
+ page_tree_id,
+ resources_start,
+ pages_start,
+ content_start,
+ w: 0,
+ xref_table: vec![],
+ offset_xref: 0,
+ }
+ }
+
/// Write the document into a byte sink.
///
/// The objects are emitted in exactly the order their ids were assigned
/// in `new`, so the offsets pushed to `xref_table` line up with object
/// ids 1..=n.
fn write<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.write_header(target)?;

    self.write_document_catalog(target)?;
    self.write_page_tree(target)?;
    self.write_pages(target)?;

    self.write_resources(target)?;

    self.write_content(target)?;
    // Embedding of loaded font programs is not implemented yet.
    // self.write_fonts(target)?;

    // The xref table, trailer and startxref pointer must come last,
    // after every object offset is known.
    self.write_xref_table(target)?;
    self.write_trailer(target)?;
    self.write_start_xref(target)?;

    Ok(self.w)
}
+
+ /// Write the pdf header.
+ fn write_header<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ // Write the magic start
+ self.w += target.write(b"%PDF-1.7\n")?;
+ Ok(self.w)
+ }
+
+ /// Write the document catalog (contains general info about the document).
+ fn write_document_catalog<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.xref_table.push(self.w as u32);
+
+ self.w += target.write_str(self.catalog_id)?;
+ self.w += target.write(b" 0 obj\n")?;
+ self.w += target.write(b"<<\n")?;
+ self.w += target.write(b"/Type /Catalog\n")?;
+
+ self.w += target.write(b"/Pages ")?;
+ self.w += target.write_str(self.page_tree_id)?;
+ self.w += target.write(b" 0 R\n")?;
+
+ self.w += target.write(b">>\n")?;
+ self.w += target.write(b"endobj\n")?;
+
+ Ok(self.w)
+ }
+
/// Write the page tree (overview over the pages of a document).
fn write_page_tree<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    // Record the byte offset of this object for the xref table.
    self.xref_table.push(self.w as u32);

    // Create page tree
    self.w += target.write_str(self.page_tree_id)?;
    self.w += target.write(b" 0 obj\n")?;
    self.w += target.write(b"<<\n")?;
    self.w += target.write(b"/Type /Pages\n")?;

    self.w += target.write(b"/Count ")?;
    self.w += target.write_str(self.doc.pages.len())?;
    self.w += target.write(b"\n")?;

    // Reference every page object by its precomputed id.
    self.w += target.write(b"/Kids [")?;

    for id in self.pages_start .. self.pages_start + self.doc.pages.len() as u32 {
        self.w += target.write_str(id)?;
        self.w += target.write(b" 0 R ")?;
    }

    self.w += target.write(b"]\n")?;

    // The resource dictionary is declared once here on the page tree,
    // so individual pages do not have to repeat it.
    self.w += target.write(b"/Resources\n")?;
    self.w += target.write(b"<<\n")?;

    // Map font names /F1, /F2, ... onto the font resource objects.
    self.w += target.write(b"/Font\n")?;
    self.w += target.write(b"<<\n")?;

    let mut font_id = self.resources_start;
    for nr in 1 ..= self.doc.fonts.len() as u32 {
        self.w += target.write(b"/F")?;
        self.w += target.write_str(nr)?;
        self.w += target.write(b" ")?;
        self.w += target.write_str(font_id)?;
        self.w += target.write(b" 0 R\n")?;
        font_id += 1;
    }

    // Close the /Font dict, the /Resources dict and the object itself.
    self.w += target.write(b">>\n")?;
    self.w += target.write(b">>\n")?;

    self.w += target.write(b">>\n")?;
    self.w += target.write(b"endobj\n")?;

    Ok(self.w)
}
+
/// Write the page descriptions.
fn write_pages<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    // Page and content-stream ids advance in the same order in which
    // the objects are later written, so plain counters suffice.
    let mut page_id = self.pages_start;
    let mut content_id = self.content_start;

    for page in &self.doc.pages {
        // Record the byte offset of this object for the xref table.
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(page_id)?;
        self.w += target.write(b" 0 obj\n")?;
        self.w += target.write(b"<<\n")?;
        self.w += target.write(b"/Type /Page\n")?;

        self.w += target.write(b"/Parent ")?;
        self.w += target.write_str(self.page_tree_id)?;
        self.w += target.write(b" 0 R\n")?;

        // The media box spans from the origin to the page size.
        self.w += target.write(b"/MediaBox [0 0 ")?;
        self.w += target.write_pdf(&page.size[0])?;
        self.w += target.write(b" ")?;
        self.w += target.write_pdf(&page.size[1])?;
        self.w += target.write(b"]\n")?;

        // Reference one content stream object per text on the page.
        self.w += target.write(b"/Contents [")?;

        for _ in &page.contents {
            self.w += target.write_str(content_id)?;
            self.w += target.write(b" 0 R ")?;

            content_id += 1;
        }

        self.w += target.write(b"]\n")?;

        self.w += target.write(b">>\n")?;
        self.w += target.write(b"endobj\n")?;

        page_id += 1;
    }

    Ok(self.w)
}
+
/// Write the resources used by the file (fonts and friends).
fn write_resources<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    let mut id = self.resources_start;

    for font in &self.doc.fonts {
        // Record the byte offset of this object for the xref table.
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(id)?;
        self.w += target.write(b" 0 obj\n")?;
        self.w += target.write(b"<<\n")?;
        self.w += target.write(b"/Type /Font\n")?;

        match font {
            // One of the 14 standard fonts: referenced by base name
            // only, no font program needs to be embedded.
            DocumentFont::Builtin(builtin) => {
                self.w += target.write(b"/Subtype /Type1\n")?;
                self.w += target.write(b"/BaseFont /")?;
                self.w += target.write_str(builtin.name())?;
                self.w += target.write(b"\n")?;
            },
            // Externally loaded fonts: the dictionary header is written,
            // but embedding the descriptor/font program is unfinished —
            // reaching this variant panics deliberately.
            DocumentFont::Loaded(font) => {
                self.w += target.write(b"/Subtype /TrueType\n")?;
                self.w += target.write(b"/BaseFont /")?;
                self.w += target.write_str(font.name.as_str())?;
                self.w += target.write(b"\n")?;
                unimplemented!();
            },
        }

        self.w += target.write(b">>\n")?;
        self.w += target.write(b"endobj\n")?;

        id += 1;
    }

    Ok(self.w)
}
+
+ /// Write the page contents.
+ fn write_content<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ let mut id = self.content_start;
+
+ for page in &self.doc.pages {
+ for content in &page.contents {
+ self.xref_table.push(self.w as u32);
+
+ self.w += target.write_str(id)?;
+ self.w += target.write(b" 0 obj\n")?;
+ self.w += target.write(b"<<\n")?;
+
+ let mut buffer = Vec::new();
+ buffer.write(b"BT/\n")?;
+
+ buffer.write(b"/F1 13 Tf\n")?;
+ buffer.write(b"108 734 Td\n")?;
+ buffer.write(b"(")?;
+
+ let Text(string) = content;
+ buffer.write(string.as_bytes())?;
+
+ buffer.write(b") Tj\n")?;
+ buffer.write(b"ET\n")?;
+
+ self.w += target.write(b"/Length ")?;
+ self.w += target.write_str(buffer.len())?;
+ self.w += target.write(b"\n")?;
+
+ self.w += target.write(b">>\n")?;
+
+ self.w += target.write(b"stream\n")?;
+ self.w += target.write(&buffer)?;
+ self.w += target.write(b"endstream\n")?;
+
+ self.w += target.write(b"endobj\n")?;
+
+ id += 1;
+ }
+ }
+
+ Ok(self.w)
+ }
+
+ /// Write the cross-reference table.
+ fn write_xref_table<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.offset_xref = self.w as u32;
+
+ self.w += target.write(b"xref\n")?;
+ self.w += target.write(b"0 ")?;
+ self.w += target.write_str(self.xref_table.len())?;
+ self.w += target.write(b"\n")?;
+
+ self.w += target.write(b"0000000000 65535 f\r\n")?;
+
+ for offset in &self.xref_table {
+ self.w += target.write(format!("{:010}", offset).as_bytes())?;
+ self.w += target.write(b" 00000 n")?;
+ self.w += target.write(b"\r\n")?;
+ }
+
+ Ok(self.w)
+ }
+
/// Write the trailer (points to the root object).
fn write_trailer<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.w += target.write(b"trailer\n")?;
    self.w += target.write(b"<<\n")?;

    // /Root references the document catalog.
    self.w += target.write(b"/Root ")?;
    self.w += target.write_str(self.catalog_id)?;
    self.w += target.write(b" 0 R\n")?;

    // /Size is the total number of xref entries including the
    // always-free entry for object 0 — hence `len + 1`.
    self.w += target.write(b"/Size ")?;
    self.w += target.write_str(self.xref_table.len() + 1)?;
    self.w += target.write(b"\n")?;

    self.w += target.write(b">>\n")?;

    Ok(self.w)
}
+
+ /// Write where the cross-reference table starts.
+ fn write_start_xref<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.w += target.write(b"startxref\n")?;
+ self.w += target.write_str(self.offset_xref)?;
+ self.w += target.write(b"\n")?;
+
+ Ok(self.w)
+ }
+}
+
+
#[cfg(test)]
mod pdf_tests {
    use super::*;
    use crate::parsing::{Tokenize, Parse};
    use crate::doc::Generate;

    /// Create a pdf with a name from the source code.
    ///
    /// Runs the full pipeline: tokenize → parse → generate → write pdf.
    fn test(name: &str, src: &str) {
        // NOTE(review): writes to a path relative to the working
        // directory (`../target/...` from the callers below) — the test
        // fails if that directory does not exist.
        let mut file = std::fs::File::create(name).unwrap();
        let doc = src.tokenize()
            .parse().unwrap()
            .generate().unwrap();
        file.write_pdf(&doc).unwrap();
    }

    #[test]
    fn pdf_simple() {
        // Smoke tests: these only check that writing succeeds without
        // panicking; the produced files must be inspected manually.
        test("../target/write1.pdf", "This is an example of a sentence.");
        test("../target/write2.pdf","
            Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed
            diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
            voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd
            gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor
            sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut
            labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
            justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est
            Lorem ipsum dolor sit amet.
        ");
    }
}
diff --git a/src/utility.rs b/src/utility.rs
new file mode 100644
index 00000000..8304025d
--- /dev/null
+++ b/src/utility.rs
@@ -0,0 +1,138 @@
+//! Utility functionality.
+
+use std::str::Split;
+use std::iter::Peekable;
+use unicode_xid::UnicodeXID;
+
+
/// Types that can be splined.
pub trait Splinor {
    /// Returns an iterator over the substrings splitted by the pattern,
    /// intertwined with the splinor.
    ///
    /// Mirrors [`str::split`]: leading, trailing and adjacent occurrences
    /// of the pattern produce empty [`Splined::Value`] entries.
    ///
    /// # Example
    ///
    /// ```
    /// # use typeset::utility::*;
    /// #[derive(Debug, Copy, Clone, PartialEq)]
    /// struct Space;
    ///
    /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
    /// assert_eq!(v, [
    ///     Splined::Value("My"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("airplane"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("flies!"),
    /// ]);
    /// ```
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
}

impl Splinor for str {
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
        Spline {
            split: self.split(pat).peekable(),
            splinor: Splined::Splinor(splinor),
            next_splinor: false,
        }
    }
}

/// Iterator over splitted values and splinors.
///
/// Created by the [`spline`](Splinor::spline) function.
#[derive(Debug, Clone)]
pub struct Spline<'s, T> {
    splinor: Splined<'s, T>,
    split: Peekable<Split<'s, &'s str>>,
    next_splinor: bool,
}

/// Represents either a splitted substring or a splinor.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Splined<'s, T> {
    /// A substring.
    Value(&'s str),
    /// An intertwined splinor.
    Splinor(T),
}

impl<'s, T: Clone> Iterator for Spline<'s, T> {
    type Item = Splined<'s, T>;

    fn next(&mut self) -> Option<Splined<'s, T>> {
        // Alternate between the underlying split values and clones of
        // the splinor; a splinor is only emitted while more values follow.
        let emit_splinor = self.next_splinor && self.split.peek().is_some();
        self.next_splinor = !emit_splinor;

        if emit_splinor {
            Some(self.splinor.clone())
        } else {
            Some(Splined::Value(self.split.next()?))
        }
    }
}
+
+
/// More useful functions on `str`'s.
pub trait StrExt {
    /// Whether self consists only of whitespace.
    ///
    /// NOTE(review): any `'\n'` makes this return `false` — the
    /// implementation deliberately excludes newlines from "whitespace",
    /// presumably because they are significant to the tokenizer; confirm
    /// against the parser before relying on this.
    fn is_whitespace(&self) -> bool;

    /// Whether this word is a valid unicode identifier.
    fn is_identifier(&self) -> bool;
}
+
+impl StrExt for str {
+ #[inline]
+ fn is_whitespace(&self) -> bool {
+ self.chars().all(|c| c.is_whitespace() && c != '\n')
+ }
+
+ fn is_identifier(&self) -> bool {
+ let mut chars = self.chars();
+
+ match chars.next() {
+ Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+ None => return false,
+ _ => (),
+ }
+
+ while let Some(c) = chars.next() {
+ if !UnicodeXID::is_xid_continue(c) {
+ return false;
+ }
+ }
+
+ true
+ }
+}
+
+
#[cfg(test)]
mod splinor_tests {
    use super::*;
    use Splined::{Value as V, Splinor as S};

    // Dummy token type used as the splinor in the tests below.
    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Token { DoubleUnderscore }

    /// Assert that splining `string` by `pat` yields exactly `vec`.
    fn test<T>(string: &str, pat: &str, splinor: T, vec: Vec<Splined<T>>)
    where T: std::fmt::Debug + Clone + PartialEq {
        assert_eq!(string.spline(pat, splinor).collect::<Vec<_>>(), vec);
    }

    #[test]
    fn splinor() {
        let s = S(Token::DoubleUnderscore);
        // Leading/trailing patterns produce empty `Value("")` entries
        // around the splinors, mirroring `str::split`.
        test("__he__llo__world__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
        test("__Italic__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Italic"), s, V("")]);
        // No pattern at the edges: values only, splinor in between.
        test("Key__Value", "__", Token::DoubleUnderscore,
            vec![V("Key"), s, V("Value")]);
        test("__Start__NoEnd", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Start"), s, V("NoEnd")]);
        test("NoStart__End__", "__", Token::DoubleUnderscore,
            vec![V("NoStart"), s, V("End"), s, V("")]);
    }
}