summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-02-12 21:31:35 +0100
committerLaurenz <laurmaedje@gmail.com>2019-02-12 21:31:35 +0100
commit5a600eb354c65ec008cbf020e45705c2f401d669 (patch)
treeb61d6ae3168716ba198c5631934520053c4a57c4 /src
Move crate into workspace subfolder
Diffstat (limited to 'src')
-rw-r--r--src/doc.rs187
-rw-r--r--src/font.rs270
-rw-r--r--src/lib.rs11
-rw-r--r--src/parsing.rs696
-rw-r--r--src/pdf.rs375
-rw-r--r--src/utility.rs138
6 files changed, 1677 insertions, 0 deletions
diff --git a/src/doc.rs b/src/doc.rs
new file mode 100644
index 00000000..04e214a3
--- /dev/null
+++ b/src/doc.rs
@@ -0,0 +1,187 @@
+//! Generation of abstract documents from syntax trees.
+
+use std::fmt;
+use crate::parsing::{SyntaxTree, Node};
+use crate::font::{Font, BuiltinFont};
+
+
+/// Abstract representation of a complete typesetted document.
+///
+/// This abstract thing can then be serialized into a specific format like PDF.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Document {
+ /// The pages of the document.
+ pub pages: Vec<Page>,
+ /// The fonts used by the document.
+ pub fonts: Vec<DocumentFont>,
+}
+
+impl Document {
+ /// Create a new document without content.
+ pub fn new() -> Document {
+ Document {
+ pages: vec![],
+ fonts: vec![],
+ }
+ }
+}
+
+/// A page of a document.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Page {
+ /// The width and height of the page.
+ pub size: [Size; 2],
+ /// The contents of the page.
+ pub contents: Vec<Text>,
+}
+
+/// Plain text.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct Text(pub String);
+
+/// A font (either built-in or external).
+#[derive(Debug, Clone, PartialEq)]
+pub enum DocumentFont {
+ /// One of the 14 built-in fonts.
+ Builtin(BuiltinFont),
+ /// An externally loaded font.
+ Loaded(Font),
+}
+
/// A distance that can be created from different units of length.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Size {
    /// The size in typographic points (1/72 inches).
    pub points: f32,
}

impl Size {
    /// Create a size from a number of points.
    pub fn from_points(points: f32) -> Size {
        Size { points }
    }

    /// Create a size from a number of inches.
    ///
    /// One inch equals exactly 72 typographic points.
    pub fn from_inches(inches: f32) -> Size {
        // Was `inches / 72.0`, which inverted the conversion
        // (it turned 1 inch into ~0.014 pt instead of 72 pt).
        Size { points: 72.0 * inches }
    }

    /// Create a size from a number of millimeters (1 in = 25.4 mm).
    pub fn from_mm(mm: f32) -> Size {
        // 72 pt / 25.4 mm ≈ 2.8346 pt per mm; derive the factor instead of
        // hard-coding a rounded constant.
        Size { points: 72.0 * mm / 25.4 }
    }

    /// Create a size from a number of centimeters (1 in = 2.54 cm).
    pub fn from_cm(cm: f32) -> Size {
        // Was `0.028345 * cm`, off by a factor of 1000:
        // 1 cm ≈ 28.35 pt, not 0.028 pt.
        Size { points: 72.0 * cm / 2.54 }
    }
}
+
+
+/// A type that can be generated into a document.
+pub trait Generate {
+ /// Generate a document from self.
+ fn generate(self) -> GenResult<Document>;
+}
+
+impl Generate for SyntaxTree<'_> {
+ fn generate(self) -> GenResult<Document> {
+ Generator::new(self).generate()
+ }
+}
+
+/// Result type used for parsing.
+type GenResult<T> = std::result::Result<T, GenerationError>;
+
/// An error that occurred while generating a document.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GenerationError {
    /// A human-readable description of what went wrong.
    pub message: String,
}

impl fmt::Display for GenerationError {
    /// Formats as `generation error: <message>`.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("generation error: ")?;
        formatter.write_str(&self.message)
    }
}
+
+
+/// Transforms an abstract syntax tree into a document.
+#[derive(Debug, Clone)]
+struct Generator<'s> {
+ tree: SyntaxTree<'s>,
+}
+
impl<'s> Generator<'s> {
    /// Create a new generator from a syntax tree.
    fn new(tree: SyntaxTree<'s>) -> Generator<'s> {
        Generator { tree }
    }

    /// Generate the abstract document.
    ///
    /// Currently flattens the whole tree into a single run of text placed on
    /// one A4 page set in Helvetica. Styling toggles and function calls are
    /// not implemented yet and will panic via `unimplemented!`.
    fn generate(&mut self) -> GenResult<Document> {
        // The only font emitted for now: built-in Helvetica.
        let fonts = vec![DocumentFont::Builtin(BuiltinFont::Helvetica)];

        let mut text = String::new();
        for node in &self.tree.nodes {
            match node {
                // Collapse whitespace: only emit a space once there is text,
                // which drops leading whitespace entirely.
                Node::Space if !text.is_empty() => text.push(' '),
                Node::Space | Node::Newline => (),
                Node::Word(word) => text.push_str(word),

                // Not supported by the generator yet.
                Node::ToggleItalics | Node::ToggleBold | Node::ToggleMath => unimplemented!(),
                Node::Func(_) => unimplemented!(),

            }
        }

        // A single page in A4 portrait (210 mm x 297 mm).
        let page = Page {
            size: [Size::from_mm(210.0), Size::from_mm(297.0)],
            contents: vec![ Text(text) ],
        };

        Ok(Document {
            pages: vec![page],
            fonts,
        })
    }

    /// Gives a generation error with a message.
    ///
    /// NOTE(review): currently unused; presumably kept for upcoming
    /// error paths in `generate`.
    #[inline]
    fn err<R, S: Into<String>>(&self, message: S) -> GenResult<R> {
        Err(GenerationError { message: message.into() })
    }
}
+
+
+#[cfg(test)]
+mod generator_tests {
+ use super::*;
+ use crate::parsing::{Tokenize, Parse};
+
+ /// Test if the source gets generated into the document.
+ fn test(src: &str, doc: Document) {
+ assert_eq!(src.tokenize().parse().unwrap().generate(), Ok(doc));
+ }
+
+ /// Test if generation gives this error for the source code.
+ fn test_err(src: &str, err: GenerationError) {
+ assert_eq!(src.tokenize().parse().unwrap().generate(), Err(err));
+ }
+
+ #[test]
+ fn generator_simple() {
+ test("This is an example of a sentence.", Document {
+ pages: vec![
+ Page {
+ size: [Size::from_mm(210.0), Size::from_mm(297.0)],
+ contents: vec![
+ Text("This is an example of a sentence.".to_owned()),
+ ]
+ }
+ ],
+ fonts: vec![DocumentFont::Builtin(BuiltinFont::Helvetica)],
+ });
+ }
+}
diff --git a/src/font.rs b/src/font.rs
new file mode 100644
index 00000000..1280aec3
--- /dev/null
+++ b/src/font.rs
@@ -0,0 +1,270 @@
+//! Reading of metrics and font data from _OpenType_ and _TrueType_ font files.
+
+#![allow(unused_variables)]
+
+use std::fmt;
+use std::io::{self, Read, Seek, SeekFrom};
+use byteorder::{BE, ReadBytesExt};
+
+
+/// A loaded opentype (or truetype) font.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Font {
+ /// The PostScript name of this font.
+ pub name: String,
+}
+
+impl Font {
+ /// Create a new font from a byte source.
+ pub fn new<R>(data: &mut R) -> FontResult<Font> where R: Read + Seek {
+ OpenTypeReader::new(data).read()
+ }
+}
+
/// The 14 standard fonts that every conforming PDF viewer provides built-in.
#[derive(Debug, Copy, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum BuiltinFont {
    Courier,
    CourierBold,
    CourierOblique,
    CourierBoldOblique,
    Helvetica,
    HelveticaBold,
    HelveticaOblique,
    HelveticaBoldOblique,
    TimesRoman,
    TimesBold,
    TimeItalic,
    TimeBoldItalic,
    Symbol,
    ZapfDingbats,
}

impl BuiltinFont {
    /// The PostScript base-font name of the font, exactly as it must appear
    /// in a PDF font dictionary.
    pub fn name(&self) -> &'static str {
        use BuiltinFont::*;
        match self {
            Courier => "Courier",
            CourierBold => "Courier-Bold",
            CourierOblique => "Courier-Oblique",
            CourierBoldOblique => "Courier-BoldOblique",
            Helvetica => "Helvetica",
            HelveticaBold => "Helvetica-Bold",
            HelveticaOblique => "Helvetica-Oblique",
            HelveticaBoldOblique => "Helvetica-BoldOblique",
            TimesRoman => "Times-Roman",
            TimesBold => "Times-Bold",
            // The variant identifiers keep their existing (misspelled) names
            // so callers stay source-compatible, but the returned strings are
            // the standard base-font names: previously these were
            // "Time-Italic"/"Time-BoldItalic", which no viewer recognizes.
            TimeItalic => "Times-Italic",
            TimeBoldItalic => "Times-BoldItalic",
            Symbol => "Symbol",
            ZapfDingbats => "ZapfDingbats",
        }
    }
}
+
+
+/// Result type used for tokenization.
+type FontResult<T> = std::result::Result<T, LoadingError>;
+
+/// A failure when loading a font.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct LoadingError {
+ /// A message describing the error.
+ pub message: String,
+}
+
+impl From<io::Error> for LoadingError {
+ fn from(err: io::Error) -> LoadingError {
+ LoadingError { message: format!("io error: {}", err) }
+ }
+}
+
+impl fmt::Display for LoadingError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "font loading error: {}", self.message)
+ }
+}
+
+
+/// Reads a font from a _OpenType_ or _TrueType_ font file.
+struct OpenTypeReader<'r, R> where R: Read + Seek {
+ data: &'r mut R,
+ font: Font,
+ table_records: Vec<TableRecord>,
+}
+
+/// Used to identify a table, design-variation axis, script,
+/// language system, feature, or baseline.
+#[derive(Clone, PartialEq)]
+struct Tag(pub [u8; 4]);
+
+impl PartialEq<&str> for Tag {
+ fn eq(&self, other: &&str) -> bool {
+ other.as_bytes() == &self.0
+ }
+}
+
+impl fmt::Debug for Tag {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "\"{}\"", self)
+ }
+}
+
+impl fmt::Display for Tag {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let a = self.0;
+ write!(f, "{}{}{}{}", a[0] as char, a[1] as char, a[2] as char, a[3] as char)
+ }
+}
+
+/// Stores information about one table.
+#[derive(Debug, Clone, PartialEq)]
+struct TableRecord {
+ table: Tag,
+ check_sum: u32,
+ offset: u32,
+ length: u32,
+}
+
+impl<'r, R> OpenTypeReader<'r, R> where R: Read + Seek {
+ /// Create a new reader from a byte source.
+ pub fn new(data: &'r mut R) -> OpenTypeReader<'r, R> {
+ OpenTypeReader {
+ data,
+ font: Font {
+ name: String::new(),
+ },
+ table_records: vec![],
+ }
+ }
+
+ /// Read the font from the byte source.
+ pub fn read(mut self) -> FontResult<Font> {
+ self.read_table_records()?;
+ self.read_name_table()?;
+
+ Ok(self.font)
+ }
+
+ /// Read the offset table.
+ fn read_table_records(&mut self) -> FontResult<()> {
+ let sfnt_version = self.data.read_u32::<BE>()?;
+ let num_tables = self.data.read_u16::<BE>()?;
+ let search_range = self.data.read_u16::<BE>()?;
+ let entry_selector = self.data.read_u16::<BE>()?;
+ let range_shift = self.data.read_u16::<BE>()?;
+
+ let outlines = match sfnt_version {
+ 0x00010000 => "truetype",
+ 0x4F54544F => "cff",
+ _ => return self.err("unsuported font outlines"),
+ };
+
+ for _ in 0 .. num_tables {
+ let table = self.read_tag()?;
+ let check_sum = self.data.read_u32::<BE>()?;
+ let offset = self.data.read_u32::<BE>()?;
+ let length = self.data.read_u32::<BE>()?;
+
+ self.table_records.push(TableRecord {
+ table,
+ check_sum,
+ offset,
+ length,
+ });
+ }
+
+ Ok(())
+ }
+
+ /// Read the name table (gives general information about the font).
+ fn read_name_table(&mut self) -> FontResult<()> {
+ let table = match self.table_records.iter().find(|record| record.table == "name") {
+ Some(table) => table,
+ None => return self.err("missing 'name' table"),
+ };
+
+ self.data.seek(SeekFrom::Start(table.offset as u64))?;
+
+ let format = self.data.read_u16::<BE>()?;
+ let count = self.data.read_u16::<BE>()?;
+ let string_offset = self.data.read_u16::<BE>()?;
+
+ let storage = (table.offset + string_offset as u32) as u64;
+
+ let mut name = None;
+
+ for _ in 0 .. count {
+ let platform_id = self.data.read_u16::<BE>()?;
+ let encoding_id = self.data.read_u16::<BE>()?;
+ let language_id = self.data.read_u16::<BE>()?;
+ let name_id = self.data.read_u16::<BE>()?;
+ let length = self.data.read_u16::<BE>()?;
+ let offset = self.data.read_u16::<BE>()?;
+
+ // Postscript name is what we are interested in
+ if name_id == 6 && platform_id == 3 && encoding_id == 1 {
+ if length % 2 != 0 {
+ return self.err("invalid encoded name");
+ }
+
+ self.data.seek(SeekFrom::Start(storage + offset as u64))?;
+ let mut buffer = Vec::with_capacity(length as usize / 2);
+
+ for _ in 0 .. length / 2 {
+ buffer.push(self.data.read_u16::<BE>()?);
+ }
+
+ name = match String::from_utf16(&buffer) {
+ Ok(string) => Some(string),
+ Err(_) => return self.err("invalid encoded name"),
+ };
+
+ break;
+ }
+ }
+
+ self.font.name = match name {
+ Some(name) => name,
+ None => return self.err("missing postscript font name"),
+ };
+
+ Ok(())
+ }
+
+ /// Read a tag (array of four u8's).
+ fn read_tag(&mut self) -> FontResult<Tag> {
+ let mut tag = [0u8; 4];
+ self.data.read(&mut tag)?;
+ Ok(Tag(tag))
+ }
+
+ /// Gives a font loading error with a message.
+ fn err<T, S: Into<String>>(&self, message: S) -> FontResult<T> {
+ Err(LoadingError { message: message.into() })
+ }
+}
+
+
+#[cfg(test)]
+mod font_tests {
+ use super::*;
+
+ /// Test if the loaded font is the same as the expected font.
+ fn test(path: &str, font: Font) {
+ let mut file = std::fs::File::open(path).unwrap();
+ assert_eq!(Font::new(&mut file), Ok(font));
+ }
+
+ #[test]
+ fn opentype() {
+ test("../fonts/NotoSerif-Regular.ttf", Font {
+ name: "NotoSerif".to_owned(),
+ });
+ test("../fonts/NotoSansMath-Regular.ttf", Font {
+ name: "NotoSansMath-Regular".to_owned(),
+ });
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 00000000..2959925e
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,11 @@
+//! Typeset is a library for compiling _plain-text_ strings written in the
+//! corresponding typesetting language into a typesetted document in a
+//! file format like _PDF_.
+
+#![allow(unused)]
+
+pub mod parsing;
+pub mod doc;
+pub mod font;
+pub mod pdf;
+pub mod utility;
diff --git a/src/parsing.rs b/src/parsing.rs
new file mode 100644
index 00000000..5efa69e5
--- /dev/null
+++ b/src/parsing.rs
@@ -0,0 +1,696 @@
+//! Parsing of source code into tokens and syntax trees.
+
+use std::fmt;
+use std::iter::Peekable;
+use std::mem::swap;
+use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
+use crate::utility::{Splinor, Spline, Splined, StrExt};
+
+
+/// A logical unit of the incoming text stream.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum Token<'s> {
+ /// One or more whitespace (non-newline) codepoints.
+ Space,
+ /// A line feed (either `\n` or `\r\n`).
+ Newline,
+ /// A left bracket: `[`.
+ LeftBracket,
+ /// A right bracket: `]`.
+ RightBracket,
+ /// A colon (`:`) indicating the beginning of function arguments.
+ ///
+ /// If a colon occurs outside of the function header, it will be
+ /// tokenized as a `Word`.
+ Colon,
+ /// Same as with `Colon`.
+ Equals,
+ /// Two underscores, indicating text in _italics_.
+ DoubleUnderscore,
+ /// Two stars, indicating **bold** text.
+ DoubleStar,
+ /// A dollar sign, indicating mathematical content.
+ Dollar,
+ /// A hashtag starting a comment.
+ Hashtag,
+ /// Everything else just is a literal word.
+ Word(&'s str),
+}
+
+
+/// A type that is seperable into logical units (tokens).
+pub trait Tokenize {
+ /// Tokenize self into logical units.
+ fn tokenize<'s>(&'s self) -> Tokens<'s>;
+}
+
+impl Tokenize for str {
+ fn tokenize<'s>(&'s self) -> Tokens<'s> {
+ Tokens::new(self)
+ }
+}
+
+
+/// An iterator over the tokens of a text.
+#[derive(Clone)]
+pub struct Tokens<'s> {
+ source: &'s str,
+ words: Peekable<UWordBounds<'s>>,
+ state: TokensState<'s>,
+ stack: Vec<TokensState<'s>>,
+}
+
+impl fmt::Debug for Tokens<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("Tokens")
+ .field("source", &self.source)
+ .field("words", &"Peekable<UWordBounds>")
+ .field("state", &self.state)
+ .field("stack", &self.stack)
+ .finish()
+ }
+}
+
+/// The state the tokenizer is in.
+#[derive(Debug, Clone)]
+enum TokensState<'s> {
+ /// The base state if there is nothing special we are in.
+ Body,
+ /// Inside a function header. Here colons and equal signs get parsed
+ /// as distinct tokens rather than text.
+ Function,
+ /// We expect either the end of the function or the beginning of the body.
+ MaybeBody,
+ /// We are inside one unicode word that consists of multiple tokens,
+ /// because it contains double underscores.
+ DoubleUnderscore(Spline<'s, Token<'s>>),
+}
+
+impl PartialEq for TokensState<'_> {
+ fn eq(&self, other: &TokensState) -> bool {
+ use TokensState as TS;
+
+ match (self, other) {
+ (TS::Body, TS::Body) => true,
+ (TS::Function, TS::Function) => true,
+ (TS::MaybeBody, TS::MaybeBody) => true,
+ // They are not necessarily different, but we don't care
+ _ => false,
+ }
+ }
+}
+
+impl<'s> Iterator for Tokens<'s> {
+ type Item = Token<'s>;
+
+ /// Advance the iterator, return the next token or nothing.
+ fn next(&mut self) -> Option<Token<'s>> {
+ use TokensState as TS;
+
+ // Return the remaining words and double underscores.
+ if let TS::DoubleUnderscore(ref mut splinor) = self.state {
+ loop {
+ if let Some(splined) = splinor.next() {
+ return Some(match splined {
+ Splined::Value(word) if word != "" => Token::Word(word),
+ Splined::Splinor(s) => s,
+ _ => continue,
+ });
+ } else {
+ self.unswitch();
+ break;
+ }
+ }
+ }
+
+ // Skip whitespace, but if at least one whitespace word existed,
+ // remember that, because we return a space token.
+ let mut whitespace = false;
+ while let Some(word) = self.words.peek() {
+ if !word.is_whitespace() {
+ break;
+ }
+ whitespace = true;
+ self.advance();
+ }
+ if whitespace {
+ return Some(Token::Space);
+ }
+
+ // Function maybe has a body
+ if self.state == TS::MaybeBody {
+ match *self.words.peek()? {
+ "[" => {
+ self.state = TS::Body;
+ return Some(self.consumed(Token::LeftBracket));
+ },
+ _ => self.unswitch(),
+ }
+ }
+
+ // Now all special cases are handled and we can finally look at the
+ // next words.
+ let next = self.words.next()?;
+ let afterwards = self.words.peek();
+
+ Some(match next {
+ // Special characters
+ "[" => {
+ self.switch(TS::Function);
+ Token::LeftBracket
+ },
+ "]" => {
+ if self.state == TS::Function {
+ self.state = TS::MaybeBody;
+ }
+ Token::RightBracket
+ },
+ "$" => Token::Dollar,
+ "#" => Token::Hashtag,
+
+ // Context sensitive operators
+ ":" if self.state == TS::Function => Token::Colon,
+ "=" if self.state == TS::Function => Token::Equals,
+
+ // Double star/underscore
+ "*" if afterwards == Some(&"*") => {
+ self.consumed(Token::DoubleStar)
+ },
+ "__" => Token::DoubleUnderscore,
+
+ // Newlines
+ "\n" | "\r\n" => Token::Newline,
+
+ // Escaping
+ r"\" => {
+ if let Some(next) = afterwards {
+ let escapable = match *next {
+ "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
+ w if w.starts_with("__") => true,
+ _ => false,
+ };
+
+ if escapable {
+ let next = *next;
+ self.advance();
+ return Some(Token::Word(next));
+ }
+ }
+
+ Token::Word(r"\")
+ },
+
+ // Double underscores hidden in words.
+ word if word.contains("__") => {
+ let spline = word.spline("__", Token::DoubleUnderscore);
+ self.switch(TS::DoubleUnderscore(spline));
+ return self.next();
+ },
+
+ // Now it seems like it's just a normal word.
+ word => Token::Word(word),
+ })
+ }
+}
+
+impl<'s> Tokens<'s> {
+ /// Create a new token stream from text.
+ #[inline]
+ pub fn new(source: &'s str) -> Tokens<'s> {
+ Tokens {
+ source,
+ words: source.split_word_bounds().peekable(),
+ state: TokensState::Body,
+ stack: vec![],
+ }
+ }
+
+ /// Advance the iterator by one step.
+ #[inline]
+ fn advance(&mut self) {
+ self.words.next();
+ }
+
+ /// Switch to the given state.
+ #[inline]
+ fn switch(&mut self, mut state: TokensState<'s>) {
+ swap(&mut state, &mut self.state);
+ self.stack.push(state);
+ }
+
+ /// Go back to the top-of-stack state.
+ #[inline]
+ fn unswitch(&mut self) {
+ self.state = self.stack.pop().unwrap_or(TokensState::Body);
+ }
+
+ /// Advance and return the given token.
+ #[inline]
+ fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
+ self.advance();
+ token
+ }
+}
+
+
+/// A tree representation of the source.
+#[derive(Debug, Clone, PartialEq)]
+pub struct SyntaxTree<'s> {
+ /// The children.
+ pub nodes: Vec<Node<'s>>,
+}
+
+impl<'s> SyntaxTree<'s> {
+ /// Create an empty syntax tree.
+ pub fn new() -> SyntaxTree<'s> {
+ SyntaxTree { nodes: vec![] }
+ }
+}
+
+/// A node in the abstract syntax tree.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Node<'s> {
+ /// Whitespace between other nodes.
+ Space,
+ /// A line feed.
+ Newline,
+ /// Indicates that italics were enabled/disabled.
+ ToggleItalics,
+ /// Indicates that boldface was enabled/disabled.
+ ToggleBold,
+ /// Indicates that math mode was enabled/disabled.
+ ToggleMath,
+ /// A literal word.
+ Word(&'s str),
+ /// A function invocation.
+ Func(Function<'s>),
+}
+
+/// A node representing a function invocation.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Function<'s> {
+ /// The name of the function.
+ pub name: &'s str,
+ /// Some syntax tree if the function had a body (second set of brackets),
+ /// otherwise nothing.
+ pub body: Option<SyntaxTree<'s>>,
+}
+
+
+/// A type that is parseable into a syntax tree.
+pub trait Parse<'s> {
+ /// Parse self into a syntax tree.
+ fn parse(self) -> ParseResult<SyntaxTree<'s>>;
+}
+
+impl<'s> Parse<'s> for Tokens<'s> {
+ fn parse(self) -> ParseResult<SyntaxTree<'s>> {
+ Parser::new(self).parse()
+ }
+}
+
+impl<'s> Parse<'s> for Vec<Token<'s>> {
+ fn parse(self) -> ParseResult<SyntaxTree<'s>> {
+ Parser::new(self.into_iter()).parse()
+ }
+}
+
+/// Result type used for parsing.
+type ParseResult<T> = std::result::Result<T, ParseError>;
+
+/// A failure when parsing.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct ParseError {
+ /// A message describing the error.
+ pub message: String,
+}
+
+impl fmt::Display for ParseError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "parse error: {}", self.message)
+ }
+}
+
+
+/// Parses a token stream into an abstract syntax tree.
+#[derive(Debug, Clone)]
+struct Parser<'s, T> where T: Iterator<Item = Token<'s>> {
+ tokens: Peekable<T>,
+ state: ParserState,
+ stack: Vec<Function<'s>>,
+ tree: SyntaxTree<'s>,
+}
+
+/// The state the parser is in.
+#[derive(Debug, Clone, PartialEq)]
+enum ParserState {
+ /// The base state of the parser.
+ Body,
+ /// Inside a function header.
+ Function,
+}
+
+impl<'s, T> Parser<'s, T> where T: Iterator<Item = Token<'s>> {
+ /// Create a new parser from a type that emits results of tokens.
+ fn new(tokens: T) -> Parser<'s, T> {
+ Parser {
+ tokens: tokens.peekable(),
+ state: ParserState::Body,
+ stack: vec![],
+ tree: SyntaxTree::new(),
+ }
+ }
+
+ /// Parse into an abstract syntax tree.
+ fn parse(mut self) -> ParseResult<SyntaxTree<'s>> {
+ use ParserState as PS;
+
+ while let Some(token) = self.tokens.next() {
+ // Comment
+ if token == Token::Hashtag {
+ self.skip_while(|t| *t != Token::Newline);
+ self.advance();
+ }
+
+ match self.state {
+ PS::Body => match token {
+ // Whitespace
+ Token::Space => self.append(Node::Space),
+ Token::Newline => self.append(Node::Newline),
+
+ // Words
+ Token::Word(word) => self.append(Node::Word(word)),
+
+ // Functions
+ Token::LeftBracket => self.switch(PS::Function),
+ Token::RightBracket => {
+ match self.stack.pop() {
+ Some(func) => self.append(Node::Func(func)),
+ None => return self.err("unexpected closing bracket"),
+ }
+ },
+
+ // Modifiers
+ Token::DoubleUnderscore => self.append(Node::ToggleItalics),
+ Token::DoubleStar => self.append(Node::ToggleBold),
+ Token::Dollar => self.append(Node::ToggleMath),
+
+ // Should not happen
+ Token::Colon | Token::Equals | Token::Hashtag => unreachable!(),
+ },
+
+ PS::Function => {
+ let name = match token {
+ Token::Word(word) if word.is_identifier() => word,
+ _ => return self.err("expected identifier"),
+ };
+
+ if self.tokens.next() != Some(Token::RightBracket) {
+ return self.err("expected closing bracket");
+ }
+
+ let mut func = Function {
+ name,
+ body: None,
+ };
+
+ // This function has a body.
+ if let Some(Token::LeftBracket) = self.tokens.peek() {
+ self.advance();
+ func.body = Some(SyntaxTree::new());
+ self.stack.push(func);
+ } else {
+ self.append(Node::Func(func));
+ }
+
+ self.switch(PS::Body);
+ },
+ }
+ }
+
+ if !self.stack.is_empty() {
+ return self.err("expected closing bracket");
+ }
+
+ Ok(self.tree)
+ }
+
+ /// Advance the iterator by one step.
+ #[inline]
+ fn advance(&mut self) {
+ self.tokens.next();
+ }
+
+ /// Skip tokens until the condition is met.
+ #[inline]
+ fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
+ while let Some(token) = self.tokens.peek() {
+ if !f(token) {
+ break;
+ }
+ self.advance();
+ }
+ }
+
+ /// Switch the state.
+ #[inline]
+ fn switch(&mut self, state: ParserState) {
+ self.state = state;
+ }
+
+ /// Append a node to the top-of-stack function or the main tree itself.
+ #[inline]
+ fn append(&mut self, node: Node<'s>) {
+ let tree = match self.stack.last_mut() {
+ Some(func) => func.body.get_or_insert_with(|| SyntaxTree::new()),
+ None => &mut self.tree,
+ };
+
+ tree.nodes.push(node);
+ }
+
+ /// Gives a parsing error with a message.
+ #[inline]
+ fn err<R, S: Into<String>>(&self, message: S) -> ParseResult<R> {
+ Err(ParseError { message: message.into() })
+ }
+}
+
+
+#[cfg(test)]
+mod token_tests {
+ use super::*;
+ use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
+ Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
+ Dollar as D, Hashtag as H, Word as W};
+
+ /// Test if the source code tokenizes to the tokens.
+ fn test(src: &str, tokens: Vec<Token>) {
+ assert_eq!(src.tokenize().collect::<Vec<_>>(), tokens);
+ }
+
+ /// Tokenizes the basic building blocks.
+ #[test]
+ fn tokenize_base() {
+ test("", vec![]);
+ test("Hallo", vec![W("Hallo")]);
+ test("[", vec![L]);
+ test("]", vec![R]);
+ test("$", vec![D]);
+ test("#", vec![H]);
+ test("**", vec![DS]);
+ test("__", vec![DU]);
+ test("\n", vec![N]);
+ }
+
+ /// Tests if escaping with backslash works as it should.
+ #[test]
+ fn tokenize_escape() {
+ test(r"\[", vec![W("[")]);
+ test(r"\]", vec![W("]")]);
+ test(r"\#", vec![W("#")]);
+ test(r"\$", vec![W("$")]);
+ test(r"\:", vec![W(":")]);
+ test(r"\=", vec![W("=")]);
+ test(r"\**", vec![W("*"), W("*")]);
+ test(r"\*", vec![W("*")]);
+ test(r"\__", vec![W("__")]);
+ test(r"\_", vec![W("_")]);
+ test(r"\hello", vec![W(r"\"), W("hello")]);
+ }
+
+ /// Tokenizes some more realistic examples.
+ #[test]
+ fn tokenize_examples() {
+ test(r"
+ [function][
+ Test [italic][example]!
+ ]
+ ", vec![
+ N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
+ W("example"), R, W("!"), N, S, R, N, S
+ ]);
+
+ test(r"
+ [page: size=A4]
+ [font: size=12pt]
+
+ Das ist ein Beispielsatz mit **fetter** Schrift.
+ ", vec![
+ N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
+ L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
+ W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
+ DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
+ ]);
+ }
+
+ /// This test checks whether the colon and equals symbols get parsed correctly
+ /// depending on the context: Either in a function header or in a body.
+ #[test]
+ fn tokenize_symbols_context() {
+ test("[func: key=value][Answer: 7]",
+ vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
+ W("Answer"), W(":"), S, W("7"), R]);
+ test("[[n: k=v]:x][:[=]]:=",
+ vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
+ L, W(":"), L, E, R, R, W(":"), W("=")]);
+ test("[func: __key__=value]",
+ vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
+ }
+
+ /// This test has a special look at the double underscore syntax, because
+ /// per Unicode standard they are not seperate words and thus harder to parse
+ /// than the stars.
+ #[test]
+ fn tokenize_double_underscore() {
+ test("he__llo__world_ _ __ Now this_ is__ special!",
+ vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
+ W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
+ }
+
+ /// This test is for checking if non-ASCII characters get parsed correctly.
+ #[test]
+ fn tokenize_unicode() {
+ test("[document][Hello 🌍!]",
+ vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
+ test("[f]⺐.", vec![L, W("f"), R, W("⺐"), W(".")]);
+ }
+
+ /// This test looks if LF- and CRLF-style newlines get both identified correctly.
+ #[test]
+ fn tokenize_whitespace_newlines() {
+ test(" \t", vec![S]);
+ test("First line\r\nSecond line\nThird line\n",
+ vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
+ W("Third"), S, W("line"), N]);
+ }
+}
+
+
+#[cfg(test)]
+mod parse_tests {
+ use super::*;
+ use Node::{Space as S, Newline as N, Word as W, Func as F};
+
+ /// Test if the source code parses into the syntax tree.
+ fn test(src: &str, tree: SyntaxTree) {
+ assert_eq!(src.tokenize().parse(), Ok(tree));
+ }
+
+ /// Test if the source parses into the error.
+ fn test_err(src: &str, err: ParseError) {
+ assert_eq!(src.tokenize().parse(), Err(err));
+ }
+
+ /// Short cut macro to create a syntax tree.
+ /// Is `vec`-like and the elements are the nodes.
+ macro_rules! tree {
+ ($($x:expr),*) => (
+ SyntaxTree { nodes: vec![$($x),*] }
+ );
+ ($($x:expr,)*) => (tree![$($x),*])
+ }
+
+ /// Parse the basic cases.
+ #[test]
+ fn parse_base() {
+ test("", tree! {});
+ test("Hello World!", tree! { W("Hello"), S, W("World"), W("!")});
+ }
+
+ /// Parse things dealing with functions.
+ #[test]
+ fn parse_functions() {
+ test("[test]", tree! { F(Function { name: "test", body: None }) });
+ test("This is an [modifier][example] of a function invocation.", tree! {
+ W("This"), S, W("is"), S, W("an"), S,
+ F(Function { name: "modifier", body: Some(tree! { W("example") }) }), S,
+ W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
+ });
+ test("[func][Hello][links][Here][end]", tree! {
+ F(Function {
+ name: "func",
+ body: Some(tree! { W("Hello") }),
+ }),
+ F(Function {
+ name: "links",
+ body: Some(tree! { W("Here") }),
+ }),
+ F(Function {
+ name: "end",
+ body: None,
+ }),
+ });
+ test("[bodyempty][]", tree! {
+ F(Function {
+ name: "bodyempty",
+ body: Some(tree! {})
+ })
+ });
+ test("[nested][[func][call]] outside", tree! {
+ F(Function {
+ name: "nested",
+ body: Some(tree! { F(Function {
+ name: "func",
+ body: Some(tree! { W("call") }),
+ }), }),
+ }),
+ S, W("outside")
+ });
+ }
+
+ /// Tests if the parser handles non-ASCII stuff correctly.
+ #[test]
+ fn parse_unicode() {
+ test("[lib_parse] ⺐.", tree! {
+ F(Function {
+ name: "lib_parse",
+ body: None
+ }),
+ S, W("⺐"), W(".")
+ });
+ test("[func123][Hello 🌍!]", tree! {
+ F(Function {
+ name: "func123",
+ body: Some(tree! { W("Hello"), S, W("🌍"), W("!") }),
+ })
+ });
+ }
+
+ /// Tests whether errors get reported correctly.
+ #[test]
+ fn parse_errors() {
+ test_err("No functions here]", ParseError {
+ message: "unexpected closing bracket".to_owned(),
+ });
+ test_err("[hello][world", ParseError {
+ message: "expected closing bracket".to_owned(),
+ });
+ test_err("[hello world", ParseError {
+ message: "expected closing bracket".to_owned(),
+ });
+ test_err("[ no-name][Why?]", ParseError {
+ message: "expected identifier".to_owned(),
+ });
+ }
+}
diff --git a/src/pdf.rs b/src/pdf.rs
new file mode 100644
index 00000000..5cdf335c
--- /dev/null
+++ b/src/pdf.rs
@@ -0,0 +1,375 @@
+//! Writing of documents in the _PDF_ format.
+
+use std::io::{self, Write};
+use crate::doc::{Document, Text, DocumentFont, Size};
+
+
/// A type that is a sink for types that can be written conforming
/// to the _PDF_ format (that may be things like sizes, other objects
/// or whole documents).
pub trait WritePdf<T> {
    /// Write self into a byte sink, returning how many bytes were written.
    ///
    /// Implementations are expected to write the *complete* PDF
    /// representation of `object` and return its total byte length.
    fn write_pdf(&mut self, object: &T) -> io::Result<usize>;
}
+
impl<W: Write> WritePdf<Document> for W {
    /// Serialize a whole document by delegating to a fresh `PdfWriter`,
    /// which tracks object ids and xref offsets across the write.
    fn write_pdf(&mut self, document: &Document) -> io::Result<usize> {
        PdfWriter::new(document).write(self)
    }
}
+
impl<W: Write> WritePdf<Size> for W {
    /// Write a size as its bare numeric value.
    fn write_pdf(&mut self, size: &Size) -> io::Result<usize> {
        // NOTE(review): relies on the `ToString` output of `Size::points`
        // being a valid PDF number (PDF user-space units are points) —
        // confirm against the `Size` definition in doc.rs.
        self.write_str(size.points)
    }
}
+
/// A type that is a sink for types that can be converted to strings
/// and thus can be written string-like into a byte sink.
pub trait WriteByteString {
    /// Write the string-like type into self, returning how many
    /// bytes were written.
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize>;
}

impl<W: Write> WriteByteString for W {
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize> {
        // Use `write_all` instead of a single `write` call: `write` is
        // allowed to perform a partial write, which would silently corrupt
        // the running byte counts of the PDF writer — and with them every
        // offset in the cross-reference table.
        let string = string_like.to_string();
        self.write_all(string.as_bytes())?;
        Ok(string.len())
    }
}
+
+
/// Writes an abstract document into a byte sink in the _PDF_ format.
#[derive(Debug, Clone)]
struct PdfWriter<'d> {
    /// The document being serialized.
    doc: &'d Document,
    /// Running count of bytes written so far (used for xref offsets).
    w: usize,
    /// Object id of the document catalog (the PDF root object).
    catalog_id: u32,
    /// Object id of the page tree.
    page_tree_id: u32,
    /// First object id of the font resource objects (one per font).
    resources_start: u32,
    /// First object id of the page objects (one per page).
    pages_start: u32,
    /// First object id of the content stream objects.
    content_start: u32,
    /// Byte offset of each written object, in object-id order.
    xref_table: Vec<u32>,
    /// Byte offset at which the xref table itself starts.
    offset_xref: u32,
}
+
+impl<'d> PdfWriter<'d> {
+ /// Create a new pdf writer from a document.
+ fn new(doc: &'d Document) -> PdfWriter<'d> {
+ // Calculate unique ids for each object
+ let catalog_id: u32 = 1;
+ let page_tree_id = catalog_id + 1;
+ let pages_start = page_tree_id + 1;
+ let resources_start = pages_start + doc.pages.len() as u32;
+ let content_start = resources_start + doc.fonts.len() as u32;
+
+ PdfWriter {
+ doc,
+ catalog_id,
+ page_tree_id,
+ resources_start,
+ pages_start,
+ content_start,
+ w: 0,
+ xref_table: vec![],
+ offset_xref: 0,
+ }
+ }
+
/// Write the document into a byte sink.
///
/// The objects are emitted in exactly the order their ids were assigned
/// in `new`, so the offsets pushed to `xref_table` line up with object
/// ids 1..=n.
fn write<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.write_header(target)?;

    self.write_document_catalog(target)?;
    self.write_page_tree(target)?;
    self.write_pages(target)?;

    self.write_resources(target)?;

    self.write_content(target)?;
    // Embedding of loaded font programs is not implemented yet.
    // self.write_fonts(target)?;

    // The xref table, trailer and startxref pointer must come last,
    // after every object offset is known.
    self.write_xref_table(target)?;
    self.write_trailer(target)?;
    self.write_start_xref(target)?;

    Ok(self.w)
}
+
+ /// Write the pdf header.
+ fn write_header<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ // Write the magic start
+ self.w += target.write(b"%PDF-1.7\n")?;
+ Ok(self.w)
+ }
+
+ /// Write the document catalog (contains general info about the document).
+ fn write_document_catalog<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.xref_table.push(self.w as u32);
+
+ self.w += target.write_str(self.catalog_id)?;
+ self.w += target.write(b" 0 obj\n")?;
+ self.w += target.write(b"<<\n")?;
+ self.w += target.write(b"/Type /Catalog\n")?;
+
+ self.w += target.write(b"/Pages ")?;
+ self.w += target.write_str(self.page_tree_id)?;
+ self.w += target.write(b" 0 R\n")?;
+
+ self.w += target.write(b">>\n")?;
+ self.w += target.write(b"endobj\n")?;
+
+ Ok(self.w)
+ }
+
/// Write the page tree (overview over the pages of a document).
fn write_page_tree<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    // Record the byte offset of this object for the xref table.
    self.xref_table.push(self.w as u32);

    // Create page tree
    self.w += target.write_str(self.page_tree_id)?;
    self.w += target.write(b" 0 obj\n")?;
    self.w += target.write(b"<<\n")?;
    self.w += target.write(b"/Type /Pages\n")?;

    self.w += target.write(b"/Count ")?;
    self.w += target.write_str(self.doc.pages.len())?;
    self.w += target.write(b"\n")?;

    // Reference every page object by its precomputed id.
    self.w += target.write(b"/Kids [")?;

    for id in self.pages_start .. self.pages_start + self.doc.pages.len() as u32 {
        self.w += target.write_str(id)?;
        self.w += target.write(b" 0 R ")?;
    }

    self.w += target.write(b"]\n")?;

    // The resource dictionary is declared once here on the page tree,
    // so individual pages do not have to repeat it.
    self.w += target.write(b"/Resources\n")?;
    self.w += target.write(b"<<\n")?;

    // Map font names /F1, /F2, ... onto the font resource objects.
    self.w += target.write(b"/Font\n")?;
    self.w += target.write(b"<<\n")?;

    let mut font_id = self.resources_start;
    for nr in 1 ..= self.doc.fonts.len() as u32 {
        self.w += target.write(b"/F")?;
        self.w += target.write_str(nr)?;
        self.w += target.write(b" ")?;
        self.w += target.write_str(font_id)?;
        self.w += target.write(b" 0 R\n")?;
        font_id += 1;
    }

    // Close the /Font dict, the /Resources dict and the object itself.
    self.w += target.write(b">>\n")?;
    self.w += target.write(b">>\n")?;

    self.w += target.write(b">>\n")?;
    self.w += target.write(b"endobj\n")?;

    Ok(self.w)
}
+
/// Write the page descriptions.
fn write_pages<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    // Page and content-stream ids advance in the same order in which
    // the objects are later written, so plain counters suffice.
    let mut page_id = self.pages_start;
    let mut content_id = self.content_start;

    for page in &self.doc.pages {
        // Record the byte offset of this object for the xref table.
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(page_id)?;
        self.w += target.write(b" 0 obj\n")?;
        self.w += target.write(b"<<\n")?;
        self.w += target.write(b"/Type /Page\n")?;

        self.w += target.write(b"/Parent ")?;
        self.w += target.write_str(self.page_tree_id)?;
        self.w += target.write(b" 0 R\n")?;

        // The media box spans from the origin to the page size.
        self.w += target.write(b"/MediaBox [0 0 ")?;
        self.w += target.write_pdf(&page.size[0])?;
        self.w += target.write(b" ")?;
        self.w += target.write_pdf(&page.size[1])?;
        self.w += target.write(b"]\n")?;

        // Reference one content stream object per text on the page.
        self.w += target.write(b"/Contents [")?;

        for _ in &page.contents {
            self.w += target.write_str(content_id)?;
            self.w += target.write(b" 0 R ")?;

            content_id += 1;
        }

        self.w += target.write(b"]\n")?;

        self.w += target.write(b">>\n")?;
        self.w += target.write(b"endobj\n")?;

        page_id += 1;
    }

    Ok(self.w)
}
+
/// Write the resources used by the file (fonts and friends).
fn write_resources<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    let mut id = self.resources_start;

    for font in &self.doc.fonts {
        // Record the byte offset of this object for the xref table.
        self.xref_table.push(self.w as u32);

        self.w += target.write_str(id)?;
        self.w += target.write(b" 0 obj\n")?;
        self.w += target.write(b"<<\n")?;
        self.w += target.write(b"/Type /Font\n")?;

        match font {
            // One of the 14 standard fonts: referenced by base name
            // only, no font program needs to be embedded.
            DocumentFont::Builtin(builtin) => {
                self.w += target.write(b"/Subtype /Type1\n")?;
                self.w += target.write(b"/BaseFont /")?;
                self.w += target.write_str(builtin.name())?;
                self.w += target.write(b"\n")?;
            },
            // Externally loaded fonts: the dictionary header is written,
            // but embedding the descriptor/font program is unfinished —
            // reaching this variant panics deliberately.
            DocumentFont::Loaded(font) => {
                self.w += target.write(b"/Subtype /TrueType\n")?;
                self.w += target.write(b"/BaseFont /")?;
                self.w += target.write_str(font.name.as_str())?;
                self.w += target.write(b"\n")?;
                unimplemented!();
            },
        }

        self.w += target.write(b">>\n")?;
        self.w += target.write(b"endobj\n")?;

        id += 1;
    }

    Ok(self.w)
}
+
+ /// Write the page contents.
+ fn write_content<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ let mut id = self.content_start;
+
+ for page in &self.doc.pages {
+ for content in &page.contents {
+ self.xref_table.push(self.w as u32);
+
+ self.w += target.write_str(id)?;
+ self.w += target.write(b" 0 obj\n")?;
+ self.w += target.write(b"<<\n")?;
+
+ let mut buffer = Vec::new();
+ buffer.write(b"BT/\n")?;
+
+ buffer.write(b"/F1 13 Tf\n")?;
+ buffer.write(b"108 734 Td\n")?;
+ buffer.write(b"(")?;
+
+ let Text(string) = content;
+ buffer.write(string.as_bytes())?;
+
+ buffer.write(b") Tj\n")?;
+ buffer.write(b"ET\n")?;
+
+ self.w += target.write(b"/Length ")?;
+ self.w += target.write_str(buffer.len())?;
+ self.w += target.write(b"\n")?;
+
+ self.w += target.write(b">>\n")?;
+
+ self.w += target.write(b"stream\n")?;
+ self.w += target.write(&buffer)?;
+ self.w += target.write(b"endstream\n")?;
+
+ self.w += target.write(b"endobj\n")?;
+
+ id += 1;
+ }
+ }
+
+ Ok(self.w)
+ }
+
+ /// Write the cross-reference table.
+ fn write_xref_table<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.offset_xref = self.w as u32;
+
+ self.w += target.write(b"xref\n")?;
+ self.w += target.write(b"0 ")?;
+ self.w += target.write_str(self.xref_table.len())?;
+ self.w += target.write(b"\n")?;
+
+ self.w += target.write(b"0000000000 65535 f\r\n")?;
+
+ for offset in &self.xref_table {
+ self.w += target.write(format!("{:010}", offset).as_bytes())?;
+ self.w += target.write(b" 00000 n")?;
+ self.w += target.write(b"\r\n")?;
+ }
+
+ Ok(self.w)
+ }
+
/// Write the trailer (points to the root object).
fn write_trailer<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
    self.w += target.write(b"trailer\n")?;
    self.w += target.write(b"<<\n")?;

    // /Root references the document catalog.
    self.w += target.write(b"/Root ")?;
    self.w += target.write_str(self.catalog_id)?;
    self.w += target.write(b" 0 R\n")?;

    // /Size is the total number of xref entries including the
    // always-free entry for object 0 — hence `len + 1`.
    self.w += target.write(b"/Size ")?;
    self.w += target.write_str(self.xref_table.len() + 1)?;
    self.w += target.write(b"\n")?;

    self.w += target.write(b">>\n")?;

    Ok(self.w)
}
+
+ /// Write where the cross-reference table starts.
+ fn write_start_xref<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
+ self.w += target.write(b"startxref\n")?;
+ self.w += target.write_str(self.offset_xref)?;
+ self.w += target.write(b"\n")?;
+
+ Ok(self.w)
+ }
+}
+
+
#[cfg(test)]
mod pdf_tests {
    use super::*;
    use crate::parsing::{Tokenize, Parse};
    use crate::doc::Generate;

    /// Create a pdf with a name from the source code.
    ///
    /// Runs the full pipeline: tokenize → parse → generate → write pdf.
    fn test(name: &str, src: &str) {
        // NOTE(review): writes to a path relative to the working
        // directory (`../target/...` from the callers below) — the test
        // fails if that directory does not exist.
        let mut file = std::fs::File::create(name).unwrap();
        let doc = src.tokenize()
            .parse().unwrap()
            .generate().unwrap();
        file.write_pdf(&doc).unwrap();
    }

    #[test]
    fn pdf_simple() {
        // Smoke tests: these only check that writing succeeds without
        // panicking; the produced files must be inspected manually.
        test("../target/write1.pdf", "This is an example of a sentence.");
        test("../target/write2.pdf","
            Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed
            diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
            voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd
            gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor
            sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut
            labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
            justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est
            Lorem ipsum dolor sit amet.
        ");
    }
}
diff --git a/src/utility.rs b/src/utility.rs
new file mode 100644
index 00000000..8304025d
--- /dev/null
+++ b/src/utility.rs
@@ -0,0 +1,138 @@
+//! Utility functionality.
+
+use std::str::Split;
+use std::iter::Peekable;
+use unicode_xid::UnicodeXID;
+
+
/// Types that can be splined.
pub trait Splinor {
    /// Returns an iterator over the substrings splitted by the pattern,
    /// intertwined with the splinor.
    ///
    /// Mirrors [`str::split`]: leading, trailing and adjacent occurrences
    /// of the pattern produce empty [`Splined::Value`] entries.
    ///
    /// # Example
    ///
    /// ```
    /// # use typeset::utility::*;
    /// #[derive(Debug, Copy, Clone, PartialEq)]
    /// struct Space;
    ///
    /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
    /// assert_eq!(v, [
    ///     Splined::Value("My"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("airplane"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("flies!"),
    /// ]);
    /// ```
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
}

impl Splinor for str {
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
        Spline {
            split: self.split(pat).peekable(),
            splinor: Splined::Splinor(splinor),
            next_splinor: false,
        }
    }
}

/// Iterator over splitted values and splinors.
///
/// Created by the [`spline`](Splinor::spline) function.
#[derive(Debug, Clone)]
pub struct Spline<'s, T> {
    splinor: Splined<'s, T>,
    split: Peekable<Split<'s, &'s str>>,
    next_splinor: bool,
}

/// Represents either a splitted substring or a splinor.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Splined<'s, T> {
    /// A substring.
    Value(&'s str),
    /// An intertwined splinor.
    Splinor(T),
}

impl<'s, T: Clone> Iterator for Spline<'s, T> {
    type Item = Splined<'s, T>;

    fn next(&mut self) -> Option<Splined<'s, T>> {
        // Alternate between the underlying split values and clones of
        // the splinor; a splinor is only emitted while more values follow.
        let emit_splinor = self.next_splinor && self.split.peek().is_some();
        self.next_splinor = !emit_splinor;

        if emit_splinor {
            Some(self.splinor.clone())
        } else {
            Some(Splined::Value(self.split.next()?))
        }
    }
}
+
+
/// More useful functions on `str`'s.
pub trait StrExt {
    /// Whether self consists only of whitespace.
    ///
    /// NOTE(review): any `'\n'` makes this return `false` — the
    /// implementation deliberately excludes newlines from "whitespace",
    /// presumably because they are significant to the tokenizer; confirm
    /// against the parser before relying on this.
    fn is_whitespace(&self) -> bool;

    /// Whether this word is a valid unicode identifier.
    fn is_identifier(&self) -> bool;
}
+
+impl StrExt for str {
+ #[inline]
+ fn is_whitespace(&self) -> bool {
+ self.chars().all(|c| c.is_whitespace() && c != '\n')
+ }
+
+ fn is_identifier(&self) -> bool {
+ let mut chars = self.chars();
+
+ match chars.next() {
+ Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+ None => return false,
+ _ => (),
+ }
+
+ while let Some(c) = chars.next() {
+ if !UnicodeXID::is_xid_continue(c) {
+ return false;
+ }
+ }
+
+ true
+ }
+}
+
+
#[cfg(test)]
mod splinor_tests {
    use super::*;
    use Splined::{Value as V, Splinor as S};

    // Dummy token type used as the splinor in the tests below.
    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Token { DoubleUnderscore }

    /// Assert that splining `string` by `pat` yields exactly `vec`.
    fn test<T>(string: &str, pat: &str, splinor: T, vec: Vec<Splined<T>>)
    where T: std::fmt::Debug + Clone + PartialEq {
        assert_eq!(string.spline(pat, splinor).collect::<Vec<_>>(), vec);
    }

    #[test]
    fn splinor() {
        let s = S(Token::DoubleUnderscore);
        // Leading/trailing patterns produce empty `Value("")` entries
        // around the splinors, mirroring `str::split`.
        test("__he__llo__world__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
        test("__Italic__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Italic"), s, V("")]);
        // No pattern at the edges: values only, splinor in between.
        test("Key__Value", "__", Token::DoubleUnderscore,
            vec![V("Key"), s, V("Value")]);
        test("__Start__NoEnd", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Start"), s, V("NoEnd")]);
        test("NoStart__End__", "__", Token::DoubleUnderscore,
            vec![V("NoStart"), s, V("End"), s, V("")]);
    }
}