diff options
| author | Laurenz <laurmaedje@gmail.com> | 2019-03-11 17:24:00 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2019-03-11 17:24:00 +0100 |
| commit | 67281c4f469716c7f2341676f2ad656d8c544ea3 (patch) | |
| tree | 4a5bd6602306369db2b9e99b7cbb405f72b816d5 /src/pdf.rs | |
| parent | 5942c3ba2ab1cd71f86749a91bc04e45da175f96 (diff) | |
Redesign document representation 🧱
Diffstat (limited to 'src/pdf.rs')
| -rw-r--r-- | src/pdf.rs | 234 |
1 files changed, 127 insertions, 107 deletions
@@ -2,7 +2,7 @@ use std::fmt; use std::io::{self, Write, Cursor}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use pdf::{PdfWriter, Id, Rect, Version, Trailer}; use pdf::doc::{Catalog, PageTree, Page, Resource, Content}; use pdf::text::Text; @@ -10,8 +10,8 @@ use pdf::font::{ Type0Font, CMapEncoding, CIDFont, CIDFontType, CIDSystemInfo, WidthRecord, FontDescriptor, FontFlags, EmbeddedFont, GlyphUnit }; -use opentype::{OpenTypeReader, tables::{self, NameEntry, MacStyleFlags}}; -use crate::doc::Document; +use opentype::{OpenTypeReader, tables::{self, MacStyleFlags}}; +use crate::doc::{self, Document, TextCommand}; use crate::font::Font; @@ -68,7 +68,7 @@ struct PdfCreator<'a, W: Write> { writer: PdfWriter<'a, W>, doc: &'a Document, offsets: Offsets, - font: PdfFont, + fonts: Vec<PdfFont>, } /// Offsets for the various groups of ids. @@ -87,33 +87,50 @@ impl<'a, W: Write> PdfCreator<'a, W> { let catalog = 1; let page_tree = catalog + 1; let pages = (page_tree + 1, page_tree + doc.pages.len() as Id); - let content_count = doc.pages.iter().flat_map(|p| p.contents.iter()).count() as Id; + let content_count = doc.pages.iter().flat_map(|p| p.text.iter()).count() as Id; let contents = (pages.1 + 1, pages.1 + content_count); - let fonts = (contents.1 + 1, contents.1 + 4); + let fonts = (contents.1 + 1, contents.1 + 4 * doc.fonts.len() as Id); + + let offsets = Offsets { + catalog, + page_tree, + pages, + contents, + fonts, + }; + + assert!(doc.fonts.len() > 0); // Find out which chars are used in this document. - let mut chars = HashSet::new(); + let mut char_sets = vec![HashSet::new(); doc.fonts.len()]; + let mut current_font: usize = 0; for page in &doc.pages { - for content in &page.contents { - chars.extend(content.0.chars()); + for text in &page.text { + for command in &text.commands { + match command { + TextCommand::Text(string) => { + char_sets[current_font].extend(string.chars()); + }, + TextCommand::SetFont(id, _) => { + assert!(*id < doc.fonts.len()); + current_font = *id; + }, + _ => {}, + } + } } } // Create a subsetted pdf font. - let data = std::fs::read(format!("../fonts/{}.ttf", doc.font))?; - let font = PdfFont::new(&doc.font, data, chars)?; + let fonts = doc.fonts.iter().enumerate().map(|(i, font)| { + PdfFont::new(font, &char_sets[i]) + }).collect::<PdfResult<Vec<_>>>()?; Ok(PdfCreator { writer: PdfWriter::new(target), doc, - offsets: Offsets { - catalog, - page_tree, - pages, - contents, - fonts, - }, - font, + offsets, + fonts, }) } @@ -154,8 +171,8 @@ impl<'a, W: Write> PdfCreator<'a, W> { // The page objects let mut id = self.offsets.pages.0; for page in &self.doc.pages { - let width = page.size[0].to_points(); - let height = page.size[1].to_points(); + let width = page.width.to_points(); + let height = page.height.to_points(); self.writer.write_obj(id, Page::new(self.offsets.page_tree) .media_box(Rect::new(0.0, 0.0, width, height)) @@ -172,77 +189,92 @@ impl<'a, W: Write> PdfCreator<'a, W> { fn write_contents(&mut self) -> PdfResult<()> { let mut id = self.offsets.contents.0; for page in &self.doc.pages { - for content in &page.contents { - self.writer.write_obj(id, &Text::new() - .set_font(1, 13.0) - .move_line(108.0, 734.0) - .write_text(&self.encode(&content.0)) - .to_stream() - )?; + for text in &page.text { + self.write_text(id, text)?; id += 1; } } + Ok(()) + } + + fn write_text(&mut self, id: u32, text: &doc::Text) -> PdfResult<()> { + let mut current_font = 0; + let encoded = text.commands.iter().filter_map(|cmd| match cmd { + TextCommand::Text(string) => Some(self.fonts[current_font].encode(&string)), + TextCommand::SetFont(id, _) => { current_font = *id; None }, + _ => None, + }).collect::<Vec<_>>(); + + let mut object = Text::new(); + let mut nr = 0; + + for command in &text.commands { + match command { + TextCommand::Text(_) => { + object.write_text(&encoded[nr]); + nr += 1; + }, + TextCommand::SetFont(id, size) => { + object.set_font(*id as u32 + 1, *size); + }, + TextCommand::Move(x, y) => { + object.move_line(x.to_points(), y.to_points()); + } + } + } + + self.writer.write_obj(id, &object.to_stream())?; Ok(()) } /// Write the fonts. fn write_fonts(&mut self) -> PdfResult<()> { - let id = self.offsets.fonts.0; - - self.writer.write_obj(id, &Type0Font::new( - self.font.name.clone(), - CMapEncoding::Predefined("Identity-H".to_owned()), - id + 1 - )).unwrap(); - - self.writer.write_obj(id + 1, - CIDFont::new( - CIDFontType::Type2, - self.font.name.clone(), - CIDSystemInfo::new("(Adobe)", "(Identity)", 0), - id + 2, - ).widths(vec![WidthRecord::start(0, self.font.widths.clone())]) - ).unwrap(); - - self.writer.write_obj(id + 2, - FontDescriptor::new( - self.font.name.clone(), - self.font.flags, - self.font.italic_angle, - ) - .font_bbox(self.font.bounding_box) - .ascent(self.font.ascender) - .descent(self.font.descender) - .cap_height(self.font.cap_height) - .stem_v(self.font.stem_v) - .font_file_3(id + 3) - ).unwrap(); - - - self.writer.write_obj(id + 3, &EmbeddedFont::OpenType(&self.font.data)).unwrap(); + let mut id = self.offsets.fonts.0; + + for font in &self.fonts { + self.writer.write_obj(id, &Type0Font::new( + font.name.clone(), + CMapEncoding::Predefined("Identity-H".to_owned()), + id + 1 + ))?; + + self.writer.write_obj(id + 1, + CIDFont::new( + CIDFontType::Type2, + font.name.clone(), + CIDSystemInfo::new("(Adobe)", "(Identity)", 0), + id + 2, + ).widths(vec![WidthRecord::start(0, font.widths.clone())]) + )?; - Ok(()) - } + self.writer.write_obj(id + 2, + FontDescriptor::new( + font.name.clone(), + font.flags, + font.italic_angle, + ) + .font_bbox(font.bounding_box) + .ascent(font.ascender) + .descent(font.descender) + .cap_height(font.cap_height) + .stem_v(font.stem_v) + .font_file_3(id + 3) + )?; + + self.writer.write_obj(id + 3, &EmbeddedFont::OpenType(&font.program))?; - /// Encode the given text for our font. - fn encode(&self, text: &str) -> Vec<u8> { - let mut bytes = Vec::with_capacity(2 * text.len()); - for glyph in text.chars().map(|c| self.font.map(c)) { - bytes.push((glyph >> 8) as u8); - bytes.push((glyph & 0xff) as u8); + id += 4; } - bytes + + Ok(()) } } /// The data we need from the font. struct PdfFont { - data: Vec<u8>, - mapping: HashMap<char, u16>, - default_glyph: u16, - name: String, + font: Font, widths: Vec<GlyphUnit>, flags: FontFlags, italic_angle: f32, @@ -256,47 +288,36 @@ struct PdfFont { impl PdfFont { /// Create a subetted version of the font and calculate some information /// needed for creating the _PDF_. - pub fn new(font_name: &str, data: Vec<u8>, chars: HashSet<char>) -> PdfResult<PdfFont> { - let mut readable = Cursor::new(&data); + pub fn new(font: &Font, chars: &HashSet<char>) -> PdfResult<PdfFont> { + let mut readable = Cursor::new(&font.program); let mut reader = OpenTypeReader::new(&mut readable); let head = reader.read_table::<tables::Header>()?; - let name = reader.read_table::<tables::Name>()?; let post = reader.read_table::<tables::Post>()?; let os2 = reader.read_table::<tables::OS2>()?; - let font = Font::new(data); - let (subsetted, mapping) = font.subsetted( - chars, + let subsetted = font.subsetted( + chars.iter().cloned(), &["head", "hhea", "maxp", "hmtx", "loca", "glyf"], &["cvt ", "prep", "fpgm", "OS/2", "cmap", "name", "post"], )?; - let unit_ratio = 1000.0 / (head.units_per_em as f32); - let convert = |x| (unit_ratio * x as f32).round() as GlyphUnit; - - let base_font = name.get_decoded(NameEntry::PostScriptName); - let font_name = base_font.unwrap_or_else(|| font_name.to_owned()); - - let mut flags = FontFlags::empty(); flags.set(FontFlags::FIXED_PITCH, post.is_fixed_pitch); - flags.set(FontFlags::SERIF, font_name.contains("Serif")); + flags.set(FontFlags::SERIF, font.name.contains("Serif")); flags.insert(FontFlags::SYMBOLIC); flags.set(FontFlags::ITALIC, head.mac_style.contains(MacStyleFlags::ITALIC)); flags.insert(FontFlags::SMALL_CAP); - let mut readable = Cursor::new(&subsetted); - let mut reader = OpenTypeReader::new(&mut readable); - let hmtx = reader.read_table::<tables::HorizontalMetrics>()?; - let widths = hmtx.metrics.iter().map(|m| convert(m.advance_width)).collect(); + let widths = subsetted.widths.iter() + .map(|w| (1000.0 * w.to_points()).round() as GlyphUnit) + .collect(); + let unit_ratio = 1.0 / (head.units_per_em as f32); + let convert = |x| (unit_ratio * x as f32).round() as GlyphUnit; Ok(PdfFont { - data: subsetted, - mapping, - default_glyph: os2.us_default_char.unwrap_or(0), - name: font_name, + font: subsetted, widths, flags, italic_angle: post.italic_angle.to_f32(), @@ -312,10 +333,13 @@ impl PdfFont { stem_v: (10.0 + 220.0 * (os2.us_weight_class as f32 - 50.0) / 900.0) as GlyphUnit, }) } +} + +impl std::ops::Deref for PdfFont { + type Target = Font; - /// Map a character to it's glyph index. - fn map(&self, c: char) -> u16 { - self.mapping.get(&c).map(|&g| g).unwrap_or(self.default_glyph) + fn deref(&self) -> &Font { + &self.font } } @@ -324,20 +348,21 @@ impl PdfFont { mod pdf_tests { use super::*; use crate::parsing::ParseTree; - use crate::doc::Generate; + use crate::engine::Typeset; /// Create a pdf with a name from the source code. fn test(name: &str, src: &str) { - let doc = src.parse_tree().unwrap().generate().unwrap(); + let doc = src.parse_tree().unwrap().typeset().unwrap(); let path = format!("../target/typeset-pdf-{}.pdf", name); let mut file = std::fs::File::create(path).unwrap(); file.write_pdf(&doc).unwrap(); } #[test] - fn pdf_simple() { + fn pdf() { test("unicode", "∑mbe∂∂ed font with Unicode!"); test("parentheses", "Text with ) and ( or (enclosed) works."); + test("composite-glyph", "Composite character‼"); test("multiline"," Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed @@ -345,9 +370,4 @@ mod pdf_tests { Stet clita kasd gubergren, no sea takimata sanctus est. "); } - - #[test] - fn pdf_composite_glyph() { - test("composite-glyph", "Composite character‼"); - } } |
