diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-06-10 23:53:20 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-06-10 23:54:16 +0200 |
| commit | ed6550fdb08eae92bffab6b6b137b1e0eebf62c6 (patch) | |
| tree | 74152a38f7aa2ed2ac2fa190e81494422700ca36 /src/export | |
| parent | 6aff11057bc88257c9383137952bb41b5b85c3dc (diff) | |
Bump dependencies
Diffstat (limited to 'src/export')
| -rw-r--r-- | src/export/mod.rs | 1 | ||||
| -rw-r--r-- | src/export/pdf.rs | 103 | ||||
| -rw-r--r-- | src/export/subset.rs | 814 |
3 files changed, 65 insertions, 853 deletions
diff --git a/src/export/mod.rs b/src/export/mod.rs index b782ae13..d690f4db 100644 --- a/src/export/mod.rs +++ b/src/export/mod.rs @@ -2,7 +2,6 @@ mod pdf; mod render; -mod subset; pub use pdf::*; pub use render::*; diff --git a/src/export/pdf.rs b/src/export/pdf.rs index aa3e0876..4495bceb 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -3,6 +3,7 @@ use std::cmp::Eq; use std::collections::{BTreeMap, HashMap, HashSet}; use std::hash::Hash; +use std::io::Cursor; use std::sync::Arc; use image::{DynamicImage, GenericImageView, ImageFormat, ImageResult, Rgba}; @@ -14,7 +15,6 @@ use pdf_writer::writers::ColorSpace; use pdf_writer::{Content, Filter, Finish, Name, PdfWriter, Rect, Ref, Str, TextStr}; use ttf_parser::{name_id, GlyphId, Tag}; -use super::subset::subset; use crate::font::{find_name, FaceId, FontStore}; use crate::frame::{Destination, Element, Frame, Group, Role, Text}; use crate::geom::{ @@ -24,6 +24,7 @@ use crate::geom::{ use crate::image::{Image, ImageId, ImageStore, RasterImage}; use crate::library::prelude::EcoString; use crate::library::text::Lang; +use crate::util::SliceExt; use crate::Context; /// Export a collection of frames into a PDF file. @@ -39,7 +40,7 @@ pub fn pdf(ctx: &Context, frames: &[Arc<Frame>]) -> Vec<u8> { /// Identifies the color space definitions. const SRGB: Name<'static> = Name(b"srgb"); -const SRGB_GRAY: Name<'static> = Name(b"srgbgray"); +const D65_GRAY: Name<'static> = Name(b"d65gray"); /// An exporter for a whole PDF document. struct PdfExporter<'a> { @@ -155,23 +156,37 @@ impl<'a> PdfExporter<'a> { // Write the CID font referencing the font descriptor. let mut cid = self.writer.cid_font(cid_ref); - cid.subtype(subtype) - .base_font(base_font) - .system_info(system_info) - .font_descriptor(descriptor_ref) - .widths() - .consecutive(0, { - let num_glyphs = ttf.number_of_glyphs(); - (0 .. num_glyphs).map(|g| { - let x = ttf.glyph_hor_advance(GlyphId(g)).unwrap_or(0); - face.to_em(x).to_font_units() - }) - }); + cid.subtype(subtype); + cid.base_font(base_font); + cid.system_info(system_info); + cid.font_descriptor(descriptor_ref); + cid.default_width(0.0); if subtype == CidFontType::Type2 { cid.cid_to_gid_map_predefined(Name(b"Identity")); } + // Extract the widths of all glyphs. + let num_glyphs = ttf.number_of_glyphs(); + let mut widths = vec![0.0; num_glyphs as usize]; + for &g in glyphs { + let x = ttf.glyph_hor_advance(GlyphId(g)).unwrap_or(0); + widths[g as usize] = face.to_em(x).to_font_units(); + } + + // Write all non-zero glyph widths. + let mut first = 0; + let mut width_writer = cid.widths(); + for (w, group) in widths.group_by_key(|&w| w) { + let end = first + group.len(); + if w != 0.0 { + let last = end - 1; + width_writer.same(first as u16, last as u16, w); + } + first = end; + } + + width_writer.finish(); cid.finish(); let mut flags = FontFlags::empty(); @@ -217,7 +232,9 @@ impl<'a> PdfExporter<'a> { // Compute a reverse mapping from glyphs to unicode. let cmap = { let mut mapping = BTreeMap::new(); - for subtable in ttf.character_mapping_subtables() { + for subtable in + ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) + { if subtable.is_unicode() { subtable.codepoints(|n| { if let Some(c) = std::char::from_u32(n) { @@ -245,16 +262,24 @@ impl<'a> PdfExporter<'a> { .filter(Filter::FlateDecode); // Subset and write the face's bytes. - let buffer = face.buffer(); - let subsetted = subset(buffer, face.index(), glyphs); - let data = deflate(subsetted.as_deref().unwrap_or(buffer)); - let mut font_stream = self.writer.stream(data_ref, &data); + let data = face.buffer(); + let subsetted = { + let glyphs: Vec<_> = glyphs.iter().copied().collect(); + let profile = subsetter::Profile::pdf(&glyphs); + subsetter::subset(data, face.index(), profile) + }; + + // Compress and write the face's byte. + let data = subsetted.as_deref().unwrap_or(data); + let data = deflate(data); + let mut stream = self.writer.stream(data_ref, &data); + stream.filter(Filter::FlateDecode); if subtype == CidFontType::Type0 { - font_stream.pair(Name(b"Subtype"), Name(b"OpenType")); + stream.pair(Name(b"Subtype"), Name(b"OpenType")); } - font_stream.filter(Filter::FlateDecode).finish(); + stream.finish(); } } @@ -346,13 +371,15 @@ impl<'a> PdfExporter<'a> { .uri(Str(uri.as_str().as_bytes())); } Destination::Internal(loc) => { - let index = loc.page - 1; - let height = self.page_heights[index]; - link.action() - .action_type(ActionType::GoTo) - .destination_direct() - .page(self.page_refs[index]) - .xyz(loc.pos.x.to_f32(), height - loc.pos.y.to_f32(), None); + if (1 ..= self.page_heights.len()).contains(&loc.page) { + let index = loc.page - 1; + let height = self.page_heights[index]; + link.action() + .action_type(ActionType::GoTo) + .destination_direct() + .page(self.page_refs[index]) + .xyz(loc.pos.x.to_f32(), height - loc.pos.y.to_f32(), None); + } } } } @@ -360,9 +387,9 @@ impl<'a> PdfExporter<'a> { annotations.finish(); page_writer.finish(); - self.writer - .stream(content_id, &deflate(&page.content.finish())) - .filter(Filter::FlateDecode); + let data = page.content.finish(); + let data = deflate(&data); + self.writer.stream(content_id, &data).filter(Filter::FlateDecode); } fn write_page_tree(&mut self) { @@ -374,7 +401,7 @@ impl<'a> PdfExporter<'a> { let mut resources = pages.resources(); let mut spaces = resources.color_spaces(); spaces.insert(SRGB).start::<ColorSpace>().srgb(); - spaces.insert(SRGB_GRAY).start::<ColorSpace>().srgb_gray(); + spaces.insert(D65_GRAY).start::<ColorSpace>().d65_gray(); spaces.finish(); let mut fonts = resources.fonts(); @@ -855,7 +882,7 @@ impl<'a, 'b> PageExporter<'a, 'b> { let Paint::Solid(color) = fill; match color { Color::Luma(c) => { - self.set_fill_color_space(SRGB_GRAY); + self.set_fill_color_space(D65_GRAY); self.content.set_fill_gray(f(c.0)); } Color::Rgba(c) => { @@ -883,7 +910,7 @@ impl<'a, 'b> PageExporter<'a, 'b> { let Paint::Solid(color) = stroke.paint; match color { Color::Luma(c) => { - self.set_stroke_color_space(SRGB_GRAY); + self.set_stroke_color_space(D65_GRAY); self.content.set_stroke_gray(f(c.0)); } Color::Rgba(c) => { @@ -916,16 +943,16 @@ fn encode_image(img: &RasterImage) -> ImageResult<(Vec<u8>, Filter, bool)> { Ok(match (img.format, &img.buf) { // 8-bit gray JPEG. (ImageFormat::Jpeg, DynamicImage::ImageLuma8(_)) => { - let mut data = vec![]; + let mut data = Cursor::new(vec![]); img.buf.write_to(&mut data, img.format)?; - (data, Filter::DctDecode, false) + (data.into_inner(), Filter::DctDecode, false) } // 8-bit Rgb JPEG (Cmyk JPEGs get converted to Rgb earlier). (ImageFormat::Jpeg, DynamicImage::ImageRgb8(_)) => { - let mut data = vec![]; + let mut data = Cursor::new(vec![]); img.buf.write_to(&mut data, img.format)?; - (data, Filter::DctDecode, true) + (data.into_inner(), Filter::DctDecode, true) } // TODO: Encode flate streams with PNG-predictor? diff --git a/src/export/subset.rs b/src/export/subset.rs deleted file mode 100644 index e688d9cc..00000000 --- a/src/export/subset.rs +++ /dev/null @@ -1,814 +0,0 @@ -//! OpenType font subsetting. - -use std::borrow::Cow; -use std::collections::HashSet; -use std::iter; -use std::ops::Range; - -use ttf_parser::parser::{ - FromData, LazyArray16, LazyArray32, Offset, Offset16, Offset32, Stream, F2DOT14, -}; -use ttf_parser::Tag; - -/// Subset a font face for PDF embedding. -/// -/// This will remove the outlines of all glyphs that are not part of the given -/// slice. Furthmore, all character mapping and layout tables are dropped as -/// shaping has already happened. -/// -/// Returns `None` if the font data is fatally broken (in which case -/// `ttf-parser` would probably already have rejected the font, so this -/// shouldn't happen if the font data has already passed through `ttf-parser`). -pub fn subset(data: &[u8], index: u32, glyphs: &HashSet<u16>) -> Option<Vec<u8>> { - Some(Subsetter::new(data, index, glyphs)?.subset()) -} - -struct Subsetter<'a> { - data: &'a [u8], - magic: Magic, - records: LazyArray16<'a, TableRecord>, - num_glyphs: u16, - glyphs: &'a HashSet<u16>, - tables: Vec<(Tag, Cow<'a, [u8]>)>, -} - -impl<'a> Subsetter<'a> { - /// Parse the font header and create a new subsetter. - fn new(data: &'a [u8], index: u32, glyphs: &'a HashSet<u16>) -> Option<Self> { - let mut s = Stream::new(data); - - let mut magic = s.read::<Magic>()?; - if magic == Magic::Collection { - // Parse font collection header if necessary. - s.skip::<u32>(); - let num_faces = s.read::<u32>()?; - let offsets = s.read_array32::<Offset32>(num_faces)?; - let offset = offsets.get(index)?.to_usize(); - - s = Stream::new_at(data, offset)?; - magic = s.read::<Magic>()?; - if magic == Magic::Collection { - return None; - } - } - - // Read number of table records. - let count = s.read::<u16>()?; - - // Skip boring parts of header. - s.skip::<u16>(); - s.skip::<u16>(); - s.skip::<u16>(); - - // Read the table records. - let records = s.read_array16::<TableRecord>(count)?; - let mut subsetter = Self { - data, - magic, - records, - num_glyphs: 0, - glyphs, - tables: vec![], - }; - - // Find out number of glyphs. - let maxp = subsetter.table_data(MAXP)?; - subsetter.num_glyphs = Stream::read_at::<u16>(maxp, 4)?; - - Some(subsetter) - } - - /// Encode the subsetted font file. - fn subset(mut self) -> Vec<u8> { - self.subset_tables(); - - // Start writing a brand new font. - let mut w = Vec::new(); - w.write(self.magic); - - // Write table directory. - let count = self.tables.len() as u16; - let entry_selector = (count as f32).log2().floor() as u16; - let search_range = 2u16.pow(u32::from(entry_selector)) * 16; - let range_shift = count * 16 - search_range; - w.write(count); - w.write(search_range); - w.write(entry_selector); - w.write(range_shift); - - // Tables shall be sorted by tag. - self.tables.sort_by_key(|&(tag, _)| tag); - - // This variable will hold the offset to the checksum adjustment field - // in the head table, which we'll have to write in the end (after - // checksumming the whole font). - let mut checksum_adjustment_offset = None; - - // Write table records. - let mut offset = 12 + self.tables.len() * TableRecord::SIZE; - for (tag, data) in &mut self.tables { - if *tag == HEAD { - // Zero out checksum field in head table. - data.to_mut()[8 .. 12].fill(0); - checksum_adjustment_offset = Some(offset + 8); - } - - let len = data.len(); - w.write(TableRecord { - tag: *tag, - checksum: checksum(data), - offset: offset as u32, - length: len as u32, - }); - - // Increase offset, plus padding zeros to align to 4 bytes. - offset += len; - while offset % 4 != 0 { - offset += 1; - } - } - - // Write tables. - for (_, data) in &self.tables { - // Write data plus padding zeros to align to 4 bytes. - w.extend(data.as_ref()); - while w.len() % 4 != 0 { - w.push(0); - } - } - - // Write checksumAdjustment field in head table. - if let Some(i) = checksum_adjustment_offset { - let sum = checksum(&w); - let val = 0xB1B0AFBA_u32.wrapping_sub(sum); - w[i .. i + 4].copy_from_slice(&val.to_be_bytes()); - } - - w - } - - /// Subset, drop and copy tables. - fn subset_tables(&mut self) { - // Remove unnecessary name information. - let handled_post = post::subset(self).is_some(); - - // Remove unnecessary glyph outlines. - let handled_glyf_loca = glyf::subset(self).is_some(); - let handled_cff1 = cff::subset_v1(self).is_some(); - - for record in self.records { - // If `handled` is true, we don't take any further action, if it's - // false, we copy the table. - #[rustfmt::skip] - let handled = match &record.tag.to_bytes() { - // Drop: Glyphs are already mapped. - b"cmap" => true, - - // Drop: Layout is already finished. - b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | - b"ankr" | b"kern" | b"kerx" | b"mort" | b"morx" | - b"trak" | b"bsln" | b"just" | b"feat" | b"prop" => true, - - // Drop: They don't render in PDF viewers anyway. - // TODO: We probably have to convert fonts with one of these - // tables into Type 3 fonts where glyphs are described by either - // PDF graphics operators or XObject images. - b"CBDT" | b"CBLC" | b"COLR" | b"CPAL" | b"sbix" | b"SVG " => true, - - // Subsetted: Subsetting happens outside the loop, but if it - // failed, we simply copy the affected table(s). - b"post" => handled_post, - b"loca" | b"glyf" => handled_glyf_loca, - b"CFF " => handled_cff1, - - // Copy: All other tables are simply copied. - _ => false, - }; - - if !handled { - if let Some(data) = self.table_data(record.tag) { - self.push_table(record.tag, data); - } - } - } - } - - /// Retrieve the table data for a table. - fn table_data(&mut self, tag: Tag) -> Option<&'a [u8]> { - let (_, record) = self.records.binary_search_by(|record| record.tag.cmp(&tag))?; - let start = record.offset as usize; - let end = start + (record.length as usize); - self.data.get(start .. end) - } - - /// Push a new table. - fn push_table(&mut self, tag: Tag, data: impl Into<Cow<'a, [u8]>>) { - self.tables.push((tag, data.into())); - } -} - -// Some common tags. -const HEAD: Tag = Tag::from_bytes(b"head"); -const MAXP: Tag = Tag::from_bytes(b"maxp"); -const POST: Tag = Tag::from_bytes(b"post"); -const LOCA: Tag = Tag::from_bytes(b"loca"); -const GLYF: Tag = Tag::from_bytes(b"glyf"); -const CFF1: Tag = Tag::from_bytes(b"CFF "); - -/// Calculate a checksum over the sliced data as a sum of u32s. If the data -/// length is not a multiple of four, it is treated as if padded with zero to a -/// length that is a multiple of four. -fn checksum(data: &[u8]) -> u32 { - let mut sum = 0u32; - for chunk in data.chunks(4) { - let mut bytes = [0; 4]; - bytes[.. chunk.len()].copy_from_slice(chunk); - sum = sum.wrapping_add(u32::from_be_bytes(bytes)); - } - sum -} - -/// Zero all bytes in a slice. -fn memzero(slice: &mut [u8]) { - for byte in slice { - *byte = 0; - } -} - -/// Convenience trait for writing into a byte buffer. -trait BufExt { - fn write<T: ToData>(&mut self, v: T); -} - -impl BufExt for Vec<u8> { - fn write<T: ToData>(&mut self, v: T) { - v.write(self); - } -} - -/// A trait for writing raw binary data. -trait ToData { - fn write(&self, data: &mut Vec<u8>); -} - -impl ToData for u8 { - fn write(&self, data: &mut Vec<u8>) { - data.push(*self); - } -} - -impl ToData for u16 { - fn write(&self, data: &mut Vec<u8>) { - data.extend(&self.to_be_bytes()); - } -} - -impl ToData for Offset16 { - fn write(&self, data: &mut Vec<u8>) { - self.0.write(data); - } -} - -impl ToData for u32 { - fn write(&self, data: &mut Vec<u8>) { - data.extend(&self.to_be_bytes()); - } -} - -impl ToData for Offset32 { - fn write(&self, data: &mut Vec<u8>) { - self.0.write(data); - } -} - -impl ToData for Tag { - fn write(&self, data: &mut Vec<u8>) { - self.as_u32().write(data); - } -} - -/// Font magic number. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Magic { - TrueType, - OpenType, - Collection, -} - -impl FromData for Magic { - const SIZE: usize = 4; - - fn parse(data: &[u8]) -> Option<Self> { - match u32::parse(data)? { - 0x00010000 | 0x74727565 => Some(Magic::TrueType), - 0x4F54544F => Some(Magic::OpenType), - 0x74746366 => Some(Magic::Collection), - _ => None, - } - } -} - -impl ToData for Magic { - fn write(&self, data: &mut Vec<u8>) { - let value: u32 = match self { - Magic::TrueType => 0x00010000, - Magic::OpenType => 0x4F54544F, - Magic::Collection => 0x74746366, - }; - value.write(data); - } -} - -/// Locates a table in the font file. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct TableRecord { - tag: Tag, - checksum: u32, - offset: u32, - length: u32, -} - -impl FromData for TableRecord { - const SIZE: usize = 16; - - fn parse(data: &[u8]) -> Option<Self> { - let mut s = Stream::new(data); - Some(TableRecord { - tag: s.read::<Tag>()?, - checksum: s.read::<u32>()?, - offset: s.read::<u32>()?, - length: s.read::<u32>()?, - }) - } -} - -impl ToData for TableRecord { - fn write(&self, data: &mut Vec<u8>) { - self.tag.write(data); - self.checksum.write(data); - self.offset.write(data); - self.length.write(data); - } -} - -mod post { - use super::*; - - /// Subset the post table by removing the name information. - pub(super) fn subset(subsetter: &mut Subsetter) -> Option<()> { - // Table version three is the one without names. - let mut new = 0x00030000_u32.to_be_bytes().to_vec(); - new.extend(subsetter.table_data(POST)?.get(4 .. 32)?); - subsetter.push_table(POST, new); - Some(()) - } -} - -mod glyf { - use super::*; - - /// Subset the glyf and loca tables by clearing out glyph data for - /// unused glyphs. - pub(super) fn subset(subsetter: &mut Subsetter) -> Option<()> { - let head = subsetter.table_data(HEAD)?; - let short = Stream::read_at::<i16>(head, 50)? == 0; - if short { - subset_impl::<Offset16>(subsetter) - } else { - subset_impl::<Offset32>(subsetter) - } - } - - fn subset_impl<T>(subsetter: &mut Subsetter) -> Option<()> - where - T: LocaOffset, - { - let loca = subsetter.table_data(LOCA)?; - let glyf = subsetter.table_data(GLYF)?; - - let offsets = LazyArray32::<T>::new(loca); - let glyph_data = |id: u16| { - let from = offsets.get(u32::from(id))?.loca_to_usize(); - let to = offsets.get(u32::from(id) + 1)?.loca_to_usize(); - glyf.get(from .. to) - }; - - // The set of all glyphs we will include in the subset. - let mut subset = HashSet::new(); - - // Because glyphs may depend on other glyphs as components (also with - // multiple layers of nesting), we have to process all glyphs to find - // their components. For notdef and all requested glyphs we simply use - // an iterator, but to track other glyphs that need processing we create - // a work stack. - let mut iter = iter::once(0).chain(subsetter.glyphs.iter().copied()); - let mut work = vec![]; - - // Find composite glyph descriptions. - while let Some(id) = work.pop().or_else(|| iter.next()) { - if subset.insert(id) { - let mut s = Stream::new(glyph_data(id)?); - if let Some(num_contours) = s.read::<i16>() { - // Negative means this is a composite glyph. - if num_contours < 0 { - // Skip min/max metrics. - s.read::<i16>(); - s.read::<i16>(); - s.read::<i16>(); - s.read::<i16>(); - - // Read component glyphs. - work.extend(component_glyphs(s)); - } - } - } - } - - let mut sub_loca = vec![]; - let mut sub_glyf = vec![]; - - for id in 0 .. subsetter.num_glyphs { - // If the glyph shouldn't be contained in the subset, it will - // still get a loca entry, but the glyf data is simply empty. - sub_loca.write(T::usize_to_loca(sub_glyf.len())?); - if subset.contains(&id) { - sub_glyf.extend(glyph_data(id)?); - } - } - - sub_loca.write(T::usize_to_loca(sub_glyf.len())?); - - subsetter.push_table(LOCA, sub_loca); - subsetter.push_table(GLYF, sub_glyf); - - Some(()) - } - - trait LocaOffset: Sized + FromData + ToData { - fn loca_to_usize(self) -> usize; - fn usize_to_loca(offset: usize) -> Option<Self>; - } - - impl LocaOffset for Offset16 { - fn loca_to_usize(self) -> usize { - 2 * usize::from(self.0) - } - - fn usize_to_loca(offset: usize) -> Option<Self> { - if offset % 2 == 0 { - (offset / 2).try_into().ok().map(Self) - } else { - None - } - } - } - - impl LocaOffset for Offset32 { - fn loca_to_usize(self) -> usize { - self.0 as usize - } - - fn usize_to_loca(offset: usize) -> Option<Self> { - offset.try_into().ok().map(Self) - } - } - - /// Returns an iterator over the component glyphs referenced by the given - /// `glyf` table composite glyph description. - fn component_glyphs(mut s: Stream) -> impl Iterator<Item = u16> + '_ { - const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001; - const WE_HAVE_A_SCALE: u16 = 0x0008; - const MORE_COMPONENTS: u16 = 0x0020; - const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040; - const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080; - - let mut done = false; - iter::from_fn(move || { - if done { - return None; - } - - let flags = s.read::<u16>()?; - let component = s.read::<u16>()?; - - if flags & ARG_1_AND_2_ARE_WORDS != 0 { - s.skip::<i16>(); - s.skip::<i16>(); - } else { - s.skip::<u16>(); - } - - if flags & WE_HAVE_A_SCALE != 0 { - s.skip::<F2DOT14>(); - } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 { - s.skip::<F2DOT14>(); - s.skip::<F2DOT14>(); - } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 { - s.skip::<F2DOT14>(); - s.skip::<F2DOT14>(); - s.skip::<F2DOT14>(); - s.skip::<F2DOT14>(); - } - - done = flags & MORE_COMPONENTS == 0; - Some(component) - }) - } -} - -mod cff { - use super::*; - - /// Subset the CFF table by zeroing glyph data for unused glyphs. - pub(super) fn subset_v1(subsetter: &mut Subsetter) -> Option<()> { - let cff = subsetter.table_data(CFF1)?; - let mut s = Stream::new(cff); - - let (major, _) = (s.read::<u8>()?, s.skip::<u8>()); - if major != 1 { - return None; - } - - let header_size = s.read::<u8>()?; - s = Stream::new_at(cff, usize::from(header_size))?; - - // Skip the name index. - Index::parse_stream(&mut s); - - // Read the top dict. The index should contain only one item. - let top_dict_index = Index::parse_stream(&mut s)?; - let top_dict = Dict::parse(top_dict_index.get(0)?); - - let mut sub_cff = cff.to_vec(); - - // Because completely rebuilding the CFF structure would be pretty - // complex, for now, we employ a peculiar strategy for CFF subsetting: - // We simply replace unused data with zeros. This way, the font - // structure and offsets can stay the same. And while the CFF table - // itself doesn't shrink, the actual embedded font is compressed and - // greatly benefits from the repeated zeros. - zero_char_strings(subsetter, cff, &top_dict, &mut sub_cff); - zero_subr_indices(subsetter, cff, &top_dict, &mut sub_cff); - - subsetter.push_table(CFF1, sub_cff); - - Some(()) - } - - /// Zero unused char strings. - fn zero_char_strings( - subsetter: &Subsetter, - cff: &[u8], - top_dict: &Dict, - sub_cff: &mut [u8], - ) -> Option<()> { - let char_strings_offset = top_dict.get_offset(Op::CHAR_STRINGS)?; - let char_strings = Index::parse(cff.get(char_strings_offset ..)?)?; - - for (id, _, range) in char_strings.iter() { - if !subsetter.glyphs.contains(&id) { - let start = char_strings_offset + range.start; - let end = char_strings_offset + range.end; - memzero(sub_cff.get_mut(start .. end)?); - } - } - - Some(()) - } - - /// Zero unused local subroutine indices. We don't currently remove - /// individual subroutines because finding out which ones are used is - /// complicated. - fn zero_subr_indices( - subsetter: &Subsetter, - cff: &[u8], - top_dict: &Dict, - sub_cff: &mut [u8], - ) -> Option<()> { - // Parse FD Select data structure, which maps from glyph ids to find - // dict indices. - let fd_select_offset = top_dict.get_offset(Op::FD_SELECT)?; - let fd_select = - parse_fd_select(cff.get(fd_select_offset ..)?, subsetter.num_glyphs)?; - - // Clear local subrs from unused font dicts. - let fd_array_offset = top_dict.get_offset(Op::FD_ARRAY)?; - let fd_array = Index::parse(cff.get(fd_array_offset ..)?)?; - - // Determine which font dict's subrs to keep. - let mut sub_fds = HashSet::new(); - for &glyph in subsetter.glyphs { - sub_fds.insert(fd_select.get(usize::from(glyph))?); - } - - for (i, data, _) in fd_array.iter() { - if !sub_fds.contains(&(i as u8)) { - let font_dict = Dict::parse(data); - if let Some(private_range) = font_dict.get_range(Op::PRIVATE) { - let private_dict = Dict::parse(cff.get(private_range.clone())?); - if let Some(subrs_offset) = private_dict.get_offset(Op::SUBRS) { - let start = private_range.start + subrs_offset; - let index = Index::parse(cff.get(start ..)?)?; - let end = start + index.data.len(); - memzero(sub_cff.get_mut(start .. end)?); - } - } - } - } - - Some(()) - } - - /// Returns the font dict index for each glyph. - fn parse_fd_select(data: &[u8], num_glyphs: u16) -> Option<Cow<'_, [u8]>> { - let mut s = Stream::new(data); - let format = s.read::<u8>()?; - Some(match format { - 0 => Cow::Borrowed(s.read_bytes(usize::from(num_glyphs))?), - 3 => { - let count = usize::from(s.read::<u16>()?); - let mut fds = vec![]; - let mut start = s.read::<u16>()?; - for _ in 0 .. count { - let fd = s.read::<u8>()?; - let end = s.read::<u16>()?; - for _ in start .. end { - fds.push(fd); - } - start = end; - } - Cow::Owned(fds) - } - _ => Cow::Borrowed(&[]), - }) - } - - struct Index<'a> { - /// The data of the whole index (including its header). - data: &'a [u8], - /// The data ranges for the actual items. - items: Vec<Range<usize>>, - } - - impl<'a> Index<'a> { - fn parse(data: &'a [u8]) -> Option<Self> { - let mut s = Stream::new(data); - - let count = usize::from(s.read::<u16>()?); - - let mut items = Vec::with_capacity(count); - let mut len = 2; - - if count > 0 { - let offsize = usize::from(s.read::<u8>()?); - if !matches!(offsize, 1 ..= 4) { - return None; - } - - // Read an offset and transform it to be relative to the start - // of the index. - let data_offset = 3 + offsize * (count + 1); - let mut read_offset = || { - let mut bytes = [0u8; 4]; - bytes[4 - offsize .. 4].copy_from_slice(s.read_bytes(offsize)?); - Some(data_offset - 1 + u32::from_be_bytes(bytes) as usize) - }; - - let mut last = read_offset()?; - for _ in 0 .. count { - let offset = read_offset()?; - data.get(last .. offset)?; - items.push(last .. offset); - last = offset; - } - - len = last; - } - - Some(Self { data: data.get(.. len)?, items }) - } - - fn parse_stream(s: &'a mut Stream) -> Option<Self> { - let index = Index::parse(s.tail()?)?; - s.advance(index.data.len()); - Some(index) - } - - fn get(&self, idx: usize) -> Option<&'a [u8]> { - self.data.get(self.items.get(idx)?.clone()) - } - - fn iter(&self) -> impl Iterator<Item = (u16, &'a [u8], Range<usize>)> + '_ { - self.items - .iter() - .enumerate() - .map(move |(i, item)| (i as u16, &self.data[item.clone()], item.clone())) - } - } - - struct Dict<'a>(Vec<Pair<'a>>); - - impl<'a> Dict<'a> { - fn parse(data: &'a [u8]) -> Self { - let mut s = Stream::new(data); - Self(iter::from_fn(|| Pair::parse(&mut s)).collect()) - } - - fn get(&self, op: Op) -> Option<&[Operand<'a>]> { - self.0 - .iter() - .find(|pair| pair.op == op) - .map(|pair| pair.operands.as_slice()) - } - - fn get_offset(&self, op: Op) -> Option<usize> { - match self.get(op)? { - &[Operand::Int(offset)] if offset > 0 => usize::try_from(offset).ok(), - _ => None, - } - } - - fn get_range(&self, op: Op) -> Option<Range<usize>> { - match self.get(op)? { - &[Operand::Int(len), Operand::Int(offset)] if offset > 0 => { - let offset = usize::try_from(offset).ok()?; - let len = usize::try_from(len).ok()?; - Some(offset .. offset + len) - } - _ => None, - } - } - } - - #[derive(Debug)] - struct Pair<'a> { - operands: Vec<Operand<'a>>, - op: Op, - } - - impl<'a> Pair<'a> { - fn parse(s: &mut Stream<'a>) -> Option<Self> { - let mut operands = vec![]; - while s.clone().read::<u8>()? > 21 { - operands.push(Operand::parse(s)?); - } - Some(Self { operands, op: Op::parse(s)? }) - } - } - - #[derive(Debug, Eq, PartialEq)] - struct Op(u8, u8); - - impl Op { - const CHAR_STRINGS: Self = Self(17, 0); - const PRIVATE: Self = Self(18, 0); - const SUBRS: Self = Self(19, 0); - const FD_ARRAY: Self = Self(12, 36); - const FD_SELECT: Self = Self(12, 37); - - fn parse(s: &mut Stream) -> Option<Self> { - let b0 = s.read::<u8>()?; - match b0 { - 12 => Some(Self(b0, s.read::<u8>()?)), - 0 ..= 21 => Some(Self(b0, 0)), - _ => None, - } - } - } - - #[derive(Debug)] - enum Operand<'a> { - Int(i32), - Real(&'a [u8]), - } - - impl<'a> Operand<'a> { - fn parse(s: &mut Stream<'a>) -> Option<Self> { - let b0 = i32::from(s.read::<u8>()?); - Some(match b0 { - 30 => { - let mut len = 0; - for &byte in s.tail()? { - len += 1; - if byte & 0x0f == 0x0f { - break; - } - } - Self::Real(s.read_bytes(len)?) - } - 32 ..= 246 => Self::Int(b0 - 139), - 247 ..= 250 => { - let b1 = i32::from(s.read::<u8>()?); - Self::Int((b0 - 247) * 256 + b1 + 108) - } - 251 ..= 254 => { - let b1 = i32::from(s.read::<u8>()?); - Self::Int(-(b0 - 251) * 256 - b1 - 108) - } - 28 => Self::Int(i32::from(s.read::<i16>()?)), - 29 => Self::Int(s.read::<i32>()?), - _ => return None, - }) - } - } -} |
