diff options
| author | Laurenz <laurmaedje@gmail.com> | 2019-06-22 20:41:04 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2019-06-22 20:41:04 +0200 |
| commit | 7b2a4aa040828d1a0b512a16ac574e65ade9c736 (patch) | |
| tree | 41b61b174f5e5e94372834e5e488c56d041df4e2 /src/font/subset.rs | |
| parent | 864ae9f6040f16c7c95eefb13f99f94e33f8bfb8 (diff) | |
Split font module into submodules 🧱
Diffstat (limited to 'src/font/subset.rs')
| -rw-r--r-- | src/font/subset.rs | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/src/font/subset.rs b/src/font/subset.rs new file mode 100644 index 00000000..d64f68ca --- /dev/null +++ b/src/font/subset.rs @@ -0,0 +1,475 @@ +//! Subsetting of opentype fonts. + +use std::collections::HashMap; +use std::io::{self, Cursor, Seek, SeekFrom}; + +use byteorder::{BE, ReadBytesExt, WriteBytesExt}; +use opentype::{OpenTypeReader, Outlines, TableRecord, Tag}; +use opentype::tables::{Header, CharMap, MaximumProfile, HorizontalMetrics}; + +use super::{Font, FontError, FontResult}; + + +/// Subsets a font. +#[derive(Debug)] +pub struct Subsetter<'a> { + // The original font + font: &'a Font, + reader: OpenTypeReader<Cursor<&'a [u8]>>, + outlines: Outlines, + tables: Vec<TableRecord>, + cmap: Option<CharMap>, + hmtx: Option<HorizontalMetrics>, + loca: Option<Vec<u32>>, + glyphs: Vec<u16>, + + // The subsetted font + chars: Vec<char>, + records: Vec<TableRecord>, + body: Vec<u8>, +} + +impl<'a> Subsetter<'a> { + /// Subset a font. See [`Font::subetted`] for more details. + pub fn subset<C, I, S>( + font: &Font, + chars: C, + needed_tables: I, + optional_tables: I, + ) -> Result<Font, FontError> + where + C: IntoIterator<Item=char>, + I: IntoIterator<Item=S>, + S: AsRef<str> + { + // Parse some header information and keep the reading around. + let mut reader = OpenTypeReader::from_slice(&font.program); + let outlines = reader.outlines()?; + let tables = reader.tables()?.to_vec(); + + let chars: Vec<_> = chars.into_iter().collect(); + + let subsetter = Subsetter { + font, + reader, + outlines, + tables, + cmap: None, + hmtx: None, + loca: None, + glyphs: Vec::with_capacity(1 + chars.len()), + chars, + records: vec![], + body: vec![], + }; + + subsetter.run(needed_tables, optional_tables) + } + + fn run<I, S>(mut self, needed_tables: I, optional_tables: I) -> FontResult<Font> + where I: IntoIterator<Item=S>, S: AsRef<str> { + // Find out which glyphs to include based on which characters we want and which glyphs are + // used by other composite glyphs. + self.build_glyphs()?; + + // Iterate through the needed tables first + for table in needed_tables.into_iter() { + let table = table.as_ref(); + let tag: Tag = table.parse() + .map_err(|_| FontError::UnsupportedTable(table.to_string()))?; + + if self.contains_table(tag) { + self.write_table(tag)?; + } else { + return Err(FontError::MissingTable(tag.to_string())); + } + } + + // Now iterate through the optional tables + for table in optional_tables.into_iter() { + let table = table.as_ref(); + let tag: Tag = table.parse() + .map_err(|_| FontError::UnsupportedTable(table.to_string()))?; + + if self.contains_table(tag) { + self.write_table(tag)?; + } + } + + self.write_header()?; + + // Build the new widths. + let widths = self.glyphs.iter() + .map(|&glyph| { + self.font.widths.get(glyph as usize).map(|&w| w) + .take_invalid("missing glyph metrics") + }).collect::<FontResult<Vec<_>>>()?; + + // We add one to the index here because we added the default glyph to the front. + let mapping = self.chars.into_iter().enumerate().map(|(i, c)| (c, 1 + i as u16)) + .collect::<HashMap<char, u16>>(); + + Ok(Font { + name: self.font.name.clone(), + program: self.body, + mapping, + widths, + default_glyph: self.font.default_glyph, + metrics: self.font.metrics, + }) + } + + fn build_glyphs(&mut self) -> FontResult<()> { + self.read_cmap()?; + let cmap = self.cmap.as_ref().unwrap(); + + // The default glyph should be always present, others only if used. + self.glyphs.push(self.font.default_glyph); + for &c in &self.chars { + let glyph = cmap.get(c).ok_or_else(|| FontError::MissingCharacter(c))?; + self.glyphs.push(glyph); + } + + // Composite glyphs may need additional glyphs we do not have in our list yet. So now we + // have a look at the `glyf` table to check that and add glyphs we need additionally. + if self.contains_table("glyf".parse().unwrap()) { + self.read_loca()?; + let loca = self.loca.as_ref().unwrap(); + let table = self.get_table_data("glyf".parse().unwrap())?; + + let mut i = 0; + while i < self.glyphs.len() { + let glyph = self.glyphs[i]; + + let start = *loca.get(glyph as usize).take_bytes()? as usize; + let end = *loca.get(glyph as usize + 1).take_bytes()? as usize; + + let glyph = table.get(start..end).take_bytes()?; + + if end > start { + let mut cursor = Cursor::new(&glyph); + let num_contours = cursor.read_i16::<BE>()?; + + // This is a composite glyph + if num_contours < 0 { + cursor.seek(SeekFrom::Current(8))?; + loop { + let flags = cursor.read_u16::<BE>()?; + let glyph_index = cursor.read_u16::<BE>()?; + + if self.glyphs.iter().rev().find(|&&x| x == glyph_index).is_none() { + self.glyphs.push(glyph_index); + } + + // This was the last component + if flags & 0x0020 == 0 { + break; + } + + let args_len = if flags & 0x0001 == 1 { 4 } else { 2 }; + cursor.seek(SeekFrom::Current(args_len))?; + } + } + } + + i += 1; + } + } + + Ok(()) + } + + fn write_header(&mut self) -> FontResult<()> { + // Create an output buffer + let header_len = 12 + self.records.len() * 16; + let mut header = Vec::with_capacity(header_len); + + let num_tables = self.records.len() as u16; + + // The highester power lower than the table count. + let mut max_power = 1u16; + while max_power * 2 <= num_tables { + max_power *= 2; + } + max_power = std::cmp::min(max_power, num_tables); + + let search_range = max_power * 16; + let entry_selector = (max_power as f32).log2() as u16; + let range_shift = num_tables * 16 - search_range; + + // Write the base header + header.write_u32::<BE>(match self.outlines { + Outlines::TrueType => 0x00010000, + Outlines::CFF => 0x4f54544f, + })?; + header.write_u16::<BE>(num_tables)?; + header.write_u16::<BE>(search_range)?; + header.write_u16::<BE>(entry_selector)?; + header.write_u16::<BE>(range_shift)?; + + // Write the table records + for record in &self.records { + header.extend(record.tag.value()); + header.write_u32::<BE>(record.check_sum)?; + header.write_u32::<BE>(header_len as u32 + record.offset)?; + header.write_u32::<BE>(record.length)?; + } + + header.append(&mut self.body); + self.body = header; + + Ok(()) + } + + fn write_table(&mut self, tag: Tag) -> FontResult<()> { + match tag.value() { + b"head" | b"cvt " | b"prep" | b"fpgm" | b"name" | b"post" | b"OS/2" => { + self.copy_table(tag) + }, + b"hhea" => { + let table = self.get_table_data(tag)?; + let glyph_count = self.glyphs.len() as u16; + self.write_table_body(tag, |this| { + this.body.extend(&table[..table.len() - 2]); + Ok(this.body.write_u16::<BE>(glyph_count)?) + }) + }, + b"maxp" => { + let table = self.get_table_data(tag)?; + let glyph_count = self.glyphs.len() as u16; + self.write_table_body(tag, |this| { + this.body.extend(&table[..4]); + this.body.write_u16::<BE>(glyph_count)?; + Ok(this.body.extend(&table[6..])) + }) + }, + b"hmtx" => { + self.write_table_body(tag, |this| { + this.read_hmtx()?; + let metrics = this.hmtx.as_ref().unwrap(); + + for &glyph in &this.glyphs { + let metrics = metrics.get(glyph).take_invalid("missing glyph metrics")?; + + this.body.write_i16::<BE>(metrics.advance_width)?; + this.body.write_i16::<BE>(metrics.left_side_bearing)?; + } + Ok(()) + }) + }, + b"loca" => { + self.write_table_body(tag, |this| { + this.read_loca()?; + let loca = this.loca.as_ref().unwrap(); + + let mut offset = 0; + for &glyph in &this.glyphs { + this.body.write_u32::<BE>(offset)?; + let len = loca.get(glyph as usize + 1).take_bytes()? + - loca.get(glyph as usize).take_bytes()?; + offset += len; + } + this.body.write_u32::<BE>(offset)?; + Ok(()) + }) + }, + + b"glyf" => { + self.write_table_body(tag, |this| { + this.read_loca()?; + let loca = this.loca.as_ref().unwrap(); + let table = this.get_table_data(tag)?; + + for &glyph in &this.glyphs { + let start = *loca.get(glyph as usize).take_bytes()? as usize; + let end = *loca.get(glyph as usize + 1).take_bytes()? as usize; + + let mut data = table.get(start..end).take_bytes()?.to_vec(); + + if end > start { + let mut cursor = Cursor::new(&mut data); + let num_contours = cursor.read_i16::<BE>()?; + + // This is a composite glyph + if num_contours < 0 { + cursor.seek(SeekFrom::Current(8))?; + loop { + let flags = cursor.read_u16::<BE>()?; + + let glyph_index = cursor.read_u16::<BE>()?; + let new_glyph_index = this.glyphs.iter() + .position(|&g| g == glyph_index) + .take_invalid("referenced non-existent glyph")? as u16; + + cursor.seek(SeekFrom::Current(-2))?; + cursor.write_u16::<BE>(new_glyph_index)?; + + // This was the last component + if flags & 0x0020 == 0 { + break; + } + + + let args_len = if flags & 0x0001 == 1 { 4 } else { 2 }; + cursor.seek(SeekFrom::Current(args_len))?; + } + } + } + + this.body.extend(data); + } + Ok(()) + }) + }, + + b"cmap" => { + // Always uses format 12 for simplicity + self.write_table_body(tag, |this| { + // Find out which chars are in consecutive groups + let mut groups = Vec::new(); + let len = this.chars.len(); + let mut i = 0; + while i < len { + let start = i; + while i + 1 < len && this.chars[i+1] as u32 == this.chars[i] as u32 + 1 { + i += 1; + } + + // Add one to the start because we inserted the default glyph in front. + let glyph = 1 + start; + groups.push((this.chars[start], this.chars[i], glyph)); + i += 1; + } + + // Table header + this.body.write_u16::<BE>(0)?; + this.body.write_u16::<BE>(1)?; + this.body.write_u16::<BE>(3)?; + this.body.write_u16::<BE>(1)?; + this.body.write_u32::<BE>(12)?; + + // Subtable header + this.body.write_u16::<BE>(12)?; + this.body.write_u16::<BE>(0)?; + this.body.write_u32::<BE>((16 + 12 * groups.len()) as u32)?; + this.body.write_u32::<BE>(0)?; + this.body.write_u32::<BE>(groups.len() as u32)?; + + // Subtable body + for group in &groups { + this.body.write_u32::<BE>(group.0 as u32)?; + this.body.write_u32::<BE>(group.1 as u32)?; + this.body.write_u32::<BE>(group.2 as u32)?; + } + + Ok(()) + }) + }, + + _ => Err(FontError::UnsupportedTable(tag.to_string())), + } + } + + fn copy_table(&mut self, tag: Tag) -> FontResult<()> { + self.write_table_body(tag, |this| { + let table = this.get_table_data(tag)?; + Ok(this.body.extend(table)) + }) + } + + fn write_table_body<F>(&mut self, tag: Tag, writer: F) -> FontResult<()> + where F: FnOnce(&mut Self) -> FontResult<()> { + let start = self.body.len(); + writer(self)?; + let end = self.body.len(); + while (self.body.len() - start) % 4 != 0 { + self.body.push(0); + } + + Ok(self.records.push(TableRecord { + tag, + check_sum: calculate_check_sum(&self.body[start..]), + offset: start as u32, + length: (end - start) as u32, + })) + } + + fn get_table_data(&self, tag: Tag) -> FontResult<&'a [u8]> { + let record = match self.tables.binary_search_by_key(&tag, |r| r.tag) { + Ok(index) => &self.tables[index], + Err(_) => return Err(FontError::MissingTable(tag.to_string())), + }; + + self.font.program + .get(record.offset as usize .. (record.offset + record.length) as usize) + .take_bytes() + } + + /// Whether this font contains some table. + fn contains_table(&self, tag: Tag) -> bool { + self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok() + } + + fn read_cmap(&mut self) -> FontResult<()> { + Ok(if self.cmap.is_none() { + self.cmap = Some(self.reader.read_table::<CharMap>()?); + }) + } + + fn read_hmtx(&mut self) -> FontResult<()> { + Ok(if self.hmtx.is_none() { + self.hmtx = Some(self.reader.read_table::<HorizontalMetrics>()?); + }) + } + + fn read_loca(&mut self) -> FontResult<()> { + Ok(if self.loca.is_none() { + let mut table = self.get_table_data("loca".parse().unwrap())?; + let format = self.reader.read_table::<Header>()?.index_to_loc_format; + let count = self.reader.read_table::<MaximumProfile>()?.num_glyphs + 1; + + let loca = if format == 0 { + (0..count).map(|_| table.read_u16::<BE>() + .map(|x| (x as u32) * 2)) + .collect::<io::Result<Vec<u32>>>() + } else { + (0..count).map(|_| table.read_u32::<BE>()) + .collect::<io::Result<Vec<u32>>>() + }?; + + self.loca = Some(loca); + }) + } +} + +/// Calculate a checksum over the sliced data as sum of u32's. The data length has to be a multiple +/// of four. +fn calculate_check_sum(data: &[u8]) -> u32 { + let mut sum = 0u32; + data.chunks_exact(4).for_each(|c| { + sum = sum.wrapping_add( + ((c[0] as u32) << 24) + + ((c[1] as u32) << 16) + + ((c[2] as u32) << 8) + + (c[3] as u32) + ); + }); + sum +} + +/// Helper trait to create subsetting errors more easily. +trait TakeInvalid<T>: Sized { + /// Pull the type out of the option, returning an invalid font error if self was not valid. + fn take_invalid<S: Into<String>>(self, message: S) -> FontResult<T>; + + /// Same as above with predefined message "expected more bytes". + fn take_bytes(self) -> FontResult<T> { + self.take_invalid("expected more bytes") + } +} + +impl<T> TakeInvalid<T> for Option<T> { + fn take_invalid<S: Into<String>>(self, message: S) -> FontResult<T> { + self.ok_or(FontError::InvalidFont(message.into())) + } +} |
