summaryrefslogtreecommitdiff
path: root/src/font/subset.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-06-22 20:41:04 +0200
committerLaurenz <laurmaedje@gmail.com>2019-06-22 20:41:04 +0200
commit7b2a4aa040828d1a0b512a16ac574e65ade9c736 (patch)
tree41b61b174f5e5e94372834e5e488c56d041df4e2 /src/font/subset.rs
parent864ae9f6040f16c7c95eefb13f99f94e33f8bfb8 (diff)
Split font module into submodules 🧱
Diffstat (limited to 'src/font/subset.rs')
-rw-r--r--src/font/subset.rs475
1 files changed, 475 insertions, 0 deletions
diff --git a/src/font/subset.rs b/src/font/subset.rs
new file mode 100644
index 00000000..d64f68ca
--- /dev/null
+++ b/src/font/subset.rs
@@ -0,0 +1,475 @@
+//! Subsetting of opentype fonts.
+
+use std::collections::HashMap;
+use std::io::{self, Cursor, Seek, SeekFrom};
+
+use byteorder::{BE, ReadBytesExt, WriteBytesExt};
+use opentype::{OpenTypeReader, Outlines, TableRecord, Tag};
+use opentype::tables::{Header, CharMap, MaximumProfile, HorizontalMetrics};
+
+use super::{Font, FontError, FontResult};
+
+
+/// Subsets a font.
+///
+/// Bundles the parsed state of the original font with the output that is built up table by
+/// table. Instances are created and consumed by [`Subsetter::subset`].
+#[derive(Debug)]
+pub struct Subsetter<'a> {
+ // The original font
+ /// The font that is being subsetted.
+ font: &'a Font,
+ /// Reader over the original font program, used to parse individual tables.
+ reader: OpenTypeReader<Cursor<&'a [u8]>>,
+ /// Outline kind of the original font; selects the version tag written into the new header.
+ outlines: Outlines,
+ /// Table records of the original font. Looked up by binary search on the tag, so they are
+ /// expected to be sorted by tag.
+ tables: Vec<TableRecord>,
+ /// Parsed `cmap` table, filled lazily by `read_cmap`.
+ cmap: Option<CharMap>,
+ /// Parsed `hmtx` table, filled lazily by `read_hmtx`.
+ hmtx: Option<HorizontalMetrics>,
+ /// Glyph offsets from the `loca` table in bytes, filled lazily by `read_loca`.
+ loca: Option<Vec<u32>>,
+ /// Glyph ids of the original font that are kept. The position in this list is the glyph id
+ /// in the subsetted font; position zero holds the default glyph.
+ glyphs: Vec<u16>,
+
+ // The subsetted font
+ /// The characters to keep; the character at index `i` maps to the new glyph id `1 + i`.
+ chars: Vec<char>,
+ /// Records of the tables written so far, with offsets relative to the start of `body`.
+ records: Vec<TableRecord>,
+ /// The table data written so far; `write_header` prepends the font header to it.
+ body: Vec<u8>,
+}
+
+impl<'a> Subsetter<'a> {
+    /// Subset a font. See [`Font::subsetted`] for more details.
+    ///
+    /// Keeps only the glyphs needed for `chars`. Every table named in `needed_tables` must be
+    /// present in the font; tables named in `optional_tables` are written only if they exist.
+    pub fn subset<C, I, S>(
+        font: &Font,
+        chars: C,
+        needed_tables: I,
+        optional_tables: I,
+    ) -> Result<Font, FontError>
+    where
+        C: IntoIterator<Item=char>,
+        I: IntoIterator<Item=S>,
+        S: AsRef<str>
+    {
+        // Read the basic header information up front; the reader is kept for later table parsing.
+        let mut reader = OpenTypeReader::from_slice(&font.program);
+        let outlines = reader.outlines()?;
+        let tables = reader.tables()?.to_vec();
+
+        let chars = chars.into_iter().collect::<Vec<char>>();
+
+        // One slot per character plus one for the default glyph.
+        let glyphs = Vec::with_capacity(chars.len() + 1);
+
+        Subsetter {
+            font,
+            reader,
+            outlines,
+            tables,
+            cmap: None,
+            hmtx: None,
+            loca: None,
+            glyphs,
+            chars,
+            records: Vec::new(),
+            body: Vec::new(),
+        }.run(needed_tables, optional_tables)
+    }
+
+    /// Drive the whole subsetting process and assemble the resulting font.
+    ///
+    /// Fails with `MissingTable` if a needed table is absent from the original font and with
+    /// `UnsupportedTable` if a table name cannot be parsed into a tag.
+    fn run<I, S>(mut self, needed_tables: I, optional_tables: I) -> FontResult<Font>
+    where I: IntoIterator<Item=S>, S: AsRef<str> {
+        // Determine the glyphs to keep: those for the requested characters plus everything
+        // that composite glyphs reference.
+        self.build_glyphs()?;
+
+        // Needed tables have to exist in the original font.
+        for name in needed_tables {
+            let name = name.as_ref();
+            let tag = name.parse::<Tag>()
+                .map_err(|_| FontError::UnsupportedTable(name.to_string()))?;
+
+            if !self.contains_table(tag) {
+                return Err(FontError::MissingTable(tag.to_string()));
+            }
+
+            self.write_table(tag)?;
+        }
+
+        // Optional tables are silently skipped when absent.
+        for name in optional_tables {
+            let name = name.as_ref();
+            let tag = name.parse::<Tag>()
+                .map_err(|_| FontError::UnsupportedTable(name.to_string()))?;
+
+            if self.contains_table(tag) {
+                self.write_table(tag)?;
+            }
+        }
+
+        self.write_header()?;
+
+        // Look up the width of every kept glyph in the original font.
+        let mut widths = Vec::with_capacity(self.glyphs.len());
+        for &glyph in &self.glyphs {
+            let width = self.font.widths.get(glyph as usize).cloned()
+                .take_invalid("missing glyph metrics")?;
+            widths.push(width);
+        }
+
+        // The default glyph sits at index zero, so the characters start at index one.
+        let mut mapping: HashMap<char, u16> = HashMap::new();
+        for (index, &c) in self.chars.iter().enumerate() {
+            mapping.insert(c, 1 + index as u16);
+        }
+
+        Ok(Font {
+            name: self.font.name.clone(),
+            program: self.body,
+            mapping,
+            widths,
+            default_glyph: self.font.default_glyph,
+            metrics: self.font.metrics,
+        })
+    }
+
+    /// Collect the ids of all glyphs that have to be part of the subsetted font.
+    ///
+    /// The list starts with the default glyph, followed by one glyph per requested character
+    /// (in the order of `self.chars`). If the font has a `glyf` table, the components of
+    /// composite glyphs are appended afterwards when they are not in the list yet.
+    ///
+    /// Fails with `MissingCharacter` if a character has no glyph in the font's character map and
+    /// with an invalid font error if the glyph data is truncated.
+    fn build_glyphs(&mut self) -> FontResult<()> {
+        // Component flags of composite glyphs.
+        const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
+        const WE_HAVE_A_SCALE: u16 = 0x0008;
+        const MORE_COMPONENTS: u16 = 0x0020;
+        const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
+        const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;
+
+        self.read_cmap()?;
+        let cmap = self.cmap.as_ref().unwrap();
+
+        // The default glyph should be always present, others only if used.
+        self.glyphs.push(self.font.default_glyph);
+        for &c in &self.chars {
+            let glyph = cmap.get(c).ok_or_else(|| FontError::MissingCharacter(c))?;
+            self.glyphs.push(glyph);
+        }
+
+        // Composite glyphs may need additional glyphs we do not have in our list yet. So now we
+        // have a look at the `glyf` table to check that and add glyphs we need additionally.
+        if !self.contains_table("glyf".parse().unwrap()) {
+            return Ok(());
+        }
+
+        self.read_loca()?;
+        let loca = self.loca.as_ref().unwrap();
+        let table = self.get_table_data("glyf".parse().unwrap())?;
+
+        // The list grows while we walk it, so that components of components are visited too.
+        let mut i = 0;
+        while i < self.glyphs.len() {
+            let id = self.glyphs[i];
+            i += 1;
+
+            let start = *loca.get(id as usize).take_bytes()? as usize;
+            let end = *loca.get(id as usize + 1).take_bytes()? as usize;
+            let data = table.get(start..end).take_bytes()?;
+
+            // Glyphs without outline data have no components.
+            if data.is_empty() {
+                continue;
+            }
+
+            let mut cursor = Cursor::new(data);
+
+            // A negative contour count marks a composite glyph.
+            if cursor.read_i16::<BE>()? >= 0 {
+                continue;
+            }
+
+            // Skip the bounding box.
+            cursor.seek(SeekFrom::Current(8))?;
+
+            loop {
+                let flags = cursor.read_u16::<BE>()?;
+                let component = cursor.read_u16::<BE>()?;
+
+                if !self.glyphs.contains(&component) {
+                    self.glyphs.push(component);
+                }
+
+                // This was the last component
+                if flags & MORE_COMPONENTS == 0 {
+                    break;
+                }
+
+                // Skip the two arguments and the optional transformation so that the cursor
+                // ends up at the next component record.
+                let mut skip: i64 = if flags & ARG_1_AND_2_ARE_WORDS != 0 { 4 } else { 2 };
+                if flags & WE_HAVE_A_SCALE != 0 {
+                    skip += 2;
+                } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
+                    skip += 4;
+                } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
+                    skip += 8;
+                }
+                cursor.seek(SeekFrom::Current(skip))?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Prepend the font header and the table directory to the already written table data.
+    ///
+    /// Afterwards `self.body` holds the header followed by the tables. The record offsets,
+    /// which are relative to the start of the table data, are shifted by the header length
+    /// when they are written.
+    fn write_header(&mut self) -> FontResult<()> {
+        // The table records have to be sorted ascendingly by tag, which is also what makes the
+        // binary search over the records of a font work. The tables themselves may appear in
+        // any order in the body since each record carries its own offset.
+        self.records.sort_by_key(|record| record.tag);
+
+        // Create an output buffer that is large enough for the header and the tables.
+        let header_len = 12 + self.records.len() * 16;
+        let mut header = Vec::with_capacity(header_len + self.body.len());
+
+        let num_tables = self.records.len() as u16;
+
+        // The exponent of the highest power of two that is not greater than the table count
+        // (zero if there are no tables at all).
+        let entry_selector = if num_tables == 0 {
+            0
+        } else {
+            15 - num_tables.leading_zeros() as u16
+        };
+        let max_power = if num_tables == 0 { 0 } else { 1u16 << entry_selector };
+
+        let search_range = max_power * 16;
+        let range_shift = num_tables * 16 - search_range;
+
+        // Write the base header
+        header.write_u32::<BE>(match self.outlines {
+            Outlines::TrueType => 0x00010000,
+            Outlines::CFF => 0x4f54544f,
+        })?;
+        header.write_u16::<BE>(num_tables)?;
+        header.write_u16::<BE>(search_range)?;
+        header.write_u16::<BE>(entry_selector)?;
+        header.write_u16::<BE>(range_shift)?;
+
+        // Write the table records
+        for record in &self.records {
+            header.extend(record.tag.value());
+            header.write_u32::<BE>(record.check_sum)?;
+            header.write_u32::<BE>(header_len as u32 + record.offset)?;
+            header.write_u32::<BE>(record.length)?;
+        }
+
+        // Move the tables behind the header and make the result the new body.
+        header.append(&mut self.body);
+        self.body = header;
+
+        Ok(())
+    }
+
+    /// Write the subsetted version of the table with the given tag into the body and record it.
+    ///
+    /// - `cvt `, `prep`, `fpgm`, `name`, `post` and `OS/2` are copied unchanged.
+    /// - `head` is copied with the location format set to long offsets, matching the `loca`
+    ///   table written here.
+    /// - `hhea` and `maxp` are copied with the glyph / metric count replaced.
+    /// - `hmtx`, `loca` and `glyf` are rebuilt for the kept glyphs; component references in
+    ///   composite glyphs are rewritten to the new glyph ids.
+    /// - `cmap` is rebuilt as a single format 12 subtable for the kept characters.
+    ///
+    /// Any other tag yields an `UnsupportedTable` error. Truncated or inconsistent table data
+    /// yields an invalid font error.
+    ///
+    /// NOTE(review): the copied `head` table keeps the original `checkSumAdjustment` value.
+    fn write_table(&mut self, tag: Tag) -> FontResult<()> {
+        // Component flags of composite glyphs in the `glyf` table.
+        const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
+        const WE_HAVE_A_SCALE: u16 = 0x0008;
+        const MORE_COMPONENTS: u16 = 0x0020;
+        const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
+        const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;
+
+        // Byte offset of the 16-bit `indexToLocFormat` field inside the `head` table.
+        const INDEX_TO_LOC_FORMAT_OFFSET: usize = 50;
+
+        match tag.value() {
+            b"cvt " | b"prep" | b"fpgm" | b"name" | b"post" | b"OS/2" => {
+                self.copy_table(tag)
+            },
+
+            b"head" => {
+                let table = self.get_table_data(tag)?;
+                let before = table.get(..INDEX_TO_LOC_FORMAT_OFFSET).take_bytes()?;
+                let after = table.get((INDEX_TO_LOC_FORMAT_OFFSET + 2)..).take_bytes()?;
+                self.write_table_body(tag, |this| {
+                    this.body.extend(before);
+                    // The `loca` table is always written with 32-bit offsets, so the header
+                    // has to announce the long format regardless of the original font.
+                    this.body.write_i16::<BE>(1)?;
+                    this.body.extend(after);
+                    Ok(())
+                })
+            },
+
+            b"hhea" => {
+                let table = self.get_table_data(tag)?;
+                let glyph_count = self.glyphs.len() as u16;
+                // The number of horizontal metrics is the last 16-bit field of the table.
+                let split = table.len().checked_sub(2).take_bytes()?;
+                self.write_table_body(tag, |this| {
+                    this.body.extend(&table[..split]);
+                    this.body.write_u16::<BE>(glyph_count)?;
+                    Ok(())
+                })
+            },
+
+            b"maxp" => {
+                let table = self.get_table_data(tag)?;
+                let glyph_count = self.glyphs.len() as u16;
+                // The glyph count follows directly after the four version bytes.
+                let version = table.get(..4).take_bytes()?;
+                let rest = table.get(6..).take_bytes()?;
+                self.write_table_body(tag, |this| {
+                    this.body.extend(version);
+                    this.body.write_u16::<BE>(glyph_count)?;
+                    this.body.extend(rest);
+                    Ok(())
+                })
+            },
+
+            b"hmtx" => {
+                self.write_table_body(tag, |this| {
+                    this.read_hmtx()?;
+                    let metrics = this.hmtx.as_ref().unwrap();
+
+                    // One full metric entry per kept glyph, in the new glyph order.
+                    for &glyph in &this.glyphs {
+                        let metrics = metrics.get(glyph).take_invalid("missing glyph metrics")?;
+
+                        this.body.write_i16::<BE>(metrics.advance_width)?;
+                        this.body.write_i16::<BE>(metrics.left_side_bearing)?;
+                    }
+                    Ok(())
+                })
+            },
+
+            b"loca" => {
+                self.write_table_body(tag, |this| {
+                    this.read_loca()?;
+                    let loca = this.loca.as_ref().unwrap();
+
+                    // Write the running offset of each kept glyph followed by the end offset.
+                    let mut offset = 0;
+                    for &glyph in &this.glyphs {
+                        this.body.write_u32::<BE>(offset)?;
+                        let start = *loca.get(glyph as usize).take_bytes()?;
+                        let end = *loca.get(glyph as usize + 1).take_bytes()?;
+                        let len = end.checked_sub(start)
+                            .take_invalid("glyph locations are not ascending")?;
+                        offset += len;
+                    }
+                    this.body.write_u32::<BE>(offset)?;
+                    Ok(())
+                })
+            },
+
+            b"glyf" => {
+                self.write_table_body(tag, |this| {
+                    this.read_loca()?;
+                    let loca = this.loca.as_ref().unwrap();
+                    let table = this.get_table_data(tag)?;
+
+                    for &glyph in &this.glyphs {
+                        let start = *loca.get(glyph as usize).take_bytes()? as usize;
+                        let end = *loca.get(glyph as usize + 1).take_bytes()? as usize;
+
+                        let mut data = table.get(start..end).take_bytes()?.to_vec();
+
+                        if end > start {
+                            let mut cursor = Cursor::new(&mut data);
+                            let num_contours = cursor.read_i16::<BE>()?;
+
+                            // This is a composite glyph
+                            if num_contours < 0 {
+                                // Skip the bounding box.
+                                cursor.seek(SeekFrom::Current(8))?;
+                                loop {
+                                    let flags = cursor.read_u16::<BE>()?;
+
+                                    // Replace the referenced glyph id by its new position.
+                                    let glyph_index = cursor.read_u16::<BE>()?;
+                                    let new_glyph_index = this.glyphs.iter()
+                                        .position(|&g| g == glyph_index)
+                                        .take_invalid("referenced non-existent glyph")? as u16;
+
+                                    cursor.seek(SeekFrom::Current(-2))?;
+                                    cursor.write_u16::<BE>(new_glyph_index)?;
+
+                                    // This was the last component
+                                    if flags & MORE_COMPONENTS == 0 {
+                                        break;
+                                    }
+
+                                    // Skip the two arguments and the optional transformation
+                                    // to reach the next component record.
+                                    let mut skip: i64 = if flags & ARG_1_AND_2_ARE_WORDS != 0 {
+                                        4
+                                    } else {
+                                        2
+                                    };
+                                    if flags & WE_HAVE_A_SCALE != 0 {
+                                        skip += 2;
+                                    } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
+                                        skip += 4;
+                                    } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
+                                        skip += 8;
+                                    }
+                                    cursor.seek(SeekFrom::Current(skip))?;
+                                }
+                            }
+                        }
+
+                        this.body.extend(data);
+                    }
+                    Ok(())
+                })
+            },
+
+            b"cmap" => {
+                // Always uses format 12 for simplicity
+                self.write_table_body(tag, |this| {
+                    // Find out which chars are in consecutive groups
+                    let mut groups = Vec::new();
+                    let len = this.chars.len();
+                    let mut i = 0;
+                    while i < len {
+                        let start = i;
+                        while i + 1 < len && this.chars[i+1] as u32 == this.chars[i] as u32 + 1 {
+                            i += 1;
+                        }
+
+                        // Add one to the start because we inserted the default glyph in front.
+                        let glyph = 1 + start;
+                        groups.push((this.chars[start], this.chars[i], glyph));
+                        i += 1;
+                    }
+
+                    // The groups have to be ordered by their start char code. Each group
+                    // carries its own start glyph, so reordering them is safe.
+                    groups.sort_by_key(|group| group.0);
+
+                    // Table header: version, number of subtables, then one encoding record
+                    // (platform id, encoding id, offset of the subtable).
+                    this.body.write_u16::<BE>(0)?;
+                    this.body.write_u16::<BE>(1)?;
+                    this.body.write_u16::<BE>(3)?;
+                    this.body.write_u16::<BE>(1)?;
+                    this.body.write_u32::<BE>(12)?;
+
+                    // Subtable header: format, reserved, byte length, language, group count.
+                    this.body.write_u16::<BE>(12)?;
+                    this.body.write_u16::<BE>(0)?;
+                    this.body.write_u32::<BE>((16 + 12 * groups.len()) as u32)?;
+                    this.body.write_u32::<BE>(0)?;
+                    this.body.write_u32::<BE>(groups.len() as u32)?;
+
+                    // Subtable body: start char, end char and start glyph of each group.
+                    for group in &groups {
+                        this.body.write_u32::<BE>(group.0 as u32)?;
+                        this.body.write_u32::<BE>(group.1 as u32)?;
+                        this.body.write_u32::<BE>(group.2 as u32)?;
+                    }
+
+                    Ok(())
+                })
+            },
+
+            _ => Err(FontError::UnsupportedTable(tag.to_string())),
+        }
+    }
+
+    /// Write the table with the given tag into the body exactly as it is in the original font.
+    fn copy_table(&mut self, tag: Tag) -> FontResult<()> {
+        self.write_table_body(tag, |subsetter| {
+            let data = subsetter.get_table_data(tag)?;
+            subsetter.body.extend(data);
+            Ok(())
+        })
+    }
+
+    /// Run `writer` to append the data of one table to the body and add a record for it.
+    ///
+    /// The data is padded with zeros to a multiple of four bytes. The recorded length is the
+    /// unpadded one, while the checksum covers the padded data. The recorded offset is relative
+    /// to the start of the body.
+    fn write_table_body<F>(&mut self, tag: Tag, writer: F) -> FontResult<()>
+    where F: FnOnce(&mut Self) -> FontResult<()> {
+        let start = self.body.len();
+        writer(self)?;
+        let end = self.body.len();
+
+        // Fill up to the next four-byte boundary, measured from the table start.
+        let padding = (4 - (end - start) % 4) % 4;
+        self.body.resize(end + padding, 0);
+
+        let check_sum = calculate_check_sum(&self.body[start..]);
+        self.records.push(TableRecord {
+            tag,
+            check_sum,
+            offset: start as u32,
+            length: (end - start) as u32,
+        });
+
+        Ok(())
+    }
+
+    /// Return the raw bytes of the table with the given tag from the original font.
+    ///
+    /// Fails with `MissingTable` if the font has no such table and with an invalid font error
+    /// if the table record points outside of the font program.
+    fn get_table_data(&self, tag: Tag) -> FontResult<&'a [u8]> {
+        let index = self.tables.binary_search_by_key(&tag, |r| r.tag)
+            .map_err(|_| FontError::MissingTable(tag.to_string()))?;
+        let record = &self.tables[index];
+
+        // Offset and length come straight from the font file, so their sum is computed
+        // with an overflow check instead of in plain `u32` arithmetic.
+        let start = record.offset as usize;
+        let end = start.checked_add(record.length as usize).take_bytes()?;
+
+        self.font.program
+            .get(start..end)
+            .take_bytes()
+    }
+
+    /// Whether the original font has a table with the given tag.
+    fn contains_table(&self, tag: Tag) -> bool {
+        match self.tables.binary_search_by_key(&tag, |record| record.tag) {
+            Ok(_) => true,
+            Err(_) => false,
+        }
+    }
+
+    /// Parse the character map of the original font unless that already happened.
+    fn read_cmap(&mut self) -> FontResult<()> {
+        if self.cmap.is_some() {
+            return Ok(());
+        }
+
+        let cmap = self.reader.read_table::<CharMap>()?;
+        self.cmap = Some(cmap);
+        Ok(())
+    }
+
+    /// Parse the horizontal metrics of the original font unless that already happened.
+    fn read_hmtx(&mut self) -> FontResult<()> {
+        if self.hmtx.is_some() {
+            return Ok(());
+        }
+
+        let hmtx = self.reader.read_table::<HorizontalMetrics>()?;
+        self.hmtx = Some(hmtx);
+        Ok(())
+    }
+
+    /// Parse the glyph locations of the original font unless that already happened.
+    ///
+    /// The resulting list holds one byte offset into the `glyf` table per glyph plus a final
+    /// end offset. Offsets of the short format (format zero in the font header) are stored
+    /// halved in the font and are doubled here, so both formats yield byte offsets.
+    fn read_loca(&mut self) -> FontResult<()> {
+        if self.loca.is_some() {
+            return Ok(());
+        }
+
+        let mut table = self.get_table_data("loca".parse().unwrap())?;
+        let format = self.reader.read_table::<Header>()?.index_to_loc_format;
+
+        // There is one more offset than there are glyphs. The count is widened before adding
+        // one so that a font with the maximum number of glyphs does not overflow.
+        let count = self.reader.read_table::<MaximumProfile>()?.num_glyphs as u32 + 1;
+
+        let offsets: io::Result<Vec<u32>> = if format == 0 {
+            (0..count)
+                .map(|_| table.read_u16::<BE>().map(|x| u32::from(x) * 2))
+                .collect()
+        } else {
+            (0..count)
+                .map(|_| table.read_u32::<BE>())
+                .collect()
+        };
+
+        self.loca = Some(offsets?);
+        Ok(())
+    }
+}
+
+/// Compute the checksum of table data: the wrapping sum of all big-endian 32-bit words.
+///
+/// The data length is expected to be a multiple of four; trailing bytes that do not fill a
+/// whole word are not part of the sum.
+fn calculate_check_sum(data: &[u8]) -> u32 {
+    data.chunks_exact(4)
+        .map(|word| {
+            (u32::from(word[0]) << 24)
+                | (u32::from(word[1]) << 16)
+                | (u32::from(word[2]) << 8)
+                | u32::from(word[3])
+        })
+        .fold(0u32, u32::wrapping_add)
+}
+
+/// Shorthand for turning missing values into invalid font errors while subsetting.
+trait TakeInvalid<T>: Sized {
+    /// Unwrap the contained value or fail with an invalid font error carrying `message`.
+    fn take_invalid<S>(self, message: S) -> FontResult<T>
+    where S: Into<String>;
+
+    /// Like [`take_invalid`](TakeInvalid::take_invalid), using the fixed message
+    /// "expected more bytes".
+    fn take_bytes(self) -> FontResult<T> {
+        Self::take_invalid(self, "expected more bytes")
+    }
+}
+
+impl<T> TakeInvalid<T> for Option<T> {
+    /// Turn `None` into an invalid font error with the given message.
+    ///
+    /// The error, including its message string, is only built when the value is actually
+    /// missing, so the common success path does not allocate.
+    fn take_invalid<S: Into<String>>(self, message: S) -> FontResult<T> {
+        self.ok_or_else(|| FontError::InvalidFont(message.into()))
+    }
+}