diff options
Diffstat (limited to 'src/font/book.rs')
| -rw-r--r-- | src/font/book.rs | 546 |
1 files changed, 0 insertions, 546 deletions
diff --git a/src/font/book.rs b/src/font/book.rs deleted file mode 100644 index 2b7742bf..00000000 --- a/src/font/book.rs +++ /dev/null @@ -1,546 +0,0 @@ -use std::cmp::Reverse; -use std::collections::BTreeMap; - -use serde::{Deserialize, Serialize}; -use ttf_parser::{name_id, PlatformId, Tag}; -use unicode_segmentation::UnicodeSegmentation; - -use super::{Font, FontStretch, FontStyle, FontVariant, FontWeight}; - -/// Metadata about a collection of fonts. -#[derive(Default, Clone, Hash)] -pub struct FontBook { - /// Maps from lowercased family names to font indices. - families: BTreeMap<String, Vec<usize>>, - /// Metadata about each font in the collection. - infos: Vec<FontInfo>, -} - -impl FontBook { - /// Create a new, empty font book. - pub fn new() -> Self { - Self { families: BTreeMap::new(), infos: vec![] } - } - - /// Create a font book for a collection of fonts. - pub fn from_fonts<'a>(fonts: impl IntoIterator<Item = &'a Font>) -> Self { - let mut book = Self::new(); - for font in fonts { - book.push(font.info().clone()); - } - book - } - - /// Insert metadata into the font book. - pub fn push(&mut self, info: FontInfo) { - let index = self.infos.len(); - let family = info.family.to_lowercase(); - self.families.entry(family).or_default().push(index); - self.infos.push(info); - } - - /// Get the font info for the given index. - pub fn info(&self, index: usize) -> Option<&FontInfo> { - self.infos.get(index) - } - - /// An ordered iterator over all font families this book knows and details - /// about the fonts that are part of them. - pub fn families( - &self, - ) -> impl Iterator<Item = (&str, impl Iterator<Item = &FontInfo>)> + '_ { - // Since the keys are lowercased, we instead use the family field of the - // first face's info. - self.families.values().map(|ids| { - let family = self.infos[ids[0]].family.as_str(); - let infos = ids.iter().map(|&id| &self.infos[id]); - (family, infos) - }) - } - - /// Try to find a font from the given `family` that matches the given - /// `variant` as closely as possible. - /// - /// The `family` should be all lowercase. - pub fn select(&self, family: &str, variant: FontVariant) -> Option<usize> { - let ids = self.families.get(family)?; - self.find_best_variant(None, variant, ids.iter().copied()) - } - - /// Iterate over all variants of a family. - pub fn select_family(&self, family: &str) -> impl Iterator<Item = usize> + '_ { - self.families - .get(family) - .map(|vec| vec.as_slice()) - .unwrap_or_default() - .iter() - .copied() - } - - /// Try to find and load a fallback font that - /// - is as close as possible to the font `like` (if any) - /// - is as close as possible to the given `variant` - /// - is suitable for shaping the given `text` - pub fn select_fallback( - &self, - like: Option<&FontInfo>, - variant: FontVariant, - text: &str, - ) -> Option<usize> { - // Find the fonts that contain the text's first char ... - let c = text.chars().next()?; - let ids = self - .infos - .iter() - .enumerate() - .filter(|(_, info)| info.coverage.contains(c as u32)) - .map(|(index, _)| index); - - // ... and find the best variant among them. - self.find_best_variant(like, variant, ids) - } - - /// Find the font in the passed iterator that - /// - is closest to the font `like` (if any) - /// - is closest to the given `variant` - /// - /// To do that we compute a key for all variants and select the one with the - /// minimal key. This key prioritizes: - /// - If `like` is some other font: - /// - Are both fonts (not) monospaced? - /// - Do both fonts (not) have serifs? - /// - How many words do the families share in their prefix? E.g. "Noto - /// Sans" and "Noto Sans Arabic" share two words, whereas "IBM Plex - /// Arabic" shares none with "Noto Sans", so prefer "Noto Sans Arabic" - /// if `like` is "Noto Sans". In case there are two equally good - /// matches, we prefer the shorter one because it is less special (e.g. - /// if `like` is "Noto Sans Arabic", we prefer "Noto Sans" over "Noto - /// Sans CJK HK".) - /// - The style (normal / italic / oblique). If we want italic or oblique - /// but it doesn't exist, the other one of the two is still better than - /// normal. - /// - The absolute distance to the target stretch. - /// - The absolute distance to the target weight. - fn find_best_variant( - &self, - like: Option<&FontInfo>, - variant: FontVariant, - ids: impl IntoIterator<Item = usize>, - ) -> Option<usize> { - let mut best = None; - let mut best_key = None; - - for id in ids { - let current = &self.infos[id]; - let key = ( - like.map(|like| { - ( - current.flags.contains(FontFlags::MONOSPACE) - != like.flags.contains(FontFlags::MONOSPACE), - current.flags.contains(FontFlags::SERIF) - != like.flags.contains(FontFlags::SERIF), - Reverse(shared_prefix_words(¤t.family, &like.family)), - current.family.len(), - ) - }), - current.variant.style.distance(variant.style), - current.variant.stretch.distance(variant.stretch), - current.variant.weight.distance(variant.weight), - ); - - if best_key.map_or(true, |b| key < b) { - best = Some(id); - best_key = Some(key); - } - } - - best - } -} - -/// Properties of a single font. -#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] -pub struct FontInfo { - /// The typographic font family this font is part of. - pub family: String, - /// Properties that distinguish this font from other fonts in the same - /// family. - pub variant: FontVariant, - /// Properties of the font. - pub flags: FontFlags, - /// The unicode coverage of the font. - pub coverage: Coverage, -} - -bitflags::bitflags! { - /// Bitflags describing characteristics of a font. - #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] - #[derive(Serialize, Deserialize)] - #[serde(transparent)] - pub struct FontFlags: u32 { - /// All glyphs have the same width. - const MONOSPACE = 1 << 0; - /// Glyphs have short strokes at their stems. - const SERIF = 1 << 1; - } -} - -impl FontInfo { - /// Compute metadata for all fonts in the given data. - pub fn iter(data: &[u8]) -> impl Iterator<Item = FontInfo> + '_ { - let count = ttf_parser::fonts_in_collection(data).unwrap_or(1); - (0..count).filter_map(move |index| { - let ttf = ttf_parser::Face::parse(data, index).ok()?; - Self::from_ttf(&ttf) - }) - } - - /// Compute metadata for a single ttf-parser face. - pub(super) fn from_ttf(ttf: &ttf_parser::Face) -> Option<Self> { - // We cannot use Name ID 16 "Typographic Family", because for some - // fonts it groups together more than just Style / Weight / Stretch - // variants (e.g. Display variants of Noto fonts) and then some - // variants become inaccessible from Typst. And even though the - // fsSelection bit WWS should help us decide whether that is the - // case, it's wrong for some fonts (e.g. for certain variants of "Noto - // Sans Display"). - // - // So, instead we use Name ID 1 "Family" and trim many common - // suffixes for which know that they just describe styling (e.g. - // "ExtraBold"). - // - // Also, for Noto fonts we use Name ID 4 "Full Name" instead, - // because Name ID 1 "Family" sometimes contains "Display" and - // sometimes doesn't for the Display variants and that mixes things - // up. - let family = { - let mut family = find_name(ttf, name_id::FAMILY)?; - if family.starts_with("Noto") - || family.starts_with("NewCM") - || family.starts_with("NewComputerModern") - { - family = find_name(ttf, name_id::FULL_NAME)?; - } - typographic_family(&family).to_string() - }; - - let variant = { - let mut full = find_name(ttf, name_id::FULL_NAME).unwrap_or_default(); - full.make_ascii_lowercase(); - - // Some fonts miss the relevant bits for italic or oblique, so - // we also try to infer that from the full name. - let italic = ttf.is_italic() || full.contains("italic"); - let oblique = - ttf.is_oblique() || full.contains("oblique") || full.contains("slanted"); - - let style = match (italic, oblique) { - (false, false) => FontStyle::Normal, - (true, _) => FontStyle::Italic, - (_, true) => FontStyle::Oblique, - }; - - let weight = { - let mut number = ttf.weight().to_number(); - if (family.starts_with("NewCM") - || family.starts_with("New Computer Modern")) - && full.contains("book") - { - number += 50; - } - FontWeight::from_number(number) - }; - - let stretch = FontStretch::from_number(ttf.width().to_number()); - FontVariant { style, weight, stretch } - }; - - // Determine the unicode coverage. - let mut codepoints = vec![]; - for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) { - if subtable.is_unicode() { - subtable.codepoints(|c| codepoints.push(c)); - } - } - - let mut flags = FontFlags::empty(); - flags.set(FontFlags::MONOSPACE, ttf.is_monospaced()); - - // Determine whether this is a serif or sans-serif font. - if let Some(panose) = ttf - .raw_face() - .table(Tag::from_bytes(b"OS/2")) - .and_then(|os2| os2.get(32..45)) - { - if matches!(panose, [2, 2..=10, ..]) { - flags.insert(FontFlags::SERIF); - } - } - - Some(FontInfo { - family, - variant, - flags, - coverage: Coverage::from_vec(codepoints), - }) - } -} - -/// Try to find and decode the name with the given id. -pub(super) fn find_name(ttf: &ttf_parser::Face, name_id: u16) -> Option<String> { - ttf.names().into_iter().find_map(|entry| { - if entry.name_id == name_id { - if let Some(string) = entry.to_string() { - return Some(string); - } - - if entry.platform_id == PlatformId::Macintosh && entry.encoding_id == 0 { - return Some(decode_mac_roman(entry.name)); - } - } - - None - }) -} - -/// Decode mac roman encoded bytes into a string. -fn decode_mac_roman(coded: &[u8]) -> String { - #[rustfmt::skip] - const TABLE: [char; 128] = [ - 'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è', - 'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü', - '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø', - '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø', - '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ', - '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', 'fi', 'fl', - '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô', - '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ', - ]; - - fn char_from_mac_roman(code: u8) -> char { - if code < 128 { - code as char - } else { - TABLE[(code - 128) as usize] - } - } - - coded.iter().copied().map(char_from_mac_roman).collect() -} - -/// Trim style naming from a family name and fix bad names. -fn typographic_family(mut family: &str) -> &str { - // Separators between names, modifiers and styles. - const SEPARATORS: [char; 3] = [' ', '-', '_']; - - // Modifiers that can appear in combination with suffixes. - const MODIFIERS: &[&str] = - &["extra", "ext", "ex", "x", "semi", "sem", "sm", "demi", "dem", "ultra"]; - - // Style suffixes. - #[rustfmt::skip] - const SUFFIXES: &[&str] = &[ - "normal", "italic", "oblique", "slanted", - "thin", "th", "hairline", "light", "lt", "regular", "medium", "med", - "md", "bold", "bd", "demi", "extb", "black", "blk", "bk", "heavy", - "narrow", "condensed", "cond", "cn", "cd", "compressed", "expanded", "exp" - ]; - - let mut extra = [].as_slice(); - let newcm = family.starts_with("NewCM") || family.starts_with("NewComputerModern"); - if newcm { - extra = &["book"]; - } - - // Trim spacing and weird leading dots in Apple fonts. - family = family.trim().trim_start_matches('.'); - - // Lowercase the string so that the suffixes match case-insensitively. - let lower = family.to_ascii_lowercase(); - let mut len = usize::MAX; - let mut trimmed = lower.as_str(); - - // Trim style suffixes repeatedly. - while trimmed.len() < len { - len = trimmed.len(); - - // Find style suffix. - let mut t = trimmed; - let mut shortened = false; - while let Some(s) = SUFFIXES.iter().chain(extra).find_map(|s| t.strip_suffix(s)) { - shortened = true; - t = s; - } - - if !shortened { - break; - } - - // Strip optional separator. - if let Some(s) = t.strip_suffix(SEPARATORS) { - trimmed = s; - t = s; - } - - // Also allow an extra modifier, but apply it only if it is separated it - // from the text before it (to prevent false positives). - if let Some(t) = MODIFIERS.iter().find_map(|s| t.strip_suffix(s)) { - if let Some(stripped) = t.strip_suffix(SEPARATORS) { - trimmed = stripped; - } - } - } - - // Apply style suffix trimming. - family = &family[..len]; - - if newcm { - family = family.trim_end_matches("10"); - } - - // Fix bad names. - match family { - "Noto Sans Symbols2" => "Noto Sans Symbols 2", - "NewComputerModern" => "New Computer Modern", - "NewComputerModernMono" => "New Computer Modern Mono", - "NewComputerModernSans" => "New Computer Modern Sans", - "NewComputerModernMath" => "New Computer Modern Math", - "NewCMUncial" | "NewComputerModernUncial" => "New Computer Modern Uncial", - other => other, - } -} - -/// How many words the two strings share in their prefix. -fn shared_prefix_words(left: &str, right: &str) -> usize { - left.unicode_words() - .zip(right.unicode_words()) - .take_while(|(l, r)| l == r) - .count() -} - -/// A compactly encoded set of codepoints. -/// -/// The set is represented by alternating specifications of how many codepoints -/// are not in the set and how many are in the set. -/// -/// For example, for the set `{2, 3, 4, 9, 10, 11, 15, 18, 19}`, there are: -/// - 2 codepoints not inside (0, 1) -/// - 3 codepoints inside (2, 3, 4) -/// - 4 codepoints not inside (5, 6, 7, 8) -/// - 3 codepoints inside (9, 10, 11) -/// - 3 codepoints not inside (12, 13, 14) -/// - 1 codepoint inside (15) -/// - 2 codepoints not inside (16, 17) -/// - 2 codepoints inside (18, 19) -/// -/// So the resulting encoding is `[2, 3, 4, 3, 3, 1, 2, 2]`. -#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] -#[serde(transparent)] -pub struct Coverage(Vec<u32>); - -impl Coverage { - /// Encode a vector of codepoints. - pub fn from_vec(mut codepoints: Vec<u32>) -> Self { - codepoints.sort(); - codepoints.dedup(); - - let mut runs = Vec::new(); - let mut next = 0; - - for c in codepoints { - if let Some(run) = runs.last_mut().filter(|_| c == next) { - *run += 1; - } else { - runs.push(c - next); - runs.push(1); - } - - next = c + 1; - } - - Self(runs) - } - - /// Whether the codepoint is covered. - pub fn contains(&self, c: u32) -> bool { - let mut inside = false; - let mut cursor = 0; - - for &run in &self.0 { - if (cursor..cursor + run).contains(&c) { - return inside; - } - cursor += run; - inside = !inside; - } - - false - } - - /// Iterate over all covered codepoints. - pub fn iter(&self) -> impl Iterator<Item = u32> + '_ { - let mut inside = false; - let mut cursor = 0; - self.0.iter().flat_map(move |run| { - let range = if inside { cursor..cursor + run } else { 0..0 }; - inside = !inside; - cursor += run; - range - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_trim_styles() { - assert_eq!(typographic_family("Atma Light"), "Atma"); - assert_eq!(typographic_family("eras bold"), "eras"); - assert_eq!(typographic_family("footlight mt light"), "footlight mt"); - assert_eq!(typographic_family("times new roman"), "times new roman"); - assert_eq!(typographic_family("noto sans mono cond sembd"), "noto sans mono"); - assert_eq!(typographic_family("noto serif SEMCOND sembd"), "noto serif"); - assert_eq!(typographic_family("crimson text"), "crimson text"); - assert_eq!(typographic_family("footlight light"), "footlight"); - assert_eq!(typographic_family("Noto Sans"), "Noto Sans"); - assert_eq!(typographic_family("Noto Sans Light"), "Noto Sans"); - assert_eq!(typographic_family("Noto Sans Semicondensed Heavy"), "Noto Sans"); - assert_eq!(typographic_family("Familx"), "Familx"); - assert_eq!(typographic_family("Font Ultra"), "Font Ultra"); - assert_eq!(typographic_family("Font Ultra Bold"), "Font"); - } - - #[test] - fn test_coverage() { - #[track_caller] - fn test(set: &[u32], runs: &[u32]) { - let coverage = Coverage::from_vec(set.to_vec()); - assert_eq!(coverage.0, runs); - - let max = 5 + set.iter().copied().max().unwrap_or_default(); - for c in 0..max { - assert_eq!(set.contains(&c), coverage.contains(c)); - } - } - - test(&[], &[]); - test(&[0], &[0, 1]); - test(&[1], &[1, 1]); - test(&[0, 1], &[0, 2]); - test(&[0, 1, 3], &[0, 2, 1, 1]); - test( - // {2, 3, 4, 9, 10, 11, 15, 18, 19} - &[18, 19, 2, 4, 9, 11, 15, 3, 3, 10], - &[2, 3, 4, 3, 3, 1, 2, 2], - ) - } - - #[test] - fn test_coverage_iter() { - let codepoints = vec![2, 3, 7, 8, 9, 14, 15, 19, 21]; - let coverage = Coverage::from_vec(codepoints.clone()); - assert_eq!(coverage.iter().collect::<Vec<_>>(), codepoints); - } -} |
