diff options
| author | Peng Guanwen <pg999w@outlook.com> | 2024-12-17 22:07:45 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-17 14:07:45 +0000 |
| commit | 73253d465192454f0dfe3fe9eef46d495b343aef (patch) | |
| tree | 4d8179d7b0d7c169e283471d7df928d264192ca0 /crates/typst-library/src/text | |
| parent | 54cee16c3128695089d7472451f02646c6d81521 (diff) | |
Support for defining which charset should be covered by a font (#5305)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
Diffstat (limited to 'crates/typst-library/src/text')
| -rw-r--r-- | crates/typst-library/src/text/mod.rs | 159 | ||||
| -rw-r--r-- | crates/typst-library/src/text/shift.rs | 6 |
2 files changed, 137 insertions, 28 deletions
diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs index 91927b57..ee81e3f2 100644 --- a/crates/typst-library/src/text/mod.rs +++ b/crates/typst-library/src/text/mod.rs @@ -29,6 +29,7 @@ pub use self::smartquote::*; pub use self::space::*; use std::fmt::{self, Debug, Formatter}; +use std::hash::Hash; use std::sync::LazyLock; use ecow::{eco_format, EcoString}; @@ -39,13 +40,14 @@ use rustybuzz::Feature; use smallvec::SmallVec; use ttf_parser::Tag; use typst_syntax::Spanned; +use typst_utils::singleton; use crate::diag::{bail, warning, HintedStrResult, SourceResult}; use crate::engine::Engine; use crate::foundations::{ cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict, - Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Repr, Resolve, - Scope, Set, Smart, StyleChain, + Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Regex, Repr, + Resolve, Scope, Set, Smart, StyleChain, }; use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel}; use crate::model::ParElem; @@ -94,7 +96,21 @@ pub(super) fn define(global: &mut Scope) { /// ``` #[elem(Debug, Construct, PlainText, Repr)] pub struct TextElem { - /// A font family name or priority list of font family names. + /// A font family descriptor or priority list of font family descriptor. + /// + /// A font family descriptor can be a plain string representing the family + /// name or a dictionary with the following keys: + /// + /// - `name` (required): The font family name. + /// - `covers` (optional): Defines the Unicode codepoints for which the + /// family shall be used. This can be: + /// - A predefined coverage set: + /// - `{"latin-in-cjk"}` covers all codepoints except for those which + /// exist in Latin fonts, but should preferrably be taken from CJK + /// fonts. + /// - A [regular expression]($regex) that defines exactly which codepoints + /// shall be covered. Accepts only the subset of regular expressions + /// which consist of exactly one dot, letter, or character class. /// /// When processing text, Typst tries all specified font families in order /// until it finds a font that has the necessary glyphs. In the example @@ -129,6 +145,21 @@ pub struct TextElem { /// /// This is Latin. \ /// هذا عربي. + /// + /// // Change font only for numbers. + /// #set text(font: ( + /// (name: "PT Sans", covers: regex("[0-9]")), + /// "Libertinus Serif" + /// )) + /// + /// The number 123. + /// + /// // Mix Latin and CJK fonts. + /// #set text(font: ( + /// (name: "Inria Serif", covers: "latin-in-cjk"), + /// "Noto Serif CJK SC" + /// )) + /// 分别设置“中文”和English字体 /// ``` #[parse({ let font_list: Option<Spanned<FontList>> = args.named("font")?; @@ -766,35 +797,107 @@ impl PlainText for Packed<TextElem> { } /// A lowercased font family like "arial". -#[derive(Clone, Eq, PartialEq, Hash)] -pub struct FontFamily(EcoString); +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct FontFamily { + // The name of the font family + name: EcoString, + // A regex that defines the Unicode codepoints supported by the font. + covers: Option<Covers>, +} impl FontFamily { /// Create a named font family variant. pub fn new(string: &str) -> Self { - Self(string.to_lowercase().into()) + Self::with_coverage(string, None) + } + + /// Create a font family by name and optional Unicode coverage. + pub fn with_coverage(string: &str, covers: Option<Covers>) -> Self { + Self { name: string.to_lowercase().into(), covers } } /// The lowercased family name. pub fn as_str(&self) -> &str { - &self.0 + &self.name } -} -impl Debug for FontFamily { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.0.fmt(f) + /// The user-set coverage of the font family. + pub fn covers(&self) -> Option<&Regex> { + self.covers.as_ref().map(|covers| covers.as_regex()) } } cast! { FontFamily, - self => self.0.into_value(), + self => self.name.into_value(), string: EcoString => Self::new(&string), + mut v: Dict => { + let ret = Self::with_coverage( + &v.take("name")?.cast::<EcoString>()?, + v.take("covers").ok().map(|v| v.cast()).transpose()? + ); + v.finish(&["name", "covers"])?; + ret + }, +} + +/// Defines which codepoints a font family will be used for. +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum Covers { + /// Covers all codepoints except those used both in Latin and CJK fonts. + LatinInCjk, + /// Covers the set of codepoints for which the regex matches. + Regex(Regex), +} + +impl Covers { + /// Retrieve the regex for the coverage. + pub fn as_regex(&self) -> &Regex { + match self { + Self::LatinInCjk => singleton!( + Regex, + Regex::new( + "[^\u{00B7}\u{2013}\u{2014}\u{2018}\u{2019}\ + \u{201C}\u{201D}\u{2025}-\u{2027}\u{2E3A}]" + ) + .unwrap() + ), + Self::Regex(regex) => regex, + } + } +} + +cast! { + Covers, + self => match self { + Self::LatinInCjk => "latin-in-cjk".into_value(), + Self::Regex(regex) => regex.into_value(), + }, + + /// Covers all codepoints except those used both in Latin and CJK fonts. + "latin-in-cjk" => Covers::LatinInCjk, + + regex: Regex => { + let ast = regex_syntax::ast::parse::Parser::new().parse(regex.as_str()); + match ast { + Ok( + regex_syntax::ast::Ast::ClassBracketed(..) + | regex_syntax::ast::Ast::ClassUnicode(..) + | regex_syntax::ast::Ast::ClassPerl(..) + | regex_syntax::ast::Ast::Dot(..) + | regex_syntax::ast::Ast::Literal(..), + ) => {} + _ => bail!( + "coverage regex may only use dot, letters, and character classes"; + hint: "the regex is applied to each letter individually" + ), + } + Covers::Regex(regex) + }, } /// Font family fallback list. -#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Default, Clone, PartialEq, Hash)] pub struct FontList(pub Vec<FontFamily>); impl<'a> IntoIterator for &'a FontList { @@ -809,7 +912,7 @@ impl<'a> IntoIterator for &'a FontList { cast! { FontList, self => if self.0.len() == 1 { - self.0.into_iter().next().unwrap().0.into_value() + self.0.into_iter().next().unwrap().name.into_value() } else { self.0.into_value() }, @@ -818,20 +921,22 @@ cast! { } /// Resolve a prioritized iterator over the font families. -pub fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone { - const FALLBACKS: &[&str] = &[ - "libertinus serif", - "twitter color emoji", - "noto color emoji", - "apple color emoji", - "segoe ui emoji", - ]; - - let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] }; - TextElem::font_in(styles) +pub fn families(styles: StyleChain) -> impl Iterator<Item = &FontFamily> + Clone { + let fallbacks = singleton!(Vec<FontFamily>, { + [ + "libertinus serif", + "twitter color emoji", + "noto color emoji", + "apple color emoji", + "segoe ui emoji", + ] .into_iter() - .map(|family| family.as_str()) - .chain(tail.iter().copied()) + .map(FontFamily::new) + .collect() + }); + + let tail = if TextElem::fallback_in(styles) { fallbacks.as_slice() } else { &[] }; + TextElem::font_in(styles).into_iter().chain(tail.iter()) } /// Resolve the font variant. diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs index 003ecf47..9723bbf0 100644 --- a/crates/typst-library/src/text/shift.rs +++ b/crates/typst-library/src/text/shift.rs @@ -157,7 +157,11 @@ fn is_shapable(engine: &Engine, text: &str, styles: StyleChain) -> bool { .select(family.as_str(), variant(styles)) .and_then(|id| world.font(id)) { - return text.chars().all(|c| font.ttf().glyph_index(c).is_some()); + let covers = family.covers(); + return text.chars().all(|c| { + covers.map_or(true, |cov| cov.is_match(c.encode_utf8(&mut [0; 4]))) + && font.ttf().glyph_index(c).is_some() + }); } } |
