summaryrefslogtreecommitdiff
path: root/crates/typst-library/src/text
diff options
context:
space:
mode:
authorPeng Guanwen <pg999w@outlook.com>2024-12-17 22:07:45 +0800
committerGitHub <noreply@github.com>2024-12-17 14:07:45 +0000
commit73253d465192454f0dfe3fe9eef46d495b343aef (patch)
tree4d8179d7b0d7c169e283471d7df928d264192ca0 /crates/typst-library/src/text
parent54cee16c3128695089d7472451f02646c6d81521 (diff)
Support for defining which charset should be covered by a font (#5305)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
Diffstat (limited to 'crates/typst-library/src/text')
-rw-r--r--crates/typst-library/src/text/mod.rs159
-rw-r--r--crates/typst-library/src/text/shift.rs6
2 files changed, 137 insertions, 28 deletions
diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs
index 91927b57..ee81e3f2 100644
--- a/crates/typst-library/src/text/mod.rs
+++ b/crates/typst-library/src/text/mod.rs
@@ -29,6 +29,7 @@ pub use self::smartquote::*;
pub use self::space::*;
use std::fmt::{self, Debug, Formatter};
+use std::hash::Hash;
use std::sync::LazyLock;
use ecow::{eco_format, EcoString};
@@ -39,13 +40,14 @@ use rustybuzz::Feature;
use smallvec::SmallVec;
use ttf_parser::Tag;
use typst_syntax::Spanned;
+use typst_utils::singleton;
use crate::diag::{bail, warning, HintedStrResult, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict,
- Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Repr, Resolve,
- Scope, Set, Smart, StyleChain,
+ Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Regex, Repr,
+ Resolve, Scope, Set, Smart, StyleChain,
};
use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel};
use crate::model::ParElem;
@@ -94,7 +96,21 @@ pub(super) fn define(global: &mut Scope) {
/// ```
#[elem(Debug, Construct, PlainText, Repr)]
pub struct TextElem {
- /// A font family name or priority list of font family names.
+ /// A font family descriptor or priority list of font family descriptors.
+ ///
+ /// A font family descriptor can be a plain string representing the family
+ /// name or a dictionary with the following keys:
+ ///
+ /// - `name` (required): The font family name.
+ /// - `covers` (optional): Defines the Unicode codepoints for which the
+ /// family shall be used. This can be:
+ /// - A predefined coverage set:
+ /// - `{"latin-in-cjk"}` covers all codepoints except for those which
+ /// exist in Latin fonts, but should preferably be taken from CJK
+ /// fonts.
+ /// - A [regular expression]($regex) that defines exactly which codepoints
+ /// shall be covered. Accepts only the subset of regular expressions
+ /// which consist of exactly one dot, letter, or character class.
///
/// When processing text, Typst tries all specified font families in order
/// until it finds a font that has the necessary glyphs. In the example
@@ -129,6 +145,21 @@ pub struct TextElem {
///
/// This is Latin. \
/// هذا عربي.
+ ///
+ /// // Change font only for numbers.
+ /// #set text(font: (
+ /// (name: "PT Sans", covers: regex("[0-9]")),
+ /// "Libertinus Serif"
+ /// ))
+ ///
+ /// The number 123.
+ ///
+ /// // Mix Latin and CJK fonts.
+ /// #set text(font: (
+ /// (name: "Inria Serif", covers: "latin-in-cjk"),
+ /// "Noto Serif CJK SC"
+ /// ))
+ /// 分别设置“中文”和English字体
/// ```
#[parse({
let font_list: Option<Spanned<FontList>> = args.named("font")?;
@@ -766,35 +797,107 @@ impl PlainText for Packed<TextElem> {
}
/// A lowercased font family like "arial".
-#[derive(Clone, Eq, PartialEq, Hash)]
-pub struct FontFamily(EcoString);
+#[derive(Debug, Clone, PartialEq, Hash)]
+pub struct FontFamily {
+ /// The lowercased name of the font family.
+ name: EcoString,
+ /// Optional coverage restricting which Unicode codepoints use this family.
+ covers: Option<Covers>,
+}
impl FontFamily {
/// Create a named font family variant.
pub fn new(string: &str) -> Self {
- Self(string.to_lowercase().into())
+ Self::with_coverage(string, None)
+ }
+
+ /// Create a font family from a name (stored lowercased) and an optional Unicode coverage.
+ pub fn with_coverage(string: &str, covers: Option<Covers>) -> Self {
+ Self { name: string.to_lowercase().into(), covers }
}
/// The lowercased family name.
pub fn as_str(&self) -> &str {
- &self.0
+ &self.name
}
-}
-impl Debug for FontFamily {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- self.0.fmt(f)
+ /// The user-set coverage of the font family as a regex, or `None` if no coverage was set.
+ pub fn covers(&self) -> Option<&Regex> {
+ self.covers.as_ref().map(|covers| covers.as_regex())
}
}
cast! {
FontFamily,
- self => self.0.into_value(),
+ self => self.name.into_value(), // NOTE(review): only the name is emitted; `covers` is not part of the resulting value
string: EcoString => Self::new(&string),
+ mut v: Dict => {
+ let ret = Self::with_coverage(
+ &v.take("name")?.cast::<EcoString>()?, // `name` is required: a failed `take` or cast is propagated
+ v.take("covers").ok().map(|v| v.cast()).transpose()? // `covers` is optional: a failed `take` becomes `None`, a failed cast is propagated
+ );
+ v.finish(&["name", "covers"])?; // NOTE(review): presumably rejects keys other than these two — confirm against `Dict::finish`
+ ret
+ },
+}
+
+/// Defines which codepoints a font family will be used for.
+#[derive(Debug, Clone, PartialEq, Hash)]
+pub enum Covers {
+ /// Covers all codepoints except those used both in Latin and CJK fonts (listed in `Covers::as_regex`).
+ LatinInCjk,
+ /// Covers the set of codepoints for which the regex matches.
+ Regex(Regex),
+}
+
+impl Covers {
+ /// Retrieve the regex that matches the codepoints included in this coverage.
+ pub fn as_regex(&self) -> &Regex {
+ match self {
+ Self::LatinInCjk => singleton!( // NOTE(review): presumably built once and shared — confirm against `typst_utils::singleton!`
+ Regex,
+ Regex::new( // negated character class: matches any codepoint other than the ones listed
+ "[^\u{00B7}\u{2013}\u{2014}\u{2018}\u{2019}\
+ \u{201C}\u{201D}\u{2025}-\u{2027}\u{2E3A}]"
+ )
+ .unwrap() // the pattern is a constant, so a failure here would be a programming error
+ ),
+ Self::Regex(regex) => regex,
+ }
+ }
+}
+
+cast! {
+ Covers,
+ self => match self {
+ Self::LatinInCjk => "latin-in-cjk".into_value(),
+ Self::Regex(regex) => regex.into_value(),
+ },
+
+ /// Covers all codepoints except those used both in Latin and CJK fonts.
+ "latin-in-cjk" => Covers::LatinInCjk,
+
+ regex: Regex => {
+ let ast = regex_syntax::ast::parse::Parser::new().parse(regex.as_str()); // re-parse the pattern to inspect its top-level AST node
+ match ast {
+ Ok(
+ regex_syntax::ast::Ast::ClassBracketed(..)
+ | regex_syntax::ast::Ast::ClassUnicode(..)
+ | regex_syntax::ast::Ast::ClassPerl(..)
+ | regex_syntax::ast::Ast::Dot(..)
+ | regex_syntax::ast::Ast::Literal(..),
+ ) => {} // the whole pattern is a lone class, dot, or literal: accepted
+ _ => bail!( // any other AST shape, as well as a parse error, is rejected
+ "coverage regex may only use dot, letters, and character classes";
+ hint: "the regex is applied to each letter individually"
+ ),
+ }
+ Covers::Regex(regex)
+ },
}
/// Font family fallback list.
-#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
+#[derive(Debug, Default, Clone, PartialEq, Hash)] // `Eq` is dropped because `FontFamily` no longer derives it
pub struct FontList(pub Vec<FontFamily>);
impl<'a> IntoIterator for &'a FontList {
@@ -809,7 +912,7 @@ impl<'a> IntoIterator for &'a FontList {
cast! {
FontList,
self => if self.0.len() == 1 {
- self.0.into_iter().next().unwrap().0.into_value()
+ self.0.into_iter().next().unwrap().name.into_value()
} else {
self.0.into_value()
},
@@ -818,20 +921,22 @@ cast! {
}
/// Resolve a prioritized iterator over the font families.
-pub fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone {
- const FALLBACKS: &[&str] = &[
- "libertinus serif",
- "twitter color emoji",
- "noto color emoji",
- "apple color emoji",
- "segoe ui emoji",
- ];
-
- let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] };
- TextElem::font_in(styles)
+pub fn families(styles: StyleChain) -> impl Iterator<Item = &FontFamily> + Clone {
+ let fallbacks = singleton!(Vec<FontFamily>, {
+ [
+ "libertinus serif",
+ "twitter color emoji",
+ "noto color emoji",
+ "apple color emoji",
+ "segoe ui emoji",
+ ]
.into_iter()
- .map(|family| family.as_str())
- .chain(tail.iter().copied())
+ .map(FontFamily::new) // fallback families are created without a `covers` restriction
+ .collect()
+ });
+
+ let tail = if TextElem::fallback_in(styles) { fallbacks.as_slice() } else { &[] }; // fallbacks are appended only when `TextElem::fallback_in(styles)` is true
+ TextElem::font_in(styles).into_iter().chain(tail.iter()) // user-specified families first, then the fallbacks
}
/// Resolve the font variant.
diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs
index 003ecf47..9723bbf0 100644
--- a/crates/typst-library/src/text/shift.rs
+++ b/crates/typst-library/src/text/shift.rs
@@ -157,7 +157,11 @@ fn is_shapable(engine: &Engine, text: &str, styles: StyleChain) -> bool {
.select(family.as_str(), variant(styles))
.and_then(|id| world.font(id))
{
- return text.chars().all(|c| font.ttf().glyph_index(c).is_some());
+ let covers = family.covers();
+ return text.chars().all(|c| {
+ covers.map_or(true, |cov| cov.is_match(c.encode_utf8(&mut [0; 4])))
+ && font.ttf().glyph_index(c).is_some()
+ });
}
}