summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeng Guanwen <pg999w@outlook.com>2024-12-17 22:07:45 +0800
committerGitHub <noreply@github.com>2024-12-17 14:07:45 +0000
commit73253d465192454f0dfe3fe9eef46d495b343aef (patch)
tree4d8179d7b0d7c169e283471d7df928d264192ca0
parent54cee16c3128695089d7472451f02646c6d81521 (diff)
Support for defining which charset should be covered by a font (#5305)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
-rw-r--r--Cargo.lock1
-rw-r--r--Cargo.toml1
-rw-r--r--crates/typst-layout/src/image.rs2
-rw-r--r--crates/typst-layout/src/inline/shaping.rs42
-rw-r--r--crates/typst-layout/src/math/mod.rs2
-rw-r--r--crates/typst-library/Cargo.toml1
-rw-r--r--crates/typst-library/src/text/mod.rs159
-rw-r--r--crates/typst-library/src/text/shift.rs6
-rw-r--r--crates/typst-macros/src/lib.rs2
-rw-r--r--tests/ref/text-font-covers-chinese.pngbin0 -> 2619 bytes
-rw-r--r--tests/ref/text-font-covers-numbers.pngbin0 -> 500 bytes
-rw-r--r--tests/suite/text/font.typ37
12 files changed, 211 insertions, 42 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 94ce026e..e6c1cf0f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2931,6 +2931,7 @@ dependencies = [
"qcms",
"rayon",
"regex",
+ "regex-syntax",
"roxmltree",
"rust_decimal",
"rustybuzz",
diff --git a/Cargo.toml b/Cargo.toml
index b20d54e8..f4afefa4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -94,6 +94,7 @@ qcms = "0.3.0"
quote = "1"
rayon = "1.7.0"
regex = "1"
+regex-syntax = "0.8"
resvg = { version = "0.43", default-features = false, features = ["raster-images"] }
roxmltree = "0.20"
rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] }
diff --git a/crates/typst-layout/src/image.rs b/crates/typst-layout/src/image.rs
index 628fe10d..f44d6887 100644
--- a/crates/typst-layout/src/image.rs
+++ b/crates/typst-layout/src/image.rs
@@ -54,7 +54,7 @@ pub fn layout_image(
format,
elem.alt(styles),
engine.world,
- &families(styles).collect::<Vec<_>>(),
+ &families(styles).map(|f| f.as_str()).collect::<Vec<_>>(),
elem.flatten_text(styles),
)
.at(span)?;
diff --git a/crates/typst-layout/src/inline/shaping.rs b/crates/typst-layout/src/inline/shaping.rs
index c2b892d8..d6b7632b 100644
--- a/crates/typst-layout/src/inline/shaping.rs
+++ b/crates/typst-layout/src/inline/shaping.rs
@@ -11,8 +11,8 @@ use typst_library::engine::Engine;
use typst_library::foundations::{Smart, StyleChain};
use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
use typst_library::text::{
- families, features, is_default_ignorable, variant, Font, FontVariant, Glyph, Lang,
- Region, TextEdgeBounds, TextElem, TextItem,
+ families, features, is_default_ignorable, variant, Font, FontFamily, FontVariant,
+ Glyph, Lang, Region, TextEdgeBounds, TextElem, TextItem,
};
use typst_library::World;
use typst_utils::SliceExt;
@@ -351,7 +351,7 @@ impl<'a> ShapedText<'a> {
for family in families(self.styles) {
if let Some(font) = world
.book()
- .select(family, self.variant)
+ .select(family.as_str(), self.variant)
.and_then(|id| world.font(id))
{
expand(&font, TextEdgeBounds::Zero);
@@ -463,7 +463,8 @@ impl<'a> ShapedText<'a> {
None
};
let mut chain = families(self.styles)
- .map(|family| book.select(family, self.variant))
+ .filter(|family| family.covers().map_or(true, |c| c.is_match("-")))
+ .map(|family| book.select(family.as_str(), self.variant))
.chain(fallback_func.iter().map(|f| f()))
.flatten();
@@ -719,7 +720,7 @@ fn shape_segment<'a>(
ctx: &mut ShapingContext,
base: usize,
text: &str,
- mut families: impl Iterator<Item = &'a str> + Clone,
+ mut families: impl Iterator<Item = &'a FontFamily> + Clone,
) {
// Don't try shaping newlines, tabs, or default ignorables.
if text
@@ -732,11 +733,18 @@ fn shape_segment<'a>(
// Find the next available family.
let world = ctx.engine.world;
let book = world.book();
- let mut selection = families.find_map(|family| {
- book.select(family, ctx.variant)
+ let mut selection = None;
+ let mut covers = None;
+ for family in families.by_ref() {
+ selection = book
+ .select(family.as_str(), ctx.variant)
.and_then(|id| world.font(id))
- .filter(|font| !ctx.used.contains(font))
- });
+ .filter(|font| !ctx.used.contains(font));
+ if selection.is_some() {
+ covers = family.covers();
+ break;
+ }
+ }
// Do font fallback if the families are exhausted and fallback is enabled.
if selection.is_none() && ctx.fallback {
@@ -795,6 +803,16 @@ fn shape_segment<'a>(
let pos = buffer.glyph_positions();
let ltr = ctx.dir.is_positive();
+ // Whether the character starting at the given byte offset matches the
+ // selected family's coverage regex (always true if no coverage is set).
+ let is_covered = |offset| {
+ let end = text[offset..]
+ .char_indices()
+ .nth(1)
+ .map(|(i, _)| offset + i)
+ .unwrap_or(text.len());
+ covers.map_or(true, |cov| cov.is_match(&text[offset..end]))
+ };
+
// Collect the shaped glyphs, doing fallback and shaping parts again with
// the next font if necessary.
let mut i = 0;
@@ -803,7 +821,7 @@ fn shape_segment<'a>(
let cluster = info.cluster as usize;
// Add the glyph to the shaped output.
- if info.glyph_id != 0 {
+ if info.glyph_id != 0 && is_covered(cluster) {
// Determine the text range of the glyph.
let start = base + cluster;
let end = base
@@ -836,7 +854,9 @@ fn shape_segment<'a>(
} else {
// First, search for the end of the tofu sequence.
let k = i;
- while infos.get(i + 1).is_some_and(|info| info.glyph_id == 0) {
+ while infos.get(i + 1).is_some_and(|info| {
+ info.glyph_id == 0 || !is_covered(info.cluster as usize)
+ }) {
i += 1;
}
diff --git a/crates/typst-layout/src/math/mod.rs b/crates/typst-layout/src/math/mod.rs
index 32059cef..e642f633 100644
--- a/crates/typst-layout/src/math/mod.rs
+++ b/crates/typst-layout/src/math/mod.rs
@@ -237,7 +237,7 @@ fn find_math_font(
let variant = variant(styles);
let world = engine.world;
let Some(font) = families(styles).find_map(|family| {
- let id = world.book().select(family, variant)?;
+ let id = world.book().select(family.as_str(), variant)?;
let font = world.font(id)?;
let _ = font.ttf().tables().math?.constants?;
Some(font)
diff --git a/crates/typst-library/Cargo.toml b/crates/typst-library/Cargo.toml
index d854e4d5..cc5e2671 100644
--- a/crates/typst-library/Cargo.toml
+++ b/crates/typst-library/Cargo.toml
@@ -44,6 +44,7 @@ png = { workspace = true }
qcms = { workspace = true }
rayon = { workspace = true }
regex = { workspace = true }
+regex-syntax = { workspace = true }
roxmltree = { workspace = true }
rust_decimal = { workspace = true }
rustybuzz = { workspace = true }
diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs
index 91927b57..ee81e3f2 100644
--- a/crates/typst-library/src/text/mod.rs
+++ b/crates/typst-library/src/text/mod.rs
@@ -29,6 +29,7 @@ pub use self::smartquote::*;
pub use self::space::*;
use std::fmt::{self, Debug, Formatter};
+use std::hash::Hash;
use std::sync::LazyLock;
use ecow::{eco_format, EcoString};
@@ -39,13 +40,14 @@ use rustybuzz::Feature;
use smallvec::SmallVec;
use ttf_parser::Tag;
use typst_syntax::Spanned;
+use typst_utils::singleton;
use crate::diag::{bail, warning, HintedStrResult, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict,
- Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Repr, Resolve,
- Scope, Set, Smart, StyleChain,
+ Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Regex, Repr,
+ Resolve, Scope, Set, Smart, StyleChain,
};
use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel};
use crate::model::ParElem;
@@ -94,7 +96,21 @@ pub(super) fn define(global: &mut Scope) {
/// ```
#[elem(Debug, Construct, PlainText, Repr)]
pub struct TextElem {
- /// A font family name or priority list of font family names.
+ /// A font family descriptor or priority list of font family descriptors.
+ ///
+ /// A font family descriptor can be a plain string representing the family
+ /// name or a dictionary with the following keys:
+ ///
+ /// - `name` (required): The font family name.
+ /// - `covers` (optional): Defines the Unicode codepoints for which the
+ /// family shall be used. This can be:
+ /// - A predefined coverage set:
+ /// - `{"latin-in-cjk"}` covers all codepoints except for those which
+ /// exist in Latin fonts, but should preferably be taken from CJK
+ /// fonts.
+ /// - A [regular expression]($regex) that defines exactly which codepoints
+ /// shall be covered. Accepts only the subset of regular expressions
+ /// which consist of exactly one dot, letter, or character class.
///
/// When processing text, Typst tries all specified font families in order
/// until it finds a font that has the necessary glyphs. In the example
@@ -129,6 +145,21 @@ pub struct TextElem {
///
/// This is Latin. \
/// هذا عربي.
+ ///
+ /// // Change font only for numbers.
+ /// #set text(font: (
+ /// (name: "PT Sans", covers: regex("[0-9]")),
+ /// "Libertinus Serif"
+ /// ))
+ ///
+ /// The number 123.
+ ///
+ /// // Mix Latin and CJK fonts.
+ /// #set text(font: (
+ /// (name: "Inria Serif", covers: "latin-in-cjk"),
+ /// "Noto Serif CJK SC"
+ /// ))
+ /// 分别设置“中文”和English字体
/// ```
#[parse({
let font_list: Option<Spanned<FontList>> = args.named("font")?;
@@ -766,35 +797,107 @@ impl PlainText for Packed<TextElem> {
}
/// A lowercased font family like "arial".
-#[derive(Clone, Eq, PartialEq, Hash)]
-pub struct FontFamily(EcoString);
+#[derive(Debug, Clone, PartialEq, Hash)]
+pub struct FontFamily {
+ // The name of the font family
+ name: EcoString,
+ // A regex that defines the Unicode codepoints supported by the font.
+ covers: Option<Covers>,
+}
impl FontFamily {
/// Create a named font family variant.
pub fn new(string: &str) -> Self {
- Self(string.to_lowercase().into())
+ Self::with_coverage(string, None)
+ }
+
+ /// Create a font family by name and optional Unicode coverage.
+ pub fn with_coverage(string: &str, covers: Option<Covers>) -> Self {
+ Self { name: string.to_lowercase().into(), covers }
}
/// The lowercased family name.
pub fn as_str(&self) -> &str {
- &self.0
+ &self.name
}
-}
-impl Debug for FontFamily {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- self.0.fmt(f)
+ /// The user-set coverage of the font family.
+ pub fn covers(&self) -> Option<&Regex> {
+ self.covers.as_ref().map(|covers| covers.as_regex())
}
}
cast! {
FontFamily,
- self => self.0.into_value(),
+ self => self.name.into_value(),
string: EcoString => Self::new(&string),
+ mut v: Dict => {
+ let ret = Self::with_coverage(
+ &v.take("name")?.cast::<EcoString>()?,
+ v.take("covers").ok().map(|v| v.cast()).transpose()?
+ );
+ v.finish(&["name", "covers"])?;
+ ret
+ },
+}
+
+/// Defines which codepoints a font family will be used for.
+#[derive(Debug, Clone, PartialEq, Hash)]
+pub enum Covers {
+ /// Covers all codepoints except those used both in Latin and CJK fonts.
+ LatinInCjk,
+ /// Covers the set of codepoints for which the regex matches.
+ Regex(Regex),
+}
+
+impl Covers {
+ /// Retrieve the regex for the coverage.
+ pub fn as_regex(&self) -> &Regex {
+ match self {
+ Self::LatinInCjk => singleton!(
+ Regex,
+ Regex::new(
+ "[^\u{00B7}\u{2013}\u{2014}\u{2018}\u{2019}\
+ \u{201C}\u{201D}\u{2025}-\u{2027}\u{2E3A}]"
+ )
+ .unwrap()
+ ),
+ Self::Regex(regex) => regex,
+ }
+ }
+}
+
+cast! {
+ Covers,
+ self => match self {
+ Self::LatinInCjk => "latin-in-cjk".into_value(),
+ Self::Regex(regex) => regex.into_value(),
+ },
+
+ /// Covers all codepoints except those used both in Latin and CJK fonts.
+ "latin-in-cjk" => Covers::LatinInCjk,
+
+ regex: Regex => {
+ let ast = regex_syntax::ast::parse::Parser::new().parse(regex.as_str());
+ match ast {
+ Ok(
+ regex_syntax::ast::Ast::ClassBracketed(..)
+ | regex_syntax::ast::Ast::ClassUnicode(..)
+ | regex_syntax::ast::Ast::ClassPerl(..)
+ | regex_syntax::ast::Ast::Dot(..)
+ | regex_syntax::ast::Ast::Literal(..),
+ ) => {}
+ _ => bail!(
+ "coverage regex may only use dot, letters, and character classes";
+ hint: "the regex is applied to each letter individually"
+ ),
+ }
+ Covers::Regex(regex)
+ },
}
/// Font family fallback list.
-#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
+#[derive(Debug, Default, Clone, PartialEq, Hash)]
pub struct FontList(pub Vec<FontFamily>);
impl<'a> IntoIterator for &'a FontList {
@@ -809,7 +912,7 @@ impl<'a> IntoIterator for &'a FontList {
cast! {
FontList,
self => if self.0.len() == 1 {
- self.0.into_iter().next().unwrap().0.into_value()
+ self.0.into_iter().next().unwrap().name.into_value()
} else {
self.0.into_value()
},
@@ -818,20 +921,22 @@ cast! {
}
/// Resolve a prioritized iterator over the font families.
-pub fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone {
- const FALLBACKS: &[&str] = &[
- "libertinus serif",
- "twitter color emoji",
- "noto color emoji",
- "apple color emoji",
- "segoe ui emoji",
- ];
-
- let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] };
- TextElem::font_in(styles)
+pub fn families(styles: StyleChain) -> impl Iterator<Item = &FontFamily> + Clone {
+ let fallbacks = singleton!(Vec<FontFamily>, {
+ [
+ "libertinus serif",
+ "twitter color emoji",
+ "noto color emoji",
+ "apple color emoji",
+ "segoe ui emoji",
+ ]
.into_iter()
- .map(|family| family.as_str())
- .chain(tail.iter().copied())
+ .map(FontFamily::new)
+ .collect()
+ });
+
+ let tail = if TextElem::fallback_in(styles) { fallbacks.as_slice() } else { &[] };
+ TextElem::font_in(styles).into_iter().chain(tail.iter())
}
/// Resolve the font variant.
diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs
index 003ecf47..9723bbf0 100644
--- a/crates/typst-library/src/text/shift.rs
+++ b/crates/typst-library/src/text/shift.rs
@@ -157,7 +157,11 @@ fn is_shapable(engine: &Engine, text: &str, styles: StyleChain) -> bool {
.select(family.as_str(), variant(styles))
.and_then(|id| world.font(id))
{
- return text.chars().all(|c| font.ttf().glyph_index(c).is_some());
+ let covers = family.covers();
+ return text.chars().all(|c| {
+ covers.map_or(true, |cov| cov.is_match(c.encode_utf8(&mut [0; 4])))
+ && font.ttf().glyph_index(c).is_some()
+ });
}
}
diff --git a/crates/typst-macros/src/lib.rs b/crates/typst-macros/src/lib.rs
index e1c3c13a..578389c7 100644
--- a/crates/typst-macros/src/lib.rs
+++ b/crates/typst-macros/src/lib.rs
@@ -280,7 +280,7 @@ pub fn category(stream: BoundaryStream, item: BoundaryStream) -> BoundaryStream
/// - `Reflect` makes Typst's runtime aware of the type's characteristics.
/// It's important for autocompletion, error messages, etc.
/// - `FromValue` defines how to cast from a value into this type.
-/// - `IntoValue` defines how to cast fromthis type into a value.
+/// - `IntoValue` defines how to cast from this type into a value.
///
/// ```ignore
/// /// An integer between 0 and 13.
diff --git a/tests/ref/text-font-covers-chinese.png b/tests/ref/text-font-covers-chinese.png
new file mode 100644
index 00000000..5c9b4b1e
--- /dev/null
+++ b/tests/ref/text-font-covers-chinese.png
Binary files differ
diff --git a/tests/ref/text-font-covers-numbers.png b/tests/ref/text-font-covers-numbers.png
new file mode 100644
index 00000000..9ed95c2f
--- /dev/null
+++ b/tests/ref/text-font-covers-numbers.png
Binary files differ
diff --git a/tests/suite/text/font.typ b/tests/suite/text/font.typ
index bb75f4ae..5af8dcb9 100644
--- a/tests/suite/text/font.typ
+++ b/tests/suite/text/font.typ
@@ -112,3 +112,40 @@ I
[ ]
text(fill: t, "Hello")
})
+
+--- text-font-types ---
+#let ubuntu = (name: "Ubuntu", covers: regex("[\u{20}-\u{FFFF}]"))
+#set text(font: ubuntu)
+#set text(font: (ubuntu, "Ubuntu"))
+
+--- text-font-covers-chinese ---
+// Without ranges, the quotation mark is using the Latin font.
+#set text(font: ("Ubuntu", "Noto Serif CJK SC"))
+分别设置“中文”和English字体
+
+// With ranges, the quotation mark is using the Chinese font.
+#set text(font: ((name: "Noto Serif CJK SC", covers: regex("[\u{00B7}-\u{3134F}]")), "Ubuntu"))
+分别设置“中文”和English字体
+
+// With "latin-in-cjk", the quotation mark is also using the Chinese font.
+#set text(font: ((name: "Ubuntu", covers: "latin-in-cjk"), "Noto Serif CJK SC"))
+分别设置“中文”和English字体
+
+--- text-font-covers-numbers ---
+// Change font only for numbers.
+#set text(font: (
+ (name: "PT Sans", covers: regex("[0-9]")),
+ "Libertinus Serif"
+))
+
+The number 123.
+
+--- text-font-covers-bad-1 ---
+// Error: 17-59 coverage regex may only use dot, letters, and character classes
+// Hint: 17-59 the regex is applied to each letter individually
+#set text(font: (name: "Ubuntu", covers: regex("20-FFFF")))
+
+--- text-font-covers-bad-2 ---
+// Error: 17-65 coverage regex may only use dot, letters, and character classes
+// Hint: 17-65 the regex is applied to each letter individually
+#set text(font: (name: "Ubuntu", covers: regex("\u{20}-\u{10}")))