summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 1
-rw-r--r-- Cargo.toml 1
-rw-r--r-- crates/typst-pdf/Cargo.toml 1
-rw-r--r-- crates/typst-pdf/src/font.rs 33
-rw-r--r-- crates/typst-pdf/src/page.rs 5
-rw-r--r-- crates/typst-pdf/src/resources.rs 15
6 files changed, 11 insertions, 45 deletions
diff --git a/Cargo.lock b/Cargo.lock
index a92c0d22..f238f4f5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2790,7 +2790,6 @@ dependencies = [
"typst-assets",
"typst-macros",
"typst-timing",
- "unicode-properties",
"unscanny",
"xmp-writer",
]
diff --git a/Cargo.toml b/Cargo.toml
index e26f058e..09890062 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -114,7 +114,6 @@ typed-arena = "2"
unicode-bidi = "0.3.13"
unicode-ident = "1.0"
unicode-math-class = "0.1"
-unicode-properties = "0.1"
unicode-script = "0.5"
unicode-segmentation = "1"
unscanny = "0.1"
diff --git a/crates/typst-pdf/Cargo.toml b/crates/typst-pdf/Cargo.toml
index d2dcd5f5..a3a693f3 100644
--- a/crates/typst-pdf/Cargo.toml
+++ b/crates/typst-pdf/Cargo.toml
@@ -29,7 +29,6 @@ pdf-writer = { workspace = true }
subsetter = { workspace = true }
svg2pdf = { workspace = true }
ttf-parser = { workspace = true }
-unicode-properties = { workspace = true }
unscanny = { workspace = true }
xmp-writer = { workspace = true }
diff --git a/crates/typst-pdf/src/font.rs b/crates/typst-pdf/src/font.rs
index fd719799..c88c2bfd 100644
--- a/crates/typst-pdf/src/font.rs
+++ b/crates/typst-pdf/src/font.rs
@@ -12,7 +12,6 @@ use subsetter::GlyphRemapper;
use ttf_parser::{name_id, GlyphId, Tag};
use typst::text::Font;
use typst::utils::SliceExt;
-use unicode_properties::{GeneralCategory, UnicodeGeneralCategory};
use crate::{deflate, EmExt, PdfChunk, WithGlobalRefs};
@@ -226,38 +225,6 @@ pub(crate) fn subset_tag<T: Hash>(glyphs: &T) -> EcoString {
std::str::from_utf8(&letter).unwrap().into()
}
-/// For glyphs that have codepoints mapping to them in the font's cmap table, we
-/// prefer them over pre-existing text mappings from the document. Only things
-/// that don't have a corresponding codepoint (or only a private-use one) like
-/// the "Th" in Linux Libertine get the text of their first occurrences in the
-/// document instead.
-///
-/// This function replaces as many codepoints from the document as possible
-/// with ones from the cmap table.
-pub fn improve_glyph_sets(glyph_sets: &mut HashMap<Font, BTreeMap<u16, EcoString>>) {
- for (font, glyph_set) in glyph_sets {
- let ttf = font.ttf();
-
- for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) {
- if !subtable.is_unicode() {
- continue;
- }
-
- subtable.codepoints(|n| {
- let Some(c) = std::char::from_u32(n) else { return };
- if c.general_category() == GeneralCategory::PrivateUse {
- return;
- }
-
- let Some(GlyphId(g)) = ttf.glyph_index(c) else { return };
- if glyph_set.contains_key(&g) {
- glyph_set.insert(g, c.into());
- }
- });
- }
- }
-}
-
/// Create a compressed `/ToUnicode` CMap.
#[comemo::memoize]
#[typst_macros::time(name = "create cmap")]
diff --git a/crates/typst-pdf/src/page.rs b/crates/typst-pdf/src/page.rs
index b07490cc..1001d899 100644
--- a/crates/typst-pdf/src/page.rs
+++ b/crates/typst-pdf/src/page.rs
@@ -12,8 +12,8 @@ use typst::layout::{Abs, Page};
use typst::model::{Destination, Numbering};
use typst::text::Case;
+use crate::Resources;
use crate::{content, AbsExt, PdfChunk, WithDocument, WithRefs, WithResources};
-use crate::{font::improve_glyph_sets, Resources};
/// Construct page objects.
#[typst_macros::time(name = "construct pages")]
@@ -52,9 +52,6 @@ pub fn traverse_pages(
}
}
- improve_glyph_sets(&mut resources.glyph_sets);
- improve_glyph_sets(&mut resources.color_glyph_sets);
-
(PdfChunk::new(), (pages, resources))
}
diff --git a/crates/typst-pdf/src/resources.rs b/crates/typst-pdf/src/resources.rs
index a2cf5687..32b6612f 100644
--- a/crates/typst-pdf/src/resources.rs
+++ b/crates/typst-pdf/src/resources.rs
@@ -77,11 +77,16 @@ pub struct Resources<R = Ref> {
pub languages: BTreeMap<Lang, usize>,
/// For each font a mapping from used glyphs to their text representation.
- /// May contain multiple chars in case of ligatures or similar things. The
- /// same glyph can have a different text representation within one document,
- /// then we just save the first one. The resulting strings are used for the
- /// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
- /// cmap. This is important for copy-paste and searching.
+ /// This is used for the PDF's /ToUnicode map, and is important for
+ /// copy-paste and searching.
+ ///
+ /// Note that the text representation may contain multiple chars in case of
+ /// ligatures or similar things, and the glyph may have no entry in the font's
+ /// cmap (or only a private-use codepoint), like the “Th” in Linux Libertine.
+ ///
+ /// A glyph may have multiple entries in the font's cmap, and even the same
+ /// glyph can have different text representations within one document.
+ /// But /ToUnicode does not support that, so we just save the first occurrence.
pub glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
/// Same as `glyph_sets`, but for color fonts.
pub color_glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,