1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
|
//! PDF resources.
//!
//! Resources are defined in dictionaries. They map identifiers such as `Im0` to
//! a PDF reference. Each [content stream] is associated with a resource dictionary.
//! The identifiers defined in the resources can then be used in content streams.
//!
//! [content stream]: `crate::content`
use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;
use ecow::{eco_format, EcoString};
use pdf_writer::{Dict, Finish, Name, Ref};
use subsetter::GlyphRemapper;
use typst::text::Lang;
use typst::{text::Font, utils::Deferred, visualize::Image};
use crate::{
color::ColorSpaces, color_font::ColorFontMap, extg::ExtGState, gradient::PdfGradient,
image::EncodedImage, pattern::PatternRemapper, PdfChunk, Renumber, WithEverything,
WithResources,
};
/// All the resources that have been collected when traversing the document.
///
/// This does not allocate references to resources; it only tracks what was
/// used and deduplicates what can be deduplicated.
///
/// You may notice that this structure is a tree: [`PatternRemapper`] and
/// [`ColorFontMap`] (which are present in the fields of [`Resources`])
/// themselves contain [`Resources`] (called "sub-resources" from now on).
/// Because color glyphs and patterns are defined using content streams, just
/// like pages, they can refer to resources too, which are tracked by the
/// respective sub-resources.
///
/// Each instance of this structure will become a `/Resources` dictionary in
/// the final PDF. It is not possible to use a single shared dictionary for all
/// pages, patterns and color fonts, because if a resource is listed in its own
/// `/Resources` dictionary, some PDF readers will fail to open the document.
///
/// Because we need to lazily initialize sub-resources (we don't know how deep
/// the tree will be before reading the document), and because this is done in
/// a context where no PDF reference allocator is available, `Resources` are
/// originally created with the type parameter `R = ()`. The reference for each
/// dictionary is only allocated in the next phase, once we know the shape of
/// the tree, at which point `R` becomes `Ref` (see [`Resources::with_refs`]).
/// No other value of `R` should ever exist.
pub struct Resources<R = Ref> {
/// The global reference to this resource dictionary, or `()` if it has not
/// been allocated yet.
pub reference: R,
/// Handles color space writing.
pub colors: ColorSpaces,
/// Deduplicates fonts used across the document.
pub fonts: Remapper<Font>,
/// Deduplicates images used across the document.
pub images: Remapper<Image>,
/// Handles to deferred image conversions.
///
/// NOTE(review): the `usize` key is presumably the index handed out by
/// `images` for the corresponding image; confirm against the code that
/// fills this map.
pub deferred_images: HashMap<usize, Deferred<EncodedImage>>,
/// Deduplicates gradients used across the document.
pub gradients: Remapper<PdfGradient>,
/// Deduplicates patterns used across the document.
///
/// `None` in the default value. When present, it carries its own
/// sub-resources.
pub patterns: Option<Box<PatternRemapper<R>>>,
/// Deduplicates external graphics states used across the document.
pub ext_gs: Remapper<ExtGState>,
/// Deduplicates color glyphs.
///
/// `None` in the default value. When present, it carries its own
/// sub-resources.
pub color_fonts: Option<Box<ColorFontMap<R>>>,
// The fields below do not correspond to actual resources that will be
// written in a dictionary, but are more meta-data about resources that
// can't really live somewhere else.
/// The number of glyphs for all referenced languages in the content stream.
/// We keep track of this to determine the main document language.
/// A `BTreeMap` is used so that a sorted list of languages can be written to
/// the metadata.
pub languages: BTreeMap<Lang, usize>,
/// For each font a mapping from used glyphs to their text representation.
/// This is used for the PDF's /ToUnicode map, and important for copy-paste
/// and searching.
///
/// Note that the text representation may contain multiple chars in case of
/// ligatures or similar things, and it may have no entry in the font's cmap
/// (or only a private-use codepoint), like the “Th” in Linux Libertine.
///
/// A glyph may have multiple entries in the font's cmap, and even the same
/// glyph can have a different text representation within one document.
/// But /ToUnicode does not support that, so we just save the first occurrence.
pub glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
/// Same as `glyph_sets`, but for color fonts.
pub color_glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
/// Stores the glyph remapper for each font for the subsetter.
pub glyph_remappers: HashMap<Font, GlyphRemapper>,
}
impl<R: Renumber> Renumber for Resources<R> {
    /// Renumber the reference of this dictionary by `offset`, then do the
    /// same for the sub-resources of the color fonts and of the patterns
    /// (in that order) when they exist.
    fn renumber(&mut self, offset: i32) {
        self.reference.renumber(offset);

        if let Some(font_map) = self.color_fonts.as_deref_mut() {
            font_map.resources.renumber(offset);
        }

        if let Some(pattern_map) = self.patterns.as_deref_mut() {
            pattern_map.resources.renumber(offset);
        }
    }
}
impl Default for Resources<()> {
    /// Create an empty resource dictionary: no reference allocated yet, no
    /// sub-resources, and empty remappers and bookkeeping maps.
    fn default() -> Self {
        // Each remapper gets the prefix used to name its dictionary entries.
        let fonts = Remapper::new("F");
        let images = Remapper::new("Im");
        let gradients = Remapper::new("Gr");
        let ext_gs = Remapper::new("Gs");

        Self {
            reference: (),
            colors: ColorSpaces::default(),
            fonts,
            images,
            deferred_images: HashMap::default(),
            gradients,
            // Sub-resources start out absent.
            patterns: None,
            ext_gs,
            color_fonts: None,
            languages: BTreeMap::default(),
            glyph_sets: HashMap::default(),
            color_glyph_sets: HashMap::default(),
            glyph_remappers: HashMap::default(),
        }
    }
}
impl Resources<()> {
    /// Attach the references in `refs` to this resource dictionary and,
    /// recursively, to its sub-resources.
    ///
    /// A sub-resource is only kept when both this dictionary and `refs`
    /// have an entry for it; all other fields are moved over unchanged.
    pub fn with_refs(self, refs: &ResourcesRefs) -> Resources<Ref> {
        let Resources {
            reference: (),
            colors,
            fonts,
            images,
            deferred_images,
            gradients,
            patterns,
            ext_gs,
            color_fonts,
            languages,
            glyph_sets,
            color_glyph_sets,
            glyph_remappers,
        } = self;

        let patterns = match (patterns, refs.patterns.as_ref()) {
            (Some(remapper), Some(sub_refs)) => {
                Some(Box::new(remapper.with_refs(sub_refs)))
            }
            _ => None,
        };

        let color_fonts = match (color_fonts, refs.color_fonts.as_ref()) {
            (Some(font_map), Some(sub_refs)) => {
                Some(Box::new(font_map.with_refs(sub_refs)))
            }
            _ => None,
        };

        Resources {
            reference: refs.reference,
            colors,
            fonts,
            images,
            deferred_images,
            gradients,
            patterns,
            ext_gs,
            color_fonts,
            languages,
            glyph_sets,
            color_glyph_sets,
            glyph_remappers,
        }
    }
}
impl<R> Resources<R> {
    /// Call `process` on this resource dictionary first, and then on every
    /// dictionary below it: the color font sub-resources are visited before
    /// the pattern sub-resources.
    pub fn traverse<P>(&self, process: &mut P)
    where
        P: FnMut(&Self),
    {
        process(self);

        if let Some(font_map) = self.color_fonts.as_deref() {
            font_map.resources.traverse(process);
        }

        if let Some(pattern_map) = self.patterns.as_deref() {
            pattern_map.resources.traverse(process);
        }
    }
}
/// References for a resource tree.
///
/// This structure is a tree too, that should have the same structure as the
/// corresponding `Resources`.
pub struct ResourcesRefs {
/// The reference allocated for the resource dictionary at this node.
pub reference: Ref,
/// References for the sub-resources of the color fonts, if there are any.
pub color_fonts: Option<Box<ResourcesRefs>>,
/// References for the sub-resources of the patterns, if there are any.
pub patterns: Option<Box<ResourcesRefs>>,
}
impl Renumber for ResourcesRefs {
    /// Renumber the reference at this node by `offset`, then recurse into
    /// the color font subtree and the pattern subtree (in that order).
    fn renumber(&mut self, offset: i32) {
        self.reference.renumber(offset);

        let subtrees = self.color_fonts.iter_mut().chain(self.patterns.iter_mut());
        for subtree in subtrees {
            subtree.renumber(offset);
        }
    }
}
/// Allocate references for all resource dictionaries.
///
/// Returns the chunk the references were allocated from together with the
/// tree of references, which mirrors the shape of `context.resources`.
pub fn alloc_resources_refs(context: &WithResources) -> (PdfChunk, ResourcesRefs) {
    /// Walk the resource tree depth-first and give every dictionary a
    /// reference: the node itself first, then its color font sub-resources,
    /// then its pattern sub-resources.
    fn assign(node: &Resources<()>, chunk: &mut PdfChunk) -> ResourcesRefs {
        let reference = chunk.alloc();

        let color_fonts = node
            .color_fonts
            .as_deref()
            .map(|font_map| Box::new(assign(&font_map.resources, chunk)));

        let patterns = node
            .patterns
            .as_deref()
            .map(|pattern_map| Box::new(assign(&pattern_map.resources, chunk)));

        ResourcesRefs { reference, color_fonts, patterns }
    }

    let mut chunk = PdfChunk::new();
    let refs = assign(&context.resources, &mut chunk);
    (chunk, refs)
}
/// Write the resource dictionaries that will be referenced by all pages.
///
/// We add a reference to this dictionary to each page individually instead of
/// to the root node of the page tree because using the resource inheritance
/// feature breaks PDF merging with Apple Preview.
///
/// Also write resource dictionaries for Type3 fonts and patterns.
///
/// Returns the chunk containing everything that was written; the second
/// element of the tuple carries no data.
pub fn write_resource_dictionaries(ctx: &WithEverything) -> (PdfChunk, ()) {
let mut chunk = PdfChunk::new();
// Accumulates the color spaces of every visited dictionary so that the
// color functions can be written once, after the traversal.
let mut used_color_spaces = ColorSpaces::default();
// Visit the root resource dictionary and all of its sub-resources.
ctx.resources.traverse(&mut |resources| {
used_color_spaces.merge(&resources.colors);
// Fresh references for the sub-dictionaries, each of which is written as
// its own indirect object below.
let images_ref = chunk.alloc.bump();
let patterns_ref = chunk.alloc.bump();
let ext_gs_states_ref = chunk.alloc.bump();
let color_spaces_ref = chunk.alloc.bump();
// Number the color font slices in iteration order and wrap them in a
// temporary `Remapper` so they can be written with the `Cf` prefix.
let mut color_font_slices = Vec::new();
let mut color_font_numbers = HashMap::new();
if let Some(color_fonts) = &resources.color_fonts {
for (_, font_slice) in color_fonts.iter() {
color_font_numbers.insert(font_slice.clone(), color_font_slices.len());
color_font_slices.push(font_slice);
}
}
let color_font_remapper = Remapper {
prefix: "Cf",
to_pdf: color_font_numbers,
to_items: color_font_slices,
};
// Images go into the dictionary that is referenced as `XObject` below.
resources
.images
.write(&ctx.references.images, &mut chunk.indirect(images_ref).dict());
// Gradients and patterns share the dictionary referenced as `Pattern`.
let mut patterns_dict = chunk.indirect(patterns_ref).dict();
resources
.gradients
.write(&ctx.references.gradients, &mut patterns_dict);
if let Some(p) = &resources.patterns {
p.remapper.write(&ctx.references.patterns, &mut patterns_dict);
}
patterns_dict.finish();
// External graphics states go into the dictionary referenced as
// `ExtGState`.
resources
.ext_gs
.write(&ctx.references.ext_gs, &mut chunk.indirect(ext_gs_states_ref).dict());
// The resource dictionary itself, written under its own reference and
// pointing at the sub-dictionaries by reference.
let mut res_dict = chunk
.indirect(resources.reference)
.start::<pdf_writer::writers::Resources>();
res_dict.pair(Name(b"XObject"), images_ref);
res_dict.pair(Name(b"Pattern"), patterns_ref);
res_dict.pair(Name(b"ExtGState"), ext_gs_states_ref);
res_dict.pair(Name(b"ColorSpace"), color_spaces_ref);
// TODO: can't this be an indirect reference too?
// Unlike the entries above, the fonts are written inline: regular fonts
// and color fonts end up in the same fonts dictionary.
let mut fonts_dict = res_dict.fonts();
resources.fonts.write(&ctx.references.fonts, &mut fonts_dict);
color_font_remapper.write(&ctx.references.color_fonts, &mut fonts_dict);
fonts_dict.finish();
res_dict.finish();
// The color space dictionary is written last, once the writer for the
// resource dictionary has been finished.
let color_spaces = chunk.indirect(color_spaces_ref).dict();
resources
.colors
.write_color_spaces(color_spaces, &ctx.globals.color_functions);
});
// Write the functions for the color spaces gathered from all dictionaries.
used_color_spaces.write_functions(&mut chunk, &ctx.globals.color_functions);
(chunk, ())
}
/// Assigns new, consecutive PDF-internal indices to items.
///
/// Every distinct item receives the index at which it was first inserted.
/// When the mapping is written to a dictionary, each entry is named with the
/// prefix followed by that index (for example `Im0`).
pub struct Remapper<T> {
/// The prefix to use when naming these resources.
prefix: &'static str,
/// Forwards from the items to the pdf indices.
to_pdf: HashMap<T, usize>,
/// Backwards from the pdf indices to the items.
to_items: Vec<T>,
}
impl<T> Remapper<T>
where
    T: Eq + Hash + Clone,
{
    /// Create an empty mapping whose entries will be named with `prefix`.
    pub fn new(prefix: &'static str) -> Self {
        Self { prefix, to_pdf: HashMap::new(), to_items: Vec::new() }
    }

    /// Insert an item in the mapping if it was not already present.
    ///
    /// Returns the index of the item: a fresh one (the number of items seen
    /// so far) for a new item, or the previously assigned one otherwise.
    pub fn insert(&mut self, item: T) -> usize {
        // Borrow both tables separately so the closure can append to the
        // item list while the index map holds the entry.
        let Self { to_pdf, to_items, .. } = self;
        *to_pdf.entry(item.clone()).or_insert_with(|| {
            let next_index = to_items.len();
            to_items.push(item);
            next_index
        })
    }

    /// All items in this mapping, ordered by their index.
    pub fn items(&self) -> impl Iterator<Item = &T> + '_ {
        self.to_items.iter()
    }

    /// Write this list of items in a Resource dictionary.
    ///
    /// Each item is written as a pair of its name (prefix followed by its
    /// index) and the reference that `mapping` holds for it.
    ///
    /// # Panics
    ///
    /// Panics if `mapping` has no reference for one of the items.
    fn write(&self, mapping: &HashMap<T, Ref>, dict: &mut Dict) {
        for (index, item) in self.to_items.iter().enumerate() {
            let key = eco_format!("{}{}", self.prefix, index);
            dict.pair(Name(key.as_bytes()), mapping[item]);
        }
    }
}
|