summaryrefslogtreecommitdiff
path: root/crates/typst-pdf
diff options
context:
space:
mode:
authorNiklas Eicker <git@nikl.me>2025-01-08 10:38:34 +0100
committerGitHub <noreply@github.com>2025-01-08 09:38:34 +0000
commit0a374d238016c0101d11cbc3f4bc621f3895ad36 (patch)
tree5799e3c279e70a371fe7d737ac3ff37655827910 /crates/typst-pdf
parent265df6c29f4d142a372917dd708bfba780f7cfbc (diff)
Embed files associated with the document as a whole (#5221)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
Diffstat (limited to 'crates/typst-pdf')
-rw-r--r--crates/typst-pdf/src/catalog.rs84
-rw-r--r--crates/typst-pdf/src/embed.rs122
-rw-r--r--crates/typst-pdf/src/lib.rs46
3 files changed, 218 insertions, 34 deletions
diff --git a/crates/typst-pdf/src/catalog.rs b/crates/typst-pdf/src/catalog.rs
index c4b0e2e8..709b0155 100644
--- a/crates/typst-pdf/src/catalog.rs
+++ b/crates/typst-pdf/src/catalog.rs
@@ -12,7 +12,7 @@ use typst_syntax::Span;
use xmp_writer::{DateTime, LangId, RenditionClass, XmpWriter};
use crate::page::PdfPageLabel;
-use crate::{hash_base64, outline, TextStrExt, Timezone, WithEverything};
+use crate::{hash_base64, outline, TextStrExt, Timestamp, Timezone, WithEverything};
/// Write the document catalog.
pub fn write_catalog(
@@ -86,23 +86,10 @@ pub fn write_catalog(
info.keywords(TextStr::trimmed(&joined));
xmp.pdf_keywords(&joined);
}
-
- // (1) If the `document.date` is set to specific `datetime` or `none`, use it.
- // (2) If the `document.date` is set to `auto` or not set, try to use the
- // date from the options.
- // (3) Otherwise, we don't write date metadata.
- let (date, tz) = match (ctx.document.info.date, ctx.options.timestamp) {
- (Smart::Custom(date), _) => (date, None),
- (Smart::Auto, Some(timestamp)) => {
- (Some(timestamp.datetime), Some(timestamp.timezone))
- }
- _ => (None, None),
- };
- if let Some(date) = date {
- if let Some(pdf_date) = pdf_date(date, tz) {
- info.creation_date(pdf_date);
- info.modified_date(pdf_date);
- }
+ let (date, tz) = document_date(ctx.document.info.date, ctx.options.timestamp);
+ if let Some(pdf_date) = date.and_then(|date| pdf_date(date, tz)) {
+ info.creation_date(pdf_date);
+ info.modified_date(pdf_date);
}
info.finish();
@@ -154,7 +141,7 @@ pub fn write_catalog(
}
// Assert dominance.
- if ctx.options.standards.pdfa {
+ if let Some((part, conformance)) = ctx.options.standards.pdfa_part {
let mut extension_schemas = xmp.extension_schemas();
extension_schemas
.xmp_media_management()
@@ -162,8 +149,8 @@ pub fn write_catalog(
.describe_instance_id();
extension_schemas.pdf().properties().describe_all();
extension_schemas.finish();
- xmp.pdfa_part(2);
- xmp.pdfa_conformance("B");
+ xmp.pdfa_part(part);
+ xmp.pdfa_conformance(conformance);
}
let xmp_buf = xmp.finish(None);
@@ -182,13 +169,35 @@ pub fn write_catalog(
catalog.viewer_preferences().direction(dir);
catalog.metadata(meta_ref);
- // Write the named destination tree if there are any entries.
- if !ctx.references.named_destinations.dests.is_empty() {
+ let has_dests = !ctx.references.named_destinations.dests.is_empty();
+ let has_embeddings = !ctx.references.embedded_files.is_empty();
+
+ // Write the `/Names` dictionary.
+ if has_dests || has_embeddings {
+ // Write the named destination tree if there are any entries.
let mut name_dict = catalog.names();
- let mut dests_name_tree = name_dict.destinations();
- let mut names = dests_name_tree.names();
- for &(name, dest_ref, ..) in &ctx.references.named_destinations.dests {
- names.insert(Str(name.resolve().as_bytes()), dest_ref);
+ if has_dests {
+ let mut dests_name_tree = name_dict.destinations();
+ let mut names = dests_name_tree.names();
+ for &(name, dest_ref, ..) in &ctx.references.named_destinations.dests {
+ names.insert(Str(name.resolve().as_bytes()), dest_ref);
+ }
+ }
+
+ if has_embeddings {
+ let mut embedded_files = name_dict.embedded_files();
+ let mut names = embedded_files.names();
+ for (name, file_ref) in &ctx.references.embedded_files {
+ names.insert(Str(name.as_bytes()), *file_ref);
+ }
+ }
+ }
+
+ if has_embeddings && ctx.options.standards.pdfa {
+ // The `/AF` (associated files) entry is a PDF 2.0 feature, but ISO 19005-3 (PDF/A-3) Annex E allows it for PDF/A-3.
+ let mut associated_files = catalog.insert(Name(b"AF")).array().typed();
+ for (_, file_ref) in ctx.references.embedded_files {
+ associated_files.item(file_ref).finish();
}
}
@@ -289,8 +298,27 @@ pub(crate) fn write_page_labels(
result
}
+/// Resolve the document date and, where available, its timezone.
+///
+/// (1) If the `document.date` is set to a specific `datetime` or `none`, use it (without a timezone).
+/// (2) If the `document.date` is set to `auto` or not set, try to use the
+/// date and timezone from the timestamp in the options.
+/// (3) Otherwise, return no date, in which case we don't write date metadata.
+pub fn document_date(
+ document_date: Smart<Option<Datetime>>,
+ timestamp: Option<Timestamp>,
+) -> (Option<Datetime>, Option<Timezone>) {
+ match (document_date, timestamp) {
+ (Smart::Custom(date), _) => (date, None),
+ (Smart::Auto, Some(timestamp)) => {
+ (Some(timestamp.datetime), Some(timestamp.timezone))
+ }
+ _ => (None, None),
+ }
+}
+
/// Converts a datetime to a pdf-writer date.
-fn pdf_date(datetime: Datetime, tz: Option<Timezone>) -> Option<pdf_writer::Date> {
+pub fn pdf_date(datetime: Datetime, tz: Option<Timezone>) -> Option<pdf_writer::Date> {
let year = datetime.year().filter(|&y| y >= 0)? as u16;
let mut pdf_date = pdf_writer::Date::new(year);
diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs
new file mode 100644
index 00000000..b32f6e45
--- /dev/null
+++ b/crates/typst-pdf/src/embed.rs
@@ -0,0 +1,122 @@
+use std::collections::BTreeMap;
+
+use ecow::EcoString;
+use pdf_writer::types::AssociationKind;
+use pdf_writer::{Filter, Finish, Name, Ref, Str, TextStr};
+use typst_library::diag::{bail, SourceResult};
+use typst_library::foundations::{NativeElement, Packed, StyleChain};
+use typst_library::pdf::{EmbedElem, EmbeddedFileRelationship};
+
+use crate::catalog::{document_date, pdf_date};
+use crate::{deflate, NameExt, PdfChunk, StrExt, WithGlobalRefs};
+
+/// Query for all [`EmbedElem`] and write them and their file specifications.
+///
+/// This returns a map from each resolved path to its file specification reference so that we can later
+/// add them to the catalog's `/Names` dictionary. Fails if embedding is disallowed, or a path is too long or duplicated.
+pub fn write_embedded_files(
+ ctx: &WithGlobalRefs,
+) -> SourceResult<(PdfChunk, BTreeMap<EcoString, Ref>)> {
+ let mut chunk = PdfChunk::new();
+ let mut embedded_files = BTreeMap::default();
+
+ let elements = ctx.document.introspector.query(&EmbedElem::elem().select());
+ for elem in &elements {
+ if !ctx.options.standards.embedded_files {
+ // PDF/A-2 requires embedded files to be PDF/A-1 or PDF/A-2,
+ // which we don't currently check, so any embedding is rejected here.
+ bail!(
+ elem.span(),
+ "file embeddings are not currently supported for PDF/A-2";
+ hint: "PDF/A-3 supports arbitrary embedded files"
+ );
+ }
+
+ let embed = elem.to_packed::<EmbedElem>().unwrap(); // elements come from the `EmbedElem` selector above, so this is expected to succeed
+ if embed.resolved_path.len() > Str::PDFA_LIMIT { // NOTE(review): enforced even when no PDF/A standard is selected
+ bail!(embed.span(), "embedded file path is too long");
+ }
+
+ let id = embed_file(ctx, &mut chunk, embed)?;
+ if embedded_files.insert(embed.resolved_path.clone(), id).is_some() { // `insert` returns the old value if the path was already present
+ bail!(
+ elem.span(),
+ "duplicate embedded file for path `{}`", embed.resolved_path;
+ hint: "embedded file paths must be unique",
+ );
+ }
+ }
+
+ Ok((chunk, embedded_files))
+}
+
+/// Write the embedded file stream and its file specification, returning the file specification's reference.
+fn embed_file(
+ ctx: &WithGlobalRefs,
+ chunk: &mut PdfChunk,
+ embed: &Packed<EmbedElem>,
+) -> SourceResult<Ref> {
+ let embedded_file_stream_ref = chunk.alloc.bump();
+ let file_spec_dict_ref = chunk.alloc.bump();
+
+ let data = embed.data().as_slice();
+ let compressed = deflate(data);
+
+ let mut embedded_file = chunk.embedded_file(embedded_file_stream_ref, &compressed);
+ embedded_file.filter(Filter::FlateDecode);
+
+ if let Some(mime_type) = embed.mime_type(StyleChain::default()) {
+ if mime_type.len() > Name::PDFA_LIMIT { // NOTE(review): enforced even when no PDF/A standard is selected
+ bail!(embed.span(), "embedded file MIME type is too long");
+ }
+ embedded_file.subtype(Name(mime_type.as_bytes()));
+ } else if ctx.options.standards.pdfa {
+ bail!(embed.span(), "embedded files must have a MIME type in PDF/A-3");
+ }
+
+ let mut params = embedded_file.params();
+ params.size(data.len() as i32); // size of the uncompressed data; NOTE(review): `as i32` wraps if it exceeds `i32::MAX` bytes
+
+ let (date, tz) = document_date(ctx.document.info.date, ctx.options.timestamp); // the document's date doubles as the file's modification date
+ if let Some(pdf_date) = date.and_then(|date| pdf_date(date, tz)) {
+ params.modification_date(pdf_date);
+ } else if ctx.options.standards.pdfa {
+ bail!(
+ embed.span(),
+ "the document must have a date when embedding files in PDF/A-3";
+ hint: "`set document(date: none)` must not be used in this case"
+ );
+ }
+
+ params.finish();
+ embedded_file.finish();
+
+ let mut file_spec = chunk.file_spec(file_spec_dict_ref);
+ file_spec.path(Str(embed.resolved_path.as_bytes()));
+ file_spec.unic_file(TextStr(&embed.resolved_path));
+ file_spec
+ .insert(Name(b"EF"))
+ .dict()
+ .pair(Name(b"F"), embedded_file_stream_ref)
+ .pair(Name(b"UF"), embedded_file_stream_ref); // both `F` and `UF` point at the same stream
+
+ if ctx.options.standards.pdfa {
+ // The association kind is a PDF 2.0 feature, but ISO 19005-3 (PDF/A-3) Annex E allows it for PDF/A-3.
+ file_spec.association_kind(match embed.relationship(StyleChain::default()) {
+ Some(EmbeddedFileRelationship::Source) => AssociationKind::Source,
+ Some(EmbeddedFileRelationship::Data) => AssociationKind::Data,
+ Some(EmbeddedFileRelationship::Alternative) => AssociationKind::Alternative,
+ Some(EmbeddedFileRelationship::Supplement) => AssociationKind::Supplement,
+ None => AssociationKind::Unspecified,
+ });
+ }
+
+ if let Some(description) = embed.description(StyleChain::default()) {
+ if description.len() > Str::PDFA_LIMIT { // NOTE(review): enforced even when no PDF/A standard is selected
+ bail!(embed.span(), "embedded file description is too long");
+ }
+ file_spec.description(TextStr(description));
+ }
+
+ Ok(file_spec_dict_ref)
+}
diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs
index f45c62bb..88e62389 100644
--- a/crates/typst-pdf/src/lib.rs
+++ b/crates/typst-pdf/src/lib.rs
@@ -4,6 +4,7 @@ mod catalog;
mod color;
mod color_font;
mod content;
+mod embed;
mod extg;
mod font;
mod gradient;
@@ -14,12 +15,13 @@ mod page;
mod resources;
mod tiling;
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash;
use std::ops::{Deref, DerefMut};
use base64::Engine;
+use ecow::EcoString;
use pdf_writer::{Chunk, Name, Pdf, Ref, Str, TextStr};
use serde::{Deserialize, Serialize};
use typst_library::diag::{bail, SourceResult, StrResult};
@@ -33,6 +35,7 @@ use typst_utils::Deferred;
use crate::catalog::write_catalog;
use crate::color::{alloc_color_functions_refs, ColorFunctionRefs};
use crate::color_font::{write_color_fonts, ColorFontSlice};
+use crate::embed::write_embedded_files;
use crate::extg::{write_graphic_states, ExtGState};
use crate::font::write_fonts;
use crate::gradient::{write_gradients, PdfGradient};
@@ -67,6 +70,7 @@ pub fn pdf(document: &PagedDocument, options: &PdfOptions) -> SourceResult<Vec<u
gradients: builder.run(write_gradients)?,
tilings: builder.run(write_tilings)?,
ext_gs: builder.run(write_graphic_states)?,
+ embedded_files: builder.run(write_embedded_files)?,
})
})?
.phase(|builder| builder.run(write_page_tree))?
@@ -147,16 +151,34 @@ pub enum Timezone {
/// Encapsulates a list of compatible PDF standards.
#[derive(Clone)]
pub struct PdfStandards {
- /// For now, we simplify to just PDF/A, since we only support PDF/A-2b. But
- /// it can be more fine-grained in the future.
+ /// For now, we simplify to just PDF/A. But it can be more fine-grained in
+ /// the future.
pub(crate) pdfa: bool,
+ /// Whether the standard allows for embedding any kind of file into the PDF.
+ /// We disallow this for PDF/A-2, since it only allows embedding
+ /// PDF/A-1 and PDF/A-2 documents.
+ pub(crate) embedded_files: bool,
+ /// Part and conformance level of the PDF/A standard (e.g. `(3, "B")`), or `None` when PDF/A is not selected.
+ pub(crate) pdfa_part: Option<(i32, &'static str)>,
}
impl PdfStandards {
/// Validates a list of PDF standards for compatibility and returns their
/// encapsulated representation.
pub fn new(list: &[PdfStandard]) -> StrResult<Self> {
- Ok(Self { pdfa: list.contains(&PdfStandard::A_2b) })
+ let a2b = list.contains(&PdfStandard::A_2b);
+ let a3b = list.contains(&PdfStandard::A_3b);
+
+ if a2b && a3b { // the two PDF/A parts are mutually exclusive
+ bail!("PDF cannot conform to A-2B and A-3B at the same time")
+ }
+
+ let pdfa = a2b || a3b;
+ Ok(Self {
+ pdfa,
+ embedded_files: !a2b, // arbitrary embeddings are allowed unless PDF/A-2b is requested
+ pdfa_part: pdfa.then_some((if a2b { 2 } else { 3 }, "B")), // part 2 for A-2b, otherwise part 3 (A-3b); conformance level is always "B"
+ })
}
}
@@ -166,10 +188,9 @@ impl Debug for PdfStandards {
}
}
-#[allow(clippy::derivable_impls)]
impl Default for PdfStandards {
fn default() -> Self {
- Self { pdfa: false }
+ Self { pdfa: false, embedded_files: true, pdfa_part: None } // no PDF/A: embedding is unrestricted and there is no part/conformance
}
}
@@ -186,6 +207,9 @@ pub enum PdfStandard {
/// PDF/A-2b.
#[serde(rename = "a-2b")]
A_2b,
+ /// PDF/A-3b.
+ #[serde(rename = "a-3b")]
+ A_3b,
}
/// A struct to build a PDF following a fixed succession of phases.
@@ -316,6 +340,8 @@ struct References {
tilings: HashMap<PdfTiling, Ref>,
/// The IDs of written external graphics states.
ext_gs: HashMap<ExtGState, Ref>,
+ /// The names and references for embedded files.
+ embedded_files: BTreeMap<EcoString, Ref>,
}
/// At this point, the references have been assigned to all resources. The page
@@ -481,6 +507,14 @@ impl<T: Eq + Hash, R: Renumber> Renumber for HashMap<T, R> {
}
}
+impl<T: Ord, R: Renumber> Renumber for BTreeMap<T, R> {
+ fn renumber(&mut self, offset: i32) {
+ for v in self.values_mut() { // only the values are renumbered; keys are left untouched
+ v.renumber(offset);
+ }
+ }
+}
+
impl<R: Renumber> Renumber for Option<R> {
fn renumber(&mut self, offset: i32) {
if let Some(r) = self {