Switch PDF backend to `krilla` (#5420)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
author: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> 2025-04-01 16:42:52 +0200
committer: GitHub <noreply@github.com> 2025-04-01 14:42:52 +0000
commit: 96dd67e011bb317cf78683bcf1edfdfca5e7b6b3 (patch)
tree: 900a0c4e7723af4289685af35d788041055ad4a2 /crates/typst-pdf/src/convert.rs
parent: 012e14d40cb44997630cf6469a446f217f2e9057 (diff)
1 files changed, 661 insertions, 0 deletions
diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs
new file mode 100644
index 00000000..f5ca3173
--- /dev/null
+++ b/crates/typst-pdf/src/convert.rs
@@ -0,0 +1,661 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::num::NonZeroU64;
+
+use ecow::{eco_format, EcoVec};
+use krilla::annotation::Annotation;
+use krilla::configure::{Configuration, ValidationError, Validator};
+use krilla::destination::{NamedDestination, XyzDestination};
+use krilla::embed::EmbedError;
+use krilla::error::KrillaError;
+use krilla::geom::PathBuilder;
+use krilla::page::{PageLabel, PageSettings};
+use krilla::surface::Surface;
+use krilla::{Document, SerializeSettings};
+use krilla_svg::render_svg_glyph;
+use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
+use typst_library::foundations::NativeElement;
+use typst_library::introspection::Location;
+use typst_library::layout::{
+    Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform,
+};
+use typst_library::model::HeadingElem;
+use typst_library::text::{Font, Lang};
+use typst_library::visualize::{Geometry, Paint};
+use typst_syntax::Span;
+
+use crate::embed::embed_files;
+use crate::image::handle_image;
+use crate::link::handle_link;
+use crate::metadata::build_metadata;
+use crate::outline::build_outline;
+use crate::page::PageLabelExt;
+use crate::shape::handle_shape;
+use crate::text::handle_text;
+use crate::util::{convert_path, display_font, AbsExt, TransformExt};
+use crate::PdfOptions;
+
+#[typst_macros::time(name = "convert document")]
+pub fn convert(
+    typst_document: &PagedDocument,
+    options: &PdfOptions,
+) -> SourceResult<Vec<u8>> {
+    let settings = SerializeSettings {
+        compress_content_streams: true,
+        no_device_cs: true,
+        ascii_compatible: false,
+        xmp_metadata: true,
+        cmyk_profile: None,
+        configuration: options.standards.config,
+        enable_tagging: false,
+        render_svg_glyph_fn: render_svg_glyph,
+    };
+
+    let mut document = Document::new_with(settings);
+    let page_index_converter = PageIndexConverter::new(typst_document, options);
+    let named_destinations =
+        collect_named_destinations(typst_document, &page_index_converter);
+    let mut gc = GlobalContext::new(
+        typst_document,
+        options,
+        named_destinations,
+        page_index_converter,
+    );
+
+    convert_pages(&mut gc, &mut document)?;
+    embed_files(typst_document, &mut document)?;
+
+    document.set_outline(build_outline(&gc));
+    document.set_metadata(build_metadata(&gc));
+
+    finish(document, gc, options.standards.config)
+}
+
+fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResult<()> {
+    for (i, typst_page) in gc.document.pages.iter().enumerate() {
+        if gc.page_index_converter.pdf_page_index(i).is_none() {
+            // Don't export this page.
+            continue;
+        } else {
+            let mut settings = PageSettings::new(
+                typst_page.frame.width().to_f32(),
+                typst_page.frame.height().to_f32(),
+            );
+
+            if let Some(label) = typst_page
+                .numbering
+                .as_ref()
+                .and_then(|num| PageLabel::generate(num, typst_page.number))
+                .or_else(|| {
+                    // When some pages were ignored from export, we show a page label with
+                    // the correct real (not logical) page number.
+                    // This is for consistency with normal output when pages have no numbering
+                    // and all are exported: the final PDF page numbers always correspond to
+                    // the real (not logical) page numbers. Here, the final PDF page number
+                    // will differ, but we can at least use labels to indicate what was
+                    // the corresponding real page number in the Typst document.
+                    gc.page_index_converter
+                        .has_skipped_pages()
+                        .then(|| PageLabel::arabic((i + 1) as u64))
+                })
+            {
+                settings = settings.with_page_label(label);
+            }
+
+            let mut page = document.start_page_with(settings);
+            let mut surface = page.surface();
+            let mut fc = FrameContext::new(typst_page.frame.size());
+
+            handle_frame(
+                &mut fc,
+                &typst_page.frame,
+                typst_page.fill_or_transparent(),
+                &mut surface,
+                gc,
+            )?;
+
+            surface.finish();
+
+            for annotation in fc.annotations {
+                page.add_annotation(annotation);
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// A state allowing us to keep track of transforms and container sizes,
+/// which is mainly needed to resolve gradients and patterns correctly.
+#[derive(Debug, Clone)]
+pub(crate) struct State {
+    /// The current transform.
+    transform: Transform,
+    /// The transform of first hard frame in the hierarchy.
+    container_transform: Transform,
+    /// The size of the first hard frame in the hierarchy.
+    container_size: Size,
+}
+
+impl State {
+    /// Creates a new, clean state for a given `size`.
+    fn new(size: Size) -> Self {
+        Self {
+            transform: Transform::identity(),
+            container_transform: Transform::identity(),
+            container_size: size,
+        }
+    }
+
+    pub(crate) fn register_container(&mut self, size: Size) {
+        self.container_transform = self.transform;
+        self.container_size = size;
+    }
+
+    pub(crate) fn pre_concat(&mut self, transform: Transform) {
+        self.transform = self.transform.pre_concat(transform);
+    }
+
+    pub(crate) fn transform(&self) -> Transform {
+        self.transform
+    }
+
+    pub(crate) fn container_transform(&self) -> Transform {
+        self.container_transform
+    }
+
+    pub(crate) fn container_size(&self) -> Size {
+        self.container_size
+    }
+}
+
+/// Context needed for converting a single frame.
+pub(crate) struct FrameContext {
+    states: Vec<State>,
+    annotations: Vec<Annotation>,
+}
+
+impl FrameContext {
+    pub(crate) fn new(size: Size) -> Self {
+        Self {
+            states: vec![State::new(size)],
+            annotations: vec![],
+        }
+    }
+
+    pub(crate) fn push(&mut self) {
+        self.states.push(self.states.last().unwrap().clone());
+    }
+
+    pub(crate) fn pop(&mut self) {
+        self.states.pop();
+    }
+
+    pub(crate) fn state(&self) -> &State {
+        self.states.last().unwrap()
+    }
+
+    pub(crate) fn state_mut(&mut self) -> &mut State {
+        self.states.last_mut().unwrap()
+    }
+
+    pub(crate) fn push_annotation(&mut self, annotation: Annotation) {
+        self.annotations.push(annotation);
+    }
+}
+
+/// Globally needed context for converting a typst document.
+pub(crate) struct GlobalContext<'a> {
+    /// Cache the conversion between krilla and Typst fonts (forward and backward).
+    pub(crate) fonts_forward: HashMap<Font, krilla::text::Font>,
+    pub(crate) fonts_backward: HashMap<krilla::text::Font, Font>,
+    /// Mapping between images and their span.
+    // Note: In theory, the same image can have multiple spans
+    // if it appears in the document multiple times. We just store the
+    // first appearance, though.
+    pub(crate) image_to_spans: HashMap<krilla::image::Image, Span>,
+    /// The spans of all images that appear in the document. We use this so
+    /// we can give more accurate error messages.
+    pub(crate) image_spans: HashSet<Span>,
+    /// The document to convert.
+    pub(crate) document: &'a PagedDocument,
+    /// Options for PDF export.
+    pub(crate) options: &'a PdfOptions<'a>,
+    /// Mapping between locations in the document and named destinations.
+    pub(crate) loc_to_names: HashMap<Location, NamedDestination>,
+    /// The languages used throughout the document.
+    pub(crate) languages: BTreeMap<Lang, usize>,
+    pub(crate) page_index_converter: PageIndexConverter,
+}
+
+impl<'a> GlobalContext<'a> {
+    pub(crate) fn new(
+        document: &'a PagedDocument,
+        options: &'a PdfOptions,
+        loc_to_names: HashMap<Location, NamedDestination>,
+        page_index_converter: PageIndexConverter,
+    ) -> GlobalContext<'a> {
+        Self {
+            fonts_forward: HashMap::new(),
+            fonts_backward: HashMap::new(),
+            document,
+            options,
+            loc_to_names,
+            image_to_spans: HashMap::new(),
+            image_spans: HashSet::new(),
+            languages: BTreeMap::new(),
+            page_index_converter,
+        }
+    }
+}
+
+#[typst_macros::time(name = "handle page")]
+pub(crate) fn handle_frame(
+    fc: &mut FrameContext,
+    frame: &Frame,
+    fill: Option<Paint>,
+    surface: &mut Surface,
+    gc: &mut GlobalContext,
+) -> SourceResult<()> {
+    fc.push();
+
+    if frame.kind().is_hard() {
+        fc.state_mut().register_container(frame.size());
+    }
+
+    if let Some(fill) = fill {
+        let shape = Geometry::Rect(frame.size()).filled(fill);
+        handle_shape(fc, &shape, surface, gc, Span::detached())?;
+    }
+
+    for (point, item) in frame.items() {
+        fc.push();
+        fc.state_mut().pre_concat(Transform::translate(point.x, point.y));
+
+        match item {
+            FrameItem::Group(g) => handle_group(fc, g, surface, gc)?,
+            FrameItem::Text(t) => handle_text(fc, t, surface, gc)?,
+            FrameItem::Shape(s, span) => handle_shape(fc, s, surface, gc, *span)?,
+            FrameItem::Image(image, size, span) => {
+                handle_image(gc, fc, image, *size, surface, *span)?
+            }
+            FrameItem::Link(d, s) => handle_link(fc, gc, d, *s),
+            FrameItem::Tag(_) => {}
+        }
+
+        fc.pop();
+    }
+
+    fc.pop();
+
+    Ok(())
+}
+
+pub(crate) fn handle_group(
+    fc: &mut FrameContext,
+    group: &GroupItem,
+    surface: &mut Surface,
+    context: &mut GlobalContext,
+) -> SourceResult<()> {
+    fc.push();
+    fc.state_mut().pre_concat(group.transform);
+
+    let clip_path = group
+        .clip
+        .as_ref()
+        .and_then(|p| {
+            let mut builder = PathBuilder::new();
+            convert_path(p, &mut builder);
+            builder.finish()
+        })
+        .and_then(|p| p.transform(fc.state().transform.to_krilla()));
+
+    if let Some(clip_path) = &clip_path {
+        surface.push_clip_path(clip_path, &krilla::paint::FillRule::NonZero);
+    }
+
+    handle_frame(fc, &group.frame, None, surface, context)?;
+
+    if clip_path.is_some() {
+        surface.pop();
+    }
+
+    fc.pop();
+
+    Ok(())
+}
+
+#[typst_macros::time(name = "finish export")]
+/// Finish a krilla document and handle export errors.
+fn finish(
+    document: Document,
+    gc: GlobalContext,
+    configuration: Configuration,
+) -> SourceResult<Vec<u8>> {
+    let validator = configuration.validator();
+
+    match document.finish() {
+        Ok(r) => Ok(r),
+        Err(e) => match e {
+            KrillaError::Font(f, s) => {
+                let font_str = display_font(gc.fonts_backward.get(&f).unwrap());
+                bail!(
+                    Span::detached(),
+                    "failed to process font {font_str}: {s}";
+                    hint: "make sure the font is valid";
+                    hint: "the used font might be unsupported by Typst"
+                );
+            }
+            KrillaError::Validation(ve) => {
+                let errors = ve
+                    .iter()
+                    .map(|e| convert_error(&gc, validator, e))
+                    .collect::<EcoVec<_>>();
+                Err(errors)
+            }
+            KrillaError::Image(_, loc) => {
+                let span = to_span(loc);
+                bail!(span, "failed to process image");
+            }
+            KrillaError::SixteenBitImage(image, _) => {
+                let span = gc.image_to_spans.get(&image).unwrap();
+                bail!(
+                    *span, "16 bit images are not supported in this export mode";
+                    hint: "convert the image to 8 bit instead"
+                )
+            }
+        },
+    }
+}
+
+/// Converts a krilla error into a Typst error.
+fn convert_error(
+    gc: &GlobalContext,
+    validator: Validator,
+    error: &ValidationError,
+) -> SourceDiagnostic {
+    let prefix = eco_format!("{} error:", validator.as_str());
+    match error {
+        ValidationError::TooLongString => error!(
+            Span::detached(),
+            "{prefix} a PDF string is longer than 32767 characters";
+            hint: "ensure title and author names are short enough"
+        ),
+        // Should in theory never occur, as krilla always trims font names.
+        ValidationError::TooLongName => error!(
+            Span::detached(),
+            "{prefix} a PDF name is longer than 127 characters";
+            hint: "perhaps a font name is too long"
+        ),
+
+        ValidationError::TooLongArray => error!(
+            Span::detached(),
+            "{prefix} a PDF array is longer than 8191 elements";
+            hint: "this can happen if you have a very long text in a single line"
+        ),
+        ValidationError::TooLongDictionary => error!(
+            Span::detached(),
+            "{prefix} a PDF dictionary has more than 4095 entries";
+            hint: "try reducing the complexity of your document"
+        ),
+        ValidationError::TooLargeFloat => error!(
+            Span::detached(),
+            "{prefix} a PDF floating point number is larger than the allowed limit";
+            hint: "try exporting with a higher PDF version"
+        ),
+        ValidationError::TooManyIndirectObjects => error!(
+            Span::detached(),
+            "{prefix} the PDF has too many indirect objects";
+            hint: "reduce the size of your document"
+        ),
+        // Can only occur if we have 27+ nested clip paths
+        ValidationError::TooHighQNestingLevel => error!(
+            Span::detached(),
+            "{prefix} the PDF has too high q nesting";
+            hint: "reduce the number of nested containers"
+        ),
+        ValidationError::ContainsPostScript(loc) => error!(
+            to_span(*loc),
+            "{prefix} the PDF contains PostScript code";
+            hint: "conic gradients are not supported in this PDF standard"
+        ),
+        ValidationError::MissingCMYKProfile => error!(
+            Span::detached(),
+            "{prefix} the PDF is missing a CMYK profile";
+            hint: "CMYK colors are not yet supported in this export mode"
+        ),
+        ValidationError::ContainsNotDefGlyph(f, loc, text) => error!(
+            to_span(*loc),
+            "{prefix} the text '{text}' cannot be displayed using {}",
+            display_font(gc.fonts_backward.get(f).unwrap());
+            hint: "try using a different font"
+        ),
+        ValidationError::InvalidCodepointMapping(_, _, cp, loc) => {
+            if let Some(c) = cp.map(|c| eco_format!("{:#06x}", c as u32)) {
+                let msg = if loc.is_some() {
+                    "the PDF contains text with"
+                } else {
+                    "the text contains"
+                };
+                error!(to_span(*loc), "{prefix} {msg} the disallowed codepoint {c}")
+            } else {
+                // I think this code path is in theory unreachable,
+                // but just to be safe.
+                let msg = if loc.is_some() {
+                    "the PDF contains text with missing codepoints"
+                } else {
+                    "the text was not mapped to a code point"
+                };
+                error!(
+                    to_span(*loc),
+                    "{prefix} {msg}";
+                    hint: "for complex scripts like Arabic, it might not be \
+                           possible to produce a compliant document"
+                )
+            }
+        }
+        ValidationError::UnicodePrivateArea(_, _, c, loc) => {
+            let code_point = eco_format!("{:#06x}", *c as u32);
+            let msg = if loc.is_some() { "the PDF" } else { "the text" };
+            error!(
+                to_span(*loc),
+                "{prefix} {msg} contains the codepoint {code_point}";
+                hint: "codepoints from the Unicode private area are \
+                       forbidden in this export mode"
+            )
+        }
+        ValidationError::Transparency(loc) => {
+            let span = to_span(*loc);
+            let hint1 = "try exporting with a different standard that \
+                         supports transparency";
+            if loc.is_some() {
+                if gc.image_spans.contains(&span) {
+                    error!(
+                        span, "{prefix} the image contains transparency";
+                        hint: "{hint1}";
+                        hint: "or convert the image to a non-transparent one";
+                        hint: "you might have to convert SVGs into \
+                               non-transparent bitmap images"
+                    )
+                } else {
+                    error!(
+                        span, "{prefix} the used fill or stroke has transparency";
+                        hint: "{hint1}";
+                        hint: "or don't use colors with transparency in \
+                               this export mode"
+                    )
+                }
+            } else {
+                error!(
+                    span, "{prefix} the PDF contains transparency";
+                    hint: "{hint1}"
+                )
+            }
+        }
+        ValidationError::ImageInterpolation(loc) => {
+            let span = to_span(*loc);
+            if loc.is_some() {
+                error!(
+                    span, "{prefix} the image has smooth scaling";
+                    hint: "set the `scaling` attribute to `pixelated`"
+                )
+            } else {
+                error!(
+                    span, "{prefix} an image in the PDF has smooth scaling";
+                    hint: "set the `scaling` attribute of all images to `pixelated`"
+                )
+            }
+        }
+        ValidationError::EmbeddedFile(e, s) => {
+            // We always set the span for embedded files, so it cannot be detached.
+            let span = to_span(*s);
+            match e {
+                EmbedError::Existence => {
+                    error!(
+                        span, "{prefix} document contains an embedded file";
+                        hint: "embedded files are not supported in this export mode"
+                    )
+                }
+                EmbedError::MissingDate => {
+                    error!(
+                        span, "{prefix} document date is missing";
+                        hint: "the document must have a date when embedding files";
+                        hint: "`set document(date: none)` must not be used in this case"
+                    )
+                }
+                EmbedError::MissingDescription => {
+                    error!(span, "{prefix} the file description is missing")
+                }
+                EmbedError::MissingMimeType => {
+                    error!(span, "{prefix} the file mime type is missing")
+                }
+            }
+        }
+        // The below errors cannot occur yet, only once Typst supports full PDF/A
+        // and PDF/UA. But let's still add a message just to be on the safe side.
+        ValidationError::MissingAnnotationAltText => error!(
+            Span::detached(),
+            "{prefix} missing annotation alt text";
+            hint: "please report this as a bug"
+        ),
+        ValidationError::MissingAltText => error!(
+            Span::detached(),
+            "{prefix} missing alt text";
+            hint: "make sure your images and equations have alt text"
+        ),
+        ValidationError::NoDocumentLanguage => error!(
+            Span::detached(),
+            "{prefix} missing document language";
+            hint: "set the language of the document"
+        ),
+        // Needs to be set by typst-pdf.
+        ValidationError::MissingHeadingTitle => error!(
+            Span::detached(),
+            "{prefix} missing heading title";
+            hint: "please report this as a bug"
+        ),
+        ValidationError::MissingDocumentOutline => error!(
+            Span::detached(),
+            "{prefix} missing document outline";
+            hint: "please report this as a bug"
+        ),
+        ValidationError::MissingTagging => error!(
+            Span::detached(),
+            "{prefix} missing document tags";
+            hint: "please report this as a bug"
+        ),
+        ValidationError::NoDocumentTitle => error!(
+            Span::detached(),
+            "{prefix} missing document title";
+            hint: "set the title of the document"
+        ),
+        ValidationError::MissingDocumentDate => error!(
+            Span::detached(),
+            "{prefix} missing document date";
+            hint: "set the date of the document"
+        ),
+    }
+}
+
+/// Convert a krilla location to a span.
+fn to_span(loc: Option<krilla::surface::Location>) -> Span {
+    loc.map(|l| Span::from_raw(NonZeroU64::new(l).unwrap()))
+        .unwrap_or(Span::detached())
+}
+
+fn collect_named_destinations(
+    document: &PagedDocument,
+    pic: &PageIndexConverter,
+) -> HashMap<Location, NamedDestination> {
+    let mut locs_to_names = HashMap::new();
+
+    // Find all headings that have a label and are the first among other
+    // headings with the same label.
+    let matches: Vec<_> = {
+        let mut seen = HashSet::new();
+        document
+            .introspector
+            .query(&HeadingElem::elem().select())
+            .iter()
+            .filter_map(|elem| elem.location().zip(elem.label()))
+            .filter(|&(_, label)| seen.insert(label))
+            .collect()
+    };
+
+    for (loc, label) in matches {
+        let pos = document.introspector.position(loc);
+        let index = pos.page.get() - 1;
+        // We are subtracting 10 because the position of links e.g. to headings is always at the
+        // baseline and if you link directly to it, the text will not be visible
+        // because it is right above.
+        let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());
+
+        // Only add named destination if page belonging to the position is exported.
+        if let Some(index) = pic.pdf_page_index(index) {
+            let named = NamedDestination::new(
+                label.resolve().to_string(),
+                XyzDestination::new(
+                    index,
+                    krilla::geom::Point::from_xy(pos.point.x.to_f32(), y.to_f32()),
+                ),
+            );
+            locs_to_names.insert(loc, named);
+        }
+    }
+
+    locs_to_names
+}
+
+pub(crate) struct PageIndexConverter {
+    page_indices: HashMap<usize, usize>,
+    skipped_pages: usize,
+}
+
+impl PageIndexConverter {
+    pub fn new(document: &PagedDocument, options: &PdfOptions) -> Self {
+        let mut page_indices = HashMap::new();
+        let mut skipped_pages = 0;
+
+        for i in 0..document.pages.len() {
+            if options
+                .page_ranges
+                .as_ref()
+                .is_some_and(|ranges| !ranges.includes_page_index(i))
+            {
+                skipped_pages += 1;
+            } else {
+                page_indices.insert(i, i - skipped_pages);
+            }
+        }
+
+        Self { page_indices, skipped_pages }
+    }
+
+    pub(crate) fn has_skipped_pages(&self) -> bool {
+        self.skipped_pages > 0
+    }
+
+    /// Get the PDF page index of a page index, if it's not excluded.
+    pub(crate) fn pdf_page_index(&self, page_index: usize) -> Option<usize> {
+        self.page_indices.get(&page_index).copied()
+    }
+}
author	Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com>	2025-04-01 16:42:52 +0200
committer	GitHub <noreply@github.com>	2025-04-01 14:42:52 +0000
commit	96dd67e011bb317cf78683bcf1edfdfca5e7b6b3 (patch)
tree	900a0c4e7723af4289685af35d788041055ad4a2 /crates/typst-pdf/src/convert.rs
parent	012e14d40cb44997630cf6469a446f217f2e9057 (diff)