summary refs log tree commit diff
diff options
context:
space:
mode:
author Tobias Schmitz <tobiasschmitz2001@gmail.com> 2025-06-02 12:02:31 +0200
committer Tobias Schmitz <tobiasschmitz2001@gmail.com> 2025-07-03 18:41:48 +0200
commit 19804305783ee47b0ccc2874c53781d1dff48711 (patch)
tree b632bed2739da61e1118efe91b1ccf111ec95d05
parent cc70a785ddea08375db23d26e2fd6df7f11b5e62 (diff)
feat: mark artifacts
-rw-r--r-- crates/typst-layout/src/pages/finalize.rs 39
-rw-r--r-- crates/typst-layout/src/pages/mod.rs 10
-rw-r--r-- crates/typst-layout/src/pages/run.rs 2
-rw-r--r-- crates/typst-library/src/layout/page.rs 24
-rw-r--r-- crates/typst-library/src/model/outline.rs 8
-rw-r--r-- crates/typst-pdf/src/convert.rs 40
-rw-r--r-- crates/typst-pdf/src/tags.rs 117
7 files changed, 174 insertions, 66 deletions
diff --git a/crates/typst-layout/src/pages/finalize.rs b/crates/typst-layout/src/pages/finalize.rs
index b16d9569..543dbb0c 100644
--- a/crates/typst-layout/src/pages/finalize.rs
+++ b/crates/typst-layout/src/pages/finalize.rs
@@ -1,7 +1,10 @@
use typst_library::diag::SourceResult;
use typst_library::engine::Engine;
-use typst_library::introspection::{ManualPageCounter, Tag};
-use typst_library::layout::{Frame, FrameItem, Page, Point};
+use typst_library::foundations::{Content, NativeElement};
+use typst_library::introspection::{ManualPageCounter, SplitLocator, Tag};
+use typst_library::layout::{
+ ArtifactKind, ArtifactMarker, Frame, FrameItem, Page, Point,
+};
use super::LayoutedPage;
@@ -10,6 +13,7 @@ use super::LayoutedPage;
/// physical page number, which is unknown during parallel layout.
pub fn finalize(
engine: &mut Engine,
+ locator: &mut SplitLocator,
counter: &mut ManualPageCounter,
tags: &mut Vec<Tag>,
LayoutedPage {
@@ -45,10 +49,12 @@ pub fn finalize(
// important as it affects the relative ordering of introspectable elements
// and thus how counters resolve.
if let Some(background) = background {
- frame.push_frame(Point::zero(), background);
+ let tag = ArtifactMarker::new(ArtifactKind::Page).pack();
+ push_tagged(engine, locator, &mut frame, Point::zero(), background, tag);
}
if let Some(header) = header {
- frame.push_frame(Point::with_x(margin.left), header);
+ let tag = ArtifactMarker::new(ArtifactKind::Header).pack();
+ push_tagged(engine, locator, &mut frame, Point::with_x(margin.left), header, tag);
}
// Add the inner contents.
@@ -57,7 +63,8 @@ pub fn finalize(
// Add the "after" marginals.
if let Some(footer) = footer {
let y = frame.height() - footer.height();
- frame.push_frame(Point::new(margin.left, y), footer);
+ let tag = ArtifactMarker::new(ArtifactKind::Footer).pack();
+ push_tagged(engine, locator, &mut frame, Point::new(margin.left, y), footer, tag);
}
if let Some(foreground) = foreground {
frame.push_frame(Point::zero(), foreground);
@@ -72,3 +79,25 @@ pub fn finalize(
Ok(Page { frame, fill, numbering, supplement, number })
}
+
+fn push_tagged(
+ engine: &mut Engine,
+ locator: &mut SplitLocator,
+ frame: &mut Frame,
+ mut pos: Point,
+ inner: Frame,
+ mut tag: Content,
+) {
+ // TODO: use general PDF Tagged/Artifact element that wraps some content and
+ // is also available to the user.
+ let key = typst_utils::hash128(&tag);
+ let loc = locator.next_location(engine.introspector, key);
+ tag.set_location(loc);
+ frame.push(pos, FrameItem::Tag(Tag::Start(tag)));
+
+ let height = inner.height();
+ frame.push_frame(pos, inner);
+
+ pos.y += height;
+ frame.push(pos, FrameItem::Tag(Tag::End(loc, key)));
+}
diff --git a/crates/typst-layout/src/pages/mod.rs b/crates/typst-layout/src/pages/mod.rs
index 14dc0f3f..a64fee4b 100644
--- a/crates/typst-layout/src/pages/mod.rs
+++ b/crates/typst-layout/src/pages/mod.rs
@@ -123,17 +123,19 @@ fn layout_pages<'a>(
Item::Run(..) => {
let layouted = runs.next().unwrap()?;
for layouted in layouted {
- let page = finalize(engine, &mut counter, &mut tags, layouted)?;
+ let page =
+ finalize(engine, locator, &mut counter, &mut tags, layouted)?;
pages.push(page);
}
}
- Item::Parity(parity, initial, locator) => {
+ Item::Parity(parity, initial, page_locator) => {
if !parity.matches(pages.len()) {
continue;
}
- let layouted = layout_blank_page(engine, locator.relayout(), *initial)?;
- let page = finalize(engine, &mut counter, &mut tags, layouted)?;
+ let layouted =
+ layout_blank_page(engine, page_locator.relayout(), *initial)?;
+ let page = finalize(engine, locator, &mut counter, &mut tags, layouted)?;
pages.push(page);
}
Item::Tags(items) => {
diff --git a/crates/typst-layout/src/pages/run.rs b/crates/typst-layout/src/pages/run.rs
index e9e4e110..6d2d29da 100644
--- a/crates/typst-layout/src/pages/run.rs
+++ b/crates/typst-layout/src/pages/run.rs
@@ -185,8 +185,6 @@ fn layout_page_run_impl(
)?;
// Layouts a single marginal.
- // TODO: add some sort of tag that indicates the marginals and use it to
- // mark them as artifacts for PDF/UA.
let mut layout_marginal = |content: &Option<Content>, area, align| {
let Some(content) = content else { return Ok(None) };
let aligned = content.clone().styled(AlignElem::set_alignment(align));
diff --git a/crates/typst-library/src/layout/page.rs b/crates/typst-library/src/layout/page.rs
index 98afbd06..b6fa5d0b 100644
--- a/crates/typst-library/src/layout/page.rs
+++ b/crates/typst-library/src/layout/page.rs
@@ -10,7 +10,7 @@ use crate::foundations::{
cast, elem, Args, AutoValue, Cast, Construct, Content, Dict, Fold, NativeElement,
Set, Smart, Value,
};
-use crate::introspection::Introspector;
+use crate::introspection::{Introspector, Locatable};
use crate::layout::{
Abs, Alignment, FlushElem, Frame, HAlignment, Length, OuterVAlignment, Ratio, Rel,
Sides, SpecificAlignment,
@@ -451,6 +451,28 @@ impl PagebreakElem {
}
}
+// HACK: this should probably not be an element
+#[derive(Copy)]
+#[elem(Construct, Locatable)]
+pub struct ArtifactMarker {
+ #[internal]
+ #[required]
+ pub kind: ArtifactKind,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum ArtifactKind {
+ Header,
+ Footer,
+ Page,
+}
+
+impl Construct for ArtifactMarker {
+ fn construct(_: &mut Engine, args: &mut Args) -> SourceResult<Content> {
+ bail!(args.span, "cannot be constructed manually");
+ }
+}
+
/// A finished document with metadata and page frames.
#[derive(Debug, Default, Clone)]
pub struct PagedDocument {
diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs
index 11ecc23d..bcdd1565 100644
--- a/crates/typst-library/src/model/outline.rs
+++ b/crates/typst-library/src/model/outline.rs
@@ -18,7 +18,8 @@ use crate::introspection::{
Counter, CounterKey, Introspector, Locatable, Location, Locator, LocatorLink,
};
use crate::layout::{
- Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem, Region, Rel, RepeatElem, Sides
+ Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem,
+ Region, Rel, RepeatElem, Sides,
};
use crate::math::EquationElem;
use crate::model::{Destination, HeadingElem, NumberingPattern, ParElem, Refable};
@@ -426,9 +427,9 @@ impl Show for Packed<OutlineEntry> {
let body = body.plain_text();
let page_str = PageElem::local_name_in(styles);
let page_nr = page.plain_text();
- eco_format!("{prefix} {body} {page_str} {page_nr}")
+ eco_format!("{prefix} \"{body}\", {page_str} {page_nr}")
};
- let inner = self.inner(engine, context, span, body, page)?;
+ let inner = self.inner(context, span, body, page)?;
let block = if self.element.is::<EquationElem>() {
let body = prefix.unwrap_or_default() + inner;
BlockElem::new()
@@ -575,7 +576,6 @@ impl OutlineEntry {
#[func(contextual)]
pub fn inner(
&self,
- engine: &mut Engine,
context: Tracked<Context>,
span: Span,
body: Content,
diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs
index aeb2cbf9..c53fd1e2 100644
--- a/crates/typst-pdf/src/convert.rs
+++ b/crates/typst-pdf/src/convert.rs
@@ -10,7 +10,6 @@ use krilla::error::KrillaError;
use krilla::geom::PathBuilder;
use krilla::page::{PageLabel, PageSettings};
use krilla::surface::Surface;
-use krilla::tagging::{ArtifactType, ContentTag, Node};
use krilla::{Document, SerializeSettings};
use krilla_svg::render_svg_glyph;
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
@@ -31,7 +30,7 @@ use crate::metadata::build_metadata;
use crate::outline::build_outline;
use crate::page::PageLabelExt;
use crate::shape::handle_shape;
-use crate::tags::{handle_close_tag, handle_open_tag, Placeholder, TagNode, Tags};
+use crate::tags::{self, Placeholder, Tags};
use crate::text::handle_text;
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
use crate::PdfOptions;
@@ -42,17 +41,15 @@ pub fn convert(
options: &PdfOptions,
) -> SourceResult<Vec<u8>> {
// HACK
- // let config = Configuration::new();
let config = Configuration::new_with_validator(Validator::UA1);
let settings = SerializeSettings {
- compress_content_streams: true,
+ compress_content_streams: false, // true,
no_device_cs: true,
- ascii_compatible: false,
+ ascii_compatible: true, // false,
xmp_metadata: true,
cmyk_profile: None,
- configuration: config,
- // TODO: Should we just set this to false? If set to `false` this will
- // automatically be enabled if the `UA1` validator is used.
+ configuration: config, // options.standards.config,
+ // TODO: allow opting out of tagging PDFs
enable_tagging: true,
render_svg_glyph_fn: render_svg_glyph,
};
@@ -114,18 +111,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
let mut surface = page.surface();
let mut fc = FrameContext::new(typst_page.frame.size());
- // Marked-content may not cross page boundaries: reopen tag
- // that was closed at the end of the last page.
- if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
- let tag = if gc.tags.in_artifact {
- ContentTag::Artifact(ArtifactType::Other)
- } else {
- ContentTag::Other
- };
- // TODO: somehow avoid empty marked-content sequences
- let id = surface.start_tagged(tag);
- nodes.push(TagNode::Leaf(id));
- }
+ tags::restart(gc, &mut surface);
handle_frame(
&mut fc,
@@ -135,17 +121,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
gc,
)?;
- // Marked-content may not cross page boundaries: close open tag.
- if !gc.tags.stack.is_empty() {
- surface.end_tagged();
- }
+ tags::end_open(gc, &mut surface);
surface.finish();
- for (placeholder, annotation) in fc.annotations {
- let annotation_id = page.add_tagged_annotation(annotation);
- gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id));
- }
+ tags::add_annotations(gc, &mut page, fc.annotations);
}
}
@@ -318,10 +298,10 @@ pub(crate) fn handle_frame(
handle_link(fc, gc, alt.as_ref().map(EcoString::to_string), dest, *size)
}
FrameItem::Tag(introspection::Tag::Start(elem)) => {
- handle_open_tag(gc, surface, elem)
+ tags::handle_start(gc, surface, elem)
}
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
- handle_close_tag(gc, surface, loc);
+ tags::handle_end(gc, surface, loc);
}
}
diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs
index 2c43c849..ae15674f 100644
--- a/crates/typst-pdf/src/tags.rs
+++ b/crates/typst-pdf/src/tags.rs
@@ -1,9 +1,15 @@
use std::cell::OnceCell;
+use std::ops::Deref;
+use krilla::annotation::Annotation;
+use krilla::page::Page;
use krilla::surface::Surface;
-use krilla::tagging::{ContentTag, Identifier, Node, Tag, TagGroup, TagTree};
+use krilla::tagging::{
+ ArtifactType, ContentTag, Identifier, Node, Tag, TagGroup, TagTree,
+};
use typst_library::foundations::{Content, StyleChain};
use typst_library::introspection::Location;
+use typst_library::layout::{ArtifactKind, ArtifactMarker};
use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry};
use crate::convert::GlobalContext;
@@ -12,7 +18,7 @@ pub(crate) struct Tags {
/// The intermediary stack of nested tag groups.
pub(crate) stack: Vec<(Location, Tag, Vec<TagNode>)>,
pub(crate) placeholders: Vec<OnceCell<Node>>,
- pub(crate) in_artifact: bool,
+ pub(crate) in_artifact: Option<(Location, ArtifactMarker)>,
/// The output.
pub(crate) tree: Vec<TagNode>,
@@ -34,7 +40,7 @@ impl Tags {
Self {
stack: Vec::new(),
placeholders: Vec::new(),
- in_artifact: false,
+ in_artifact: None,
tree: Vec::new(),
}
@@ -93,7 +99,16 @@ impl Tags {
}
}
- pub(crate) fn context_supports(&self, tag: &Tag) -> bool {
+ /// Returns the current parent's list of children and whether it is the tree root.
+ fn parent_nodes(&mut self) -> (bool, &mut Vec<TagNode>) {
+ if let Some((_, _, parent_nodes)) = self.stack.last_mut() {
+ (false, parent_nodes)
+ } else {
+ (true, &mut self.tree)
+ }
+ }
+
+ fn context_supports(&self, tag: &Tag) -> bool {
let Some((_, parent, _)) = self.stack.last() else { return true };
use Tag::*;
@@ -142,16 +157,57 @@ impl Tags {
}
}
-pub(crate) fn handle_open_tag(
+/// Marked-content may not cross page boundaries: restart tag that was still open
+/// at the end of the last page.
+pub(crate) fn restart(gc: &mut GlobalContext, surface: &mut Surface) {
+ // TODO: somehow avoid empty marked-content sequences
+ if let Some((_, marker)) = gc.tags.in_artifact {
+ start_artifact(gc, surface, marker.kind);
+ } else if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
+ let id = surface.start_tagged(ContentTag::Other);
+ nodes.push(TagNode::Leaf(id));
+ }
+}
+
+/// Marked-content may not cross page boundaries: end any open tag.
+pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) {
+ if !gc.tags.stack.is_empty() || gc.tags.in_artifact.is_some() {
+ surface.end_tagged();
+ }
+}
+
+/// Add all annotations that were found in the page frame.
+pub(crate) fn add_annotations(
+ gc: &mut GlobalContext,
+ page: &mut Page,
+ annotations: Vec<(Placeholder, Annotation)>,
+) {
+ for (placeholder, annotation) in annotations {
+ let annotation_id = page.add_tagged_annotation(annotation);
+ gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id));
+ }
+}
+
+pub(crate) fn handle_start(
gc: &mut GlobalContext,
surface: &mut Surface,
elem: &Content,
) {
- if gc.tags.in_artifact {
+ if gc.tags.in_artifact.is_some() {
+ // Don't nest artifacts
return;
}
- let Some(loc) = elem.location() else { return };
+ let loc = elem.location().unwrap();
+
+ if let Some(marker) = elem.to_packed::<ArtifactMarker>() {
+ if !gc.tags.stack.is_empty() {
+ surface.end_tagged();
+ }
+ start_artifact(gc, surface, marker.kind);
+ gc.tags.in_artifact = Some((loc, *marker.deref()));
+ return;
+ }
let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() {
let level = heading.resolve_level(StyleChain::default());
@@ -181,29 +237,50 @@ pub(crate) fn handle_open_tag(
if !gc.tags.stack.is_empty() {
surface.end_tagged();
}
- let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other);
-
- gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(content_id)]));
+ let id = surface.start_tagged(krilla::tagging::ContentTag::Other);
+ gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(id)]));
}
-pub(crate) fn handle_close_tag(
- gc: &mut GlobalContext,
- surface: &mut Surface,
- loc: &Location,
-) {
+pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: &Location) {
+ if let Some((l, _)) = &gc.tags.in_artifact {
+ if l == loc {
+ gc.tags.in_artifact = None;
+ surface.end_tagged();
+ if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
+ let id = surface.start_tagged(ContentTag::Other);
+ nodes.push(TagNode::Leaf(id));
+ }
+ }
+ return;
+ }
+
let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else {
return;
};
surface.end_tagged();
- if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() {
- parent_nodes.push(TagNode::Group(tag, nodes));
-
+ let (is_root, parent_nodes) = gc.tags.parent_nodes();
+ parent_nodes.push(TagNode::Group(tag, nodes));
+ if !is_root {
// TODO: somehow avoid empty marked-content sequences
let id = surface.start_tagged(ContentTag::Other);
parent_nodes.push(TagNode::Leaf(id));
- } else {
- gc.tags.tree.push(TagNode::Group(tag, nodes));
+ }
+}
+
+fn start_artifact(gc: &mut GlobalContext, surface: &mut Surface, kind: ArtifactKind) {
+ let ty = artifact_type(kind);
+ let id = surface.start_tagged(ContentTag::Artifact(ty));
+
+ let (_, parent_nodes) = gc.tags.parent_nodes();
+ parent_nodes.push(TagNode::Leaf(id));
+}
+
+fn artifact_type(kind: ArtifactKind) -> ArtifactType {
+ match kind {
+ ArtifactKind::Header => ArtifactType::Header,
+ ArtifactKind::Footer => ArtifactType::Footer,
+ ArtifactKind::Page => ArtifactType::Page,
}
}