summaryrefslogtreecommitdiff
path: root/crates
diff options
context:
space:
mode:
authorTobias Schmitz <tobiasschmitz2001@gmail.com>2025-05-22 12:03:10 +0200
committerTobias Schmitz <tobiasschmitz2001@gmail.com>2025-07-03 15:58:07 +0200
commitc6b3b371b00403ae5d7ebd74c52809382f055bcb (patch)
treea74ff12923580ad893c9db133f85b1adcfca46cb /crates
parentab7eea23f15e506505743c2016afce5f611b4f59 (diff)
feat: [WIP] write tags
skip-checks:true
Diffstat (limited to 'crates')
-rw-r--r--crates/typst-layout/src/pages/run.rs2
-rw-r--r--crates/typst-pdf/src/convert.rs67
-rw-r--r--crates/typst-pdf/src/lib.rs1
-rw-r--r--crates/typst-pdf/src/tags.rs149
4 files changed, 182 insertions, 37 deletions
diff --git a/crates/typst-layout/src/pages/run.rs b/crates/typst-layout/src/pages/run.rs
index 6d2d29da..e9e4e110 100644
--- a/crates/typst-layout/src/pages/run.rs
+++ b/crates/typst-layout/src/pages/run.rs
@@ -185,6 +185,8 @@ fn layout_page_run_impl(
)?;
// Layouts a single marginal.
+ // TODO: add some sort of tag that indicates the marginals and use it to
+ // mark them as artifacts for PDF/UA.
let mut layout_marginal = |content: &Option<Content>, area, align| {
let Some(content) = content else { return Ok(None) };
let aligned = content.clone().styled(AlignElem::set_alignment(align));
diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs
index 94925756..cd165b12 100644
--- a/crates/typst-pdf/src/convert.rs
+++ b/crates/typst-pdf/src/convert.rs
@@ -10,11 +10,11 @@ use krilla::error::KrillaError;
use krilla::geom::PathBuilder;
use krilla::page::{PageLabel, PageSettings};
use krilla::surface::Surface;
-use krilla::tagging::{Node, SpanTag, Tag, TagGroup, TagTree};
+use krilla::tagging::{ArtifactType, ContentTag, Node};
use krilla::{Document, SerializeSettings};
use krilla_svg::render_svg_glyph;
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
-use typst_library::foundations::{NativeElement, Repr, StyleChain};
+use typst_library::foundations::{NativeElement, Repr};
use typst_library::introspection::{self, Location};
use typst_library::layout::{
Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform,
@@ -31,6 +31,7 @@ use crate::metadata::build_metadata;
use crate::outline::build_outline;
use crate::page::PageLabelExt;
use crate::shape::handle_shape;
+use crate::tags::{handle_close_tag, handle_open_tag, Tags};
use crate::text::handle_text;
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
use crate::PdfOptions;
@@ -49,6 +50,8 @@ pub fn convert(
xmp_metadata: true,
cmyk_profile: None,
configuration: config,
+ // TODO: Should we just set this to false? If set to `false`, tagging is
+ // still enabled automatically when the `UA1` validator is used.
enable_tagging: true,
render_svg_glyph_fn: render_svg_glyph,
};
@@ -70,12 +73,7 @@ pub fn convert(
document.set_outline(build_outline(&gc));
document.set_metadata(build_metadata(&gc));
-
- let mut tag_tree = TagTree::new();
- for tag in gc.tags.drain(..) {
- tag_tree.push(tag);
- }
- document.set_tag_tree(tag_tree);
+ document.set_tag_tree(gc.tags.take_tree());
finish(document, gc, options.standards.config)
}
@@ -115,6 +113,19 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
let mut surface = page.surface();
let mut fc = FrameContext::new(typst_page.frame.size());
+ // Marked-content may not cross page boundaries: reopen the tag
+ // that was closed at the end of the previous page.
+ if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
+ let tag = if gc.tags.in_artifact {
+ ContentTag::Artifact(ArtifactType::Other)
+ } else {
+ ContentTag::Other
+ };
+ // TODO: somehow avoid empty marked-content sequences
+ let id = surface.start_tagged(tag);
+ nodes.push(Node::Leaf(id));
+ }
+
handle_frame(
&mut fc,
&typst_page.frame,
@@ -123,6 +134,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
gc,
)?;
+ // Marked-content may not cross page boundaries: close open tag.
+ if !gc.tags.stack.is_empty() {
+ surface.end_tagged();
+ }
+
surface.finish();
for annotation in fc.annotations {
@@ -235,8 +251,8 @@ pub(crate) struct GlobalContext<'a> {
/// The languages used throughout the document.
pub(crate) languages: BTreeMap<Lang, usize>,
pub(crate) page_index_converter: PageIndexConverter,
- pub(crate) tag_stack: Vec<Location>,
- pub(crate) tags: Vec<Node>,
+ /// Tagged PDF context.
+ pub(crate) tags: Tags,
}
impl<'a> GlobalContext<'a> {
@@ -256,8 +272,8 @@ impl<'a> GlobalContext<'a> {
image_spans: HashSet::new(),
languages: BTreeMap::new(),
page_index_converter,
- tag_stack: Vec::new(),
- tags: Vec::new(),
+
+ tags: Tags::new(),
}
}
}
@@ -294,33 +310,10 @@ pub(crate) fn handle_frame(
}
FrameItem::Link(d, s) => handle_link(fc, gc, d, *s),
FrameItem::Tag(introspection::Tag::Start(elem)) => {
- let Some(heading) = elem.to_packed::<HeadingElem>() else { continue };
- let Some(loc) = heading.location() else { continue };
-
- let level = heading.resolve_level(StyleChain::default());
- let name = heading.body.plain_text().to_string();
- let heading_id = surface
- .start_tagged(krilla::tagging::ContentTag::Span(SpanTag::empty()));
- let tag = match level.get() {
- 1 => Tag::H1(Some(name)),
- 2 => Tag::H2(Some(name)),
- 3 => Tag::H3(Some(name)),
- 4 => Tag::H4(Some(name)),
- 5 => Tag::H5(Some(name)),
- _ => Tag::H6(Some(name)),
- };
- let mut tag_group = TagGroup::new(tag);
- tag_group.push(Node::Leaf(heading_id));
- gc.tags.push(Node::Group(tag_group));
-
- gc.tag_stack.push(loc);
+ handle_open_tag(gc, surface, elem)
}
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
- // FIXME: support or split up content tags that span multiple pages
- if gc.tag_stack.last() == Some(loc) {
- surface.end_tagged();
- gc.tag_stack.pop();
- }
+ handle_close_tag(gc, surface, loc);
}
}
diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs
index 88c6ee55..c3835d24 100644
--- a/crates/typst-pdf/src/lib.rs
+++ b/crates/typst-pdf/src/lib.rs
@@ -9,6 +9,7 @@ mod outline;
mod page;
mod paint;
mod shape;
+mod tags;
mod text;
mod util;
diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs
new file mode 100644
index 00000000..70792dfe
--- /dev/null
+++ b/crates/typst-pdf/src/tags.rs
@@ -0,0 +1,149 @@
+use krilla::surface::Surface;
+use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree};
+use typst_library::foundations::{Content, StyleChain};
+use typst_library::introspection::Location;
+use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry};
+
+use crate::convert::GlobalContext;
+
+pub(crate) struct Tags {
+ /// The intermediary stack of nested tag groups.
+ pub(crate) stack: Vec<(Location, Tag, Vec<Node>)>,
+ pub(crate) in_artifact: bool,
+
+ /// The output.
+ pub(crate) tree: TagTree,
+}
+
+impl Tags {
+ pub(crate) fn new() -> Self {
+ Self {
+ stack: Vec::new(),
+ in_artifact: false,
+ tree: TagTree::new(),
+ }
+ }
+
+ pub(crate) fn take_tree(&mut self) -> TagTree {
+ std::mem::take(&mut self.tree)
+ }
+
+ pub(crate) fn context_supports(&self, tag: &Tag) -> bool {
+ let Some((_, parent, _)) = self.stack.last() else { return true };
+
+ use Tag::*;
+
+ match parent {
+ Part => true,
+ Article => !matches!(tag, Article),
+ Section => true,
+ BlockQuote => todo!(),
+ Caption => todo!(),
+ TOC => matches!(tag, TOC | TOCI),
+ // TODO: NonStruct is allowed too, but (currently?) not supported by krilla
+ TOCI => matches!(tag, TOC | Lbl | Reference | P),
+ Index => todo!(),
+ P => todo!(),
+ H1(_) => todo!(),
+ H2(_) => todo!(),
+ H3(_) => todo!(),
+ H4(_) => todo!(),
+ H5(_) => todo!(),
+ H6(_) => todo!(),
+ L(_list_numbering) => todo!(),
+ LI => todo!(),
+ Lbl => todo!(),
+ LBody => todo!(),
+ Table => todo!(),
+ TR => todo!(),
+ TH(_table_header_scope) => todo!(),
+ TD => todo!(),
+ THead => todo!(),
+ TBody => todo!(),
+ TFoot => todo!(),
+ InlineQuote => todo!(),
+ Note => todo!(),
+ Reference => todo!(),
+ BibEntry => todo!(),
+ Code => todo!(),
+ Link => todo!(),
+ Annot => todo!(),
+ Figure(_) => todo!(),
+ Formula(_) => todo!(),
+ Datetime => todo!(),
+ Terms => todo!(),
+ Title => todo!(),
+ }
+ }
+}
+
+pub(crate) fn handle_open_tag(
+ gc: &mut GlobalContext,
+ surface: &mut Surface,
+ elem: &Content,
+) {
+ if gc.tags.in_artifact {
+ return;
+ }
+
+ let Some(loc) = elem.location() else { return };
+
+ let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() {
+ let level = heading.resolve_level(StyleChain::default());
+ let name = heading.body.plain_text().to_string();
+ match level.get() {
+ 1 => Tag::H1(Some(name)),
+ 2 => Tag::H2(Some(name)),
+ 3 => Tag::H3(Some(name)),
+ 4 => Tag::H4(Some(name)),
+ 5 => Tag::H5(Some(name)),
+ // TODO: when targeting PDF 2.0 headings `> 6` are supported
+ _ => Tag::H6(Some(name)),
+ }
+ } else if let Some(_) = elem.to_packed::<OutlineElem>() {
+ Tag::TOC
+ } else if let Some(_outline_entry) = elem.to_packed::<OutlineEntry>() {
+ Tag::TOCI
+ } else {
+ return;
+ };
+
+ if !gc.tags.context_supports(&tag) {
+ // TODO: error or warning?
+ }
+
+ // Close the previous marked-content sequence and open a nested tag.
+ if !gc.tags.stack.is_empty() {
+ surface.end_tagged();
+ }
+ let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other);
+
+ gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)]));
+}
+
+pub(crate) fn handle_close_tag(
+ gc: &mut GlobalContext,
+ surface: &mut Surface,
+ loc: &Location,
+) {
+ let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else {
+ return;
+ };
+ // TODO: construct group directly from nodes
+ let mut tag_group = TagGroup::new(tag);
+ for node in nodes {
+ tag_group.push(node);
+ }
+
+ surface.end_tagged();
+
+ if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() {
+ parent_nodes.push(Node::Group(tag_group));
+
+ // TODO: somehow avoid empty marked-content sequences
+ let id = surface.start_tagged(ContentTag::Other);
+ parent_nodes.push(Node::Leaf(id));
+ } else {
+ gc.tags.tree.push(Node::Group(tag_group));
+ }
+}