summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Schmitz <tobiasschmitz2001@gmail.com>2025-07-03 11:22:22 +0200
committerTobias Schmitz <tobiasschmitz2001@gmail.com>2025-07-03 18:43:20 +0200
commit377dc87325795943f0c0dc6ca1047d2d40f3c264 (patch)
treeb53cab884aed7f97e14456b30d519a52d2d97264
parent50cd81ee1f65ce8fdfa1897991bb4770e671e93c (diff)
refactor: split up pdf tagging code into multiple modules
-rw-r--r--crates/typst-pdf/src/tags/mod.rs (renamed from crates/typst-pdf/src/tags.rs)785
-rw-r--r--crates/typst-pdf/src/tags/outline.rs74
-rw-r--r--crates/typst-pdf/src/tags/table.rs330
3 files changed, 606 insertions, 583 deletions
diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags/mod.rs
index 9f49024f..99b52d55 100644
--- a/crates/typst-pdf/src/tags.rs
+++ b/crates/typst-pdf/src/tags/mod.rs
@@ -1,463 +1,221 @@
use std::cell::OnceCell;
-use std::num::{NonZeroU32, NonZeroUsize};
+use std::num::NonZeroU32;
use ecow::EcoString;
use krilla::page::Page;
use krilla::surface::Surface;
use krilla::tagging::{
- ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan,
- TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree,
+ ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup,
+ TagKind, TagTree,
};
-use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain};
+use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain};
use typst_library::introspection::Location;
use typst_library::layout::RepeatElem;
use typst_library::model::{
Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody,
- OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope,
+ OutlineEntry, TableCell, TableElem,
};
use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind};
use typst_library::visualize::ImageElem;
use crate::convert::GlobalContext;
use crate::link::LinkAnnotation;
+use crate::tags::outline::OutlineCtx;
+use crate::tags::table::TableCtx;
-pub(crate) struct Tags {
- /// The intermediary stack of nested tag groups.
- pub(crate) stack: Vec<StackEntry>,
- /// A list of placeholders corresponding to a [`TagNode::Placeholder`].
- pub(crate) placeholders: Vec<OnceCell<Node>>,
- pub(crate) in_artifact: Option<(Location, ArtifactKind)>,
- /// Used to group multiple link annotations using quad points.
- pub(crate) link_id: LinkId,
- /// Used to generate IDs referenced in table `Headers` attributes.
- /// The IDs must be document wide unique.
- pub(crate) table_id: TableId,
-
- /// The output.
- pub(crate) tree: Vec<TagNode>,
-}
-
-#[derive(Clone, Copy, PartialEq, Eq, Hash)]
-pub(crate) struct TableId(u32);
-
-#[derive(Clone, Copy, PartialEq, Eq, Hash)]
-pub(crate) struct LinkId(u32);
-
-pub(crate) struct StackEntry {
- pub(crate) loc: Location,
- pub(crate) kind: StackEntryKind,
- pub(crate) nodes: Vec<TagNode>,
-}
-
-pub(crate) enum StackEntryKind {
- Standard(Tag),
- Outline(OutlineCtx),
- OutlineEntry(Packed<OutlineEntry>),
- Table(TableCtx),
- TableCell(Packed<TableCell>),
- Link(LinkId, Packed<LinkMarker>),
-}
-
-impl StackEntryKind {
- pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> {
- if let Self::Standard(v) = self {
- Some(v)
- } else {
- None
- }
- }
-}
-
-pub(crate) struct OutlineCtx {
- stack: Vec<OutlineSection>,
-}
-
-pub(crate) struct OutlineSection {
- entries: Vec<TagNode>,
-}
-
-impl OutlineSection {
- const fn new() -> Self {
- OutlineSection { entries: Vec::new() }
- }
-
- fn push(&mut self, entry: TagNode) {
- self.entries.push(entry);
- }
-
- fn into_tag(self) -> TagNode {
- TagNode::Group(TagKind::TOC.into(), self.entries)
- }
-}
-
-impl OutlineCtx {
- fn new() -> Self {
- Self { stack: Vec::new() }
- }
-
- fn insert(
- &mut self,
- outline_nodes: &mut Vec<TagNode>,
- entry: Packed<OutlineEntry>,
- nodes: Vec<TagNode>,
- ) {
- let expected_len = entry.level.get() - 1;
- if self.stack.len() < expected_len {
- self.stack.resize_with(expected_len, || OutlineSection::new());
- } else {
- while self.stack.len() > expected_len {
- self.finish_section(outline_nodes);
- }
- }
+mod outline;
+mod table;
- let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes);
- self.push(outline_nodes, section_entry);
- }
-
- fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) {
- let sub_section = self.stack.pop().unwrap().into_tag();
- self.push(outline_nodes, sub_section);
+pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) {
+ if gc.tags.in_artifact.is_some() {
+ // Don't nest artifacts
+ return;
}
- fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) {
- match self.stack.last_mut() {
- Some(section) => section.push(entry),
- None => outline_nodes.push(entry),
- }
- }
+ let loc = elem.location().unwrap();
- fn build_outline(mut self, mut outline_nodes: Vec<TagNode>) -> Vec<TagNode> {
- while self.stack.len() > 0 {
- self.finish_section(&mut outline_nodes);
- }
- outline_nodes
+ if let Some(artifact) = elem.to_packed::<ArtifactElem>() {
+ let kind = artifact.kind(StyleChain::default());
+ start_artifact(gc, loc, kind);
+ return;
+ } else if let Some(_) = elem.to_packed::<RepeatElem>() {
+ start_artifact(gc, loc, ArtifactKind::Other);
+ return;
}
-}
-
-pub(crate) struct TableCtx {
- id: TableId,
- table: Packed<TableElem>,
- rows: Vec<Vec<GridCell>>,
-}
-#[derive(Clone, Default)]
-enum GridCell {
- Cell(TableCtxCell),
- Spanned(usize, usize),
- #[default]
- Missing,
-}
-
-impl GridCell {
- fn as_cell(&self) -> Option<&TableCtxCell> {
- if let Self::Cell(v) = self {
- Some(v)
- } else {
- None
+ let tag: Tag = if let Some(pdf_tag) = elem.to_packed::<PdfTagElem>() {
+ let kind = pdf_tag.kind(StyleChain::default());
+ match kind {
+ PdfTagKind::Part => TagKind::Part.into(),
+ _ => todo!(),
}
- }
+ } else if let Some(heading) = elem.to_packed::<HeadingElem>() {
+ let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX);
+ let name = heading.body.plain_text().to_string();
+ TagKind::Hn(level, Some(name)).into()
+ } else if let Some(_) = elem.to_packed::<OutlineBody>() {
+ push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()));
+ return;
+ } else if let Some(entry) = elem.to_packed::<OutlineEntry>() {
+ push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()));
+ return;
+ } else if let Some(_) = elem.to_packed::<FigureElem>() {
+ let alt = None; // TODO
+ TagKind::Figure.with_alt_text(alt)
+ } else if let Some(image) = elem.to_packed::<ImageElem>() {
+ let alt = image.alt(StyleChain::default()).map(|s| s.to_string());
- fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> {
- if let Self::Cell(v) = self {
- Some(v)
+ let figure_tag = (gc.tags.parent())
+ .and_then(StackEntryKind::as_standard_mut)
+ .filter(|tag| tag.kind == TagKind::Figure);
+ if let Some(figure_tag) = figure_tag {
+ // Set alt text of outer figure tag, if not present.
+ if figure_tag.alt_text.is_none() {
+ figure_tag.alt_text = alt;
+ }
+ return;
} else {
- None
+ TagKind::Figure.with_alt_text(alt)
}
- }
+ } else if let Some(_) = elem.to_packed::<FigureCaption>() {
+ TagKind::Caption.into()
+ } else if let Some(table) = elem.to_packed::<TableElem>() {
+ let table_id = gc.tags.next_table_id();
+ let ctx = TableCtx::new(table_id, table.clone());
+ push_stack(gc, loc, StackEntryKind::Table(ctx));
+ return;
+ } else if let Some(cell) = elem.to_packed::<TableCell>() {
+ let parent = gc.tags.stack.last_mut().expect("table");
+ let StackEntryKind::Table(table_ctx) = &mut parent.kind else {
+ unreachable!("expected table")
+ };
- fn into_cell(self) -> Option<TableCtxCell> {
- if let Self::Cell(v) = self {
- Some(v)
+ // Only repeated table headers and footer cells are layed out multiple
+ // times. Mark duplicate headers as artifacts, since they have no
+ // semantic meaning in the tag tree, which doesn't use page breaks for
+ // it's semantic structure.
+ if table_ctx.contains(cell) {
+ // TODO: currently the first layouted cell is picked to be part of
+ // the tag tree, for repeating footers this will be the cell on the
+ // first page. Maybe it should be the cell on the last page, but that
+ // would require more changes in the layouting code, or a pre-pass
+ // on the frames to figure out if there are other footers following.
+ start_artifact(gc, loc, ArtifactKind::Other);
} else {
- None
+ push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()));
}
- }
-}
-
-#[derive(Clone)]
-struct TableCtxCell {
- x: u32,
- y: u32,
- rowspan: NonZeroUsize,
- colspan: NonZeroUsize,
- kind: Smart<TableCellKind>,
- headers: TableCellHeaders,
- nodes: Vec<TagNode>,
-}
+ return;
+ } else if let Some(link) = elem.to_packed::<LinkMarker>() {
+ let link_id = gc.tags.next_link_id();
+ push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()));
+ return;
+ } else {
+ return;
+ };
-impl TableCtxCell {
- fn unwrap_kind(&self) -> TableCellKind {
- self.kind.unwrap_or_else(|| unreachable!())
- }
+ push_stack(gc, loc, StackEntryKind::Standard(tag));
}
-impl TableCtx {
- fn new(id: TableId, table: Packed<TableElem>) -> Self {
- Self { id, table: table.clone(), rows: Vec::new() }
- }
-
- fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
- let cell = self.rows.get(y)?.get(x)?;
- self.resolve_cell(cell)
- }
-
- fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> {
- let cell = self.rows.get_mut(y)?.get_mut(x)?;
- match cell {
- GridCell::Cell(cell) => {
- // HACK: Workaround for the second mutable borrow when resolving
- // the spanned cell.
- Some(unsafe { std::mem::transmute(cell) })
- }
- &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(),
- GridCell::Missing => None,
- }
+fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) {
+ if !gc.tags.context_supports(&kind) {
+ // TODO: error or warning?
}
- fn contains(&self, cell: &Packed<TableCell>) -> bool {
- let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
- let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
- self.get(x, y).is_some()
- }
+ gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() });
+}
- fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> {
- match cell {
- GridCell::Cell(cell) => Some(cell),
- &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(),
- GridCell::Missing => None,
+pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
+ if let Some((l, _)) = gc.tags.in_artifact {
+ if l == loc {
+ gc.tags.in_artifact = None;
}
+ return;
}
- fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) {
- let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
- let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
- let rowspan = cell.rowspan(StyleChain::default());
- let colspan = cell.colspan(StyleChain::default());
- let kind = cell.kind(StyleChain::default());
-
- // Extend the table grid to fit this cell.
- let required_height = y + rowspan.get();
- let required_width = x + colspan.get();
- if self.rows.len() < required_height {
- self.rows
- .resize(required_height, vec![GridCell::Missing; required_width]);
- }
- let row = &mut self.rows[y];
- if row.len() < required_width {
- row.resize_with(required_width, || GridCell::Missing);
- }
-
- // Store references to the cell for all spanned cells.
- for i in y..y + rowspan.get() {
- for j in x..x + colspan.get() {
- self.rows[i][j] = GridCell::Spanned(x, y);
- }
- }
-
- self.rows[y][x] = GridCell::Cell(TableCtxCell {
- x: x as u32,
- y: y as u32,
- rowspan,
- colspan,
- kind,
- headers: TableCellHeaders::NONE,
- nodes,
- });
- }
+ let Some(entry) = gc.tags.stack.pop_if(|e| e.loc == loc) else {
+ return;
+ };
- fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> {
- // Table layouting ensures that there are no overlapping cells, and that
- // any gaps left by the user are filled with empty cells.
- if self.rows.is_empty() {
- return nodes;
+ let node = match entry.kind {
+ StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes),
+ StackEntryKind::Outline(ctx) => {
+ let nodes = ctx.build_outline(entry.nodes);
+ TagNode::Group(TagKind::TOC.into(), nodes)
}
- let height = self.rows.len();
- let width = self.rows[0].len();
-
- // Only generate row groups such as `THead`, `TFoot`, and `TBody` if
- // there are no rows with mixed cell kinds.
- let mut gen_row_groups = true;
- let row_kinds = (self.rows.iter())
- .map(|row| {
- row.iter()
- .filter_map(|cell| self.resolve_cell(cell))
- .map(|cell| cell.kind)
- .fold(Smart::Auto, |a, b| {
- if let Smart::Custom(TableCellKind::Header(_, scope)) = b {
- gen_row_groups &= scope == TableHeaderScope::Column;
- }
- if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) {
- gen_row_groups &= a == b;
- }
- a.or(b)
- })
- .unwrap_or(TableCellKind::Data)
- })
- .collect::<Vec<_>>();
+ StackEntryKind::OutlineEntry(outline_entry) => {
+ let parent = gc.tags.stack.last_mut().expect("outline");
+ let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else {
+ unreachable!("expected outline")
+ };
- // Fixup all missing cell kinds.
- for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) {
- let default_kind =
- if gen_row_groups { row_kind } else { TableCellKind::Data };
- for cell in row.iter_mut() {
- let Some(cell) = cell.as_cell_mut() else { continue };
- cell.kind = cell.kind.or(Smart::Custom(default_kind));
- }
- }
+ outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes);
- // Explicitly set the headers attribute for cells.
- for x in 0..width {
- let mut column_header = None;
- for y in 0..height {
- self.resolve_cell_headers(
- (x, y),
- &mut column_header,
- TableHeaderScope::refers_to_column,
- );
- }
- }
- for y in 0..height {
- let mut row_header = None;
- for x in 0..width {
- self.resolve_cell_headers(
- (x, y),
- &mut row_header,
- TableHeaderScope::refers_to_row,
- );
- }
+ return;
}
-
- let mut chunk_kind = row_kinds[0];
- let mut row_chunk = Vec::new();
- for (row, row_kind) in self.rows.into_iter().zip(row_kinds) {
- let row_nodes = row
- .into_iter()
- .filter_map(|cell| {
- let cell = cell.into_cell()?;
- let span = TableCellSpan {
- rows: cell.rowspan.try_into().unwrap(),
- cols: cell.colspan.try_into().unwrap(),
- };
- let tag = match cell.unwrap_kind() {
- TableCellKind::Header(_, scope) => {
- let id = table_cell_id(self.id, cell.x, cell.y);
- let scope = table_header_scope(scope);
- TagKind::TH(
- TableHeaderCell::new(scope)
- .with_span(span)
- .with_headers(cell.headers),
- )
- .with_id(Some(id))
- }
- TableCellKind::Footer | TableCellKind::Data => TagKind::TD(
- TableDataCell::new()
- .with_span(span)
- .with_headers(cell.headers),
- )
- .into(),
- };
-
- Some(TagNode::Group(tag, cell.nodes))
- })
- .collect();
-
- let row = TagNode::Group(TagKind::TR.into(), row_nodes);
-
- // Push the `TR` tags directly.
- if !gen_row_groups {
- nodes.push(row);
- continue;
- }
-
- // Generate row groups.
- if !should_group_rows(chunk_kind, row_kind) {
- let tag = match chunk_kind {
- TableCellKind::Header(..) => TagKind::THead,
- TableCellKind::Footer => TagKind::TFoot,
- TableCellKind::Data => TagKind::TBody,
- };
- nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk)));
-
- chunk_kind = row_kind;
- }
- row_chunk.push(row);
+ StackEntryKind::Table(ctx) => {
+ let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into);
+ let nodes = ctx.build_table(entry.nodes);
+ TagNode::Group(TagKind::Table(summary).into(), nodes)
}
-
- if !row_chunk.is_empty() {
- let tag = match chunk_kind {
- TableCellKind::Header(..) => TagKind::THead,
- TableCellKind::Footer => TagKind::TFoot,
- TableCellKind::Data => TagKind::TBody,
+ StackEntryKind::TableCell(cell) => {
+ let parent = gc.tags.stack.last_mut().expect("table");
+ let StackEntryKind::Table(table_ctx) = &mut parent.kind else {
+ unreachable!("expected table")
};
- nodes.push(TagNode::Group(tag.into(), row_chunk));
- }
-
- nodes
- }
- fn resolve_cell_headers<F>(
- &mut self,
- (x, y): (usize, usize),
- current_header: &mut Option<(NonZeroU32, TagId)>,
- refers_to_dir: F,
- ) where
- F: Fn(&TableHeaderScope) -> bool,
- {
- let table_id = self.id;
- let Some(cell) = self.get_mut(x, y) else { return };
-
- if let Some((prev_level, cell_id)) = current_header.clone() {
- // The `Headers` attribute is also set for parent headers.
- let mut is_parent_header = true;
- if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
- if refers_to_dir(&scope) {
- is_parent_header = prev_level < level;
- }
- }
+ table_ctx.insert(cell, entry.nodes);
- if is_parent_header && !cell.headers.ids.contains(&cell_id) {
- cell.headers.ids.push(cell_id.clone());
- }
+ return;
}
-
- if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
- if refers_to_dir(&scope) {
- let tag_id = table_cell_id(table_id, x as u32, y as u32);
- *current_header = Some((level, tag_id));
+ StackEntryKind::Link(_, link) => {
+ let alt = link.alt.as_ref().map(EcoString::to_string);
+ let tag = TagKind::Link.with_alt_text(alt);
+ let mut node = TagNode::Group(tag, entry.nodes);
+ // Wrap link in reference tag, if it's not a url.
+ if let Destination::Position(_) | Destination::Location(_) = link.dest {
+ node = TagNode::Group(TagKind::Reference.into(), vec![node]);
}
+ node
}
- }
+ };
+
+ gc.tags.push(node);
}
-fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
- match (a, b) {
- (TableCellKind::Header(..), TableCellKind::Header(..)) => true,
- (TableCellKind::Footer, TableCellKind::Footer) => true,
- (TableCellKind::Data, TableCellKind::Data) => true,
- (_, _) => false,
+/// Add all annotations that were found in the page frame.
+pub(crate) fn add_annotations(
+ gc: &mut GlobalContext,
+ page: &mut Page,
+ annotations: Vec<LinkAnnotation>,
+) {
+ for annotation in annotations.into_iter() {
+ let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } =
+ annotation;
+ let annot = krilla::annotation::Annotation::new_link(
+ krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target),
+ alt,
+ );
+ let annot_id = page.add_tagged_annotation(annot);
+ gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id));
}
}
-fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
- let mut bytes = [0; 12];
- bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes());
- bytes[4..8].copy_from_slice(&x.to_ne_bytes());
- bytes[8..12].copy_from_slice(&y.to_ne_bytes());
- TagId::from_bytes(&bytes)
-}
+pub(crate) struct Tags {
+ /// The intermediary stack of nested tag groups.
+ pub(crate) stack: Vec<StackEntry>,
+ /// A list of placeholders corresponding to a [`TagNode::Placeholder`].
+ pub(crate) placeholders: Vec<OnceCell<Node>>,
+ pub(crate) in_artifact: Option<(Location, ArtifactKind)>,
+ /// Used to group multiple link annotations using quad points.
+ pub(crate) link_id: LinkId,
+ /// Used to generate IDs referenced in table `Headers` attributes.
+ /// The IDs must be document wide unique.
+ pub(crate) table_id: TableId,
-#[derive(Clone)]
-pub(crate) enum TagNode {
- Group(Tag, Vec<TagNode>),
- Leaf(Identifier),
- /// Allows inserting a placeholder into the tag tree.
- /// Currently used for [`krilla::page::Page::add_tagged_annotation`].
- Placeholder(Placeholder),
+ /// The output.
+ pub(crate) tree: Vec<TagNode>,
}
-#[derive(Clone, Copy)]
-pub(crate) struct Placeholder(usize);
-
impl Tags {
pub(crate) fn new() -> Self {
Self {
@@ -543,6 +301,49 @@ impl Tags {
}
}
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) struct TableId(u32);
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) struct LinkId(u32);
+
+pub(crate) struct StackEntry {
+ pub(crate) loc: Location,
+ pub(crate) kind: StackEntryKind,
+ pub(crate) nodes: Vec<TagNode>,
+}
+
+pub(crate) enum StackEntryKind {
+ Standard(Tag),
+ Outline(OutlineCtx),
+ OutlineEntry(Packed<OutlineEntry>),
+ Table(TableCtx),
+ TableCell(Packed<TableCell>),
+ Link(LinkId, Packed<LinkMarker>),
+}
+
+impl StackEntryKind {
+ pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> {
+ if let Self::Standard(v) = self {
+ Some(v)
+ } else {
+ None
+ }
+ }
+}
+
+#[derive(Clone)]
+pub(crate) enum TagNode {
+ Group(Tag, Vec<TagNode>),
+ Leaf(Identifier),
+ /// Allows inserting a placeholder into the tag tree.
+ /// Currently used for [`krilla::page::Page::add_tagged_annotation`].
+ Placeholder(Placeholder),
+}
+
+#[derive(Clone, Copy)]
+pub(crate) struct Placeholder(usize);
+
/// Automatically calls [`Surface::end_tagged`] when dropped.
pub(crate) struct TagHandle<'a, 'b> {
surface: &'b mut Surface<'a>,
@@ -556,7 +357,7 @@ impl Drop for TagHandle<'_, '_> {
impl<'a> TagHandle<'a, '_> {
pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> {
- &mut self.surface
+ self.surface
}
}
@@ -599,192 +400,10 @@ fn start_content<'a, 'b>(
TagHandle { surface }
}
-/// Add all annotations that were found in the page frame.
-pub(crate) fn add_annotations(
- gc: &mut GlobalContext,
- page: &mut Page,
- annotations: Vec<LinkAnnotation>,
-) {
- for annotation in annotations.into_iter() {
- let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } =
- annotation;
- let annot = krilla::annotation::Annotation::new_link(
- krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target),
- alt,
- );
- let annot_id = page.add_tagged_annotation(annot);
- gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id));
- }
-}
-
-pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) {
- if gc.tags.in_artifact.is_some() {
- // Don't nest artifacts
- return;
- }
-
- let loc = elem.location().unwrap();
-
- if let Some(artifact) = elem.to_packed::<ArtifactElem>() {
- let kind = artifact.kind(StyleChain::default());
- start_artifact(gc, loc, kind);
- return;
- } else if let Some(_) = elem.to_packed::<RepeatElem>() {
- start_artifact(gc, loc, ArtifactKind::Other);
- return;
- }
-
- let tag: Tag = if let Some(pdf_tag) = elem.to_packed::<PdfTagElem>() {
- let kind = pdf_tag.kind(StyleChain::default());
- match kind {
- PdfTagKind::Part => TagKind::Part.into(),
- _ => todo!(),
- }
- } else if let Some(heading) = elem.to_packed::<HeadingElem>() {
- let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX);
- let name = heading.body.plain_text().to_string();
- TagKind::Hn(level, Some(name)).into()
- } else if let Some(_) = elem.to_packed::<OutlineBody>() {
- push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()));
- return;
- } else if let Some(entry) = elem.to_packed::<OutlineEntry>() {
- push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()));
- return;
- } else if let Some(_) = elem.to_packed::<FigureElem>() {
- let alt = None; // TODO
- TagKind::Figure.with_alt_text(alt)
- } else if let Some(image) = elem.to_packed::<ImageElem>() {
- let alt = image.alt(StyleChain::default()).map(|s| s.to_string());
-
- let figure_tag = (gc.tags.parent())
- .and_then(StackEntryKind::as_standard_mut)
- .filter(|tag| tag.kind == TagKind::Figure);
- if let Some(figure_tag) = figure_tag {
- // Set alt text of outer figure tag, if not present.
- if figure_tag.alt_text.is_none() {
- figure_tag.alt_text = alt;
- }
- return;
- } else {
- TagKind::Figure.with_alt_text(alt)
- }
- } else if let Some(_) = elem.to_packed::<FigureCaption>() {
- TagKind::Caption.into()
- } else if let Some(table) = elem.to_packed::<TableElem>() {
- let table_id = gc.tags.next_table_id();
- let ctx = TableCtx::new(table_id, table.clone());
- push_stack(gc, loc, StackEntryKind::Table(ctx));
- return;
- } else if let Some(cell) = elem.to_packed::<TableCell>() {
- let parent = gc.tags.stack.last_mut().expect("table");
- let StackEntryKind::Table(table_ctx) = &mut parent.kind else {
- unreachable!("expected table")
- };
-
- // Only repeated table headers and footer cells are layed out multiple
- // times. Mark duplicate headers as artifacts, since they have no
- // semantic meaning in the tag tree, which doesn't use page breaks for
- // it's semantic structure.
- if table_ctx.contains(cell) {
- // TODO: currently the first layouted cell is picked to be part of
- // the tag tree, for repeating footers this will be the cell on the
- // first page. Maybe it should be the cell on the last page, but that
- // would require more changes in the layouting code, or a pre-pass
- // on the frames to figure out if there are other footers following.
- start_artifact(gc, loc, ArtifactKind::Other);
- } else {
- push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()));
- }
- return;
- } else if let Some(link) = elem.to_packed::<LinkMarker>() {
- let link_id = gc.tags.next_link_id();
- push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()));
- return;
- } else {
- return;
- };
-
- push_stack(gc, loc, StackEntryKind::Standard(tag));
-}
-
-fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) {
- if !gc.tags.context_supports(&kind) {
- // TODO: error or warning?
- }
-
- gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() });
-}
-
-pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
- if let Some((l, _)) = gc.tags.in_artifact {
- if l == loc {
- gc.tags.in_artifact = None;
- }
- return;
- }
-
- let Some(entry) = gc.tags.stack.pop_if(|e| e.loc == loc) else {
- return;
- };
-
- let node = match entry.kind {
- StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes),
- StackEntryKind::Outline(ctx) => {
- let nodes = ctx.build_outline(entry.nodes);
- TagNode::Group(TagKind::TOC.into(), nodes)
- }
- StackEntryKind::OutlineEntry(outline_entry) => {
- let parent = gc.tags.stack.last_mut().expect("outline");
- let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else {
- unreachable!("expected outline")
- };
-
- outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes);
-
- return;
- }
- StackEntryKind::Table(ctx) => {
- let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into);
- let nodes = ctx.build_table(entry.nodes);
- TagNode::Group(TagKind::Table(summary).into(), nodes)
- }
- StackEntryKind::TableCell(cell) => {
- let parent = gc.tags.stack.last_mut().expect("table");
- let StackEntryKind::Table(table_ctx) = &mut parent.kind else {
- unreachable!("expected table")
- };
-
- table_ctx.insert(cell, entry.nodes);
-
- return;
- }
- StackEntryKind::Link(_, link) => {
- let alt = link.alt.as_ref().map(EcoString::to_string);
- let tag = TagKind::Link.with_alt_text(alt);
- let mut node = TagNode::Group(tag, entry.nodes);
- // Wrap link in reference tag, if it's not a url.
- if let Destination::Position(_) | Destination::Location(_) = link.dest {
- node = TagNode::Group(TagKind::Reference.into(), vec![node]);
- }
- node
- }
- };
-
- gc.tags.push(node);
-}
-
fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) {
gc.tags.in_artifact = Some((loc, kind));
}
-fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
- match scope {
- TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both,
- TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column,
- TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row,
- }
-}
-
fn artifact_type(kind: ArtifactKind) -> ArtifactType {
match kind {
ArtifactKind::Header => ArtifactType::Header,
diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs
new file mode 100644
index 00000000..9fbeb8dc
--- /dev/null
+++ b/crates/typst-pdf/src/tags/outline.rs
@@ -0,0 +1,74 @@
+use krilla::tagging::TagKind;
+use typst_library::foundations::Packed;
+use typst_library::model::OutlineEntry;
+
+use crate::tags::TagNode;
+
+pub(crate) struct OutlineCtx {
+ stack: Vec<OutlineSection>,
+}
+
+impl OutlineCtx {
+ pub(crate) fn new() -> Self {
+ Self { stack: Vec::new() }
+ }
+
+ pub(crate) fn insert(
+ &mut self,
+ outline_nodes: &mut Vec<TagNode>,
+ entry: Packed<OutlineEntry>,
+ nodes: Vec<TagNode>,
+ ) {
+ let expected_len = entry.level.get() - 1;
+ if self.stack.len() < expected_len {
+ self.stack.resize_with(expected_len, OutlineSection::new);
+ } else {
+ while self.stack.len() > expected_len {
+ self.finish_section(outline_nodes);
+ }
+ }
+
+ let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes);
+ self.push(outline_nodes, section_entry);
+ }
+
+ fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) {
+ let sub_section = self.stack.pop().unwrap().into_tag();
+ self.push(outline_nodes, sub_section);
+ }
+
+ fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) {
+ match self.stack.last_mut() {
+ Some(section) => section.push(entry),
+ None => outline_nodes.push(entry),
+ }
+ }
+
+ pub(crate) fn build_outline(
+ mut self,
+ mut outline_nodes: Vec<TagNode>,
+ ) -> Vec<TagNode> {
+ while !self.stack.is_empty() {
+ self.finish_section(&mut outline_nodes);
+ }
+ outline_nodes
+ }
+}
+
+pub(crate) struct OutlineSection {
+ entries: Vec<TagNode>,
+}
+
+impl OutlineSection {
+ const fn new() -> Self {
+ OutlineSection { entries: Vec::new() }
+ }
+
+ fn push(&mut self, entry: TagNode) {
+ self.entries.push(entry);
+ }
+
+ fn into_tag(self) -> TagNode {
+ TagNode::Group(TagKind::TOC.into(), self.entries)
+ }
+}
diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs
new file mode 100644
index 00000000..240da4c3
--- /dev/null
+++ b/crates/typst-pdf/src/tags/table.rs
@@ -0,0 +1,330 @@
+use std::num::{NonZeroU32, NonZeroUsize};
+
+use krilla::tagging::{
+ TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId,
+ TagKind,
+};
+use typst_library::foundations::{Packed, Smart, StyleChain};
+use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope};
+
+use crate::tags::{TableId, TagNode};
+
+/// State collected for one table while tags are generated for its cells.
+pub(crate) struct TableCtx {
+ /// Identifier of this table; it is part of the tag IDs of header cells.
+ pub(crate) id: TableId,
+ /// The table element this context was created for.
+ pub(crate) table: Packed<TableElem>,
+ /// The cell grid, indexed as `rows[y][x]`.
+ rows: Vec<Vec<GridCell>>,
+}
+
+impl TableCtx {
+    /// Creates the context for `table` with an empty cell grid.
+    pub(crate) fn new(id: TableId, table: Packed<TableElem>) -> Self {
+        // `table` is already owned, so it is moved instead of cloned.
+        Self { id, table, rows: Vec::new() }
+    }
+
+    /// Returns the cell occupying grid position `(x, y)`.
+    ///
+    /// A position covered by a span resolves to the spanning cell. Returns
+    /// `None` if the position lies outside the grid or holds no cell.
+    fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
+        self.rows
+            .get(y)
+            .and_then(|row| row.get(x))
+            .and_then(|slot| self.resolve_cell(slot))
+    }
+
+    /// Returns the cell occupying grid position `(x, y)` for mutation.
+    ///
+    /// A position covered by a span resolves to the spanning cell. Returns
+    /// `None` if the position lies outside the grid or holds no cell.
+    fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> {
+        // First determine the coordinates of the owning cell through a shared
+        // borrow. The coordinates are `Copy`, so that borrow has ended by the
+        // time the grid is borrowed mutably below and no `unsafe` is needed.
+        let (x, y) = match *self.rows.get(y)?.get(x)? {
+            GridCell::Cell(_) => (x, y),
+            GridCell::Spanned(x, y) => (x, y),
+            GridCell::Missing => return None,
+        };
+        self.rows[y][x].as_cell_mut()
+    }
+
+    /// Returns whether the grid holds a cell at the position of `cell`.
+    ///
+    /// The cell's `x` and `y` must already be resolved; an unresolved
+    /// position is treated as unreachable.
+    pub(crate) fn contains(&self, cell: &Packed<TableCell>) -> bool {
+        let column = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
+        let row = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
+        let found = self.get(column, row);
+        found.is_some()
+    }
+
+    /// Resolves a grid slot to the cell it stands for.
+    ///
+    /// A slot holding a cell yields that cell, a spanned slot yields the cell
+    /// stored at the position it points to, and a missing slot yields `None`.
+    fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> {
+        match *cell {
+            GridCell::Cell(ref inner) => Some(inner),
+            GridCell::Spanned(col, row) => self.rows[row][col].as_cell(),
+            GridCell::Missing => None,
+        }
+    }
+
+    /// Stores `cell` together with its tag `nodes` in the grid.
+    ///
+    /// The grid grows as needed to fit the cell. Every position covered by
+    /// the cell's row- and colspan refers back to the cell's origin, where
+    /// the cell itself is stored. The cell's `x` and `y` must already be
+    /// resolved; an unresolved position is treated as unreachable.
+    pub(crate) fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) {
+        let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
+        let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
+        let rowspan = cell.rowspan(StyleChain::default());
+        let colspan = cell.colspan(StyleChain::default());
+        let kind = cell.kind(StyleChain::default());
+
+        // Extend the table grid to fit this cell. All rows are widened, not
+        // only row `y`: a cell may span into rows that already exist, and
+        // those must be wide enough for the writes below. Keeping the grid
+        // rectangular also matches `build_table`, which takes the length of
+        // the first row as the table width.
+        let required_height = y + rowspan.get();
+        let required_width = x + colspan.get();
+        let width = self.rows.first().map_or(0, Vec::len).max(required_width);
+        if self.rows.len() < required_height {
+            self.rows.resize_with(required_height, Vec::new);
+        }
+        for row in &mut self.rows {
+            if row.len() < width {
+                row.resize_with(width, || GridCell::Missing);
+            }
+        }
+
+        // Store references to the cell for all spanned cells.
+        for i in y..required_height {
+            for j in x..required_width {
+                self.rows[i][j] = GridCell::Spanned(x, y);
+            }
+        }
+
+        // The origin was marked as spanned above; replace it with the cell.
+        self.rows[y][x] = GridCell::Cell(TableCtxCell {
+            x: x as u32,
+            y: y as u32,
+            rowspan,
+            colspan,
+            kind,
+            headers: TableCellHeaders::NONE,
+            nodes,
+        });
+    }
+
+ /// Consumes the context and appends the tags for the collected cells to
+ /// `nodes`, which is then returned.
+ ///
+ /// Each grid row becomes a `TR` group. If no row mixes explicit cell kinds
+ /// and all header cells are column-scoped, consecutive rows of the same
+ /// kind are additionally wrapped in `THead`, `TBody` or `TFoot` groups.
+ /// Returns `nodes` unchanged if no cell was ever inserted.
+ pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> {
+ // Table layouting ensures that there are no overlapping cells, and that
+ // any gaps left by the user are filled with empty cells.
+ if self.rows.is_empty() {
+ return nodes;
+ }
+ let height = self.rows.len();
+ // NOTE(review): the first row's length is used as the table width, which
+ // assumes that all rows are equally long.
+ let width = self.rows[0].len();
+
+ // Only generate row groups such as `THead`, `TFoot`, and `TBody` if
+ // there are no rows with mixed cell kinds.
+ let mut gen_row_groups = true;
+ let row_kinds = (self.rows.iter())
+ .map(|row| {
+ row.iter()
+ .filter_map(|cell| self.resolve_cell(cell))
+ .map(|cell| cell.kind)
+ .fold(Smart::Auto, |a, b| {
+ // A header that isn't column-scoped rules out row groups.
+ if let Smart::Custom(TableCellKind::Header(_, scope)) = b {
+ gen_row_groups &= scope == TableHeaderScope::Column;
+ }
+ // So do two explicit kinds in one row that differ.
+ if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) {
+ gen_row_groups &= a == b;
+ }
+ // Presumably keeps `a` unless it is `Auto` — confirm against
+ // `Smart::or`.
+ a.or(b)
+ })
+ // A row without any explicit kind counts as a data row.
+ .unwrap_or(TableCellKind::Data)
+ })
+ .collect::<Vec<_>>();
+
+ // Fixup all missing cell kinds.
+ for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) {
+ let default_kind =
+ if gen_row_groups { row_kind } else { TableCellKind::Data };
+ for cell in row.iter_mut() {
+ let Some(cell) = cell.as_cell_mut() else { continue };
+ cell.kind = cell.kind.or(Smart::Custom(default_kind));
+ }
+ }
+
+ // Explicitly set the headers attribute for cells.
+ // First walk every column from top to bottom ...
+ for x in 0..width {
+ let mut column_header = None;
+ for y in 0..height {
+ self.resolve_cell_headers(
+ (x, y),
+ &mut column_header,
+ TableHeaderScope::refers_to_column,
+ );
+ }
+ }
+ // ... then every row from left to right.
+ for y in 0..height {
+ let mut row_header = None;
+ for x in 0..width {
+ self.resolve_cell_headers(
+ (x, y),
+ &mut row_header,
+ TableHeaderScope::refers_to_row,
+ );
+ }
+ }
+
+ // Rows of the same kind are collected in `row_chunk` until the kind
+ // changes. Only used when row groups are generated.
+ let mut chunk_kind = row_kinds[0];
+ let mut row_chunk = Vec::new();
+ for (row, row_kind) in self.rows.into_iter().zip(row_kinds) {
+ let row_nodes = row
+ .into_iter()
+ .filter_map(|cell| {
+ // Spanned and missing grid slots produce no tag of their own.
+ let cell = cell.into_cell()?;
+ // NOTE(review): presumably converts the spans to a narrower
+ // integer type; the `unwrap` panics if a span doesn't fit.
+ let span = TableCellSpan {
+ rows: cell.rowspan.try_into().unwrap(),
+ cols: cell.colspan.try_into().unwrap(),
+ };
+ let tag = match cell.unwrap_kind() {
+ TableCellKind::Header(_, scope) => {
+ // Header cells get an ID derived from their origin, so that
+ // the `headers` of other cells can refer to them.
+ let id = table_cell_id(self.id, cell.x, cell.y);
+ let scope = table_header_scope(scope);
+ TagKind::TH(
+ TableHeaderCell::new(scope)
+ .with_span(span)
+ .with_headers(cell.headers),
+ )
+ .with_id(Some(id))
+ }
+ TableCellKind::Footer | TableCellKind::Data => TagKind::TD(
+ TableDataCell::new()
+ .with_span(span)
+ .with_headers(cell.headers),
+ )
+ .into(),
+ };
+
+ Some(TagNode::Group(tag, cell.nodes))
+ })
+ .collect();
+
+ let row = TagNode::Group(TagKind::TR.into(), row_nodes);
+
+ // Push the `TR` tags directly.
+ if !gen_row_groups {
+ nodes.push(row);
+ continue;
+ }
+
+ // Generate row groups.
+ // The kind changed: flush the rows collected so far as one group.
+ if !should_group_rows(chunk_kind, row_kind) {
+ let tag = match chunk_kind {
+ TableCellKind::Header(..) => TagKind::THead,
+ TableCellKind::Footer => TagKind::TFoot,
+ TableCellKind::Data => TagKind::TBody,
+ };
+ nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk)));
+
+ chunk_kind = row_kind;
+ }
+ row_chunk.push(row);
+ }
+
+ // Flush the trailing group.
+ if !row_chunk.is_empty() {
+ let tag = match chunk_kind {
+ TableCellKind::Header(..) => TagKind::THead,
+ TableCellKind::Footer => TagKind::TFoot,
+ TableCellKind::Data => TagKind::TBody,
+ };
+ nodes.push(TagNode::Group(tag.into(), row_chunk));
+ }
+
+ nodes
+ }
+
+    /// Records on the cell at grid position `(x, y)` which header applies to
+    /// it in one direction, and updates the running header state.
+    ///
+    /// Meant to be called for successive positions along one column or row.
+    /// `current_header` carries the level and tag ID of the last header seen
+    /// so far, and `refers_to_dir` decides whether a header scope applies to
+    /// the direction being walked. Positions without a cell are skipped.
+    fn resolve_cell_headers<F>(
+        &mut self,
+        (x, y): (usize, usize),
+        current_header: &mut Option<(NonZeroU32, TagId)>,
+        refers_to_dir: F,
+    ) where
+        F: Fn(&TableHeaderScope) -> bool,
+    {
+        let table_id = self.id;
+        let Some(cell) = self.get_mut(x, y) else { return };
+
+        // The level of this cell if it is itself a header in this direction.
+        let own_level = match cell.unwrap_kind() {
+            TableCellKind::Header(level, scope) if refers_to_dir(&scope) => Some(level),
+            _ => None,
+        };
+
+        if let Some((prev_level, header_id)) = current_header.as_ref() {
+            // The `Headers` attribute is also set for parent headers: a header
+            // in this direction only references a header of a lower level.
+            let is_parent_header = own_level.map_or(true, |level| *prev_level < level);
+            if is_parent_header && !cell.headers.ids.contains(header_id) {
+                cell.headers.ids.push(header_id.clone());
+            }
+        }
+
+        if let Some(level) = own_level {
+            // Use the origin of the cell rather than the visited grid position.
+            // For a spanning header the two differ, and `build_table` derives
+            // the ID of the emitted `TH` tag from the cell's origin.
+            let tag_id = table_cell_id(table_id, cell.x, cell.y);
+            *current_header = Some((level, tag_id));
+        }
+    }
+}
+
+/// One slot of the table grid.
+#[derive(Clone, Default)]
+enum GridCell {
+    /// The origin of a cell, holding the cell's data.
+    Cell(TableCtxCell),
+    /// A slot covered by the cell stored at grid position `(x, y)`.
+    Spanned(usize, usize),
+    /// A slot for which no cell has been inserted.
+    #[default]
+    Missing,
+}
+
+impl GridCell {
+    /// Borrows the cell if this slot is a cell origin.
+    fn as_cell(&self) -> Option<&TableCtxCell> {
+        match self {
+            Self::Cell(cell) => Some(cell),
+            Self::Spanned(..) | Self::Missing => None,
+        }
+    }
+
+    /// Mutably borrows the cell if this slot is a cell origin.
+    fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> {
+        match self {
+            Self::Cell(cell) => Some(cell),
+            Self::Spanned(..) | Self::Missing => None,
+        }
+    }
+
+    /// Extracts the cell if this slot is a cell origin.
+    fn into_cell(self) -> Option<TableCtxCell> {
+        match self {
+            Self::Cell(cell) => Some(cell),
+            Self::Spanned(..) | Self::Missing => None,
+        }
+    }
+}
+
+/// The data kept for one table cell until the table's tags are built.
+#[derive(Clone)]
+struct TableCtxCell {
+    /// Column of the cell's origin.
+    x: u32,
+    /// Row of the cell's origin.
+    y: u32,
+    /// Number of rows the cell covers.
+    rowspan: NonZeroUsize,
+    /// Number of columns the cell covers.
+    colspan: NonZeroUsize,
+    /// The cell kind; `Auto` until it is filled in while building the table.
+    kind: Smart<TableCellKind>,
+    /// IDs of the header cells that apply to this cell.
+    headers: TableCellHeaders,
+    /// The tag nodes of the cell's content.
+    nodes: Vec<TagNode>,
+}
+
+impl TableCtxCell {
+    /// Returns the cell kind, which must have been resolved already; an
+    /// `Auto` kind is treated as unreachable.
+    fn unwrap_kind(&self) -> TableCellKind {
+        match self.kind {
+            Smart::Custom(kind) => kind,
+            Smart::Auto => unreachable!(),
+        }
+    }
+}
+
+/// Returns whether two rows of the given kinds belong to the same row group.
+///
+/// This holds when both are header rows (regardless of level and scope), both
+/// are footer rows, or both are data rows.
+fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
+    matches!(
+        (a, b),
+        (TableCellKind::Header(..), TableCellKind::Header(..))
+            | (TableCellKind::Footer, TableCellKind::Footer)
+            | (TableCellKind::Data, TableCellKind::Data)
+    )
+}
+
+/// Builds the tag ID of the cell at column `x` and row `y` of a table.
+///
+/// The ID consists of 12 bytes: the table ID, `x` and `y`, four bytes each,
+/// in native byte order.
+fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
+    let mut bytes = [0u8; 12];
+    {
+        let (table_part, rest) = bytes.split_at_mut(4);
+        let (x_part, y_part) = rest.split_at_mut(4);
+        table_part.copy_from_slice(&table_id.0.to_ne_bytes());
+        x_part.copy_from_slice(&x.to_ne_bytes());
+        y_part.copy_from_slice(&y.to_ne_bytes());
+    }
+    TagId::from_bytes(&bytes)
+}
+
+/// Converts a typst table header scope into the equally named krilla scope.
+fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
+    use krilla::tagging::TableHeaderScope as PdfScope;
+
+    match scope {
+        TableHeaderScope::Row => PdfScope::Row,
+        TableHeaderScope::Column => PdfScope::Column,
+        TableHeaderScope::Both => PdfScope::Both,
+    }
+}