diff options
| -rw-r--r-- | crates/typst-pdf/src/tags/mod.rs (renamed from crates/typst-pdf/src/tags.rs) | 785 | ||||
| -rw-r--r-- | crates/typst-pdf/src/tags/outline.rs | 74 | ||||
| -rw-r--r-- | crates/typst-pdf/src/tags/table.rs | 330 |
3 files changed, 606 insertions, 583 deletions
diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags/mod.rs index 9f49024f..99b52d55 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,463 +1,221 @@ use std::cell::OnceCell; -use std::num::{NonZeroU32, NonZeroUsize}; +use std::num::NonZeroU32; use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan, - TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup, + TagKind, TagTree, }; -use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope, + OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use crate::link::LinkAnnotation; +use crate::tags::outline::OutlineCtx; +use crate::tags::table::TableCtx; -pub(crate) struct Tags { - /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec<StackEntry>, - /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. - pub(crate) placeholders: Vec<OnceCell<Node>>, - pub(crate) in_artifact: Option<(Location, ArtifactKind)>, - /// Used to group multiple link annotations using quad points. - pub(crate) link_id: LinkId, - /// Used to generate IDs referenced in table `Headers` attributes. - /// The IDs must be document wide unique. - pub(crate) table_id: TableId, - - /// The output. - pub(crate) tree: Vec<TagNode>, -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct TableId(u32); - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct LinkId(u32); - -pub(crate) struct StackEntry { - pub(crate) loc: Location, - pub(crate) kind: StackEntryKind, - pub(crate) nodes: Vec<TagNode>, -} - -pub(crate) enum StackEntryKind { - Standard(Tag), - Outline(OutlineCtx), - OutlineEntry(Packed<OutlineEntry>), - Table(TableCtx), - TableCell(Packed<TableCell>), - Link(LinkId, Packed<LinkMarker>), -} - -impl StackEntryKind { - pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { - if let Self::Standard(v) = self { - Some(v) - } else { - None - } - } -} - -pub(crate) struct OutlineCtx { - stack: Vec<OutlineSection>, -} - -pub(crate) struct OutlineSection { - entries: Vec<TagNode>, -} - -impl OutlineSection { - const fn new() -> Self { - OutlineSection { entries: Vec::new() } - } - - fn push(&mut self, entry: TagNode) { - self.entries.push(entry); - } - - fn into_tag(self) -> TagNode { - TagNode::Group(TagKind::TOC.into(), self.entries) - } -} - -impl OutlineCtx { - fn new() -> Self { - Self { stack: Vec::new() } - } - - fn insert( - &mut self, - outline_nodes: &mut Vec<TagNode>, - entry: Packed<OutlineEntry>, - nodes: Vec<TagNode>, - ) { - let expected_len = entry.level.get() - 1; - if self.stack.len() < expected_len { - self.stack.resize_with(expected_len, || OutlineSection::new()); - } else { - while self.stack.len() > expected_len { - self.finish_section(outline_nodes); - } - } +mod outline; +mod table; - let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); - self.push(outline_nodes, section_entry); - } - - fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) { - let sub_section = self.stack.pop().unwrap().into_tag(); - self.push(outline_nodes, sub_section); +pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { + if gc.tags.in_artifact.is_some() { + // Don't nest artifacts + return; } - fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) { - match self.stack.last_mut() { - Some(section) => section.push(entry), - None => outline_nodes.push(entry), - } - } + let loc = elem.location().unwrap(); - fn build_outline(mut self, mut outline_nodes: Vec<TagNode>) -> Vec<TagNode> { - while self.stack.len() > 0 { - self.finish_section(&mut outline_nodes); - } - outline_nodes + if let Some(artifact) = elem.to_packed::<ArtifactElem>() { + let kind = artifact.kind(StyleChain::default()); + start_artifact(gc, loc, kind); + return; + } else if let Some(_) = elem.to_packed::<RepeatElem>() { + start_artifact(gc, loc, ArtifactKind::Other); + return; } -} - -pub(crate) struct TableCtx { - id: TableId, - table: Packed<TableElem>, - rows: Vec<Vec<GridCell>>, -} -#[derive(Clone, Default)] -enum GridCell { - Cell(TableCtxCell), - Spanned(usize, usize), - #[default] - Missing, -} - -impl GridCell { - fn as_cell(&self) -> Option<&TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) - } else { - None + let tag: Tag = if let Some(pdf_tag) = elem.to_packed::<PdfTagElem>() { + let kind = pdf_tag.kind(StyleChain::default()); + match kind { + PdfTagKind::Part => TagKind::Part.into(), + _ => todo!(), } - } + } else if let Some(heading) = elem.to_packed::<HeadingElem>() { + let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); + let name = heading.body.plain_text().to_string(); + TagKind::Hn(level, Some(name)).into() + } else if let Some(_) = elem.to_packed::<OutlineBody>() { + push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); + return; + } else if let Some(entry) = elem.to_packed::<OutlineEntry>() { + push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone())); + return; + } else if let Some(_) = elem.to_packed::<FigureElem>() { + let alt = None; // TODO + TagKind::Figure.with_alt_text(alt) + } else if let Some(image) = elem.to_packed::<ImageElem>() { + let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); - fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) + let figure_tag = (gc.tags.parent()) + .and_then(StackEntryKind::as_standard_mut) + .filter(|tag| tag.kind == TagKind::Figure); + if let Some(figure_tag) = figure_tag { + // Set alt text of outer figure tag, if not present. + if figure_tag.alt_text.is_none() { + figure_tag.alt_text = alt; + } + return; } else { - None + TagKind::Figure.with_alt_text(alt) } - } + } else if let Some(_) = elem.to_packed::<FigureCaption>() { + TagKind::Caption.into() + } else if let Some(table) = elem.to_packed::<TableElem>() { + let table_id = gc.tags.next_table_id(); + let ctx = TableCtx::new(table_id, table.clone()); + push_stack(gc, loc, StackEntryKind::Table(ctx)); + return; + } else if let Some(cell) = elem.to_packed::<TableCell>() { + let parent = gc.tags.stack.last_mut().expect("table"); + let StackEntryKind::Table(table_ctx) = &mut parent.kind else { + unreachable!("expected table") + }; - fn into_cell(self) -> Option<TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) + // Only repeated table headers and footer cells are layed out multiple + // times. Mark duplicate headers as artifacts, since they have no + // semantic meaning in the tag tree, which doesn't use page breaks for + // it's semantic structure. + if table_ctx.contains(cell) { + // TODO: currently the first layouted cell is picked to be part of + // the tag tree, for repeating footers this will be the cell on the + // first page. Maybe it should be the cell on the last page, but that + // would require more changes in the layouting code, or a pre-pass + // on the frames to figure out if there are other footers following. + start_artifact(gc, loc, ArtifactKind::Other); } else { - None + push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); } - } -} - -#[derive(Clone)] -struct TableCtxCell { - x: u32, - y: u32, - rowspan: NonZeroUsize, - colspan: NonZeroUsize, - kind: Smart<TableCellKind>, - headers: TableCellHeaders, - nodes: Vec<TagNode>, -} + return; + } else if let Some(link) = elem.to_packed::<LinkMarker>() { + let link_id = gc.tags.next_link_id(); + push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); + return; + } else { + return; + }; -impl TableCtxCell { - fn unwrap_kind(&self) -> TableCellKind { - self.kind.unwrap_or_else(|| unreachable!()) - } + push_stack(gc, loc, StackEntryKind::Standard(tag)); } -impl TableCtx { - fn new(id: TableId, table: Packed<TableElem>) -> Self { - Self { id, table: table.clone(), rows: Vec::new() } - } - - fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { - let cell = self.rows.get(y)?.get(x)?; - self.resolve_cell(cell) - } - - fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { - let cell = self.rows.get_mut(y)?.get_mut(x)?; - match cell { - GridCell::Cell(cell) => { - // HACK: Workaround for the second mutable borrow when resolving - // the spanned cell. - Some(unsafe { std::mem::transmute(cell) }) - } - &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), - GridCell::Missing => None, - } +fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) { + if !gc.tags.context_supports(&kind) { + // TODO: error or warning? } - fn contains(&self, cell: &Packed<TableCell>) -> bool { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - self.get(x, y).is_some() - } + gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); +} - fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { - match cell { - GridCell::Cell(cell) => Some(cell), - &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), - GridCell::Missing => None, +pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { + if let Some((l, _)) = gc.tags.in_artifact { + if l == loc { + gc.tags.in_artifact = None; } + return; } - fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let rowspan = cell.rowspan(StyleChain::default()); - let colspan = cell.colspan(StyleChain::default()); - let kind = cell.kind(StyleChain::default()); - - // Extend the table grid to fit this cell. - let required_height = y + rowspan.get(); - let required_width = x + colspan.get(); - if self.rows.len() < required_height { - self.rows - .resize(required_height, vec![GridCell::Missing; required_width]); - } - let row = &mut self.rows[y]; - if row.len() < required_width { - row.resize_with(required_width, || GridCell::Missing); - } - - // Store references to the cell for all spanned cells. - for i in y..y + rowspan.get() { - for j in x..x + colspan.get() { - self.rows[i][j] = GridCell::Spanned(x, y); - } - } - - self.rows[y][x] = GridCell::Cell(TableCtxCell { - x: x as u32, - y: y as u32, - rowspan, - colspan, - kind, - headers: TableCellHeaders::NONE, - nodes, - }); - } + let Some(entry) = gc.tags.stack.pop_if(|e| e.loc == loc) else { + return; + }; - fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> { - // Table layouting ensures that there are no overlapping cells, and that - // any gaps left by the user are filled with empty cells. - if self.rows.is_empty() { - return nodes; + let node = match entry.kind { + StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), + StackEntryKind::Outline(ctx) => { + let nodes = ctx.build_outline(entry.nodes); + TagNode::Group(TagKind::TOC.into(), nodes) } - let height = self.rows.len(); - let width = self.rows[0].len(); - - // Only generate row groups such as `THead`, `TFoot`, and `TBody` if - // there are no rows with mixed cell kinds. - let mut gen_row_groups = true; - let row_kinds = (self.rows.iter()) - .map(|row| { - row.iter() - .filter_map(|cell| self.resolve_cell(cell)) - .map(|cell| cell.kind) - .fold(Smart::Auto, |a, b| { - if let Smart::Custom(TableCellKind::Header(_, scope)) = b { - gen_row_groups &= scope == TableHeaderScope::Column; - } - if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { - gen_row_groups &= a == b; - } - a.or(b) - }) - .unwrap_or(TableCellKind::Data) - }) - .collect::<Vec<_>>(); + StackEntryKind::OutlineEntry(outline_entry) => { + let parent = gc.tags.stack.last_mut().expect("outline"); + let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else { + unreachable!("expected outline") + }; - // Fixup all missing cell kinds. - for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { - let default_kind = - if gen_row_groups { row_kind } else { TableCellKind::Data }; - for cell in row.iter_mut() { - let Some(cell) = cell.as_cell_mut() else { continue }; - cell.kind = cell.kind.or(Smart::Custom(default_kind)); - } - } + outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes); - // Explicitly set the headers attribute for cells. - for x in 0..width { - let mut column_header = None; - for y in 0..height { - self.resolve_cell_headers( - (x, y), - &mut column_header, - TableHeaderScope::refers_to_column, - ); - } - } - for y in 0..height { - let mut row_header = None; - for x in 0..width { - self.resolve_cell_headers( - (x, y), - &mut row_header, - TableHeaderScope::refers_to_row, - ); - } + return; } - - let mut chunk_kind = row_kinds[0]; - let mut row_chunk = Vec::new(); - for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { - let row_nodes = row - .into_iter() - .filter_map(|cell| { - let cell = cell.into_cell()?; - let span = TableCellSpan { - rows: cell.rowspan.try_into().unwrap(), - cols: cell.colspan.try_into().unwrap(), - }; - let tag = match cell.unwrap_kind() { - TableCellKind::Header(_, scope) => { - let id = table_cell_id(self.id, cell.x, cell.y); - let scope = table_header_scope(scope); - TagKind::TH( - TableHeaderCell::new(scope) - .with_span(span) - .with_headers(cell.headers), - ) - .with_id(Some(id)) - } - TableCellKind::Footer | TableCellKind::Data => TagKind::TD( - TableDataCell::new() - .with_span(span) - .with_headers(cell.headers), - ) - .into(), - }; - - Some(TagNode::Group(tag, cell.nodes)) - }) - .collect(); - - let row = TagNode::Group(TagKind::TR.into(), row_nodes); - - // Push the `TR` tags directly. - if !gen_row_groups { - nodes.push(row); - continue; - } - - // Generate row groups. - if !should_group_rows(chunk_kind, row_kind) { - let tag = match chunk_kind { - TableCellKind::Header(..) => TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, - }; - nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); - - chunk_kind = row_kind; - } - row_chunk.push(row); + StackEntryKind::Table(ctx) => { + let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into); + let nodes = ctx.build_table(entry.nodes); + TagNode::Group(TagKind::Table(summary).into(), nodes) } - - if !row_chunk.is_empty() { - let tag = match chunk_kind { - TableCellKind::Header(..) => TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, + StackEntryKind::TableCell(cell) => { + let parent = gc.tags.stack.last_mut().expect("table"); + let StackEntryKind::Table(table_ctx) = &mut parent.kind else { + unreachable!("expected table") }; - nodes.push(TagNode::Group(tag.into(), row_chunk)); - } - - nodes - } - fn resolve_cell_headers<F>( - &mut self, - (x, y): (usize, usize), - current_header: &mut Option<(NonZeroU32, TagId)>, - refers_to_dir: F, - ) where - F: Fn(&TableHeaderScope) -> bool, - { - let table_id = self.id; - let Some(cell) = self.get_mut(x, y) else { return }; - - if let Some((prev_level, cell_id)) = current_header.clone() { - // The `Headers` attribute is also set for parent headers. - let mut is_parent_header = true; - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - is_parent_header = prev_level < level; - } - } + table_ctx.insert(cell, entry.nodes); - if is_parent_header && !cell.headers.ids.contains(&cell_id) { - cell.headers.ids.push(cell_id.clone()); - } + return; } - - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - let tag_id = table_cell_id(table_id, x as u32, y as u32); - *current_header = Some((level, tag_id)); + StackEntryKind::Link(_, link) => { + let alt = link.alt.as_ref().map(EcoString::to_string); + let tag = TagKind::Link.with_alt_text(alt); + let mut node = TagNode::Group(tag, entry.nodes); + // Wrap link in reference tag, if it's not a url. + if let Destination::Position(_) | Destination::Location(_) = link.dest { + node = TagNode::Group(TagKind::Reference.into(), vec![node]); } + node } - } + }; + + gc.tags.push(node); } -fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { - match (a, b) { - (TableCellKind::Header(..), TableCellKind::Header(..)) => true, - (TableCellKind::Footer, TableCellKind::Footer) => true, - (TableCellKind::Data, TableCellKind::Data) => true, - (_, _) => false, +/// Add all annotations that were found in the page frame. +pub(crate) fn add_annotations( + gc: &mut GlobalContext, + page: &mut Page, + annotations: Vec<LinkAnnotation>, +) { + for annotation in annotations.into_iter() { + let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = + annotation; + let annot = krilla::annotation::Annotation::new_link( + krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), + alt, + ); + let annot_id = page.add_tagged_annotation(annot); + gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); } } -fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { - let mut bytes = [0; 12]; - bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); - bytes[4..8].copy_from_slice(&x.to_ne_bytes()); - bytes[8..12].copy_from_slice(&y.to_ne_bytes()); - TagId::from_bytes(&bytes) -} +pub(crate) struct Tags { + /// The intermediary stack of nested tag groups. + pub(crate) stack: Vec<StackEntry>, + /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. + pub(crate) placeholders: Vec<OnceCell<Node>>, + pub(crate) in_artifact: Option<(Location, ArtifactKind)>, + /// Used to group multiple link annotations using quad points. + pub(crate) link_id: LinkId, + /// Used to generate IDs referenced in table `Headers` attributes. + /// The IDs must be document wide unique. + pub(crate) table_id: TableId, -#[derive(Clone)] -pub(crate) enum TagNode { - Group(Tag, Vec<TagNode>), - Leaf(Identifier), - /// Allows inserting a placeholder into the tag tree. - /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. - Placeholder(Placeholder), + /// The output. + pub(crate) tree: Vec<TagNode>, } -#[derive(Clone, Copy)] -pub(crate) struct Placeholder(usize); - impl Tags { pub(crate) fn new() -> Self { Self { @@ -543,6 +301,49 @@ impl Tags { } } +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct TableId(u32); + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct LinkId(u32); + +pub(crate) struct StackEntry { + pub(crate) loc: Location, + pub(crate) kind: StackEntryKind, + pub(crate) nodes: Vec<TagNode>, +} + +pub(crate) enum StackEntryKind { + Standard(Tag), + Outline(OutlineCtx), + OutlineEntry(Packed<OutlineEntry>), + Table(TableCtx), + TableCell(Packed<TableCell>), + Link(LinkId, Packed<LinkMarker>), +} + +impl StackEntryKind { + pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { + if let Self::Standard(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +pub(crate) enum TagNode { + Group(Tag, Vec<TagNode>), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. + Placeholder(Placeholder), +} + +#[derive(Clone, Copy)] +pub(crate) struct Placeholder(usize); + /// Automatically calls [`Surface::end_tagged`] when dropped. pub(crate) struct TagHandle<'a, 'b> { surface: &'b mut Surface<'a>, @@ -556,7 +357,7 @@ impl Drop for TagHandle<'_, '_> { impl<'a> TagHandle<'a, '_> { pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { - &mut self.surface + self.surface } } @@ -599,192 +400,10 @@ fn start_content<'a, 'b>( TagHandle { surface } } -/// Add all annotations that were found in the page frame. -pub(crate) fn add_annotations( - gc: &mut GlobalContext, - page: &mut Page, - annotations: Vec<LinkAnnotation>, -) { - for annotation in annotations.into_iter() { - let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = - annotation; - let annot = krilla::annotation::Annotation::new_link( - krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), - alt, - ); - let annot_id = page.add_tagged_annotation(annot); - gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); - } -} - -pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { - if gc.tags.in_artifact.is_some() { - // Don't nest artifacts - return; - } - - let loc = elem.location().unwrap(); - - if let Some(artifact) = elem.to_packed::<ArtifactElem>() { - let kind = artifact.kind(StyleChain::default()); - start_artifact(gc, loc, kind); - return; - } else if let Some(_) = elem.to_packed::<RepeatElem>() { - start_artifact(gc, loc, ArtifactKind::Other); - return; - } - - let tag: Tag = if let Some(pdf_tag) = elem.to_packed::<PdfTagElem>() { - let kind = pdf_tag.kind(StyleChain::default()); - match kind { - PdfTagKind::Part => TagKind::Part.into(), - _ => todo!(), - } - } else if let Some(heading) = elem.to_packed::<HeadingElem>() { - let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); - let name = heading.body.plain_text().to_string(); - TagKind::Hn(level, Some(name)).into() - } else if let Some(_) = elem.to_packed::<OutlineBody>() { - push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); - return; - } else if let Some(entry) = elem.to_packed::<OutlineEntry>() { - push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone())); - return; - } else if let Some(_) = elem.to_packed::<FigureElem>() { - let alt = None; // TODO - TagKind::Figure.with_alt_text(alt) - } else if let Some(image) = elem.to_packed::<ImageElem>() { - let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); - - let figure_tag = (gc.tags.parent()) - .and_then(StackEntryKind::as_standard_mut) - .filter(|tag| tag.kind == TagKind::Figure); - if let Some(figure_tag) = figure_tag { - // Set alt text of outer figure tag, if not present. - if figure_tag.alt_text.is_none() { - figure_tag.alt_text = alt; - } - return; - } else { - TagKind::Figure.with_alt_text(alt) - } - } else if let Some(_) = elem.to_packed::<FigureCaption>() { - TagKind::Caption.into() - } else if let Some(table) = elem.to_packed::<TableElem>() { - let table_id = gc.tags.next_table_id(); - let ctx = TableCtx::new(table_id, table.clone()); - push_stack(gc, loc, StackEntryKind::Table(ctx)); - return; - } else if let Some(cell) = elem.to_packed::<TableCell>() { - let parent = gc.tags.stack.last_mut().expect("table"); - let StackEntryKind::Table(table_ctx) = &mut parent.kind else { - unreachable!("expected table") - }; - - // Only repeated table headers and footer cells are layed out multiple - // times. Mark duplicate headers as artifacts, since they have no - // semantic meaning in the tag tree, which doesn't use page breaks for - // it's semantic structure. - if table_ctx.contains(cell) { - // TODO: currently the first layouted cell is picked to be part of - // the tag tree, for repeating footers this will be the cell on the - // first page. Maybe it should be the cell on the last page, but that - // would require more changes in the layouting code, or a pre-pass - // on the frames to figure out if there are other footers following. - start_artifact(gc, loc, ArtifactKind::Other); - } else { - push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); - } - return; - } else if let Some(link) = elem.to_packed::<LinkMarker>() { - let link_id = gc.tags.next_link_id(); - push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); - return; - } else { - return; - }; - - push_stack(gc, loc, StackEntryKind::Standard(tag)); -} - -fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) { - if !gc.tags.context_supports(&kind) { - // TODO: error or warning? - } - - gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); -} - -pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { - if let Some((l, _)) = gc.tags.in_artifact { - if l == loc { - gc.tags.in_artifact = None; - } - return; - } - - let Some(entry) = gc.tags.stack.pop_if(|e| e.loc == loc) else { - return; - }; - - let node = match entry.kind { - StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), - StackEntryKind::Outline(ctx) => { - let nodes = ctx.build_outline(entry.nodes); - TagNode::Group(TagKind::TOC.into(), nodes) - } - StackEntryKind::OutlineEntry(outline_entry) => { - let parent = gc.tags.stack.last_mut().expect("outline"); - let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else { - unreachable!("expected outline") - }; - - outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes); - - return; - } - StackEntryKind::Table(ctx) => { - let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into); - let nodes = ctx.build_table(entry.nodes); - TagNode::Group(TagKind::Table(summary).into(), nodes) - } - StackEntryKind::TableCell(cell) => { - let parent = gc.tags.stack.last_mut().expect("table"); - let StackEntryKind::Table(table_ctx) = &mut parent.kind else { - unreachable!("expected table") - }; - - table_ctx.insert(cell, entry.nodes); - - return; - } - StackEntryKind::Link(_, link) => { - let alt = link.alt.as_ref().map(EcoString::to_string); - let tag = TagKind::Link.with_alt_text(alt); - let mut node = TagNode::Group(tag, entry.nodes); - // Wrap link in reference tag, if it's not a url. - if let Destination::Position(_) | Destination::Location(_) = link.dest { - node = TagNode::Group(TagKind::Reference.into(), vec![node]); - } - node - } - }; - - gc.tags.push(node); -} - fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { gc.tags.in_artifact = Some((loc, kind)); } -fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { - match scope { - TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, - TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, - TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, - } -} - fn artifact_type(kind: ArtifactKind) -> ArtifactType { match kind { ArtifactKind::Header => ArtifactType::Header, diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs new file mode 100644 index 00000000..9fbeb8dc --- /dev/null +++ b/crates/typst-pdf/src/tags/outline.rs @@ -0,0 +1,74 @@ +use krilla::tagging::TagKind; +use typst_library::foundations::Packed; +use typst_library::model::OutlineEntry; + +use crate::tags::TagNode; + +pub(crate) struct OutlineCtx { + stack: Vec<OutlineSection>, +} + +impl OutlineCtx { + pub(crate) fn new() -> Self { + Self { stack: Vec::new() } + } + + pub(crate) fn insert( + &mut self, + outline_nodes: &mut Vec<TagNode>, + entry: Packed<OutlineEntry>, + nodes: Vec<TagNode>, + ) { + let expected_len = entry.level.get() - 1; + if self.stack.len() < expected_len { + self.stack.resize_with(expected_len, OutlineSection::new); + } else { + while self.stack.len() > expected_len { + self.finish_section(outline_nodes); + } + } + + let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); + self.push(outline_nodes, section_entry); + } + + fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) { + let sub_section = self.stack.pop().unwrap().into_tag(); + self.push(outline_nodes, sub_section); + } + + fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) { + match self.stack.last_mut() { + Some(section) => section.push(entry), + None => outline_nodes.push(entry), + } + } + + pub(crate) fn build_outline( + mut self, + mut outline_nodes: Vec<TagNode>, + ) -> Vec<TagNode> { + while !self.stack.is_empty() { + self.finish_section(&mut outline_nodes); + } + outline_nodes + } +} + +pub(crate) struct OutlineSection { + entries: Vec<TagNode>, +} + +impl OutlineSection { + const fn new() -> Self { + OutlineSection { entries: Vec::new() } + } + + fn push(&mut self, entry: TagNode) { + self.entries.push(entry); + } + + fn into_tag(self) -> TagNode { + TagNode::Group(TagKind::TOC.into(), self.entries) + } +} diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs new file mode 100644 index 00000000..240da4c3 --- /dev/null +++ b/crates/typst-pdf/src/tags/table.rs @@ -0,0 +1,330 @@ +use std::num::{NonZeroU32, NonZeroUsize}; + +use krilla::tagging::{ + TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, + TagKind, +}; +use typst_library::foundations::{Packed, Smart, StyleChain}; +use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; + +use crate::tags::{TableId, TagNode}; + +pub(crate) struct TableCtx { + pub(crate) id: TableId, + pub(crate) table: Packed<TableElem>, + rows: Vec<Vec<GridCell>>, +} + +impl TableCtx { + pub(crate) fn new(id: TableId, table: Packed<TableElem>) -> Self { + Self { id, table: table.clone(), rows: Vec::new() } + } + + fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { + let cell = self.rows.get(y)?.get(x)?; + self.resolve_cell(cell) + } + + fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { + let cell = self.rows.get_mut(y)?.get_mut(x)?; + match cell { + GridCell::Cell(cell) => { + // HACK: Workaround for the second mutable borrow when resolving + // the spanned cell. + Some(unsafe { std::mem::transmute(cell) }) + } + &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), + GridCell::Missing => None, + } + } + + pub(crate) fn contains(&self, cell: &Packed<TableCell>) -> bool { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + self.get(x, y).is_some() + } + + fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { + match cell { + GridCell::Cell(cell) => Some(cell), + &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), + GridCell::Missing => None, + } + } + + pub(crate) fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let rowspan = cell.rowspan(StyleChain::default()); + let colspan = cell.colspan(StyleChain::default()); + let kind = cell.kind(StyleChain::default()); + + // Extend the table grid to fit this cell. + let required_height = y + rowspan.get(); + let required_width = x + colspan.get(); + if self.rows.len() < required_height { + self.rows + .resize(required_height, vec![GridCell::Missing; required_width]); + } + let row = &mut self.rows[y]; + if row.len() < required_width { + row.resize_with(required_width, || GridCell::Missing); + } + + // Store references to the cell for all spanned cells. + for i in y..y + rowspan.get() { + for j in x..x + colspan.get() { + self.rows[i][j] = GridCell::Spanned(x, y); + } + } + + self.rows[y][x] = GridCell::Cell(TableCtxCell { + x: x as u32, + y: y as u32, + rowspan, + colspan, + kind, + headers: TableCellHeaders::NONE, + nodes, + }); + } + + pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> { + // Table layouting ensures that there are no overlapping cells, and that + // any gaps left by the user are filled with empty cells. + if self.rows.is_empty() { + return nodes; + } + let height = self.rows.len(); + let width = self.rows[0].len(); + + // Only generate row groups such as `THead`, `TFoot`, and `TBody` if + // there are no rows with mixed cell kinds. + let mut gen_row_groups = true; + let row_kinds = (self.rows.iter()) + .map(|row| { + row.iter() + .filter_map(|cell| self.resolve_cell(cell)) + .map(|cell| cell.kind) + .fold(Smart::Auto, |a, b| { + if let Smart::Custom(TableCellKind::Header(_, scope)) = b { + gen_row_groups &= scope == TableHeaderScope::Column; + } + if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { + gen_row_groups &= a == b; + } + a.or(b) + }) + .unwrap_or(TableCellKind::Data) + }) + .collect::<Vec<_>>(); + + // Fixup all missing cell kinds. + for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { + let default_kind = + if gen_row_groups { row_kind } else { TableCellKind::Data }; + for cell in row.iter_mut() { + let Some(cell) = cell.as_cell_mut() else { continue }; + cell.kind = cell.kind.or(Smart::Custom(default_kind)); + } + } + + // Explicitly set the headers attribute for cells. + for x in 0..width { + let mut column_header = None; + for y in 0..height { + self.resolve_cell_headers( + (x, y), + &mut column_header, + TableHeaderScope::refers_to_column, + ); + } + } + for y in 0..height { + let mut row_header = None; + for x in 0..width { + self.resolve_cell_headers( + (x, y), + &mut row_header, + TableHeaderScope::refers_to_row, + ); + } + } + + let mut chunk_kind = row_kinds[0]; + let mut row_chunk = Vec::new(); + for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { + let row_nodes = row + .into_iter() + .filter_map(|cell| { + let cell = cell.into_cell()?; + let span = TableCellSpan { + rows: cell.rowspan.try_into().unwrap(), + cols: cell.colspan.try_into().unwrap(), + }; + let tag = match cell.unwrap_kind() { + TableCellKind::Header(_, scope) => { + let id = table_cell_id(self.id, cell.x, cell.y); + let scope = table_header_scope(scope); + TagKind::TH( + TableHeaderCell::new(scope) + .with_span(span) + .with_headers(cell.headers), + ) + .with_id(Some(id)) + } + TableCellKind::Footer | TableCellKind::Data => TagKind::TD( + TableDataCell::new() + .with_span(span) + .with_headers(cell.headers), + ) + .into(), + }; + + Some(TagNode::Group(tag, cell.nodes)) + }) + .collect(); + + let row = TagNode::Group(TagKind::TR.into(), row_nodes); + + // Push the `TR` tags directly. + if !gen_row_groups { + nodes.push(row); + continue; + } + + // Generate row groups. + if !should_group_rows(chunk_kind, row_kind) { + let tag = match chunk_kind { + TableCellKind::Header(..) => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); + + chunk_kind = row_kind; + } + row_chunk.push(row); + } + + if !row_chunk.is_empty() { + let tag = match chunk_kind { + TableCellKind::Header(..) => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), row_chunk)); + } + + nodes + } + + fn resolve_cell_headers<F>( + &mut self, + (x, y): (usize, usize), + current_header: &mut Option<(NonZeroU32, TagId)>, + refers_to_dir: F, + ) where + F: Fn(&TableHeaderScope) -> bool, + { + let table_id = self.id; + let Some(cell) = self.get_mut(x, y) else { return }; + + if let Some((prev_level, cell_id)) = current_header.clone() { + // The `Headers` attribute is also set for parent headers. + let mut is_parent_header = true; + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + is_parent_header = prev_level < level; + } + } + + if is_parent_header && !cell.headers.ids.contains(&cell_id) { + cell.headers.ids.push(cell_id.clone()); + } + } + + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + let tag_id = table_cell_id(table_id, x as u32, y as u32); + *current_header = Some((level, tag_id)); + } + } + } +} + +#[derive(Clone, Default)] +enum GridCell { + Cell(TableCtxCell), + Spanned(usize, usize), + #[default] + Missing, +} + +impl GridCell { + fn as_cell(&self) -> Option<&TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn into_cell(self) -> Option<TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +struct TableCtxCell { + x: u32, + y: u32, + rowspan: NonZeroUsize, + colspan: NonZeroUsize, + kind: Smart<TableCellKind>, + headers: TableCellHeaders, + nodes: Vec<TagNode>, +} + +impl TableCtxCell { + fn unwrap_kind(&self) -> TableCellKind { + self.kind.unwrap_or_else(|| unreachable!()) + } +} + +fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { + match (a, b) { + (TableCellKind::Header(..), TableCellKind::Header(..)) => true, + (TableCellKind::Footer, TableCellKind::Footer) => true, + (TableCellKind::Data, TableCellKind::Data) => true, + (_, _) => false, + } +} + +fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { + let mut bytes = [0; 12]; + bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); + bytes[4..8].copy_from_slice(&x.to_ne_bytes()); + bytes[8..12].copy_from_slice(&y.to_ne_bytes()); + TagId::from_bytes(&bytes) +} + +fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { + match scope { + TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, + TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, + TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, + } +} |
