diff options
| author | Laurenz <laurmaedje@gmail.com> | 2024-12-02 14:19:52 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2024-12-04 10:12:07 +0100 |
| commit | e0122a5b509d151b7e0197d37a120fd965a055d5 (patch) | |
| tree | 1045c37c53dc2e08fedc8802cdfec1b244e10dd0 /crates/typst-library/src/introspection | |
| parent | 885c7d96eea73f478faea9877f0dbc40c00b0d7b (diff) | |
Add HTML export format
Diffstat (limited to 'crates/typst-library/src/introspection')
| -rw-r--r-- | crates/typst-library/src/introspection/introspector.rs | 129 |
1 file changed, 92 insertions, 37 deletions
diff --git a/crates/typst-library/src/introspection/introspector.rs b/crates/typst-library/src/introspection/introspector.rs index 388d1f00..8cbaea89 100644 --- a/crates/typst-library/src/introspection/introspector.rs +++ b/crates/typst-library/src/introspection/introspector.rs @@ -10,6 +10,7 @@ use typst_utils::NonZeroExt; use crate::diag::{bail, StrResult}; use crate::foundations::{Content, Label, Repr, Selector}; +use crate::html::{HtmlElement, HtmlNode}; use crate::introspection::{Location, Tag}; use crate::layout::{Frame, FrameItem, Page, Point, Position, Transform}; use crate::model::Numbering; @@ -47,9 +48,15 @@ type Pair = (Content, Position); impl Introspector { /// Creates an introspector for a page list. - #[typst_macros::time(name = "introspect")] - pub fn new(pages: &[Page]) -> Self { - IntrospectorBuilder::new().build(pages) + #[typst_macros::time(name = "introspect pages")] + pub fn paged(pages: &[Page]) -> Self { + IntrospectorBuilder::new().build_paged(pages) + } + + /// Creates an introspector for HTML. + #[typst_macros::time(name = "introspect html")] + pub fn html(root: &HtmlElement) -> Self { + IntrospectorBuilder::new().build_html(root) } /// Iterates over all locatable elements. @@ -346,6 +353,7 @@ impl Clone for QueryCache { /// Builds the introspector. #[derive(Default)] struct IntrospectorBuilder { + pages: usize, page_numberings: Vec<Option<Numbering>>, page_supplements: Vec<Content>, seen: HashSet<Location>, @@ -361,46 +369,37 @@ impl IntrospectorBuilder { Self::default() } - /// Build the introspector. - fn build(mut self, pages: &[Page]) -> Introspector { + /// Build an introspector for a page list. + fn build_paged(mut self, pages: &[Page]) -> Introspector { + self.pages = pages.len(); self.page_numberings.reserve(pages.len()); self.page_supplements.reserve(pages.len()); // Discover all elements. 
- let mut root = Vec::new(); + let mut elems = Vec::new(); for (i, page) in pages.iter().enumerate() { self.page_numberings.push(page.numbering.clone()); self.page_supplements.push(page.supplement.clone()); - self.discover( - &mut root, + self.discover_in_frame( + &mut elems, &page.frame, NonZeroUsize::new(1 + i).unwrap(), Transform::identity(), ); } - self.locations.reserve(self.seen.len()); - - // Save all pairs and their descendants in the correct order. - let mut elems = Vec::with_capacity(self.seen.len()); - for pair in root { - self.visit(&mut elems, pair); - } + self.finalize(elems) + } - Introspector { - pages: pages.len(), - page_numberings: self.page_numberings, - page_supplements: self.page_supplements, - elems, - keys: self.keys, - locations: self.locations, - labels: self.labels, - queries: QueryCache::default(), - } + /// Build an introspector for an HTML document. + fn build_html(mut self, root: &HtmlElement) -> Introspector { + let mut elems = Vec::new(); + self.discover_in_html(&mut elems, root); + self.finalize(elems) } /// Processes the tags in the frame. 
- fn discover( + fn discover_in_frame( &mut self, sink: &mut Vec<Pair>, frame: &Frame, @@ -416,27 +415,83 @@ impl IntrospectorBuilder { if let Some(parent) = group.parent { let mut nested = vec![]; - self.discover(&mut nested, &group.frame, page, ts); + self.discover_in_frame(&mut nested, &group.frame, page, ts); self.insertions.insert(parent, nested); } else { - self.discover(sink, &group.frame, page, ts); + self.discover_in_frame(sink, &group.frame, page, ts); } } - FrameItem::Tag(Tag::Start(elem)) => { - let loc = elem.location().unwrap(); - if self.seen.insert(loc) { - let point = pos.transform(ts); - sink.push((elem.clone(), Position { page, point })); - } - } - FrameItem::Tag(Tag::End(loc, key)) => { - self.keys.insert(*key, *loc); + FrameItem::Tag(tag) => { + self.discover_in_tag( + sink, + tag, + Position { page, point: pos.transform(ts) }, + ); } _ => {} } } } + /// Processes the tags in the HTML element. + fn discover_in_html(&mut self, sink: &mut Vec<Pair>, elem: &HtmlElement) { + for child in &elem.children { + match child { + HtmlNode::Tag(tag) => self.discover_in_tag( + sink, + tag, + Position { page: NonZeroUsize::ONE, point: Point::zero() }, + ), + HtmlNode::Text(_, _) => {} + HtmlNode::Element(elem) => self.discover_in_html(sink, elem), + HtmlNode::Frame(frame) => self.discover_in_frame( + sink, + frame, + NonZeroUsize::ONE, + Transform::identity(), + ), + } + } + } + + /// Handle a tag. + fn discover_in_tag(&mut self, sink: &mut Vec<Pair>, tag: &Tag, position: Position) { + match tag { + Tag::Start(elem) => { + let loc = elem.location().unwrap(); + if self.seen.insert(loc) { + sink.push((elem.clone(), position)); + } + } + Tag::End(loc, key) => { + self.keys.insert(*key, *loc); + } + } + } + + /// Build a complete introspector with all acceleration structures from a + /// list of top-level pairs. 
+ fn finalize(mut self, root: Vec<Pair>) -> Introspector { + self.locations.reserve(self.seen.len()); + + // Save all pairs and their descendants in the correct order. + let mut elems = Vec::with_capacity(self.seen.len()); + for pair in root { + self.visit(&mut elems, pair); + } + + Introspector { + pages: self.pages, + page_numberings: self.page_numberings, + page_supplements: self.page_supplements, + elems, + keys: self.keys, + locations: self.locations, + labels: self.labels, + queries: QueryCache::default(), + } + } + /// Saves a pair and all its descendants into `elems` and populates the /// acceleration structures. fn visit(&mut self, elems: &mut Vec<Pair>, pair: Pair) { |
