From 2f0b5eeae09bd880e4552bb83e44d9cd32571c58 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 11 May 2023 11:27:00 +0200 Subject: More efficient introspection Switches from a mutable locator to one based on tracked chains and optimizes query performance. --- src/model/introspect.rs | 269 ++++++++++++++++++++++++++++++++++-------------- src/model/mod.rs | 45 ++++---- src/model/realize.rs | 2 +- src/model/styles.rs | 77 +------------- 4 files changed, 222 insertions(+), 171 deletions(-) (limited to 'src/model') diff --git a/src/model/introspect.rs b/src/model/introspect.rs index 5a286ec9..87a17a8e 100644 --- a/src/model/introspect.rs +++ b/src/model/introspect.rs @@ -1,7 +1,11 @@ +use std::cell::RefCell; +use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; use std::hash::Hash; use std::num::NonZeroUsize; +use comemo::{Prehashed, Track, Tracked, Validate}; +use ecow::EcoVec; use indexmap::IndexMap; use super::{Content, Selector}; @@ -12,15 +16,15 @@ use crate::geom::{Point, Transform}; use crate::model::Label; use crate::util::NonZeroExt; -/// Stably identifies an element in the document across multiple layout passes. +/// Uniquely identifies an element in the document across multiple layout passes. /// -/// This struct is created by [`StabilityProvider::locate`]. +/// This struct is created by [`Locator::locate`]. #[derive(Copy, Clone, Eq, PartialEq, Hash)] pub struct Location { /// The hash of the element. hash: u128, /// An unique number among elements with the same hash. This is the reason - /// we need a mutable `StabilityProvider` everywhere. + /// we need a `Locator` everywhere. disambiguator: usize, /// A synthetic location created from another one. This is used for example /// in bibliography management to create individual linkable locations for @@ -46,40 +50,114 @@ cast_from_value! { Location: "location", } -/// Provides stable identities to elements. -#[derive(Clone, Default)] -pub struct StabilityProvider { - hashes: Vec, - checkpoints: Vec, +/// Provides locations for elements in the document. +/// +/// A [`Location`] consists of an element's hash plus a disambiguator. Just the +/// hash is not enough because we can have multiple equal elements with the same +/// hash (not a hash collision, just equal elements!). Between these, we +/// disambiguate with an increasing number. In principle, the disambiguator +/// could just be counted up. However, counting is an impure operation and as +/// such we can't count across a memoization boundary. [^1] +/// +/// Instead, we only mutate within a single "layout run" and combine the results +/// with disambiguators from an outer tracked locator. Thus, the locators form a +/// "tracked chain". When a layout run ends, its mutations are discarded and, on +/// the other side of the memoization boundary, we +/// [reconstruct](Self::visit_frame) them from the resulting [frames](Frame). +/// +/// [^1]: Well, we could with [`TrackedMut`](comemo::TrackedMut), but the +/// overhead is quite high, especially since we need to save & undo the counting +/// when only measuring. +#[derive(Default)] +pub struct Locator<'a> { + /// Maps from a hash to the maximum number we've seen for this hash. This + /// number becomes the `disambiguator`. + hashes: RefCell>, + /// An outer `Locator`, from which we can get disambiguator for hashes + /// outside of the current "layout run". + /// + /// We need to override the constraint's lifetime here so that `Tracked` is + /// covariant over the constraint. If it becomes invariant, we're in for a + /// world of lifetime pain. + outer: Option as Validate>::Constraint>>, } -impl StabilityProvider { - /// Create a new stability provider. +impl<'a> Locator<'a> { + /// Create a new locator. pub fn new() -> Self { Self::default() } -} -#[comemo::track] -impl StabilityProvider { + /// Create a new chained locator. + pub fn chained(outer: Tracked<'a, Self>) -> Self { + Self { outer: Some(outer), ..Default::default() } + } + + /// Start tracking this locator. + /// + /// In comparison to [`Track::track`], this method skips this chain link + /// if it does not contribute anything. + pub fn track(&self) -> Tracked<'_, Self> { + match self.outer { + Some(outer) if self.hashes.borrow().is_empty() => outer, + _ => Track::track(self), + } + } + /// Produce a stable identifier for this call site. pub fn locate(&mut self, hash: u128) -> Location { - let disambiguator = self.hashes.iter().filter(|&&prev| prev == hash).count(); - self.hashes.push(hash); + // Get the current disambiguator for this hash. + let disambiguator = self.disambiguator_impl(hash); + + // Bump the next disambiguator up by one. + self.hashes.borrow_mut().insert(hash, disambiguator + 1); + + // Create the location in its default variant. Location { hash, disambiguator, variant: 0 } } - /// Create a checkpoint of the state that can be restored. - pub fn save(&mut self) { - self.checkpoints.push(self.hashes.len()); + /// Advance past a frame. + pub fn visit_frame(&mut self, frame: &Frame) { + for (_, item) in frame.items() { + match item { + FrameItem::Group(group) => self.visit_frame(&group.frame), + FrameItem::Meta(Meta::Elem(elem), _) => { + let mut hashes = self.hashes.borrow_mut(); + let loc = elem.location().unwrap(); + let entry = hashes.entry(loc.hash).or_default(); + + // Next disambiguator needs to be at least one larger than + // the maximum we've seen so far. + *entry = (*entry).max(loc.disambiguator + 1); + } + _ => {} + } + } } - /// Restore the last checkpoint. - pub fn restore(&mut self) { - if let Some(checkpoint) = self.checkpoints.pop() { - self.hashes.truncate(checkpoint); + /// Advance past a number of frames. + pub fn visit_frames<'b>(&mut self, frames: impl IntoIterator) { + for frame in frames { + self.visit_frame(frame); } } + + /// The current disambiguator for the given hash. + fn disambiguator_impl(&self, hash: u128) -> usize { + *self + .hashes + .borrow_mut() + .entry(hash) + .or_insert_with(|| self.outer.map_or(0, |outer| outer.disambiguator(hash))) + } +} + +#[comemo::track] +impl<'a> Locator<'a> { + /// The current disambiguator for the hash. + fn disambiguator(&self, hash: u128) -> usize { + self.disambiguator_impl(hash) + } } /// Can be queried for elements and their positions. @@ -87,9 +165,14 @@ pub struct Introspector { /// The number of pages in the document. pages: usize, /// All introspectable elements. - elems: IndexMap, + elems: IndexMap, Position)>, /// The page numberings, indexed by page number minus 1. page_numberings: Vec, + /// Caches queries done on the introspector. This is important because + /// even if all top-level queries are distinct, they often have shared + /// subqueries. Example: Individual counter queries with `before` that + /// all depend on a global counter query. + queries: RefCell>>>, } impl Introspector { @@ -100,6 +183,7 @@ impl Introspector { pages: frames.len(), elems: IndexMap::new(), page_numberings: vec![], + queries: RefCell::default(), }; for (i, frame) in frames.iter().enumerate() { let page = NonZeroUsize::new(1 + i).unwrap(); @@ -108,11 +192,6 @@ impl Introspector { introspector } - /// Iterate over all elements. - pub fn all(&self) -> impl Iterator + '_ { - self.elems.values().map(|(c, _)| c).cloned() - } - /// Extract metadata from a frame. #[tracing::instrument(skip_all)] fn extract(&mut self, frame: &Frame, page: NonZeroUsize, ts: Transform) { @@ -130,7 +209,7 @@ impl Introspector { let pos = pos.transform(ts); let ret = self.elems.insert( content.location().unwrap(), - (content.clone(), Position { page, point: pos }), + (Prehashed::new(content.clone()), Position { page, point: pos }), ); assert!(ret.is_none(), "duplicate locations"); } @@ -141,6 +220,23 @@ impl Introspector { } } } + + /// Iterate over all locatable elements. + pub fn all(&self) -> impl Iterator> + '_ { + self.elems.values().map(|(c, _)| c) + } + + /// Get an element by its location. + fn get(&self, location: &Location) -> Option<&Prehashed> { + self.elems.get(location).map(|(elem, _)| elem) + } + + /// Get the index of this element among all. + fn index(&self, elem: &Content) -> usize { + self.elems + .get_index_of(&elem.location().unwrap()) + .unwrap_or(usize::MAX) + } } #[comemo::track] @@ -150,40 +246,75 @@ impl Introspector { self.pages > 0 } - /// Get an element from the position cache. - pub fn location(&self, location: &Location) -> Option { - self.elems.get(location).map(|(c, _)| c).cloned() - } - /// Query for all matching elements. - #[tracing::instrument(skip_all)] - pub fn query<'a>(&'a self, selector: &'a Selector) -> Vec { - match selector { - Selector::Location(location) => self - .elems - .get(location) - .map(|(content, _)| content) - .cloned() - .into_iter() - .collect(), - _ => selector.match_iter(self).collect(), + pub fn query(&self, selector: &Selector) -> EcoVec> { + let hash = crate::util::hash128(selector); + if let Some(output) = self.queries.borrow().get(&hash) { + return output.clone(); } - } - /// Query for the first matching element. - #[tracing::instrument(skip_all)] - pub fn query_first<'a>(&'a self, selector: &'a Selector) -> Option { - match selector { + let output = match selector { + Selector::Elem(..) + | Selector::Label(_) + | Selector::Regex(_) + | Selector::Can(_) + | Selector::Or(_) + | Selector::And(_) => { + self.all().filter(|elem| selector.matches(elem)).cloned().collect() + } + Selector::Location(location) => { - self.elems.get(location).map(|(content, _)| content).cloned() + self.get(location).cloned().into_iter().collect() + } + Selector::Before { selector, end, inclusive } => { + let mut list = self.query(selector); + if let Some(end) = self.query_first(end) { + // Determine which elements are before `end`. + let split = match list + .binary_search_by_key(&self.index(&end), |elem| self.index(elem)) + { + // Element itself is contained. + Ok(i) => i + *inclusive as usize, + // Element itself is not contained. + Err(i) => i, + }; + list = list[..split].into(); + } + list + } + Selector::After { selector, start, inclusive } => { + let mut list = self.query(selector); + if let Some(start) = self.query_first(start) { + // Determine which elements are after `start`. + let split = match list + .binary_search_by_key(&self.index(&start), |elem| { + self.index(elem) + }) { + // Element itself is contained. + Ok(i) => i + !*inclusive as usize, + // Element itself is not contained. + Err(i) => i, + }; + list = list[split..].into(); + } + list } - _ => selector.match_iter(self).next(), + }; + + self.queries.borrow_mut().insert(hash, output.clone()); + output + } + + /// Query for the first element that matches the selector. + pub fn query_first(&self, selector: &Selector) -> Option> { + match selector { + Selector::Location(location) => self.get(location).cloned(), + _ => self.query(selector).first().cloned(), } } /// Query for a unique element with the label. - #[tracing::instrument(skip(self))] - pub fn query_label(&self, label: &Label) -> StrResult { + pub fn query_label(&self, label: &Label) -> StrResult> { let mut found = None; for elem in self.all().filter(|elem| elem.label() == Some(label)) { if found.is_some() { @@ -199,40 +330,28 @@ impl Introspector { NonZeroUsize::new(self.pages).unwrap_or(NonZeroUsize::ONE) } - /// Find the page number for the given location. - pub fn page(&self, location: Location) -> NonZeroUsize { - self.position(location).page - } - /// Gets the page numbering for the given location, if any. - #[tracing::instrument(skip(self))] pub fn page_numbering(&self, location: Location) -> Value { let page = self.page(location); self.page_numberings.get(page.get() - 1).cloned().unwrap_or_default() } + /// Find the page number for the given location. + pub fn page(&self, location: Location) -> NonZeroUsize { + self.position(location).page + } + /// Find the position for the given location. - #[tracing::instrument(skip(self))] pub fn position(&self, location: Location) -> Position { self.elems .get(&location) .map(|(_, loc)| *loc) .unwrap_or(Position { page: NonZeroUsize::ONE, point: Point::zero() }) } +} - /// Checks whether `a` is before `b` in the document. - pub fn is_before(&self, a: Location, b: Location, inclusive: bool) -> bool { - let a = self.elems.get_index_of(&a).unwrap(); - let b = self.elems.get_index_of(&b).unwrap(); - if inclusive { - a <= b - } else { - a < b - } - } - - /// Checks whether `a` is after `b` in the document. - pub fn is_after(&self, a: Location, b: Location, inclusive: bool) -> bool { - !self.is_before(a, b, !inclusive) +impl Default for Introspector { + fn default() -> Self { + Self::new(&[]) } } diff --git a/src/model/mod.rs b/src/model/mod.rs index 92c4293b..e541cd01 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -6,13 +6,15 @@ mod introspect; mod realize; mod styles; +pub use typst_macros::element; + pub use self::content::*; pub use self::element::*; pub use self::introspect::*; pub use self::realize::*; pub use self::styles::*; -pub use typst_macros::element; +use std::mem::ManuallyDrop; use comemo::{Track, Tracked, TrackedMut, Validate}; @@ -29,38 +31,51 @@ pub fn typeset( mut tracer: TrackedMut, content: &Content, ) -> SourceResult { - tracing::info!("Starting layout"); + tracing::info!("Starting typesetting"); + let library = world.library(); let styles = StyleChain::new(&library.styles); let mut document; let mut iter = 0; - let mut introspector = Introspector::new(&[]); + + // We need `ManuallyDrop` until this lands in stable: + // https://github.com/rust-lang/rust/issues/70919 + let mut introspector = ManuallyDrop::new(Introspector::new(&[])); // Relayout until all introspections stabilize. // If that doesn't happen within five attempts, we give up. loop { tracing::info!("Layout iteration {iter}"); - let mut provider = StabilityProvider::new(); let constraint = ::Constraint::new(); + let mut locator = Locator::new(); let mut vt = Vt { world, tracer: TrackedMut::reborrow_mut(&mut tracer), - provider: provider.track_mut(), + locator: &mut locator, introspector: introspector.track_with(&constraint), }; - document = (library.items.layout)(&mut vt, content, styles)?; - iter += 1; + // Layout! + let result = (library.items.layout)(&mut vt, content, styles)?; - introspector = Introspector::new(&document.pages); + // Drop the old introspector. + ManuallyDrop::into_inner(introspector); + + // Only now assign the document and construct the new introspector. + document = result; + introspector = ManuallyDrop::new(Introspector::new(&document.pages)); + iter += 1; if iter >= 5 || introspector.validate(&constraint) { break; } } + // Drop the introspector. + ManuallyDrop::into_inner(introspector); + Ok(document) } @@ -73,19 +88,7 @@ pub struct Vt<'a> { /// The tracer for inspection of the values an expression produces. pub tracer: TrackedMut<'a, Tracer>, /// Provides stable identities to elements. - pub provider: TrackedMut<'a, StabilityProvider>, + pub locator: &'a mut Locator<'a>, /// Provides access to information about the document. pub introspector: Tracked<'a, Introspector>, } - -impl Vt<'_> { - /// Mutably reborrow with a shorter lifetime. - pub fn reborrow_mut(&mut self) -> Vt<'_> { - Vt { - world: self.world, - tracer: TrackedMut::reborrow_mut(&mut self.tracer), - provider: TrackedMut::reborrow_mut(&mut self.provider), - introspector: self.introspector, - } - } -} diff --git a/src/model/realize.rs b/src/model/realize.rs index ee9049ad..01c46b81 100644 --- a/src/model/realize.rs +++ b/src/model/realize.rs @@ -37,7 +37,7 @@ pub fn realize( if target.needs_preparation() { let mut elem = target.clone(); if target.can::() || target.label().is_some() { - let location = vt.provider.locate(hash128(target)); + let location = vt.locator.locate(hash128(target)); elem.set_location(location); } diff --git a/src/model/styles.rs b/src/model/styles.rs index 6757ed5d..efbdb9d1 100644 --- a/src/model/styles.rs +++ b/src/model/styles.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use comemo::Prehashed; use ecow::{eco_format, eco_vec, EcoString, EcoVec}; -use super::{Content, ElemFunc, Element, Introspector, Label, Location, Vt}; +use super::{Content, ElemFunc, Element, Label, Location, Vt}; use crate::diag::{SourceResult, StrResult, Trace, Tracepoint}; use crate::eval::{cast_from_value, Args, Cast, CastInfo, Dict, Func, Regex, Value, Vm}; use crate::model::Locatable; @@ -50,8 +50,7 @@ impl Styles { *self = outer; } - /// Apply one outer styles. Like [`chain_one`](StyleChain::chain_one), but - /// in-place. + /// Apply one outer styles. pub fn apply_one(&mut self, outer: Style) { self.0.insert(0, Prehashed::new(outer)); } @@ -322,77 +321,6 @@ impl Selector { } } - /// Matches the selector for an introspector. - pub fn match_iter<'a>( - &'a self, - introspector: &'a Introspector, - ) -> Box + 'a> { - self.match_iter_inner(introspector, introspector.all()) - } - - /// Match the selector against the given list of elements. Returns an - /// iterator over the matching elements. - fn match_iter_inner<'a>( - &'a self, - introspector: &'a Introspector, - parent: impl Iterator + 'a, - ) -> Box + 'a> { - match self { - Self::Location(location) => { - Box::new(introspector.location(location).into_iter()) - } - Self::Or(selectors) => Box::new(parent.filter(|element| { - selectors.iter().any(|selector| { - selector - .match_iter_inner(introspector, std::iter::once(element.clone())) - .next() - .is_some() - }) - })), - Self::And(selectors) => Box::new(parent.filter(|element| { - selectors.iter().all(|selector| { - selector - .match_iter_inner(introspector, std::iter::once(element.clone())) - .next() - .is_some() - }) - })), - Self::Before { selector, end: location, inclusive } => { - if let Some(content) = introspector.query_first(location) { - let loc = content.location().unwrap(); - Box::new(selector.match_iter_inner(introspector, parent).take_while( - move |elem| { - introspector.is_before( - elem.location().unwrap(), - loc, - *inclusive, - ) - }, - )) - } else { - Box::new(selector.match_iter_inner(introspector, parent)) - } - } - Self::After { selector, start: location, inclusive } => { - if let Some(content) = introspector.query_first(location) { - let loc = content.location().unwrap(); - Box::new(selector.match_iter_inner(introspector, parent).skip_while( - move |elem| { - introspector.is_before( - elem.location().unwrap(), - loc, - !*inclusive, - ) - }, - )) - } else { - Box::new(std::iter::empty()) - } - } - other => Box::new(parent.filter(move |content| other.matches(content))), - } - } - /// Whether the selector matches for the target. pub fn matches(&self, target: &Content) -> bool { match self { @@ -412,6 +340,7 @@ impl Selector { Self::Or(selectors) => selectors.iter().any(move |sel| sel.matches(target)), Self::And(selectors) => selectors.iter().all(move |sel| sel.matches(target)), Self::Location(location) => target.location() == Some(*location), + // Not supported here. Self::Before { .. } | Self::After { .. } => false, } } -- cgit v1.2.3