summaryrefslogtreecommitdiff
path: root/src/model/introspect.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2023-05-11 11:27:00 +0200
committerLaurenz <laurmaedje@gmail.com>2023-05-11 11:27:00 +0200
commit2f0b5eeae09bd880e4552bb83e44d9cd32571c58 (patch)
tree6989d423e6bb614b3224be53140e4d649033aeac /src/model/introspect.rs
parent47dff3765de863554ca296448555599fc50d4a8a (diff)
More efficient introspection
Switches from a mutable locator to one based on tracked chains and optimizes query performance.
Diffstat (limited to 'src/model/introspect.rs')
-rw-r--r--src/model/introspect.rs269
1 files changed, 194 insertions, 75 deletions
diff --git a/src/model/introspect.rs b/src/model/introspect.rs
index 5a286ec9..87a17a8e 100644
--- a/src/model/introspect.rs
+++ b/src/model/introspect.rs
@@ -1,7 +1,11 @@
+use std::cell::RefCell;
+use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash;
use std::num::NonZeroUsize;
+use comemo::{Prehashed, Track, Tracked, Validate};
+use ecow::EcoVec;
use indexmap::IndexMap;
use super::{Content, Selector};
@@ -12,15 +16,15 @@ use crate::geom::{Point, Transform};
use crate::model::Label;
use crate::util::NonZeroExt;
-/// Stably identifies an element in the document across multiple layout passes.
+/// Uniquely identifies an element in the document across multiple layout passes.
///
-/// This struct is created by [`StabilityProvider::locate`].
+/// This struct is created by [`Locator::locate`].
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct Location {
/// The hash of the element.
hash: u128,
/// An unique number among elements with the same hash. This is the reason
- /// we need a mutable `StabilityProvider` everywhere.
+ /// we need a `Locator` everywhere.
disambiguator: usize,
/// A synthetic location created from another one. This is used for example
/// in bibliography management to create individual linkable locations for
@@ -46,40 +50,114 @@ cast_from_value! {
Location: "location",
}
-/// Provides stable identities to elements.
-#[derive(Clone, Default)]
-pub struct StabilityProvider {
- hashes: Vec<u128>,
- checkpoints: Vec<usize>,
+/// Provides locations for elements in the document.
+///
+/// A [`Location`] consists of an element's hash plus a disambiguator. Just the
+/// hash is not enough because we can have multiple equal elements with the same
+/// hash (not a hash collision, just equal elements!). Between these, we
+/// disambiguate with an increasing number. In principle, the disambiguator
+/// could just be counted up. However, counting is an impure operation and as
+/// such we can't count across a memoization boundary. [^1]
+///
+/// Instead, we only mutate within a single "layout run" and combine the results
+/// with disambiguators from an outer tracked locator. Thus, the locators form a
+/// "tracked chain". When a layout run ends, its mutations are discarded and, on
+/// the other side of the memoization boundary, we
+/// [reconstruct](Self::visit_frame) them from the resulting [frames](Frame).
+///
+/// [^1]: Well, we could with [`TrackedMut`](comemo::TrackedMut), but the
+/// overhead is quite high, especially since we need to save & undo the counting
+/// when only measuring.
+#[derive(Default)]
+pub struct Locator<'a> {
+ /// Maps from a hash to the maximum number we've seen for this hash. This
+ /// number becomes the `disambiguator`.
+ hashes: RefCell<HashMap<u128, usize>>,
+ /// An outer `Locator`, from which we can get disambiguator for hashes
+ /// outside of the current "layout run".
+ ///
+ /// We need to override the constraint's lifetime here so that `Tracked` is
+ /// covariant over the constraint. If it becomes invariant, we're in for a
+ /// world of lifetime pain.
+ outer: Option<Tracked<'a, Self, <Locator<'static> as Validate>::Constraint>>,
}
-impl StabilityProvider {
- /// Create a new stability provider.
+impl<'a> Locator<'a> {
+ /// Create a new locator.
pub fn new() -> Self {
Self::default()
}
-}
-#[comemo::track]
-impl StabilityProvider {
+ /// Create a new chained locator.
+ pub fn chained(outer: Tracked<'a, Self>) -> Self {
+ Self { outer: Some(outer), ..Default::default() }
+ }
+
+ /// Start tracking this locator.
+ ///
+ /// In comparison to [`Track::track`], this method skips this chain link
+ /// if it does not contribute anything.
+ pub fn track(&self) -> Tracked<'_, Self> {
+ match self.outer {
+ Some(outer) if self.hashes.borrow().is_empty() => outer,
+ _ => Track::track(self),
+ }
+ }
+
/// Produce a stable identifier for this call site.
pub fn locate(&mut self, hash: u128) -> Location {
- let disambiguator = self.hashes.iter().filter(|&&prev| prev == hash).count();
- self.hashes.push(hash);
+ // Get the current disambiguator for this hash.
+ let disambiguator = self.disambiguator_impl(hash);
+
+ // Bump the next disambiguator up by one.
+ self.hashes.borrow_mut().insert(hash, disambiguator + 1);
+
+ // Create the location in its default variant.
Location { hash, disambiguator, variant: 0 }
}
- /// Create a checkpoint of the state that can be restored.
- pub fn save(&mut self) {
- self.checkpoints.push(self.hashes.len());
+ /// Advance past a frame.
+ pub fn visit_frame(&mut self, frame: &Frame) {
+ for (_, item) in frame.items() {
+ match item {
+ FrameItem::Group(group) => self.visit_frame(&group.frame),
+ FrameItem::Meta(Meta::Elem(elem), _) => {
+ let mut hashes = self.hashes.borrow_mut();
+ let loc = elem.location().unwrap();
+ let entry = hashes.entry(loc.hash).or_default();
+
+ // Next disambiguator needs to be at least one larger than
+ // the maximum we've seen so far.
+ *entry = (*entry).max(loc.disambiguator + 1);
+ }
+ _ => {}
+ }
+ }
}
- /// Restore the last checkpoint.
- pub fn restore(&mut self) {
- if let Some(checkpoint) = self.checkpoints.pop() {
- self.hashes.truncate(checkpoint);
+ /// Advance past a number of frames.
+ pub fn visit_frames<'b>(&mut self, frames: impl IntoIterator<Item = &'b Frame>) {
+ for frame in frames {
+ self.visit_frame(frame);
}
}
+
+ /// The current disambiguator for the given hash.
+ fn disambiguator_impl(&self, hash: u128) -> usize {
+ *self
+ .hashes
+ .borrow_mut()
+ .entry(hash)
+ .or_insert_with(|| self.outer.map_or(0, |outer| outer.disambiguator(hash)))
+ }
+}
+
+#[comemo::track]
+impl<'a> Locator<'a> {
+ /// The current disambiguator for the hash.
+ fn disambiguator(&self, hash: u128) -> usize {
+ self.disambiguator_impl(hash)
+ }
}
/// Can be queried for elements and their positions.
@@ -87,9 +165,14 @@ pub struct Introspector {
/// The number of pages in the document.
pages: usize,
/// All introspectable elements.
- elems: IndexMap<Location, (Content, Position)>,
+ elems: IndexMap<Location, (Prehashed<Content>, Position)>,
/// The page numberings, indexed by page number minus 1.
page_numberings: Vec<Value>,
+ /// Caches queries done on the introspector. This is important because
+ /// even if all top-level queries are distinct, they often have shared
+ /// subqueries. Example: Individual counter queries with `before` that
+ /// all depend on a global counter query.
+ queries: RefCell<HashMap<u128, EcoVec<Prehashed<Content>>>>,
}
impl Introspector {
@@ -100,6 +183,7 @@ impl Introspector {
pages: frames.len(),
elems: IndexMap::new(),
page_numberings: vec![],
+ queries: RefCell::default(),
};
for (i, frame) in frames.iter().enumerate() {
let page = NonZeroUsize::new(1 + i).unwrap();
@@ -108,11 +192,6 @@ impl Introspector {
introspector
}
- /// Iterate over all elements.
- pub fn all(&self) -> impl Iterator<Item = Content> + '_ {
- self.elems.values().map(|(c, _)| c).cloned()
- }
-
/// Extract metadata from a frame.
#[tracing::instrument(skip_all)]
fn extract(&mut self, frame: &Frame, page: NonZeroUsize, ts: Transform) {
@@ -130,7 +209,7 @@ impl Introspector {
let pos = pos.transform(ts);
let ret = self.elems.insert(
content.location().unwrap(),
- (content.clone(), Position { page, point: pos }),
+ (Prehashed::new(content.clone()), Position { page, point: pos }),
);
assert!(ret.is_none(), "duplicate locations");
}
@@ -141,6 +220,23 @@ impl Introspector {
}
}
}
+
+ /// Iterate over all locatable elements.
+ pub fn all(&self) -> impl Iterator<Item = &Prehashed<Content>> + '_ {
+ self.elems.values().map(|(c, _)| c)
+ }
+
+ /// Get an element by its location.
+ fn get(&self, location: &Location) -> Option<&Prehashed<Content>> {
+ self.elems.get(location).map(|(elem, _)| elem)
+ }
+
+ /// Get the index of this element among all.
+ fn index(&self, elem: &Content) -> usize {
+ self.elems
+ .get_index_of(&elem.location().unwrap())
+ .unwrap_or(usize::MAX)
+ }
}
#[comemo::track]
@@ -150,40 +246,75 @@ impl Introspector {
self.pages > 0
}
- /// Get an element from the position cache.
- pub fn location(&self, location: &Location) -> Option<Content> {
- self.elems.get(location).map(|(c, _)| c).cloned()
- }
-
/// Query for all matching elements.
- #[tracing::instrument(skip_all)]
- pub fn query<'a>(&'a self, selector: &'a Selector) -> Vec<Content> {
- match selector {
- Selector::Location(location) => self
- .elems
- .get(location)
- .map(|(content, _)| content)
- .cloned()
- .into_iter()
- .collect(),
- _ => selector.match_iter(self).collect(),
+ pub fn query(&self, selector: &Selector) -> EcoVec<Prehashed<Content>> {
+ let hash = crate::util::hash128(selector);
+ if let Some(output) = self.queries.borrow().get(&hash) {
+ return output.clone();
}
- }
- /// Query for the first matching element.
- #[tracing::instrument(skip_all)]
- pub fn query_first<'a>(&'a self, selector: &'a Selector) -> Option<Content> {
- match selector {
+ let output = match selector {
+ Selector::Elem(..)
+ | Selector::Label(_)
+ | Selector::Regex(_)
+ | Selector::Can(_)
+ | Selector::Or(_)
+ | Selector::And(_) => {
+ self.all().filter(|elem| selector.matches(elem)).cloned().collect()
+ }
+
Selector::Location(location) => {
- self.elems.get(location).map(|(content, _)| content).cloned()
+ self.get(location).cloned().into_iter().collect()
+ }
+ Selector::Before { selector, end, inclusive } => {
+ let mut list = self.query(selector);
+ if let Some(end) = self.query_first(end) {
+ // Determine which elements are before `end`.
+ let split = match list
+ .binary_search_by_key(&self.index(&end), |elem| self.index(elem))
+ {
+ // Element itself is contained.
+ Ok(i) => i + *inclusive as usize,
+ // Element itself is not contained.
+ Err(i) => i,
+ };
+ list = list[..split].into();
+ }
+ list
+ }
+ Selector::After { selector, start, inclusive } => {
+ let mut list = self.query(selector);
+ if let Some(start) = self.query_first(start) {
+ // Determine which elements are after `start`.
+ let split = match list
+ .binary_search_by_key(&self.index(&start), |elem| {
+ self.index(elem)
+ }) {
+ // Element itself is contained.
+ Ok(i) => i + !*inclusive as usize,
+ // Element itself is not contained.
+ Err(i) => i,
+ };
+ list = list[split..].into();
+ }
+ list
}
- _ => selector.match_iter(self).next(),
+ };
+
+ self.queries.borrow_mut().insert(hash, output.clone());
+ output
+ }
+
+ /// Query for the first element that matches the selector.
+ pub fn query_first(&self, selector: &Selector) -> Option<Prehashed<Content>> {
+ match selector {
+ Selector::Location(location) => self.get(location).cloned(),
+ _ => self.query(selector).first().cloned(),
}
}
/// Query for a unique element with the label.
- #[tracing::instrument(skip(self))]
- pub fn query_label(&self, label: &Label) -> StrResult<Content> {
+ pub fn query_label(&self, label: &Label) -> StrResult<Prehashed<Content>> {
let mut found = None;
for elem in self.all().filter(|elem| elem.label() == Some(label)) {
if found.is_some() {
@@ -199,40 +330,28 @@ impl Introspector {
NonZeroUsize::new(self.pages).unwrap_or(NonZeroUsize::ONE)
}
- /// Find the page number for the given location.
- pub fn page(&self, location: Location) -> NonZeroUsize {
- self.position(location).page
- }
-
/// Gets the page numbering for the given location, if any.
- #[tracing::instrument(skip(self))]
pub fn page_numbering(&self, location: Location) -> Value {
let page = self.page(location);
self.page_numberings.get(page.get() - 1).cloned().unwrap_or_default()
}
+ /// Find the page number for the given location.
+ pub fn page(&self, location: Location) -> NonZeroUsize {
+ self.position(location).page
+ }
+
/// Find the position for the given location.
- #[tracing::instrument(skip(self))]
pub fn position(&self, location: Location) -> Position {
self.elems
.get(&location)
.map(|(_, loc)| *loc)
.unwrap_or(Position { page: NonZeroUsize::ONE, point: Point::zero() })
}
+}
- /// Checks whether `a` is before `b` in the document.
- pub fn is_before(&self, a: Location, b: Location, inclusive: bool) -> bool {
- let a = self.elems.get_index_of(&a).unwrap();
- let b = self.elems.get_index_of(&b).unwrap();
- if inclusive {
- a <= b
- } else {
- a < b
- }
- }
-
- /// Checks whether `a` is after `b` in the document.
- pub fn is_after(&self, a: Location, b: Location, inclusive: bool) -> bool {
- !self.is_before(a, b, !inclusive)
+impl Default for Introspector {
+ fn default() -> Self {
+ Self::new(&[])
}
}