summaryrefslogtreecommitdiff
path: root/src/model/introspect.rs
blob: b150fabffeff1a43267871f3aa8522f8f76d585d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
use std::cell::RefCell;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash;
use std::num::NonZeroUsize;

use comemo::{Prehashed, Track, Tracked, Validate};
use ecow::EcoVec;
use indexmap::IndexMap;

use super::{Content, Selector};
use crate::diag::StrResult;
use crate::doc::{Frame, FrameItem, Meta, Position};
use crate::eval::{cast, Value};
use crate::geom::{Point, Transform};
use crate::model::Label;
use crate::util::NonZeroExt;

/// Identifies the location of an element in the document.
///
/// This struct is created by [`Locator::locate`].
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct Location {
    /// The hash of the element.
    hash: u128,
    /// An unique number among elements with the same hash. This is the reason
    /// we need a `Locator` everywhere.
    disambiguator: usize,
    /// A synthetic location created from another one. This is used for example
    /// in bibliography management to create individual linkable locations for
    /// reference entries from the bibliography's location.
    variant: usize,
}

impl Location {
    /// Produce a variant of this location.
    pub fn variant(mut self, n: usize) -> Self {
        self.variant = n;
        self
    }
}

impl Debug for Location {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.pad("..")
    }
}

cast! {
    type Location: "location",
}

/// Provides locations for elements in the document.
///
/// A [`Location`] consists of an element's hash plus a disambiguator. Just the
/// hash is not enough because we can have multiple equal elements with the same
/// hash (not a hash collision, just equal elements!). Between these, we
/// disambiguate with an increasing number. In principle, the disambiguator
/// could just be counted up. However, counting is an impure operation and as
/// such we can't count across a memoization boundary. [^1]
///
/// Instead, we only mutate within a single "layout run" and combine the results
/// with disambiguators from an outer tracked locator. Thus, the locators form a
/// "tracked chain". When a layout run ends, its mutations are discarded and, on
/// the other side of the memoization boundary, we
/// [reconstruct](Self::visit_frame) them from the resulting [frames](Frame).
///
/// [^1]: Well, we could with [`TrackedMut`](comemo::TrackedMut), but the
/// overhead is quite high, especially since we need to save & undo the counting
/// when only measuring.
#[derive(Default)]
pub struct Locator<'a> {
    /// Maps from a hash to the maximum number we've seen for this hash. This
    /// number becomes the `disambiguator`.
    hashes: RefCell<HashMap<u128, usize>>,
    /// An outer `Locator`, from which we can get disambiguator for hashes
    /// outside of the current "layout run".
    ///
    /// We need to override the constraint's lifetime here so that `Tracked` is
    /// covariant over the constraint. If it becomes invariant, we're in for a
    /// world of lifetime pain.
    outer: Option<Tracked<'a, Self, <Locator<'static> as Validate>::Constraint>>,
}

impl<'a> Locator<'a> {
    /// Create a new locator.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a new chained locator.
    pub fn chained(outer: Tracked<'a, Self>) -> Self {
        Self { outer: Some(outer), ..Default::default() }
    }

    /// Start tracking this locator.
    ///
    /// In comparison to [`Track::track`], this method skips this chain link
    /// if it does not contribute anything.
    pub fn track(&self) -> Tracked<'_, Self> {
        match self.outer {
            Some(outer) if self.hashes.borrow().is_empty() => outer,
            _ => Track::track(self),
        }
    }

    /// Produce a stable identifier for this call site.
    pub fn locate(&mut self, hash: u128) -> Location {
        // Get the current disambiguator for this hash.
        let disambiguator = self.disambiguator_impl(hash);

        // Bump the next disambiguator up by one.
        self.hashes.borrow_mut().insert(hash, disambiguator + 1);

        // Create the location in its default variant.
        Location { hash, disambiguator, variant: 0 }
    }

    /// Advance past a frame.
    pub fn visit_frame(&mut self, frame: &Frame) {
        for (_, item) in frame.items() {
            match item {
                FrameItem::Group(group) => self.visit_frame(&group.frame),
                FrameItem::Meta(Meta::Elem(elem), _) => {
                    let mut hashes = self.hashes.borrow_mut();
                    let loc = elem.location().unwrap();
                    let entry = hashes.entry(loc.hash).or_default();

                    // Next disambiguator needs to be at least one larger than
                    // the maximum we've seen so far.
                    *entry = (*entry).max(loc.disambiguator + 1);
                }
                _ => {}
            }
        }
    }

    /// Advance past a number of frames.
    pub fn visit_frames<'b>(&mut self, frames: impl IntoIterator<Item = &'b Frame>) {
        for frame in frames {
            self.visit_frame(frame);
        }
    }

    /// The current disambiguator for the given hash.
    fn disambiguator_impl(&self, hash: u128) -> usize {
        *self
            .hashes
            .borrow_mut()
            .entry(hash)
            .or_insert_with(|| self.outer.map_or(0, |outer| outer.disambiguator(hash)))
    }
}

#[comemo::track]
impl<'a> Locator<'a> {
    /// The current disambiguator for the hash.
    fn disambiguator(&self, hash: u128) -> usize {
        self.disambiguator_impl(hash)
    }
}

/// Can be queried for elements and their positions.
pub struct Introspector {
    /// The number of pages in the document.
    pages: usize,
    /// All introspectable elements.
    elems: IndexMap<Location, (Prehashed<Content>, Position)>,
    /// The page numberings, indexed by page number minus 1.
    page_numberings: Vec<Value>,
    /// Caches queries done on the introspector. This is important because
    /// even if all top-level queries are distinct, they often have shared
    /// subqueries. Example: Individual counter queries with `before` that
    /// all depend on a global counter query.
    queries: RefCell<HashMap<u128, EcoVec<Prehashed<Content>>>>,
}

impl Introspector {
    /// Create a new introspector.
    #[tracing::instrument(skip(frames))]
    pub fn new(frames: &[Frame]) -> Self {
        let mut introspector = Self {
            pages: frames.len(),
            elems: IndexMap::new(),
            page_numberings: vec![],
            queries: RefCell::default(),
        };
        for (i, frame) in frames.iter().enumerate() {
            let page = NonZeroUsize::new(1 + i).unwrap();
            introspector.extract(frame, page, Transform::identity());
        }
        introspector
    }

    /// Extract metadata from a frame.
    #[tracing::instrument(skip_all)]
    fn extract(&mut self, frame: &Frame, page: NonZeroUsize, ts: Transform) {
        for (pos, item) in frame.items() {
            match item {
                FrameItem::Group(group) => {
                    let ts = ts
                        .pre_concat(Transform::translate(pos.x, pos.y))
                        .pre_concat(group.transform);
                    self.extract(&group.frame, page, ts);
                }
                FrameItem::Meta(Meta::Elem(content), _)
                    if !self.elems.contains_key(&content.location().unwrap()) =>
                {
                    let pos = pos.transform(ts);
                    let ret = self.elems.insert(
                        content.location().unwrap(),
                        (Prehashed::new(content.clone()), Position { page, point: pos }),
                    );
                    assert!(ret.is_none(), "duplicate locations");
                }
                FrameItem::Meta(Meta::PageNumbering(numbering), _) => {
                    self.page_numberings.push(numbering.clone());
                }
                _ => {}
            }
        }
    }

    /// Iterate over all locatable elements.
    pub fn all(&self) -> impl Iterator<Item = &Prehashed<Content>> + '_ {
        self.elems.values().map(|(c, _)| c)
    }

    /// Get an element by its location.
    fn get(&self, location: &Location) -> Option<&Prehashed<Content>> {
        self.elems.get(location).map(|(elem, _)| elem)
    }

    /// Get the index of this element among all.
    fn index(&self, elem: &Content) -> usize {
        self.elems
            .get_index_of(&elem.location().unwrap())
            .unwrap_or(usize::MAX)
    }
}

#[comemo::track]
impl Introspector {
    /// Query for all matching elements.
    pub fn query(&self, selector: &Selector) -> EcoVec<Prehashed<Content>> {
        let hash = crate::util::hash128(selector);
        if let Some(output) = self.queries.borrow().get(&hash) {
            return output.clone();
        }

        let output = match selector {
            Selector::Elem(..)
            | Selector::Label(_)
            | Selector::Regex(_)
            | Selector::Can(_)
            | Selector::Or(_)
            | Selector::And(_) => {
                self.all().filter(|elem| selector.matches(elem)).cloned().collect()
            }

            Selector::Location(location) => {
                self.get(location).cloned().into_iter().collect()
            }
            Selector::Before { selector, end, inclusive } => {
                let mut list = self.query(selector);
                if let Some(end) = self.query_first(end) {
                    // Determine which elements are before `end`.
                    let split = match list
                        .binary_search_by_key(&self.index(&end), |elem| self.index(elem))
                    {
                        // Element itself is contained.
                        Ok(i) => i + *inclusive as usize,
                        // Element itself is not contained.
                        Err(i) => i,
                    };
                    list = list[..split].into();
                }
                list
            }
            Selector::After { selector, start, inclusive } => {
                let mut list = self.query(selector);
                if let Some(start) = self.query_first(start) {
                    // Determine which elements are after `start`.
                    let split = match list
                        .binary_search_by_key(&self.index(&start), |elem| {
                            self.index(elem)
                        }) {
                        // Element itself is contained.
                        Ok(i) => i + !*inclusive as usize,
                        // Element itself is not contained.
                        Err(i) => i,
                    };
                    list = list[split..].into();
                }
                list
            }
        };

        self.queries.borrow_mut().insert(hash, output.clone());
        output
    }

    /// Query for the first element that matches the selector.
    pub fn query_first(&self, selector: &Selector) -> Option<Prehashed<Content>> {
        match selector {
            Selector::Location(location) => self.get(location).cloned(),
            _ => self.query(selector).first().cloned(),
        }
    }

    /// Query for a unique element with the label.
    pub fn query_label(&self, label: &Label) -> StrResult<Prehashed<Content>> {
        let mut found = None;
        for elem in self.all().filter(|elem| elem.label() == Some(label)) {
            if found.is_some() {
                return Err("label occurs multiple times in the document".into());
            }
            found = Some(elem.clone());
        }
        found.ok_or_else(|| "label does not exist in the document".into())
    }

    /// The total number pages.
    pub fn pages(&self) -> NonZeroUsize {
        NonZeroUsize::new(self.pages).unwrap_or(NonZeroUsize::ONE)
    }

    /// Gets the page numbering for the given location, if any.
    pub fn page_numbering(&self, location: Location) -> Value {
        let page = self.page(location);
        self.page_numberings.get(page.get() - 1).cloned().unwrap_or_default()
    }

    /// Find the page number for the given location.
    pub fn page(&self, location: Location) -> NonZeroUsize {
        self.position(location).page
    }

    /// Find the position for the given location.
    pub fn position(&self, location: Location) -> Position {
        self.elems
            .get(&location)
            .map(|(_, loc)| *loc)
            .unwrap_or(Position { page: NonZeroUsize::ONE, point: Point::zero() })
    }
}

impl Default for Introspector {
    fn default() -> Self {
        Self::new(&[])
    }
}