From be7cfc85d08c545abfac08098b7b33b4bd71f37e Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 27 Oct 2024 19:04:55 +0100 Subject: Split out four new crates (#5302) --- crates/typst-realize/Cargo.toml | 29 + crates/typst-realize/src/lib.rs | 1218 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 1247 insertions(+) create mode 100644 crates/typst-realize/Cargo.toml create mode 100644 crates/typst-realize/src/lib.rs (limited to 'crates/typst-realize') diff --git a/crates/typst-realize/Cargo.toml b/crates/typst-realize/Cargo.toml new file mode 100644 index 00000000..78bda259 --- /dev/null +++ b/crates/typst-realize/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "typst-realize" +description = "Typst's realization subsystem." +version = { workspace = true } +rust-version = { workspace = true } +authors = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +categories = { workspace = true } +keywords = { workspace = true } +readme = { workspace = true } + +[dependencies] +typst-library = { workspace = true } +typst-macros = { workspace = true } +typst-syntax = { workspace = true } +typst-timing = { workspace = true } +typst-utils = { workspace = true } +arrayvec = { workspace = true } +bumpalo = { workspace = true } +comemo = { workspace = true } +ecow = { workspace = true } +once_cell = { workspace = true } +regex = { workspace = true } + +[lints] +workspace = true diff --git a/crates/typst-realize/src/lib.rs b/crates/typst-realize/src/lib.rs new file mode 100644 index 00000000..58668962 --- /dev/null +++ b/crates/typst-realize/src/lib.rs @@ -0,0 +1,1218 @@ +//! Typst's realization subsystem. +//! +//! *Realization* is the process of recursively applying styling and, in +//! particular, show rules to produce well-known elements that can be processed +//! further. + +use std::borrow::Cow; + +use arrayvec::ArrayVec; +use bumpalo::collections::{String as BumpString, Vec as BumpVec}; +use comemo::Track; +use ecow::EcoString; +use once_cell::unsync::Lazy; +use typst_library::diag::{bail, At, SourceResult}; +use typst_library::engine::Engine; +use typst_library::foundations::{ + Content, Context, ContextElem, Element, NativeElement, Recipe, RecipeIndex, Selector, + SequenceElem, Show, ShowSet, Style, StyleChain, StyleVec, StyledElem, Styles, + Synthesize, Transformation, +}; +use typst_library::introspection::{Locatable, SplitLocator, Tag, TagElem}; +use typst_library::layout::{ + AlignElem, BoxElem, HElem, InlineElem, PageElem, PagebreakElem, VElem, +}; +use typst_library::math::{EquationElem, Mathy}; +use typst_library::model::{ + CiteElem, CiteGroup, DocumentElem, EnumElem, ListElem, ListItemLike, ListLike, + ParElem, ParbreakElem, TermsElem, +}; +use typst_library::routines::{Arenas, Pair, RealizationKind}; +use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem}; +use typst_syntax::Span; +use typst_utils::{SliceExt, SmallBitSet}; + +/// Realize content into a flat list of well-known, styled items. +#[typst_macros::time(name = "realize")] +pub fn realize<'a>( + kind: RealizationKind, + engine: &mut Engine, + locator: &mut SplitLocator, + arenas: &'a Arenas, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult>> { + let mut s = State { + engine, + locator, + arenas, + rules: match kind { + RealizationKind::Root(_) | RealizationKind::Container => NORMAL_RULES, + RealizationKind::Math => MATH_RULES, + }, + sink: vec![], + groupings: ArrayVec::new(), + outside: matches!(kind, RealizationKind::Root(_)), + may_attach: false, + kind, + }; + + visit(&mut s, content, styles)?; + finish(&mut s)?; + + Ok(s.sink) +} + +/// Mutable state for realization. +/// +/// Sadly, we need that many lifetimes because &mut references are invariant and +/// it would force the lifetimes of e.g. engine and locator to be equal if they +/// shared a lifetime. We can get around it by enforcing the lifetimes on +/// `fn realize`, but that makes it less flexible on the call site, which isn't +/// worth it. +/// +/// The only interesting lifetime is 'a, which is that of the content that comes +/// in and goes out. It's the same 'a as on `fn realize`. +struct State<'a, 'x, 'y, 'z> { + /// Defines what kind of realization we are performing. + kind: RealizationKind<'x>, + /// The engine. + engine: &'x mut Engine<'y>, + /// Assigns unique locations to elements. + locator: &'x mut SplitLocator<'z>, + /// Temporary storage arenas for lifetime extension during realization. + arenas: &'a Arenas, + /// The output elements of well-known types. + sink: Vec>, + /// Grouping rules used for realization. + rules: &'x [&'x GroupingRule], + /// Currently active groupings. + groupings: ArrayVec, MAX_GROUP_NESTING>, + /// Whether we are currently not within any container or show rule output. + /// This is used to determine page styles during layout. + outside: bool, + /// Whether now following attach spacing can survive. + may_attach: bool, +} + +/// Defines a rule for how certain elements shall be grouped during realization. +struct GroupingRule { + /// When an element is visited that matches a rule with higher priority + /// than one that is currently grouped, we start a nested group. + priority: u8, + /// Whether the grouping handles tags itself. If this is set to `false`, + /// realization will transparently take care of tags and they will not + /// be visible to `finish`. + tags: bool, + /// Defines which kinds of elements start and make up this kind of grouping. + trigger: fn(Element) -> bool, + /// Defines elements that may appear in the interior of the grouping, but + /// not at the edges. + inner: fn(Element) -> bool, + /// Defines whether styles for this kind of element interrupt the grouping. + interrupt: fn(Element) -> bool, + /// Should convert the accumulated elements in `s.sink[start..]` into + /// the grouped element. + finish: fn(Grouped) -> SourceResult<()>, +} + +/// A started grouping of some elements. +struct Grouping<'a> { + /// The position in `s.sink` where the group starts. + start: usize, + /// The rule used for this grouping. + rule: &'a GroupingRule, +} + +/// The result of grouping. +struct Grouped<'a, 'x, 'y, 'z, 's> { + /// The realization state. + s: &'s mut State<'a, 'x, 'y, 'z>, + /// The position in `s.sink` where the group starts. + start: usize, +} + +/// What to do with an element when encountering it during realization. +struct Verdict<'a> { + /// Whether the element is already prepared (i.e. things that should only + /// happen once have happened). + prepared: bool, + /// A map of styles to apply to the element. + map: Styles, + /// An optional show rule transformation to apply to the element. + step: Option>, +} + +/// A show rule transformation to apply to the element. +enum ShowStep<'a> { + /// A user-defined transformational show rule. + Recipe(&'a Recipe, RecipeIndex), + /// The built-in show rule. + Builtin, +} + +/// A match of a regex show rule. +struct RegexMatch<'a> { + /// The offset in the string that matched. + offset: usize, + /// The text that matched. + text: EcoString, + /// The style chain of the matching grouping. + styles: StyleChain<'a>, + /// The index of the recipe that matched. + id: RecipeIndex, + /// The recipe that matched. + recipe: &'a Recipe, +} + +/// State kept for space collapsing. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum SpaceState { + /// A following space will be collapsed. + Destructive, + /// A following space will be kept unless a destructive element follows. + Supportive, + /// A space exists at this index. + Space(usize), +} + +impl<'a> State<'a, '_, '_, '_> { + /// Lifetime-extends some content. + fn store(&self, content: Content) -> &'a Content { + self.arenas.content.alloc(content) + } + + /// Lifetime-extends some pairs. + /// + /// By using a `BumpVec` instead of a `alloc_slice_copy` we can reuse + /// the space if no other bump allocations have been made by the time + /// the `BumpVec` is dropped. + fn store_slice(&self, pairs: &[Pair<'a>]) -> BumpVec<'a, Pair<'a>> { + let mut vec = BumpVec::new_in(&self.arenas.bump); + vec.extend_from_slice_copy(pairs); + vec + } +} + +impl<'a, 'x, 'y, 'z, 's> Grouped<'a, 'x, 'y, 'z, 's> { + /// Accesses the grouped elements. + fn get(&self) -> &[Pair<'a>] { + &self.s.sink[self.start..] + } + + /// Accesses the grouped elements mutably. + fn get_mut(&mut self) -> (&mut Vec>, usize) { + (&mut self.s.sink, self.start) + } + + /// Removes the grouped elements from the sink and retrieves back the state + /// with which resulting elements can be visited. + fn end(self) -> &'s mut State<'a, 'x, 'y, 'z> { + self.s.sink.truncate(self.start); + self.s + } +} + +/// Handles an arbitrary piece of content during realization. +fn visit<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult<()> { + // Tags can always simply be pushed. + if content.is::() { + s.sink.push((content, styles)); + return Ok(()); + } + + // Transformations for math content based on the realization kind. Needs + // to happen before show rules. + if visit_math_rules(s, content, styles)? { + return Ok(()); + } + + // Apply show rules and preparation. + if visit_show_rules(s, content, styles)? { + return Ok(()); + } + + // Recurse into sequences. Styled elements and sequences can currently also + // have labels, so this needs to happen before they are handled. + if let Some(sequence) = content.to_packed::() { + for elem in &sequence.children { + visit(s, elem, styles)?; + } + return Ok(()); + } + + // Recurse into styled elements. + if let Some(styled) = content.to_packed::() { + return visit_styled(s, &styled.child, Cow::Borrowed(&styled.styles), styles); + } + + // Apply grouping --- where multiple elements are collected and then + // processed together (typically being transformed into one). + if visit_grouping_rules(s, content, styles)? { + return Ok(()); + } + + // Some elements are skipped based on specific circumstances. + if visit_filter_rules(s, content, styles)? { + return Ok(()); + } + + // No further transformations to apply, so we can finally just push it to + // the output! + s.sink.push((content, styles)); + + Ok(()) +} + +// Handles special cases for math in normal content and nested equations in +// math. +fn visit_math_rules<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult { + if let RealizationKind::Math = s.kind { + // Transparently recurse into equations nested in math, so that things + // like this work: + // ``` + // #let my = $pi$ + // $ my r^2 $ + // ``` + if let Some(elem) = content.to_packed::() { + visit(s, &elem.body, styles)?; + return Ok(true); + } + + // In normal realization, we apply regex show rules to consecutive + // textual elements via `TEXTUAL` grouping. However, in math, this is + // not desirable, so we just do it on a per-element basis. + if let Some(elem) = content.to_packed::() { + if let Some(m) = find_regex_match_in_str(&elem.text, styles) { + visit_regex_match(s, &[(content, styles)], m)?; + return Ok(true); + } + } + } else { + // Transparently wrap mathy content into equations. + if content.can::() && !content.is::() { + let eq = EquationElem::new(content.clone()).pack().spanned(content.span()); + visit(s, s.store(eq), styles)?; + return Ok(true); + } + } + + Ok(false) +} + +/// Tries to apply show rules to or prepare content. Returns `true` if the +/// element was handled. +fn visit_show_rules<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult { + // Determines whether and how to proceed with show rule application. + let Some(Verdict { prepared, mut map, step }) = verdict(s.engine, content, styles) + else { + return Ok(false); + }; + + // Create a fresh copy that we can mutate. + let mut output = Cow::Borrowed(content); + + // If the element isn't yet prepared (we're seeing it for the first time), + // prepare it. + let mut tags = None; + if !prepared { + tags = prepare(s.engine, s.locator, output.to_mut(), &mut map, styles)?; + } + + // Apply a show rule step, if there is one. + if let Some(step) = step { + let chained = styles.chain(&map); + let result = match step { + // Apply a user-defined show rule. + ShowStep::Recipe(recipe, guard) => { + let context = Context::new(output.location(), Some(chained)); + recipe.apply( + s.engine, + context.track(), + output.into_owned().guarded(guard), + ) + } + + // Apply a built-in show rule. + ShowStep::Builtin => { + output.with::().unwrap().show(s.engine, chained) + } + }; + + // Errors in show rules don't terminate compilation immediately. We just + // continue with empty content for them and show all errors together, if + // they remain by the end of the introspection loop. + // + // This way, we can ignore errors that only occur in earlier iterations + // and also show more useful errors at once. + output = Cow::Owned(s.engine.delay(result)); + } + + // Lifetime-extend the realized content if necessary. + let realized = match output { + Cow::Borrowed(realized) => realized, + Cow::Owned(realized) => s.store(realized), + }; + + // Push start tag. + let (start, end) = tags.unzip(); + if let Some(tag) = start { + visit(s, s.store(TagElem::packed(tag)), styles)?; + } + + let prev_outside = s.outside; + s.outside &= content.is::(); + s.engine.route.increase(); + s.engine.route.check_show_depth().at(content.span())?; + + visit_styled(s, realized, Cow::Owned(map), styles)?; + + s.outside = prev_outside; + s.engine.route.decrease(); + + // Push end tag. + if let Some(tag) = end { + visit(s, s.store(TagElem::packed(tag)), styles)?; + } + + Ok(true) +} + +/// Inspects a target element and the current styles and determines how to +/// proceed with the styling. +fn verdict<'a>( + engine: &mut Engine, + target: &'a Content, + styles: StyleChain<'a>, +) -> Option> { + let prepared = target.is_prepared(); + let mut map = Styles::new(); + let mut step = None; + + // Do pre-synthesis on a cloned element to be able to match on synthesized + // fields before real synthesis runs (during preparation). It's really + // unfortunate that we have to do this, but otherwise + // `show figure.where(kind: table)` won't work :( + let mut target = target; + let mut slot; + if !prepared && target.can::() { + slot = target.clone(); + slot.with_mut::() + .unwrap() + .synthesize(engine, styles) + .ok(); + target = &slot; + } + + // Lazily computes the total number of recipes in the style chain. We need + // it to determine whether a particular show rule was already applied to the + // `target` previously. For this purpose, show rules are indexed from the + // top of the chain as the chain might grow to the bottom. + let depth = Lazy::new(|| styles.recipes().count()); + + for (r, recipe) in styles.recipes().enumerate() { + // We're not interested in recipes that don't match. + if !recipe + .selector() + .is_some_and(|selector| selector.matches(target, Some(styles))) + { + continue; + } + + // Special handling for show-set rules. + if let Transformation::Style(transform) = recipe.transform() { + if !prepared { + map.apply(transform.clone()); + } + continue; + } + + // If we already have a show step, don't look for one. + if step.is_some() { + continue; + } + + // Check whether this show rule was already applied to the target. + let index = RecipeIndex(*depth - r); + if target.is_guarded(index) { + continue; + } + + // We'll apply this recipe. + step = Some(ShowStep::Recipe(recipe, index)); + + // If we found a show rule and are already prepared, there is nothing + // else to do, so we can just break. If we are not yet prepared, + // continue searching for potential show-set styles. + if prepared { + break; + } + } + + // If we found no user-defined rule, also consider the built-in show rule. + if step.is_none() && target.can::() { + step = Some(ShowStep::Builtin); + } + + // If there's no nothing to do, there is also no verdict. + if step.is_none() + && map.is_empty() + && (prepared || { + target.label().is_none() + && target.location().is_none() + && !target.can::() + && !target.can::() + && !target.can::() + }) + { + return None; + } + + Some(Verdict { prepared, map, step }) +} + +/// This is only executed the first time an element is visited. +fn prepare( + engine: &mut Engine, + locator: &mut SplitLocator, + target: &mut Content, + map: &mut Styles, + styles: StyleChain, +) -> SourceResult> { + // Generate a location for the element, which uniquely identifies it in + // the document. This has some overhead, so we only do it for elements + // that are explicitly marked as locatable and labelled elements. + // + // The element could already have a location even if it is not prepared + // when it stems from a query. + let key = typst_utils::hash128(&target); + if target.location().is_none() + && (target.can::() || target.label().is_some()) + { + let loc = locator.next_location(engine.introspector, key); + target.set_location(loc); + } + + // Apply built-in show-set rules. User-defined show-set rules are already + // considered in the map built while determining the verdict. + if let Some(show_settable) = target.with::() { + map.apply(show_settable.show_set(styles)); + } + + // If necessary, generated "synthesized" fields (which are derived from + // other fields or queries). Do this after show-set so that show-set styles + // are respected. + if let Some(synthesizable) = target.with_mut::() { + synthesizable.synthesize(engine, styles.chain(map))?; + } + + // Copy style chain fields into the element itself, so that they are + // available in rules. + target.materialize(styles.chain(map)); + + // If the element is locatable, create start and end tags to be able to find + // the element in the frames after layout. Do this after synthesis and + // materialization, so that it includes the synthesized fields. Do it before + // marking as prepared so that show-set rules will apply to this element + // when queried. + let tags = target + .location() + .map(|loc| (Tag::Start(target.clone()), Tag::End(loc, key))); + + // Ensure that this preparation only runs once by marking the element as + // prepared. + target.mark_prepared(); + + Ok(tags) +} + +/// Handles a styled element. +fn visit_styled<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + mut local: Cow<'a, Styles>, + outer: StyleChain<'a>, +) -> SourceResult<()> { + // Nothing to do if the styles are actually empty. + if local.is_empty() { + return visit(s, content, outer); + } + + // Check for document and page styles. + let mut pagebreak = false; + for style in local.iter() { + let Some(elem) = style.element() else { continue }; + if elem == DocumentElem::elem() { + let RealizationKind::Root(info) = &mut s.kind else { + let span = style.span(); + bail!(span, "document set rules are not allowed inside of containers"); + }; + + info.populate(&local); + } else if elem == PageElem::elem() { + let RealizationKind::Root(_) = s.kind else { + let span = style.span(); + bail!(span, "page configuration is not allowed inside of containers"); + }; + + // When there are page styles, we "break free" from our show rule cage. + pagebreak = true; + s.outside = true; + } + } + + // If we are not within a container or show rule, mark the styles as + // "outside". This will allow them to be lifted to the page level. + if s.outside { + local = Cow::Owned(local.into_owned().outside()); + } + + // Lifetime-extend the styles if necessary. + let outer = s.arenas.bump.alloc(outer); + let local = match local { + Cow::Borrowed(map) => map, + Cow::Owned(owned) => &*s.arenas.styles.alloc(owned), + }; + + // Generate a weak pagebreak if there is a page interruption. For the + // starting pagebreak we only want the styles before and including the + // interruptions, not trailing styles that happen to be in the same `Styles` + // list, so we trim the local styles. + if pagebreak { + let relevant = local + .as_slice() + .trim_end_matches(|style| style.element() != Some(PageElem::elem())); + visit(s, PagebreakElem::shared_weak(), outer.chain(relevant))?; + } + + finish_interrupted(s, local)?; + visit(s, content, outer.chain(local))?; + finish_interrupted(s, local)?; + + // Generate a weak "boundary" pagebreak at the end. In comparison to a + // normal weak pagebreak, the styles of this are ignored during layout, so + // it doesn't really matter what we use here. + if pagebreak { + visit(s, PagebreakElem::shared_boundary(), *outer)?; + } + + Ok(()) +} + +/// Tries to group the content in an active group or start a new one if any +/// grouping rule matches. Returns `true` if the element was grouped. +fn visit_grouping_rules<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult { + let elem = content.elem(); + let matching = s.rules.iter().find(|&rule| (rule.trigger)(elem)); + + // Try to continue or finish an existing grouping. + while let Some(active) = s.groupings.last() { + // Start a nested group if a rule with higher priority matches. + if matching.is_some_and(|rule| rule.priority > active.rule.priority) { + break; + } + + // If the element can be added to the active grouping, do it. + if (active.rule.trigger)(elem) || (active.rule.inner)(elem) { + s.sink.push((content, styles)); + return Ok(true); + } + + finish_innermost_grouping(s)?; + } + + // Start a new grouping. + if let Some(rule) = matching { + let start = s.sink.len(); + s.groupings.push(Grouping { start, rule }); + s.sink.push((content, styles)); + return Ok(true); + } + + Ok(false) +} + +/// Some elements don't make it to the sink depending on the realization kind +/// and current state. +fn visit_filter_rules<'a>( + s: &mut State<'a, '_, '_, '_>, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult { + if content.is::() && !matches!(s.kind, RealizationKind::Math) { + // Outside of maths, spaces that were not collected by the paragraph + // grouper don't interest us. + return Ok(true); + } else if content.is::() { + // Paragraph breaks are only a boundary for paragraph grouping, we don't + // need to store them. + s.may_attach = false; + return Ok(true); + } else if !s.may_attach + && content.to_packed::().is_some_and(|elem| elem.attach(styles)) + { + // Delete attach spacing collapses if not immediately following a + // paragraph. + return Ok(true); + } + + // Remember whether following attach spacing can survive. + s.may_attach = content.is::(); + + Ok(false) +} + +/// Finishes all grouping. +fn finish(s: &mut State) -> SourceResult<()> { + finish_grouping_while(s, |s| !s.groupings.is_empty())?; + + // In math, spaces are top-level. + if let RealizationKind::Math = s.kind { + collapse_spaces(&mut s.sink, 0); + } + + Ok(()) +} + +/// Finishes groupings while any active group is interrupted by the styles. +fn finish_interrupted(s: &mut State, local: &Styles) -> SourceResult<()> { + let mut last = None; + for elem in local.iter().filter_map(|style| style.element()) { + if last == Some(elem) { + continue; + } + finish_grouping_while(s, |s| { + s.groupings.iter().any(|grouping| (grouping.rule.interrupt)(elem)) + })?; + last = Some(elem); + } + Ok(()) +} + +/// Finishes groupings while `f` returns `true`. +fn finish_grouping_while(s: &mut State, f: F) -> SourceResult<()> +where + F: Fn(&State) -> bool, +{ + // Finishing of a group may result in new content and new grouping. This + // can, in theory, go on for a bit. To prevent it from becoming an infinite + // loop, we keep track of the iteration count. + let mut i = 0; + while f(s) { + finish_innermost_grouping(s)?; + i += 1; + if i > 512 { + bail!(Span::detached(), "maximum grouping depth exceeded"); + } + } + Ok(()) +} + +/// Finishes the currently innermost grouping. +fn finish_innermost_grouping(s: &mut State) -> SourceResult<()> { + // The grouping we are interrupting. + let Grouping { start, rule } = s.groupings.pop().unwrap(); + + // Trim trailing non-trigger elements. + let trimmed = s.sink[start..].trim_end_matches(|(c, _)| !(rule.trigger)(c.elem())); + let end = start + trimmed.len(); + let tail = s.store_slice(&s.sink[end..]); + s.sink.truncate(end); + + // If the grouping is not interested in tags, remove and collect them. + let mut tags = BumpVec::::new_in(&s.arenas.bump); + if !rule.tags { + let mut k = start; + for i in start..end { + if s.sink[i].0.is::() { + tags.push(s.sink[i]); + continue; + } + + if k < i { + s.sink[k] = s.sink[i]; + } + k += 1; + } + s.sink.truncate(k); + } + + // Execute the grouping's finisher rule. + (rule.finish)(Grouped { s, start })?; + + // Visit the tags and staged elements again. + for &(content, styles) in tags.iter().chain(&tail) { + visit(s, content, styles)?; + } + + Ok(()) +} + +/// The maximum number of nested groups that are possible. Corresponds to the +/// number of unique priority levels. +const MAX_GROUP_NESTING: usize = 3; + +/// Grouping rules used in normal realizations. +static NORMAL_RULES: &[&GroupingRule] = &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS]; + +/// Grouping rules used in math realization. +static MATH_RULES: &[&GroupingRule] = &[&CITES, &LIST, &ENUM, &TERMS]; + +/// Groups adjacent textual elements for text show rule application. +static TEXTUAL: GroupingRule = GroupingRule { + priority: 3, + tags: true, + trigger: |elem| { + elem == TextElem::elem() + || elem == LinebreakElem::elem() + || elem == SmartQuoteElem::elem() + }, + inner: |elem| elem == SpaceElem::elem(), + // Any kind of style interrupts this kind of grouping since regex show + // rules cannot match over style changes anyway. + interrupt: |_| true, + finish: finish_textual, +}; + +/// Collects inline-level elements into a `ParElem`. +static PAR: GroupingRule = GroupingRule { + priority: 1, + tags: true, + trigger: |elem| { + elem == TextElem::elem() + || elem == HElem::elem() + || elem == LinebreakElem::elem() + || elem == SmartQuoteElem::elem() + || elem == InlineElem::elem() + || elem == BoxElem::elem() + }, + inner: |elem| elem == SpaceElem::elem(), + interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(), + finish: finish_par, +}; + +/// Collects `CiteElem`s into `CiteGroup`s. +static CITES: GroupingRule = GroupingRule { + priority: 2, + tags: false, + trigger: |elem| elem == CiteElem::elem(), + inner: |elem| elem == SpaceElem::elem(), + interrupt: |elem| elem == CiteGroup::elem(), + finish: finish_cites, +}; + +/// Builds a `ListElem` from grouped `ListItems`s. +static LIST: GroupingRule = list_like_grouping::(); + +/// Builds an `EnumElem` from grouped `EnumItem`s. +static ENUM: GroupingRule = list_like_grouping::(); + +/// Builds a `TermsElem` from grouped `TermItem`s. +static TERMS: GroupingRule = list_like_grouping::(); + +/// Collects `ListItemLike` elements into a `ListLike` element. +const fn list_like_grouping() -> GroupingRule { + GroupingRule { + priority: 2, + tags: false, + trigger: |elem| elem == T::Item::elem(), + inner: |elem| elem == SpaceElem::elem() || elem == ParbreakElem::elem(), + interrupt: |elem| elem == T::elem(), + finish: finish_list_like::, + } +} + +/// Processes grouped textual elements. +/// +/// Specifically, it searches for regex matches in grouped textual elements and +/// - if there was a match, visits the results recursively, +/// - if there was no match, tries to simply implicitly use the grouped elements +/// as part of a paragraph grouping, +/// - if that's not possible because another grouping is active, temporarily +/// disables textual grouping and revisits the elements. +fn finish_textual(Grouped { s, mut start }: Grouped) -> SourceResult<()> { + // Try to find a regex match in the grouped textual elements. Returns early + // if there is one. + if visit_textual(s, start)? { + return Ok(()); + } + + // There was no regex match, so we need to collect the text into a paragraph + // grouping. To do that, we first terminate all non-paragraph groupings. + if in_non_par_grouping(s) { + let elems = s.store_slice(&s.sink[start..]); + s.sink.truncate(start); + finish_grouping_while(s, in_non_par_grouping)?; + start = s.sink.len(); + s.sink.extend(elems); + } + + // Now, there are only two options: + // 1. We are already in a paragraph group. In this case, the elements just + // transparently become part of it. + // 2. There is no group at all. In this case, we create one. + if s.groupings.is_empty() { + s.groupings.push(Grouping { start, rule: &PAR }); + } + + Ok(()) +} + +/// Whether there is an active grouping, but it is not a `PAR` grouping. +fn in_non_par_grouping(s: &State) -> bool { + s.groupings + .last() + .is_some_and(|grouping| !std::ptr::eq(grouping.rule, &PAR)) +} + +/// Builds the `ParElem` from inline-level elements. +fn finish_par(mut grouped: Grouped) -> SourceResult<()> { + // Collapse unsupported spaces in-place. + let (sink, start) = grouped.get_mut(); + collapse_spaces(sink, start); + + // Collect the children. + let elems = grouped.get(); + let span = select_span(elems); + let (children, trunk) = StyleVec::create(elems); + + // Create and visit the paragraph. + let s = grouped.end(); + let elem = ParElem::new(children).pack().spanned(span); + visit(s, s.store(elem), trunk) +} + +/// Builds the `CiteGroup` from `CiteElem`s. +fn finish_cites(grouped: Grouped) -> SourceResult<()> { + // Collect the children. + let elems = grouped.get(); + let span = select_span(elems); + let trunk = elems[0].1; + let children = elems + .iter() + .filter_map(|(c, _)| c.to_packed::()) + .cloned() + .collect(); + + // Create and visit the citation group. + let s = grouped.end(); + let elem = CiteGroup::new(children).pack().spanned(span); + visit(s, s.store(elem), trunk) +} + +/// Builds the `ListLike` element from `ListItemLike` elements. +fn finish_list_like(grouped: Grouped) -> SourceResult<()> { + // Collect the children. + let elems = grouped.get(); + let span = select_span(elems); + let tight = !elems.iter().any(|(c, _)| c.is::()); + let styles = elems.iter().filter(|(c, _)| c.is::()).map(|&(_, s)| s); + let trunk = StyleChain::trunk(styles).unwrap(); + let trunk_depth = trunk.links().count(); + let children = elems + .iter() + .copied() + .filter_map(|(c, s)| { + let item = c.to_packed::()?.clone(); + let local = s.suffix(trunk_depth); + Some(T::Item::styled(item, local)) + }) + .collect(); + + // Create and visit the list. + let s = grouped.end(); + let elem = T::create(children, tight).pack().spanned(span); + visit(s, s.store(elem), trunk) +} + +/// Visit textual elements in `s.sink[start..]` and apply regex show rules to +/// them. +fn visit_textual(s: &mut State, start: usize) -> SourceResult { + // Try to find a regex match in the grouped textual elements. + if let Some(m) = find_regex_match_in_elems(s, &s.sink[start..]) { + collapse_spaces(&mut s.sink, start); + let elems = s.store_slice(&s.sink[start..]); + s.sink.truncate(start); + visit_regex_match(s, &elems, m)?; + return Ok(true); + } + + Ok(false) +} + +/// Finds the leftmost regex match for this style chain in the given textual +/// elements. +/// +/// Collects the element's merged textual representation into the bump arena. +/// This merging also takes into account space collapsing so that we don't need +/// to call `collapse_spaces` on every textual group, performing yet another +/// linear pass. We only collapse the spaces elements themselves on the cold +/// path where there is an actual match. +fn find_regex_match_in_elems<'a>( + s: &State, + elems: &[Pair<'a>], +) -> Option> { + let mut buf = BumpString::new_in(&s.arenas.bump); + let mut base = 0; + let mut leftmost = None; + let mut current = StyleChain::default(); + let mut space = SpaceState::Destructive; + + for &(content, styles) in elems { + if content.is::() { + continue; + } + + let linebreak = content.is::(); + if linebreak { + if let SpaceState::Space(_) = space { + buf.pop(); + } + } + + if styles != current && !buf.is_empty() { + leftmost = find_regex_match_in_str(&buf, current); + if leftmost.is_some() { + break; + } + base += buf.len(); + buf.clear(); + } + + current = styles; + space = if content.is::() { + if space != SpaceState::Supportive { + continue; + } + buf.push(' '); + SpaceState::Space(0) + } else if linebreak { + buf.push('\n'); + SpaceState::Destructive + } else if let Some(elem) = content.to_packed::() { + buf.push(if elem.double(styles) { '"' } else { '\'' }); + SpaceState::Supportive + } else if let Some(elem) = content.to_packed::() { + buf.push_str(&elem.text); + SpaceState::Supportive + } else { + panic!("tried to find regex match in non-textual elements"); + }; + } + + if leftmost.is_none() { + leftmost = find_regex_match_in_str(&buf, current); + } + + leftmost.map(|m| RegexMatch { offset: base + m.offset, ..m }) +} + +/// Finds the leftmost regex match for this style chain in the given text. +fn find_regex_match_in_str<'a>( + text: &str, + styles: StyleChain<'a>, +) -> Option> { + let mut r = 0; + let mut revoked = SmallBitSet::new(); + let mut leftmost: Option<(regex::Match, RecipeIndex, &Recipe)> = None; + + let depth = Lazy::new(|| styles.recipes().count()); + + for entry in styles.entries() { + let recipe = match &**entry { + Style::Recipe(recipe) => recipe, + Style::Property(_) => continue, + Style::Revocation(index) => { + revoked.insert(index.0); + continue; + } + }; + r += 1; + + let Some(Selector::Regex(regex)) = recipe.selector() else { continue }; + let Some(m) = regex.find(text) else { continue }; + + // Make sure we don't get any empty matches. + if m.range().is_empty() { + continue; + } + + // If we already have a match that is equally or more to the left, we're + // not interested in this new match. + if leftmost.is_some_and(|(p, ..)| p.start() <= m.start()) { + continue; + } + + // Check whether the rule is already revoked. Do it only now to not + // compute the depth unnecessarily. We subtract 1 from r because we + // already incremented it. + let index = RecipeIndex(*depth - (r - 1)); + if revoked.contains(index.0) { + continue; + } + + leftmost = Some((m, index, recipe)); + } + + leftmost.map(|(m, id, recipe)| RegexMatch { + offset: m.start(), + text: m.as_str().into(), + id, + recipe, + styles, + }) +} + +/// Visit a match of a regular expression. +/// +/// This first revisits all elements before the match, potentially slicing up +/// a text element, then the transformed match, and then the remaining elements +/// after the match. +fn visit_regex_match<'a>( + s: &mut State<'a, '_, '_, '_>, + elems: &[Pair<'a>], + m: RegexMatch<'a>, +) -> SourceResult<()> { + let match_range = m.offset..m.offset + m.text.len(); + let piece = TextElem::packed(m.text); + let context = Context::new(None, Some(m.styles)); + let output = m.recipe.apply(s.engine, context.track(), piece)?; + + let mut cursor = 0; + let mut output = Some(output); + let mut visit_unconsumed_match = |s: &mut State<'a, '_, '_, '_>| -> SourceResult<()> { + if let Some(output) = output.take() { + let revocation = Style::Revocation(m.id).into(); + let outer = s.arenas.bump.alloc(m.styles); + let chained = outer.chain(s.arenas.styles.alloc(revocation)); + visit(s, s.store(output), chained)?; + } + Ok(()) + }; + + for &(content, styles) in elems { + // Just forward tags. + if content.is::() { + visit(s, content, styles)?; + continue; + } + + // At this point, we can have a `TextElem`, `SpaceElem`, + // `LinebreakElem`, or `SmartQuoteElem`. We now determine the range of + // the element. + let len = content.to_packed::().map_or(1, |elem| elem.text.len()); + let elem_range = cursor..cursor + len; + + // If the element starts before the start of match, visit it fully or + // sliced. + if elem_range.start < match_range.start { + if elem_range.end <= match_range.start { + visit(s, content, styles)?; + } else { + let mut elem = content.to_packed::().unwrap().clone(); + elem.text = elem.text[..match_range.start - elem_range.start].into(); + visit(s, s.store(elem.pack()), styles)?; + } + } + + // When the match starts before this element ends, visit it. + if match_range.start < elem_range.end { + visit_unconsumed_match(s)?; + } + + // If the element ends after the end of the match, visit if fully or + // sliced. + if elem_range.end > match_range.end { + if elem_range.start >= match_range.end { + visit(s, content, styles)?; + } else { + let mut elem = content.to_packed::().unwrap().clone(); + elem.text = elem.text[match_range.end - elem_range.start..].into(); + visit(s, s.store(elem.pack()), styles)?; + } + } + + cursor = elem_range.end; + } + + // If the match wasn't consumed yet, visit it. This shouldn't really happen + // in practice (we'd need to have an empty match at the end), but it's an + // extra fail-safe. + visit_unconsumed_match(s)?; + + Ok(()) +} + +/// Collapses all spaces within `buf[start..]` that are at the edges or in the +/// vicinity of destructive elements. +fn collapse_spaces(buf: &mut Vec, start: usize) { + let mut state = SpaceState::Destructive; + let mut k = start; + + // We do one pass over the elements, backshifting everything as necessary + // when a space collapses. The variable `i` is our cursor in the original + // elements. The variable `k` is our cursor in the result. At all times, we + // have `k <= i`, so we can do it in place. + for i in start..buf.len() { + let (content, styles) = buf[i]; + + // Determine the next state. + if content.is::() { + // Nothing to do. + } else if content.is::() { + if state != SpaceState::Supportive { + continue; + } + state = SpaceState::Space(k); + } else if content.is::() { + destruct_space(buf, &mut k, &mut state); + } else if let Some(elem) = content.to_packed::() { + if elem.amount.is_fractional() || elem.weak(styles) { + destruct_space(buf, &mut k, &mut state); + } + } else { + state = SpaceState::Supportive; + }; + + // Copy over normal elements (in place). + if k < i { + buf[k] = buf[i]; + } + k += 1; + } + + destruct_space(buf, &mut k, &mut state); + + // Delete all the excess that's left due to the gaps produced by spaces. + buf.truncate(k); +} + +/// Deletes a preceding space if any. +fn destruct_space(buf: &mut [Pair], end: &mut usize, state: &mut SpaceState) { + if let SpaceState::Space(s) = *state { + buf.copy_within(s + 1..*end, s); + *end -= 1; + } + *state = SpaceState::Destructive; +} + +/// Finds the first non-detached span in the list. +fn select_span(children: &[Pair]) -> Span { + Span::find(children.iter().map(|(c, _)| c.span())) +} -- cgit v1.2.3