summary | refs | log | tree | commit | diff
path: root/crates/typst-realize
diff options
context:
space:
mode:
author: Laurenz <laurmaedje@gmail.com>, 2025-01-24 13:11:26 +0100
committer: GitHub <noreply@github.com>, 2025-01-24 12:11:26 +0000
commit: 26e65bfef5b1da7f6c72e1409237cf03fb5d6069 (patch)
tree: dae6f71efead43736202dd6aea933b95b1bc7a14 /crates/typst-realize
parent: 467968af0788a3059e1bed47f9daee846f5b3904 (diff)
Semantic paragraphs (#5746)
Diffstat (limited to 'crates/typst-realize')
-rw-r--r-- crates/typst-realize/src/lib.rs | 155
1 files changed, 112 insertions, 43 deletions
diff --git a/crates/typst-realize/src/lib.rs b/crates/typst-realize/src/lib.rs
index ff42c3e9..754e89aa 100644
--- a/crates/typst-realize/src/lib.rs
+++ b/crates/typst-realize/src/lib.rs
@@ -15,8 +15,8 @@ use typst_library::diag::{bail, At, SourceResult};
use typst_library::engine::Engine;
use typst_library::foundations::{
Content, Context, ContextElem, Element, NativeElement, Recipe, RecipeIndex, Selector,
- SequenceElem, Show, ShowSet, Style, StyleChain, StyleVec, StyledElem, Styles,
- SymbolElem, Synthesize, Transformation,
+ SequenceElem, Show, ShowSet, Style, StyleChain, StyledElem, Styles, SymbolElem,
+ Synthesize, Transformation,
};
use typst_library::html::{tag, HtmlElem};
use typst_library::introspection::{Locatable, SplitLocator, Tag, TagElem};
@@ -28,7 +28,7 @@ use typst_library::model::{
CiteElem, CiteGroup, DocumentElem, EnumElem, ListElem, ListItemLike, ListLike,
ParElem, ParbreakElem, TermsElem,
};
-use typst_library::routines::{Arenas, Pair, RealizationKind};
+use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind};
use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
use typst_syntax::Span;
use typst_utils::{SliceExt, SmallBitSet};
@@ -48,17 +48,18 @@ pub fn realize<'a>(
locator,
arenas,
rules: match kind {
- RealizationKind::LayoutDocument(_) | RealizationKind::LayoutFragment => {
- LAYOUT_RULES
- }
+ RealizationKind::LayoutDocument(_) => LAYOUT_RULES,
+ RealizationKind::LayoutFragment(_) => LAYOUT_RULES,
+ RealizationKind::LayoutPar => LAYOUT_PAR_RULES,
RealizationKind::HtmlDocument(_) => HTML_DOCUMENT_RULES,
- RealizationKind::HtmlFragment => HTML_FRAGMENT_RULES,
+ RealizationKind::HtmlFragment(_) => HTML_FRAGMENT_RULES,
RealizationKind::Math => MATH_RULES,
},
sink: vec![],
groupings: ArrayVec::new(),
outside: matches!(kind, RealizationKind::LayoutDocument(_)),
may_attach: false,
+ saw_parbreak: false,
kind,
};
@@ -98,6 +99,8 @@ struct State<'a, 'x, 'y, 'z> {
outside: bool,
/// Whether now following attach spacing can survive.
may_attach: bool,
+ /// Whether we visited any paragraph breaks.
+ saw_parbreak: bool,
}
/// Defines a rule for how certain elements shall be grouped during realization.
@@ -125,6 +128,10 @@ struct GroupingRule {
struct Grouping<'a> {
/// The position in `s.sink` where the group starts.
start: usize,
+ /// Only applies to `PAR` grouping: Whether this paragraph group is
+ /// interrupted, but not yet finished because it may be ignored due to being
+ /// fully inline.
+ interrupted: bool,
/// The rule used for this grouping.
rule: &'a GroupingRule,
}
@@ -575,19 +582,21 @@ fn visit_styled<'a>(
for style in local.iter() {
let Some(elem) = style.element() else { continue };
if elem == DocumentElem::elem() {
- match &mut s.kind {
- RealizationKind::LayoutDocument(info)
- | RealizationKind::HtmlDocument(info) => info.populate(&local),
- _ => bail!(
+ if let Some(info) = s.kind.as_document_mut() {
+ info.populate(&local)
+ } else {
+ bail!(
style.span(),
"document set rules are not allowed inside of containers"
- ),
+ );
}
} else if elem == PageElem::elem() {
- let RealizationKind::LayoutDocument(_) = s.kind else {
- let span = style.span();
- bail!(span, "page configuration is not allowed inside of containers");
- };
+ if !matches!(s.kind, RealizationKind::LayoutDocument(_)) {
+ bail!(
+ style.span(),
+ "page configuration is not allowed inside of containers"
+ );
+ }
// When there are page styles, we "break free" from our show rule cage.
pagebreak = true;
@@ -650,7 +659,9 @@ fn visit_grouping_rules<'a>(
}
// If the element can be added to the active grouping, do it.
- if (active.rule.trigger)(content, &s.kind) || (active.rule.inner)(content) {
+ if !active.interrupted
+ && ((active.rule.trigger)(content, &s.kind) || (active.rule.inner)(content))
+ {
s.sink.push((content, styles));
return Ok(true);
}
@@ -661,7 +672,7 @@ fn visit_grouping_rules<'a>(
// Start a new grouping.
if let Some(rule) = matching {
let start = s.sink.len();
- s.groupings.push(Grouping { start, rule });
+ s.groupings.push(Grouping { start, rule, interrupted: false });
s.sink.push((content, styles));
return Ok(true);
}
@@ -676,22 +687,24 @@ fn visit_filter_rules<'a>(
content: &'a Content,
styles: StyleChain<'a>,
) -> SourceResult<bool> {
- if content.is::<SpaceElem>()
- && !matches!(s.kind, RealizationKind::Math | RealizationKind::HtmlFragment)
- {
- // Outside of maths, spaces that were not collected by the paragraph
- // grouper don't interest us.
+ if matches!(s.kind, RealizationKind::LayoutPar | RealizationKind::Math) {
+ return Ok(false);
+ }
+
+ if content.is::<SpaceElem>() {
+ // Outside of maths and paragraph realization, spaces that were not
+ // collected by the paragraph grouper don't interest us.
return Ok(true);
} else if content.is::<ParbreakElem>() {
// Paragraph breaks are only a boundary for paragraph grouping, we don't
// need to store them.
s.may_attach = false;
+ s.saw_parbreak = true;
return Ok(true);
} else if !s.may_attach
&& content.to_packed::<VElem>().is_some_and(|elem| elem.attach(styles))
{
- // Delete attach spacing collapses if not immediately following a
- // paragraph.
+ // Attach spacing collapses if not immediately following a paragraph.
return Ok(true);
}
@@ -703,7 +716,18 @@ fn visit_filter_rules<'a>(
/// Finishes all grouping.
fn finish(s: &mut State) -> SourceResult<()> {
- finish_grouping_while(s, |s| !s.groupings.is_empty())?;
+ finish_grouping_while(s, |s| {
+ // If this is a fragment realization and all we've got is inline
+ // content, don't turn it into a paragraph.
+ if is_fully_inline(s) {
+ *s.kind.as_fragment_mut().unwrap() = FragmentKind::Inline;
+ s.groupings.pop();
+ collapse_spaces(&mut s.sink, 0);
+ false
+ } else {
+ !s.groupings.is_empty()
+ }
+ })?;
// In math, spaces are top-level.
if let RealizationKind::Math = s.kind {
@@ -722,6 +746,12 @@ fn finish_interrupted(s: &mut State, local: &Styles) -> SourceResult<()> {
}
finish_grouping_while(s, |s| {
s.groupings.iter().any(|grouping| (grouping.rule.interrupt)(elem))
+ && if is_fully_inline(s) {
+ s.groupings[0].interrupted = true;
+ false
+ } else {
+ true
+ }
})?;
last = Some(elem);
}
@@ -729,9 +759,9 @@ fn finish_interrupted(s: &mut State, local: &Styles) -> SourceResult<()> {
}
/// Finishes groupings while `f` returns `true`.
-fn finish_grouping_while<F>(s: &mut State, f: F) -> SourceResult<()>
+fn finish_grouping_while<F>(s: &mut State, mut f: F) -> SourceResult<()>
where
- F: Fn(&State) -> bool,
+ F: FnMut(&mut State) -> bool,
{
// Finishing of a group may result in new content and new grouping. This
// can, in theory, go on for a bit. To prevent it from becoming an infinite
@@ -750,7 +780,7 @@ where
/// Finishes the currently innermost grouping.
fn finish_innermost_grouping(s: &mut State) -> SourceResult<()> {
// The grouping we are interrupting.
- let Grouping { start, rule } = s.groupings.pop().unwrap();
+ let Grouping { start, rule, .. } = s.groupings.pop().unwrap();
// Trim trailing non-trigger elements.
let trimmed = s.sink[start..].trim_end_matches(|(c, _)| !(rule.trigger)(c, &s.kind));
@@ -794,12 +824,16 @@ const MAX_GROUP_NESTING: usize = 3;
/// Grouping rules used in layout realization.
static LAYOUT_RULES: &[&GroupingRule] = &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];
+/// Grouping rules used in paragraph layout realization (`RealizationKind::LayoutPar`); the same as `LAYOUT_RULES` but without `PAR`.
+static LAYOUT_PAR_RULES: &[&GroupingRule] = &[&TEXTUAL, &CITES, &LIST, &ENUM, &TERMS];
+
/// Grouping rules used in HTML root realization.
static HTML_DOCUMENT_RULES: &[&GroupingRule] =
&[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];
/// Grouping rules used in HTML fragment realization.
-static HTML_FRAGMENT_RULES: &[&GroupingRule] = &[&TEXTUAL, &CITES, &LIST, &ENUM, &TERMS];
+static HTML_FRAGMENT_RULES: &[&GroupingRule] =
+ &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];
/// Grouping rules used in math realization.
static MATH_RULES: &[&GroupingRule] = &[&CITES, &LIST, &ENUM, &TERMS];
@@ -836,12 +870,10 @@ static PAR: GroupingRule = GroupingRule {
|| elem == SmartQuoteElem::elem()
|| elem == InlineElem::elem()
|| elem == BoxElem::elem()
- || (matches!(
- kind,
- RealizationKind::HtmlDocument(_) | RealizationKind::HtmlFragment
- ) && content
- .to_packed::<HtmlElem>()
- .is_some_and(|elem| tag::is_inline_by_default(elem.tag)))
+ || (kind.is_html()
+ && content
+ .to_packed::<HtmlElem>()
+ .is_some_and(|elem| tag::is_inline_by_default(elem.tag)))
},
inner: |content| content.elem() == SpaceElem::elem(),
interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(),
@@ -914,17 +946,31 @@ fn finish_textual(Grouped { s, mut start }: Grouped) -> SourceResult<()> {
// transparently become part of it.
// 2. There is no group at all. In this case, we create one.
if s.groupings.is_empty() && s.rules.iter().any(|&rule| std::ptr::eq(rule, &PAR)) {
- s.groupings.push(Grouping { start, rule: &PAR });
+ s.groupings.push(Grouping { start, rule: &PAR, interrupted: false });
}
Ok(())
}
/// Whether the innermost active grouping exists and is either not a `PAR` grouping or is a `PAR` grouping marked `interrupted`.
-fn in_non_par_grouping(s: &State) -> bool {
- s.groupings
- .last()
- .is_some_and(|grouping| !std::ptr::eq(grouping.rule, &PAR))
+fn in_non_par_grouping(s: &mut State) -> bool { // NOTE(review): nothing is mutated here; presumably `&mut` lets this fn be passed where `FnMut(&mut State) -> bool` is expected — confirm against callers
+ s.groupings.last().is_some_and(|grouping| {
+ !std::ptr::eq(grouping.rule, &PAR) || grouping.interrupted
+ })
+}
+
+/// Whether this is a fragment realization that saw no paragraph break and has exactly one active
+/// grouping, which is a `PAR` grouping that spans the whole sink (with the exception of leading tags).
+fn is_fully_inline(s: &State) -> bool {
+ s.kind.is_fragment()
+ && !s.saw_parbreak
+ && match s.groupings.as_slice() {
+ [grouping] => { // exactly one active grouping
+ std::ptr::eq(grouping.rule, &PAR)
+ && s.sink[..grouping.start].iter().all(|(c, _)| c.is::<TagElem>()) // everything before the group must be a tag
+ }
+ _ => false,
+ }
}
/// Builds the `ParElem` from inline-level elements.
@@ -936,11 +982,11 @@ fn finish_par(mut grouped: Grouped) -> SourceResult<()> {
// Collect the children.
let elems = grouped.get();
let span = select_span(elems);
- let (children, trunk) = StyleVec::create(elems);
+ let (body, trunk) = repack(elems);
// Create and visit the paragraph.
let s = grouped.end();
- let elem = ParElem::new(children).pack().spanned(span);
+ let elem = ParElem::new(body).pack().spanned(span);
visit(s, s.store(elem), trunk)
}
@@ -1277,3 +1323,26 @@ fn destruct_space(buf: &mut [Pair], end: &mut usize, state: &mut SpaceState) {
fn select_span(children: &[Pair]) -> Span {
Span::find(children.iter().map(|(c, _)| c.span()))
}
+
+/// Turns realized content/style pairs back into one owned `Content` sequence plus the trunk style
+/// chain; runs whose `chain.suffix(depth)` is non-empty are wrapped with `styled_with_map`.
+fn repack<'a>(buf: &[Pair<'a>]) -> (Content, StyleChain<'a>) {
+ let trunk = StyleChain::trunk(buf.iter().map(|&(_, s)| s)).unwrap_or_default(); // default chain when `trunk` yields no value
+ let depth = trunk.links().count(); // number of links in the trunk; passed to `suffix` below
+
+ let mut seq = Vec::with_capacity(buf.len());
+
+ for (chain, group) in buf.group_by_key(|&(_, s)| s) { // presumably consecutive runs sharing one style chain (`SliceExt`) — TODO confirm
+ let iter = group.iter().map(|&(c, _)| c.clone());
+ let suffix = chain.suffix(depth);
+ if suffix.is_empty() { // empty suffix: the elements go in unwrapped
+ seq.extend(iter);
+ } else if let &[(element, _)] = group { // single-element run: style that element directly, without a wrapping sequence
+ seq.push(element.clone().styled_with_map(suffix));
+ } else { // multi-element run: wrap it in a sequence and style it once
+ seq.push(Content::sequence(iter).styled_with_map(suffix));
+ }
+ }
+
+ (Content::sequence(seq), trunk)
+}