summaryrefslogtreecommitdiff
path: root/crates/typst-layout/src/inline
diff options
context:
space:
mode:
Diffstat (limited to 'crates/typst-layout/src/inline')
-rw-r--r--crates/typst-layout/src/inline/box.rs87
-rw-r--r--crates/typst-layout/src/inline/collect.rs328
-rw-r--r--crates/typst-layout/src/inline/deco.rs213
-rw-r--r--crates/typst-layout/src/inline/finalize.rs36
-rw-r--r--crates/typst-layout/src/inline/line.rs750
-rw-r--r--crates/typst-layout/src/inline/linebreak.rs980
-rw-r--r--crates/typst-layout/src/inline/mod.rs105
-rw-r--r--crates/typst-layout/src/inline/prepare.rs196
-rw-r--r--crates/typst-layout/src/inline/shaping.rs1175
9 files changed, 3870 insertions, 0 deletions
diff --git a/crates/typst-layout/src/inline/box.rs b/crates/typst-layout/src/inline/box.rs
new file mode 100644
index 00000000..30572e4e
--- /dev/null
+++ b/crates/typst-layout/src/inline/box.rs
@@ -0,0 +1,87 @@
+use once_cell::unsync::Lazy;
+use typst_library::diag::SourceResult;
+use typst_library::engine::Engine;
+use typst_library::foundations::{Packed, StyleChain};
+use typst_library::introspection::Locator;
+use typst_library::layout::{BoxElem, Frame, FrameKind, Size};
+use typst_library::visualize::Stroke;
+use typst_utils::Numeric;
+
+use crate::flow::unbreakable_pod;
+use crate::shapes::{clip_rect, fill_and_stroke};
+
+/// Lay out a box as part of a paragraph.
+#[typst_macros::time(name = "box", span = elem.span())]
+pub fn layout_box(
+ elem: &Packed<BoxElem>,
+ engine: &mut Engine,
+ locator: Locator,
+ styles: StyleChain,
+ region: Size,
+) -> SourceResult<Frame> {
+ // Fetch sizing properties.
+ let width = elem.width(styles);
+ let height = elem.height(styles);
+ let inset = elem.inset(styles).unwrap_or_default();
+
+ // Build the pod region.
+ let pod = unbreakable_pod(&width, &height.into(), &inset, styles, region);
+
+ // Layout the body.
+ let mut frame = match elem.body(styles) {
+ // If we have no body, just create an empty frame. If necessary,
+ // its size will be adjusted below.
+ None => Frame::hard(Size::zero()),
+
+ // If we have a child, layout it into the body. Boxes are boundaries
+ // for gradient relativeness, so we set the `FrameKind` to `Hard`.
+ Some(body) => crate::layout_frame(engine, body, locator, styles, pod)?
+ .with_kind(FrameKind::Hard),
+ };
+
+ // Enforce a correct frame size on the expanded axes. Do this before
+ // applying the inset, since the pod shrunk.
+ frame.set_size(pod.expand.select(pod.size, frame.size()));
+
+ // Apply the inset.
+ if !inset.is_zero() {
+ crate::pad::grow(&mut frame, &inset);
+ }
+
+ // Prepare fill and stroke.
+ let fill = elem.fill(styles);
+ let stroke = elem
+ .stroke(styles)
+ .unwrap_or_default()
+ .map(|s| s.map(Stroke::unwrap_or_default));
+
+ // Only fetch these if necessary (for clipping or filling/stroking).
+ let outset = Lazy::new(|| elem.outset(styles).unwrap_or_default());
+ let radius = Lazy::new(|| elem.radius(styles).unwrap_or_default());
+
+ // Clip the contents, if requested.
+ if elem.clip(styles) {
+ let size = frame.size() + outset.relative_to(frame.size()).sum_by_axis();
+ frame.clip(clip_rect(size, &radius, &stroke));
+ }
+
+ // Add fill and/or stroke.
+ if fill.is_some() || stroke.iter().any(Option::is_some) {
+ fill_and_stroke(&mut frame, fill, &stroke, &outset, &radius, elem.span());
+ }
+
+ // Assign label to the frame.
+ if let Some(label) = elem.label() {
+ frame.label(label);
+ }
+
+ // Apply baseline shift. Do this after setting the size and applying the
+ // inset, so that a relative shift is resolved relative to the final
+ // height.
+ let shift = elem.baseline(styles).relative_to(frame.height());
+ if !shift.is_zero() {
+ frame.set_baseline(frame.baseline() - shift);
+ }
+
+ Ok(frame)
+}
diff --git a/crates/typst-layout/src/inline/collect.rs b/crates/typst-layout/src/inline/collect.rs
new file mode 100644
index 00000000..fbcddee5
--- /dev/null
+++ b/crates/typst-layout/src/inline/collect.rs
@@ -0,0 +1,328 @@
+use typst_library::diag::bail;
+use typst_library::foundations::{Packed, Resolve};
+use typst_library::introspection::{SplitLocator, Tag, TagElem};
+use typst_library::layout::{
+ Abs, AlignElem, BoxElem, Dir, Fr, Frame, HElem, InlineElem, InlineItem, Sizing,
+ Spacing,
+};
+use typst_library::text::{
+ is_default_ignorable, LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes,
+ SpaceElem, TextElem,
+};
+use typst_syntax::Span;
+use typst_utils::Numeric;
+
+use super::*;
+
+// The characters by which spacing, inline content and pins are replaced in the
+// paragraph's full text.
+const SPACING_REPLACE: &str = " "; // Space
+const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
+
+// Unicode BiDi control characters.
+const LTR_EMBEDDING: &str = "\u{202A}";
+const RTL_EMBEDDING: &str = "\u{202B}";
+const POP_EMBEDDING: &str = "\u{202C}";
+const LTR_ISOLATE: &str = "\u{2066}";
+const POP_ISOLATE: &str = "\u{2069}";
+
+/// A prepared item in a paragraph layout.
+#[derive(Debug)]
+pub enum Item<'a> {
+ /// A shaped text run with consistent style and direction.
+ Text(ShapedText<'a>),
+ /// Absolute spacing between other items, and whether it is weak.
+ Absolute(Abs, bool),
+ /// Fractional spacing between other items.
+ Fractional(Fr, Option<(&'a Packed<BoxElem>, Locator<'a>, StyleChain<'a>)>),
+ /// Layouted inline-level content.
+ Frame(Frame, StyleChain<'a>),
+ /// A tag.
+ Tag(&'a Tag),
+ /// An item that is invisible and needs to be skipped, e.g. a Unicode
+ /// isolate.
+ Skip(&'static str),
+}
+
+impl<'a> Item<'a> {
+ /// If this a text item, return it.
+ pub fn text(&self) -> Option<&ShapedText<'a>> {
+ match self {
+ Self::Text(shaped) => Some(shaped),
+ _ => None,
+ }
+ }
+
+ /// If this a text item, return it mutably.
+ pub fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ match self {
+ Self::Text(shaped) => Some(shaped),
+ _ => None,
+ }
+ }
+
+ /// Return the textual representation of this item: Either just itself (for
+ /// a text item) or a replacement string (for any other item).
+ pub fn textual(&self) -> &str {
+ match self {
+ Self::Text(shaped) => shaped.text,
+ Self::Absolute(_, _) | Self::Fractional(_, _) => SPACING_REPLACE,
+ Self::Frame(_, _) => OBJ_REPLACE,
+ Self::Tag(_) => "",
+ Self::Skip(s) => s,
+ }
+ }
+
+ /// The text length of the item.
+ pub fn textual_len(&self) -> usize {
+ self.textual().len()
+ }
+
+ /// The natural layouted width of the item.
+ pub fn natural_width(&self) -> Abs {
+ match self {
+ Self::Text(shaped) => shaped.width,
+ Self::Absolute(v, _) => *v,
+ Self::Frame(frame, _) => frame.width(),
+ Self::Fractional(_, _) | Self::Tag(_) => Abs::zero(),
+ Self::Skip(_) => Abs::zero(),
+ }
+ }
+}
+
+/// An item or not-yet shaped text. We can't shape text until we have collected
+/// all items because only then we can compute BiDi, and we need to split shape
+/// runs at level boundaries.
+#[derive(Debug)]
+pub enum Segment<'a> {
+ /// One or multiple collapsed text children. Stores how long the segment is
+ /// (in bytes of the full text string).
+ Text(usize, StyleChain<'a>),
+ /// An already prepared item.
+ Item(Item<'a>),
+}
+
+impl Segment<'_> {
+ /// The text length of the item.
+ pub fn textual_len(&self) -> usize {
+ match self {
+ Self::Text(len, _) => *len,
+ Self::Item(item) => item.textual_len(),
+ }
+ }
+}
+
+/// Collects all text of the paragraph into one string and a collection of
+/// segments that correspond to pieces of that string. This also performs
+/// string-level preprocessing like case transformations.
+#[typst_macros::time]
+pub fn collect<'a>(
+ children: &'a StyleVec,
+ engine: &mut Engine<'_>,
+ locator: &mut SplitLocator<'a>,
+ styles: &'a StyleChain<'a>,
+ region: Size,
+ consecutive: bool,
+) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
+ let mut collector = Collector::new(2 + children.len());
+ let mut quoter = SmartQuoter::new();
+
+ let outer_dir = TextElem::dir_in(*styles);
+ let first_line_indent = ParElem::first_line_indent_in(*styles);
+ if !first_line_indent.is_zero()
+ && consecutive
+ && AlignElem::alignment_in(*styles).resolve(*styles).x == outer_dir.start().into()
+ {
+ collector.push_item(Item::Absolute(first_line_indent.resolve(*styles), false));
+ collector.spans.push(1, Span::detached());
+ }
+
+ let hang = ParElem::hanging_indent_in(*styles);
+ if !hang.is_zero() {
+ collector.push_item(Item::Absolute(-hang, false));
+ collector.spans.push(1, Span::detached());
+ }
+
+ for (child, styles) in children.iter(styles) {
+ let prev_len = collector.full.len();
+
+ if child.is::<SpaceElem>() {
+ collector.push_text(" ", styles);
+ } else if let Some(elem) = child.to_packed::<TextElem>() {
+ collector.build_text(styles, |full| {
+ let dir = TextElem::dir_in(styles);
+ if dir != outer_dir {
+ // Insert "Explicit Directional Embedding".
+ match dir {
+ Dir::LTR => full.push_str(LTR_EMBEDDING),
+ Dir::RTL => full.push_str(RTL_EMBEDDING),
+ _ => {}
+ }
+ }
+
+ if let Some(case) = TextElem::case_in(styles) {
+ full.push_str(&case.apply(elem.text()));
+ } else {
+ full.push_str(elem.text());
+ }
+
+ if dir != outer_dir {
+ // Insert "Pop Directional Formatting".
+ full.push_str(POP_EMBEDDING);
+ }
+ });
+ } else if let Some(elem) = child.to_packed::<HElem>() {
+ let amount = elem.amount();
+ if amount.is_zero() {
+ continue;
+ }
+
+ collector.push_item(match amount {
+ Spacing::Fr(fr) => Item::Fractional(*fr, None),
+ Spacing::Rel(rel) => Item::Absolute(
+ rel.resolve(styles).relative_to(region.x),
+ elem.weak(styles),
+ ),
+ });
+ } else if let Some(elem) = child.to_packed::<LinebreakElem>() {
+ collector
+ .push_text(if elem.justify(styles) { "\u{2028}" } else { "\n" }, styles);
+ } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
+ let double = elem.double(styles);
+ if elem.enabled(styles) {
+ let quotes = SmartQuotes::get(
+ elem.quotes(styles),
+ TextElem::lang_in(styles),
+ TextElem::region_in(styles),
+ elem.alternative(styles),
+ );
+ let before =
+ collector.full.chars().rev().find(|&c| !is_default_ignorable(c));
+ let quote = quoter.quote(before, &quotes, double);
+ collector.push_text(quote, styles);
+ } else {
+ collector.push_text(if double { "\"" } else { "'" }, styles);
+ }
+ } else if let Some(elem) = child.to_packed::<InlineElem>() {
+ collector.push_item(Item::Skip(LTR_ISOLATE));
+
+ for item in elem.layout(engine, locator.next(&elem.span()), styles, region)? {
+ match item {
+ InlineItem::Space(space, weak) => {
+ collector.push_item(Item::Absolute(space, weak));
+ }
+ InlineItem::Frame(frame) => {
+ collector.push_item(Item::Frame(frame, styles));
+ }
+ }
+ }
+
+ collector.push_item(Item::Skip(POP_ISOLATE));
+ } else if let Some(elem) = child.to_packed::<BoxElem>() {
+ let loc = locator.next(&elem.span());
+ if let Sizing::Fr(v) = elem.width(styles) {
+ collector.push_item(Item::Fractional(v, Some((elem, loc, styles))));
+ } else {
+ let frame = layout_box(elem, engine, loc, styles, region)?;
+ collector.push_item(Item::Frame(frame, styles));
+ }
+ } else if let Some(elem) = child.to_packed::<TagElem>() {
+ collector.push_item(Item::Tag(&elem.tag));
+ } else {
+ bail!(child.span(), "unexpected paragraph child");
+ };
+
+ let len = collector.full.len() - prev_len;
+ collector.spans.push(len, child.span());
+ }
+
+ Ok((collector.full, collector.segments, collector.spans))
+}
+
+/// Collects segments.
+struct Collector<'a> {
+ full: String,
+ segments: Vec<Segment<'a>>,
+ spans: SpanMapper,
+}
+
+impl<'a> Collector<'a> {
+ fn new(capacity: usize) -> Self {
+ Self {
+ full: String::new(),
+ segments: Vec::with_capacity(capacity),
+ spans: SpanMapper::new(),
+ }
+ }
+
+ fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
+ self.full.push_str(text);
+ self.push_segment(Segment::Text(text.len(), styles));
+ }
+
+ fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
+ where
+ F: FnOnce(&mut String),
+ {
+ let prev = self.full.len();
+ f(&mut self.full);
+ let len = self.full.len() - prev;
+ self.push_segment(Segment::Text(len, styles));
+ }
+
+ fn push_item(&mut self, item: Item<'a>) {
+ self.full.push_str(item.textual());
+ self.push_segment(Segment::Item(item));
+ }
+
+ fn push_segment(&mut self, segment: Segment<'a>) {
+ match (self.segments.last_mut(), &segment) {
+ // Merge adjacent text segments with the same styles.
+ (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles))
+ if *last_styles == *styles =>
+ {
+ *last_len += *len;
+ }
+
+ // Merge adjacent weak spacing by taking the maximum.
+ (
+ Some(Segment::Item(Item::Absolute(prev_amount, true))),
+ Segment::Item(Item::Absolute(amount, true)),
+ ) => {
+ *prev_amount = (*prev_amount).max(*amount);
+ }
+
+ _ => self.segments.push(segment),
+ }
+ }
+}
+
+/// Maps byte offsets back to spans.
+#[derive(Default)]
+pub struct SpanMapper(Vec<(usize, Span)>);
+
+impl SpanMapper {
+ /// Create a new span mapper.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Push a span for a segment with the given length.
+ pub fn push(&mut self, len: usize, span: Span) {
+ self.0.push((len, span));
+ }
+
+ /// Determine the span at the given byte offset.
+ ///
+ /// May return a detached span.
+ pub fn span_at(&self, offset: usize) -> (Span, u16) {
+ let mut cursor = 0;
+ for &(len, span) in &self.0 {
+ if (cursor..cursor + len).contains(&offset) {
+ return (span, u16::try_from(offset - cursor).unwrap_or(0));
+ }
+ cursor += len;
+ }
+ (Span::detached(), 0)
+ }
+}
diff --git a/crates/typst-layout/src/inline/deco.rs b/crates/typst-layout/src/inline/deco.rs
new file mode 100644
index 00000000..c01b369b
--- /dev/null
+++ b/crates/typst-layout/src/inline/deco.rs
@@ -0,0 +1,213 @@
+use kurbo::{BezPath, Line, ParamCurve};
+use ttf_parser::{GlyphId, OutlineBuilder};
+use typst_library::layout::{Abs, Em, Frame, FrameItem, Point, Size};
+use typst_library::text::{
+ BottomEdge, DecoLine, Decoration, TextEdgeBounds, TextItem, TopEdge,
+};
+use typst_library::visualize::{FixedStroke, Geometry};
+use typst_syntax::Span;
+
+use crate::shapes::styled_rect;
+
+/// Add line decorations to a single run of shaped text.
+pub fn decorate(
+ frame: &mut Frame,
+ deco: &Decoration,
+ text: &TextItem,
+ width: Abs,
+ shift: Abs,
+ pos: Point,
+) {
+ let font_metrics = text.font.metrics();
+
+ if let DecoLine::Highlight { fill, stroke, top_edge, bottom_edge, radius } =
+ &deco.line
+ {
+ let (top, bottom) = determine_edges(text, *top_edge, *bottom_edge);
+ let size = Size::new(width + 2.0 * deco.extent, top + bottom);
+ let rects = styled_rect(size, radius, fill.clone(), stroke);
+ let origin = Point::new(pos.x - deco.extent, pos.y - top - shift);
+ frame.prepend_multiple(
+ rects
+ .into_iter()
+ .map(|shape| (origin, FrameItem::Shape(shape, Span::detached()))),
+ );
+ return;
+ }
+
+ let (stroke, metrics, offset, evade, background) = match &deco.line {
+ DecoLine::Strikethrough { stroke, offset, background } => {
+ (stroke, font_metrics.strikethrough, offset, false, *background)
+ }
+ DecoLine::Overline { stroke, offset, evade, background } => {
+ (stroke, font_metrics.overline, offset, *evade, *background)
+ }
+ DecoLine::Underline { stroke, offset, evade, background } => {
+ (stroke, font_metrics.underline, offset, *evade, *background)
+ }
+ _ => return,
+ };
+
+ let offset = offset.unwrap_or(-metrics.position.at(text.size)) - shift;
+ let stroke = stroke.clone().unwrap_or(FixedStroke::from_pair(
+ text.fill.as_decoration(),
+ metrics.thickness.at(text.size),
+ ));
+
+ let gap_padding = 0.08 * text.size;
+ let min_width = 0.162 * text.size;
+
+ let start = pos.x - deco.extent;
+ let end = pos.x + width + deco.extent;
+
+ let mut push_segment = |from: Abs, to: Abs, prepend: bool| {
+ let origin = Point::new(from, pos.y + offset);
+ let target = Point::new(to - from, Abs::zero());
+
+ if target.x >= min_width || !evade {
+ let shape = Geometry::Line(target).stroked(stroke.clone());
+
+ if prepend {
+ frame.prepend(origin, FrameItem::Shape(shape, Span::detached()));
+ } else {
+ frame.push(origin, FrameItem::Shape(shape, Span::detached()));
+ }
+ }
+ };
+
+ if !evade {
+ push_segment(start, end, background);
+ return;
+ }
+
+ let line = Line::new(
+ kurbo::Point::new(pos.x.to_raw(), offset.to_raw()),
+ kurbo::Point::new((pos.x + width).to_raw(), offset.to_raw()),
+ );
+
+ let mut x = pos.x;
+ let mut intersections = vec![];
+
+ for glyph in text.glyphs.iter() {
+ let dx = glyph.x_offset.at(text.size) + x;
+ let mut builder =
+ BezPathBuilder::new(font_metrics.units_per_em, text.size, dx.to_raw());
+
+ let bbox = text.font.ttf().outline_glyph(GlyphId(glyph.id), &mut builder);
+ let path = builder.finish();
+
+ x += glyph.x_advance.at(text.size);
+
+ // Only do the costly segments intersection test if the line
+ // intersects the bounding box.
+ let intersect = bbox.is_some_and(|bbox| {
+ let y_min = -text.font.to_em(bbox.y_max).at(text.size);
+ let y_max = -text.font.to_em(bbox.y_min).at(text.size);
+ offset >= y_min && offset <= y_max
+ });
+
+ if intersect {
+ // Find all intersections of segments with the line.
+ intersections.extend(
+ path.segments()
+ .flat_map(|seg| seg.intersect_line(line))
+ .map(|is| Abs::raw(line.eval(is.line_t).x)),
+ );
+ }
+ }
+
+ // Add start and end points, taking padding into account.
+ intersections.push(start - gap_padding);
+ intersections.push(end + gap_padding);
+ // When emitting the decorative line segments, we move from left to
+ // right. The intersections are not necessarily in this order, yet.
+ intersections.sort();
+
+ for edge in intersections.windows(2) {
+ let l = edge[0];
+ let r = edge[1];
+
+ // If we are too close, don't draw the segment
+ if r - l < gap_padding {
+ continue;
+ } else {
+ push_segment(l + gap_padding, r - gap_padding, background);
+ }
+ }
+}
+
+// Return the top/bottom edge of the text given the metric of the font.
+fn determine_edges(
+ text: &TextItem,
+ top_edge: TopEdge,
+ bottom_edge: BottomEdge,
+) -> (Abs, Abs) {
+ let mut top = Abs::zero();
+ let mut bottom = Abs::zero();
+
+ for g in text.glyphs.iter() {
+ let (t, b) = text.font.edges(
+ top_edge,
+ bottom_edge,
+ text.size,
+ TextEdgeBounds::Glyph(g.id),
+ );
+ top.set_max(t);
+ bottom.set_max(b);
+ }
+
+ (top, bottom)
+}
+
+/// Builds a kurbo [`BezPath`] for a glyph.
+struct BezPathBuilder {
+ path: BezPath,
+ units_per_em: f64,
+ font_size: Abs,
+ x_offset: f64,
+}
+
+impl BezPathBuilder {
+ fn new(units_per_em: f64, font_size: Abs, x_offset: f64) -> Self {
+ Self {
+ path: BezPath::new(),
+ units_per_em,
+ font_size,
+ x_offset,
+ }
+ }
+
+ fn finish(self) -> BezPath {
+ self.path
+ }
+
+ fn p(&self, x: f32, y: f32) -> kurbo::Point {
+ kurbo::Point::new(self.s(x) + self.x_offset, -self.s(y))
+ }
+
+ fn s(&self, v: f32) -> f64 {
+ Em::from_units(v, self.units_per_em).at(self.font_size).to_raw()
+ }
+}
+
+impl OutlineBuilder for BezPathBuilder {
+ fn move_to(&mut self, x: f32, y: f32) {
+ self.path.move_to(self.p(x, y));
+ }
+
+ fn line_to(&mut self, x: f32, y: f32) {
+ self.path.line_to(self.p(x, y));
+ }
+
+ fn quad_to(&mut self, x1: f32, y1: f32, x: f32, y: f32) {
+ self.path.quad_to(self.p(x1, y1), self.p(x, y));
+ }
+
+ fn curve_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x: f32, y: f32) {
+ self.path.curve_to(self.p(x1, y1), self.p(x2, y2), self.p(x, y));
+ }
+
+ fn close(&mut self) {
+ self.path.close_path();
+ }
+}
diff --git a/crates/typst-layout/src/inline/finalize.rs b/crates/typst-layout/src/inline/finalize.rs
new file mode 100644
index 00000000..599ace9d
--- /dev/null
+++ b/crates/typst-layout/src/inline/finalize.rs
@@ -0,0 +1,36 @@
+use typst_library::introspection::SplitLocator;
+use typst_utils::Numeric;
+
+use super::*;
+
+/// Turns the selected lines into frames.
+#[typst_macros::time]
+pub fn finalize(
+ engine: &mut Engine,
+ p: &Preparation,
+ lines: &[Line],
+ styles: StyleChain,
+ region: Size,
+ expand: bool,
+ locator: &mut SplitLocator<'_>,
+) -> SourceResult<Fragment> {
+ // Determine the paragraph's width: Full width of the region if we should
+ // expand or there's fractional spacing, fit-to-width otherwise.
+ let width = if !region.x.is_finite()
+ || (!expand && lines.iter().all(|line| line.fr().is_zero()))
+ {
+ region
+ .x
+ .min(p.hang + lines.iter().map(|line| line.width).max().unwrap_or_default())
+ } else {
+ region.x
+ };
+
+ // Stack the lines into one frame per region.
+ let shrink = ParElem::shrink_in(styles);
+ lines
+ .iter()
+ .map(|line| commit(engine, p, line, width, region.y, shrink, locator, styles))
+ .collect::<SourceResult<_>>()
+ .map(Fragment::frames)
+}
diff --git a/crates/typst-layout/src/inline/line.rs b/crates/typst-layout/src/inline/line.rs
new file mode 100644
index 00000000..596e109e
--- /dev/null
+++ b/crates/typst-layout/src/inline/line.rs
@@ -0,0 +1,750 @@
+use std::fmt::{self, Debug, Formatter};
+use std::ops::{Deref, DerefMut};
+
+use typst_library::engine::Engine;
+use typst_library::foundations::NativeElement;
+use typst_library::introspection::{SplitLocator, Tag};
+use typst_library::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point};
+use typst_library::model::{ParLine, ParLineMarker};
+use typst_library::text::{Lang, TextElem};
+use typst_utils::Numeric;
+
+use super::*;
+
+const SHY: char = '\u{ad}';
+const HYPHEN: char = '-';
+const EN_DASH: char = '–';
+const EM_DASH: char = '—';
+const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks.
+
+/// A layouted line, consisting of a sequence of layouted paragraph items that
+/// are mostly borrowed from the preparation phase. This type enables you to
+/// measure the size of a line in a range before committing to building the
+/// line's frame.
+///
+/// At most two paragraph items must be created individually for this line: The
+/// first and last one since they may be broken apart by the start or end of the
+/// line, respectively. But even those can partially reuse previous results when
+/// the break index is safe-to-break per rustybuzz.
+pub struct Line<'a> {
+ /// The items the line is made of.
+ pub items: Items<'a>,
+ /// The exact natural width of the line.
+ pub width: Abs,
+ /// Whether the line should be justified.
+ pub justify: bool,
+ /// Whether the line ends with a hyphen or dash, either naturally or through
+ /// hyphenation.
+ pub dash: Option<Dash>,
+}
+
+impl<'a> Line<'a> {
+ /// Create an empty line.
+ pub fn empty() -> Self {
+ Self {
+ items: Items::new(),
+ width: Abs::zero(),
+ justify: false,
+ dash: None,
+ }
+ }
+
+ /// How many glyphs are in the text where we can insert additional
+ /// space when encountering underfull lines.
+ pub fn justifiables(&self) -> usize {
+ let mut count = 0;
+ for shaped in self.items.iter().filter_map(Item::text) {
+ count += shaped.justifiables();
+ }
+
+ // CJK character at line end should not be adjusted.
+ if self
+ .items
+ .last()
+ .and_then(Item::text)
+ .map(|s| s.cjk_justifiable_at_last())
+ .unwrap_or(false)
+ {
+ count -= 1;
+ }
+
+ count
+ }
+
+ /// How much the line can stretch.
+ pub fn stretchability(&self) -> Abs {
+ self.items
+ .iter()
+ .filter_map(Item::text)
+ .map(|s| s.stretchability())
+ .sum()
+ }
+
+ /// How much the line can shrink.
+ pub fn shrinkability(&self) -> Abs {
+ self.items
+ .iter()
+ .filter_map(Item::text)
+ .map(|s| s.shrinkability())
+ .sum()
+ }
+
+ /// Whether the line has items with negative width.
+ pub fn has_negative_width_items(&self) -> bool {
+ self.items.iter().any(|item| match item {
+ Item::Absolute(amount, _) => *amount < Abs::zero(),
+ Item::Frame(frame, _) => frame.width() < Abs::zero(),
+ _ => false,
+ })
+ }
+
+ /// The sum of fractions in the line.
+ pub fn fr(&self) -> Fr {
+ self.items
+ .iter()
+ .filter_map(|item| match item {
+ Item::Fractional(fr, _) => Some(*fr),
+ _ => None,
+ })
+ .sum()
+ }
+}
+
+/// A dash at the end of a line.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Dash {
+ /// A soft hyphen added to break a word.
+ Soft,
+ /// A regular hyphen, present in a compound word, e.g. beija-flor.
+ Hard,
+ /// Another kind of dash. Only relevant for cost computation.
+ Other,
+}
+
+/// Create a line which spans the given range.
+pub fn line<'a>(
+ engine: &Engine,
+ p: &'a Preparation,
+ range: Range,
+ breakpoint: Breakpoint,
+ pred: Option<&Line>,
+) -> Line<'a> {
+ // The line's full text.
+ let full = &p.text[range.clone()];
+
+ // Whether the line is justified.
+ let justify = full.ends_with(LINE_SEPARATOR)
+ || (p.justify && breakpoint != Breakpoint::Mandatory);
+
+ // Process dashes.
+ let dash = if breakpoint.is_hyphen() || full.ends_with(SHY) {
+ Some(Dash::Soft)
+ } else if full.ends_with(HYPHEN) {
+ Some(Dash::Hard)
+ } else if full.ends_with([EN_DASH, EM_DASH]) {
+ Some(Dash::Other)
+ } else {
+ None
+ };
+
+ // Trim the line at the end, if necessary for this breakpoint.
+ let trim = range.start + breakpoint.trim(full).len();
+
+ // Collect the items for the line.
+ let mut items = collect_items(engine, p, range, trim);
+
+ // Add a hyphen at the line start, if a previous dash should be repeated.
+ if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) {
+ if let Some(shaped) = items.first_text_mut() {
+ shaped.prepend_hyphen(engine, p.fallback);
+ }
+ }
+
+ // Add a hyphen at the line end, if we ended on a soft hyphen.
+ if dash == Some(Dash::Soft) {
+ if let Some(shaped) = items.last_text_mut() {
+ shaped.push_hyphen(engine, p.fallback);
+ }
+ }
+
+ // Deal with CJ characters at line boundaries.
+ adjust_cj_at_line_boundaries(p, full, &mut items);
+
+ // Compute the line's width.
+ let width = items.iter().map(Item::natural_width).sum();
+
+ Line { items, width, justify, dash }
+}
+
+/// Collects / reshapes all items for the line with the given `range`.
+///
+/// The `trim` defines an end position to which text items are trimmed. For
+/// example, the `range` may span "hello\n", but the `trim` specifies that the
+/// linebreak is trimmed.
+///
+/// We do not factor the `trim` directly into the `range` because we still want
+/// to keep non-text items after the trim (e.g. tags).
+fn collect_items<'a>(
+ engine: &Engine,
+ p: &'a Preparation,
+ range: Range,
+ trim: usize,
+) -> Items<'a> {
+ let mut items = Items::new();
+ let mut fallback = None;
+
+ // Collect the items for each consecutively ordered run.
+ reorder(p, range.clone(), |subrange, rtl| {
+ let from = items.len();
+ collect_range(engine, p, subrange, trim, &mut items, &mut fallback);
+ if rtl {
+ items.reorder(from);
+ }
+ });
+
+ // Trim weak spacing at the start of the line.
+ let prefix = items
+ .iter()
+ .take_while(|item| matches!(item, Item::Absolute(_, true)))
+ .count();
+ if prefix > 0 {
+ items.drain(..prefix);
+ }
+
+ // Trim weak spacing at the end of the line.
+ while matches!(items.last(), Some(Item::Absolute(_, true))) {
+ items.pop();
+ }
+
+ // Add fallback text to expand the line height, if necessary.
+ if !items.iter().any(|item| matches!(item, Item::Text(_))) {
+ if let Some(fallback) = fallback {
+ items.push(fallback);
+ }
+ }
+
+ items
+}
+
+/// Calls `f` for the BiDi-reordered ranges of a line.
+fn reorder<F>(p: &Preparation, range: Range, mut f: F)
+where
+ F: FnMut(Range, bool),
+{
+ // If there is nothing bidirectional going on, skip reordering.
+ let Some(bidi) = &p.bidi else {
+ f(range, p.dir == Dir::RTL);
+ return;
+ };
+
+ // The bidi crate panics for empty lines.
+ if range.is_empty() {
+ f(range, p.dir == Dir::RTL);
+ return;
+ }
+
+ // Find the paragraph that contains the line.
+ let para = bidi
+ .paragraphs
+ .iter()
+ .find(|para| para.range.contains(&range.start))
+ .unwrap();
+
+ // Compute the reordered ranges in visual order (left to right).
+ let (levels, runs) = bidi.visual_runs(para, range.clone());
+
+ // Call `f` for each run.
+ for run in runs {
+ let rtl = levels[run.start].is_rtl();
+ f(run, rtl)
+ }
+}
+
+/// Collects / reshapes all items for the given `subrange` with continuous
+/// direction.
+fn collect_range<'a>(
+ engine: &Engine,
+ p: &'a Preparation,
+ range: Range,
+ trim: usize,
+ items: &mut Items<'a>,
+ fallback: &mut Option<ItemEntry<'a>>,
+) {
+ for (subrange, item) in p.slice(range.clone()) {
+ // All non-text items are just kept, they can't be split.
+ let Item::Text(shaped) = item else {
+ items.push(item);
+ continue;
+ };
+
+ // The intersection range of the item, the subrange, and the line's
+ // trimming.
+ let sliced =
+ range.start.max(subrange.start)..range.end.min(subrange.end).min(trim);
+
+ // Whether the item is split by the line.
+ let split = subrange.start < sliced.start || sliced.end < subrange.end;
+
+ if sliced.is_empty() {
+ // When there is no text, still keep this as a fallback item, which
+ // we can use to force a non-zero line-height when the line doesn't
+ // contain any other text.
+ *fallback = Some(ItemEntry::from(Item::Text(shaped.empty())));
+ } else if split {
+ // When the item is split in half, reshape it.
+ let reshaped = shaped.reshape(engine, sliced);
+ items.push(Item::Text(reshaped));
+ } else {
+ // When the item is fully contained, just keep it.
+ items.push(item);
+ }
+ }
+}
+
+/// Add spacing around punctuation marks for CJ glyphs at line boundaries.
+///
+/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of
+/// punctuation marks at line start or line end.
+fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) {
+ if text.starts_with(BEGIN_PUNCT_PAT)
+ || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script))
+ {
+ adjust_cj_at_line_start(p, items);
+ }
+
+ if text.ends_with(END_PUNCT_PAT)
+ || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script))
+ {
+ adjust_cj_at_line_end(p, items);
+ }
+}
+
+/// Add spacing around punctuation marks for CJ glyphs at the line start.
+fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) {
+ let Some(shaped) = items.first_text_mut() else { return };
+ let Some(glyph) = shaped.glyphs.first() else { return };
+
+ if glyph.is_cjk_right_aligned_punctuation() {
+ // If the first glyph is a CJK punctuation, we want to
+ // shrink it.
+ let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+ let shrink = glyph.shrinkability().0;
+ glyph.shrink_left(shrink);
+ shaped.width -= shrink.at(shaped.size);
+ } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() {
+ // If the first glyph is a CJK character adjusted by
+ // [`add_cjk_latin_spacing`], restore the original width.
+ let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+ let shrink = glyph.x_offset;
+ glyph.x_advance -= shrink;
+ glyph.x_offset = Em::zero();
+ glyph.adjustability.shrinkability.0 = Em::zero();
+ shaped.width -= shrink.at(shaped.size);
+ }
+}
+
+/// Add spacing around punctuation marks for CJ glyphs at the line end.
+fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
+ let Some(shaped) = items.last_text_mut() else { return };
+ let Some(glyph) = shaped.glyphs.last() else { return };
+
+ // Deal with CJK punctuation at line ends.
+ let style = cjk_punct_style(shaped.lang, shaped.region);
+
+ if glyph.is_cjk_left_aligned_punctuation(style) {
+ // If the last glyph is a CJK punctuation, we want to
+ // shrink it.
+ let shrink = glyph.shrinkability().1;
+ let punct = shaped.glyphs.to_mut().last_mut().unwrap();
+ punct.shrink_right(shrink);
+ shaped.width -= shrink.at(shaped.size);
+ } else if p.cjk_latin_spacing
+ && glyph.is_cj_script()
+ && (glyph.x_advance - glyph.x_offset) > Em::one()
+ {
+ // If the last glyph is a CJK character adjusted by
+ // [`add_cjk_latin_spacing`], restore the original width.
+ let shrink = glyph.x_advance - glyph.x_offset - Em::one();
+ let glyph = shaped.glyphs.to_mut().last_mut().unwrap();
+ glyph.x_advance -= shrink;
+ glyph.adjustability.shrinkability.1 = Em::zero();
+ shaped.width -= shrink.at(shaped.size);
+ }
+}
+
+/// Whether a hyphen should be inserted at the start of the next line.
+fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
+ // If the predecessor line does not end with a `Dash::Hard`, we shall
+ // not place a hyphen at the start of the next line.
+ if pred_line.dash != Some(Dash::Hard) {
+ return false;
+ }
+
+ // The hyphen should repeat only in the languages that require that feature.
+ // For more information see the discussion at https://github.com/typst/typst/issues/3235
+ let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
+
+ match shaped.lang {
+ // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+ // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+ // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+ // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+ // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+ // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+ Lang::LOWER_SORBIAN
+ | Lang::CZECH
+ | Lang::CROATIAN
+ | Lang::POLISH
+ | Lang::PORTUGUESE
+ | Lang::SLOVAK => true,
+
+ // In Spanish the hyphen is required only if the word next to hyphen is
+ // not capitalized. Otherwise, the hyphen must not be repeated.
+ //
+ // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+ // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+ Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()),
+
+ _ => false,
+ }
+}
+
+/// Commit to a line and build its frame.
+#[allow(clippy::too_many_arguments)]
+pub fn commit(
+ engine: &mut Engine,
+ p: &Preparation,
+ line: &Line,
+ width: Abs,
+ full: Abs,
+ shrink: bool,
+ locator: &mut SplitLocator<'_>,
+ styles: StyleChain,
+) -> SourceResult<Frame> {
+ let mut remaining = width - line.width - p.hang;
+ let mut offset = Abs::zero();
+
+ // We always build the line from left to right. In an LTR paragraph, we must
+ // thus add the hanging indent to the offset. When the paragraph is RTL, the
+ // hanging indent arises naturally due to the line width.
+ if p.dir == Dir::LTR {
+ offset += p.hang;
+ }
+
+ // Handle hanging punctuation to the left.
+ if let Some(Item::Text(text)) = line.items.first() {
+ if let Some(glyph) = text.glyphs.first() {
+ if !text.dir.is_positive()
+ && TextElem::overhang_in(text.styles)
+ && (line.items.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ offset -= amount;
+ remaining += amount;
+ }
+ }
+ }
+
+ // Handle hanging punctuation to the right.
+ if let Some(Item::Text(text)) = line.items.last() {
+ if let Some(glyph) = text.glyphs.last() {
+ if text.dir.is_positive()
+ && TextElem::overhang_in(text.styles)
+ && (line.items.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ remaining += amount;
+ }
+ }
+ }
+
+ // Determine how much additional space is needed. The justification_ratio is
+ // for the first step justification, extra_justification is for the last
+ // step. For more info on multi-step justification, see Procedures for
+ // Inter- Character Space Expansion in W3C document Chinese Layout
+ // Requirements.
+ let fr = line.fr();
+ let mut justification_ratio = 0.0;
+ let mut extra_justification = Abs::zero();
+
+ let shrinkability = line.shrinkability();
+ let stretchability = line.stretchability();
+ if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
+ // Attempt to reduce the length of the line, using shrinkability.
+ justification_ratio = (remaining / shrinkability).max(-1.0);
+ remaining = (remaining + shrinkability).min(Abs::zero());
+ } else if line.justify && fr.is_zero() {
+ // Attempt to increase the length of the line, using stretchability.
+ if stretchability > Abs::zero() {
+ justification_ratio = (remaining / stretchability).min(1.0);
+ remaining = (remaining - stretchability).max(Abs::zero());
+ }
+
+ let justifiables = line.justifiables();
+ if justifiables > 0 && remaining > Abs::zero() {
+ // Underfull line, distribute the extra space.
+ extra_justification = remaining / justifiables as f64;
+ remaining = Abs::zero();
+ }
+ }
+
+ let mut top = Abs::zero();
+ let mut bottom = Abs::zero();
+
+ // Build the frames and determine the height and baseline.
+ let mut frames = vec![];
+ for item in line.items.iter() {
+ let mut push = |offset: &mut Abs, frame: Frame| {
+ let width = frame.width();
+ top.set_max(frame.baseline());
+ bottom.set_max(frame.size().y - frame.baseline());
+ frames.push((*offset, frame));
+ *offset += width;
+ };
+
+ match item {
+ Item::Absolute(v, _) => {
+ offset += *v;
+ }
+ Item::Fractional(v, elem) => {
+ let amount = v.share(fr, remaining);
+ if let Some((elem, loc, styles)) = elem {
+ let region = Size::new(amount, full);
+ let mut frame =
+ layout_box(elem, engine, loc.relayout(), *styles, region)?;
+ frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+ push(&mut offset, frame.post_processed(*styles));
+ } else {
+ offset += amount;
+ }
+ }
+ Item::Text(shaped) => {
+ let frame = shaped.build(
+ engine,
+ &p.spans,
+ justification_ratio,
+ extra_justification,
+ );
+ push(&mut offset, frame.post_processed(shaped.styles));
+ }
+ Item::Frame(frame, styles) => {
+ let mut frame = frame.clone();
+ frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+ push(&mut offset, frame.post_processed(*styles));
+ }
+ Item::Tag(tag) => {
+ let mut frame = Frame::soft(Size::zero());
+ frame.push(Point::zero(), FrameItem::Tag((*tag).clone()));
+ frames.push((offset, frame));
+ }
+ Item::Skip(_) => {}
+ }
+ }
+
+ // Remaining space is distributed now.
+ if !fr.is_zero() {
+ remaining = Abs::zero();
+ }
+
+ let size = Size::new(width, top + bottom);
+ let mut output = Frame::soft(size);
+ output.set_baseline(top);
+
+ add_par_line_marker(&mut output, styles, engine, locator, top);
+
+ // Construct the line's frame.
+ for (offset, frame) in frames {
+ let x = offset + p.align.position(remaining);
+ let y = top - frame.baseline();
+ output.push_frame(Point::new(x, y), frame);
+ }
+
+ Ok(output)
+}
+
+/// Adds a paragraph line marker to a paragraph line's output frame if
+/// line numbering is not `None` at this point. Ensures other style properties,
+/// namely number margin, number align and number clearance, are stored in the
+/// marker as well.
+///
+/// The `top` parameter is used to ensure the marker, and thus the line's
+/// number in the margin, is aligned to the line's baseline.
+fn add_par_line_marker(
+ output: &mut Frame,
+ styles: StyleChain,
+ engine: &mut Engine,
+ locator: &mut SplitLocator,
+ top: Abs,
+) {
+ let Some(numbering) = ParLine::numbering_in(styles) else { return };
+ let margin = ParLine::number_margin_in(styles);
+ let align = ParLine::number_align_in(styles);
+
+ // Delay resolving the number clearance until line numbers are laid out to
+ // avoid inconsistent spacing depending on varying font size.
+ let clearance = ParLine::number_clearance_in(styles);
+
+ // Elements in tags must have a location for introspection to work. We do
+ // the work here instead of going through all of the realization process
+ // just for this, given we don't need to actually place the marker as we
+ // manually search for it in the frame later (when building a root flow,
+ // where line numbers can be displayed), so we just need it to be in a tag
+ // and to be valid (to have a location).
+ let mut marker = ParLineMarker::new(numbering, align, margin, clearance).pack();
+ let key = typst_utils::hash128(&marker);
+ let loc = locator.next_location(engine.introspector, key);
+ marker.set_location(loc);
+
+ // Create start and end tags through which we can search for this line's
+ // marker later. The 'x' coordinate is not important, just the 'y'
+ // coordinate, as that's what is used for line numbers. We will place the
+ // tags among other subframes in the line such that it is aligned with the
+ // line's general baseline. However, the line number will still need to
+ // manually adjust its own 'y' position based on its own baseline.
+ let pos = Point::with_y(top);
+ output.push(pos, FrameItem::Tag(Tag::Start(marker)));
+ output.push(pos, FrameItem::Tag(Tag::End(loc, key)));
+}
+
+/// How much a character should hang into the end margin.
+///
+/// For more discussion, see:
+/// <https://recoveringphysicist.com/21/>
+fn overhang(c: char) -> f64 {
+ match c {
+ // Dashes.
+ '–' | '—' => 0.2,
+ '-' => 0.55,
+
+ // Punctuation.
+ '.' | ',' => 0.8,
+ ':' | ';' => 0.3,
+
+ // Arabic
+ '\u{60C}' | '\u{6D4}' => 0.4,
+
+ _ => 0.0,
+ }
+}
+
+/// A collection of owned or borrowed paragraph items.
+pub struct Items<'a>(Vec<ItemEntry<'a>>);
+
+impl<'a> Items<'a> {
+ /// Create empty items.
+ pub fn new() -> Self {
+ Self(vec![])
+ }
+
+ /// Push a new item.
+ pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) {
+ self.0.push(entry.into());
+ }
+
+ /// Iterate over the items
+ pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> {
+ self.0.iter().map(|item| &**item)
+ }
+
+ /// Access the first item.
+ pub fn first(&self) -> Option<&Item<'a>> {
+ self.0.first().map(|item| &**item)
+ }
+
+ /// Access the last item.
+ pub fn last(&self) -> Option<&Item<'a>> {
+ self.0.last().map(|item| &**item)
+ }
+
+ /// Access the first item mutably, if it is text.
+ pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ self.0.first_mut()?.text_mut()
+ }
+
+ /// Access the last item mutably, if it is text.
+ pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ self.0.last_mut()?.text_mut()
+ }
+
+ /// Reorder the items starting at the given index to RTL.
+ pub fn reorder(&mut self, from: usize) {
+ self.0[from..].reverse()
+ }
+}
+
+impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
+ fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
+ Self(iter.into_iter().collect())
+ }
+}
+
+impl<'a> Deref for Items<'a> {
+ type Target = Vec<ItemEntry<'a>>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<'a> DerefMut for Items<'a> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+impl Debug for Items<'_> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ f.debug_list().entries(&self.0).finish()
+ }
+}
+
+/// A reference to or a boxed item.
+pub enum ItemEntry<'a> {
+ Ref(&'a Item<'a>),
+ Box(Box<Item<'a>>),
+}
+
+impl<'a> ItemEntry<'a> {
+ fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ match self {
+ Self::Ref(item) => {
+ let text = item.text()?;
+ *self = Self::Box(Box::new(Item::Text(text.clone())));
+ match self {
+ Self::Box(item) => item.text_mut(),
+ _ => unreachable!(),
+ }
+ }
+ Self::Box(item) => item.text_mut(),
+ }
+ }
+}
+
+impl<'a> Deref for ItemEntry<'a> {
+ type Target = Item<'a>;
+
+ fn deref(&self) -> &Self::Target {
+ match self {
+ Self::Ref(item) => item,
+ Self::Box(item) => item,
+ }
+ }
+}
+
+impl Debug for ItemEntry<'_> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ (**self).fmt(f)
+ }
+}
+
+impl<'a> From<&'a Item<'a>> for ItemEntry<'a> {
+ fn from(item: &'a Item<'a>) -> Self {
+ Self::Ref(item)
+ }
+}
+
+impl<'a> From<Item<'a>> for ItemEntry<'a> {
+ fn from(item: Item<'a>) -> Self {
+ Self::Box(Box::new(item))
+ }
+}
diff --git a/crates/typst-layout/src/inline/linebreak.rs b/crates/typst-layout/src/inline/linebreak.rs
new file mode 100644
index 00000000..7fc8b368
--- /dev/null
+++ b/crates/typst-layout/src/inline/linebreak.rs
@@ -0,0 +1,980 @@
+use std::ops::{Add, Sub};
+
+use az::SaturatingAs;
+use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed};
+use icu_properties::LineBreak;
+use icu_provider::AsDeserializingBufferProvider;
+use icu_provider_adapters::fork::ForkByKeyProvider;
+use icu_provider_blob::BlobDataProvider;
+use icu_segmenter::LineSegmenter;
+use once_cell::sync::Lazy;
+use typst_library::engine::Engine;
+use typst_library::layout::{Abs, Em};
+use typst_library::model::Linebreaks;
+use typst_library::text::{is_default_ignorable, Lang, TextElem};
+use typst_syntax::link_prefix;
+use unicode_segmentation::UnicodeSegmentation;
+
+use super::*;
+
+/// The cost of a line or paragraph layout.
+type Cost = f64;
+
+// Cost parameters.
+//
+// We choose higher costs than the Knuth-Plass paper (which would be 50) because
+// it hyphenates way to eagerly in Typst otherwise. Could be related to the
+// ratios coming out differently since Typst doesn't have the concept of glue,
+// so things work a bit differently.
+const DEFAULT_HYPH_COST: Cost = 135.0;
+const DEFAULT_RUNT_COST: Cost = 100.0;
+
+// Other parameters.
+const MIN_RATIO: f64 = -1.0;
+const MIN_APPROX_RATIO: f64 = -0.5;
+const BOUND_EPS: f64 = 1e-3;
+
+/// The ICU blob data.
+fn blob() -> BlobDataProvider {
+ BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap()
+}
+
+/// The general line break segmenter.
+static SEGMENTER: Lazy<LineSegmenter> =
+ Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap());
+
+/// The line break segmenter for Chinese/Japanese text.
+static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
+ let cj_blob =
+ BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT)
+ .unwrap();
+ let cj_provider = ForkByKeyProvider::new(cj_blob, blob());
+ LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
+});
+
+/// The Unicode line break properties for each code point.
+static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
+ icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
+});
+
+/// A line break opportunity.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Breakpoint {
+ /// Just a normal opportunity (e.g. after a space).
+ Normal,
+ /// A mandatory breakpoint (after '\n' or at the end of the text).
+ Mandatory,
+ /// An opportunity for hyphenating and how many chars are before/after it
+ /// in the word.
+ Hyphen(u8, u8),
+}
+
+impl Breakpoint {
+ /// Trim a line before this breakpoint.
+ pub fn trim(self, line: &str) -> &str {
+ // Trim default ignorables.
+ let line = line.trim_end_matches(is_default_ignorable);
+
+ match self {
+ // Trim whitespace.
+ Self::Normal => line.trim_end_matches(char::is_whitespace),
+
+ // Trim linebreaks.
+ Self::Mandatory => {
+ let lb = LINEBREAK_DATA.as_borrowed();
+ line.trim_end_matches(|c| {
+ matches!(
+ lb.get(c),
+ LineBreak::MandatoryBreak
+ | LineBreak::CarriageReturn
+ | LineBreak::LineFeed
+ | LineBreak::NextLine
+ )
+ })
+ }
+
+ // Trim nothing further.
+ Self::Hyphen(..) => line,
+ }
+ }
+
+ /// Whether this is a hyphen breakpoint.
+ pub fn is_hyphen(self) -> bool {
+ matches!(self, Self::Hyphen(..))
+ }
+}
+
+/// Breaks the paragraph into lines.
+pub fn linebreak<'a>(
+ engine: &Engine,
+ p: &'a Preparation<'a>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ let linebreaks = p.linebreaks.unwrap_or_else(|| {
+ if p.justify {
+ Linebreaks::Optimized
+ } else {
+ Linebreaks::Simple
+ }
+ });
+
+ match linebreaks {
+ Linebreaks::Simple => linebreak_simple(engine, p, width),
+ Linebreaks::Optimized => linebreak_optimized(engine, p, width),
+ }
+}
+
+/// Performs line breaking in simple first-fit style. This means that we build
+/// lines greedily, always taking the longest possible line. This may lead to
+/// very unbalanced line, but is fast and simple.
+#[typst_macros::time]
+fn linebreak_simple<'a>(
+ engine: &Engine,
+ p: &'a Preparation<'a>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ let mut lines = Vec::with_capacity(16);
+ let mut start = 0;
+ let mut last = None;
+
+ breakpoints(p, |end, breakpoint| {
+ // Compute the line and its size.
+ let mut attempt = line(engine, p, start..end, breakpoint, lines.last());
+
+ // If the line doesn't fit anymore, we push the last fitting attempt
+ // into the stack and rebuild the line from the attempt's end. The
+ // resulting line cannot be broken up further.
+ if !width.fits(attempt.width) {
+ if let Some((last_attempt, last_end)) = last.take() {
+ lines.push(last_attempt);
+ start = last_end;
+ attempt = line(engine, p, start..end, breakpoint, lines.last());
+ }
+ }
+
+ // Finish the current line if there is a mandatory line break (i.e. due
+ // to "\n") or if the line doesn't fit horizontally already since then
+ // no shorter line will be possible.
+ if breakpoint == Breakpoint::Mandatory || !width.fits(attempt.width) {
+ lines.push(attempt);
+ start = end;
+ last = None;
+ } else {
+ last = Some((attempt, end));
+ }
+ });
+
+ if let Some((line, _)) = last {
+ lines.push(line);
+ }
+
+ lines
+}
+
+/// Performs line breaking in optimized Knuth-Plass style. Here, we use more
+/// context to determine the line breaks than in the simple first-fit style. For
+/// example, we might choose to cut a line short even though there is still a
+/// bit of space to improve the fit of one of the following lines. The
+/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
+/// very tight or very loose fit has a higher cost than one that is just right.
+/// Ending a line with a hyphen incurs extra cost and endings two successive
+/// lines with hyphens even more.
+///
+/// To find the layout with the minimal total cost the algorithm uses dynamic
+/// programming: For each possible breakpoint it determines the optimal
+/// paragraph layout _up to that point_. It walks over all possible start points
+/// for a line ending at that point and finds the one for which the cost of the
+/// line plus the cost of the optimal paragraph up to the start point (already
+/// computed and stored in dynamic programming table) is minimal. The final
+/// result is simply the layout determined for the last breakpoint at the end of
+/// text.
+#[typst_macros::time]
+fn linebreak_optimized<'a>(
+ engine: &Engine,
+ p: &'a Preparation<'a>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ let metrics = CostMetrics::compute(p);
+
+ // Determines the exact costs of a likely good layout through Knuth-Plass
+ // with approximate metrics. We can use this cost as an upper bound to prune
+ // the search space in our proper optimization pass below.
+ let upper_bound = linebreak_optimized_approximate(engine, p, width, &metrics);
+
+ // Using the upper bound, perform exact optimized linebreaking.
+ linebreak_optimized_bounded(engine, p, width, &metrics, upper_bound)
+}
+
+/// Performs line breaking in optimized Knuth-Plass style, but with an upper
+/// bound on the cost. This allows us to skip many parts of the search space.
+#[typst_macros::time]
+fn linebreak_optimized_bounded<'a>(
+ engine: &Engine,
+ p: &'a Preparation<'a>,
+ width: Abs,
+ metrics: &CostMetrics,
+ upper_bound: Cost,
+) -> Vec<Line<'a>> {
+ /// An entry in the dynamic programming table for paragraph optimization.
+ struct Entry<'a> {
+ pred: usize,
+ total: Cost,
+ line: Line<'a>,
+ end: usize,
+ }
+
+ // Dynamic programming table.
+ let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }];
+
+ let mut active = 0;
+ let mut prev_end = 0;
+
+ breakpoints(p, |end, breakpoint| {
+ // Find the optimal predecessor.
+ let mut best: Option<Entry> = None;
+
+ // A lower bound for the cost of all following line attempts.
+ let mut line_lower_bound = None;
+
+ for (pred_index, pred) in table.iter().enumerate().skip(active) {
+ let start = pred.end;
+ let unbreakable = prev_end == start;
+
+ // If the minimum cost we've established for the line is already
+ // too much, skip this attempt.
+ if line_lower_bound
+ .is_some_and(|lower| pred.total + lower > upper_bound + BOUND_EPS)
+ {
+ continue;
+ }
+
+ // Build the line.
+ let attempt = line(engine, p, start..end, breakpoint, Some(&pred.line));
+
+ // Determine the cost of the line and its stretch ratio.
+ let (line_ratio, line_cost) = ratio_and_cost(
+ p,
+ metrics,
+ width,
+ &pred.line,
+ &attempt,
+ breakpoint,
+ unbreakable,
+ );
+
+ // If the line is overfull, we adjust the set of active candidate
+ // line starts. This is the case if
+ // - justification is on, but we'd need to shrink too much
+ // - justification is off and the line just doesn't fit
+ //
+ // If this is the earliest breakpoint in the active set
+ // (active == i), remove it from the active set. If there is an
+ // earlier one (active < i), then the logically shorter line was
+ // in fact longer (can happen with negative spacing) and we
+ // can't trim the active set just yet.
+ if line_ratio < metrics.min_ratio && active == pred_index {
+ active += 1;
+ }
+
+ // The total cost of this line and its chain of predecessors.
+ let total = pred.total + line_cost;
+
+ // If the line is already underfull (`line_ratio > 0`), any shorter
+ // slice of the line will be even more underfull. So it'll only get
+ // worse from here and further attempts would also have a cost
+ // exceeding `bound`. There is one exception: When the line has
+ // negative spacing, we can't know for sure, so we don't assign the
+ // lower bound in that case.
+ if line_ratio > 0.0
+ && line_lower_bound.is_none()
+ && !attempt.has_negative_width_items()
+ {
+ line_lower_bound = Some(line_cost);
+ }
+
+ // If the cost already exceeds the upper bound, we don't need to
+ // integrate this result into the table.
+ if total > upper_bound + BOUND_EPS {
+ continue;
+ }
+
+ // If this attempt is better than what we had before, take it!
+ if best.as_ref().map_or(true, |best| best.total >= total) {
+ best = Some(Entry { pred: pred_index, total, line: attempt, end });
+ }
+ }
+
+ // If this is a mandatory break, all breakpoints before this one become
+ // inactive since no line can span over the mandatory break.
+ if breakpoint == Breakpoint::Mandatory {
+ active = table.len();
+ }
+
+ table.extend(best);
+ prev_end = end;
+ });
+
+ // Retrace the best path.
+ let mut lines = Vec::with_capacity(16);
+ let mut idx = table.len() - 1;
+
+ // This should only happen if our bound was faulty. Which shouldn't happen!
+ if table[idx].end != p.text.len() {
+ #[cfg(debug_assertions)]
+ panic!("bounded paragraph layout is incomplete");
+
+ #[cfg(not(debug_assertions))]
+ return linebreak_optimized_bounded(engine, p, width, metrics, Cost::INFINITY);
+ }
+
+ while idx != 0 {
+ table.truncate(idx + 1);
+ let entry = table.pop().unwrap();
+ lines.push(entry.line);
+ idx = entry.pred;
+ }
+
+ lines.reverse();
+ lines
+}
+
+/// Runs the normal Knuth-Plass algorithm, but instead of building proper lines
+/// (which is costly) to determine costs, it determines approximate costs using
+/// cumulative arrays.
+///
+/// This results in a likely good paragraph layouts, for which we then compute
+/// the exact cost. This cost is an upper bound for proper optimized
+/// linebreaking. We can use it to heavily prune the search space.
+#[typst_macros::time]
+fn linebreak_optimized_approximate(
+ engine: &Engine,
+ p: &Preparation,
+ width: Abs,
+ metrics: &CostMetrics,
+) -> Cost {
+ // Determine the cumulative estimation metrics.
+ let estimates = Estimates::compute(p);
+
+ /// An entry in the dynamic programming table for paragraph optimization.
+ struct Entry {
+ pred: usize,
+ total: Cost,
+ end: usize,
+ unbreakable: bool,
+ breakpoint: Breakpoint,
+ }
+
+ // Dynamic programming table.
+ let mut table = vec![Entry {
+ pred: 0,
+ total: 0.0,
+ end: 0,
+ unbreakable: false,
+ breakpoint: Breakpoint::Mandatory,
+ }];
+
+ let mut active = 0;
+ let mut prev_end = 0;
+
+ breakpoints(p, |end, breakpoint| {
+ // Find the optimal predecessor.
+ let mut best: Option<Entry> = None;
+ for (pred_index, pred) in table.iter().enumerate().skip(active) {
+ let start = pred.end;
+ let unbreakable = prev_end == start;
+
+ // Whether the line is justified. This is not 100% accurate w.r.t
+ // to line()'s behaviour, but good enough.
+ let justify = p.justify && breakpoint != Breakpoint::Mandatory;
+
+ // We don't really know whether the line naturally ends with a dash
+ // here, so we can miss that case, but it's ok, since all of this
+ // just an estimate.
+ let consecutive_dash = pred.breakpoint.is_hyphen() && breakpoint.is_hyphen();
+
+ // Estimate how much the line's spaces would need to be stretched to
+ // make it the desired width. We trim at the end to not take into
+ // account trailing spaces. This is, again, only an approximation of
+ // the real behaviour of `line`.
+ let trimmed_end = start + p.text[start..end].trim_end().len();
+ let line_ratio = raw_ratio(
+ p,
+ width,
+ estimates.widths.estimate(start..trimmed_end)
+ + if breakpoint.is_hyphen() {
+ metrics.approx_hyphen_width
+ } else {
+ Abs::zero()
+ },
+ estimates.stretchability.estimate(start..trimmed_end),
+ estimates.shrinkability.estimate(start..trimmed_end),
+ estimates.justifiables.estimate(start..trimmed_end),
+ );
+
+ // Determine the line's cost.
+ let line_cost = raw_cost(
+ metrics,
+ breakpoint,
+ line_ratio,
+ justify,
+ unbreakable,
+ consecutive_dash,
+ true,
+ );
+
+ // Adjust the set of active breakpoints.
+ // See `linebreak_optimized` for details.
+ if line_ratio < metrics.min_ratio && active == pred_index {
+ active += 1;
+ }
+
+ // The total cost of this line and its chain of predecessors.
+ let total = pred.total + line_cost;
+
+ // If this attempt is better than what we had before, take it!
+ if best.as_ref().map_or(true, |best| best.total >= total) {
+ best = Some(Entry {
+ pred: pred_index,
+ total,
+ end,
+ unbreakable,
+ breakpoint,
+ });
+ }
+ }
+
+ // If this is a mandatory break, all breakpoints before this one become
+ // inactive.
+ if breakpoint == Breakpoint::Mandatory {
+ active = table.len();
+ }
+
+ table.extend(best);
+ prev_end = end;
+ });
+
+ // Retrace the best path.
+ let mut indices = Vec::with_capacity(16);
+ let mut idx = table.len() - 1;
+ while idx != 0 {
+ indices.push(idx);
+ idx = table[idx].pred;
+ }
+
+ let mut pred = Line::empty();
+ let mut start = 0;
+ let mut exact = 0.0;
+
+ // The cost that we optimized was only an approximate cost, so the layout we
+ // got here is only likely to be good, not guaranteed to be the best. We now
+ // computes its exact cost as that gives us a sound upper bound for the
+ // proper optimization pass.
+ for idx in indices.into_iter().rev() {
+ let Entry { end, breakpoint, unbreakable, .. } = table[idx];
+
+ let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
+ let (ratio, line_cost) =
+ ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
+
+ // If approximation produces a valid layout without too much shrinking,
+ // exact layout is guaranteed to find the same layout. If, however, the
+ // line is overfull, we do not have this guarantee. Then, our bound
+ // becomes useless and actively harmful (it could be lower than what
+ // optimal layout produces). Thus, we immediately bail with an infinite
+ // bound in this case.
+ if ratio < metrics.min_ratio {
+ return Cost::INFINITY;
+ }
+
+ pred = attempt;
+ start = end;
+ exact += line_cost;
+ }
+
+ exact
+}
+
+/// Compute the stretch ratio and cost of a line.
+#[allow(clippy::too_many_arguments)]
+fn ratio_and_cost(
+ p: &Preparation,
+ metrics: &CostMetrics,
+ available_width: Abs,
+ pred: &Line,
+ attempt: &Line,
+ breakpoint: Breakpoint,
+ unbreakable: bool,
+) -> (f64, Cost) {
+ let ratio = raw_ratio(
+ p,
+ available_width,
+ attempt.width,
+ attempt.stretchability(),
+ attempt.shrinkability(),
+ attempt.justifiables(),
+ );
+
+ let cost = raw_cost(
+ metrics,
+ breakpoint,
+ ratio,
+ attempt.justify,
+ unbreakable,
+ pred.dash.is_some() && attempt.dash.is_some(),
+ false,
+ );
+
+ (ratio, cost)
+}
+
+/// Determine the stretch ratio for a line given raw metrics.
+///
+/// - A ratio < min_ratio indicates an overfull line.
+/// - A negative ratio indicates a line that needs shrinking.
+/// - A ratio of zero indicates a perfect line.
+/// - A positive ratio indicates a line that needs stretching.
+fn raw_ratio(
+ p: &Preparation,
+ available_width: Abs,
+ line_width: Abs,
+ stretchability: Abs,
+ shrinkability: Abs,
+ justifiables: usize,
+) -> f64 {
+ // Determine how much the line's spaces would need to be stretched
+ // to make it the desired width.
+ let delta = available_width - line_width;
+
+ // Determine how much stretch or shrink is natural.
+ let adjustability = if delta >= Abs::zero() { stretchability } else { shrinkability };
+
+ // Observations:
+ // - `delta` is negative for a line that needs shrinking and positive for a
+ // line that needs stretching.
+ // - `adjustability` must be non-negative to make sense.
+ // - `ratio` inherits the sign of `delta`.
+ let mut ratio = delta / adjustability.max(Abs::zero());
+
+ // The most likely cause of a NaN result is that `delta` was zero. This
+ // often happens with monospace fonts and CJK texts. It means that the line
+ // already fits perfectly, so `ratio` should be zero then.
+ if ratio.is_nan() {
+ ratio = 0.0;
+ }
+
+ // If the ratio exceeds 1, we should stretch above the natural
+ // stretchability using justifiables.
+ if ratio > 1.0 {
+ // We should stretch the line above its stretchability. Now
+ // calculate the extra amount. Also, don't divide by zero.
+ let extra_stretch = (delta - adjustability) / justifiables.max(1) as f64;
+ // Normalize the amount by half the em size.
+ ratio = 1.0 + extra_stretch / (p.size / 2.0);
+ }
+
+ // The min value must be < MIN_RATIO, but how much smaller doesn't matter
+ // since overfull lines have hard-coded huge costs anyway.
+ //
+ // The max value is clamped to 10 since it doesn't really matter whether a
+ // line is stretched 10x or 20x.
+ ratio.clamp(MIN_RATIO - 1.0, 10.0)
+}
+
+/// Compute the cost of a line given raw metrics.
+///
+/// This mostly follows the formula in the Knuth-Plass paper, but there are some
+/// adjustments.
+fn raw_cost(
+ metrics: &CostMetrics,
+ breakpoint: Breakpoint,
+ ratio: f64,
+ justify: bool,
+ unbreakable: bool,
+ consecutive_dash: bool,
+ approx: bool,
+) -> Cost {
+ // Determine the stretch/shrink cost of the line.
+ let badness = if ratio < metrics.min_ratio(approx) {
+ // Overfull line always has maximum cost.
+ 1_000_000.0
+ } else if breakpoint != Breakpoint::Mandatory || justify || ratio < 0.0 {
+ // If the line shall be justified or needs shrinking, it has normal
+ // badness with cost 100|ratio|^3. We limit the ratio to 10 as to not
+ // get to close to our maximum cost.
+ 100.0 * ratio.abs().powi(3)
+ } else {
+ // If the line shouldn't be justified and doesn't need shrink, we don't
+ // pay any cost.
+ 0.0
+ };
+
+ // Compute penalties.
+ let mut penalty = 0.0;
+
+ // Penalize runts (lone words before a mandatory break / at the end).
+ if unbreakable && breakpoint == Breakpoint::Mandatory {
+ penalty += metrics.runt_cost;
+ }
+
+ // Penalize hyphenation.
+ if let Breakpoint::Hyphen(l, r) = breakpoint {
+ // We penalize hyphenations close to the edges of the word (< LIMIT
+ // chars) extra. For each step of distance from the limit, we add 15%
+ // to the cost.
+ const LIMIT: u8 = 5;
+ let steps = LIMIT.saturating_sub(l) + LIMIT.saturating_sub(r);
+ let extra = 0.15 * steps as f64;
+ penalty += (1.0 + extra) * metrics.hyph_cost;
+ }
+
+ // Penalize two consecutive dashes extra (not necessarily hyphens).
+ // Knuth-Plass does this separately after the squaring, with a higher cost,
+ // but I couldn't find any explanation as to why.
+ if consecutive_dash {
+ penalty += metrics.hyph_cost;
+ }
+
+ // From the Knuth-Plass Paper: $ (1 + beta_j + pi_j)^2 $.
+ //
+ // We add one to minimize the number of lines when everything else is more
+ // or less equal.
+ (1.0 + badness + penalty).powi(2)
+}
+
+/// Calls `f` for all possible points in the text where lines can broken.
+///
+/// Yields for each breakpoint the text index, whether the break is mandatory
+/// (after `\n`) and whether a hyphen is required (when breaking inside of a
+/// word).
+///
+/// This is an internal instead of an external iterator because it makes the
+/// code much simpler and the consumers of this function don't need the
+/// composability and flexibility of external iteration anyway.
+fn breakpoints(p: &Preparation, mut f: impl FnMut(usize, Breakpoint)) {
+ let text = p.text;
+
+ // Single breakpoint at the end for empty text.
+ if text.is_empty() {
+ f(0, Breakpoint::Mandatory);
+ return;
+ }
+
+ let hyphenate = p.hyphenate != Some(false);
+ let lb = LINEBREAK_DATA.as_borrowed();
+ let segmenter = match p.lang {
+ Some(Lang::CHINESE | Lang::JAPANESE) => &CJ_SEGMENTER,
+ _ => &SEGMENTER,
+ };
+
+ let mut last = 0;
+ let mut iter = segmenter.segment_str(text).peekable();
+
+ loop {
+ // Special case for links. UAX #14 doesn't handle them well.
+ let (head, tail) = text.split_at(last);
+ if head.ends_with("://") || tail.starts_with("www.") {
+ let (link, _) = link_prefix(tail);
+ linebreak_link(link, |i| f(last + i, Breakpoint::Normal));
+ last += link.len();
+ while iter.peek().is_some_and(|&p| p < last) {
+ iter.next();
+ }
+ }
+
+ // Get the next UAX #14 linebreak opportunity.
+ let Some(point) = iter.next() else { break };
+
+ // Skip breakpoint if there is no char before it. icu4x generates one
+ // at offset 0, but we don't want it.
+ let Some(c) = text[..point].chars().next_back() else { continue };
+
+ // Find out whether the last break was mandatory by checking against
+ // rules LB4 and LB5, special-casing the end of text according to LB3.
+ // See also: https://docs.rs/icu_segmenter/latest/icu_segmenter/struct.LineSegmenter.html
+ let breakpoint = if point == text.len() {
+ Breakpoint::Mandatory
+ } else {
+ match lb.get(c) {
+ // Fix for: https://github.com/unicode-org/icu4x/issues/4146
+ LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ => continue,
+ LineBreak::MandatoryBreak
+ | LineBreak::CarriageReturn
+ | LineBreak::LineFeed
+ | LineBreak::NextLine => Breakpoint::Mandatory,
+ _ => Breakpoint::Normal,
+ }
+ };
+
+ // Hyphenate between the last and current breakpoint.
+ if hyphenate && last < point {
+ for segment in text[last..point].split_word_bounds() {
+ if !segment.is_empty() && segment.chars().all(char::is_alphabetic) {
+ hyphenations(p, &lb, last, segment, &mut f);
+ }
+ last += segment.len();
+ }
+ }
+
+ // Call `f` for the UAX #14 break opportunity.
+ f(point, breakpoint);
+ last = point;
+ }
+}
+
+/// Generate breakpoints for hyphenations within a word.
+fn hyphenations(
+ p: &Preparation,
+ lb: &CodePointMapDataBorrowed<LineBreak>,
+ mut offset: usize,
+ word: &str,
+ mut f: impl FnMut(usize, Breakpoint),
+) {
+ let Some(lang) = lang_at(p, offset) else { return };
+ let count = word.chars().count();
+ let end = offset + word.len();
+
+ let mut chars = 0;
+ for syllable in hypher::hyphenate(word, lang) {
+ offset += syllable.len();
+ chars += syllable.chars().count();
+
+ // Don't hyphenate after the final syllable.
+ if offset == end {
+ continue;
+ }
+
+ // Filter out hyphenation opportunities where hyphenation was actually
+ // disabled.
+ if !hyphenate_at(p, offset) {
+ continue;
+ }
+
+ // Filter out forbidden hyphenation opportunities.
+ if matches!(
+ syllable.chars().next_back().map(|c| lb.get(c)),
+ Some(LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ)
+ ) {
+ continue;
+ }
+
+ // Determine the number of codepoints before and after the hyphenation.
+ let l = chars.saturating_as::<u8>();
+ let r = (count - chars).saturating_as::<u8>();
+
+ // Call `f` for the word-internal hyphenation opportunity.
+ f(offset, Breakpoint::Hyphen(l, r));
+ }
+}
+
+/// Produce linebreak opportunities for a link.
+fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
+ #[derive(PartialEq)]
+ enum Class {
+ Alphabetic,
+ Digit,
+ Open,
+ Other,
+ }
+
+ impl Class {
+ fn of(c: char) -> Self {
+ if c.is_alphabetic() {
+ Class::Alphabetic
+ } else if c.is_numeric() {
+ Class::Digit
+ } else if matches!(c, '(' | '[') {
+ Class::Open
+ } else {
+ Class::Other
+ }
+ }
+ }
+
+ let mut offset = 0;
+ let mut prev = Class::Other;
+
+ for (end, c) in link.char_indices() {
+ let class = Class::of(c);
+
+ // Emit opportunities when going from
+ // - other -> other
+ // - alphabetic -> numeric
+ // - numeric -> alphabetic
+ // Never before/after opening delimiters.
+ if end > 0
+ && prev != Class::Open
+ && if class == Class::Other { prev == Class::Other } else { class != prev }
+ {
+ let piece = &link[offset..end];
+ if piece.len() < 16 {
+ // For bearably long segments, emit them as one.
+ offset = end;
+ f(offset);
+ } else {
+ // If it gets very long (e.g. a hash in the URL), just allow a
+ // break at every char.
+ for c in piece.chars() {
+ offset += c.len_utf8();
+ f(offset);
+ }
+ }
+ }
+
+ prev = class;
+ }
+}
+
+/// Whether hyphenation is enabled at the given offset.
+fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
+ p.hyphenate
+ .or_else(|| {
+ let (_, item) = p.get(offset);
+ let styles = item.text()?.styles;
+ Some(TextElem::hyphenate_in(styles))
+ })
+ .unwrap_or(false)
+}
+
+/// The text language at the given offset.
+fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
+ let lang = p.lang.or_else(|| {
+ let (_, item) = p.get(offset);
+ let styles = item.text()?.styles;
+ Some(TextElem::lang_in(styles))
+ })?;
+
+ let bytes = lang.as_str().as_bytes().try_into().ok()?;
+ hypher::Lang::from_iso(bytes)
+}
+
+/// Resolved metrics relevant for cost computation.
+struct CostMetrics {
+ min_ratio: f64,
+ min_approx_ratio: f64,
+ approx_hyphen_width: Abs,
+ hyph_cost: Cost,
+ runt_cost: Cost,
+}
+
+impl CostMetrics {
+ /// Compute shared metrics for paragraph optimization.
+ fn compute(p: &Preparation) -> Self {
+ Self {
+ // When justifying, we may stretch spaces below their natural width.
+ min_ratio: if p.justify { MIN_RATIO } else { 0.0 },
+ min_approx_ratio: if p.justify { MIN_APPROX_RATIO } else { 0.0 },
+ // Approximate hyphen width for estimates.
+ approx_hyphen_width: Em::new(0.33).at(p.size),
+ // Costs.
+ hyph_cost: DEFAULT_HYPH_COST * p.costs.hyphenation().get(),
+ runt_cost: DEFAULT_RUNT_COST * p.costs.runt().get(),
+ }
+ }
+
+ /// The minimum line ratio we allow for shrinking. For approximate layout,
+ /// we allow less because otherwise we get an invalid layout fairly often,
+ /// which makes our bound useless.
+ fn min_ratio(&self, approx: bool) -> f64 {
+ if approx {
+ self.min_approx_ratio
+ } else {
+ self.min_ratio
+ }
+ }
+}
+
+/// Estimated line metrics.
+///
+/// Allows to get a quick estimate of a metric for a line between two byte
+/// positions.
+struct Estimates {
+ widths: CumulativeVec<Abs>,
+ stretchability: CumulativeVec<Abs>,
+ shrinkability: CumulativeVec<Abs>,
+ justifiables: CumulativeVec<usize>,
+}
+
+impl Estimates {
+ /// Compute estimations for approximate Knuth-Plass layout.
+ fn compute(p: &Preparation) -> Self {
+ let cap = p.text.len();
+
+ let mut widths = CumulativeVec::with_capacity(cap);
+ let mut stretchability = CumulativeVec::with_capacity(cap);
+ let mut shrinkability = CumulativeVec::with_capacity(cap);
+ let mut justifiables = CumulativeVec::with_capacity(cap);
+
+ for (range, item) in p.items.iter() {
+ if let Item::Text(shaped) = item {
+ for g in shaped.glyphs.iter() {
+ let byte_len = g.range.len();
+ let stretch = g.stretchability().0 + g.stretchability().1;
+ let shrink = g.shrinkability().0 + g.shrinkability().1;
+ widths.push(byte_len, g.x_advance.at(shaped.size));
+ stretchability.push(byte_len, stretch.at(shaped.size));
+ shrinkability.push(byte_len, shrink.at(shaped.size));
+ justifiables.push(byte_len, g.is_justifiable() as usize);
+ }
+ } else {
+ widths.push(range.len(), item.natural_width());
+ }
+
+ widths.adjust(range.end);
+ stretchability.adjust(range.end);
+ shrinkability.adjust(range.end);
+ justifiables.adjust(range.end);
+ }
+
+ Self {
+ widths,
+ stretchability,
+ shrinkability,
+ justifiables,
+ }
+ }
+}
+
+/// An accumulative array of a metric.
+struct CumulativeVec<T> {
+ total: T,
+ summed: Vec<T>,
+}
+
+impl<T> CumulativeVec<T>
+where
+ T: Default + Copy + Add<Output = T> + Sub<Output = T>,
+{
+ /// Create a new instance with the given capacity.
+ fn with_capacity(capacity: usize) -> Self {
+ let total = T::default();
+ let mut summed = Vec::with_capacity(capacity);
+ summed.push(total);
+ Self { total, summed }
+ }
+
+ /// Adjust to cover the given byte length.
+ fn adjust(&mut self, len: usize) {
+ self.summed.resize(len, self.total);
+ }
+
+ /// Adds a new segment with the given byte length and metric.
+ fn push(&mut self, byte_len: usize, metric: T) {
+ self.total = self.total + metric;
+ for _ in 0..byte_len {
+ self.summed.push(self.total);
+ }
+ }
+
+ /// Estimates the metrics for the line spanned by the range.
+ fn estimate(&self, range: Range) -> T {
+ self.get(range.end) - self.get(range.start)
+ }
+
+ /// Get the metric at the given byte position.
+ fn get(&self, index: usize) -> T {
+ match index.checked_sub(1) {
+ None => T::default(),
+ Some(i) => self.summed[i],
+ }
+ }
+}
diff --git a/crates/typst-layout/src/inline/mod.rs b/crates/typst-layout/src/inline/mod.rs
new file mode 100644
index 00000000..658e3084
--- /dev/null
+++ b/crates/typst-layout/src/inline/mod.rs
@@ -0,0 +1,105 @@
+#[path = "box.rs"]
+mod box_;
+mod collect;
+mod deco;
+mod finalize;
+mod line;
+mod linebreak;
+mod prepare;
+mod shaping;
+
+pub use self::box_::layout_box;
+
+use comemo::{Track, Tracked, TrackedMut};
+use typst_library::diag::SourceResult;
+use typst_library::engine::{Engine, Route, Sink, Traced};
+use typst_library::foundations::{StyleChain, StyleVec};
+use typst_library::introspection::{Introspector, Locator, LocatorLink};
+use typst_library::layout::{Fragment, Size};
+use typst_library::model::ParElem;
+use typst_library::routines::Routines;
+use typst_library::World;
+
+use self::collect::{collect, Item, Segment, SpanMapper};
+use self::deco::decorate;
+use self::finalize::finalize;
+use self::line::{commit, line, Line};
+use self::linebreak::{linebreak, Breakpoint};
+use self::prepare::{prepare, Preparation};
+use self::shaping::{
+ cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText,
+ BEGIN_PUNCT_PAT, END_PUNCT_PAT,
+};
+
+/// Range of a substring of text.
+type Range = std::ops::Range<usize>;
+
+/// Layouts content inline.
+pub fn layout_inline(
+ engine: &mut Engine,
+ children: &StyleVec,
+ locator: Locator,
+ styles: StyleChain,
+ consecutive: bool,
+ region: Size,
+ expand: bool,
+) -> SourceResult<Fragment> {
+ layout_inline_impl(
+ children,
+ engine.routines,
+ engine.world,
+ engine.introspector,
+ engine.traced,
+ TrackedMut::reborrow_mut(&mut engine.sink),
+ engine.route.track(),
+ locator.track(),
+ styles,
+ consecutive,
+ region,
+ expand,
+ )
+}
+
+/// The internal, memoized implementation of `layout_inline`.
+#[comemo::memoize]
+#[allow(clippy::too_many_arguments)]
+fn layout_inline_impl(
+ children: &StyleVec,
+ routines: &Routines,
+ world: Tracked<dyn World + '_>,
+ introspector: Tracked<Introspector>,
+ traced: Tracked<Traced>,
+ sink: TrackedMut<Sink>,
+ route: Tracked<Route>,
+ locator: Tracked<Locator>,
+ styles: StyleChain,
+ consecutive: bool,
+ region: Size,
+ expand: bool,
+) -> SourceResult<Fragment> {
+ let link = LocatorLink::new(locator);
+ let locator = Locator::link(&link);
+ let mut engine = Engine {
+ routines,
+ world,
+ introspector,
+ traced,
+ sink,
+ route: Route::extend(route),
+ };
+
+ let mut locator = locator.split();
+
+ // Collect all text into one string for BiDi analysis.
+ let (text, segments, spans) =
+ collect(children, &mut engine, &mut locator, &styles, region, consecutive)?;
+
+ // Perform BiDi analysis and then prepares paragraph layout.
+ let p = prepare(&mut engine, children, &text, segments, spans, styles)?;
+
+ // Break the paragraph into lines.
+ let lines = linebreak(&engine, &p, region.x - p.hang);
+
+ // Turn the selected lines into frames.
+ finalize(&mut engine, &p, &lines, styles, region, expand, &mut locator)
+}
diff --git a/crates/typst-layout/src/inline/prepare.rs b/crates/typst-layout/src/inline/prepare.rs
new file mode 100644
index 00000000..2dd79aec
--- /dev/null
+++ b/crates/typst-layout/src/inline/prepare.rs
@@ -0,0 +1,196 @@
+use typst_library::foundations::{Resolve, Smart};
+use typst_library::layout::{Abs, AlignElem, Dir, Em, FixedAlignment};
+use typst_library::model::Linebreaks;
+use typst_library::text::{Costs, Lang, TextElem};
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
+
+use super::*;
+
+/// A paragraph representation in which children are already layouted and text
+/// is already preshaped.
+///
+/// In many cases, we can directly reuse these results when constructing a line.
+/// Only when a line break falls onto a text index that is not safe-to-break per
+/// rustybuzz, we have to reshape that portion.
+pub struct Preparation<'a> {
+ /// The paragraph's full text.
+ pub text: &'a str,
+ /// Bidirectional text embedding levels for the paragraph.
+ ///
+ /// This is `None` if the paragraph is BiDi-uniform (all the base direction).
+ pub bidi: Option<BidiInfo<'a>>,
+ /// Text runs, spacing and layouted elements.
+ pub items: Vec<(Range, Item<'a>)>,
+ /// Maps from byte indices to item indices.
+ pub indices: Vec<usize>,
+ /// The span mapper.
+ pub spans: SpanMapper,
+ /// Whether to hyphenate if it's the same for all children.
+ pub hyphenate: Option<bool>,
+ /// Costs for various layout decisions.
+ pub costs: Costs,
+ /// The dominant direction.
+ pub dir: Dir,
+ /// The text language if it's the same for all children.
+ pub lang: Option<Lang>,
+ /// The paragraph's resolved horizontal alignment.
+ pub align: FixedAlignment,
+ /// Whether to justify the paragraph.
+ pub justify: bool,
+ /// The paragraph's hanging indent.
+ pub hang: Abs,
+ /// Whether to add spacing between CJK and Latin characters.
+ pub cjk_latin_spacing: bool,
+ /// Whether font fallback is enabled for this paragraph.
+ pub fallback: bool,
+ /// How to determine line breaks.
+ pub linebreaks: Smart<Linebreaks>,
+ /// The text size.
+ pub size: Abs,
+}
+
+impl<'a> Preparation<'a> {
+ /// Get the item that contains the given `text_offset`.
+ pub fn get(&self, offset: usize) -> &(Range, Item<'a>) {
+ let idx = self.indices.get(offset).copied().unwrap_or(0);
+ &self.items[idx]
+ }
+
+ /// Iterate over the items that intersect the given `sliced` range.
+ pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> {
+ // Usually, we don't want empty-range items at the start of the line
+ // (because they will be part of the previous line), but for the first
+ // line, we need to keep them.
+ let start = match sliced.start {
+ 0 => 0,
+ n => self.indices.get(n).copied().unwrap_or(0),
+ };
+ self.items[start..].iter().take_while(move |(range, _)| {
+ range.start < sliced.end || range.end <= sliced.end
+ })
+ }
+}
+
+/// Performs BiDi analysis and then prepares paragraph layout by building a
+/// representation on which we can do line breaking without layouting each and
+/// every line from scratch.
+#[typst_macros::time]
+pub fn prepare<'a>(
+ engine: &mut Engine,
+ children: &'a StyleVec,
+ text: &'a str,
+ segments: Vec<Segment<'a>>,
+ spans: SpanMapper,
+ styles: StyleChain<'a>,
+) -> SourceResult<Preparation<'a>> {
+ let dir = TextElem::dir_in(styles);
+ let default_level = match dir {
+ Dir::RTL => BidiLevel::rtl(),
+ _ => BidiLevel::ltr(),
+ };
+
+ let bidi = BidiInfo::new(text, Some(default_level));
+ let is_bidi = bidi
+ .levels
+ .iter()
+ .any(|level| level.is_ltr() != default_level.is_ltr());
+
+ let mut cursor = 0;
+ let mut items = Vec::with_capacity(segments.len());
+
+ // Shape the text to finalize the items.
+ for segment in segments {
+ let len = segment.textual_len();
+ let end = cursor + len;
+ let range = cursor..end;
+
+ match segment {
+ Segment::Text(_, styles) => {
+ shape_range(&mut items, engine, text, &bidi, range, styles);
+ }
+ Segment::Item(item) => items.push((range, item)),
+ }
+
+ cursor = end;
+ }
+
+ // Build the mapping from byte to item indices.
+ let mut indices = Vec::with_capacity(text.len());
+ for (i, (range, _)) in items.iter().enumerate() {
+ indices.extend(range.clone().map(|_| i));
+ }
+
+ let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
+ if cjk_latin_spacing {
+ add_cjk_latin_spacing(&mut items);
+ }
+
+ Ok(Preparation {
+ text,
+ bidi: is_bidi.then_some(bidi),
+ items,
+ indices,
+ spans,
+ hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
+ costs: TextElem::costs_in(styles),
+ dir,
+ lang: children.shared_get(styles, TextElem::lang_in),
+ align: AlignElem::alignment_in(styles).resolve(styles).x,
+ justify: ParElem::justify_in(styles),
+ hang: ParElem::hanging_indent_in(styles),
+ cjk_latin_spacing,
+ fallback: TextElem::fallback_in(styles),
+ linebreaks: ParElem::linebreaks_in(styles),
+ size: TextElem::size_in(styles),
+ })
+}
+
+/// Add some spacing between Han characters and western characters. See
+/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
+/// in Horizontal Written Mode
+fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) {
+ let mut items = items
+ .iter_mut()
+ .filter(|(_, x)| !matches!(x, Item::Tag(_)))
+ .peekable();
+
+ let mut prev: Option<&ShapedGlyph> = None;
+ while let Some((_, item)) = items.next() {
+ let Some(text) = item.text_mut() else {
+ prev = None;
+ continue;
+ };
+
+ // Since we only call this function in [`prepare`], we can assume that
+ // the Cow is owned, and `to_mut` can be called without overhead.
+ debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_)));
+ let mut glyphs = text.glyphs.to_mut().iter_mut().peekable();
+
+ while let Some(glyph) = glyphs.next() {
+ let next = glyphs.peek().map(|n| n as _).or_else(|| {
+ items
+ .peek()
+ .and_then(|(_, i)| i.text())
+ .and_then(|shaped| shaped.glyphs.first())
+ });
+
+ // Case 1: CJ followed by a Latin character
+ if glyph.is_cj_script() && next.is_some_and(|g| g.is_letter_or_number()) {
+ // The spacing is default to 1/4 em, and can be shrunk to 1/8 em.
+ glyph.x_advance += Em::new(0.25);
+ glyph.adjustability.shrinkability.1 += Em::new(0.125);
+ text.width += Em::new(0.25).at(text.size);
+ }
+
+ // Case 2: Latin followed by a CJ character
+ if glyph.is_cj_script() && prev.is_some_and(|g| g.is_letter_or_number()) {
+ glyph.x_advance += Em::new(0.25);
+ glyph.x_offset += Em::new(0.25);
+ glyph.adjustability.shrinkability.0 += Em::new(0.125);
+ text.width += Em::new(0.25).at(text.size);
+ }
+
+ prev = Some(glyph);
+ }
+ }
+}
diff --git a/crates/typst-layout/src/inline/shaping.rs b/crates/typst-layout/src/inline/shaping.rs
new file mode 100644
index 00000000..bd803b52
--- /dev/null
+++ b/crates/typst-layout/src/inline/shaping.rs
@@ -0,0 +1,1175 @@
+use std::borrow::Cow;
+use std::fmt::{self, Debug, Formatter};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use az::SaturatingAs;
+use ecow::EcoString;
+use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer};
+use ttf_parser::Tag;
+use typst_library::engine::Engine;
+use typst_library::foundations::{Smart, StyleChain};
+use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
+use typst_library::text::{
+ families, features, is_default_ignorable, variant, Font, FontVariant, Glyph, Lang,
+ Region, TextEdgeBounds, TextElem, TextItem,
+};
+use typst_library::World;
+use typst_utils::SliceExt;
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
+use unicode_script::{Script, UnicodeScript};
+
+use super::{decorate, Item, Range, SpanMapper};
+
+/// The result of shaping text.
+///
+/// This type contains owned or borrowed shaped text runs, which can be
+/// measured, used to reshape substrings more quickly and converted into a
+/// frame.
+#[derive(Clone)]
+pub struct ShapedText<'a> {
+ /// The start of the text in the full paragraph.
+ pub base: usize,
+ /// The text that was shaped.
+ pub text: &'a str,
+ /// The text direction.
+ pub dir: Dir,
+ /// The text language.
+ pub lang: Lang,
+ /// The text region.
+ pub region: Option<Region>,
+ /// The text's style properties.
+ pub styles: StyleChain<'a>,
+ /// The font variant.
+ pub variant: FontVariant,
+ /// The font size.
+ pub size: Abs,
+ /// The width of the text's bounding box.
+ pub width: Abs,
+ /// The shaped glyphs.
+ pub glyphs: Cow<'a, [ShapedGlyph]>,
+}
+
+/// A single glyph resulting from shaping.
+#[derive(Debug, Clone)]
+pub struct ShapedGlyph {
+ /// The font the glyph is contained in.
+ pub font: Font,
+ /// The glyph's index in the font.
+ pub glyph_id: u16,
+ /// The advance width of the glyph.
+ pub x_advance: Em,
+ /// The horizontal offset of the glyph.
+ pub x_offset: Em,
+ /// The vertical offset of the glyph.
+ pub y_offset: Em,
+ /// The adjustability of the glyph.
+ pub adjustability: Adjustability,
+ /// The byte range of this glyph's cluster in the full paragraph. A cluster
+ /// is a sequence of one or multiple glyphs that cannot be separated and
+ /// must always be treated as a union.
+ ///
+ /// The range values of the glyphs in a [`ShapedText`] should not overlap
+ /// with each other, and they should be monotonically increasing (for
+ /// left-to-right or top-to-bottom text) or monotonically decreasing (for
+ /// right-to-left or bottom-to-top text).
+ pub range: Range,
+ /// Whether splitting the shaping result before this glyph would yield the
+ /// same results as shaping the parts to both sides of `text_index`
+ /// separately.
+ pub safe_to_break: bool,
+ /// The first char in this glyph's cluster.
+ pub c: char,
+ /// Whether this glyph is justifiable for CJK scripts.
+ pub is_justifiable: bool,
+ /// The script of the glyph.
+ pub script: Script,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct Adjustability {
+ /// The left and right stretchability
+ pub stretchability: (Em, Em),
+ /// The left and right shrinkability
+ pub shrinkability: (Em, Em),
+}
+
+impl ShapedGlyph {
+ /// Whether the glyph is a space.
+ pub fn is_space(&self) -> bool {
+ is_space(self.c)
+ }
+
+ /// Whether the glyph is justifiable.
+ pub fn is_justifiable(&self) -> bool {
+ // GB style is not relevant here.
+ self.is_justifiable
+ }
+
+ /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
+ pub fn is_cj_script(&self) -> bool {
+ is_cj_script(self.c, self.script)
+ }
+
+ pub fn is_cjk_punctuation(&self) -> bool {
+ self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
+ || self.is_cjk_right_aligned_punctuation()
+ || self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
+ }
+
+ /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+ pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
+ is_cjk_left_aligned_punctuation(
+ self.c,
+ self.x_advance,
+ self.stretchability(),
+ style,
+ )
+ }
+
+ /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+ pub fn is_cjk_right_aligned_punctuation(&self) -> bool {
+ is_cjk_right_aligned_punctuation(self.c, self.x_advance, self.stretchability())
+ }
+
+ /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+ pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
+ is_cjk_center_aligned_punctuation(self.c, style)
+ }
+
+ /// Whether the glyph is a western letter or number.
+ pub fn is_letter_or_number(&self) -> bool {
+ matches!(self.c.script(), Script::Latin | Script::Greek | Script::Cyrillic)
+ || matches!(self.c, '#' | '$' | '%' | '&')
+ || self.c.is_ascii_digit()
+ }
+
+ pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
+ let width = self.x_advance;
+ if self.is_space() {
+ Adjustability {
+ // The number for spaces is from Knuth-Plass' paper
+ stretchability: (Em::zero(), width / 2.0),
+ shrinkability: (Em::zero(), width / 3.0),
+ }
+ } else if self.is_cjk_left_aligned_punctuation(style) {
+ Adjustability {
+ stretchability: (Em::zero(), Em::zero()),
+ shrinkability: (Em::zero(), width / 2.0),
+ }
+ } else if self.is_cjk_right_aligned_punctuation() {
+ Adjustability {
+ stretchability: (Em::zero(), Em::zero()),
+ shrinkability: (width / 2.0, Em::zero()),
+ }
+ } else if self.is_cjk_center_aligned_punctuation(style) {
+ Adjustability {
+ stretchability: (Em::zero(), Em::zero()),
+ shrinkability: (width / 4.0, width / 4.0),
+ }
+ } else {
+ Adjustability::default()
+ }
+ }
+
+ /// The stretchability of the character.
+ pub fn stretchability(&self) -> (Em, Em) {
+ self.adjustability.stretchability
+ }
+
+ /// The shrinkability of the character.
+ pub fn shrinkability(&self) -> (Em, Em) {
+ self.adjustability.shrinkability
+ }
+
+ /// Shrink the width of glyph on the left side.
+ pub fn shrink_left(&mut self, amount: Em) {
+ self.x_offset -= amount;
+ self.x_advance -= amount;
+ self.adjustability.shrinkability.0 -= amount;
+ }
+
+ /// Shrink the width of glyph on the right side.
+ pub fn shrink_right(&mut self, amount: Em) {
+ self.x_advance -= amount;
+ self.adjustability.shrinkability.1 -= amount;
+ }
+}
+
+/// A side you can go toward.
+enum Side {
+ /// To the left-hand side.
+ Left,
+ /// To the right-hand side.
+ Right,
+}
+
+impl<'a> ShapedText<'a> {
+ /// Build the shaped text's frame.
+ ///
+ /// The `justification` defines how much extra advance width each
+ /// [justifiable glyph](ShapedGlyph::is_justifiable) will get.
+ pub fn build(
+ &self,
+ engine: &Engine,
+ spans: &SpanMapper,
+ justification_ratio: f64,
+ extra_justification: Abs,
+ ) -> Frame {
+ let (top, bottom) = self.measure(engine);
+ let size = Size::new(self.width, top + bottom);
+
+ let mut offset = Abs::zero();
+ let mut frame = Frame::soft(size);
+ frame.set_baseline(top);
+
+ let shift = TextElem::baseline_in(self.styles);
+ let decos = TextElem::deco_in(self.styles);
+ let fill = TextElem::fill_in(self.styles);
+ let stroke = TextElem::stroke_in(self.styles);
+ let span_offset = TextElem::span_offset_in(self.styles);
+
+ for ((font, y_offset), group) in
+ self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
+ {
+ let mut range = group[0].range.clone();
+ for glyph in group {
+ range.start = range.start.min(glyph.range.start);
+ range.end = range.end.max(glyph.range.end);
+ }
+
+ let pos = Point::new(offset, top + shift - y_offset.at(self.size));
+ let glyphs: Vec<Glyph> = group
+ .iter()
+ .map(|shaped: &ShapedGlyph| {
+ let adjustability_left = if justification_ratio < 0.0 {
+ shaped.shrinkability().0
+ } else {
+ shaped.stretchability().0
+ };
+ let adjustability_right = if justification_ratio < 0.0 {
+ shaped.shrinkability().1
+ } else {
+ shaped.stretchability().1
+ };
+
+ let justification_left = adjustability_left * justification_ratio;
+ let mut justification_right =
+ adjustability_right * justification_ratio;
+ if shaped.is_justifiable() {
+ justification_right +=
+ Em::from_length(extra_justification, self.size)
+ }
+
+ frame.size_mut().x += justification_left.at(self.size)
+ + justification_right.at(self.size);
+
+ // We may not be able to reach the offset completely if
+ // it exceeds u16, but better to have a roughly correct
+ // span offset than nothing.
+ let mut span = spans.span_at(shaped.range.start);
+ span.1 = span.1.saturating_add(span_offset.saturating_as());
+
+ // |<---- a Glyph ---->|
+ // -->|ShapedGlyph|<--
+ // +---+-----------+---+
+ // | | *********| |
+ // | | * | |
+ // | | * ****| |
+ // | | * *| |
+ // | | *********| |
+ // +---+--+--------+---+
+ // A B C D
+ // Note A, B, D could be positive, zero, or negative.
+ // A: justification_left
+ // B: ShapedGlyph's x_offset
+ // (though a small part of the glyph may go inside B)
+ // B+C: ShapedGlyph's x_advance
+ // D: justification_right
+ // A+B: Glyph's x_offset
+ // A+B+C+D: Glyph's x_advance
+ Glyph {
+ id: shaped.glyph_id,
+ x_advance: shaped.x_advance
+ + justification_left
+ + justification_right,
+ x_offset: shaped.x_offset + justification_left,
+ range: (shaped.range.start - range.start).saturating_as()
+ ..(shaped.range.end - range.start).saturating_as(),
+ span,
+ }
+ })
+ .collect();
+
+ let item = TextItem {
+ font,
+ size: self.size,
+ lang: self.lang,
+ region: self.region,
+ fill: fill.clone(),
+ stroke: stroke.clone().map(|s| s.unwrap_or_default()),
+ text: self.text[range.start - self.base..range.end - self.base].into(),
+ glyphs,
+ };
+
+ let width = item.width();
+ if decos.is_empty() {
+ frame.push(pos, FrameItem::Text(item));
+ } else {
+ // Apply line decorations.
+ frame.push(pos, FrameItem::Text(item.clone()));
+ for deco in &decos {
+ decorate(&mut frame, deco, &item, width, shift, pos);
+ }
+ }
+
+ offset += width;
+ }
+
+ frame
+ }
+
+ /// Measure the top and bottom extent of this text.
+ pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
+ let mut top = Abs::zero();
+ let mut bottom = Abs::zero();
+
+ let top_edge = TextElem::top_edge_in(self.styles);
+ let bottom_edge = TextElem::bottom_edge_in(self.styles);
+
+ // Expand top and bottom by reading the font's vertical metrics.
+ let mut expand = |font: &Font, bounds: TextEdgeBounds| {
+ let (t, b) = font.edges(top_edge, bottom_edge, self.size, bounds);
+ top.set_max(t);
+ bottom.set_max(b);
+ };
+
+ if self.glyphs.is_empty() {
+ // When there are no glyphs, we just use the vertical metrics of the
+ // first available font.
+ let world = engine.world;
+ for family in families(self.styles) {
+ if let Some(font) = world
+ .book()
+ .select(family, self.variant)
+ .and_then(|id| world.font(id))
+ {
+ expand(&font, TextEdgeBounds::Zero);
+ break;
+ }
+ }
+ } else {
+ for g in self.glyphs.iter() {
+ expand(&g.font, TextEdgeBounds::Glyph(g.glyph_id));
+ }
+ }
+
+ (top, bottom)
+ }
+
+ /// How many glyphs are in the text where we can insert additional
+ /// space when encountering underfull lines.
+ pub fn justifiables(&self) -> usize {
+ self.glyphs.iter().filter(|g| g.is_justifiable()).count()
+ }
+
+ /// Whether the last glyph is a CJK character which should not be justified
+ /// on line end.
+ pub fn cjk_justifiable_at_last(&self) -> bool {
+ self.glyphs
+ .last()
+ .map(|g| g.is_cj_script() || g.is_cjk_punctuation())
+ .unwrap_or(false)
+ }
+
+ /// The stretchability of the text.
+ pub fn stretchability(&self) -> Abs {
+ self.glyphs
+ .iter()
+ .map(|g| g.stretchability().0 + g.stretchability().1)
+ .sum::<Em>()
+ .at(self.size)
+ }
+
+ /// The shrinkability of the text
+ pub fn shrinkability(&self) -> Abs {
+ self.glyphs
+ .iter()
+ .map(|g| g.shrinkability().0 + g.shrinkability().1)
+ .sum::<Em>()
+ .at(self.size)
+ }
+
+ /// Reshape a range of the shaped text, reusing information from this
+ /// shaping process if possible.
+ ///
+ /// The text `range` is relative to the whole paragraph.
+ pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
+ let text = &self.text[text_range.start - self.base..text_range.end - self.base];
+ if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
+ #[cfg(debug_assertions)]
+ assert_all_glyphs_in_range(glyphs, text, text_range.clone());
+ Self {
+ base: text_range.start,
+ text,
+ dir: self.dir,
+ lang: self.lang,
+ region: self.region,
+ styles: self.styles,
+ size: self.size,
+ variant: self.variant,
+ width: glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size),
+ glyphs: Cow::Borrowed(glyphs),
+ }
+ } else {
+ shape(
+ engine,
+ text_range.start,
+ text,
+ self.styles,
+ self.dir,
+ self.lang,
+ self.region,
+ )
+ }
+ }
+
+ /// Derive an empty text run with the same properties as this one.
+ pub fn empty(&self) -> Self {
+ Self {
+ text: "",
+ width: Abs::zero(),
+ glyphs: Cow::Borrowed(&[]),
+ ..*self
+ }
+ }
+
+ /// Push a hyphen to end of the text.
+ pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
+ self.insert_hyphen(engine, fallback, Side::Right)
+ }
+
+ /// Prepend a hyphen to start of the text.
+ pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) {
+ self.insert_hyphen(engine, fallback, Side::Left)
+ }
+
+ fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) {
+ let world = engine.world;
+ let book = world.book();
+ let fallback_func = if fallback {
+ Some(|| book.select_fallback(None, self.variant, "-"))
+ } else {
+ None
+ };
+ let mut chain = families(self.styles)
+ .map(|family| book.select(family, self.variant))
+ .chain(fallback_func.iter().map(|f| f()))
+ .flatten();
+
+ chain.find_map(|id| {
+ let font = world.font(id)?;
+ let ttf = font.ttf();
+ let glyph_id = ttf.glyph_index('-')?;
+ let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
+ let range = match side {
+ Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start),
+ Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end),
+ }
+ // In the unlikely chance that we hyphenate after an empty line,
+ // ensure that the glyph range still falls after self.base so
+ // that subtracting either of the endpoints by self.base doesn't
+ // underflow. See <https://github.com/typst/typst/issues/2283>.
+ .unwrap_or_else(|| self.base..self.base);
+ self.width += x_advance.at(self.size);
+ let glyph = ShapedGlyph {
+ font,
+ glyph_id: glyph_id.0,
+ x_advance,
+ x_offset: Em::zero(),
+ y_offset: Em::zero(),
+ adjustability: Adjustability::default(),
+ range,
+ safe_to_break: true,
+ c: '-',
+ is_justifiable: false,
+ script: Script::Common,
+ };
+ match side {
+ Side::Left => self.glyphs.to_mut().insert(0, glyph),
+ Side::Right => self.glyphs.to_mut().push(glyph),
+ }
+ Some(())
+ });
+ }
+
+ /// Find the subslice of glyphs that represent the given text range if both
+ /// sides are safe to break.
+ fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> {
+ let Range { mut start, mut end } = text_range;
+ if !self.dir.is_positive() {
+ std::mem::swap(&mut start, &mut end);
+ }
+
+ let left = self.find_safe_to_break(start)?;
+ let right = self.find_safe_to_break(end)?;
+ Some(&self.glyphs[left..right])
+ }
+
+ /// Find the glyph offset matching the text index that is most towards the
+ /// start of the text and safe-to-break.
+ fn find_safe_to_break(&self, text_index: usize) -> Option<usize> {
+ let ltr = self.dir.is_positive();
+
+ // Handle edge cases.
+ let len = self.glyphs.len();
+ if text_index == self.base {
+ return Some(if ltr { 0 } else { len });
+ } else if text_index == self.base + self.text.len() {
+ return Some(if ltr { len } else { 0 });
+ }
+
+ // Find any glyph with the text index.
+ let found = self.glyphs.binary_search_by(|g: &ShapedGlyph| {
+ let ordering = g.range.start.cmp(&text_index);
+ if ltr {
+ ordering
+ } else {
+ ordering.reverse()
+ }
+ });
+
+ let mut idx = match found {
+ Ok(idx) => idx,
+ Err(idx) => {
+ // Handle the special case where we break before a '\n'
+ //
+ // For example: (assume `a` is a CJK character with three bytes)
+ // text: " a \n b "
+ // index: 0 1 2 3 4 5
+ // text_index: ^
+ // glyphs: 0 . 1
+ //
+ // We will get found = Err(1), because '\n' does not have a
+ // glyph. But it's safe to break here. Thus the following
+ // condition:
+ // - glyphs[0].end == text_index == 3
+ // - text[3] == '\n'
+ return (idx > 0
+ && self.glyphs[idx - 1].range.end == text_index
+ && self.text[text_index - self.base..].starts_with('\n'))
+ .then_some(idx);
+ }
+ };
+
+ // Search for the start-most glyph with the text index. This means
+ // we take empty range glyphs at the start and leave those at the end
+ // for the next line.
+ let dec = if ltr { usize::checked_sub } else { usize::checked_add };
+ while let Some(next) = dec(idx, 1) {
+ if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
+ break;
+ }
+ idx = next;
+ }
+
+ // RTL needs offset one because the left side of the range should be
+ // exclusive and the right side inclusive, contrary to the normal
+ // behaviour of ranges.
+ self.glyphs[idx].safe_to_break.then_some(idx + usize::from(!ltr))
+ }
+}
+
+impl Debug for ShapedText<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.text.fmt(f)
+ }
+}
+
+/// Group a range of text by BiDi level and script, shape the runs and generate
+/// items for them.
+pub fn shape_range<'a>(
+ items: &mut Vec<(Range, Item<'a>)>,
+ engine: &Engine,
+ text: &'a str,
+ bidi: &BidiInfo<'a>,
+ range: Range,
+ styles: StyleChain<'a>,
+) {
+ let script = TextElem::script_in(styles);
+ let lang = TextElem::lang_in(styles);
+ let region = TextElem::region_in(styles);
+ let mut process = |range: Range, level: BidiLevel| {
+ let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
+ let shaped =
+ shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
+ items.push((range, Item::Text(shaped)));
+ };
+
+ let mut prev_level = BidiLevel::ltr();
+ let mut prev_script = Script::Unknown;
+ let mut cursor = range.start;
+
+ // Group by embedding level and script. If the text's script is explicitly
+ // set (rather than inferred from the glyphs), we keep the script at an
+ // unchanging `Script::Unknown` so that only level changes cause breaks.
+ for i in range.clone() {
+ if !text.is_char_boundary(i) {
+ continue;
+ }
+
+ let level = bidi.levels[i];
+ let curr_script = match script {
+ Smart::Auto => {
+ text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+ }
+ Smart::Custom(_) => Script::Unknown,
+ };
+
+ if level != prev_level || !is_compatible(curr_script, prev_script) {
+ if cursor < i {
+ process(cursor..i, prev_level);
+ }
+ cursor = i;
+ prev_level = level;
+ prev_script = curr_script;
+ } else if is_generic_script(prev_script) {
+ prev_script = curr_script;
+ }
+ }
+
+ process(cursor..range.end, prev_level);
+}
+
+/// Whether this is not a specific script.
+fn is_generic_script(script: Script) -> bool {
+ matches!(script, Script::Unknown | Script::Common | Script::Inherited)
+}
+
+/// Whether these script can be part of the same shape run.
+fn is_compatible(a: Script, b: Script) -> bool {
+ is_generic_script(a) || is_generic_script(b) || a == b
+}
+
+/// Shape text into [`ShapedText`].
+#[allow(clippy::too_many_arguments)]
+fn shape<'a>(
+ engine: &Engine,
+ base: usize,
+ text: &'a str,
+ styles: StyleChain<'a>,
+ dir: Dir,
+ lang: Lang,
+ region: Option<Region>,
+) -> ShapedText<'a> {
+ let size = TextElem::size_in(styles);
+ let mut ctx = ShapingContext {
+ engine,
+ size,
+ glyphs: vec![],
+ used: vec![],
+ styles,
+ variant: variant(styles),
+ features: features(styles),
+ fallback: TextElem::fallback_in(styles),
+ dir,
+ };
+
+ if !text.is_empty() {
+ shape_segment(&mut ctx, base, text, families(styles));
+ }
+
+ track_and_space(&mut ctx);
+ calculate_adjustability(&mut ctx, lang, region);
+
+ #[cfg(debug_assertions)]
+ assert_all_glyphs_in_range(&ctx.glyphs, text, base..(base + text.len()));
+ #[cfg(debug_assertions)]
+ assert_glyph_ranges_in_order(&ctx.glyphs, dir);
+
+ ShapedText {
+ base,
+ text,
+ dir,
+ lang,
+ region,
+ styles,
+ variant: ctx.variant,
+ size,
+ width: ctx.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size),
+ glyphs: Cow::Owned(ctx.glyphs),
+ }
+}
+
+/// Holds shaping results and metadata common to all shaped segments.
+struct ShapingContext<'a, 'v> {
+ engine: &'a Engine<'v>,
+ glyphs: Vec<ShapedGlyph>,
+ used: Vec<Font>,
+ styles: StyleChain<'a>,
+ size: Abs,
+ variant: FontVariant,
+ features: Vec<rustybuzz::Feature>,
+ fallback: bool,
+ dir: Dir,
+}
+
+/// Shape text with font fallback using the `families` iterator.
+fn shape_segment<'a>(
+ ctx: &mut ShapingContext,
+ base: usize,
+ text: &str,
+ mut families: impl Iterator<Item = &'a str> + Clone,
+) {
+ // Don't try shaping newlines, tabs, or default ignorables.
+ if text
+ .chars()
+ .all(|c| c == '\n' || c == '\t' || is_default_ignorable(c))
+ {
+ return;
+ }
+
+ // Find the next available family.
+ let world = ctx.engine.world;
+ let book = world.book();
+ let mut selection = families.find_map(|family| {
+ book.select(family, ctx.variant)
+ .and_then(|id| world.font(id))
+ .filter(|font| !ctx.used.contains(font))
+ });
+
+ // Do font fallback if the families are exhausted and fallback is enabled.
+ if selection.is_none() && ctx.fallback {
+ let first = ctx.used.first().map(Font::info);
+ selection = book
+ .select_fallback(first, ctx.variant, text)
+ .and_then(|id| world.font(id))
+ .filter(|font| !ctx.used.contains(font));
+ }
+
+ // Extract the font id or shape notdef glyphs if we couldn't find any font.
+ let Some(font) = selection else {
+ if let Some(font) = ctx.used.first().cloned() {
+ shape_tofus(ctx, base, text, font);
+ }
+ return;
+ };
+
+ ctx.used.push(font.clone());
+
+ // Fill the buffer with our text.
+ let mut buffer = UnicodeBuffer::new();
+ buffer.push_str(text);
+ buffer.set_language(language(ctx.styles));
+ if let Some(script) = TextElem::script_in(ctx.styles).custom().and_then(|script| {
+ rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes()))
+ }) {
+ buffer.set_script(script)
+ }
+ buffer.set_direction(match ctx.dir {
+ Dir::LTR => rustybuzz::Direction::LeftToRight,
+ Dir::RTL => rustybuzz::Direction::RightToLeft,
+ _ => unimplemented!("vertical text layout"),
+ });
+ buffer.guess_segment_properties();
+
+ // By default, Harfbuzz will create zero-width space glyphs for default
+ // ignorables. This is probably useful for GUI apps that want noticable
+ // effects on the cursor for those, but for us it's not useful and hurts
+ // text extraction.
+ buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES);
+
+ // Prepare the shape plan. This plan depends on direction, script, language,
+ // and features, but is independent from the text and can thus be memoized.
+ let plan = create_shape_plan(
+ &font,
+ buffer.direction(),
+ buffer.script(),
+ buffer.language().as_ref(),
+ &ctx.features,
+ );
+
+ // Shape!
+ let buffer = rustybuzz::shape_with_plan(font.rusty(), &plan, buffer);
+ let infos = buffer.glyph_infos();
+ let pos = buffer.glyph_positions();
+ let ltr = ctx.dir.is_positive();
+
+ // Collect the shaped glyphs, doing fallback and shaping parts again with
+ // the next font if necessary.
+ let mut i = 0;
+ while i < infos.len() {
+ let info = &infos[i];
+ let cluster = info.cluster as usize;
+
+ // Add the glyph to the shaped output.
+ if info.glyph_id != 0 {
+ // Determine the text range of the glyph.
+ let start = base + cluster;
+ let end = base
+ + if ltr { i.checked_add(1) } else { i.checked_sub(1) }
+ .and_then(|last| infos.get(last))
+ .map_or(text.len(), |info| info.cluster as usize);
+
+ let c = text[cluster..].chars().next().unwrap();
+ let script = c.script();
+ let x_advance = font.to_em(pos[i].x_advance);
+ ctx.glyphs.push(ShapedGlyph {
+ font: font.clone(),
+ glyph_id: info.glyph_id as u16,
+ // TODO: Don't ignore y_advance.
+ x_advance,
+ x_offset: font.to_em(pos[i].x_offset),
+ y_offset: font.to_em(pos[i].y_offset),
+ adjustability: Adjustability::default(),
+ range: start..end,
+ safe_to_break: !info.unsafe_to_break(),
+ c,
+ is_justifiable: is_justifiable(
+ c,
+ script,
+ x_advance,
+ Adjustability::default().stretchability,
+ ),
+ script,
+ });
+ } else {
+ // First, search for the end of the tofu sequence.
+ let k = i;
+ while infos.get(i + 1).is_some_and(|info| info.glyph_id == 0) {
+ i += 1;
+ }
+
+ // Then, determine the start and end text index for the tofu
+ // sequence.
+ //
+ // Examples:
+ // Everything is shown in visual order. Tofus are written as "_".
+ // We want to find out that the tofus span the text `2..6`.
+ // Note that the clusters are longer than 1 char.
+ //
+ // Left-to-right:
+ // Text: h a l i h a l l o
+ // Glyphs: A _ _ C E
+ // Clusters: 0 2 4 6 8
+ // k=1 i=2
+ //
+ // Right-to-left:
+ // Text: O L L A H I L A H
+ // Glyphs: E C _ _ A
+ // Clusters: 8 6 4 2 0
+ // k=2 i=3
+ let start = infos[if ltr { k } else { i }].cluster as usize;
+ let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) }
+ .and_then(|last| infos.get(last))
+ .map_or(text.len(), |info| info.cluster as usize);
+
+ // Trim half-baked cluster.
+ let remove = base + start..base + end;
+ while ctx.glyphs.last().is_some_and(|g| remove.contains(&g.range.start)) {
+ ctx.glyphs.pop();
+ }
+
+ // Recursively shape the tofu sequence with the next family.
+ shape_segment(ctx, base + start, &text[start..end], families.clone());
+ }
+
+ i += 1;
+ }
+
+ ctx.used.pop();
+}
+
+/// Create a shape plan.
+#[comemo::memoize]
+fn create_shape_plan(
+ font: &Font,
+ direction: rustybuzz::Direction,
+ script: rustybuzz::Script,
+ language: Option<&rustybuzz::Language>,
+ features: &[rustybuzz::Feature],
+) -> Arc<ShapePlan> {
+ Arc::new(rustybuzz::ShapePlan::new(
+ font.rusty(),
+ direction,
+ Some(script),
+ language,
+ features,
+ ))
+}
+
+/// Shape the text with tofus from the given font.
+fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
+ let x_advance = font.advance(0).unwrap_or_default();
+ let add_glyph = |(cluster, c): (usize, char)| {
+ let start = base + cluster;
+ let end = start + c.len_utf8();
+ let script = c.script();
+ ctx.glyphs.push(ShapedGlyph {
+ font: font.clone(),
+ glyph_id: 0,
+ x_advance,
+ x_offset: Em::zero(),
+ y_offset: Em::zero(),
+ adjustability: Adjustability::default(),
+ range: start..end,
+ safe_to_break: true,
+ c,
+ is_justifiable: is_justifiable(
+ c,
+ script,
+ x_advance,
+ Adjustability::default().stretchability,
+ ),
+ script,
+ });
+ };
+ if ctx.dir.is_positive() {
+ text.char_indices().for_each(add_glyph);
+ } else {
+ text.char_indices().rev().for_each(add_glyph);
+ }
+}
+
+/// Apply tracking and spacing to the shaped glyphs.
+fn track_and_space(ctx: &mut ShapingContext) {
+ let tracking = Em::from_length(TextElem::tracking_in(ctx.styles), ctx.size);
+ let spacing =
+ TextElem::spacing_in(ctx.styles).map(|abs| Em::from_length(abs, ctx.size));
+
+ let mut glyphs = ctx.glyphs.iter_mut().peekable();
+ while let Some(glyph) = glyphs.next() {
+ // Make non-breaking space same width as normal space.
+ if glyph.c == '\u{00A0}' {
+ glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default();
+ }
+
+ if glyph.is_space() {
+ glyph.x_advance = spacing.relative_to(glyph.x_advance);
+ }
+
+ if glyphs
+ .peek()
+ .is_some_and(|next| glyph.range.start != next.range.start)
+ {
+ glyph.x_advance += tracking;
+ }
+ }
+}
+
+/// Calculate stretchability and shrinkability of each glyph,
+/// and CJK punctuation adjustments according to Chinese Layout Requirements.
+fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
+ let style = cjk_punct_style(lang, region);
+
+ for glyph in &mut ctx.glyphs {
+ glyph.adjustability = glyph.base_adjustability(style);
+ }
+
+ let mut glyphs = ctx.glyphs.iter_mut().peekable();
+ while let Some(glyph) = glyphs.next() {
+ // CNS style needs not further adjustment.
+ if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) {
+ continue;
+ }
+
+ // Now we apply consecutive punctuation adjustment, specified in Chinese Layout.
+ // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout
+ // Requirements, section 3.1 Line Composition Rules for Punctuation Marks
+ let Some(next) = glyphs.peek_mut() else { continue };
+ let width = glyph.x_advance;
+ let delta = width / 2.0;
+ if glyph.is_cjk_punctuation()
+ && next.is_cjk_punctuation()
+ && (glyph.shrinkability().1 + next.shrinkability().0) >= delta
+ {
+ let left_delta = glyph.shrinkability().1.min(delta);
+ glyph.shrink_right(left_delta);
+ next.shrink_left(delta - left_delta);
+ }
+ }
+}
+
+/// Difference between non-breaking and normal space.
+fn nbsp_delta(font: &Font) -> Option<Em> {
+ let space = font.ttf().glyph_index(' ')?.0;
+ let nbsp = font.ttf().glyph_index('\u{00A0}')?.0;
+ Some(font.advance(nbsp)? - font.advance(space)?)
+}
+
+/// Process the language and region of a style chain into a
+/// rustybuzz-compatible BCP 47 language.
+fn language(styles: StyleChain) -> rustybuzz::Language {
+ let mut bcp: EcoString = TextElem::lang_in(styles).as_str().into();
+ if let Some(region) = TextElem::region_in(styles) {
+ bcp.push('-');
+ bcp.push_str(region.as_str());
+ }
+ rustybuzz::Language::from_str(&bcp).unwrap()
+}
+
+/// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
+#[cfg(debug_assertions)]
+fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) {
+ if glyphs
+ .iter()
+ .any(|g| g.range.start < range.start || g.range.end > range.end)
+ {
+ panic!("one or more glyphs in {text:?} fell out of range");
+ }
+}
+
+/// Asserts that the ranges of `glyphs` is in the proper order according to
+/// `dir`.
+///
+/// This asserts instead of returning a bool in order to provide a more
+/// informative message when the invariant is violated.
+#[cfg(debug_assertions)]
+fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
+ if glyphs.is_empty() {
+ return;
+ }
+
+ // Iterator::is_sorted and friends are unstable as of Rust 1.70.0
+ for i in 0..(glyphs.len() - 1) {
+ let a = &glyphs[i];
+ let b = &glyphs[i + 1];
+ let ord = a.range.start.cmp(&b.range.start);
+ let ord = if dir.is_positive() { ord } else { ord.reverse() };
+ if ord == std::cmp::Ordering::Greater {
+ panic!(
+ "glyph ranges should be monotonically {}, \
+ but found glyphs out of order:\n\n\
+ first: {a:#?}\nsecond: {b:#?}",
+ if dir.is_positive() { "increasing" } else { "decreasing" },
+ );
+ }
+ }
+}
+
+// The CJK punctuation that can appear at the beginning or end of a line.
+pub const BEGIN_PUNCT_PAT: &[char] =
+ &['“', '‘', '《', '〈', '(', '『', '「', '【', '〖', '〔', '[', '{'];
+pub const END_PUNCT_PAT: &[char] = &[
+ '”', '’', ',', '.', '。', '、', ':', ';', '》', '〉', ')', '』', '」', '】',
+ '〗', '〕', ']', '}', '?', '!',
+];
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CjkPunctStyle {
+ /// Standard GB/T 15834-2011, used mostly in mainland China.
+ Gb,
+ /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
+ Cns,
+ /// Standard JIS X 4051, used in Japan.
+ Jis,
+}
+
+pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
+ match (lang, region.as_ref().map(Region::as_str)) {
+ (Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
+ (Lang::JAPANESE, _) => CjkPunctStyle::Jis,
+ // zh-CN, zh-SG, zh-MY use GB-style punctuation,
+ _ => CjkPunctStyle::Gb,
+ }
+}
+
+/// Whether the glyph is a space.
+fn is_space(c: char) -> bool {
+ matches!(c, ' ' | '\u{00A0}' | ' ')
+}
+
+/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
+pub fn is_of_cj_script(c: char) -> bool {
+ is_cj_script(c, c.script())
+}
+
+/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
+/// The function is dedicated to typesetting Chinese or Japanese, which do not
+/// have spaces between words, so K is not checked here.
+fn is_cj_script(c: char, script: Script) -> bool {
+ use Script::*;
+ // U+30FC: Katakana-Hiragana Prolonged Sound Mark
+ matches!(script, Hiragana | Katakana | Han) || c == '\u{30FC}'
+}
+
+/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+fn is_cjk_left_aligned_punctuation(
+ c: char,
+ x_advance: Em,
+ stretchability: (Em, Em),
+ style: CjkPunctStyle,
+) -> bool {
+ use CjkPunctStyle::*;
+
+ // CJK quotation marks shares codepoints with latin quotation marks.
+ // But only the CJK ones have full width.
+ if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() {
+ return true;
+ }
+
+ if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
+ {
+ return true;
+ }
+
+ if matches!(style, Gb) && matches!(c, '?' | '!') {
+ // In GB style, exclamations and question marks are also left aligned
+ // and can be adjusted. Note that they are not adjustable in other
+ // styles.
+ return true;
+ }
+
+ // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
+ matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}')
+}
+
+/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+fn is_cjk_right_aligned_punctuation(
+ c: char,
+ x_advance: Em,
+ stretchability: (Em, Em),
+) -> bool {
+ // CJK quotation marks shares codepoints with latin quotation marks.
+ // But only the CJK ones have full width.
+ if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() {
+ return true;
+ }
+ // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
+ matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{')
+}
+
+/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
+fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool {
+ if matches!(style, CjkPunctStyle::Cns)
+ && matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
+ {
+ return true;
+ }
+
+ // U+30FB: Katakana Middle Dot
+ // U+00B7: Middle Dot
+ matches!(c, '\u{30FB}' | '\u{00B7}')
+}
+
+/// Whether the glyph is justifiable.
+///
+/// Quotations in latin script and CJK are unfortunately the same codepoint
+/// (U+2018, U+2019, U+201C, U+201D), but quotations in Chinese must be
+/// fullwidth. This heuristics can therefore fail for monospace latin fonts.
+/// However, since monospace fonts are usually not justified this edge case
+/// should be rare enough.
+fn is_justifiable(
+ c: char,
+ script: Script,
+ x_advance: Em,
+ stretchability: (Em, Em),
+) -> bool {
+ // punctuation style is not relevant here.
+ let style = CjkPunctStyle::Gb;
+ is_space(c)
+ || is_cj_script(c, script)
+ || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style)
+ || is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
+ || is_cjk_center_aligned_punctuation(c, style)
+}