diff options
Diffstat (limited to 'crates/typst-layout/src/inline')
| -rw-r--r-- | crates/typst-layout/src/inline/box.rs | 87 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/collect.rs | 328 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/deco.rs | 213 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/finalize.rs | 36 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/line.rs | 750 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/linebreak.rs | 980 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/mod.rs | 105 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/prepare.rs | 196 | ||||
| -rw-r--r-- | crates/typst-layout/src/inline/shaping.rs | 1175 |
9 files changed, 3870 insertions, 0 deletions
diff --git a/crates/typst-layout/src/inline/box.rs b/crates/typst-layout/src/inline/box.rs new file mode 100644 index 00000000..30572e4e --- /dev/null +++ b/crates/typst-layout/src/inline/box.rs @@ -0,0 +1,87 @@ +use once_cell::unsync::Lazy; +use typst_library::diag::SourceResult; +use typst_library::engine::Engine; +use typst_library::foundations::{Packed, StyleChain}; +use typst_library::introspection::Locator; +use typst_library::layout::{BoxElem, Frame, FrameKind, Size}; +use typst_library::visualize::Stroke; +use typst_utils::Numeric; + +use crate::flow::unbreakable_pod; +use crate::shapes::{clip_rect, fill_and_stroke}; + +/// Lay out a box as part of a paragraph. +#[typst_macros::time(name = "box", span = elem.span())] +pub fn layout_box( + elem: &Packed<BoxElem>, + engine: &mut Engine, + locator: Locator, + styles: StyleChain, + region: Size, +) -> SourceResult<Frame> { + // Fetch sizing properties. + let width = elem.width(styles); + let height = elem.height(styles); + let inset = elem.inset(styles).unwrap_or_default(); + + // Build the pod region. + let pod = unbreakable_pod(&width, &height.into(), &inset, styles, region); + + // Layout the body. + let mut frame = match elem.body(styles) { + // If we have no body, just create an empty frame. If necessary, + // its size will be adjusted below. + None => Frame::hard(Size::zero()), + + // If we have a child, layout it into the body. Boxes are boundaries + // for gradient relativeness, so we set the `FrameKind` to `Hard`. + Some(body) => crate::layout_frame(engine, body, locator, styles, pod)? + .with_kind(FrameKind::Hard), + }; + + // Enforce a correct frame size on the expanded axes. Do this before + // applying the inset, since the pod shrunk. + frame.set_size(pod.expand.select(pod.size, frame.size())); + + // Apply the inset. + if !inset.is_zero() { + crate::pad::grow(&mut frame, &inset); + } + + // Prepare fill and stroke. + let fill = elem.fill(styles); + let stroke = elem + .stroke(styles) + .unwrap_or_default() + .map(|s| s.map(Stroke::unwrap_or_default)); + + // Only fetch these if necessary (for clipping or filling/stroking). + let outset = Lazy::new(|| elem.outset(styles).unwrap_or_default()); + let radius = Lazy::new(|| elem.radius(styles).unwrap_or_default()); + + // Clip the contents, if requested. + if elem.clip(styles) { + let size = frame.size() + outset.relative_to(frame.size()).sum_by_axis(); + frame.clip(clip_rect(size, &radius, &stroke)); + } + + // Add fill and/or stroke. + if fill.is_some() || stroke.iter().any(Option::is_some) { + fill_and_stroke(&mut frame, fill, &stroke, &outset, &radius, elem.span()); + } + + // Assign label to the frame. + if let Some(label) = elem.label() { + frame.label(label); + } + + // Apply baseline shift. Do this after setting the size and applying the + // inset, so that a relative shift is resolved relative to the final + // height. + let shift = elem.baseline(styles).relative_to(frame.height()); + if !shift.is_zero() { + frame.set_baseline(frame.baseline() - shift); + } + + Ok(frame) +} diff --git a/crates/typst-layout/src/inline/collect.rs b/crates/typst-layout/src/inline/collect.rs new file mode 100644 index 00000000..fbcddee5 --- /dev/null +++ b/crates/typst-layout/src/inline/collect.rs @@ -0,0 +1,328 @@ +use typst_library::diag::bail; +use typst_library::foundations::{Packed, Resolve}; +use typst_library::introspection::{SplitLocator, Tag, TagElem}; +use typst_library::layout::{ + Abs, AlignElem, BoxElem, Dir, Fr, Frame, HElem, InlineElem, InlineItem, Sizing, + Spacing, +}; +use typst_library::text::{ + is_default_ignorable, LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, + SpaceElem, TextElem, +}; +use typst_syntax::Span; +use typst_utils::Numeric; + +use super::*; + +// The characters by which spacing, inline content and pins are replaced in the +// paragraph's full text. +const SPACING_REPLACE: &str = " "; // Space +const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character + +// Unicode BiDi control characters. +const LTR_EMBEDDING: &str = "\u{202A}"; +const RTL_EMBEDDING: &str = "\u{202B}"; +const POP_EMBEDDING: &str = "\u{202C}"; +const LTR_ISOLATE: &str = "\u{2066}"; +const POP_ISOLATE: &str = "\u{2069}"; + +/// A prepared item in a paragraph layout. +#[derive(Debug)] +pub enum Item<'a> { + /// A shaped text run with consistent style and direction. + Text(ShapedText<'a>), + /// Absolute spacing between other items, and whether it is weak. + Absolute(Abs, bool), + /// Fractional spacing between other items. + Fractional(Fr, Option<(&'a Packed<BoxElem>, Locator<'a>, StyleChain<'a>)>), + /// Layouted inline-level content. + Frame(Frame, StyleChain<'a>), + /// A tag. + Tag(&'a Tag), + /// An item that is invisible and needs to be skipped, e.g. a Unicode + /// isolate. + Skip(&'static str), +} + +impl<'a> Item<'a> { + /// If this a text item, return it. + pub fn text(&self) -> Option<&ShapedText<'a>> { + match self { + Self::Text(shaped) => Some(shaped), + _ => None, + } + } + + /// If this a text item, return it mutably. + pub fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + match self { + Self::Text(shaped) => Some(shaped), + _ => None, + } + } + + /// Return the textual representation of this item: Either just itself (for + /// a text item) or a replacement string (for any other item). + pub fn textual(&self) -> &str { + match self { + Self::Text(shaped) => shaped.text, + Self::Absolute(_, _) | Self::Fractional(_, _) => SPACING_REPLACE, + Self::Frame(_, _) => OBJ_REPLACE, + Self::Tag(_) => "", + Self::Skip(s) => s, + } + } + + /// The text length of the item. + pub fn textual_len(&self) -> usize { + self.textual().len() + } + + /// The natural layouted width of the item. + pub fn natural_width(&self) -> Abs { + match self { + Self::Text(shaped) => shaped.width, + Self::Absolute(v, _) => *v, + Self::Frame(frame, _) => frame.width(), + Self::Fractional(_, _) | Self::Tag(_) => Abs::zero(), + Self::Skip(_) => Abs::zero(), + } + } +} + +/// An item or not-yet shaped text. We can't shape text until we have collected +/// all items because only then we can compute BiDi, and we need to split shape +/// runs at level boundaries. +#[derive(Debug)] +pub enum Segment<'a> { + /// One or multiple collapsed text children. Stores how long the segment is + /// (in bytes of the full text string). + Text(usize, StyleChain<'a>), + /// An already prepared item. + Item(Item<'a>), +} + +impl Segment<'_> { + /// The text length of the item. + pub fn textual_len(&self) -> usize { + match self { + Self::Text(len, _) => *len, + Self::Item(item) => item.textual_len(), + } + } +} + +/// Collects all text of the paragraph into one string and a collection of +/// segments that correspond to pieces of that string. This also performs +/// string-level preprocessing like case transformations. +#[typst_macros::time] +pub fn collect<'a>( + children: &'a StyleVec, + engine: &mut Engine<'_>, + locator: &mut SplitLocator<'a>, + styles: &'a StyleChain<'a>, + region: Size, + consecutive: bool, +) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> { + let mut collector = Collector::new(2 + children.len()); + let mut quoter = SmartQuoter::new(); + + let outer_dir = TextElem::dir_in(*styles); + let first_line_indent = ParElem::first_line_indent_in(*styles); + if !first_line_indent.is_zero() + && consecutive + && AlignElem::alignment_in(*styles).resolve(*styles).x == outer_dir.start().into() + { + collector.push_item(Item::Absolute(first_line_indent.resolve(*styles), false)); + collector.spans.push(1, Span::detached()); + } + + let hang = ParElem::hanging_indent_in(*styles); + if !hang.is_zero() { + collector.push_item(Item::Absolute(-hang, false)); + collector.spans.push(1, Span::detached()); + } + + for (child, styles) in children.iter(styles) { + let prev_len = collector.full.len(); + + if child.is::<SpaceElem>() { + collector.push_text(" ", styles); + } else if let Some(elem) = child.to_packed::<TextElem>() { + collector.build_text(styles, |full| { + let dir = TextElem::dir_in(styles); + if dir != outer_dir { + // Insert "Explicit Directional Embedding". + match dir { + Dir::LTR => full.push_str(LTR_EMBEDDING), + Dir::RTL => full.push_str(RTL_EMBEDDING), + _ => {} + } + } + + if let Some(case) = TextElem::case_in(styles) { + full.push_str(&case.apply(elem.text())); + } else { + full.push_str(elem.text()); + } + + if dir != outer_dir { + // Insert "Pop Directional Formatting". + full.push_str(POP_EMBEDDING); + } + }); + } else if let Some(elem) = child.to_packed::<HElem>() { + let amount = elem.amount(); + if amount.is_zero() { + continue; + } + + collector.push_item(match amount { + Spacing::Fr(fr) => Item::Fractional(*fr, None), + Spacing::Rel(rel) => Item::Absolute( + rel.resolve(styles).relative_to(region.x), + elem.weak(styles), + ), + }); + } else if let Some(elem) = child.to_packed::<LinebreakElem>() { + collector + .push_text(if elem.justify(styles) { "\u{2028}" } else { "\n" }, styles); + } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() { + let double = elem.double(styles); + if elem.enabled(styles) { + let quotes = SmartQuotes::get( + elem.quotes(styles), + TextElem::lang_in(styles), + TextElem::region_in(styles), + elem.alternative(styles), + ); + let before = + collector.full.chars().rev().find(|&c| !is_default_ignorable(c)); + let quote = quoter.quote(before, "es, double); + collector.push_text(quote, styles); + } else { + collector.push_text(if double { "\"" } else { "'" }, styles); + } + } else if let Some(elem) = child.to_packed::<InlineElem>() { + collector.push_item(Item::Skip(LTR_ISOLATE)); + + for item in elem.layout(engine, locator.next(&elem.span()), styles, region)? { + match item { + InlineItem::Space(space, weak) => { + collector.push_item(Item::Absolute(space, weak)); + } + InlineItem::Frame(frame) => { + collector.push_item(Item::Frame(frame, styles)); + } + } + } + + collector.push_item(Item::Skip(POP_ISOLATE)); + } else if let Some(elem) = child.to_packed::<BoxElem>() { + let loc = locator.next(&elem.span()); + if let Sizing::Fr(v) = elem.width(styles) { + collector.push_item(Item::Fractional(v, Some((elem, loc, styles)))); + } else { + let frame = layout_box(elem, engine, loc, styles, region)?; + collector.push_item(Item::Frame(frame, styles)); + } + } else if let Some(elem) = child.to_packed::<TagElem>() { + collector.push_item(Item::Tag(&elem.tag)); + } else { + bail!(child.span(), "unexpected paragraph child"); + }; + + let len = collector.full.len() - prev_len; + collector.spans.push(len, child.span()); + } + + Ok((collector.full, collector.segments, collector.spans)) +} + +/// Collects segments. +struct Collector<'a> { + full: String, + segments: Vec<Segment<'a>>, + spans: SpanMapper, +} + +impl<'a> Collector<'a> { + fn new(capacity: usize) -> Self { + Self { + full: String::new(), + segments: Vec::with_capacity(capacity), + spans: SpanMapper::new(), + } + } + + fn push_text(&mut self, text: &str, styles: StyleChain<'a>) { + self.full.push_str(text); + self.push_segment(Segment::Text(text.len(), styles)); + } + + fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F) + where + F: FnOnce(&mut String), + { + let prev = self.full.len(); + f(&mut self.full); + let len = self.full.len() - prev; + self.push_segment(Segment::Text(len, styles)); + } + + fn push_item(&mut self, item: Item<'a>) { + self.full.push_str(item.textual()); + self.push_segment(Segment::Item(item)); + } + + fn push_segment(&mut self, segment: Segment<'a>) { + match (self.segments.last_mut(), &segment) { + // Merge adjacent text segments with the same styles. + (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) + if *last_styles == *styles => + { + *last_len += *len; + } + + // Merge adjacent weak spacing by taking the maximum. + ( + Some(Segment::Item(Item::Absolute(prev_amount, true))), + Segment::Item(Item::Absolute(amount, true)), + ) => { + *prev_amount = (*prev_amount).max(*amount); + } + + _ => self.segments.push(segment), + } + } +} + +/// Maps byte offsets back to spans. +#[derive(Default)] +pub struct SpanMapper(Vec<(usize, Span)>); + +impl SpanMapper { + /// Create a new span mapper. + pub fn new() -> Self { + Self::default() + } + + /// Push a span for a segment with the given length. + pub fn push(&mut self, len: usize, span: Span) { + self.0.push((len, span)); + } + + /// Determine the span at the given byte offset. + /// + /// May return a detached span. + pub fn span_at(&self, offset: usize) -> (Span, u16) { + let mut cursor = 0; + for &(len, span) in &self.0 { + if (cursor..cursor + len).contains(&offset) { + return (span, u16::try_from(offset - cursor).unwrap_or(0)); + } + cursor += len; + } + (Span::detached(), 0) + } +} diff --git a/crates/typst-layout/src/inline/deco.rs b/crates/typst-layout/src/inline/deco.rs new file mode 100644 index 00000000..c01b369b --- /dev/null +++ b/crates/typst-layout/src/inline/deco.rs @@ -0,0 +1,213 @@ +use kurbo::{BezPath, Line, ParamCurve}; +use ttf_parser::{GlyphId, OutlineBuilder}; +use typst_library::layout::{Abs, Em, Frame, FrameItem, Point, Size}; +use typst_library::text::{ + BottomEdge, DecoLine, Decoration, TextEdgeBounds, TextItem, TopEdge, +}; +use typst_library::visualize::{FixedStroke, Geometry}; +use typst_syntax::Span; + +use crate::shapes::styled_rect; + +/// Add line decorations to a single run of shaped text. +pub fn decorate( + frame: &mut Frame, + deco: &Decoration, + text: &TextItem, + width: Abs, + shift: Abs, + pos: Point, +) { + let font_metrics = text.font.metrics(); + + if let DecoLine::Highlight { fill, stroke, top_edge, bottom_edge, radius } = + &deco.line + { + let (top, bottom) = determine_edges(text, *top_edge, *bottom_edge); + let size = Size::new(width + 2.0 * deco.extent, top + bottom); + let rects = styled_rect(size, radius, fill.clone(), stroke); + let origin = Point::new(pos.x - deco.extent, pos.y - top - shift); + frame.prepend_multiple( + rects + .into_iter() + .map(|shape| (origin, FrameItem::Shape(shape, Span::detached()))), + ); + return; + } + + let (stroke, metrics, offset, evade, background) = match &deco.line { + DecoLine::Strikethrough { stroke, offset, background } => { + (stroke, font_metrics.strikethrough, offset, false, *background) + } + DecoLine::Overline { stroke, offset, evade, background } => { + (stroke, font_metrics.overline, offset, *evade, *background) + } + DecoLine::Underline { stroke, offset, evade, background } => { + (stroke, font_metrics.underline, offset, *evade, *background) + } + _ => return, + }; + + let offset = offset.unwrap_or(-metrics.position.at(text.size)) - shift; + let stroke = stroke.clone().unwrap_or(FixedStroke::from_pair( + text.fill.as_decoration(), + metrics.thickness.at(text.size), + )); + + let gap_padding = 0.08 * text.size; + let min_width = 0.162 * text.size; + + let start = pos.x - deco.extent; + let end = pos.x + width + deco.extent; + + let mut push_segment = |from: Abs, to: Abs, prepend: bool| { + let origin = Point::new(from, pos.y + offset); + let target = Point::new(to - from, Abs::zero()); + + if target.x >= min_width || !evade { + let shape = Geometry::Line(target).stroked(stroke.clone()); + + if prepend { + frame.prepend(origin, FrameItem::Shape(shape, Span::detached())); + } else { + frame.push(origin, FrameItem::Shape(shape, Span::detached())); + } + } + }; + + if !evade { + push_segment(start, end, background); + return; + } + + let line = Line::new( + kurbo::Point::new(pos.x.to_raw(), offset.to_raw()), + kurbo::Point::new((pos.x + width).to_raw(), offset.to_raw()), + ); + + let mut x = pos.x; + let mut intersections = vec![]; + + for glyph in text.glyphs.iter() { + let dx = glyph.x_offset.at(text.size) + x; + let mut builder = + BezPathBuilder::new(font_metrics.units_per_em, text.size, dx.to_raw()); + + let bbox = text.font.ttf().outline_glyph(GlyphId(glyph.id), &mut builder); + let path = builder.finish(); + + x += glyph.x_advance.at(text.size); + + // Only do the costly segments intersection test if the line + // intersects the bounding box. + let intersect = bbox.is_some_and(|bbox| { + let y_min = -text.font.to_em(bbox.y_max).at(text.size); + let y_max = -text.font.to_em(bbox.y_min).at(text.size); + offset >= y_min && offset <= y_max + }); + + if intersect { + // Find all intersections of segments with the line. + intersections.extend( + path.segments() + .flat_map(|seg| seg.intersect_line(line)) + .map(|is| Abs::raw(line.eval(is.line_t).x)), + ); + } + } + + // Add start and end points, taking padding into account. + intersections.push(start - gap_padding); + intersections.push(end + gap_padding); + // When emitting the decorative line segments, we move from left to + // right. The intersections are not necessarily in this order, yet. + intersections.sort(); + + for edge in intersections.windows(2) { + let l = edge[0]; + let r = edge[1]; + + // If we are too close, don't draw the segment + if r - l < gap_padding { + continue; + } else { + push_segment(l + gap_padding, r - gap_padding, background); + } + } +} + +// Return the top/bottom edge of the text given the metric of the font. +fn determine_edges( + text: &TextItem, + top_edge: TopEdge, + bottom_edge: BottomEdge, +) -> (Abs, Abs) { + let mut top = Abs::zero(); + let mut bottom = Abs::zero(); + + for g in text.glyphs.iter() { + let (t, b) = text.font.edges( + top_edge, + bottom_edge, + text.size, + TextEdgeBounds::Glyph(g.id), + ); + top.set_max(t); + bottom.set_max(b); + } + + (top, bottom) +} + +/// Builds a kurbo [`BezPath`] for a glyph. +struct BezPathBuilder { + path: BezPath, + units_per_em: f64, + font_size: Abs, + x_offset: f64, +} + +impl BezPathBuilder { + fn new(units_per_em: f64, font_size: Abs, x_offset: f64) -> Self { + Self { + path: BezPath::new(), + units_per_em, + font_size, + x_offset, + } + } + + fn finish(self) -> BezPath { + self.path + } + + fn p(&self, x: f32, y: f32) -> kurbo::Point { + kurbo::Point::new(self.s(x) + self.x_offset, -self.s(y)) + } + + fn s(&self, v: f32) -> f64 { + Em::from_units(v, self.units_per_em).at(self.font_size).to_raw() + } +} + +impl OutlineBuilder for BezPathBuilder { + fn move_to(&mut self, x: f32, y: f32) { + self.path.move_to(self.p(x, y)); + } + + fn line_to(&mut self, x: f32, y: f32) { + self.path.line_to(self.p(x, y)); + } + + fn quad_to(&mut self, x1: f32, y1: f32, x: f32, y: f32) { + self.path.quad_to(self.p(x1, y1), self.p(x, y)); + } + + fn curve_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x: f32, y: f32) { + self.path.curve_to(self.p(x1, y1), self.p(x2, y2), self.p(x, y)); + } + + fn close(&mut self) { + self.path.close_path(); + } +} diff --git a/crates/typst-layout/src/inline/finalize.rs b/crates/typst-layout/src/inline/finalize.rs new file mode 100644 index 00000000..599ace9d --- /dev/null +++ b/crates/typst-layout/src/inline/finalize.rs @@ -0,0 +1,36 @@ +use typst_library::introspection::SplitLocator; +use typst_utils::Numeric; + +use super::*; + +/// Turns the selected lines into frames. +#[typst_macros::time] +pub fn finalize( + engine: &mut Engine, + p: &Preparation, + lines: &[Line], + styles: StyleChain, + region: Size, + expand: bool, + locator: &mut SplitLocator<'_>, +) -> SourceResult<Fragment> { + // Determine the paragraph's width: Full width of the region if we should + // expand or there's fractional spacing, fit-to-width otherwise. + let width = if !region.x.is_finite() + || (!expand && lines.iter().all(|line| line.fr().is_zero())) + { + region + .x + .min(p.hang + lines.iter().map(|line| line.width).max().unwrap_or_default()) + } else { + region.x + }; + + // Stack the lines into one frame per region. + let shrink = ParElem::shrink_in(styles); + lines + .iter() + .map(|line| commit(engine, p, line, width, region.y, shrink, locator, styles)) + .collect::<SourceResult<_>>() + .map(Fragment::frames) +} diff --git a/crates/typst-layout/src/inline/line.rs b/crates/typst-layout/src/inline/line.rs new file mode 100644 index 00000000..596e109e --- /dev/null +++ b/crates/typst-layout/src/inline/line.rs @@ -0,0 +1,750 @@ +use std::fmt::{self, Debug, Formatter}; +use std::ops::{Deref, DerefMut}; + +use typst_library::engine::Engine; +use typst_library::foundations::NativeElement; +use typst_library::introspection::{SplitLocator, Tag}; +use typst_library::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point}; +use typst_library::model::{ParLine, ParLineMarker}; +use typst_library::text::{Lang, TextElem}; +use typst_utils::Numeric; + +use super::*; + +const SHY: char = '\u{ad}'; +const HYPHEN: char = '-'; +const EN_DASH: char = '–'; +const EM_DASH: char = '—'; +const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks. + +/// A layouted line, consisting of a sequence of layouted paragraph items that +/// are mostly borrowed from the preparation phase. This type enables you to +/// measure the size of a line in a range before committing to building the +/// line's frame. +/// +/// At most two paragraph items must be created individually for this line: The +/// first and last one since they may be broken apart by the start or end of the +/// line, respectively. But even those can partially reuse previous results when +/// the break index is safe-to-break per rustybuzz. +pub struct Line<'a> { + /// The items the line is made of. + pub items: Items<'a>, + /// The exact natural width of the line. + pub width: Abs, + /// Whether the line should be justified. + pub justify: bool, + /// Whether the line ends with a hyphen or dash, either naturally or through + /// hyphenation. + pub dash: Option<Dash>, +} + +impl<'a> Line<'a> { + /// Create an empty line. + pub fn empty() -> Self { + Self { + items: Items::new(), + width: Abs::zero(), + justify: false, + dash: None, + } + } + + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. + pub fn justifiables(&self) -> usize { + let mut count = 0; + for shaped in self.items.iter().filter_map(Item::text) { + count += shaped.justifiables(); + } + + // CJK character at line end should not be adjusted. + if self + .items + .last() + .and_then(Item::text) + .map(|s| s.cjk_justifiable_at_last()) + .unwrap_or(false) + { + count -= 1; + } + + count + } + + /// How much the line can stretch. + pub fn stretchability(&self) -> Abs { + self.items + .iter() + .filter_map(Item::text) + .map(|s| s.stretchability()) + .sum() + } + + /// How much the line can shrink. + pub fn shrinkability(&self) -> Abs { + self.items + .iter() + .filter_map(Item::text) + .map(|s| s.shrinkability()) + .sum() + } + + /// Whether the line has items with negative width. + pub fn has_negative_width_items(&self) -> bool { + self.items.iter().any(|item| match item { + Item::Absolute(amount, _) => *amount < Abs::zero(), + Item::Frame(frame, _) => frame.width() < Abs::zero(), + _ => false, + }) + } + + /// The sum of fractions in the line. + pub fn fr(&self) -> Fr { + self.items + .iter() + .filter_map(|item| match item { + Item::Fractional(fr, _) => Some(*fr), + _ => None, + }) + .sum() + } +} + +/// A dash at the end of a line. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Dash { + /// A soft hyphen added to break a word. + Soft, + /// A regular hyphen, present in a compound word, e.g. beija-flor. + Hard, + /// Another kind of dash. Only relevant for cost computation. + Other, +} + +/// Create a line which spans the given range. +pub fn line<'a>( + engine: &Engine, + p: &'a Preparation, + range: Range, + breakpoint: Breakpoint, + pred: Option<&Line>, +) -> Line<'a> { + // The line's full text. + let full = &p.text[range.clone()]; + + // Whether the line is justified. + let justify = full.ends_with(LINE_SEPARATOR) + || (p.justify && breakpoint != Breakpoint::Mandatory); + + // Process dashes. + let dash = if breakpoint.is_hyphen() || full.ends_with(SHY) { + Some(Dash::Soft) + } else if full.ends_with(HYPHEN) { + Some(Dash::Hard) + } else if full.ends_with([EN_DASH, EM_DASH]) { + Some(Dash::Other) + } else { + None + }; + + // Trim the line at the end, if necessary for this breakpoint. + let trim = range.start + breakpoint.trim(full).len(); + + // Collect the items for the line. + let mut items = collect_items(engine, p, range, trim); + + // Add a hyphen at the line start, if a previous dash should be repeated. + if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) { + if let Some(shaped) = items.first_text_mut() { + shaped.prepend_hyphen(engine, p.fallback); + } + } + + // Add a hyphen at the line end, if we ended on a soft hyphen. + if dash == Some(Dash::Soft) { + if let Some(shaped) = items.last_text_mut() { + shaped.push_hyphen(engine, p.fallback); + } + } + + // Deal with CJ characters at line boundaries. + adjust_cj_at_line_boundaries(p, full, &mut items); + + // Compute the line's width. + let width = items.iter().map(Item::natural_width).sum(); + + Line { items, width, justify, dash } +} + +/// Collects / reshapes all items for the line with the given `range`. +/// +/// The `trim` defines an end position to which text items are trimmed. For +/// example, the `range` may span "hello\n", but the `trim` specifies that the +/// linebreak is trimmed. +/// +/// We do not factor the `trim` directly into the `range` because we still want +/// to keep non-text items after the trim (e.g. tags). +fn collect_items<'a>( + engine: &Engine, + p: &'a Preparation, + range: Range, + trim: usize, +) -> Items<'a> { + let mut items = Items::new(); + let mut fallback = None; + + // Collect the items for each consecutively ordered run. + reorder(p, range.clone(), |subrange, rtl| { + let from = items.len(); + collect_range(engine, p, subrange, trim, &mut items, &mut fallback); + if rtl { + items.reorder(from); + } + }); + + // Trim weak spacing at the start of the line. + let prefix = items + .iter() + .take_while(|item| matches!(item, Item::Absolute(_, true))) + .count(); + if prefix > 0 { + items.drain(..prefix); + } + + // Trim weak spacing at the end of the line. + while matches!(items.last(), Some(Item::Absolute(_, true))) { + items.pop(); + } + + // Add fallback text to expand the line height, if necessary. + if !items.iter().any(|item| matches!(item, Item::Text(_))) { + if let Some(fallback) = fallback { + items.push(fallback); + } + } + + items +} + +/// Calls `f` for the BiDi-reordered ranges of a line. +fn reorder<F>(p: &Preparation, range: Range, mut f: F) +where + F: FnMut(Range, bool), +{ + // If there is nothing bidirectional going on, skip reordering. + let Some(bidi) = &p.bidi else { + f(range, p.dir == Dir::RTL); + return; + }; + + // The bidi crate panics for empty lines. + if range.is_empty() { + f(range, p.dir == Dir::RTL); + return; + } + + // Find the paragraph that contains the line. + let para = bidi + .paragraphs + .iter() + .find(|para| para.range.contains(&range.start)) + .unwrap(); + + // Compute the reordered ranges in visual order (left to right). + let (levels, runs) = bidi.visual_runs(para, range.clone()); + + // Call `f` for each run. + for run in runs { + let rtl = levels[run.start].is_rtl(); + f(run, rtl) + } +} + +/// Collects / reshapes all items for the given `subrange` with continuous +/// direction. +fn collect_range<'a>( + engine: &Engine, + p: &'a Preparation, + range: Range, + trim: usize, + items: &mut Items<'a>, + fallback: &mut Option<ItemEntry<'a>>, +) { + for (subrange, item) in p.slice(range.clone()) { + // All non-text items are just kept, they can't be split. + let Item::Text(shaped) = item else { + items.push(item); + continue; + }; + + // The intersection range of the item, the subrange, and the line's + // trimming. + let sliced = + range.start.max(subrange.start)..range.end.min(subrange.end).min(trim); + + // Whether the item is split by the line. + let split = subrange.start < sliced.start || sliced.end < subrange.end; + + if sliced.is_empty() { + // When there is no text, still keep this as a fallback item, which + // we can use to force a non-zero line-height when the line doesn't + // contain any other text. + *fallback = Some(ItemEntry::from(Item::Text(shaped.empty()))); + } else if split { + // When the item is split in half, reshape it. + let reshaped = shaped.reshape(engine, sliced); + items.push(Item::Text(reshaped)); + } else { + // When the item is fully contained, just keep it. + items.push(item); + } + } +} + +/// Add spacing around punctuation marks for CJ glyphs at line boundaries. +/// +/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of +/// punctuation marks at line start or line end. +fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) { + if text.starts_with(BEGIN_PUNCT_PAT) + || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script)) + { + adjust_cj_at_line_start(p, items); + } + + if text.ends_with(END_PUNCT_PAT) + || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script)) + { + adjust_cj_at_line_end(p, items); + } +} + +/// Add spacing around punctuation marks for CJ glyphs at the line start. +fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) { + let Some(shaped) = items.first_text_mut() else { return }; + let Some(glyph) = shaped.glyphs.first() else { return }; + + if glyph.is_cjk_right_aligned_punctuation() { + // If the first glyph is a CJK punctuation, we want to + // shrink it. + let glyph = shaped.glyphs.to_mut().first_mut().unwrap(); + let shrink = glyph.shrinkability().0; + glyph.shrink_left(shrink); + shaped.width -= shrink.at(shaped.size); + } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() { + // If the first glyph is a CJK character adjusted by + // [`add_cjk_latin_spacing`], restore the original width. + let glyph = shaped.glyphs.to_mut().first_mut().unwrap(); + let shrink = glyph.x_offset; + glyph.x_advance -= shrink; + glyph.x_offset = Em::zero(); + glyph.adjustability.shrinkability.0 = Em::zero(); + shaped.width -= shrink.at(shaped.size); + } +} + +/// Add spacing around punctuation marks for CJ glyphs at the line end. +fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) { + let Some(shaped) = items.last_text_mut() else { return }; + let Some(glyph) = shaped.glyphs.last() else { return }; + + // Deal with CJK punctuation at line ends. + let style = cjk_punct_style(shaped.lang, shaped.region); + + if glyph.is_cjk_left_aligned_punctuation(style) { + // If the last glyph is a CJK punctuation, we want to + // shrink it. + let shrink = glyph.shrinkability().1; + let punct = shaped.glyphs.to_mut().last_mut().unwrap(); + punct.shrink_right(shrink); + shaped.width -= shrink.at(shaped.size); + } else if p.cjk_latin_spacing + && glyph.is_cj_script() + && (glyph.x_advance - glyph.x_offset) > Em::one() + { + // If the last glyph is a CJK character adjusted by + // [`add_cjk_latin_spacing`], restore the original width. + let shrink = glyph.x_advance - glyph.x_offset - Em::one(); + let glyph = shaped.glyphs.to_mut().last_mut().unwrap(); + glyph.x_advance -= shrink; + glyph.adjustability.shrinkability.1 = Em::zero(); + shaped.width -= shrink.at(shaped.size); + } +} + +/// Whether a hyphen should be inserted at the start of the next line. +fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool { + // If the predecessor line does not end with a `Dash::Hard`, we shall + // not place a hyphen at the start of the next line. + if pred_line.dash != Some(Dash::Hard) { + return false; + } + + // The hyphen should repeat only in the languages that require that feature. + // For more information see the discussion at https://github.com/typst/typst/issues/3235 + let Some(Item::Text(shaped)) = pred_line.items.last() else { return false }; + + match shaped.lang { + // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 + // - Czech: see https://prirucka.ujc.cas.cz/?id=164 + // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ + // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni + // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX) + // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/ + Lang::LOWER_SORBIAN + | Lang::CZECH + | Lang::CROATIAN + | Lang::POLISH + | Lang::PORTUGUESE + | Lang::SLOVAK => true, + + // In Spanish the hyphen is required only if the word next to hyphen is + // not capitalized. Otherwise, the hyphen must not be repeated. + // + // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" + // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea + Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()), + + _ => false, + } +} + +/// Commit to a line and build its frame. +#[allow(clippy::too_many_arguments)] +pub fn commit( + engine: &mut Engine, + p: &Preparation, + line: &Line, + width: Abs, + full: Abs, + shrink: bool, + locator: &mut SplitLocator<'_>, + styles: StyleChain, +) -> SourceResult<Frame> { + let mut remaining = width - line.width - p.hang; + let mut offset = Abs::zero(); + + // We always build the line from left to right. In an LTR paragraph, we must + // thus add the hanging indent to the offset. When the paragraph is RTL, the + // hanging indent arises naturally due to the line width. + if p.dir == Dir::LTR { + offset += p.hang; + } + + // Handle hanging punctuation to the left. + if let Some(Item::Text(text)) = line.items.first() { + if let Some(glyph) = text.glyphs.first() { + if !text.dir.is_positive() + && TextElem::overhang_in(text.styles) + && (line.items.len() > 1 || text.glyphs.len() > 1) + { + let amount = overhang(glyph.c) * glyph.x_advance.at(text.size); + offset -= amount; + remaining += amount; + } + } + } + + // Handle hanging punctuation to the right. + if let Some(Item::Text(text)) = line.items.last() { + if let Some(glyph) = text.glyphs.last() { + if text.dir.is_positive() + && TextElem::overhang_in(text.styles) + && (line.items.len() > 1 || text.glyphs.len() > 1) + { + let amount = overhang(glyph.c) * glyph.x_advance.at(text.size); + remaining += amount; + } + } + } + + // Determine how much additional space is needed. The justification_ratio is + // for the first step justification, extra_justification is for the last + // step. For more info on multi-step justification, see Procedures for + // Inter- Character Space Expansion in W3C document Chinese Layout + // Requirements. + let fr = line.fr(); + let mut justification_ratio = 0.0; + let mut extra_justification = Abs::zero(); + + let shrinkability = line.shrinkability(); + let stretchability = line.stretchability(); + if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink { + // Attempt to reduce the length of the line, using shrinkability. + justification_ratio = (remaining / shrinkability).max(-1.0); + remaining = (remaining + shrinkability).min(Abs::zero()); + } else if line.justify && fr.is_zero() { + // Attempt to increase the length of the line, using stretchability. + if stretchability > Abs::zero() { + justification_ratio = (remaining / stretchability).min(1.0); + remaining = (remaining - stretchability).max(Abs::zero()); + } + + let justifiables = line.justifiables(); + if justifiables > 0 && remaining > Abs::zero() { + // Underfull line, distribute the extra space. + extra_justification = remaining / justifiables as f64; + remaining = Abs::zero(); + } + } + + let mut top = Abs::zero(); + let mut bottom = Abs::zero(); + + // Build the frames and determine the height and baseline. + let mut frames = vec![]; + for item in line.items.iter() { + let mut push = |offset: &mut Abs, frame: Frame| { + let width = frame.width(); + top.set_max(frame.baseline()); + bottom.set_max(frame.size().y - frame.baseline()); + frames.push((*offset, frame)); + *offset += width; + }; + + match item { + Item::Absolute(v, _) => { + offset += *v; + } + Item::Fractional(v, elem) => { + let amount = v.share(fr, remaining); + if let Some((elem, loc, styles)) = elem { + let region = Size::new(amount, full); + let mut frame = + layout_box(elem, engine, loc.relayout(), *styles, region)?; + frame.translate(Point::with_y(TextElem::baseline_in(*styles))); + push(&mut offset, frame.post_processed(*styles)); + } else { + offset += amount; + } + } + Item::Text(shaped) => { + let frame = shaped.build( + engine, + &p.spans, + justification_ratio, + extra_justification, + ); + push(&mut offset, frame.post_processed(shaped.styles)); + } + Item::Frame(frame, styles) => { + let mut frame = frame.clone(); + frame.translate(Point::with_y(TextElem::baseline_in(*styles))); + push(&mut offset, frame.post_processed(*styles)); + } + Item::Tag(tag) => { + let mut frame = Frame::soft(Size::zero()); + frame.push(Point::zero(), FrameItem::Tag((*tag).clone())); + frames.push((offset, frame)); + } + Item::Skip(_) => {} + } + } + + // Remaining space is distributed now. + if !fr.is_zero() { + remaining = Abs::zero(); + } + + let size = Size::new(width, top + bottom); + let mut output = Frame::soft(size); + output.set_baseline(top); + + add_par_line_marker(&mut output, styles, engine, locator, top); + + // Construct the line's frame. + for (offset, frame) in frames { + let x = offset + p.align.position(remaining); + let y = top - frame.baseline(); + output.push_frame(Point::new(x, y), frame); + } + + Ok(output) +} + +/// Adds a paragraph line marker to a paragraph line's output frame if +/// line numbering is not `None` at this point. Ensures other style properties, +/// namely number margin, number align and number clearance, are stored in the +/// marker as well. +/// +/// The `top` parameter is used to ensure the marker, and thus the line's +/// number in the margin, is aligned to the line's baseline. +fn add_par_line_marker( + output: &mut Frame, + styles: StyleChain, + engine: &mut Engine, + locator: &mut SplitLocator, + top: Abs, +) { + let Some(numbering) = ParLine::numbering_in(styles) else { return }; + let margin = ParLine::number_margin_in(styles); + let align = ParLine::number_align_in(styles); + + // Delay resolving the number clearance until line numbers are laid out to + // avoid inconsistent spacing depending on varying font size. + let clearance = ParLine::number_clearance_in(styles); + + // Elements in tags must have a location for introspection to work. We do + // the work here instead of going through all of the realization process + // just for this, given we don't need to actually place the marker as we + // manually search for it in the frame later (when building a root flow, + // where line numbers can be displayed), so we just need it to be in a tag + // and to be valid (to have a location). + let mut marker = ParLineMarker::new(numbering, align, margin, clearance).pack(); + let key = typst_utils::hash128(&marker); + let loc = locator.next_location(engine.introspector, key); + marker.set_location(loc); + + // Create start and end tags through which we can search for this line's + // marker later. The 'x' coordinate is not important, just the 'y' + // coordinate, as that's what is used for line numbers. We will place the + // tags among other subframes in the line such that it is aligned with the + // line's general baseline. However, the line number will still need to + // manually adjust its own 'y' position based on its own baseline. + let pos = Point::with_y(top); + output.push(pos, FrameItem::Tag(Tag::Start(marker))); + output.push(pos, FrameItem::Tag(Tag::End(loc, key))); +} + +/// How much a character should hang into the end margin. +/// +/// For more discussion, see: +/// <https://recoveringphysicist.com/21/> +fn overhang(c: char) -> f64 { + match c { + // Dashes. + '–' | '—' => 0.2, + '-' => 0.55, + + // Punctuation. + '.' | ',' => 0.8, + ':' | ';' => 0.3, + + // Arabic + '\u{60C}' | '\u{6D4}' => 0.4, + + _ => 0.0, + } +} + +/// A collection of owned or borrowed paragraph items. +pub struct Items<'a>(Vec<ItemEntry<'a>>); + +impl<'a> Items<'a> { + /// Create empty items. + pub fn new() -> Self { + Self(vec![]) + } + + /// Push a new item. + pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) { + self.0.push(entry.into()); + } + + /// Iterate over the items + pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> { + self.0.iter().map(|item| &**item) + } + + /// Access the first item. + pub fn first(&self) -> Option<&Item<'a>> { + self.0.first().map(|item| &**item) + } + + /// Access the last item. + pub fn last(&self) -> Option<&Item<'a>> { + self.0.last().map(|item| &**item) + } + + /// Access the first item mutably, if it is text. + pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + self.0.first_mut()?.text_mut() + } + + /// Access the last item mutably, if it is text. + pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + self.0.last_mut()?.text_mut() + } + + /// Reorder the items starting at the given index to RTL. + pub fn reorder(&mut self, from: usize) { + self.0[from..].reverse() + } +} + +impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> { + fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl<'a> Deref for Items<'a> { + type Target = Vec<ItemEntry<'a>>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> DerefMut for Items<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Debug for Items<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_list().entries(&self.0).finish() + } +} + +/// A reference to or a boxed item. +pub enum ItemEntry<'a> { + Ref(&'a Item<'a>), + Box(Box<Item<'a>>), +} + +impl<'a> ItemEntry<'a> { + fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + match self { + Self::Ref(item) => { + let text = item.text()?; + *self = Self::Box(Box::new(Item::Text(text.clone()))); + match self { + Self::Box(item) => item.text_mut(), + _ => unreachable!(), + } + } + Self::Box(item) => item.text_mut(), + } + } +} + +impl<'a> Deref for ItemEntry<'a> { + type Target = Item<'a>; + + fn deref(&self) -> &Self::Target { + match self { + Self::Ref(item) => item, + Self::Box(item) => item, + } + } +} + +impl Debug for ItemEntry<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + (**self).fmt(f) + } +} + +impl<'a> From<&'a Item<'a>> for ItemEntry<'a> { + fn from(item: &'a Item<'a>) -> Self { + Self::Ref(item) + } +} + +impl<'a> From<Item<'a>> for ItemEntry<'a> { + fn from(item: Item<'a>) -> Self { + Self::Box(Box::new(item)) + } +} diff --git a/crates/typst-layout/src/inline/linebreak.rs b/crates/typst-layout/src/inline/linebreak.rs new file mode 100644 index 00000000..7fc8b368 --- /dev/null +++ b/crates/typst-layout/src/inline/linebreak.rs @@ -0,0 +1,980 @@ +use std::ops::{Add, Sub}; + +use az::SaturatingAs; +use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed}; +use icu_properties::LineBreak; +use icu_provider::AsDeserializingBufferProvider; +use icu_provider_adapters::fork::ForkByKeyProvider; +use icu_provider_blob::BlobDataProvider; +use icu_segmenter::LineSegmenter; +use once_cell::sync::Lazy; +use typst_library::engine::Engine; +use typst_library::layout::{Abs, Em}; +use typst_library::model::Linebreaks; +use typst_library::text::{is_default_ignorable, Lang, TextElem}; +use typst_syntax::link_prefix; +use unicode_segmentation::UnicodeSegmentation; + +use super::*; + +/// The cost of a line or paragraph layout. +type Cost = f64; + +// Cost parameters. +// +// We choose higher costs than the Knuth-Plass paper (which would be 50) because +// it hyphenates way to eagerly in Typst otherwise. Could be related to the +// ratios coming out differently since Typst doesn't have the concept of glue, +// so things work a bit differently. +const DEFAULT_HYPH_COST: Cost = 135.0; +const DEFAULT_RUNT_COST: Cost = 100.0; + +// Other parameters. +const MIN_RATIO: f64 = -1.0; +const MIN_APPROX_RATIO: f64 = -0.5; +const BOUND_EPS: f64 = 1e-3; + +/// The ICU blob data. +fn blob() -> BlobDataProvider { + BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap() +} + +/// The general line break segmenter. +static SEGMENTER: Lazy<LineSegmenter> = + Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap()); + +/// The line break segmenter for Chinese/Japanese text. +static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| { + let cj_blob = + BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT) + .unwrap(); + let cj_provider = ForkByKeyProvider::new(cj_blob, blob()); + LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap() +}); + +/// The Unicode line break properties for each code point. +static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| { + icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap() +}); + +/// A line break opportunity. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Breakpoint { + /// Just a normal opportunity (e.g. after a space). + Normal, + /// A mandatory breakpoint (after '\n' or at the end of the text). + Mandatory, + /// An opportunity for hyphenating and how many chars are before/after it + /// in the word. + Hyphen(u8, u8), +} + +impl Breakpoint { + /// Trim a line before this breakpoint. + pub fn trim(self, line: &str) -> &str { + // Trim default ignorables. + let line = line.trim_end_matches(is_default_ignorable); + + match self { + // Trim whitespace. + Self::Normal => line.trim_end_matches(char::is_whitespace), + + // Trim linebreaks. + Self::Mandatory => { + let lb = LINEBREAK_DATA.as_borrowed(); + line.trim_end_matches(|c| { + matches!( + lb.get(c), + LineBreak::MandatoryBreak + | LineBreak::CarriageReturn + | LineBreak::LineFeed + | LineBreak::NextLine + ) + }) + } + + // Trim nothing further. + Self::Hyphen(..) => line, + } + } + + /// Whether this is a hyphen breakpoint. + pub fn is_hyphen(self) -> bool { + matches!(self, Self::Hyphen(..)) + } +} + +/// Breaks the paragraph into lines. +pub fn linebreak<'a>( + engine: &Engine, + p: &'a Preparation<'a>, + width: Abs, +) -> Vec<Line<'a>> { + let linebreaks = p.linebreaks.unwrap_or_else(|| { + if p.justify { + Linebreaks::Optimized + } else { + Linebreaks::Simple + } + }); + + match linebreaks { + Linebreaks::Simple => linebreak_simple(engine, p, width), + Linebreaks::Optimized => linebreak_optimized(engine, p, width), + } +} + +/// Performs line breaking in simple first-fit style. This means that we build +/// lines greedily, always taking the longest possible line. This may lead to +/// very unbalanced line, but is fast and simple. +#[typst_macros::time] +fn linebreak_simple<'a>( + engine: &Engine, + p: &'a Preparation<'a>, + width: Abs, +) -> Vec<Line<'a>> { + let mut lines = Vec::with_capacity(16); + let mut start = 0; + let mut last = None; + + breakpoints(p, |end, breakpoint| { + // Compute the line and its size. + let mut attempt = line(engine, p, start..end, breakpoint, lines.last()); + + // If the line doesn't fit anymore, we push the last fitting attempt + // into the stack and rebuild the line from the attempt's end. The + // resulting line cannot be broken up further. + if !width.fits(attempt.width) { + if let Some((last_attempt, last_end)) = last.take() { + lines.push(last_attempt); + start = last_end; + attempt = line(engine, p, start..end, breakpoint, lines.last()); + } + } + + // Finish the current line if there is a mandatory line break (i.e. due + // to "\n") or if the line doesn't fit horizontally already since then + // no shorter line will be possible. + if breakpoint == Breakpoint::Mandatory || !width.fits(attempt.width) { + lines.push(attempt); + start = end; + last = None; + } else { + last = Some((attempt, end)); + } + }); + + if let Some((line, _)) = last { + lines.push(line); + } + + lines +} + +/// Performs line breaking in optimized Knuth-Plass style. Here, we use more +/// context to determine the line breaks than in the simple first-fit style. For +/// example, we might choose to cut a line short even though there is still a +/// bit of space to improve the fit of one of the following lines. The +/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a +/// very tight or very loose fit has a higher cost than one that is just right. +/// Ending a line with a hyphen incurs extra cost and endings two successive +/// lines with hyphens even more. +/// +/// To find the layout with the minimal total cost the algorithm uses dynamic +/// programming: For each possible breakpoint it determines the optimal +/// paragraph layout _up to that point_. It walks over all possible start points +/// for a line ending at that point and finds the one for which the cost of the +/// line plus the cost of the optimal paragraph up to the start point (already +/// computed and stored in dynamic programming table) is minimal. The final +/// result is simply the layout determined for the last breakpoint at the end of +/// text. +#[typst_macros::time] +fn linebreak_optimized<'a>( + engine: &Engine, + p: &'a Preparation<'a>, + width: Abs, +) -> Vec<Line<'a>> { + let metrics = CostMetrics::compute(p); + + // Determines the exact costs of a likely good layout through Knuth-Plass + // with approximate metrics. We can use this cost as an upper bound to prune + // the search space in our proper optimization pass below. + let upper_bound = linebreak_optimized_approximate(engine, p, width, &metrics); + + // Using the upper bound, perform exact optimized linebreaking. + linebreak_optimized_bounded(engine, p, width, &metrics, upper_bound) +} + +/// Performs line breaking in optimized Knuth-Plass style, but with an upper +/// bound on the cost. This allows us to skip many parts of the search space. +#[typst_macros::time] +fn linebreak_optimized_bounded<'a>( + engine: &Engine, + p: &'a Preparation<'a>, + width: Abs, + metrics: &CostMetrics, + upper_bound: Cost, +) -> Vec<Line<'a>> { + /// An entry in the dynamic programming table for paragraph optimization. + struct Entry<'a> { + pred: usize, + total: Cost, + line: Line<'a>, + end: usize, + } + + // Dynamic programming table. + let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }]; + + let mut active = 0; + let mut prev_end = 0; + + breakpoints(p, |end, breakpoint| { + // Find the optimal predecessor. + let mut best: Option<Entry> = None; + + // A lower bound for the cost of all following line attempts. + let mut line_lower_bound = None; + + for (pred_index, pred) in table.iter().enumerate().skip(active) { + let start = pred.end; + let unbreakable = prev_end == start; + + // If the minimum cost we've established for the line is already + // too much, skip this attempt. + if line_lower_bound + .is_some_and(|lower| pred.total + lower > upper_bound + BOUND_EPS) + { + continue; + } + + // Build the line. + let attempt = line(engine, p, start..end, breakpoint, Some(&pred.line)); + + // Determine the cost of the line and its stretch ratio. + let (line_ratio, line_cost) = ratio_and_cost( + p, + metrics, + width, + &pred.line, + &attempt, + breakpoint, + unbreakable, + ); + + // If the line is overfull, we adjust the set of active candidate + // line starts. This is the case if + // - justification is on, but we'd need to shrink too much + // - justification is off and the line just doesn't fit + // + // If this is the earliest breakpoint in the active set + // (active == i), remove it from the active set. If there is an + // earlier one (active < i), then the logically shorter line was + // in fact longer (can happen with negative spacing) and we + // can't trim the active set just yet. + if line_ratio < metrics.min_ratio && active == pred_index { + active += 1; + } + + // The total cost of this line and its chain of predecessors. + let total = pred.total + line_cost; + + // If the line is already underfull (`line_ratio > 0`), any shorter + // slice of the line will be even more underfull. So it'll only get + // worse from here and further attempts would also have a cost + // exceeding `bound`. There is one exception: When the line has + // negative spacing, we can't know for sure, so we don't assign the + // lower bound in that case. + if line_ratio > 0.0 + && line_lower_bound.is_none() + && !attempt.has_negative_width_items() + { + line_lower_bound = Some(line_cost); + } + + // If the cost already exceeds the upper bound, we don't need to + // integrate this result into the table. + if total > upper_bound + BOUND_EPS { + continue; + } + + // If this attempt is better than what we had before, take it! + if best.as_ref().map_or(true, |best| best.total >= total) { + best = Some(Entry { pred: pred_index, total, line: attempt, end }); + } + } + + // If this is a mandatory break, all breakpoints before this one become + // inactive since no line can span over the mandatory break. + if breakpoint == Breakpoint::Mandatory { + active = table.len(); + } + + table.extend(best); + prev_end = end; + }); + + // Retrace the best path. + let mut lines = Vec::with_capacity(16); + let mut idx = table.len() - 1; + + // This should only happen if our bound was faulty. Which shouldn't happen! + if table[idx].end != p.text.len() { + #[cfg(debug_assertions)] + panic!("bounded paragraph layout is incomplete"); + + #[cfg(not(debug_assertions))] + return linebreak_optimized_bounded(engine, p, width, metrics, Cost::INFINITY); + } + + while idx != 0 { + table.truncate(idx + 1); + let entry = table.pop().unwrap(); + lines.push(entry.line); + idx = entry.pred; + } + + lines.reverse(); + lines +} + +/// Runs the normal Knuth-Plass algorithm, but instead of building proper lines +/// (which is costly) to determine costs, it determines approximate costs using +/// cumulative arrays. +/// +/// This results in a likely good paragraph layouts, for which we then compute +/// the exact cost. This cost is an upper bound for proper optimized +/// linebreaking. We can use it to heavily prune the search space. +#[typst_macros::time] +fn linebreak_optimized_approximate( + engine: &Engine, + p: &Preparation, + width: Abs, + metrics: &CostMetrics, +) -> Cost { + // Determine the cumulative estimation metrics. + let estimates = Estimates::compute(p); + + /// An entry in the dynamic programming table for paragraph optimization. + struct Entry { + pred: usize, + total: Cost, + end: usize, + unbreakable: bool, + breakpoint: Breakpoint, + } + + // Dynamic programming table. + let mut table = vec![Entry { + pred: 0, + total: 0.0, + end: 0, + unbreakable: false, + breakpoint: Breakpoint::Mandatory, + }]; + + let mut active = 0; + let mut prev_end = 0; + + breakpoints(p, |end, breakpoint| { + // Find the optimal predecessor. + let mut best: Option<Entry> = None; + for (pred_index, pred) in table.iter().enumerate().skip(active) { + let start = pred.end; + let unbreakable = prev_end == start; + + // Whether the line is justified. This is not 100% accurate w.r.t + // to line()'s behaviour, but good enough. + let justify = p.justify && breakpoint != Breakpoint::Mandatory; + + // We don't really know whether the line naturally ends with a dash + // here, so we can miss that case, but it's ok, since all of this + // just an estimate. + let consecutive_dash = pred.breakpoint.is_hyphen() && breakpoint.is_hyphen(); + + // Estimate how much the line's spaces would need to be stretched to + // make it the desired width. We trim at the end to not take into + // account trailing spaces. This is, again, only an approximation of + // the real behaviour of `line`. + let trimmed_end = start + p.text[start..end].trim_end().len(); + let line_ratio = raw_ratio( + p, + width, + estimates.widths.estimate(start..trimmed_end) + + if breakpoint.is_hyphen() { + metrics.approx_hyphen_width + } else { + Abs::zero() + }, + estimates.stretchability.estimate(start..trimmed_end), + estimates.shrinkability.estimate(start..trimmed_end), + estimates.justifiables.estimate(start..trimmed_end), + ); + + // Determine the line's cost. + let line_cost = raw_cost( + metrics, + breakpoint, + line_ratio, + justify, + unbreakable, + consecutive_dash, + true, + ); + + // Adjust the set of active breakpoints. + // See `linebreak_optimized` for details. + if line_ratio < metrics.min_ratio && active == pred_index { + active += 1; + } + + // The total cost of this line and its chain of predecessors. + let total = pred.total + line_cost; + + // If this attempt is better than what we had before, take it! + if best.as_ref().map_or(true, |best| best.total >= total) { + best = Some(Entry { + pred: pred_index, + total, + end, + unbreakable, + breakpoint, + }); + } + } + + // If this is a mandatory break, all breakpoints before this one become + // inactive. + if breakpoint == Breakpoint::Mandatory { + active = table.len(); + } + + table.extend(best); + prev_end = end; + }); + + // Retrace the best path. + let mut indices = Vec::with_capacity(16); + let mut idx = table.len() - 1; + while idx != 0 { + indices.push(idx); + idx = table[idx].pred; + } + + let mut pred = Line::empty(); + let mut start = 0; + let mut exact = 0.0; + + // The cost that we optimized was only an approximate cost, so the layout we + // got here is only likely to be good, not guaranteed to be the best. We now + // computes its exact cost as that gives us a sound upper bound for the + // proper optimization pass. + for idx in indices.into_iter().rev() { + let Entry { end, breakpoint, unbreakable, .. } = table[idx]; + + let attempt = line(engine, p, start..end, breakpoint, Some(&pred)); + let (ratio, line_cost) = + ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable); + + // If approximation produces a valid layout without too much shrinking, + // exact layout is guaranteed to find the same layout. If, however, the + // line is overfull, we do not have this guarantee. Then, our bound + // becomes useless and actively harmful (it could be lower than what + // optimal layout produces). Thus, we immediately bail with an infinite + // bound in this case. + if ratio < metrics.min_ratio { + return Cost::INFINITY; + } + + pred = attempt; + start = end; + exact += line_cost; + } + + exact +} + +/// Compute the stretch ratio and cost of a line. +#[allow(clippy::too_many_arguments)] +fn ratio_and_cost( + p: &Preparation, + metrics: &CostMetrics, + available_width: Abs, + pred: &Line, + attempt: &Line, + breakpoint: Breakpoint, + unbreakable: bool, +) -> (f64, Cost) { + let ratio = raw_ratio( + p, + available_width, + attempt.width, + attempt.stretchability(), + attempt.shrinkability(), + attempt.justifiables(), + ); + + let cost = raw_cost( + metrics, + breakpoint, + ratio, + attempt.justify, + unbreakable, + pred.dash.is_some() && attempt.dash.is_some(), + false, + ); + + (ratio, cost) +} + +/// Determine the stretch ratio for a line given raw metrics. +/// +/// - A ratio < min_ratio indicates an overfull line. +/// - A negative ratio indicates a line that needs shrinking. +/// - A ratio of zero indicates a perfect line. +/// - A positive ratio indicates a line that needs stretching. +fn raw_ratio( + p: &Preparation, + available_width: Abs, + line_width: Abs, + stretchability: Abs, + shrinkability: Abs, + justifiables: usize, +) -> f64 { + // Determine how much the line's spaces would need to be stretched + // to make it the desired width. + let delta = available_width - line_width; + + // Determine how much stretch or shrink is natural. + let adjustability = if delta >= Abs::zero() { stretchability } else { shrinkability }; + + // Observations: + // - `delta` is negative for a line that needs shrinking and positive for a + // line that needs stretching. + // - `adjustability` must be non-negative to make sense. + // - `ratio` inherits the sign of `delta`. + let mut ratio = delta / adjustability.max(Abs::zero()); + + // The most likely cause of a NaN result is that `delta` was zero. This + // often happens with monospace fonts and CJK texts. It means that the line + // already fits perfectly, so `ratio` should be zero then. + if ratio.is_nan() { + ratio = 0.0; + } + + // If the ratio exceeds 1, we should stretch above the natural + // stretchability using justifiables. + if ratio > 1.0 { + // We should stretch the line above its stretchability. Now + // calculate the extra amount. Also, don't divide by zero. + let extra_stretch = (delta - adjustability) / justifiables.max(1) as f64; + // Normalize the amount by half the em size. + ratio = 1.0 + extra_stretch / (p.size / 2.0); + } + + // The min value must be < MIN_RATIO, but how much smaller doesn't matter + // since overfull lines have hard-coded huge costs anyway. + // + // The max value is clamped to 10 since it doesn't really matter whether a + // line is stretched 10x or 20x. + ratio.clamp(MIN_RATIO - 1.0, 10.0) +} + +/// Compute the cost of a line given raw metrics. +/// +/// This mostly follows the formula in the Knuth-Plass paper, but there are some +/// adjustments. +fn raw_cost( + metrics: &CostMetrics, + breakpoint: Breakpoint, + ratio: f64, + justify: bool, + unbreakable: bool, + consecutive_dash: bool, + approx: bool, +) -> Cost { + // Determine the stretch/shrink cost of the line. + let badness = if ratio < metrics.min_ratio(approx) { + // Overfull line always has maximum cost. + 1_000_000.0 + } else if breakpoint != Breakpoint::Mandatory || justify || ratio < 0.0 { + // If the line shall be justified or needs shrinking, it has normal + // badness with cost 100|ratio|^3. We limit the ratio to 10 as to not + // get to close to our maximum cost. + 100.0 * ratio.abs().powi(3) + } else { + // If the line shouldn't be justified and doesn't need shrink, we don't + // pay any cost. + 0.0 + }; + + // Compute penalties. + let mut penalty = 0.0; + + // Penalize runts (lone words before a mandatory break / at the end). + if unbreakable && breakpoint == Breakpoint::Mandatory { + penalty += metrics.runt_cost; + } + + // Penalize hyphenation. + if let Breakpoint::Hyphen(l, r) = breakpoint { + // We penalize hyphenations close to the edges of the word (< LIMIT + // chars) extra. For each step of distance from the limit, we add 15% + // to the cost. + const LIMIT: u8 = 5; + let steps = LIMIT.saturating_sub(l) + LIMIT.saturating_sub(r); + let extra = 0.15 * steps as f64; + penalty += (1.0 + extra) * metrics.hyph_cost; + } + + // Penalize two consecutive dashes extra (not necessarily hyphens). + // Knuth-Plass does this separately after the squaring, with a higher cost, + // but I couldn't find any explanation as to why. + if consecutive_dash { + penalty += metrics.hyph_cost; + } + + // From the Knuth-Plass Paper: $ (1 + beta_j + pi_j)^2 $. + // + // We add one to minimize the number of lines when everything else is more + // or less equal. + (1.0 + badness + penalty).powi(2) +} + +/// Calls `f` for all possible points in the text where lines can broken. +/// +/// Yields for each breakpoint the text index, whether the break is mandatory +/// (after `\n`) and whether a hyphen is required (when breaking inside of a +/// word). +/// +/// This is an internal instead of an external iterator because it makes the +/// code much simpler and the consumers of this function don't need the +/// composability and flexibility of external iteration anyway. +fn breakpoints(p: &Preparation, mut f: impl FnMut(usize, Breakpoint)) { + let text = p.text; + + // Single breakpoint at the end for empty text. + if text.is_empty() { + f(0, Breakpoint::Mandatory); + return; + } + + let hyphenate = p.hyphenate != Some(false); + let lb = LINEBREAK_DATA.as_borrowed(); + let segmenter = match p.lang { + Some(Lang::CHINESE | Lang::JAPANESE) => &CJ_SEGMENTER, + _ => &SEGMENTER, + }; + + let mut last = 0; + let mut iter = segmenter.segment_str(text).peekable(); + + loop { + // Special case for links. UAX #14 doesn't handle them well. + let (head, tail) = text.split_at(last); + if head.ends_with("://") || tail.starts_with("www.") { + let (link, _) = link_prefix(tail); + linebreak_link(link, |i| f(last + i, Breakpoint::Normal)); + last += link.len(); + while iter.peek().is_some_and(|&p| p < last) { + iter.next(); + } + } + + // Get the next UAX #14 linebreak opportunity. + let Some(point) = iter.next() else { break }; + + // Skip breakpoint if there is no char before it. icu4x generates one + // at offset 0, but we don't want it. + let Some(c) = text[..point].chars().next_back() else { continue }; + + // Find out whether the last break was mandatory by checking against + // rules LB4 and LB5, special-casing the end of text according to LB3. + // See also: https://docs.rs/icu_segmenter/latest/icu_segmenter/struct.LineSegmenter.html + let breakpoint = if point == text.len() { + Breakpoint::Mandatory + } else { + match lb.get(c) { + // Fix for: https://github.com/unicode-org/icu4x/issues/4146 + LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ => continue, + LineBreak::MandatoryBreak + | LineBreak::CarriageReturn + | LineBreak::LineFeed + | LineBreak::NextLine => Breakpoint::Mandatory, + _ => Breakpoint::Normal, + } + }; + + // Hyphenate between the last and current breakpoint. + if hyphenate && last < point { + for segment in text[last..point].split_word_bounds() { + if !segment.is_empty() && segment.chars().all(char::is_alphabetic) { + hyphenations(p, &lb, last, segment, &mut f); + } + last += segment.len(); + } + } + + // Call `f` for the UAX #14 break opportunity. + f(point, breakpoint); + last = point; + } +} + +/// Generate breakpoints for hyphenations within a word. +fn hyphenations( + p: &Preparation, + lb: &CodePointMapDataBorrowed<LineBreak>, + mut offset: usize, + word: &str, + mut f: impl FnMut(usize, Breakpoint), +) { + let Some(lang) = lang_at(p, offset) else { return }; + let count = word.chars().count(); + let end = offset + word.len(); + + let mut chars = 0; + for syllable in hypher::hyphenate(word, lang) { + offset += syllable.len(); + chars += syllable.chars().count(); + + // Don't hyphenate after the final syllable. + if offset == end { + continue; + } + + // Filter out hyphenation opportunities where hyphenation was actually + // disabled. + if !hyphenate_at(p, offset) { + continue; + } + + // Filter out forbidden hyphenation opportunities. + if matches!( + syllable.chars().next_back().map(|c| lb.get(c)), + Some(LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ) + ) { + continue; + } + + // Determine the number of codepoints before and after the hyphenation. + let l = chars.saturating_as::<u8>(); + let r = (count - chars).saturating_as::<u8>(); + + // Call `f` for the word-internal hyphenation opportunity. + f(offset, Breakpoint::Hyphen(l, r)); + } +} + +/// Produce linebreak opportunities for a link. +fn linebreak_link(link: &str, mut f: impl FnMut(usize)) { + #[derive(PartialEq)] + enum Class { + Alphabetic, + Digit, + Open, + Other, + } + + impl Class { + fn of(c: char) -> Self { + if c.is_alphabetic() { + Class::Alphabetic + } else if c.is_numeric() { + Class::Digit + } else if matches!(c, '(' | '[') { + Class::Open + } else { + Class::Other + } + } + } + + let mut offset = 0; + let mut prev = Class::Other; + + for (end, c) in link.char_indices() { + let class = Class::of(c); + + // Emit opportunities when going from + // - other -> other + // - alphabetic -> numeric + // - numeric -> alphabetic + // Never before/after opening delimiters. + if end > 0 + && prev != Class::Open + && if class == Class::Other { prev == Class::Other } else { class != prev } + { + let piece = &link[offset..end]; + if piece.len() < 16 { + // For bearably long segments, emit them as one. + offset = end; + f(offset); + } else { + // If it gets very long (e.g. a hash in the URL), just allow a + // break at every char. + for c in piece.chars() { + offset += c.len_utf8(); + f(offset); + } + } + } + + prev = class; + } +} + +/// Whether hyphenation is enabled at the given offset. +fn hyphenate_at(p: &Preparation, offset: usize) -> bool { + p.hyphenate + .or_else(|| { + let (_, item) = p.get(offset); + let styles = item.text()?.styles; + Some(TextElem::hyphenate_in(styles)) + }) + .unwrap_or(false) +} + +/// The text language at the given offset. +fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> { + let lang = p.lang.or_else(|| { + let (_, item) = p.get(offset); + let styles = item.text()?.styles; + Some(TextElem::lang_in(styles)) + })?; + + let bytes = lang.as_str().as_bytes().try_into().ok()?; + hypher::Lang::from_iso(bytes) +} + +/// Resolved metrics relevant for cost computation. +struct CostMetrics { + min_ratio: f64, + min_approx_ratio: f64, + approx_hyphen_width: Abs, + hyph_cost: Cost, + runt_cost: Cost, +} + +impl CostMetrics { + /// Compute shared metrics for paragraph optimization. + fn compute(p: &Preparation) -> Self { + Self { + // When justifying, we may stretch spaces below their natural width. + min_ratio: if p.justify { MIN_RATIO } else { 0.0 }, + min_approx_ratio: if p.justify { MIN_APPROX_RATIO } else { 0.0 }, + // Approximate hyphen width for estimates. + approx_hyphen_width: Em::new(0.33).at(p.size), + // Costs. + hyph_cost: DEFAULT_HYPH_COST * p.costs.hyphenation().get(), + runt_cost: DEFAULT_RUNT_COST * p.costs.runt().get(), + } + } + + /// The minimum line ratio we allow for shrinking. For approximate layout, + /// we allow less because otherwise we get an invalid layout fairly often, + /// which makes our bound useless. + fn min_ratio(&self, approx: bool) -> f64 { + if approx { + self.min_approx_ratio + } else { + self.min_ratio + } + } +} + +/// Estimated line metrics. +/// +/// Allows to get a quick estimate of a metric for a line between two byte +/// positions. +struct Estimates { + widths: CumulativeVec<Abs>, + stretchability: CumulativeVec<Abs>, + shrinkability: CumulativeVec<Abs>, + justifiables: CumulativeVec<usize>, +} + +impl Estimates { + /// Compute estimations for approximate Knuth-Plass layout. + fn compute(p: &Preparation) -> Self { + let cap = p.text.len(); + + let mut widths = CumulativeVec::with_capacity(cap); + let mut stretchability = CumulativeVec::with_capacity(cap); + let mut shrinkability = CumulativeVec::with_capacity(cap); + let mut justifiables = CumulativeVec::with_capacity(cap); + + for (range, item) in p.items.iter() { + if let Item::Text(shaped) = item { + for g in shaped.glyphs.iter() { + let byte_len = g.range.len(); + let stretch = g.stretchability().0 + g.stretchability().1; + let shrink = g.shrinkability().0 + g.shrinkability().1; + widths.push(byte_len, g.x_advance.at(shaped.size)); + stretchability.push(byte_len, stretch.at(shaped.size)); + shrinkability.push(byte_len, shrink.at(shaped.size)); + justifiables.push(byte_len, g.is_justifiable() as usize); + } + } else { + widths.push(range.len(), item.natural_width()); + } + + widths.adjust(range.end); + stretchability.adjust(range.end); + shrinkability.adjust(range.end); + justifiables.adjust(range.end); + } + + Self { + widths, + stretchability, + shrinkability, + justifiables, + } + } +} + +/// An accumulative array of a metric. +struct CumulativeVec<T> { + total: T, + summed: Vec<T>, +} + +impl<T> CumulativeVec<T> +where + T: Default + Copy + Add<Output = T> + Sub<Output = T>, +{ + /// Create a new instance with the given capacity. + fn with_capacity(capacity: usize) -> Self { + let total = T::default(); + let mut summed = Vec::with_capacity(capacity); + summed.push(total); + Self { total, summed } + } + + /// Adjust to cover the given byte length. + fn adjust(&mut self, len: usize) { + self.summed.resize(len, self.total); + } + + /// Adds a new segment with the given byte length and metric. + fn push(&mut self, byte_len: usize, metric: T) { + self.total = self.total + metric; + for _ in 0..byte_len { + self.summed.push(self.total); + } + } + + /// Estimates the metrics for the line spanned by the range. + fn estimate(&self, range: Range) -> T { + self.get(range.end) - self.get(range.start) + } + + /// Get the metric at the given byte position. + fn get(&self, index: usize) -> T { + match index.checked_sub(1) { + None => T::default(), + Some(i) => self.summed[i], + } + } +} diff --git a/crates/typst-layout/src/inline/mod.rs b/crates/typst-layout/src/inline/mod.rs new file mode 100644 index 00000000..658e3084 --- /dev/null +++ b/crates/typst-layout/src/inline/mod.rs @@ -0,0 +1,105 @@ +#[path = "box.rs"] +mod box_; +mod collect; +mod deco; +mod finalize; +mod line; +mod linebreak; +mod prepare; +mod shaping; + +pub use self::box_::layout_box; + +use comemo::{Track, Tracked, TrackedMut}; +use typst_library::diag::SourceResult; +use typst_library::engine::{Engine, Route, Sink, Traced}; +use typst_library::foundations::{StyleChain, StyleVec}; +use typst_library::introspection::{Introspector, Locator, LocatorLink}; +use typst_library::layout::{Fragment, Size}; +use typst_library::model::ParElem; +use typst_library::routines::Routines; +use typst_library::World; + +use self::collect::{collect, Item, Segment, SpanMapper}; +use self::deco::decorate; +use self::finalize::finalize; +use self::line::{commit, line, Line}; +use self::linebreak::{linebreak, Breakpoint}; +use self::prepare::{prepare, Preparation}; +use self::shaping::{ + cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText, + BEGIN_PUNCT_PAT, END_PUNCT_PAT, +}; + +/// Range of a substring of text. +type Range = std::ops::Range<usize>; + +/// Layouts content inline. +pub fn layout_inline( + engine: &mut Engine, + children: &StyleVec, + locator: Locator, + styles: StyleChain, + consecutive: bool, + region: Size, + expand: bool, +) -> SourceResult<Fragment> { + layout_inline_impl( + children, + engine.routines, + engine.world, + engine.introspector, + engine.traced, + TrackedMut::reborrow_mut(&mut engine.sink), + engine.route.track(), + locator.track(), + styles, + consecutive, + region, + expand, + ) +} + +/// The internal, memoized implementation of `layout_inline`. +#[comemo::memoize] +#[allow(clippy::too_many_arguments)] +fn layout_inline_impl( + children: &StyleVec, + routines: &Routines, + world: Tracked<dyn World + '_>, + introspector: Tracked<Introspector>, + traced: Tracked<Traced>, + sink: TrackedMut<Sink>, + route: Tracked<Route>, + locator: Tracked<Locator>, + styles: StyleChain, + consecutive: bool, + region: Size, + expand: bool, +) -> SourceResult<Fragment> { + let link = LocatorLink::new(locator); + let locator = Locator::link(&link); + let mut engine = Engine { + routines, + world, + introspector, + traced, + sink, + route: Route::extend(route), + }; + + let mut locator = locator.split(); + + // Collect all text into one string for BiDi analysis. + let (text, segments, spans) = + collect(children, &mut engine, &mut locator, &styles, region, consecutive)?; + + // Perform BiDi analysis and then prepares paragraph layout. + let p = prepare(&mut engine, children, &text, segments, spans, styles)?; + + // Break the paragraph into lines. + let lines = linebreak(&engine, &p, region.x - p.hang); + + // Turn the selected lines into frames. + finalize(&mut engine, &p, &lines, styles, region, expand, &mut locator) +} diff --git a/crates/typst-layout/src/inline/prepare.rs b/crates/typst-layout/src/inline/prepare.rs new file mode 100644 index 00000000..2dd79aec --- /dev/null +++ b/crates/typst-layout/src/inline/prepare.rs @@ -0,0 +1,196 @@ +use typst_library::foundations::{Resolve, Smart}; +use typst_library::layout::{Abs, AlignElem, Dir, Em, FixedAlignment}; +use typst_library::model::Linebreaks; +use typst_library::text::{Costs, Lang, TextElem}; +use unicode_bidi::{BidiInfo, Level as BidiLevel}; + +use super::*; + +/// A paragraph representation in which children are already layouted and text +/// is already preshaped. +/// +/// In many cases, we can directly reuse these results when constructing a line. +/// Only when a line break falls onto a text index that is not safe-to-break per +/// rustybuzz, we have to reshape that portion. +pub struct Preparation<'a> { + /// The paragraph's full text. + pub text: &'a str, + /// Bidirectional text embedding levels for the paragraph. + /// + /// This is `None` if the paragraph is BiDi-uniform (all the base direction). + pub bidi: Option<BidiInfo<'a>>, + /// Text runs, spacing and layouted elements. + pub items: Vec<(Range, Item<'a>)>, + /// Maps from byte indices to item indices. + pub indices: Vec<usize>, + /// The span mapper. + pub spans: SpanMapper, + /// Whether to hyphenate if it's the same for all children. + pub hyphenate: Option<bool>, + /// Costs for various layout decisions. + pub costs: Costs, + /// The dominant direction. + pub dir: Dir, + /// The text language if it's the same for all children. + pub lang: Option<Lang>, + /// The paragraph's resolved horizontal alignment. + pub align: FixedAlignment, + /// Whether to justify the paragraph. + pub justify: bool, + /// The paragraph's hanging indent. + pub hang: Abs, + /// Whether to add spacing between CJK and Latin characters. + pub cjk_latin_spacing: bool, + /// Whether font fallback is enabled for this paragraph. + pub fallback: bool, + /// How to determine line breaks. + pub linebreaks: Smart<Linebreaks>, + /// The text size. + pub size: Abs, +} + +impl<'a> Preparation<'a> { + /// Get the item that contains the given `text_offset`. + pub fn get(&self, offset: usize) -> &(Range, Item<'a>) { + let idx = self.indices.get(offset).copied().unwrap_or(0); + &self.items[idx] + } + + /// Iterate over the items that intersect the given `sliced` range. + pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> { + // Usually, we don't want empty-range items at the start of the line + // (because they will be part of the previous line), but for the first + // line, we need to keep them. + let start = match sliced.start { + 0 => 0, + n => self.indices.get(n).copied().unwrap_or(0), + }; + self.items[start..].iter().take_while(move |(range, _)| { + range.start < sliced.end || range.end <= sliced.end + }) + } +} + +/// Performs BiDi analysis and then prepares paragraph layout by building a +/// representation on which we can do line breaking without layouting each and +/// every line from scratch. +#[typst_macros::time] +pub fn prepare<'a>( + engine: &mut Engine, + children: &'a StyleVec, + text: &'a str, + segments: Vec<Segment<'a>>, + spans: SpanMapper, + styles: StyleChain<'a>, +) -> SourceResult<Preparation<'a>> { + let dir = TextElem::dir_in(styles); + let default_level = match dir { + Dir::RTL => BidiLevel::rtl(), + _ => BidiLevel::ltr(), + }; + + let bidi = BidiInfo::new(text, Some(default_level)); + let is_bidi = bidi + .levels + .iter() + .any(|level| level.is_ltr() != default_level.is_ltr()); + + let mut cursor = 0; + let mut items = Vec::with_capacity(segments.len()); + + // Shape the text to finalize the items. + for segment in segments { + let len = segment.textual_len(); + let end = cursor + len; + let range = cursor..end; + + match segment { + Segment::Text(_, styles) => { + shape_range(&mut items, engine, text, &bidi, range, styles); + } + Segment::Item(item) => items.push((range, item)), + } + + cursor = end; + } + + // Build the mapping from byte to item indices. + let mut indices = Vec::with_capacity(text.len()); + for (i, (range, _)) in items.iter().enumerate() { + indices.extend(range.clone().map(|_| i)); + } + + let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto(); + if cjk_latin_spacing { + add_cjk_latin_spacing(&mut items); + } + + Ok(Preparation { + text, + bidi: is_bidi.then_some(bidi), + items, + indices, + spans, + hyphenate: children.shared_get(styles, TextElem::hyphenate_in), + costs: TextElem::costs_in(styles), + dir, + lang: children.shared_get(styles, TextElem::lang_in), + align: AlignElem::alignment_in(styles).resolve(styles).x, + justify: ParElem::justify_in(styles), + hang: ParElem::hanging_indent_in(styles), + cjk_latin_spacing, + fallback: TextElem::fallback_in(styles), + linebreaks: ParElem::linebreaks_in(styles), + size: TextElem::size_in(styles), + }) +} + +/// Add some spacing between Han characters and western characters. See +/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition +/// in Horizontal Written Mode +fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) { + let mut items = items + .iter_mut() + .filter(|(_, x)| !matches!(x, Item::Tag(_))) + .peekable(); + + let mut prev: Option<&ShapedGlyph> = None; + while let Some((_, item)) = items.next() { + let Some(text) = item.text_mut() else { + prev = None; + continue; + }; + + // Since we only call this function in [`prepare`], we can assume that + // the Cow is owned, and `to_mut` can be called without overhead. + debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_))); + let mut glyphs = text.glyphs.to_mut().iter_mut().peekable(); + + while let Some(glyph) = glyphs.next() { + let next = glyphs.peek().map(|n| n as _).or_else(|| { + items + .peek() + .and_then(|(_, i)| i.text()) + .and_then(|shaped| shaped.glyphs.first()) + }); + + // Case 1: CJ followed by a Latin character + if glyph.is_cj_script() && next.is_some_and(|g| g.is_letter_or_number()) { + // The spacing is default to 1/4 em, and can be shrunk to 1/8 em. + glyph.x_advance += Em::new(0.25); + glyph.adjustability.shrinkability.1 += Em::new(0.125); + text.width += Em::new(0.25).at(text.size); + } + + // Case 2: Latin followed by a CJ character + if glyph.is_cj_script() && prev.is_some_and(|g| g.is_letter_or_number()) { + glyph.x_advance += Em::new(0.25); + glyph.x_offset += Em::new(0.25); + glyph.adjustability.shrinkability.0 += Em::new(0.125); + text.width += Em::new(0.25).at(text.size); + } + + prev = Some(glyph); + } + } +} diff --git a/crates/typst-layout/src/inline/shaping.rs b/crates/typst-layout/src/inline/shaping.rs new file mode 100644 index 00000000..bd803b52 --- /dev/null +++ b/crates/typst-layout/src/inline/shaping.rs @@ -0,0 +1,1175 @@ +use std::borrow::Cow; +use std::fmt::{self, Debug, Formatter}; +use std::str::FromStr; +use std::sync::Arc; + +use az::SaturatingAs; +use ecow::EcoString; +use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer}; +use ttf_parser::Tag; +use typst_library::engine::Engine; +use typst_library::foundations::{Smart, StyleChain}; +use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size}; +use typst_library::text::{ + families, features, is_default_ignorable, variant, Font, FontVariant, Glyph, Lang, + Region, TextEdgeBounds, TextElem, TextItem, +}; +use typst_library::World; +use typst_utils::SliceExt; +use unicode_bidi::{BidiInfo, Level as BidiLevel}; +use unicode_script::{Script, UnicodeScript}; + +use super::{decorate, Item, Range, SpanMapper}; + +/// The result of shaping text. +/// +/// This type contains owned or borrowed shaped text runs, which can be +/// measured, used to reshape substrings more quickly and converted into a +/// frame. +#[derive(Clone)] +pub struct ShapedText<'a> { + /// The start of the text in the full paragraph. + pub base: usize, + /// The text that was shaped. + pub text: &'a str, + /// The text direction. + pub dir: Dir, + /// The text language. + pub lang: Lang, + /// The text region. + pub region: Option<Region>, + /// The text's style properties. + pub styles: StyleChain<'a>, + /// The font variant. + pub variant: FontVariant, + /// The font size. + pub size: Abs, + /// The width of the text's bounding box. + pub width: Abs, + /// The shaped glyphs. + pub glyphs: Cow<'a, [ShapedGlyph]>, +} + +/// A single glyph resulting from shaping. +#[derive(Debug, Clone)] +pub struct ShapedGlyph { + /// The font the glyph is contained in. + pub font: Font, + /// The glyph's index in the font. + pub glyph_id: u16, + /// The advance width of the glyph. + pub x_advance: Em, + /// The horizontal offset of the glyph. + pub x_offset: Em, + /// The vertical offset of the glyph. + pub y_offset: Em, + /// The adjustability of the glyph. + pub adjustability: Adjustability, + /// The byte range of this glyph's cluster in the full paragraph. A cluster + /// is a sequence of one or multiple glyphs that cannot be separated and + /// must always be treated as a union. + /// + /// The range values of the glyphs in a [`ShapedText`] should not overlap + /// with each other, and they should be monotonically increasing (for + /// left-to-right or top-to-bottom text) or monotonically decreasing (for + /// right-to-left or bottom-to-top text). + pub range: Range, + /// Whether splitting the shaping result before this glyph would yield the + /// same results as shaping the parts to both sides of `text_index` + /// separately. + pub safe_to_break: bool, + /// The first char in this glyph's cluster. + pub c: char, + /// Whether this glyph is justifiable for CJK scripts. + pub is_justifiable: bool, + /// The script of the glyph. + pub script: Script, +} + +#[derive(Debug, Clone, Default)] +pub struct Adjustability { + /// The left and right stretchability + pub stretchability: (Em, Em), + /// The left and right shrinkability + pub shrinkability: (Em, Em), +} + +impl ShapedGlyph { + /// Whether the glyph is a space. + pub fn is_space(&self) -> bool { + is_space(self.c) + } + + /// Whether the glyph is justifiable. + pub fn is_justifiable(&self) -> bool { + // GB style is not relevant here. + self.is_justifiable + } + + /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). + pub fn is_cj_script(&self) -> bool { + is_cj_script(self.c, self.script) + } + + pub fn is_cjk_punctuation(&self) -> bool { + self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb) + || self.is_cjk_right_aligned_punctuation() + || self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb) + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool { + is_cjk_left_aligned_punctuation( + self.c, + self.x_advance, + self.stretchability(), + style, + ) + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_right_aligned_punctuation(&self) -> bool { + is_cjk_right_aligned_punctuation(self.c, self.x_advance, self.stretchability()) + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool { + is_cjk_center_aligned_punctuation(self.c, style) + } + + /// Whether the glyph is a western letter or number. + pub fn is_letter_or_number(&self) -> bool { + matches!(self.c.script(), Script::Latin | Script::Greek | Script::Cyrillic) + || matches!(self.c, '#' | '$' | '%' | '&') + || self.c.is_ascii_digit() + } + + pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability { + let width = self.x_advance; + if self.is_space() { + Adjustability { + // The number for spaces is from Knuth-Plass' paper + stretchability: (Em::zero(), width / 2.0), + shrinkability: (Em::zero(), width / 3.0), + } + } else if self.is_cjk_left_aligned_punctuation(style) { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (Em::zero(), width / 2.0), + } + } else if self.is_cjk_right_aligned_punctuation() { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (width / 2.0, Em::zero()), + } + } else if self.is_cjk_center_aligned_punctuation(style) { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (width / 4.0, width / 4.0), + } + } else { + Adjustability::default() + } + } + + /// The stretchability of the character. + pub fn stretchability(&self) -> (Em, Em) { + self.adjustability.stretchability + } + + /// The shrinkability of the character. + pub fn shrinkability(&self) -> (Em, Em) { + self.adjustability.shrinkability + } + + /// Shrink the width of glyph on the left side. + pub fn shrink_left(&mut self, amount: Em) { + self.x_offset -= amount; + self.x_advance -= amount; + self.adjustability.shrinkability.0 -= amount; + } + + /// Shrink the width of glyph on the right side. + pub fn shrink_right(&mut self, amount: Em) { + self.x_advance -= amount; + self.adjustability.shrinkability.1 -= amount; + } +} + +/// A side you can go toward. +enum Side { + /// To the left-hand side. + Left, + /// To the right-hand side. + Right, +} + +impl<'a> ShapedText<'a> { + /// Build the shaped text's frame. + /// + /// The `justification` defines how much extra advance width each + /// [justifiable glyph](ShapedGlyph::is_justifiable) will get. + pub fn build( + &self, + engine: &Engine, + spans: &SpanMapper, + justification_ratio: f64, + extra_justification: Abs, + ) -> Frame { + let (top, bottom) = self.measure(engine); + let size = Size::new(self.width, top + bottom); + + let mut offset = Abs::zero(); + let mut frame = Frame::soft(size); + frame.set_baseline(top); + + let shift = TextElem::baseline_in(self.styles); + let decos = TextElem::deco_in(self.styles); + let fill = TextElem::fill_in(self.styles); + let stroke = TextElem::stroke_in(self.styles); + let span_offset = TextElem::span_offset_in(self.styles); + + for ((font, y_offset), group) in + self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset)) + { + let mut range = group[0].range.clone(); + for glyph in group { + range.start = range.start.min(glyph.range.start); + range.end = range.end.max(glyph.range.end); + } + + let pos = Point::new(offset, top + shift - y_offset.at(self.size)); + let glyphs: Vec<Glyph> = group + .iter() + .map(|shaped: &ShapedGlyph| { + let adjustability_left = if justification_ratio < 0.0 { + shaped.shrinkability().0 + } else { + shaped.stretchability().0 + }; + let adjustability_right = if justification_ratio < 0.0 { + shaped.shrinkability().1 + } else { + shaped.stretchability().1 + }; + + let justification_left = adjustability_left * justification_ratio; + let mut justification_right = + adjustability_right * justification_ratio; + if shaped.is_justifiable() { + justification_right += + Em::from_length(extra_justification, self.size) + } + + frame.size_mut().x += justification_left.at(self.size) + + justification_right.at(self.size); + + // We may not be able to reach the offset completely if + // it exceeds u16, but better to have a roughly correct + // span offset than nothing. + let mut span = spans.span_at(shaped.range.start); + span.1 = span.1.saturating_add(span_offset.saturating_as()); + + // |<---- a Glyph ---->| + // -->|ShapedGlyph|<-- + // +---+-----------+---+ + // | | *********| | + // | | * | | + // | | * ****| | + // | | * *| | + // | | *********| | + // +---+--+--------+---+ + // A B C D + // Note A, B, D could be positive, zero, or negative. + // A: justification_left + // B: ShapedGlyph's x_offset + // (though a small part of the glyph may go inside B) + // B+C: ShapedGlyph's x_advance + // D: justification_right + // A+B: Glyph's x_offset + // A+B+C+D: Glyph's x_advance + Glyph { + id: shaped.glyph_id, + x_advance: shaped.x_advance + + justification_left + + justification_right, + x_offset: shaped.x_offset + justification_left, + range: (shaped.range.start - range.start).saturating_as() + ..(shaped.range.end - range.start).saturating_as(), + span, + } + }) + .collect(); + + let item = TextItem { + font, + size: self.size, + lang: self.lang, + region: self.region, + fill: fill.clone(), + stroke: stroke.clone().map(|s| s.unwrap_or_default()), + text: self.text[range.start - self.base..range.end - self.base].into(), + glyphs, + }; + + let width = item.width(); + if decos.is_empty() { + frame.push(pos, FrameItem::Text(item)); + } else { + // Apply line decorations. + frame.push(pos, FrameItem::Text(item.clone())); + for deco in &decos { + decorate(&mut frame, deco, &item, width, shift, pos); + } + } + + offset += width; + } + + frame + } + + /// Measure the top and bottom extent of this text. + pub fn measure(&self, engine: &Engine) -> (Abs, Abs) { + let mut top = Abs::zero(); + let mut bottom = Abs::zero(); + + let top_edge = TextElem::top_edge_in(self.styles); + let bottom_edge = TextElem::bottom_edge_in(self.styles); + + // Expand top and bottom by reading the font's vertical metrics. + let mut expand = |font: &Font, bounds: TextEdgeBounds| { + let (t, b) = font.edges(top_edge, bottom_edge, self.size, bounds); + top.set_max(t); + bottom.set_max(b); + }; + + if self.glyphs.is_empty() { + // When there are no glyphs, we just use the vertical metrics of the + // first available font. + let world = engine.world; + for family in families(self.styles) { + if let Some(font) = world + .book() + .select(family, self.variant) + .and_then(|id| world.font(id)) + { + expand(&font, TextEdgeBounds::Zero); + break; + } + } + } else { + for g in self.glyphs.iter() { + expand(&g.font, TextEdgeBounds::Glyph(g.glyph_id)); + } + } + + (top, bottom) + } + + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. + pub fn justifiables(&self) -> usize { + self.glyphs.iter().filter(|g| g.is_justifiable()).count() + } + + /// Whether the last glyph is a CJK character which should not be justified + /// on line end. + pub fn cjk_justifiable_at_last(&self) -> bool { + self.glyphs + .last() + .map(|g| g.is_cj_script() || g.is_cjk_punctuation()) + .unwrap_or(false) + } + + /// The stretchability of the text. + pub fn stretchability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.stretchability().0 + g.stretchability().1) + .sum::<Em>() + .at(self.size) + } + + /// The shrinkability of the text + pub fn shrinkability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.shrinkability().0 + g.shrinkability().1) + .sum::<Em>() + .at(self.size) + } + + /// Reshape a range of the shaped text, reusing information from this + /// shaping process if possible. + /// + /// The text `range` is relative to the whole paragraph. + pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> { + let text = &self.text[text_range.start - self.base..text_range.end - self.base]; + if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) { + #[cfg(debug_assertions)] + assert_all_glyphs_in_range(glyphs, text, text_range.clone()); + Self { + base: text_range.start, + text, + dir: self.dir, + lang: self.lang, + region: self.region, + styles: self.styles, + size: self.size, + variant: self.variant, + width: glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size), + glyphs: Cow::Borrowed(glyphs), + } + } else { + shape( + engine, + text_range.start, + text, + self.styles, + self.dir, + self.lang, + self.region, + ) + } + } + + /// Derive an empty text run with the same properties as this one. + pub fn empty(&self) -> Self { + Self { + text: "", + width: Abs::zero(), + glyphs: Cow::Borrowed(&[]), + ..*self + } + } + + /// Push a hyphen to end of the text. + pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { + self.insert_hyphen(engine, fallback, Side::Right) + } + + /// Prepend a hyphen to start of the text. + pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { + self.insert_hyphen(engine, fallback, Side::Left) + } + + fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) { + let world = engine.world; + let book = world.book(); + let fallback_func = if fallback { + Some(|| book.select_fallback(None, self.variant, "-")) + } else { + None + }; + let mut chain = families(self.styles) + .map(|family| book.select(family, self.variant)) + .chain(fallback_func.iter().map(|f| f())) + .flatten(); + + chain.find_map(|id| { + let font = world.font(id)?; + let ttf = font.ttf(); + let glyph_id = ttf.glyph_index('-')?; + let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); + let range = match side { + Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start), + Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end), + } + // In the unlikely chance that we hyphenate after an empty line, + // ensure that the glyph range still falls after self.base so + // that subtracting either of the endpoints by self.base doesn't + // underflow. See <https://github.com/typst/typst/issues/2283>. + .unwrap_or_else(|| self.base..self.base); + self.width += x_advance.at(self.size); + let glyph = ShapedGlyph { + font, + glyph_id: glyph_id.0, + x_advance, + x_offset: Em::zero(), + y_offset: Em::zero(), + adjustability: Adjustability::default(), + range, + safe_to_break: true, + c: '-', + is_justifiable: false, + script: Script::Common, + }; + match side { + Side::Left => self.glyphs.to_mut().insert(0, glyph), + Side::Right => self.glyphs.to_mut().push(glyph), + } + Some(()) + }); + } + + /// Find the subslice of glyphs that represent the given text range if both + /// sides are safe to break. + fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> { + let Range { mut start, mut end } = text_range; + if !self.dir.is_positive() { + std::mem::swap(&mut start, &mut end); + } + + let left = self.find_safe_to_break(start)?; + let right = self.find_safe_to_break(end)?; + Some(&self.glyphs[left..right]) + } + + /// Find the glyph offset matching the text index that is most towards the + /// start of the text and safe-to-break. + fn find_safe_to_break(&self, text_index: usize) -> Option<usize> { + let ltr = self.dir.is_positive(); + + // Handle edge cases. + let len = self.glyphs.len(); + if text_index == self.base { + return Some(if ltr { 0 } else { len }); + } else if text_index == self.base + self.text.len() { + return Some(if ltr { len } else { 0 }); + } + + // Find any glyph with the text index. + let found = self.glyphs.binary_search_by(|g: &ShapedGlyph| { + let ordering = g.range.start.cmp(&text_index); + if ltr { + ordering + } else { + ordering.reverse() + } + }); + + let mut idx = match found { + Ok(idx) => idx, + Err(idx) => { + // Handle the special case where we break before a '\n' + // + // For example: (assume `a` is a CJK character with three bytes) + // text: " a \n b " + // index: 0 1 2 3 4 5 + // text_index: ^ + // glyphs: 0 . 1 + // + // We will get found = Err(1), because '\n' does not have a + // glyph. But it's safe to break here. Thus the following + // condition: + // - glyphs[0].end == text_index == 3 + // - text[3] == '\n' + return (idx > 0 + && self.glyphs[idx - 1].range.end == text_index + && self.text[text_index - self.base..].starts_with('\n')) + .then_some(idx); + } + }; + + // Search for the start-most glyph with the text index. This means + // we take empty range glyphs at the start and leave those at the end + // for the next line. + let dec = if ltr { usize::checked_sub } else { usize::checked_add }; + while let Some(next) = dec(idx, 1) { + if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) { + break; + } + idx = next; + } + + // RTL needs offset one because the left side of the range should be + // exclusive and the right side inclusive, contrary to the normal + // behaviour of ranges. + self.glyphs[idx].safe_to_break.then_some(idx + usize::from(!ltr)) + } +} + +impl Debug for ShapedText<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + self.text.fmt(f) + } +} + +/// Group a range of text by BiDi level and script, shape the runs and generate +/// items for them. +pub fn shape_range<'a>( + items: &mut Vec<(Range, Item<'a>)>, + engine: &Engine, + text: &'a str, + bidi: &BidiInfo<'a>, + range: Range, + styles: StyleChain<'a>, +) { + let script = TextElem::script_in(styles); + let lang = TextElem::lang_in(styles); + let region = TextElem::region_in(styles); + let mut process = |range: Range, level: BidiLevel| { + let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL }; + let shaped = + shape(engine, range.start, &text[range.clone()], styles, dir, lang, region); + items.push((range, Item::Text(shaped))); + }; + + let mut prev_level = BidiLevel::ltr(); + let mut prev_script = Script::Unknown; + let mut cursor = range.start; + + // Group by embedding level and script. If the text's script is explicitly + // set (rather than inferred from the glyphs), we keep the script at an + // unchanging `Script::Unknown` so that only level changes cause breaks. + for i in range.clone() { + if !text.is_char_boundary(i) { + continue; + } + + let level = bidi.levels[i]; + let curr_script = match script { + Smart::Auto => { + text[i..].chars().next().map_or(Script::Unknown, |c| c.script()) + } + Smart::Custom(_) => Script::Unknown, + }; + + if level != prev_level || !is_compatible(curr_script, prev_script) { + if cursor < i { + process(cursor..i, prev_level); + } + cursor = i; + prev_level = level; + prev_script = curr_script; + } else if is_generic_script(prev_script) { + prev_script = curr_script; + } + } + + process(cursor..range.end, prev_level); +} + +/// Whether this is not a specific script. +fn is_generic_script(script: Script) -> bool { + matches!(script, Script::Unknown | Script::Common | Script::Inherited) +} + +/// Whether these script can be part of the same shape run. +fn is_compatible(a: Script, b: Script) -> bool { + is_generic_script(a) || is_generic_script(b) || a == b +} + +/// Shape text into [`ShapedText`]. +#[allow(clippy::too_many_arguments)] +fn shape<'a>( + engine: &Engine, + base: usize, + text: &'a str, + styles: StyleChain<'a>, + dir: Dir, + lang: Lang, + region: Option<Region>, +) -> ShapedText<'a> { + let size = TextElem::size_in(styles); + let mut ctx = ShapingContext { + engine, + size, + glyphs: vec![], + used: vec![], + styles, + variant: variant(styles), + features: features(styles), + fallback: TextElem::fallback_in(styles), + dir, + }; + + if !text.is_empty() { + shape_segment(&mut ctx, base, text, families(styles)); + } + + track_and_space(&mut ctx); + calculate_adjustability(&mut ctx, lang, region); + + #[cfg(debug_assertions)] + assert_all_glyphs_in_range(&ctx.glyphs, text, base..(base + text.len())); + #[cfg(debug_assertions)] + assert_glyph_ranges_in_order(&ctx.glyphs, dir); + + ShapedText { + base, + text, + dir, + lang, + region, + styles, + variant: ctx.variant, + size, + width: ctx.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size), + glyphs: Cow::Owned(ctx.glyphs), + } +} + +/// Holds shaping results and metadata common to all shaped segments. +struct ShapingContext<'a, 'v> { + engine: &'a Engine<'v>, + glyphs: Vec<ShapedGlyph>, + used: Vec<Font>, + styles: StyleChain<'a>, + size: Abs, + variant: FontVariant, + features: Vec<rustybuzz::Feature>, + fallback: bool, + dir: Dir, +} + +/// Shape text with font fallback using the `families` iterator. +fn shape_segment<'a>( + ctx: &mut ShapingContext, + base: usize, + text: &str, + mut families: impl Iterator<Item = &'a str> + Clone, +) { + // Don't try shaping newlines, tabs, or default ignorables. + if text + .chars() + .all(|c| c == '\n' || c == '\t' || is_default_ignorable(c)) + { + return; + } + + // Find the next available family. + let world = ctx.engine.world; + let book = world.book(); + let mut selection = families.find_map(|family| { + book.select(family, ctx.variant) + .and_then(|id| world.font(id)) + .filter(|font| !ctx.used.contains(font)) + }); + + // Do font fallback if the families are exhausted and fallback is enabled. + if selection.is_none() && ctx.fallback { + let first = ctx.used.first().map(Font::info); + selection = book + .select_fallback(first, ctx.variant, text) + .and_then(|id| world.font(id)) + .filter(|font| !ctx.used.contains(font)); + } + + // Extract the font id or shape notdef glyphs if we couldn't find any font. + let Some(font) = selection else { + if let Some(font) = ctx.used.first().cloned() { + shape_tofus(ctx, base, text, font); + } + return; + }; + + ctx.used.push(font.clone()); + + // Fill the buffer with our text. + let mut buffer = UnicodeBuffer::new(); + buffer.push_str(text); + buffer.set_language(language(ctx.styles)); + if let Some(script) = TextElem::script_in(ctx.styles).custom().and_then(|script| { + rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes())) + }) { + buffer.set_script(script) + } + buffer.set_direction(match ctx.dir { + Dir::LTR => rustybuzz::Direction::LeftToRight, + Dir::RTL => rustybuzz::Direction::RightToLeft, + _ => unimplemented!("vertical text layout"), + }); + buffer.guess_segment_properties(); + + // By default, Harfbuzz will create zero-width space glyphs for default + // ignorables. This is probably useful for GUI apps that want noticable + // effects on the cursor for those, but for us it's not useful and hurts + // text extraction. + buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES); + + // Prepare the shape plan. This plan depends on direction, script, language, + // and features, but is independent from the text and can thus be memoized. + let plan = create_shape_plan( + &font, + buffer.direction(), + buffer.script(), + buffer.language().as_ref(), + &ctx.features, + ); + + // Shape! + let buffer = rustybuzz::shape_with_plan(font.rusty(), &plan, buffer); + let infos = buffer.glyph_infos(); + let pos = buffer.glyph_positions(); + let ltr = ctx.dir.is_positive(); + + // Collect the shaped glyphs, doing fallback and shaping parts again with + // the next font if necessary. + let mut i = 0; + while i < infos.len() { + let info = &infos[i]; + let cluster = info.cluster as usize; + + // Add the glyph to the shaped output. + if info.glyph_id != 0 { + // Determine the text range of the glyph. + let start = base + cluster; + let end = base + + if ltr { i.checked_add(1) } else { i.checked_sub(1) } + .and_then(|last| infos.get(last)) + .map_or(text.len(), |info| info.cluster as usize); + + let c = text[cluster..].chars().next().unwrap(); + let script = c.script(); + let x_advance = font.to_em(pos[i].x_advance); + ctx.glyphs.push(ShapedGlyph { + font: font.clone(), + glyph_id: info.glyph_id as u16, + // TODO: Don't ignore y_advance. + x_advance, + x_offset: font.to_em(pos[i].x_offset), + y_offset: font.to_em(pos[i].y_offset), + adjustability: Adjustability::default(), + range: start..end, + safe_to_break: !info.unsafe_to_break(), + c, + is_justifiable: is_justifiable( + c, + script, + x_advance, + Adjustability::default().stretchability, + ), + script, + }); + } else { + // First, search for the end of the tofu sequence. + let k = i; + while infos.get(i + 1).is_some_and(|info| info.glyph_id == 0) { + i += 1; + } + + // Then, determine the start and end text index for the tofu + // sequence. + // + // Examples: + // Everything is shown in visual order. Tofus are written as "_". + // We want to find out that the tofus span the text `2..6`. + // Note that the clusters are longer than 1 char. + // + // Left-to-right: + // Text: h a l i h a l l o + // Glyphs: A _ _ C E + // Clusters: 0 2 4 6 8 + // k=1 i=2 + // + // Right-to-left: + // Text: O L L A H I L A H + // Glyphs: E C _ _ A + // Clusters: 8 6 4 2 0 + // k=2 i=3 + let start = infos[if ltr { k } else { i }].cluster as usize; + let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) } + .and_then(|last| infos.get(last)) + .map_or(text.len(), |info| info.cluster as usize); + + // Trim half-baked cluster. + let remove = base + start..base + end; + while ctx.glyphs.last().is_some_and(|g| remove.contains(&g.range.start)) { + ctx.glyphs.pop(); + } + + // Recursively shape the tofu sequence with the next family. + shape_segment(ctx, base + start, &text[start..end], families.clone()); + } + + i += 1; + } + + ctx.used.pop(); +} + +/// Create a shape plan. +#[comemo::memoize] +fn create_shape_plan( + font: &Font, + direction: rustybuzz::Direction, + script: rustybuzz::Script, + language: Option<&rustybuzz::Language>, + features: &[rustybuzz::Feature], +) -> Arc<ShapePlan> { + Arc::new(rustybuzz::ShapePlan::new( + font.rusty(), + direction, + Some(script), + language, + features, + )) +} + +/// Shape the text with tofus from the given font. +fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) { + let x_advance = font.advance(0).unwrap_or_default(); + let add_glyph = |(cluster, c): (usize, char)| { + let start = base + cluster; + let end = start + c.len_utf8(); + let script = c.script(); + ctx.glyphs.push(ShapedGlyph { + font: font.clone(), + glyph_id: 0, + x_advance, + x_offset: Em::zero(), + y_offset: Em::zero(), + adjustability: Adjustability::default(), + range: start..end, + safe_to_break: true, + c, + is_justifiable: is_justifiable( + c, + script, + x_advance, + Adjustability::default().stretchability, + ), + script, + }); + }; + if ctx.dir.is_positive() { + text.char_indices().for_each(add_glyph); + } else { + text.char_indices().rev().for_each(add_glyph); + } +} + +/// Apply tracking and spacing to the shaped glyphs. +fn track_and_space(ctx: &mut ShapingContext) { + let tracking = Em::from_length(TextElem::tracking_in(ctx.styles), ctx.size); + let spacing = + TextElem::spacing_in(ctx.styles).map(|abs| Em::from_length(abs, ctx.size)); + + let mut glyphs = ctx.glyphs.iter_mut().peekable(); + while let Some(glyph) = glyphs.next() { + // Make non-breaking space same width as normal space. + if glyph.c == '\u{00A0}' { + glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default(); + } + + if glyph.is_space() { + glyph.x_advance = spacing.relative_to(glyph.x_advance); + } + + if glyphs + .peek() + .is_some_and(|next| glyph.range.start != next.range.start) + { + glyph.x_advance += tracking; + } + } +} + +/// Calculate stretchability and shrinkability of each glyph, +/// and CJK punctuation adjustments according to Chinese Layout Requirements. +fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) { + let style = cjk_punct_style(lang, region); + + for glyph in &mut ctx.glyphs { + glyph.adjustability = glyph.base_adjustability(style); + } + + let mut glyphs = ctx.glyphs.iter_mut().peekable(); + while let Some(glyph) = glyphs.next() { + // CNS style needs not further adjustment. + if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) { + continue; + } + + // Now we apply consecutive punctuation adjustment, specified in Chinese Layout. + // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout + // Requirements, section 3.1 Line Composition Rules for Punctuation Marks + let Some(next) = glyphs.peek_mut() else { continue }; + let width = glyph.x_advance; + let delta = width / 2.0; + if glyph.is_cjk_punctuation() + && next.is_cjk_punctuation() + && (glyph.shrinkability().1 + next.shrinkability().0) >= delta + { + let left_delta = glyph.shrinkability().1.min(delta); + glyph.shrink_right(left_delta); + next.shrink_left(delta - left_delta); + } + } +} + +/// Difference between non-breaking and normal space. +fn nbsp_delta(font: &Font) -> Option<Em> { + let space = font.ttf().glyph_index(' ')?.0; + let nbsp = font.ttf().glyph_index('\u{00A0}')?.0; + Some(font.advance(nbsp)? - font.advance(space)?) +} + +/// Process the language and region of a style chain into a +/// rustybuzz-compatible BCP 47 language. +fn language(styles: StyleChain) -> rustybuzz::Language { + let mut bcp: EcoString = TextElem::lang_in(styles).as_str().into(); + if let Some(region) = TextElem::region_in(styles) { + bcp.push('-'); + bcp.push_str(region.as_str()); + } + rustybuzz::Language::from_str(&bcp).unwrap() +} + +/// Returns true if all glyphs in `glyphs` have ranges within the range `range`. +#[cfg(debug_assertions)] +fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) { + if glyphs + .iter() + .any(|g| g.range.start < range.start || g.range.end > range.end) + { + panic!("one or more glyphs in {text:?} fell out of range"); + } +} + +/// Asserts that the ranges of `glyphs` is in the proper order according to +/// `dir`. +/// +/// This asserts instead of returning a bool in order to provide a more +/// informative message when the invariant is violated. +#[cfg(debug_assertions)] +fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) { + if glyphs.is_empty() { + return; + } + + // Iterator::is_sorted and friends are unstable as of Rust 1.70.0 + for i in 0..(glyphs.len() - 1) { + let a = &glyphs[i]; + let b = &glyphs[i + 1]; + let ord = a.range.start.cmp(&b.range.start); + let ord = if dir.is_positive() { ord } else { ord.reverse() }; + if ord == std::cmp::Ordering::Greater { + panic!( + "glyph ranges should be monotonically {}, \ + but found glyphs out of order:\n\n\ + first: {a:#?}\nsecond: {b:#?}", + if dir.is_positive() { "increasing" } else { "decreasing" }, + ); + } + } +} + +// The CJK punctuation that can appear at the beginning or end of a line. +pub const BEGIN_PUNCT_PAT: &[char] = + &['“', '‘', '《', '〈', '(', '『', '「', '【', '〖', '〔', '[', '{']; +pub const END_PUNCT_PAT: &[char] = &[ + '”', '’', ',', '.', '。', '、', ':', ';', '》', '〉', ')', '』', '」', '】', + '〗', '〕', ']', '}', '?', '!', +]; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CjkPunctStyle { + /// Standard GB/T 15834-2011, used mostly in mainland China. + Gb, + /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong. + Cns, + /// Standard JIS X 4051, used in Japan. + Jis, +} + +pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle { + match (lang, region.as_ref().map(Region::as_str)) { + (Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns, + (Lang::JAPANESE, _) => CjkPunctStyle::Jis, + // zh-CN, zh-SG, zh-MY use GB-style punctuation, + _ => CjkPunctStyle::Gb, + } +} + +/// Whether the glyph is a space. +fn is_space(c: char) -> bool { + matches!(c, ' ' | '\u{00A0}' | ' ') +} + +/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). +pub fn is_of_cj_script(c: char) -> bool { + is_cj_script(c, c.script()) +} + +/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). +/// The function is dedicated to typesetting Chinese or Japanese, which do not +/// have spaces between words, so K is not checked here. +fn is_cj_script(c: char, script: Script) -> bool { + use Script::*; + // U+30FC: Katakana-Hiragana Prolonged Sound Mark + matches!(script, Hiragana | Katakana | Han) || c == '\u{30FC}' +} + +/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> +fn is_cjk_left_aligned_punctuation( + c: char, + x_advance: Em, + stretchability: (Em, Em), + style: CjkPunctStyle, +) -> bool { + use CjkPunctStyle::*; + + // CJK quotation marks shares codepoints with latin quotation marks. + // But only the CJK ones have full width. + if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() { + return true; + } + + if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') + { + return true; + } + + if matches!(style, Gb) && matches!(c, '?' | '!') { + // In GB style, exclamations and question marks are also left aligned + // and can be adjusted. Note that they are not adjustable in other + // styles. + return true; + } + + // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks + matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}') +} + +/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> +fn is_cjk_right_aligned_punctuation( + c: char, + x_advance: Em, + stretchability: (Em, Em), +) -> bool { + // CJK quotation marks shares codepoints with latin quotation marks. + // But only the CJK ones have full width. + if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() { + return true; + } + // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks + matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{') +} + +/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> +fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool { + if matches!(style, CjkPunctStyle::Cns) + && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') + { + return true; + } + + // U+30FB: Katakana Middle Dot + // U+00B7: Middle Dot + matches!(c, '\u{30FB}' | '\u{00B7}') +} + +/// Whether the glyph is justifiable. +/// +/// Quotations in latin script and CJK are unfortunately the same codepoint +/// (U+2018, U+2019, U+201C, U+201D), but quotations in Chinese must be +/// fullwidth. This heuristics can therefore fail for monospace latin fonts. +/// However, since monospace fonts are usually not justified this edge case +/// should be rare enough. +fn is_justifiable( + c: char, + script: Script, + x_advance: Em, + stretchability: (Em, Em), +) -> bool { + // punctuation style is not relevant here. + let style = CjkPunctStyle::Gb; + is_space(c) + || is_cj_script(c, script) + || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style) + || is_cjk_right_aligned_punctuation(c, x_advance, stretchability) + || is_cjk_center_aligned_punctuation(c, style) +} |
