summaryrefslogtreecommitdiff
path: root/crates
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2024-06-24 20:33:41 +0200
committer GitHub <noreply@github.com> 2024-06-24 18:33:41 +0000
commit e6b531487042aab25fa72c886decf526672a4631 (patch)
tree aac9cab95b2f879706c91195ed5439171f6ed1f0 /crates
parent e90c30903d2c93a0d899ebd9326c5cd4ab11bf41 (diff)
Split up paragraph layout into more files (#4443)
Diffstat (limited to 'crates')
-rw-r--r-- crates/typst/src/layout/inline/collect.rs 350
-rw-r--r-- crates/typst/src/layout/inline/finalize.rs 63
-rw-r--r-- crates/typst/src/layout/inline/line.rs 550
-rw-r--r-- crates/typst/src/layout/inline/linebreak.rs 299
-rw-r--r-- crates/typst/src/layout/inline/mod.rs 1521
-rw-r--r-- crates/typst/src/layout/inline/prepare.rs 194
-rw-r--r-- crates/typst/src/layout/inline/shaping.rs 157
7 files changed, 1588 insertions, 1546 deletions
diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs
new file mode 100644
index 00000000..79d0d59f
--- /dev/null
+++ b/crates/typst/src/layout/inline/collect.rs
@@ -0,0 +1,350 @@
+use super::*;
+use crate::diag::bail;
+use crate::foundations::{Packed, Resolve};
+use crate::introspection::{Tag, TagElem};
+use crate::layout::{
+ Abs, AlignElem, BoxElem, Dir, Fr, Frame, HElem, InlineElem, InlineItem, Sizing,
+ Spacing,
+};
+use crate::syntax::Span;
+use crate::text::{
+ LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
+};
+use crate::utils::Numeric;
+
+// The characters by which spacing, inline content and pins are replaced in the
+// paragraph's full text.
+//
+// `Item::textual` returns `SPACING_REPLACE` for absolute and fractional
+// spacing and `OBJ_REPLACE` for frames. The `*_CHAR` variants are the same
+// characters as `char`s; `collect` uses them when peeking at the child that
+// follows a smart quote.
+const SPACING_REPLACE: &str = " "; // Space
+const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
+const SPACING_REPLACE_CHAR: char = ' ';
+const OBJ_REPLACE_CHAR: char = '\u{FFFC}';
+
+// Unicode BiDi control characters.
+//
+// The embedding characters wrap text whose direction differs from the
+// paragraph's outer direction; the isolate characters wrap the items produced
+// by an inline element (both in `collect`).
+const LTR_EMBEDDING: &str = "\u{202A}"; // Left-to-Right Embedding
+const RTL_EMBEDDING: &str = "\u{202B}"; // Right-to-Left Embedding
+const POP_EMBEDDING: &str = "\u{202C}"; // Pop Directional Formatting
+const LTR_ISOLATE: &str = "\u{2066}"; // Left-to-Right Isolate
+const POP_ISOLATE: &str = "\u{2069}"; // Pop Directional Isolate
+
+/// A prepared item in a paragraph layout.
+#[derive(Debug)]
+pub enum Item<'a> {
+    /// A run of shaped text that shares one style and one direction.
+    Text(ShapedText<'a>),
+    /// Spacing of a fixed size between other items, together with a flag that
+    /// tells whether the spacing is weak.
+    Absolute(Abs, bool),
+    /// Fractional spacing between other items. May carry a box (with its
+    /// locator and styles) that is laid out into the resolved space.
+    Fractional(Fr, Option<(&'a Packed<BoxElem>, Locator<'a>, StyleChain<'a>)>),
+    /// Inline-level content that has already been laid out.
+    Frame(Frame, StyleChain<'a>),
+    /// A tag.
+    Tag(&'a Tag),
+    /// An invisible item that must be skipped, e.g. a Unicode isolate. The
+    /// string is the item's representation in the paragraph's full text.
+    Skip(&'static str),
+}
+
+impl<'a> Item<'a> {
+    /// Returns the shaped text if this is a text item.
+    pub fn text(&self) -> Option<&ShapedText<'a>> {
+        if let Self::Text(shaped) = self {
+            Some(shaped)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the shaped text mutably if this is a text item.
+    pub fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+        if let Self::Text(shaped) = self {
+            Some(shaped)
+        } else {
+            None
+        }
+    }
+
+    /// The string that stands for this item in the paragraph's full text: the
+    /// text itself for a text item, a replacement string for everything else.
+    pub fn textual(&self) -> &str {
+        match self {
+            Self::Text(shaped) => shaped.text,
+            Self::Frame(_, _) => OBJ_REPLACE,
+            Self::Fractional(_, _) | Self::Absolute(_, _) => SPACING_REPLACE,
+            Self::Skip(s) => s,
+            Self::Tag(_) => "",
+        }
+    }
+
+    /// The length in bytes of the item's textual representation.
+    pub fn textual_len(&self) -> usize {
+        let repr = self.textual();
+        repr.len()
+    }
+
+    /// The natural laid-out width of the item. Fractional spacing, tags and
+    /// skipped items have no natural width.
+    pub fn width(&self) -> Abs {
+        match self {
+            Self::Fractional(_, _) | Self::Tag(_) | Self::Skip(_) => Abs::zero(),
+            Self::Absolute(amount, _) => *amount,
+            Self::Frame(frame, _) => frame.width(),
+            Self::Text(shaped) => shaped.width,
+        }
+    }
+}
+
+/// Either a finished item or text that still has to be shaped. Shaping is
+/// deferred until all items are collected because BiDi can only be computed
+/// then, and shape runs must be split at level boundaries.
+#[derive(Debug)]
+pub enum Segment<'a> {
+    /// One or more collapsed text children. The number is the segment's
+    /// length in bytes of the full text string.
+    Text(usize, StyleChain<'a>),
+    /// An item that is already prepared.
+    Item(Item<'a>),
+}
+
+impl Segment<'_> {
+    /// The length in bytes that this segment occupies in the full text.
+    pub fn textual_len(&self) -> usize {
+        match self {
+            Self::Item(item) => item.textual_len(),
+            Self::Text(len, _) => *len,
+        }
+    }
+}
+
+/// Collects all text of the paragraph into one string and a collection of
+/// segments that correspond to pieces of that string. This also performs
+/// string-level preprocessing like case transformations.
+///
+/// Returns the full text, the segments, and a `SpanMapper` that maps byte
+/// offsets in the full text back to the spans of the children that produced
+/// them.
+///
+/// - `region` is used to resolve relative spacing (against `region.x`) and is
+/// passed on when laying out inline elements and boxes.
+/// - `consecutive` gates the first-line indent: it is only inserted when this
+/// flag is set.
+///
+/// Fails if laying out an inline element or box fails, or with "unexpected
+/// paragraph child" if a child is of none of the handled element kinds.
+#[typst_macros::time]
+pub fn collect<'a>(
+ children: &'a StyleVec,
+ engine: &mut Engine<'_>,
+ locator: Locator<'a>,
+ styles: &'a StyleChain<'a>,
+ region: Size,
+ consecutive: bool,
+) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
+ // Reserve room for every child plus the two optional indent items below.
+ let mut collector = Collector::new(2 + children.len());
+ let mut iter = children.chain(styles).peekable();
+ let mut locator = locator.split();
+
+ // Insert the first-line indent as non-weak absolute spacing, but only if
+ // it is non-zero, `consecutive` is set, and the resolved horizontal
+ // alignment equals the start of the text direction.
+ let first_line_indent = ParElem::first_line_indent_in(*styles);
+ if !first_line_indent.is_zero()
+ && consecutive
+ && AlignElem::alignment_in(*styles).resolve(*styles).x
+ == TextElem::dir_in(*styles).start().into()
+ {
+ collector.push_item(Item::Absolute(first_line_indent.resolve(*styles), false));
+ // The spacing item added one byte (`SPACING_REPLACE`) to the full text;
+ // it has no source location, so map it to a detached span.
+ collector.spans.push(1, Span::detached());
+ }
+
+ // A hanging indent is inserted as negative spacing at the start.
+ let hang = ParElem::hanging_indent_in(*styles);
+ if !hang.is_zero() {
+ collector.push_item(Item::Absolute(-hang, false));
+ collector.spans.push(1, Span::detached());
+ }
+
+ let outer_dir = TextElem::dir_in(*styles);
+
+ while let Some((child, styles)) = iter.next() {
+ // Remember where this child's contribution to the full text starts so
+ // that its byte length can be recorded in the span mapper afterwards.
+ let prev_len = collector.full.len();
+
+ if child.is::<SpaceElem>() {
+ collector.push_text(" ", styles);
+ } else if let Some(elem) = child.to_packed::<TextElem>() {
+ collector.build_text(styles, |full| {
+ let dir = TextElem::dir_in(styles);
+ if dir != outer_dir {
+ // Insert "Explicit Directional Embedding".
+ match dir {
+ Dir::LTR => full.push_str(LTR_EMBEDDING),
+ Dir::RTL => full.push_str(RTL_EMBEDDING),
+ _ => {}
+ }
+ }
+
+ // Apply the case transformation, if any, while copying the text.
+ if let Some(case) = TextElem::case_in(styles) {
+ full.push_str(&case.apply(elem.text()));
+ } else {
+ full.push_str(elem.text());
+ }
+
+ if dir != outer_dir {
+ // Insert "Pop Directional Formatting".
+ full.push_str(POP_EMBEDDING);
+ }
+ });
+ } else if let Some(elem) = child.to_packed::<HElem>() {
+ let amount = elem.amount();
+ if amount.is_zero() {
+ // Zero spacing produces no item. Note that this also skips the
+ // span bookkeeping at the end of the loop body.
+ continue;
+ }
+
+ collector.push_item(match amount {
+ Spacing::Fr(fr) => Item::Fractional(*fr, None),
+ Spacing::Rel(rel) => Item::Absolute(
+ rel.resolve(styles).relative_to(region.x),
+ elem.weak(styles),
+ ),
+ });
+ } else if let Some(elem) = child.to_packed::<LinebreakElem>() {
+ // A justified line break is encoded as U+2028, a plain one as "\n".
+ collector
+ .push_text(if elem.justify(styles) { "\u{2028}" } else { "\n" }, styles);
+ } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
+ let double = elem.double(styles);
+ if elem.enabled(styles) {
+ let quotes = SmartQuotes::new(
+ elem.quotes(styles),
+ TextElem::lang_in(styles),
+ TextElem::region_in(styles),
+ elem.alternative(styles),
+ );
+ // Determine the character that follows the quote by peeking at
+ // the next child; `None` if there is no next child or the next
+ // text element is empty.
+ let peeked = iter.peek().and_then(|(child, _)| {
+ if let Some(elem) = child.to_packed::<TextElem>() {
+ elem.text().chars().next()
+ } else if child.is::<SmartQuoteElem>() {
+ Some('"')
+ } else if child.is::<SpaceElem>()
+ || child.is::<HElem>()
+ || child.is::<LinebreakElem>()
+ // This is a temporary hack. We should rather skip these
+ // and peek at the next child.
+ || child.is::<TagElem>()
+ {
+ Some(SPACING_REPLACE_CHAR)
+ } else {
+ Some(OBJ_REPLACE_CHAR)
+ }
+ });
+
+ let quote = collector.quoter.quote(&quotes, double, peeked);
+ collector.push_quote(quote, styles);
+ } else {
+ // Smart quotes are disabled: emit the plain ASCII quote.
+ collector.push_text(if double { "\"" } else { "'" }, styles);
+ }
+ } else if let Some(elem) = child.to_packed::<InlineElem>() {
+ // Wrap the element's items in an isolate so that they are invisible
+ // `Skip` items in the layout but present in the full text.
+ collector.push_item(Item::Skip(LTR_ISOLATE));
+
+ for item in elem.layout(engine, locator.next(&elem.span()), styles, region)? {
+ match item {
+ InlineItem::Space(space, weak) => {
+ collector.push_item(Item::Absolute(space, weak));
+ }
+ InlineItem::Frame(frame) => {
+ collector.push_item(Item::Frame(frame, styles));
+ }
+ }
+ }
+
+ collector.push_item(Item::Skip(POP_ISOLATE));
+ } else if let Some(elem) = child.to_packed::<BoxElem>() {
+ let loc = locator.next(&elem.span());
+ if let Sizing::Fr(v) = elem.width(styles) {
+ // A fractionally wide box is not laid out here; the element,
+ // locator and styles are stored in the item instead.
+ collector.push_item(Item::Fractional(v, Some((elem, loc, styles))));
+ } else {
+ let frame = elem.layout(engine, loc, styles, region)?;
+ collector.push_item(Item::Frame(frame, styles));
+ }
+ } else if let Some(elem) = child.to_packed::<TagElem>() {
+ collector.push_item(Item::Tag(&elem.tag));
+ } else {
+ bail!(child.span(), "unexpected paragraph child");
+ };
+
+ // Map the bytes this child added to the full text back to its span.
+ let len = collector.full.len() - prev_len;
+ collector.spans.push(len, child.span());
+ }
+
+ Ok((collector.full, collector.segments, collector.spans))
+}
+
+/// Accumulates the paragraph's full text together with the segments, span
+/// mapping and smart-quote state that belong to it.
+struct Collector<'a> {
+    /// The full text collected so far.
+    full: String,
+    /// The segments that correspond to pieces of `full`.
+    segments: Vec<Segment<'a>>,
+    /// Maps byte offsets in `full` back to spans.
+    spans: SpanMapper,
+    /// State for picking smart quotes.
+    quoter: SmartQuoter,
+}
+
+impl<'a> Collector<'a> {
+    /// Creates an empty collector whose segment list has room for `capacity`
+    /// segments.
+    fn new(capacity: usize) -> Self {
+        let segments = Vec::with_capacity(capacity);
+        Self {
+            full: String::new(),
+            segments,
+            spans: SpanMapper::new(),
+            quoter: SmartQuoter::new(),
+        }
+    }
+
+    /// Appends `text` to the full text as a (non-quote) text segment.
+    fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
+        self.build_text(styles, |full| full.push_str(text));
+    }
+
+    /// Lets `f` append to the full text and records whatever it added as a
+    /// (non-quote) text segment.
+    fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
+    where
+        F: FnOnce(&mut String),
+    {
+        let before = self.full.len();
+        f(&mut self.full);
+        let added = self.full.len() - before;
+        self.push_segment(Segment::Text(added, styles), false);
+    }
+
+    /// Appends a quote to the full text as a text segment and tells the
+    /// quoter that the last character was a quote.
+    fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) {
+        let len = quote.len();
+        self.full.push_str(quote);
+        self.push_segment(Segment::Text(len, styles), true);
+    }
+
+    /// Appends the item's textual representation to the full text and records
+    /// the item as a segment.
+    fn push_item(&mut self, item: Item<'a>) {
+        self.full.push_str(item.textual());
+        self.push_segment(Segment::Item(item), false);
+    }
+
+    /// Records a segment whose text was already appended to `full`.
+    ///
+    /// Informs the quoter about the last character of the full text and
+    /// merges the segment into the previous one if both are text segments
+    /// with equal styles.
+    fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) {
+        if let Some(c) = self.full.chars().next_back() {
+            self.quoter.last(c, is_quote);
+        }
+
+        if let Segment::Text(len, styles) = &segment {
+            if let Some(Segment::Text(last_len, last_styles)) = self.segments.last_mut() {
+                if *last_styles == *styles {
+                    *last_len += *len;
+                    return;
+                }
+            }
+        }
+
+        self.segments.push(segment);
+    }
+}
+
+/// Maps byte offsets back to spans. Stores one `(length, span)` pair per
+/// pushed segment, in order.
+#[derive(Default)]
+pub struct SpanMapper(Vec<(usize, Span)>);
+
+impl SpanMapper {
+    /// Creates an empty span mapper.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Records that the next `len` bytes belong to `span`.
+    pub fn push(&mut self, len: usize, span: Span) {
+        self.0.push((len, span));
+    }
+
+    /// Finds the span that covers the given byte offset, together with the
+    /// offset relative to the start of that span's segment (or 0 if that
+    /// relative offset does not fit into a `u16`).
+    ///
+    /// May return a detached span.
+    pub fn span_at(&self, offset: usize) -> (Span, u16) {
+        let mut start = 0;
+        for &(len, span) in &self.0 {
+            let end = start + len;
+            if start <= offset && offset < end {
+                let local = offset - start;
+                return (span, u16::try_from(local).unwrap_or(0));
+            }
+            start = end;
+        }
+        (Span::detached(), 0)
+    }
+}
diff --git a/crates/typst/src/layout/inline/finalize.rs b/crates/typst/src/layout/inline/finalize.rs
new file mode 100644
index 00000000..c8ba4729
--- /dev/null
+++ b/crates/typst/src/layout/inline/finalize.rs
@@ -0,0 +1,63 @@
+use super::*;
+use crate::layout::{Abs, Frame, Point};
+use crate::utils::Numeric;
+
+/// Turns the selected lines into frames.
+///
+/// Commits every line at a common width and then, depending on the orphan
+/// and widow costs, merges the second frame into the first and/or the last
+/// frame into the one before it.
+#[typst_macros::time]
+pub fn finalize(
+    engine: &mut Engine,
+    p: &Preparation,
+    lines: &[Line],
+    styles: StyleChain,
+    region: Size,
+    expand: bool,
+) -> SourceResult<Fragment> {
+    // Determine the paragraph's width: Full width of the region if we should
+    // expand or there's fractional spacing, fit-to-width otherwise.
+    let fit_to_width = !region.x.is_finite()
+        || (!expand && lines.iter().all(|line| line.fr().is_zero()));
+    let width = if fit_to_width {
+        let widest = lines.iter().map(|line| line.width).max().unwrap_or_default();
+        region.x.min(p.hang + widest)
+    } else {
+        region.x
+    };
+
+    // Stack the lines into one frame per region.
+    let shrink = ParElem::shrink_in(styles);
+    let mut frames: Vec<Frame> = Vec::with_capacity(lines.len());
+    for line in lines {
+        frames.push(commit(engine, p, line, width, region.y, shrink)?);
+    }
+
+    // Positive ratios enable prevention, while zero and negative ratios
+    // disable it.
+    let prevent_orphans = p.costs.orphan().get() > 0.0;
+    if prevent_orphans && frames.len() >= 2 && !frames[1].is_empty() {
+        let second = frames.remove(1);
+        merge(&mut frames[0], second, p.leading);
+    }
+
+    let prevent_widows = p.costs.widow().get() > 0.0;
+    if prevent_widows {
+        let count = frames.len();
+        if count >= 2 && !frames[count - 2].is_empty() {
+            let trailing = frames.pop().unwrap();
+            let target = frames.last_mut().unwrap();
+            merge(target, trailing, p.leading);
+        }
+    }
+
+    Ok(Fragment::frames(frames))
+}
+
+/// Merges two line frames: places `second` below `first`, separated by
+/// `leading`, and grows `first` to the combined height.
+fn merge(first: &mut Frame, second: Frame, leading: Abs) {
+    let y = first.height() + leading;
+    let combined = y + second.height();
+    first.push_frame(Point::with_y(y), second);
+    first.size_mut().y = combined;
+}
diff --git a/crates/typst/src/layout/inline/line.rs b/crates/typst/src/layout/inline/line.rs
new file mode 100644
index 00000000..2473f958
--- /dev/null
+++ b/crates/typst/src/layout/inline/line.rs
@@ -0,0 +1,550 @@
+use unicode_bidi::BidiInfo;
+
+use super::*;
+use crate::engine::Engine;
+use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
+use crate::text::TextElem;
+use crate::utils::Numeric;
+
+/// A laid-out line, made up of a sequence of paragraph items that are mostly
+/// borrowed from the preparation phase. With this type, the size of a line
+/// over some range can be measured before committing to building the line's
+/// frame.
+///
+/// At most two paragraph items must be created individually for this line: The
+/// first and last one since they may be broken apart by the start or end of the
+/// line, respectively. But even those can partially reuse previous results when
+/// the break index is safe-to-break per rustybuzz.
+pub struct Line<'a> {
+    /// Bidi information about the paragraph.
+    pub bidi: &'a BidiInfo<'a>,
+    /// The trimmed range the line spans in the paragraph.
+    pub trimmed: Range,
+    /// The untrimmed end where the line ends.
+    pub end: usize,
+    /// A reshaped text item if the line sliced up a text item at the start.
+    pub first: Option<Item<'a>>,
+    /// Inner items which don't need to be reprocessed.
+    pub inner: &'a [Item<'a>],
+    /// A reshaped text item if the line sliced up a text item at the end. If
+    /// there is only one text item, this takes precedence over `first`.
+    pub last: Option<Item<'a>>,
+    /// The width of the line.
+    pub width: Abs,
+    /// Whether the line should be justified.
+    pub justify: bool,
+    /// Whether the line ends with a hyphen or dash, either naturally or through
+    /// hyphenation.
+    pub dash: Option<Dash>,
+}
+
+impl<'a> Line<'a> {
+    /// Iterates over the line's items in order: `first`, then `inner`, then
+    /// `last`.
+    pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
+        let head = self.first.iter();
+        let tail = self.last.iter();
+        head.chain(self.inner).chain(tail)
+    }
+
+    /// Returns the items that intersect the given `text_range`.
+    pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
+        // Byte position of the current item, starting at the line's start.
+        let mut pos = self.trimmed.start;
+        let mut from = 0;
+        let mut to = 0;
+
+        for (idx, item) in self.items().enumerate() {
+            // The last item that starts at or before the range's start is the
+            // first one of the slice.
+            if pos <= text_range.start {
+                from = idx;
+            }
+
+            let len = item.textual_len();
+            let included = pos < text_range.end || pos + len <= text_range.end;
+            if !included {
+                break;
+            }
+
+            to = idx + 1;
+            pos += len;
+        }
+
+        self.items().skip(from).take(to - from)
+    }
+
+    /// How many glyphs are in the text where we can insert additional
+    /// space when encountering underfull lines.
+    pub fn justifiables(&self) -> usize {
+        let mut count: usize = self
+            .items()
+            .filter_map(Item::text)
+            .map(|shaped| shaped.justifiables())
+            .sum();
+
+        // CJK character at line end should not be adjusted.
+        let cjk_at_end = self
+            .items()
+            .last()
+            .and_then(Item::text)
+            .is_some_and(|shaped| shaped.cjk_justifiable_at_last());
+        if cjk_at_end {
+            count -= 1;
+        }
+
+        count
+    }
+
+    /// How much the line can stretch: the summed stretchability of its text
+    /// items.
+    pub fn stretchability(&self) -> Abs {
+        let texts = self.items().filter_map(Item::text);
+        texts.map(|shaped| shaped.stretchability()).sum()
+    }
+
+    /// How much the line can shrink: the summed shrinkability of its text
+    /// items.
+    pub fn shrinkability(&self) -> Abs {
+        let texts = self.items().filter_map(Item::text);
+        texts.map(|shaped| shaped.shrinkability()).sum()
+    }
+
+    /// The sum of fractions in the line.
+    pub fn fr(&self) -> Fr {
+        self.items()
+            .filter_map(|item| {
+                if let Item::Fractional(fr, _) = item {
+                    Some(*fr)
+                } else {
+                    None
+                }
+            })
+            .sum()
+    }
+}
+
+/// A dash at the end of a line.
+///
+/// Determined in `line` from the breakpoint kind and the last character of the
+/// line's trimmed text.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Dash {
+ /// A hyphen added to break a word. Also used when the line's text ends
+ /// with U+00AD SOFT HYPHEN.
+ SoftHyphen,
+ /// Regular hyphen, present in a compound word, e.g. beija-flor.
+ HardHyphen,
+ /// An em dash.
+ Long,
+ /// An en dash.
+ Short,
+}
+
+/// Create a line which spans the given range.
+///
+/// - `p` is the preparation whose items and text the line borrows from.
+/// - `range` is a byte range into the paragraph's full text (`p.bidi.text`).
+/// - `breakpoint` is the kind of break at the end of the range. A hyphen
+///   breakpoint appends a hyphen to the line; a mandatory one disables
+///   justification.
+/// - `prepend_hyphen` requests that a hyphen be prepended to the line's
+///   first text item.
+///
+/// Weak spacing at both ends of the line is dropped, trailing whitespace is
+/// trimmed from the last text item, and the first and last text items are
+/// reshaped if the line only covers part of them or if their outermost glyph
+/// needs adjustment (hyphens, CJK punctuation, CJK-Latin spacing).
+pub fn line<'a>(
+    engine: &Engine,
+    p: &'a Preparation,
+    mut range: Range,
+    breakpoint: Breakpoint,
+    prepend_hyphen: bool,
+) -> Line<'a> {
+    let end = range.end;
+    let mut justify =
+        p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
+
+    if range.is_empty() {
+        return Line {
+            bidi: &p.bidi,
+            end,
+            trimmed: range,
+            first: None,
+            inner: &[],
+            last: None,
+            width: Abs::zero(),
+            justify,
+            dash: None,
+        };
+    }
+
+    // Slice out the relevant items. `expanded` is the text range covered by
+    // the sliced items; it is used below to locate the first and last item
+    // within the paragraph's text.
+    let (mut expanded, mut inner) = p.slice(range.clone());
+    let mut width = Abs::zero();
+
+    // Weak space (`Absolute(_, true)`) is removed at the end of the line
+    while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
+        inner = before;
+        range.end -= 1;
+        expanded.end -= 1;
+    }
+    // Weak space (`Absolute(_, true)`) is removed at the beginning of the line.
+    // Dropping an item from the front moves the *start* of the range covered
+    // by the remaining items; the end must stay untouched, as it is used to
+    // locate the last item below.
+    while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
+        inner = after;
+        range.start += 1;
+        expanded.start += 1;
+    }
+
+    // Reshape the last item if it's split in half or hyphenated.
+    let mut last = None;
+    let mut dash = None;
+    if let Some((Item::Text(shaped), before)) = inner.split_last() {
+        // Compute the range we want to shape, trimming whitespace at the
+        // end of the line.
+        let base = expanded.end - shaped.text.len();
+        let start = range.start.max(base);
+        let text = &p.bidi.text[start..range.end];
+        // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
+        // we want to trim it too.
+        let trimmed = text.trim_end().trim_end_matches('\u{200B}');
+        range.end = start + trimmed.len();
+
+        // Deal with hyphens, dashes and justification.
+        let shy = trimmed.ends_with('\u{ad}');
+        let hyphen = breakpoint == Breakpoint::Hyphen;
+        dash = if hyphen || shy {
+            Some(Dash::SoftHyphen)
+        } else if trimmed.ends_with('-') {
+            Some(Dash::HardHyphen)
+        } else if trimmed.ends_with('–') {
+            Some(Dash::Short)
+        } else if trimmed.ends_with('—') {
+            Some(Dash::Long)
+        } else {
+            None
+        };
+        // A line that ends in U+2028 (a justified line break) is justified
+        // even though the break is mandatory.
+        justify |= text.ends_with('\u{2028}');
+
+        // Deal with CJK punctuation at line ends.
+        let gb_style = cjk_punct_style(shaped.lang, shaped.region);
+        let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
+            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
+
+        // Usually, we don't want to shape an empty string because:
+        // - We don't want the height of trimmed whitespace in a different font
+        //   to be considered for the line height.
+        // - Even if it's in the same font, its unnecessary.
+        //
+        // There is one exception though. When the whole line is empty, we need
+        // the shaped empty string to make the line the appropriate height. That
+        // is the case exactly if the string is empty and there are no other
+        // items in the line.
+        if hyphen
+            || start + shaped.text.len() > range.end
+            || maybe_adjust_last_glyph
+            || prepend_hyphen
+        {
+            if hyphen || start < range.end || before.is_empty() {
+                let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
+                if hyphen || shy {
+                    reshaped.push_hyphen(engine, p.fallback);
+                }
+
+                if let Some(last_glyph) = reshaped.glyphs.last() {
+                    if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
+                        // If the last glyph is a CJK punctuation, we want to
+                        // shrink it. See Requirements for Chinese Text Layout,
+                        // Section 3.1.6.3 Compression of punctuation marks at
+                        // line start or line end
+                        let shrink_amount = last_glyph.shrinkability().1;
+                        let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
+                        punct.shrink_right(shrink_amount);
+                        reshaped.width -= shrink_amount.at(reshaped.size);
+                    } else if p.cjk_latin_spacing
+                        && last_glyph.is_cj_script()
+                        && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
+                    {
+                        // If the last glyph is a CJK character adjusted by
+                        // [`add_cjk_latin_spacing`], restore the original
+                        // width.
+                        let shrink_amount =
+                            last_glyph.x_advance - last_glyph.x_offset - Em::one();
+                        let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
+                        glyph.x_advance -= shrink_amount;
+                        glyph.adjustability.shrinkability.1 = Em::zero();
+                        reshaped.width -= shrink_amount.at(reshaped.size);
+                    }
+                }
+
+                width += reshaped.width;
+                last = Some(Item::Text(reshaped));
+            }
+
+            // The last item is now represented by `last` (or dropped), so it
+            // must no longer be part of the inner items.
+            inner = before;
+        }
+    }
+
+    // Deal with CJ characters at line starts.
+    let text = &p.bidi.text[range.start..end];
+    let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
+        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
+
+    // Reshape the start item if it's split in half.
+    let mut first = None;
+    if let Some((Item::Text(shaped), after)) = inner.split_first() {
+        // Compute the range we want to shape.
+        let base = expanded.start;
+        let end = range.end.min(base + shaped.text.len());
+
+        // Reshape if necessary.
+        if range.start + shaped.text.len() > end
+            || maybe_adjust_first_glyph
+            || prepend_hyphen
+        {
+            // If the range is empty, we don't want to push an empty text item.
+            if range.start < end {
+                let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
+                width += reshaped.width;
+                first = Some(Item::Text(reshaped));
+            }
+
+            inner = after;
+        }
+    }
+
+    // The first text item of the line is `first` if it exists, and otherwise
+    // `last` (which is then the line's only reshaped text item).
+    if prepend_hyphen {
+        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
+        if let Some(reshaped) = reshaped {
+            let width_before = reshaped.width;
+            reshaped.prepend_hyphen(engine, p.fallback);
+            width += reshaped.width - width_before;
+        }
+    }
+
+    if maybe_adjust_first_glyph {
+        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
+        if let Some(reshaped) = reshaped {
+            if let Some(first_glyph) = reshaped.glyphs.first() {
+                if first_glyph.is_cjk_right_aligned_punctuation() {
+                    // If the first glyph is a CJK punctuation, we want to
+                    // shrink it.
+                    let shrink_amount = first_glyph.shrinkability().0;
+                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+                    glyph.shrink_left(shrink_amount);
+                    let amount_abs = shrink_amount.at(reshaped.size);
+                    reshaped.width -= amount_abs;
+                    width -= amount_abs;
+                } else if p.cjk_latin_spacing
+                    && first_glyph.is_cj_script()
+                    && first_glyph.x_offset > Em::zero()
+                {
+                    // If the first glyph is a CJK character adjusted by
+                    // [`add_cjk_latin_spacing`], restore the original width.
+                    let shrink_amount = first_glyph.x_offset;
+                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+                    glyph.x_advance -= shrink_amount;
+                    glyph.x_offset = Em::zero();
+                    glyph.adjustability.shrinkability.0 = Em::zero();
+                    let amount_abs = shrink_amount.at(reshaped.size);
+                    reshaped.width -= amount_abs;
+                    width -= amount_abs;
+                }
+            }
+        }
+    }
+
+    // Measure the inner items.
+    for item in inner {
+        width += item.width();
+    }
+
+    Line {
+        bidi: &p.bidi,
+        trimmed: range,
+        end,
+        first,
+        inner,
+        last,
+        width,
+        justify,
+        dash,
+    }
+}
+
+/// Commit to a line and build its frame.
+///
+/// - `width` is the width of the resulting frame.
+/// - `full` is the height of the region handed to boxes with fractional
+/// width when they are laid out here.
+/// - `shrink` controls whether an overfull line may be shrunk using the
+/// shrinkability of its text.
+///
+/// Fails if laying out a fractionally sized box fails.
+pub fn commit(
+ engine: &mut Engine,
+ p: &Preparation,
+ line: &Line,
+ width: Abs,
+ full: Abs,
+ shrink: bool,
+) -> SourceResult<Frame> {
+ // The horizontal space that is left over after placing the line's content
+ // and the hanging indent; negative if the line is overfull.
+ let mut remaining = width - line.width - p.hang;
+ let mut offset = Abs::zero();
+
+ // Reorder the line from logical to visual order.
+ let (reordered, starts_rtl) = reorder(line);
+ if !starts_rtl {
+ offset += p.hang;
+ }
+
+ // Handle hanging punctuation to the left.
+ if let Some(Item::Text(text)) = reordered.first() {
+ if let Some(glyph) = text.glyphs.first() {
+ // Only hang if the glyph is not the sole content of the line.
+ if !text.dir.is_positive()
+ && TextElem::overhang_in(text.styles)
+ && (reordered.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ offset -= amount;
+ remaining += amount;
+ }
+ }
+ }
+
+ // Handle hanging punctuation to the right.
+ if let Some(Item::Text(text)) = reordered.last() {
+ if let Some(glyph) = text.glyphs.last() {
+ if text.dir.is_positive()
+ && TextElem::overhang_in(text.styles)
+ && (reordered.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ remaining += amount;
+ }
+ }
+ }
+
+ // Determine how much additional space is needed. The justification_ratio is
+ // for the first step justification, extra_justification is for the last
+ // step. For more info on multi-step justification, see Procedures for
+ // Inter- Character Space Expansion in W3C document Chinese Layout
+ // Requirements.
+ let fr = line.fr();
+ let mut justification_ratio = 0.0;
+ let mut extra_justification = Abs::zero();
+
+ let shrinkability = line.shrinkability();
+ let stretch = line.stretchability();
+ if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
+ // Attempt to reduce the length of the line, using shrinkability.
+ // The ratio is negative here and clamped to at most full shrinking.
+ justification_ratio = (remaining / shrinkability).max(-1.0);
+ remaining = (remaining + shrinkability).min(Abs::zero());
+ } else if line.justify && fr.is_zero() {
+ // Attempt to increase the length of the line, using stretchability.
+ if stretch > Abs::zero() {
+ justification_ratio = (remaining / stretch).min(1.0);
+ remaining = (remaining - stretch).max(Abs::zero());
+ }
+
+ let justifiables = line.justifiables();
+ if justifiables > 0 && remaining > Abs::zero() {
+ // Underfull line, distribute the extra space.
+ extra_justification = remaining / justifiables as f64;
+ remaining = Abs::zero();
+ }
+ }
+
+ // The largest extents above and below the baseline over all frames.
+ let mut top = Abs::zero();
+ let mut bottom = Abs::zero();
+
+ // Build the frames and determine the height and baseline.
+ let mut frames = vec![];
+ for item in reordered {
+ // Records a frame at the current offset, grows the line's vertical
+ // extents to fit it, and advances the offset by the frame's width.
+ let mut push = |offset: &mut Abs, frame: Frame| {
+ let width = frame.width();
+ top.set_max(frame.baseline());
+ bottom.set_max(frame.size().y - frame.baseline());
+ frames.push((*offset, frame));
+ *offset += width;
+ };
+
+ match item {
+ Item::Absolute(v, _) => {
+ offset += *v;
+ }
+ Item::Fractional(v, elem) => {
+ // This item's share of the remaining space.
+ let amount = v.share(fr, remaining);
+ if let Some((elem, loc, styles)) = elem {
+ let region = Size::new(amount, full);
+ let mut frame =
+ elem.layout(engine, loc.relayout(), *styles, region)?;
+ frame.post_process(*styles);
+ frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+ push(&mut offset, frame);
+ } else {
+ offset += amount;
+ }
+ }
+ Item::Text(shaped) => {
+ let mut frame =
+ shaped.build(engine, justification_ratio, extra_justification);
+ frame.post_process(shaped.styles);
+ push(&mut offset, frame);
+ }
+ Item::Frame(frame, styles) => {
+ let mut frame = frame.clone();
+ frame.post_process(*styles);
+ frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+ push(&mut offset, frame);
+ }
+ Item::Tag(tag) => {
+ // A tag becomes a zero-sized frame at the current offset. It
+ // bypasses `push`, so it affects neither the offset nor the
+ // line's vertical extents.
+ let mut frame = Frame::soft(Size::zero());
+ frame.push(Point::zero(), FrameItem::Tag((*tag).clone()));
+ frames.push((offset, frame));
+ }
+ Item::Skip(_) => {}
+ }
+ }
+
+ // Remaining space is distributed now.
+ if !fr.is_zero() {
+ remaining = Abs::zero();
+ }
+
+ let size = Size::new(width, top + bottom);
+ let mut output = Frame::soft(size);
+ output.set_baseline(top);
+
+ // Construct the line's frame.
+ for (offset, frame) in frames {
+ // Shift horizontally by the alignment's share of the leftover space and
+ // vertically so that all baselines coincide.
+ let x = offset + p.align.position(remaining);
+ let y = top - frame.baseline();
+ output.push_frame(Point::new(x, y), frame);
+ }
+
+ Ok(output)
+}
+
+/// Returns a line's items in visual order, together with a flag that tells
+/// whether the first visual run is right-to-left.
+fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
+    // The bidi crate doesn't like empty lines.
+    if line.trimmed.is_empty() {
+        let items = line.slice(line.trimmed.clone()).collect();
+        return (items, false);
+    }
+
+    // Find the paragraph that contains the line.
+    let para = line
+        .bidi
+        .paragraphs
+        .iter()
+        .find(|para| para.range.contains(&line.trimmed.start))
+        .unwrap();
+
+    // Compute the reordered ranges in visual order (left to right).
+    let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
+    let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
+
+    // Collect the reordered items.
+    let mut visual = Vec::new();
+    for run in runs {
+        // Skip reset L1 runs because handling them would require reshaping
+        // again in some cases.
+        if line.bidi.levels[run.start] != levels[run.start] {
+            continue;
+        }
+
+        // Items of a right-to-left run are appended in logical order and then
+        // flipped in place.
+        let run_begin = visual.len();
+        visual.extend(line.slice(run.clone()));
+
+        if levels[run.start].is_rtl() {
+            visual[run_begin..].reverse();
+        }
+    }
+
+    (visual, starts_rtl)
+}
+
+/// The fraction of its advance by which a character should hang into the end
+/// margin. Characters that are not listed do not hang at all.
+///
+/// For more discussion, see:
+/// <https://recoveringphysicist.com/21/>
+fn overhang(c: char) -> f64 {
+    const AMOUNTS: [(char, f64); 9] = [
+        // Dashes.
+        ('–', 0.2),
+        ('—', 0.2),
+        ('-', 0.55),
+        // Punctuation.
+        ('.', 0.8),
+        (',', 0.8),
+        (':', 0.3),
+        (';', 0.3),
+        // Arabic
+        ('\u{60C}', 0.4),
+        ('\u{6D4}', 0.4),
+    ];
+
+    AMOUNTS
+        .iter()
+        .find(|(candidate, _)| *candidate == c)
+        .map_or(0.0, |&(_, amount)| amount)
+}
diff --git a/crates/typst/src/layout/inline/linebreak.rs b/crates/typst/src/layout/inline/linebreak.rs
index a62cda91..ddf7937b 100644
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@@ -6,7 +6,10 @@ use icu_provider_blob::BlobDataProvider;
use icu_segmenter::LineSegmenter;
use once_cell::sync::Lazy;
-use super::Preparation;
+use super::*;
+use crate::engine::Engine;
+use crate::layout::Abs;
+use crate::model::Linebreaks;
use crate::syntax::link_prefix;
use crate::text::{Lang, TextElem};
@@ -38,7 +41,7 @@ static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
/// A line break opportunity.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub(super) enum Breakpoint {
+pub enum Breakpoint {
/// Just a normal opportunity (e.g. after a space).
Normal,
/// A mandatory breakpoint (after '\n' or at the end of the text).
@@ -47,6 +50,247 @@ pub(super) enum Breakpoint {
Hyphen,
}
+/// Splits the prepared paragraph `p` into lines that fit into `width`.
+///
+/// The algorithm is taken from `p.linebreaks`. If that setting is left
+/// unspecified, justified paragraphs use the optimized algorithm and all
+/// other paragraphs use the simple one.
+pub fn linebreak<'a>(
+    engine: &Engine,
+    p: &'a Preparation<'a>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    // Resolve the strategy, falling back to a justification-based default.
+    let strategy = p.linebreaks.unwrap_or_else(|| match p.justify {
+        true => Linebreaks::Optimized,
+        false => Linebreaks::Simple,
+    });
+
+    // Dispatch to the selected implementation.
+    match strategy {
+        Linebreaks::Optimized => linebreak_optimized(engine, p, width),
+        Linebreaks::Simple => linebreak_simple(engine, p, width),
+    }
+}
+
+/// Performs line breaking in simple first-fit style. This means that we build
+/// lines greedily, always taking the longest possible line. This may lead to
+/// very unbalanced lines, but is fast and simple.
+///
+/// `width` is the width each line is checked against. The returned lines are
+/// in paragraph order.
+#[typst_macros::time]
+fn linebreak_simple<'a>(
+    engine: &Engine,
+    p: &'a Preparation<'a>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    let mut lines = Vec::with_capacity(16);
+    // Text offset at which the line currently being built starts.
+    let mut start = 0;
+    // The most recent attempt that still fit, together with its end offset.
+    let mut last = None;
+
+    breakpoints(p, |end, breakpoint| {
+        // Whether the line starting at `start` has to repeat the hyphen that
+        // ended its predecessor, i.e. the most recently finished line.
+        let mut prepend_hyphen =
+            lines.last().map(should_repeat_hyphen).unwrap_or(false);
+
+        // Compute the line and its size.
+        let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+
+        // If the line doesn't fit anymore, we push the last fitting attempt
+        // into the stack and rebuild the line from the attempt's end. The
+        // resulting line cannot be broken up further.
+        if !width.fits(attempt.width) {
+            if let Some((last_attempt, last_end)) = last.take() {
+                lines.push(last_attempt);
+                start = last_end;
+
+                // The rebuilt line now follows the line we just pushed, so
+                // the hyphen decision must be re-evaluated against that new
+                // predecessor instead of reusing the stale value from above.
+                prepend_hyphen =
+                    lines.last().map(should_repeat_hyphen).unwrap_or(false);
+                attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+            }
+        }
+
+        // Finish the current line if there is a mandatory line break (i.e. due
+        // to "\n") or if the line doesn't fit horizontally already since then
+        // no shorter line will be possible.
+        if breakpoint == Breakpoint::Mandatory || !width.fits(attempt.width) {
+            lines.push(attempt);
+            start = end;
+            last = None;
+        } else {
+            last = Some((attempt, end));
+        }
+    });
+
+    // Flush the trailing attempt that fit but was never finished.
+    if let Some((line, _)) = last {
+        lines.push(line);
+    }
+
+    lines
+}
+
+/// Performs line breaking in optimized Knuth-Plass style. Here, we use more
+/// context to determine the line breaks than in the simple first-fit style. For
+/// example, we might choose to cut a line short even though there is still a
+/// bit of space to improve the fit of one of the following lines. The
+/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
+/// very tight or very loose fit has a higher cost than one that is just right.
+/// Ending a line with a hyphen incurs extra cost and ending two successive
+/// lines with hyphens even more.
+///
+/// To find the layout with the minimal total cost the algorithm uses dynamic
+/// programming: For each possible breakpoint it determines the optimal
+/// paragraph layout _up to that point_. It walks over all possible start points
+/// for a line ending at that point and finds the one for which the cost of the
+/// line plus the cost of the optimal paragraph up to the start point (already
+/// computed and stored in the dynamic programming table) is minimal. The final
+/// result is simply the layout determined for the last breakpoint at the end of
+/// text.
+#[typst_macros::time]
+fn linebreak_optimized<'a>(
+ engine: &Engine,
+ p: &'a Preparation<'a>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ /// The cost of a line or paragraph layout.
+ type Cost = f64;
+
+ /// An entry in the dynamic programming table.
+ struct Entry<'a> {
+ /// Index of the table entry at whose line end this entry's line starts.
+ pred: usize,
+ /// Accumulated cost of this line and its whole chain of predecessors.
+ total: Cost,
+ /// The line ending at this entry's breakpoint.
+ line: Line<'a>,
+ }
+
+ // Cost parameters.
+ // Base costs for hyphenation and runts; both are scaled by `p.costs` below.
+ const DEFAULT_HYPH_COST: Cost = 0.5;
+ const DEFAULT_RUNT_COST: Cost = 0.5;
+ // Added when a line and its predecessor both end with a dash.
+ const CONSECUTIVE_DASH_COST: Cost = 0.3;
+ // Cost assigned to overfull lines.
+ const MAX_COST: Cost = 1_000_000.0;
+ // Lowest ratio tolerated for justified paragraphs before a line counts as
+ // overfull.
+ const MIN_RATIO: f64 = -1.0;
+
+ let hyph_cost = DEFAULT_HYPH_COST * p.costs.hyphenation().get();
+ let runt_cost = DEFAULT_RUNT_COST * p.costs.runt().get();
+
+ // Dynamic programming table.
+ // `active` is the index of the first table entry that is still considered
+ // as a predecessor. The table is seeded with an empty line at offset zero.
+ let mut active = 0;
+ let mut table = vec![Entry {
+ pred: 0,
+ total: 0.0,
+ line: line(engine, p, 0..0, Breakpoint::Mandatory, false),
+ }];
+
+ let em = p.size;
+ let mut lines = Vec::with_capacity(16);
+ breakpoints(p, |end, breakpoint| {
+ // `k` is the index the entry for this breakpoint will get in the table.
+ let k = table.len();
+ let is_end = end == p.bidi.text.len();
+ let mut best: Option<Entry> = None;
+
+ // Find the optimal predecessor.
+ for (i, pred) in table.iter().enumerate().skip(active) {
+ // Layout the line.
+ let start = pred.line.end;
+ let prepend_hyphen = should_repeat_hyphen(&pred.line);
+
+ let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+
+ // Determine how much the line's spaces would need to be stretched
+ // to make it the desired width.
+ let delta = width - attempt.width;
+ // Determine how much stretching or shrinking is permitted.
+ let adjust = if delta >= Abs::zero() {
+ attempt.stretchability()
+ } else {
+ attempt.shrinkability()
+ };
+ // Ideally, the ratio should be between -1.0 and 1.0, but sometimes a
+ // value above 1.0 is possible, in which case the line is underfull.
+ let mut ratio = delta / adjust;
+ if ratio.is_nan() {
+ // The line is not stretchable, but it just fits. This often
+ // happens with monospace fonts and CJK texts.
+ ratio = 0.0;
+ }
+ if ratio > 1.0 {
+ // We should stretch the line above its stretchability. Now
+ // calculate the extra amount. Also, don't divide by zero.
+ let extra_stretch =
+ (delta - adjust) / attempt.justifiables().max(1) as f64;
+ // Normalize the amount by half Em size.
+ ratio = 1.0 + extra_stretch / (em / 2.0);
+ }
+
+ // Determine the cost of the line.
+ let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
+ let mut cost = if ratio < min_ratio {
+ // The line is overfull. This is the case if
+ // - justification is on, but we'd need to shrink too much
+ // - justification is off and the line just doesn't fit
+ //
+ // If this is the earliest breakpoint in the active set
+ // (active == i), remove it from the active set. If there is an
+ // earlier one (active < i), then the logically shorter line was
+ // in fact longer (can happen with negative spacing) and we
+ // can't trim the active set just yet.
+ if active == i {
+ active += 1;
+ }
+ MAX_COST
+ } else if breakpoint == Breakpoint::Mandatory || is_end {
+ // This is a mandatory break and the line is not overfull, so
+ // all breakpoints before this one become inactive since no line
+ // can span above the mandatory break.
+ active = k;
+ // - If ratio > 0, we need to stretch the line only when justify
+ // is needed.
+ // - If ratio < 0, we always need to shrink the line.
+ if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
+ ratio.powi(3).abs()
+ } else {
+ 0.0
+ }
+ } else {
+ // Normal line with cost of |ratio^3|.
+ ratio.powi(3).abs()
+ };
+
+ // Penalize runts. Here, that is a line ending the text whose
+ // predecessor is the most recent table entry (k == i + 1).
+ if k == i + 1 && is_end {
+ cost += runt_cost;
+ }
+
+ // Penalize hyphens.
+ if breakpoint == Breakpoint::Hyphen {
+ cost += hyph_cost;
+ }
+
+ // In Knuth's paper, cost = (1 + 100|r|^3 + p)^2 + a,
+ // where r is the ratio, p=50 is the penalty, and a=3000 is the
+ // consecutive penalty. We divide the inner term by 100 (and thus the
+ // whole formula by 10000), resulting in (0.01 + |r|^3 + p)^2 + a,
+ // where p=0.5 and a=0.3.
+ cost = (0.01 + cost).powi(2);
+
+ // Penalize two consecutive dashes (not necessarily hyphens) extra.
+ if attempt.dash.is_some() && pred.line.dash.is_some() {
+ cost += CONSECUTIVE_DASH_COST;
+ }
+
+ // The total cost of this line and its chain of predecessors.
+ let total = pred.total + cost;
+
+ // If this attempt is better than what we had before, take it!
+ // Because of `>=`, a later predecessor wins on equal total cost.
+ if best.as_ref().map_or(true, |best| best.total >= total) {
+ best = Some(Entry { pred: i, total, line: attempt });
+ }
+ }
+
+ // `active` never exceeds `k` within the loop above, so at least one
+ // predecessor was visited and `best` is set.
+ table.push(best.unwrap());
+ });
+
+ // Retrace the best path.
+ // Walk the `pred` links backwards from the last entry. Truncating before
+ // popping lets us move each chosen line out of the table.
+ let mut idx = table.len() - 1;
+ while idx != 0 {
+ table.truncate(idx + 1);
+ let entry = table.pop().unwrap();
+ lines.push(entry.line);
+ idx = entry.pred;
+ }
+
+ // The lines were collected back to front.
+ lines.reverse();
+ lines
+}
+
/// Calls `f` for all possible points in the text where lines can broken.
///
/// Yields for each breakpoint the text index, whether the break is mandatory
@@ -56,10 +300,7 @@ pub(super) enum Breakpoint {
/// This is an internal instead of an external iterator because it makes the
/// code much simpler and the consumers of this function don't need the
/// composability and flexibility of external iteration anyway.
-pub(super) fn breakpoints<'a>(
- p: &'a Preparation<'a>,
- mut f: impl FnMut(usize, Breakpoint),
-) {
+fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
let text = p.bidi.text;
let hyphenate = p.hyphenate != Some(false);
let lb = LINEBREAK_DATA.as_borrowed();
@@ -236,3 +477,49 @@ fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
let bytes = lang.as_str().as_bytes().try_into().ok()?;
hypher::Lang::from_iso(bytes)
}
+
+/// Decides whether the line following `pred_line` must start with a repeated
+/// hyphen.
+fn should_repeat_hyphen(pred_line: &Line) -> bool {
+    // A hyphen is only ever repeated when the predecessor line ends with a
+    // `Dash::HardHyphen`.
+    let ends_on_hard_hyphen = pred_line.dash == Some(Dash::HardHyphen);
+
+    // If a space was trimmed off the end, the hyphen needn't be repeated.
+    // That's the case of a text like "...kebab é a -melhor- comida que
+    // existe", where the hyphens are a kind of emphasis marker.
+    let nothing_trimmed = pred_line.trimmed.end == pred_line.end;
+
+    if !(ends_on_hard_hyphen && nothing_trimmed) {
+        return false;
+    }
+
+    // Repetition is language-specific, so we need the language of the text
+    // item ending the predecessor line. For more information see the
+    // discussion at https://github.com/typst/typst/issues/3235
+    let shape = match pred_line.last.as_ref() {
+        Some(Item::Text(shape)) => shape,
+        _ => return false,
+    };
+
+    match shape.lang {
+        // Languages that always repeat the hyphen:
+        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+        Lang::LOWER_SORBIAN
+        | Lang::CZECH
+        | Lang::CROATIAN
+        | Lang::POLISH
+        | Lang::PORTUGUESE
+        | Lang::SLOVAK => true,
+
+        // Spanish repeats the hyphen only when the word after it is not
+        // capitalized. If no character follows, nothing is repeated.
+        //
+        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+        Lang::SPANISH => {
+            let following = pred_line.bidi.text[pred_line.end..].chars().next();
+            matches!(following, Some(c) if !c.is_uppercase())
+        }
+
+        _ => false,
+    }
+}
diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs
index 49bad3f7..94ac89f0 100644
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
@@ -1,33 +1,33 @@
+mod collect;
+mod finalize;
+mod line;
mod linebreak;
+mod prepare;
mod shaping;
use comemo::{Track, Tracked, TrackedMut};
-use unicode_bidi::{BidiInfo, Level as BidiLevel};
-use unicode_script::{Script, UnicodeScript};
-use self::linebreak::{breakpoints, Breakpoint};
+use self::collect::{collect, Item, Segment, SpanMapper};
+use self::finalize::finalize;
+use self::line::{commit, line, Dash, Line};
+use self::linebreak::{linebreak, Breakpoint};
+use self::prepare::{prepare, Preparation};
use self::shaping::{
- cjk_punct_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
- END_PUNCT_PAT,
+ cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText,
+ BEGIN_PUNCT_PAT, END_PUNCT_PAT,
};
-use crate::diag::{bail, SourceResult};
+use crate::diag::SourceResult;
use crate::engine::{Engine, Route, Sink, Traced};
-use crate::foundations::{Packed, Resolve, Smart, StyleChain};
-use crate::introspection::{Introspector, Locator, LocatorLink, Tag, TagElem};
-use crate::layout::{
- Abs, AlignElem, BoxElem, Dir, Em, FixedAlignment, Fr, Fragment, Frame, FrameItem,
- HElem, InlineElem, InlineItem, Point, Size, Sizing, Spacing,
-};
-use crate::model::{Linebreaks, ParElem};
+use crate::foundations::StyleChain;
+use crate::introspection::{Introspector, Locator, LocatorLink};
+use crate::layout::{Fragment, Size};
+use crate::model::ParElem;
use crate::realize::StyleVec;
-use crate::syntax::Span;
-use crate::text::{
- Costs, Lang, LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem,
- TextElem,
-};
-use crate::utils::Numeric;
use crate::World;
+/// Range of a substring of text, given as a half-open `start..end` pair of
+/// `usize` offsets.
+type Range = std::ops::Range<usize>;
+
/// Layouts content inline.
pub(crate) fn layout_inline(
children: &StyleVec,
@@ -67,17 +67,14 @@ pub(crate) fn layout_inline(
let (text, segments, spans) =
collect(children, &mut engine, locator, &styles, region, consecutive)?;
- // Perform BiDi analysis and then prepare paragraph layout by building a
- // representation on which we can do line breaking without layouting
- // each and every line from scratch.
+ // Perform BiDi analysis and then prepare the paragraph layout.
let p = prepare(&mut engine, children, &text, segments, spans, styles)?;
// Break the paragraph into lines.
let lines = linebreak(&engine, &p, region.x - p.hang);
- // Stack the lines into one frame per region.
- let shrink = ParElem::shrink_in(styles);
- finalize(&mut engine, &p, &lines, region, expand, shrink)
+ // Turn the selected lines into frames.
+ finalize(&mut engine, &p, &lines, styles, region, expand)
}
cached(
@@ -94,1477 +91,3 @@ pub(crate) fn layout_inline(
expand,
)
}
-
-/// Range of a substring of text.
-type Range = std::ops::Range<usize>;
-
-// The characters by which spacing, inline content and pins are replaced in the
-// paragraph's full text.
-const SPACING_REPLACE: &str = " "; // Space
-const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
-const SPACING_REPLACE_CHAR: char = ' ';
-const OBJ_REPLACE_CHAR: char = '\u{FFFC}';
-
-// Unicode BiDi control characters.
-const LTR_EMBEDDING: &str = "\u{202A}";
-const RTL_EMBEDDING: &str = "\u{202B}";
-const POP_EMBEDDING: &str = "\u{202C}";
-const LTR_ISOLATE: &str = "\u{2066}";
-const POP_ISOLATE: &str = "\u{2069}";
-
-/// A paragraph representation in which children are already layouted and text
-/// is already preshaped.
-///
-/// In many cases, we can directly reuse these results when constructing a line.
-/// Only when a line break falls onto a text index that is not safe-to-break per
-/// rustybuzz, we have to reshape that portion.
-struct Preparation<'a> {
- /// Bidirectional text embedding levels for the paragraph.
- bidi: BidiInfo<'a>,
- /// Text runs, spacing and layouted elements.
- items: Vec<Item<'a>>,
- /// The span mapper.
- spans: SpanMapper,
- /// Whether to hyphenate if it's the same for all children.
- hyphenate: Option<bool>,
- /// Costs for various layout decisions.
- costs: Costs,
- /// The text language if it's the same for all children.
- lang: Option<Lang>,
- /// The paragraph's resolved horizontal alignment.
- align: FixedAlignment,
- /// Whether to justify the paragraph.
- justify: bool,
- /// The paragraph's hanging indent.
- hang: Abs,
- /// Whether to add spacing between CJK and Latin characters.
- cjk_latin_spacing: bool,
- /// Whether font fallback is enabled for this paragraph.
- fallback: bool,
- /// The leading of the paragraph.
- leading: Abs,
- /// How to determine line breaks.
- linebreaks: Smart<Linebreaks>,
- /// The text size.
- size: Abs,
-}
-
-impl<'a> Preparation<'a> {
- /// Find the item that contains the given `text_offset`.
- fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
- let mut cursor = 0;
- for item in &self.items {
- let end = cursor + item.textual_len();
- if (cursor..end).contains(&text_offset) {
- return Some(item);
- }
- cursor = end;
- }
- None
- }
-
- /// Return the items that intersect the given `text_range`.
- ///
- /// Returns the expanded range around the items and the items.
- fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
- let mut cursor = 0;
- let mut start = 0;
- let mut end = 0;
- let mut expanded = text_range.clone();
-
- for (i, item) in self.items.iter().enumerate() {
- if cursor <= text_range.start {
- start = i;
- expanded.start = cursor;
- }
-
- let len = item.textual_len();
- if cursor < text_range.end || cursor + len <= text_range.end {
- end = i + 1;
- expanded.end = cursor + len;
- } else {
- break;
- }
-
- cursor += len;
- }
-
- (expanded, &self.items[start..end])
- }
-}
-
-/// An item or not-yet shaped text. We can't shape text until we have collected
-/// all items because only then we can compute BiDi, and we need to split shape
-/// runs at level boundaries.
-#[derive(Debug)]
-enum Segment<'a> {
- /// One or multiple collapsed text children. Stores how long the segment is
- /// (in bytes of the full text string).
- Text(usize, StyleChain<'a>),
- /// An already prepared item.
- Item(Item<'a>),
-}
-
-impl Segment<'_> {
- /// The text length of the item.
- fn textual_len(&self) -> usize {
- match self {
- Self::Text(len, _) => *len,
- Self::Item(item) => item.textual_len(),
- }
- }
-}
-
-/// A prepared item in a paragraph layout.
-#[derive(Debug)]
-enum Item<'a> {
- /// A shaped text run with consistent style and direction.
- Text(ShapedText<'a>),
- /// Absolute spacing between other items, and whether it is weak.
- Absolute(Abs, bool),
- /// Fractional spacing between other items.
- Fractional(Fr, Option<(&'a Packed<BoxElem>, Locator<'a>, StyleChain<'a>)>),
- /// Layouted inline-level content.
- Frame(Frame, StyleChain<'a>),
- /// A tag.
- Tag(&'a Tag),
- /// An item that is invisible and needs to be skipped, e.g. a Unicode
- /// isolate.
- Skip(&'static str),
-}
-
-impl<'a> Item<'a> {
- /// If this a text item, return it.
- fn text(&self) -> Option<&ShapedText<'a>> {
- match self {
- Self::Text(shaped) => Some(shaped),
- _ => None,
- }
- }
-
- /// If this a text item, return it mutably.
- fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
- match self {
- Self::Text(shaped) => Some(shaped),
- _ => None,
- }
- }
-
- /// Return the textual representation of this item: Either just itself (for
- /// a text item) or a replacement string (for any other item).
- fn textual(&self) -> &str {
- match self {
- Self::Text(shaped) => shaped.text,
- Self::Absolute(_, _) | Self::Fractional(_, _) => SPACING_REPLACE,
- Self::Frame(_, _) => OBJ_REPLACE,
- Self::Tag(_) => "",
- Self::Skip(s) => s,
- }
- }
-
- /// The text length of the item.
- fn textual_len(&self) -> usize {
- self.textual().len()
- }
-
- /// The natural layouted width of the item.
- fn width(&self) -> Abs {
- match self {
- Self::Text(shaped) => shaped.width,
- Self::Absolute(v, _) => *v,
- Self::Frame(frame, _) => frame.width(),
- Self::Fractional(_, _) | Self::Tag(_) => Abs::zero(),
- Self::Skip(_) => Abs::zero(),
- }
- }
-}
-
-/// Maps byte offsets back to spans.
-#[derive(Default)]
-struct SpanMapper(Vec<(usize, Span)>);
-
-impl SpanMapper {
- /// Create a new span mapper.
- fn new() -> Self {
- Self::default()
- }
-
- /// Push a span for a segment with the given length.
- fn push(&mut self, len: usize, span: Span) {
- self.0.push((len, span));
- }
-
- /// Determine the span at the given byte offset.
- ///
- /// May return a detached span.
- fn span_at(&self, offset: usize) -> (Span, u16) {
- let mut cursor = 0;
- for &(len, span) in &self.0 {
- if (cursor..cursor + len).contains(&offset) {
- return (span, u16::try_from(offset - cursor).unwrap_or(0));
- }
- cursor += len;
- }
- (Span::detached(), 0)
- }
-}
-
-/// A dash at the end of a line.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub(super) enum Dash {
- /// A hyphen added to break a word.
- SoftHyphen,
- /// Regular hyphen, present in a compound word, e.g. beija-flor.
- HardHyphen,
- /// An em dash.
- Long,
- /// An en dash.
- Short,
-}
-
-/// A layouted line, consisting of a sequence of layouted paragraph items that
-/// are mostly borrowed from the preparation phase. This type enables you to
-/// measure the size of a line in a range before committing to building the
-/// line's frame.
-///
-/// At most two paragraph items must be created individually for this line: The
-/// first and last one since they may be broken apart by the start or end of the
-/// line, respectively. But even those can partially reuse previous results when
-/// the break index is safe-to-break per rustybuzz.
-struct Line<'a> {
- /// Bidi information about the paragraph.
- bidi: &'a BidiInfo<'a>,
- /// The trimmed range the line spans in the paragraph.
- trimmed: Range,
- /// The untrimmed end where the line ends.
- end: usize,
- /// A reshaped text item if the line sliced up a text item at the start.
- first: Option<Item<'a>>,
- /// Inner items which don't need to be reprocessed.
- inner: &'a [Item<'a>],
- /// A reshaped text item if the line sliced up a text item at the end. If
- /// there is only one text item, this takes precedence over `first`.
- last: Option<Item<'a>>,
- /// The width of the line.
- width: Abs,
- /// Whether the line should be justified.
- justify: bool,
- /// Whether the line ends with a hyphen or dash, either naturally or through
- /// hyphenation.
- dash: Option<Dash>,
-}
-
-impl<'a> Line<'a> {
- /// Iterate over the line's items.
- fn items(&self) -> impl Iterator<Item = &Item<'a>> {
- self.first.iter().chain(self.inner).chain(&self.last)
- }
-
- /// Return items that intersect the given `text_range`.
- fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
- let mut cursor = self.trimmed.start;
- let mut start = 0;
- let mut end = 0;
-
- for (i, item) in self.items().enumerate() {
- if cursor <= text_range.start {
- start = i;
- }
-
- let len = item.textual_len();
- if cursor < text_range.end || cursor + len <= text_range.end {
- end = i + 1;
- } else {
- break;
- }
-
- cursor += len;
- }
-
- self.items().skip(start).take(end - start)
- }
-
- /// How many glyphs are in the text where we can insert additional
- /// space when encountering underfull lines.
- fn justifiables(&self) -> usize {
- let mut count = 0;
- for shaped in self.items().filter_map(Item::text) {
- count += shaped.justifiables();
- }
- // CJK character at line end should not be adjusted.
- if self
- .items()
- .last()
- .and_then(Item::text)
- .map(|s| s.cjk_justifiable_at_last())
- .unwrap_or(false)
- {
- count -= 1;
- }
-
- count
- }
-
- /// How much can the line stretch
- fn stretchability(&self) -> Abs {
- self.items().filter_map(Item::text).map(|s| s.stretchability()).sum()
- }
-
- /// How much can the line shrink
- fn shrinkability(&self) -> Abs {
- self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
- }
-
- /// The sum of fractions in the line.
- fn fr(&self) -> Fr {
- self.items()
- .filter_map(|item| match item {
- Item::Fractional(fr, _) => Some(*fr),
- _ => None,
- })
- .sum()
- }
-}
-
-/// Collect all text of the paragraph into one string and layout equations. This
-/// also performs string-level preprocessing like case transformations.
-fn collect<'a>(
- children: &'a StyleVec,
- engine: &mut Engine<'_>,
- locator: Locator<'a>,
- styles: &'a StyleChain<'a>,
- region: Size,
- consecutive: bool,
-) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
- let mut collector = Collector::new(2 + children.len());
- let mut iter = children.chain(styles).peekable();
- let mut locator = locator.split();
-
- let first_line_indent = ParElem::first_line_indent_in(*styles);
- if !first_line_indent.is_zero()
- && consecutive
- && AlignElem::alignment_in(*styles).resolve(*styles).x
- == TextElem::dir_in(*styles).start().into()
- {
- collector.push_item(Item::Absolute(first_line_indent.resolve(*styles), false));
- collector.spans.push(1, Span::detached());
- }
-
- let hang = ParElem::hanging_indent_in(*styles);
- if !hang.is_zero() {
- collector.push_item(Item::Absolute(-hang, false));
- collector.spans.push(1, Span::detached());
- }
-
- let outer_dir = TextElem::dir_in(*styles);
-
- while let Some((child, styles)) = iter.next() {
- let prev_len = collector.full.len();
-
- if child.is::<SpaceElem>() {
- collector.push_text(" ", styles);
- } else if let Some(elem) = child.to_packed::<TextElem>() {
- collector.build_text(styles, |full| {
- let dir = TextElem::dir_in(styles);
- if dir != outer_dir {
- // Insert "Explicit Directional Embedding".
- match dir {
- Dir::LTR => full.push_str(LTR_EMBEDDING),
- Dir::RTL => full.push_str(RTL_EMBEDDING),
- _ => {}
- }
- }
-
- if let Some(case) = TextElem::case_in(styles) {
- full.push_str(&case.apply(elem.text()));
- } else {
- full.push_str(elem.text());
- }
-
- if dir != outer_dir {
- // Insert "Pop Directional Formatting".
- full.push_str(POP_EMBEDDING);
- }
- });
- } else if let Some(elem) = child.to_packed::<HElem>() {
- let amount = elem.amount();
- if amount.is_zero() {
- continue;
- }
-
- collector.push_item(match amount {
- Spacing::Fr(fr) => Item::Fractional(*fr, None),
- Spacing::Rel(rel) => Item::Absolute(
- rel.resolve(styles).relative_to(region.x),
- elem.weak(styles),
- ),
- });
- } else if let Some(elem) = child.to_packed::<LinebreakElem>() {
- collector
- .push_text(if elem.justify(styles) { "\u{2028}" } else { "\n" }, styles);
- } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
- let double = elem.double(styles);
- if elem.enabled(styles) {
- let quotes = SmartQuotes::new(
- elem.quotes(styles),
- TextElem::lang_in(styles),
- TextElem::region_in(styles),
- elem.alternative(styles),
- );
- let peeked = iter.peek().and_then(|(child, _)| {
- if let Some(elem) = child.to_packed::<TextElem>() {
- elem.text().chars().next()
- } else if child.is::<SmartQuoteElem>() {
- Some('"')
- } else if child.is::<SpaceElem>()
- || child.is::<HElem>()
- || child.is::<LinebreakElem>()
- // This is a temporary hack. We should rather skip these
- // and peek at the next child.
- || child.is::<TagElem>()
- {
- Some(SPACING_REPLACE_CHAR)
- } else {
- Some(OBJ_REPLACE_CHAR)
- }
- });
-
- let quote = collector.quoter.quote(&quotes, double, peeked);
- collector.push_quote(quote, styles);
- } else {
- collector.push_text(if double { "\"" } else { "'" }, styles);
- }
- } else if let Some(elem) = child.to_packed::<InlineElem>() {
- collector.push_item(Item::Skip(LTR_ISOLATE));
-
- for item in elem.layout(engine, locator.next(&elem.span()), styles, region)? {
- match item {
- InlineItem::Space(space, weak) => {
- collector.push_item(Item::Absolute(space, weak));
- }
- InlineItem::Frame(frame) => {
- collector.push_item(Item::Frame(frame, styles));
- }
- }
- }
-
- collector.push_item(Item::Skip(POP_ISOLATE));
- } else if let Some(elem) = child.to_packed::<BoxElem>() {
- let loc = locator.next(&elem.span());
- if let Sizing::Fr(v) = elem.width(styles) {
- collector.push_item(Item::Fractional(v, Some((elem, loc, styles))));
- } else {
- let frame = elem.layout(engine, loc, styles, region)?;
- collector.push_item(Item::Frame(frame, styles));
- }
- } else if let Some(elem) = child.to_packed::<TagElem>() {
- collector.push_item(Item::Tag(&elem.tag));
- } else {
- bail!(child.span(), "unexpected paragraph child");
- };
-
- let len = collector.full.len() - prev_len;
- collector.spans.push(len, child.span());
- }
-
- Ok((collector.full, collector.segments, collector.spans))
-}
-
-/// Collects segments.
-struct Collector<'a> {
- full: String,
- segments: Vec<Segment<'a>>,
- spans: SpanMapper,
- quoter: SmartQuoter,
-}
-
-impl<'a> Collector<'a> {
- fn new(capacity: usize) -> Self {
- Self {
- full: String::new(),
- segments: Vec::with_capacity(capacity),
- spans: SpanMapper::new(),
- quoter: SmartQuoter::new(),
- }
- }
-
- fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
- self.full.push_str(text);
- self.push_segment(Segment::Text(text.len(), styles), false);
- }
-
- fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
- where
- F: FnOnce(&mut String),
- {
- let prev = self.full.len();
- f(&mut self.full);
- let len = self.full.len() - prev;
- self.push_segment(Segment::Text(len, styles), false);
- }
-
- fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) {
- self.full.push_str(quote);
- self.push_segment(Segment::Text(quote.len(), styles), true);
- }
-
- fn push_item(&mut self, item: Item<'a>) {
- self.full.push_str(item.textual());
- self.push_segment(Segment::Item(item), false);
- }
-
- fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) {
- if let Some(last) = self.full.chars().last() {
- self.quoter.last(last, is_quote);
- }
-
- if let (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) =
- (self.segments.last_mut(), &segment)
- {
- if *last_styles == *styles {
- *last_len += *len;
- return;
- }
- }
-
- self.segments.push(segment);
- }
-}
-
-/// Prepare paragraph layout by shaping the whole paragraph.
-fn prepare<'a>(
- engine: &mut Engine,
- children: &'a StyleVec,
- text: &'a str,
- segments: Vec<Segment<'a>>,
- spans: SpanMapper,
- styles: StyleChain<'a>,
-) -> SourceResult<Preparation<'a>> {
- let bidi = BidiInfo::new(
- text,
- match TextElem::dir_in(styles) {
- Dir::LTR => Some(BidiLevel::ltr()),
- Dir::RTL => Some(BidiLevel::rtl()),
- _ => None,
- },
- );
-
- let mut cursor = 0;
- let mut items = Vec::with_capacity(segments.len());
-
- // Shape the text to finalize the items.
- for segment in segments {
- let end = cursor + segment.textual_len();
- match segment {
- Segment::Text(_, styles) => {
- shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles);
- }
- Segment::Item(item) => items.push(item),
- }
-
- cursor = end;
- }
-
- let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
- if cjk_latin_spacing {
- add_cjk_latin_spacing(&mut items);
- }
-
- Ok(Preparation {
- bidi,
- items,
- spans,
- hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
- costs: TextElem::costs_in(styles),
- lang: children.shared_get(styles, TextElem::lang_in),
- align: AlignElem::alignment_in(styles).resolve(styles).x,
- justify: ParElem::justify_in(styles),
- hang: ParElem::hanging_indent_in(styles),
- cjk_latin_spacing,
- fallback: TextElem::fallback_in(styles),
- leading: ParElem::leading_in(styles),
- linebreaks: ParElem::linebreaks_in(styles),
- size: TextElem::size_in(styles),
- })
-}
-
-/// Add some spacing between Han characters and western characters.
-/// See Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition in Horizontal
-/// Written Mode
-fn add_cjk_latin_spacing(items: &mut [Item]) {
- let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
- let mut prev: Option<&ShapedGlyph> = None;
- while let Some(item) = items.next() {
- let Some(text) = item.text_mut() else {
- prev = None;
- continue;
- };
-
- // Since we only call this function in [`prepare`], we can assume
- // that the Cow is owned, and `to_mut` can be called without overhead.
- debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_)));
- let mut glyphs = text.glyphs.to_mut().iter_mut().peekable();
-
- while let Some(glyph) = glyphs.next() {
- let next = glyphs.peek().map(|n| n as _).or_else(|| {
- items
- .peek()
- .and_then(|i| i.text())
- .and_then(|shaped| shaped.glyphs.first())
- });
-
- // Case 1: CJ followed by a Latin character
- if glyph.is_cj_script() && next.is_some_and(|g| g.is_letter_or_number()) {
- // The spacing is default to 1/4 em, and can be shrunk to 1/8 em.
- glyph.x_advance += Em::new(0.25);
- glyph.adjustability.shrinkability.1 += Em::new(0.125);
- text.width += Em::new(0.25).at(text.size);
- }
-
- // Case 2: Latin followed by a CJ character
- if glyph.is_cj_script() && prev.is_some_and(|g| g.is_letter_or_number()) {
- glyph.x_advance += Em::new(0.25);
- glyph.x_offset += Em::new(0.25);
- glyph.adjustability.shrinkability.0 += Em::new(0.125);
- text.width += Em::new(0.25).at(text.size);
- }
-
- prev = Some(glyph);
- }
- }
-}
-
-/// Group a range of text by BiDi level and script, shape the runs and generate
-/// items for them.
-fn shape_range<'a>(
- items: &mut Vec<Item<'a>>,
- engine: &Engine,
- bidi: &BidiInfo<'a>,
- range: Range,
- spans: &SpanMapper,
- styles: StyleChain<'a>,
-) {
- let script = TextElem::script_in(styles);
- let lang = TextElem::lang_in(styles);
- let region = TextElem::region_in(styles);
- let mut process = |range: Range, level: BidiLevel| {
- let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
- let shaped = shape(
- engine,
- range.start,
- &bidi.text[range],
- spans,
- styles,
- dir,
- lang,
- region,
- );
- items.push(Item::Text(shaped));
- };
-
- let mut prev_level = BidiLevel::ltr();
- let mut prev_script = Script::Unknown;
- let mut cursor = range.start;
-
- // Group by embedding level and script. If the text's script is explicitly
- // set (rather than inferred from the glyphs), we keep the script at an
- // unchanging `Script::Unknown` so that only level changes cause breaks.
- for i in range.clone() {
- if !bidi.text.is_char_boundary(i) {
- continue;
- }
-
- let level = bidi.levels[i];
- let curr_script = match script {
- Smart::Auto => {
- bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
- }
- Smart::Custom(_) => Script::Unknown,
- };
-
- if level != prev_level || !is_compatible(curr_script, prev_script) {
- if cursor < i {
- process(cursor..i, prev_level);
- }
- cursor = i;
- prev_level = level;
- prev_script = curr_script;
- } else if is_generic_script(prev_script) {
- prev_script = curr_script;
- }
- }
-
- process(cursor..range.end, prev_level);
-}
-
-/// Whether this is not a specific script.
-fn is_generic_script(script: Script) -> bool {
- matches!(script, Script::Unknown | Script::Common | Script::Inherited)
-}
-
-/// Whether these script can be part of the same shape run.
-fn is_compatible(a: Script, b: Script) -> bool {
- is_generic_script(a) || is_generic_script(b) || a == b
-}
-
-/// Find suitable linebreaks.
-fn linebreak<'a>(engine: &Engine, p: &'a Preparation<'a>, width: Abs) -> Vec<Line<'a>> {
- let linebreaks = p.linebreaks.unwrap_or_else(|| {
- if p.justify {
- Linebreaks::Optimized
- } else {
- Linebreaks::Simple
- }
- });
-
- match linebreaks {
- Linebreaks::Simple => linebreak_simple(engine, p, width),
- Linebreaks::Optimized => linebreak_optimized(engine, p, width),
- }
-}
-
-/// Perform line breaking in simple first-fit style. This means that we build
-/// lines greedily, always taking the longest possible line. This may lead to
-/// very unbalanced line, but is fast and simple.
-fn linebreak_simple<'a>(
- engine: &Engine,
- p: &'a Preparation<'a>,
- width: Abs,
-) -> Vec<Line<'a>> {
- let mut lines = Vec::with_capacity(16);
- let mut start = 0;
- let mut last = None;
-
- breakpoints(p, |end, breakpoint| {
- let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false);
-
- // Compute the line and its size.
- let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
-
- // If the line doesn't fit anymore, we push the last fitting attempt
- // into the stack and rebuild the line from the attempt's end. The
- // resulting line cannot be broken up further.
- if !width.fits(attempt.width) {
- if let Some((last_attempt, last_end)) = last.take() {
- lines.push(last_attempt);
- start = last_end;
- attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
- }
- }
-
- // Finish the current line if there is a mandatory line break (i.e.
- // due to "\n") or if the line doesn't fit horizontally already
- // since then no shorter line will be possible.
- if breakpoint == Breakpoint::Mandatory || !width.fits(attempt.width) {
- lines.push(attempt);
- start = end;
- last = None;
- } else {
- last = Some((attempt, end));
- }
- });
-
- if let Some((line, _)) = last {
- lines.push(line);
- }
-
- lines
-}
-
-/// Perform line breaking in optimized Knuth-Plass style. Here, we use more
-/// context to determine the line breaks than in the simple first-fit style. For
-/// example, we might choose to cut a line short even though there is still a
-/// bit of space to improve the fit of one of the following lines. The
-/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
-/// very tight or very loose fit has a higher cost than one that is just right.
-/// Ending a line with a hyphen incurs extra cost and endings two successive
-/// lines with hyphens even more.
-///
-/// To find the layout with the minimal total cost the algorithm uses dynamic
-/// programming: For each possible breakpoint it determines the optimal
-/// paragraph layout _up to that point_. It walks over all possible start points
-/// for a line ending at that point and finds the one for which the cost of the
-/// line plus the cost of the optimal paragraph up to the start point (already
-/// computed and stored in dynamic programming table) is minimal. The final
-/// result is simply the layout determined for the last breakpoint at the end of
-/// text.
-fn linebreak_optimized<'a>(
- engine: &Engine,
- p: &'a Preparation<'a>,
- width: Abs,
-) -> Vec<Line<'a>> {
- /// The cost of a line or paragraph layout.
- type Cost = f64;
-
- /// An entry in the dynamic programming table.
- struct Entry<'a> {
- pred: usize,
- total: Cost,
- line: Line<'a>,
- }
-
- // Cost parameters.
- const DEFAULT_HYPH_COST: Cost = 0.5;
- const DEFAULT_RUNT_COST: Cost = 0.5;
- const CONSECUTIVE_DASH_COST: Cost = 0.3;
- const MAX_COST: Cost = 1_000_000.0;
- const MIN_RATIO: f64 = -1.0;
-
- let hyph_cost = DEFAULT_HYPH_COST * p.costs.hyphenation().get();
- let runt_cost = DEFAULT_RUNT_COST * p.costs.runt().get();
-
- // Dynamic programming table.
- let mut active = 0;
- let mut table = vec![Entry {
- pred: 0,
- total: 0.0,
- line: line(engine, p, 0..0, Breakpoint::Mandatory, false),
- }];
-
- let em = p.size;
- let mut lines = Vec::with_capacity(16);
- breakpoints(p, |end, breakpoint| {
- let k = table.len();
- let is_end = end == p.bidi.text.len();
- let mut best: Option<Entry> = None;
-
- // Find the optimal predecessor.
- for (i, pred) in table.iter().enumerate().skip(active) {
- // Layout the line.
- let start = pred.line.end;
- let prepend_hyphen = should_repeat_hyphen(&pred.line);
-
- let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
-
- // Determine how much the line's spaces would need to be stretched
- // to make it the desired width.
- let delta = width - attempt.width;
- // Determine how much stretch are permitted.
- let adjust = if delta >= Abs::zero() {
- attempt.stretchability()
- } else {
- attempt.shrinkability()
- };
- // Ideally, the ratio should between -1.0 and 1.0, but sometimes a value above 1.0
- // is possible, in which case the line is underfull.
- let mut ratio = delta / adjust;
- if ratio.is_nan() {
- // The line is not stretchable, but it just fits.
- // This often happens with monospace fonts and CJK texts.
- ratio = 0.0;
- }
- if ratio > 1.0 {
- // We should stretch the line above its stretchability. Now
- // calculate the extra amount. Also, don't divide by zero.
- let extra_stretch =
- (delta - adjust) / attempt.justifiables().max(1) as f64;
- // Normalize the amount by half Em size.
- ratio = 1.0 + extra_stretch / (em / 2.0);
- }
-
- // Determine the cost of the line.
- let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
- let mut cost = if ratio < min_ratio {
- // The line is overfull. This is the case if
- // - justification is on, but we'd need to shrink too much
- // - justification is off and the line just doesn't fit
- //
- // If this is the earliest breakpoint in the active set
- // (active == i), remove it from the active set. If there is an
- // earlier one (active < i), then the logically shorter line was
- // in fact longer (can happen with negative spacing) and we
- // can't trim the active set just yet.
- if active == i {
- active += 1;
- }
- MAX_COST
- } else if breakpoint == Breakpoint::Mandatory || is_end {
- // This is a mandatory break and the line is not overfull, so
- // all breakpoints before this one become inactive since no line
- // can span above the mandatory break.
- active = k;
- // If ratio > 0, we need to stretch the line only when justify is needed.
- // If ratio < 0, we always need to shrink the line.
- if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
- ratio.powi(3).abs()
- } else {
- 0.0
- }
- } else {
- // Normal line with cost of |ratio^3|.
- ratio.powi(3).abs()
- };
-
- // Penalize runts.
- if k == i + 1 && is_end {
- cost += runt_cost;
- }
-
- // Penalize hyphens.
- if breakpoint == Breakpoint::Hyphen {
- cost += hyph_cost;
- }
-
- // In Knuth paper, cost = (1 + 100|r|^3 + p)^2 + a,
- // where r is the ratio, p=50 is the penalty, and a=3000 is consecutive the penalty.
- // We divide the whole formula by 10, resulting (0.01 + |r|^3 + p)^2 + a,
- // where p=0.5 and a=0.3
- cost = (0.01 + cost).powi(2);
-
- // Penalize two consecutive dashes (not necessarily hyphens) extra.
- if attempt.dash.is_some() && pred.line.dash.is_some() {
- cost += CONSECUTIVE_DASH_COST;
- }
-
- // The total cost of this line and its chain of predecessors.
- let total = pred.total + cost;
-
- // If this attempt is better than what we had before, take it!
- if best.as_ref().map_or(true, |best| best.total >= total) {
- best = Some(Entry { pred: i, total, line: attempt });
- }
- }
-
- table.push(best.unwrap());
- });
-
- // Retrace the best path.
- let mut idx = table.len() - 1;
- while idx != 0 {
- table.truncate(idx + 1);
- let entry = table.pop().unwrap();
- lines.push(entry.line);
- idx = entry.pred;
- }
-
- lines.reverse();
- lines
-}
-
-/// Create a line which spans the given range.
-fn line<'a>(
- engine: &Engine,
- p: &'a Preparation,
- mut range: Range,
- breakpoint: Breakpoint,
- prepend_hyphen: bool,
-) -> Line<'a> {
- let end = range.end;
- let mut justify =
- p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
-
- if range.is_empty() {
- return Line {
- bidi: &p.bidi,
- end,
- trimmed: range,
- first: None,
- inner: &[],
- last: None,
- width: Abs::zero(),
- justify,
- dash: None,
- };
- }
-
- // Slice out the relevant items.
- let (mut expanded, mut inner) = p.slice(range.clone());
- let mut width = Abs::zero();
-
- // Weak space (Absolute(_, weak=true)) would be removed if at the end of the line
- while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
- // apply it recursively to ensure the last one is not weak space
- inner = before;
- range.end -= 1;
- expanded.end -= 1;
- }
- // Weak space (Absolute(_, weak=true)) would be removed if at the beginning of the line
- while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
- // apply it recursively to ensure the first one is not weak space
- inner = after;
- range.start += 1;
- expanded.end += 1;
- }
-
- // Reshape the last item if it's split in half or hyphenated.
- let mut last = None;
- let mut dash = None;
- if let Some((Item::Text(shaped), before)) = inner.split_last() {
- // Compute the range we want to shape, trimming whitespace at the
- // end of the line.
- let base = expanded.end - shaped.text.len();
- let start = range.start.max(base);
- let text = &p.bidi.text[start..range.end];
- // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
- // we want to trim it too.
- let trimmed = text.trim_end().trim_end_matches('\u{200B}');
- range.end = start + trimmed.len();
-
- // Deal with hyphens, dashes and justification.
- let shy = trimmed.ends_with('\u{ad}');
- let hyphen = breakpoint == Breakpoint::Hyphen;
- dash = if hyphen || shy {
- Some(Dash::SoftHyphen)
- } else if trimmed.ends_with('-') {
- Some(Dash::HardHyphen)
- } else if trimmed.ends_with('–') {
- Some(Dash::Short)
- } else if trimmed.ends_with('—') {
- Some(Dash::Long)
- } else {
- None
- };
- justify |= text.ends_with('\u{2028}');
-
- // Deal with CJK punctuation at line ends.
- let gb_style = cjk_punct_style(shaped.lang, shaped.region);
- let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
- || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
-
- // Usually, we don't want to shape an empty string because:
- // - We don't want the height of trimmed whitespace in a different
- // font to be considered for the line height.
- // - Even if it's in the same font, its unnecessary.
- //
- // There is one exception though. When the whole line is empty, we
- // need the shaped empty string to make the line the appropriate
- // height. That is the case exactly if the string is empty and there
- // are no other items in the line.
- if hyphen
- || start + shaped.text.len() > range.end
- || maybe_adjust_last_glyph
- || prepend_hyphen
- {
- if hyphen || start < range.end || before.is_empty() {
- let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
- if hyphen || shy {
- reshaped.push_hyphen(engine, p.fallback);
- }
-
- if let Some(last_glyph) = reshaped.glyphs.last() {
- if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
- // If the last glyph is a CJK punctuation, we want to shrink it.
- // See Requirements for Chinese Text Layout, Section 3.1.6.3
- // Compression of punctuation marks at line start or line end
- let shrink_amount = last_glyph.shrinkability().1;
- let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
- punct.shrink_right(shrink_amount);
- reshaped.width -= shrink_amount.at(reshaped.size);
- } else if p.cjk_latin_spacing
- && last_glyph.is_cj_script()
- && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
- {
- // If the last glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
- // restore the original width.
- let shrink_amount =
- last_glyph.x_advance - last_glyph.x_offset - Em::one();
- let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
- glyph.x_advance -= shrink_amount;
- glyph.adjustability.shrinkability.1 = Em::zero();
- reshaped.width -= shrink_amount.at(reshaped.size);
- }
- }
-
- width += reshaped.width;
- last = Some(Item::Text(reshaped));
- }
-
- inner = before;
- }
- }
-
- // Deal with CJ characters at line starts.
- let text = &p.bidi.text[range.start..end];
- let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
- || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
-
- // Reshape the start item if it's split in half.
- let mut first = None;
- if let Some((Item::Text(shaped), after)) = inner.split_first() {
- // Compute the range we want to shape.
- let base = expanded.start;
- let end = range.end.min(base + shaped.text.len());
-
- // Reshape if necessary.
- if range.start + shaped.text.len() > end
- || maybe_adjust_first_glyph
- || prepend_hyphen
- {
- // If the range is empty, we don't want to push an empty text item.
- if range.start < end {
- let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
- width += reshaped.width;
- first = Some(Item::Text(reshaped));
- }
-
- inner = after;
- }
- }
-
- if prepend_hyphen {
- let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
- if let Some(reshaped) = reshaped {
- let width_before = reshaped.width;
- reshaped.prepend_hyphen(engine, p.fallback);
- width += reshaped.width - width_before;
- }
- }
-
- if maybe_adjust_first_glyph {
- let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
- if let Some(reshaped) = reshaped {
- if let Some(first_glyph) = reshaped.glyphs.first() {
- if first_glyph.is_cjk_right_aligned_punctuation() {
- // If the first glyph is a CJK punctuation, we want to shrink it.
- let shrink_amount = first_glyph.shrinkability().0;
- let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
- glyph.shrink_left(shrink_amount);
- let amount_abs = shrink_amount.at(reshaped.size);
- reshaped.width -= amount_abs;
- width -= amount_abs;
- } else if p.cjk_latin_spacing
- && first_glyph.is_cj_script()
- && first_glyph.x_offset > Em::zero()
- {
- // If the first glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
- // restore the original width.
- let shrink_amount = first_glyph.x_offset;
- let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
- glyph.x_advance -= shrink_amount;
- glyph.x_offset = Em::zero();
- glyph.adjustability.shrinkability.0 = Em::zero();
- let amount_abs = shrink_amount.at(reshaped.size);
- reshaped.width -= amount_abs;
- width -= amount_abs;
- }
- }
- }
- }
-
- // Measure the inner items.
- for item in inner {
- width += item.width();
- }
-
- Line {
- bidi: &p.bidi,
- trimmed: range,
- end,
- first,
- inner,
- last,
- width,
- justify,
- dash,
- }
-}
-
-/// Combine layouted lines into one frame per region.
-fn finalize(
- engine: &mut Engine,
- p: &Preparation,
- lines: &[Line],
- region: Size,
- expand: bool,
- shrink: bool,
-) -> SourceResult<Fragment> {
- // Determine the paragraph's width: Full width of the region if we
- // should expand or there's fractional spacing, fit-to-width otherwise.
- let width = if !region.x.is_finite()
- || (!expand && lines.iter().all(|line| line.fr().is_zero()))
- {
- region
- .x
- .min(p.hang + lines.iter().map(|line| line.width).max().unwrap_or_default())
- } else {
- region.x
- };
-
- // Stack the lines into one frame per region.
- let mut frames: Vec<Frame> = lines
- .iter()
- .map(|line| commit(engine, p, line, width, region.y, shrink))
- .collect::<SourceResult<_>>()?;
-
- // Positive ratios enable prevention, while zero and negative ratios disable it.
- if p.costs.orphan().get() > 0.0 {
- // Prevent orphans.
- if frames.len() >= 2 && !frames[1].is_empty() {
- let second = frames.remove(1);
- let first = &mut frames[0];
- merge(first, second, p.leading);
- }
- }
- if p.costs.widow().get() > 0.0 {
- // Prevent widows.
- let len = frames.len();
- if len >= 2 && !frames[len - 2].is_empty() {
- let second = frames.pop().unwrap();
- let first = frames.last_mut().unwrap();
- merge(first, second, p.leading);
- }
- }
-
- Ok(Fragment::frames(frames))
-}
-
-/// Merge two line frames
-fn merge(first: &mut Frame, second: Frame, leading: Abs) {
- let offset = first.height() + leading;
- let total = offset + second.height();
- first.push_frame(Point::with_y(offset), second);
- first.size_mut().y = total;
-}
-
-/// Commit to a line and build its frame.
-fn commit(
- engine: &mut Engine,
- p: &Preparation,
- line: &Line,
- width: Abs,
- full: Abs,
- shrink: bool,
-) -> SourceResult<Frame> {
- let mut remaining = width - line.width - p.hang;
- let mut offset = Abs::zero();
-
- // Reorder the line from logical to visual order.
- let (reordered, starts_rtl) = reorder(line);
- if !starts_rtl {
- offset += p.hang;
- }
-
- // Handle hanging punctuation to the left.
- if let Some(Item::Text(text)) = reordered.first() {
- if let Some(glyph) = text.glyphs.first() {
- if !text.dir.is_positive()
- && TextElem::overhang_in(text.styles)
- && (reordered.len() > 1 || text.glyphs.len() > 1)
- {
- let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
- offset -= amount;
- remaining += amount;
- }
- }
- }
-
- // Handle hanging punctuation to the right.
- if let Some(Item::Text(text)) = reordered.last() {
- if let Some(glyph) = text.glyphs.last() {
- if text.dir.is_positive()
- && TextElem::overhang_in(text.styles)
- && (reordered.len() > 1 || text.glyphs.len() > 1)
- {
- let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
- remaining += amount;
- }
- }
- }
-
- // Determine how much additional space is needed.
- // The justification_ratio is for the first step justification,
- // extra_justification is for the last step.
- // For more info on multi-step justification, see Procedures for Inter-
- // Character Space Expansion in W3C document Chinese Layout Requirements.
- let fr = line.fr();
- let mut justification_ratio = 0.0;
- let mut extra_justification = Abs::zero();
-
- let shrinkability = line.shrinkability();
- let stretch = line.stretchability();
- if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
- // Attempt to reduce the length of the line, using shrinkability.
- justification_ratio = (remaining / shrinkability).max(-1.0);
- remaining = (remaining + shrinkability).min(Abs::zero());
- } else if line.justify && fr.is_zero() {
- // Attempt to increase the length of the line, using stretchability.
- if stretch > Abs::zero() {
- justification_ratio = (remaining / stretch).min(1.0);
- remaining = (remaining - stretch).max(Abs::zero());
- }
-
- let justifiables = line.justifiables();
- if justifiables > 0 && remaining > Abs::zero() {
- // Underfull line, distribute the extra space.
- extra_justification = remaining / justifiables as f64;
- remaining = Abs::zero();
- }
- }
-
- let mut top = Abs::zero();
- let mut bottom = Abs::zero();
-
- // Build the frames and determine the height and baseline.
- let mut frames = vec![];
- for item in reordered {
- let mut push = |offset: &mut Abs, frame: Frame| {
- let width = frame.width();
- top.set_max(frame.baseline());
- bottom.set_max(frame.size().y - frame.baseline());
- frames.push((*offset, frame));
- *offset += width;
- };
-
- match item {
- Item::Absolute(v, _) => {
- offset += *v;
- }
- Item::Fractional(v, elem) => {
- let amount = v.share(fr, remaining);
- if let Some((elem, loc, styles)) = elem {
- let region = Size::new(amount, full);
- let mut frame =
- elem.layout(engine, loc.relayout(), *styles, region)?;
- frame.post_process(*styles);
- frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
- push(&mut offset, frame);
- } else {
- offset += amount;
- }
- }
- Item::Text(shaped) => {
- let mut frame =
- shaped.build(engine, justification_ratio, extra_justification);
- frame.post_process(shaped.styles);
- push(&mut offset, frame);
- }
- Item::Frame(frame, styles) => {
- let mut frame = frame.clone();
- frame.post_process(*styles);
- frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
- push(&mut offset, frame);
- }
- Item::Tag(tag) => {
- let mut frame = Frame::soft(Size::zero());
- frame.push(Point::zero(), FrameItem::Tag((*tag).clone()));
- frames.push((offset, frame));
- }
- Item::Skip(_) => {}
- }
- }
-
- // Remaining space is distributed now.
- if !fr.is_zero() {
- remaining = Abs::zero();
- }
-
- let size = Size::new(width, top + bottom);
- let mut output = Frame::soft(size);
- output.set_baseline(top);
-
- // Construct the line's frame.
- for (offset, frame) in frames {
- let x = offset + p.align.position(remaining);
- let y = top - frame.baseline();
- output.push_frame(Point::new(x, y), frame);
- }
-
- Ok(output)
-}
-
-/// Return a line's items in visual order.
-fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
- let mut reordered = vec![];
-
- // The bidi crate doesn't like empty lines.
- if line.trimmed.is_empty() {
- return (line.slice(line.trimmed.clone()).collect(), false);
- }
-
- // Find the paragraph that contains the line.
- let para = line
- .bidi
- .paragraphs
- .iter()
- .find(|para| para.range.contains(&line.trimmed.start))
- .unwrap();
-
- // Compute the reordered ranges in visual order (left to right).
- let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
- let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
-
- // Collect the reordered items.
- for run in runs {
- // Skip reset L1 runs because handling them would require reshaping
- // again in some cases.
- if line.bidi.levels[run.start] != levels[run.start] {
- continue;
- }
-
- let prev = reordered.len();
- reordered.extend(line.slice(run.clone()));
-
- if levels[run.start].is_rtl() {
- reordered[prev..].reverse();
- }
- }
-
- (reordered, starts_rtl)
-}
-
-/// How much a character should hang into the end margin.
-///
-/// For more discussion, see:
-/// <https://recoveringphysicist.com/21/>
-fn overhang(c: char) -> f64 {
- match c {
- // Dashes.
- '–' | '—' => 0.2,
- '-' => 0.55,
-
- // Punctuation.
- '.' | ',' => 0.8,
- ':' | ';' => 0.3,
-
- // Arabic
- '\u{60C}' | '\u{6D4}' => 0.4,
-
- _ => 0.0,
- }
-}
-
-/// Whether the hyphen should repeat at the start of the next line.
-fn should_repeat_hyphen(pred_line: &Line) -> bool {
- // If the predecessor line does not end with a Dash::HardHyphen, we shall
- // not place a hyphen at the start of the next line.
- if pred_line.dash != Some(Dash::HardHyphen) {
- return false;
- }
-
- // If there's a trimmed out space, we needn't repeat the hyphen. That's the
- // case of a text like "...kebab é a -melhor- comida que existe", where the
- // hyphens are a kind of emphasis marker.
- if pred_line.trimmed.end != pred_line.end {
- return false;
- }
-
- // The hyphen should repeat only in the languages that require that feature.
- // For more information see the discussion at https://github.com/typst/typst/issues/3235
- let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
-
- match shape.lang {
- // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
- // - Czech: see https://prirucka.ujc.cas.cz/?id=164
- // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
- // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
- // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
- // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
- Lang::LOWER_SORBIAN
- | Lang::CZECH
- | Lang::CROATIAN
- | Lang::POLISH
- | Lang::PORTUGUESE
- | Lang::SLOVAK => true,
- // In Spanish the hyphen is required only if the word next to hyphen is
- // not capitalized. Otherwise, the hyphen must not be repeated.
- //
- // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
- // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
- Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
- .chars()
- .next()
- .map(|c| !c.is_uppercase())
- .unwrap_or(false),
- _ => false,
- }
-}
diff --git a/crates/typst/src/layout/inline/prepare.rs b/crates/typst/src/layout/inline/prepare.rs
new file mode 100644
index 00000000..90d8d5a4
--- /dev/null
+++ b/crates/typst/src/layout/inline/prepare.rs
@@ -0,0 +1,194 @@
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
+
+use super::*;
+use crate::foundations::{Resolve, Smart};
+use crate::layout::{Abs, AlignElem, Dir, Em, FixedAlignment};
+use crate::model::Linebreaks;
+use crate::text::{Costs, Lang, TextElem};
+
+/// A paragraph representation in which children are already layouted and text
+/// is already preshaped.
+///
+/// In many cases, we can directly reuse these results when constructing a line.
+/// Only when a line break falls onto a text index that is not safe-to-break per
+/// rustybuzz, we have to reshape that portion.
+///
+/// Values of this type are assembled by [`prepare`].
+pub struct Preparation<'a> {
+ /// Bidirectional text embedding levels for the paragraph, computed over
+ /// the paragraph's full text.
+ pub bidi: BidiInfo<'a>,
+ /// Text runs, spacing and layouted elements, in the order of the segments
+ /// they were built from.
+ pub items: Vec<Item<'a>>,
+ /// The span mapper.
+ pub spans: SpanMapper,
+ /// Whether to hyphenate, if that setting is the same for all children.
+ pub hyphenate: Option<bool>,
+ /// Costs for various layout decisions.
+ pub costs: Costs,
+ /// The text language, if it is the same for all children.
+ pub lang: Option<Lang>,
+ /// The paragraph's resolved horizontal alignment.
+ pub align: FixedAlignment,
+ /// Whether to justify the paragraph.
+ pub justify: bool,
+ /// The paragraph's hanging indent.
+ pub hang: Abs,
+ /// Whether to add spacing between CJK and Latin characters. This is set
+ /// when the corresponding text style is `auto`.
+ pub cjk_latin_spacing: bool,
+ /// Whether font fallback is enabled for this paragraph.
+ pub fallback: bool,
+ /// The leading of the paragraph.
+ pub leading: Abs,
+ /// How to determine line breaks. May be left as `Smart::Auto`.
+ pub linebreaks: Smart<Linebreaks>,
+ /// The text size, read from the style chain passed to [`prepare`].
+ pub size: Abs,
+}
+
+impl<'a> Preparation<'a> {
+    /// Look up the item whose text span contains `text_offset`.
+    ///
+    /// Items are walked in order while their textual lengths are summed up.
+    /// The first item whose half-open span `start..stop` contains the offset
+    /// is returned. Zero-length items have an empty span and never match.
+    /// Returns `None` if the offset lies beyond the last item.
+    pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
+        let mut start = 0;
+        self.items.iter().find(|item| {
+            let stop = start + item.textual_len();
+            let hit = start <= text_offset && text_offset < stop;
+            start = stop;
+            hit
+        })
+    }
+
+    /// Return the items that intersect the given `text_range`.
+    ///
+    /// The result is a pair of the text range actually covered by the
+    /// returned items (which may extend past `text_range` on both sides) and
+    /// the slice of items itself.
+    pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
+        let mut offset = 0;
+        let mut first = 0;
+        let mut past_last = 0;
+        let mut covered = text_range.clone();
+
+        for (idx, item) in self.items.iter().enumerate() {
+            // The last item that starts at or before the range start wins.
+            if offset <= text_range.start {
+                first = idx;
+                covered.start = offset;
+            }
+
+            // An item belongs to the slice if it starts before the range end.
+            // The second clause additionally admits zero-length items that
+            // sit exactly at the range end.
+            let item_end = offset + item.textual_len();
+            let intersects = offset < text_range.end || item_end <= text_range.end;
+            if !intersects {
+                break;
+            }
+
+            past_last = idx + 1;
+            covered.end = item_end;
+            offset = item_end;
+        }
+
+        (covered, &self.items[first..past_last])
+    }
+}
+
+/// Runs BiDi analysis over the paragraph text and turns the collected
+/// segments into a [`Preparation`], so that line breaking can work on
+/// preshaped items instead of layouting every candidate line from scratch.
+///
+/// Text segments are shaped via `shape_range`; all other segments already
+/// carry a finished item, which is taken over as-is.
+#[typst_macros::time]
+pub fn prepare<'a>(
+    engine: &mut Engine,
+    children: &'a StyleVec,
+    text: &'a str,
+    segments: Vec<Segment<'a>>,
+    spans: SpanMapper,
+    styles: StyleChain<'a>,
+) -> SourceResult<Preparation<'a>> {
+    // Only an explicit horizontal direction pins the base level. For any
+    // other direction, the choice is left to `BidiInfo::new`.
+    let base_level = match TextElem::dir_in(styles) {
+        Dir::LTR => Some(BidiLevel::ltr()),
+        Dir::RTL => Some(BidiLevel::rtl()),
+        _ => None,
+    };
+    let bidi = BidiInfo::new(text, base_level);
+
+    // Walk the segments while tracking the text offset at which each starts.
+    let mut items = Vec::with_capacity(segments.len());
+    let mut offset = 0;
+    for segment in segments {
+        let stop = offset + segment.textual_len();
+        match segment {
+            Segment::Item(item) => items.push(item),
+            Segment::Text(_, run_styles) => {
+                shape_range(&mut items, engine, &bidi, offset..stop, &spans, run_styles)
+            }
+        }
+        offset = stop;
+    }
+
+    // The spacing pass runs only when the style is left at `auto`.
+    let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
+    if cjk_latin_spacing {
+        add_cjk_latin_spacing(&mut items);
+    }
+
+    Ok(Preparation {
+        bidi,
+        items,
+        spans,
+        hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
+        costs: TextElem::costs_in(styles),
+        lang: children.shared_get(styles, TextElem::lang_in),
+        align: AlignElem::alignment_in(styles).resolve(styles).x,
+        justify: ParElem::justify_in(styles),
+        hang: ParElem::hanging_indent_in(styles),
+        cjk_latin_spacing,
+        fallback: TextElem::fallback_in(styles),
+        leading: ParElem::leading_in(styles),
+        linebreaks: ParElem::linebreaks_in(styles),
+        size: TextElem::size_in(styles),
+    })
+}
+
+/// Add some spacing between Han characters and western characters. See
+/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
+/// in Horizontal Written Mode
+///
+/// The extra space is always attached to the CJ glyph: after it when a
+/// letter or number follows, before it when a letter or number precedes.
+/// The width of the containing shaped text is increased accordingly.
+fn add_cjk_latin_spacing(items: &mut [Item]) {
+ // Tags are filtered out up front, so they do not interrupt adjacency
+ // between two text items.
+ let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
+ // The previously visited glyph. It survives across directly adjacent
+ // text items and is reset by any other (non-tag) item.
+ let mut prev: Option<&ShapedGlyph> = None;
+ while let Some(item) = items.next() {
+ let Some(text) = item.text_mut() else {
+ prev = None;
+ continue;
+ };
+
+ // Since we only call this function in [`prepare`], we can assume that
+ // the Cow is owned, and `to_mut` can be called without overhead.
+ debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_)));
+ let mut glyphs = text.glyphs.to_mut().iter_mut().peekable();
+
+ while let Some(glyph) = glyphs.next() {
+ // The glyph after this one: either the next glyph of the same item
+ // or, at the end of the item, the first glyph of the next item if
+ // that item is text.
+ let next = glyphs.peek().map(|n| n as _).or_else(|| {
+ items
+ .peek()
+ .and_then(|i| i.text())
+ .and_then(|shaped| shaped.glyphs.first())
+ });
+
+ // Case 1: CJ followed by a Latin character
+ if glyph.is_cj_script() && next.is_some_and(|g| g.is_letter_or_number()) {
+ // The spacing defaults to 1/4 em, and can be shrunk to 1/8 em.
+ glyph.x_advance += Em::new(0.25);
+ glyph.adjustability.shrinkability.1 += Em::new(0.125);
+ text.width += Em::new(0.25).at(text.size);
+ }
+
+ // Case 2: Latin followed by a CJ character
+ if glyph.is_cj_script() && prev.is_some_and(|g| g.is_letter_or_number()) {
+ // Same amounts as in case 1, but the glyph is also offset by the
+ // added 1/4 em, so the space ends up in front of it.
+ glyph.x_advance += Em::new(0.25);
+ glyph.x_offset += Em::new(0.25);
+ glyph.adjustability.shrinkability.0 += Em::new(0.125);
+ text.width += Em::new(0.25).at(text.size);
+ }
+
+ prev = Some(glyph);
+ }
+ }
+}
diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs
index 15752f1b..44b65391 100644
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
-use std::ops::Range;
use std::str::FromStr;
use std::sync::Arc;
@@ -8,11 +7,12 @@ use az::SaturatingAs;
use ecow::EcoString;
use rustybuzz::{ShapePlan, UnicodeBuffer};
use ttf_parser::Tag;
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
use unicode_script::{Script, UnicodeScript};
-use super::SpanMapper;
+use super::{Item, Range, SpanMapper};
use crate::engine::Engine;
-use crate::foundations::StyleChain;
+use crate::foundations::{Smart, StyleChain};
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
use crate::syntax::Span;
use crate::text::{
@@ -27,7 +27,7 @@ use crate::World;
/// This type contains owned or borrowed shaped text runs, which can be
/// measured, used to reshape substrings more quickly and converted into a
/// frame.
-pub(super) struct ShapedText<'a> {
+pub struct ShapedText<'a> {
/// The start of the text in the full paragraph.
pub base: usize,
/// The text that was shaped.
@@ -52,7 +52,7 @@ pub(super) struct ShapedText<'a> {
/// A single glyph resulting from shaping.
#[derive(Debug, Clone)]
-pub(super) struct ShapedGlyph {
+pub struct ShapedGlyph {
/// The font the glyph is contained in.
pub font: Font,
/// The glyph's index in the font.
@@ -69,12 +69,11 @@ pub(super) struct ShapedGlyph {
/// is a sequence of one or multiple glyphs that cannot be separated and
/// must always be treated as a union.
///
- /// The range values of the glyphs in a [`ShapedText`] should not
- /// overlap with each other, and they should be monotonically
- /// increasing (for left-to-right or top-to-bottom text) or
- /// monotonically decreasing (for right-to-left or bottom-to-top
- /// text).
- pub range: Range<usize>,
+ /// The range values of the glyphs in a [`ShapedText`] should not overlap
+ /// with each other, and they should be monotonically increasing (for
+ /// left-to-right or top-to-bottom text) or monotonically decreasing (for
+ /// right-to-left or bottom-to-top text).
+ pub range: Range,
/// Whether splitting the shaping result before this glyph would yield the
/// same results as shaping the parts to both sides of `text_index`
/// separately.
@@ -90,7 +89,7 @@ pub(super) struct ShapedGlyph {
}
#[derive(Debug, Clone, Default)]
-pub(super) struct Adjustability {
+pub struct Adjustability {
/// The left and right strechability
pub stretchability: (Em, Em),
/// The left and right shrinkability
@@ -414,7 +413,7 @@ impl<'a> ShapedText<'a> {
&'a self,
engine: &Engine,
spans: &SpanMapper,
- text_range: Range<usize>,
+ text_range: Range,
) -> ShapedText<'a> {
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
@@ -508,7 +507,7 @@ impl<'a> ShapedText<'a> {
/// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break.
- fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
+ fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> {
let Range { mut start, mut end } = text_range;
if !self.dir.is_positive() {
std::mem::swap(&mut start, &mut end);
@@ -552,8 +551,9 @@ impl<'a> ShapedText<'a> {
// text_index: ^
// glyphs: 0 . 1
//
- // We will get found = Err(1), because '\n' does not have a glyph.
- // But it's safe to break here. Thus the following condition:
+ // We will get found = Err(1), because '\n' does not have a
+ // glyph. But it's safe to break here. Thus the following
+ // condition:
// - glyphs[0].end == text_index == 3
// - text[3] == '\n'
return (idx > 0
@@ -589,23 +589,82 @@ impl Debug for ShapedText<'_> {
}
}
-/// Holds shaping results and metadata common to all shaped segments.
-struct ShapingContext<'a, 'v> {
- engine: &'a Engine<'v>,
- spans: &'a SpanMapper,
- glyphs: Vec<ShapedGlyph>,
- used: Vec<Font>,
+/// Group a range of text by BiDi level and script, shape the runs and generate
+/// items for them: each run of `bidi.text` is shaped with its start offset as base and pushed onto `items` as an `Item::Text`.
+pub fn shape_range<'a>(
+ items: &mut Vec<Item<'a>>,
+ engine: &Engine,
+ bidi: &BidiInfo<'a>,
+ range: Range,
+ spans: &SpanMapper,
styles: StyleChain<'a>,
- size: Abs,
- variant: FontVariant,
- features: Vec<rustybuzz::Feature>,
- fallback: bool,
- dir: Dir,
+) {
+ let script = TextElem::script_in(styles);
+ let lang = TextElem::lang_in(styles);
+ let region = TextElem::region_in(styles);
+ let mut process = |range: Range, level: BidiLevel| {
+ let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
+ let shaped = shape(
+ engine,
+ range.start,
+ &bidi.text[range],
+ spans,
+ styles,
+ dir,
+ lang,
+ region,
+ );
+ items.push(Item::Text(shaped));
+ };
+
+ let mut prev_level = BidiLevel::ltr();
+ let mut prev_script = Script::Unknown;
+ let mut cursor = range.start;
+
+ // Group by embedding level and script. If the text's script is explicitly
+ // set (rather than inferred from the glyphs), we keep the script at an
+ // unchanging `Script::Unknown` so that only level changes cause breaks.
+ for i in range.clone() {
+ if !bidi.text.is_char_boundary(i) {
+ continue; // `i` is a byte offset in the middle of a multi-byte char
+ }
+
+ let level = bidi.levels[i];
+ let curr_script = match script {
+ Smart::Auto => {
+ bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+ }
+ Smart::Custom(_) => Script::Unknown,
+ };
+
+ if level != prev_level || !is_compatible(curr_script, prev_script) {
+ if cursor < i {
+ process(cursor..i, prev_level); // flush the non-empty run ending before `i`
+ }
+ cursor = i;
+ prev_level = level;
+ prev_script = curr_script;
+ } else if is_generic_script(prev_script) {
+ prev_script = curr_script; // a run that was generic so far adopts the current char's script
+ }
+ }
+
+ process(cursor..range.end, prev_level); // flush the final run (no emptiness check here)
+}
+
+/// Whether this is not a specific script, i.e. one of `Unknown`, `Common` or `Inherited`.
+fn is_generic_script(script: Script) -> bool {
+ matches!(script, Script::Unknown | Script::Common | Script::Inherited)
+}
+
+/// Whether these scripts can be part of the same shape run: true if either one is generic or both are equal.
+fn is_compatible(a: Script, b: Script) -> bool {
+ is_generic_script(a) || is_generic_script(b) || a == b
}
/// Shape text into [`ShapedText`].
#[allow(clippy::too_many_arguments)]
-pub(super) fn shape<'a>(
+fn shape<'a>(
engine: &Engine,
base: usize,
text: &'a str,
@@ -655,6 +714,20 @@ pub(super) fn shape<'a>(
}
}
+/// Holds shaping results and metadata common to all shaped segments.
+struct ShapingContext<'a, 'v> {
+ engine: &'a Engine<'v>,
+ spans: &'a SpanMapper,
+ glyphs: Vec<ShapedGlyph>,
+ used: Vec<Font>,
+ styles: StyleChain<'a>,
+ size: Abs,
+ variant: FontVariant,
+ features: Vec<rustybuzz::Feature>,
+ fallback: bool,
+ dir: Dir,
+}
+
/// Shape text with font fallback using the `families` iterator.
fn shape_segment<'a>(
ctx: &mut ShapingContext,
@@ -712,8 +785,7 @@ fn shape_segment<'a>(
buffer.guess_segment_properties();
// Prepare the shape plan. This plan depends on direction, script, language,
- // and features, but is independent from the text and can thus be
- // memoized.
+ // and features, but is independent from the text and can thus be memoized.
let plan = create_shape_plan(
&font,
buffer.direction(),
@@ -908,7 +980,7 @@ fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<
continue;
}
- // Now we apply consecutive punctuation adjustment, specified in Chinese Layout
+ // Now we apply consecutive punctuation adjustment, as specified in Chinese Layout
// Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout
// Requirements, section 3.1 Line Composition Rules for Punctuation Marks
let Some(next) = glyphs.peek_mut() else { continue };
@@ -945,7 +1017,7 @@ fn language(styles: StyleChain) -> rustybuzz::Language {
/// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
#[cfg(debug_assertions)]
-fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<usize>) {
+fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) {
if glyphs
.iter()
.any(|g| g.range.start < range.start || g.range.end > range.end)
@@ -954,9 +1026,11 @@ fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<u
}
}
-/// Asserts that the ranges of `glyphs` is in the proper order according to `dir`.
+/// Asserts that the ranges of `glyphs` are in the proper order according to
+/// `dir`.
///
-/// This asserts instead of returning a bool in order to provide a more informative message when the invariant is violated.
+/// This asserts instead of returning a bool in order to provide a more
+/// informative message when the invariant is violated.
#[cfg(debug_assertions)]
fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
if glyphs.is_empty() {
@@ -981,15 +1055,15 @@ fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
}
// The CJK punctuation that can appear at the beginning or end of a line.
-pub(super) const BEGIN_PUNCT_PAT: &[char] =
+pub const BEGIN_PUNCT_PAT: &[char] =
&['“', '‘', '《', '〈', '(', '『', '「', '【', '〖', '〔', '[', '{'];
-pub(super) const END_PUNCT_PAT: &[char] = &[
+pub const END_PUNCT_PAT: &[char] = &[
'”', '’', ',', '.', '。', '、', ':', ';', '》', '〉', ')', '』', '」', '】',
'〗', '〕', ']', '}', '?', '!',
];
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub(super) enum CjkPunctStyle {
+pub enum CjkPunctStyle {
/// Standard GB/T 15834-2011, used mostly in mainland China.
Gb,
/// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
@@ -998,7 +1072,7 @@ pub(super) enum CjkPunctStyle {
Jis,
}
-pub(super) fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
+pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
match (lang, region.as_ref().map(Region::as_str)) {
(Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
(Lang::JAPANESE, _) => CjkPunctStyle::Jis,
@@ -1013,7 +1087,7 @@ fn is_space(c: char) -> bool {
}
/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
-pub(super) fn is_of_cj_script(c: char) -> bool {
+pub fn is_of_cj_script(c: char) -> bool {
is_cj_script(c, c.script())
}
@@ -1047,8 +1121,9 @@ fn is_cjk_left_aligned_punctuation(
}
if matches!(style, Gb) && matches!(c, '?' | '!') {
- // In GB style, exclamations and question marks are also left aligned and can be adjusted.
- // Note that they are not adjustable in other styles.
+ // In GB style, exclamations and question marks are also left aligned
+ // and can be adjusted. Note that they are not adjustable in other
+ // styles.
return true;
}