Refactor line building (#4497)

author: Laurenz <laurmaedje@gmail.com> 2024-07-04 12:57:40 +0200
committer: GitHub <noreply@github.com> 2024-07-04 10:57:40 +0000
commit: 0ef672c347f368325313c8bccc4f70e3f1016b0a (patch)
tree: 8971d6b305d805b42b55e1e85613e4f4c5ba175d
parent: 75246f930b9041c206a8a3c87e6db03bfc9111fd (diff)
22 files changed, 634 insertions, 500 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3e99ea81..14dd36f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2604,8 +2604,7 @@ dependencies = [
 [[package]]
 name = "typst-assets"
 version = "0.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f13f85360328da54847dd7fefaf272dfa5b6d1fdeb53f32938924c39bf5b2c6c"
+source = "git+https://github.com/typst/typst-assets?rev=4ee794c#4ee794cf8fb98eb67194e757c9820ab8562d853b"
 
 [[package]]
 name = "typst-cli"
@@ -2656,7 +2655,7 @@ dependencies = [
 [[package]]
 name = "typst-dev-assets"
 version = "0.11.0"
-source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d9de82b631bc775124a69384c8d860db04#48a924d9de82b631bc775124a69384c8d860db04"
+source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d#48a924d9de82b631bc775124a69384c8d860db04"
 
 [[package]]
 name = "typst-docs"
diff --git a/Cargo.toml b/Cargo.toml
index ee50b666..1b5bf0f4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,8 +26,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.11.0" }
 typst-syntax = { path = "crates/typst-syntax", version = "0.11.0" }
 typst-timing = { path = "crates/typst-timing", version = "0.11.0" }
 typst-utils = { path = "crates/typst-utils", version = "0.11.0" }
-typst-assets = "0.11.0"
-typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d9de82b631bc775124a69384c8d860db04" }
+typst-assets = { git = "https://github.com/typst/typst-assets", rev = "4ee794c" }
+typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d" }
 az = "1.2"
 base64 = "0.22"
 bitflags = { version = "2", features = ["serde"] }
diff --git a/crates/typst/src/introspection/mod.rs b/crates/typst/src/introspection/mod.rs
index c9dba244..6c982afb 100644
--- a/crates/typst/src/introspection/mod.rs
+++ b/crates/typst/src/introspection/mod.rs
@@ -116,6 +116,6 @@ impl Tag {
 
 impl Debug for Tag {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(f, "Tag({:?})", self.elem)
+        write!(f, "Tag({:?})", self.elem.elem().name())
     }
 }
diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs
index 79d0d59f..f1607460 100644
--- a/crates/typst/src/layout/inline/collect.rs
+++ b/crates/typst/src/layout/inline/collect.rs
@@ -79,7 +79,7 @@ impl<'a> Item<'a> {
     }
 
     /// The natural layouted width of the item.
-    pub fn width(&self) -> Abs {
+    pub fn natural_width(&self) -> Abs {
         match self {
             Self::Text(shaped) => shaped.width,
             Self::Absolute(v, _) => *v,
diff --git a/crates/typst/src/layout/inline/line.rs b/crates/typst/src/layout/inline/line.rs
index 232a1c6b..12162ab1 100644
--- a/crates/typst/src/layout/inline/line.rs
+++ b/crates/typst/src/layout/inline/line.rs
@@ -1,11 +1,18 @@
-use unicode_bidi::BidiInfo;
+use std::fmt::{self, Debug, Formatter};
+use std::ops::{Deref, DerefMut};
 
 use super::*;
 use crate::engine::Engine;
-use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
+use crate::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point};
 use crate::text::{Lang, TextElem};
 use crate::utils::Numeric;
 
+const SHY: char = '\u{ad}';
+const HYPHEN: char = '-';
+const EN_DASH: char = '–';
+const EM_DASH: char = '—';
+const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks.
+
 /// A layouted line, consisting of a sequence of layouted paragraph items that
 /// are mostly borrowed from the preparation phase. This type enables you to
 /// measure the size of a line in a range before committing to building the
@@ -16,20 +23,9 @@ use crate::utils::Numeric;
 /// line, respectively. But even those can partially reuse previous results when
 /// the break index is safe-to-break per rustybuzz.
 pub struct Line<'a> {
-    /// Bidi information about the paragraph.
-    pub bidi: &'a BidiInfo<'a>,
-    /// The trimmed range the line spans in the paragraph.
-    pub trimmed: Range,
-    /// The untrimmed end where the line ends.
-    pub end: usize,
-    /// A reshaped text item if the line sliced up a text item at the start.
-    pub first: Option<Item<'a>>,
-    /// Inner items which don't need to be reprocessed.
-    pub inner: &'a [Item<'a>],
-    /// A reshaped text item if the line sliced up a text item at the end. If
-    /// there is only one text item, this takes precedence over `first`.
-    pub last: Option<Item<'a>>,
-    /// The width of the line.
+    /// The items the line is made of.
+    pub items: Items<'a>,
+    /// The exact natural width of the line.
     pub width: Abs,
     /// Whether the line should be justified.
     pub justify: bool,
@@ -39,45 +35,27 @@ pub struct Line<'a> {
 }
 
 impl<'a> Line<'a> {
-    /// Iterate over the line's items.
-    pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
-        self.first.iter().chain(self.inner).chain(&self.last)
-    }
-
-    /// Return items that intersect the given `text_range`.
-    pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
-        let mut cursor = self.trimmed.start;
-        let mut start = 0;
-        let mut end = 0;
-
-        for (i, item) in self.items().enumerate() {
-            if cursor <= text_range.start {
-                start = i;
-            }
-
-            let len = item.textual_len();
-            if cursor < text_range.end || cursor + len <= text_range.end {
-                end = i + 1;
-            } else {
-                break;
-            }
-
-            cursor += len;
+    /// Create an empty line.
+    pub fn empty() -> Self {
+        Self {
+            items: Items::new(),
+            width: Abs::zero(),
+            justify: false,
+            dash: None,
         }
-
-        self.items().skip(start).take(end - start)
     }
 
     /// How many glyphs are in the text where we can insert additional
     /// space when encountering underfull lines.
     pub fn justifiables(&self) -> usize {
         let mut count = 0;
-        for shaped in self.items().filter_map(Item::text) {
+        for shaped in self.items.iter().filter_map(Item::text) {
             count += shaped.justifiables();
         }
+
         // CJK character at line end should not be adjusted.
         if self
-            .items()
+            .items
             .last()
             .and_then(Item::text)
             .map(|s| s.cjk_justifiable_at_last())
@@ -89,19 +67,27 @@ impl<'a> Line<'a> {
         count
     }
 
-    /// How much can the line stretch
+    /// How much the line can stretch.
     pub fn stretchability(&self) -> Abs {
-        self.items().filter_map(Item::text).map(|s| s.stretchability()).sum()
+        self.items
+            .iter()
+            .filter_map(Item::text)
+            .map(|s| s.stretchability())
+            .sum()
     }
 
-    /// How much can the line shrink
+    /// How much the line can shrink.
     pub fn shrinkability(&self) -> Abs {
-        self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
+        self.items
+            .iter()
+            .filter_map(Item::text)
+            .map(|s| s.shrinkability())
+            .sum()
     }
 
     /// Whether the line has items with negative width.
     pub fn has_negative_width_items(&self) -> bool {
-        self.items().any(|item| match item {
+        self.items.iter().any(|item| match item {
             Item::Absolute(amount, _) => *amount < Abs::zero(),
             Item::Frame(frame, _) => frame.width() < Abs::zero(),
             _ => false,
@@ -110,7 +96,8 @@ impl<'a> Line<'a> {
 
     /// The sum of fractions in the line.
     pub fn fr(&self) -> Fr {
-        self.items()
+        self.items
+            .iter()
             .filter_map(|item| match item {
                 Item::Fractional(fr, _) => Some(*fr),
                 _ => None,
@@ -122,234 +109,299 @@ impl<'a> Line<'a> {
 /// A dash at the end of a line.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum Dash {
-    /// A hyphen added to break a word.
-    SoftHyphen,
-    /// Regular hyphen, present in a compound word, e.g. beija-flor.
-    HardHyphen,
-    /// An em dash.
-    Long,
-    /// An en dash.
-    Short,
+    /// A soft hyphen added to break a word.
+    Soft,
+    /// A regular hyphen, present in a compound word, e.g. beija-flor.
+    Hard,
+    /// Another kind of dash. Only relevant for cost computation.
+    Other,
 }
 
 /// Create a line which spans the given range.
 pub fn line<'a>(
     engine: &Engine,
     p: &'a Preparation,
-    mut range: Range,
+    range: Range,
     breakpoint: Breakpoint,
     pred: Option<&Line>,
 ) -> Line<'a> {
-    let end = range.end;
-    let mut justify =
-        p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
+    // The line's full text.
+    let full = &p.text[range.clone()];
+
+    // Whether the line is justified.
+    let justify = full.ends_with(LINE_SEPARATOR)
+        || (p.justify && breakpoint != Breakpoint::Mandatory);
+
+    // Process dashes.
+    let dash = if breakpoint == Breakpoint::Hyphen || full.ends_with(SHY) {
+        Some(Dash::Soft)
+    } else if full.ends_with(HYPHEN) {
+        Some(Dash::Hard)
+    } else if full.ends_with([EN_DASH, EM_DASH]) {
+        Some(Dash::Other)
+    } else {
+        None
+    };
+
+    // Trim the line at the end, if necessary for this breakpoint.
+    let trim = range.start + breakpoint.trim(full).len();
+
+    // Collect the items for the line.
+    let mut items = collect_items(engine, p, range, trim);
+
+    // Add a hyphen at the line start, if a previous dash should be repeated.
+    if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) {
+        if let Some(shaped) = items.first_text_mut() {
+            shaped.prepend_hyphen(engine, p.fallback);
+        }
+    }
 
-    if range.is_empty() {
-        return Line {
-            bidi: &p.bidi,
-            end,
-            trimmed: range,
-            first: None,
-            inner: &[],
-            last: None,
-            width: Abs::zero(),
-            justify,
-            dash: None,
-        };
+    // Add a hyphen at the line end, if we ended on a soft hyphen.
+    if dash == Some(Dash::Soft) {
+        if let Some(shaped) = items.last_text_mut() {
+            shaped.push_hyphen(engine, p.fallback);
+        }
     }
 
-    let prepend_hyphen = pred.map_or(false, should_insert_hyphen);
-
-    // Slice out the relevant items.
-    let (mut expanded, mut inner) = p.slice(range.clone());
-    let mut width = Abs::zero();
-
-    // Weak space (`Absolute(_, true)`) is removed at the end of the line
-    while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
-        inner = before;
-        range.end -= 1;
-        expanded.end -= 1;
-    }
-    // Weak space (`Absolute(_, true)`) is removed at the beginning of the line
-    while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
-        inner = after;
-        range.start += 1;
-        expanded.end += 1;
-    }
-
-    // Reshape the last item if it's split in half or hyphenated.
-    let mut last = None;
-    let mut dash = None;
-    if let Some((Item::Text(shaped), before)) = inner.split_last() {
-        // Compute the range we want to shape, trimming whitespace at the
-        // end of the line.
-        let base = expanded.end - shaped.text.len();
-        let start = range.start.max(base);
-        let text = &p.bidi.text[start..range.end];
-        // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
-        // we want to trim it too.
-        let trimmed = text.trim_end().trim_end_matches('\u{200B}');
-        range.end = start + trimmed.len();
-
-        // Deal with hyphens, dashes and justification.
-        let shy = trimmed.ends_with('\u{ad}');
-        let hyphen = breakpoint == Breakpoint::Hyphen;
-        dash = if hyphen || shy {
-            Some(Dash::SoftHyphen)
-        } else if trimmed.ends_with('-') {
-            Some(Dash::HardHyphen)
-        } else if trimmed.ends_with('–') {
-            Some(Dash::Short)
-        } else if trimmed.ends_with('—') {
-            Some(Dash::Long)
-        } else {
-            None
-        };
-        justify |= text.ends_with('\u{2028}');
+    // Deal with CJ characters at line boundaries.
+    adjust_cj_at_line_boundaries(p, full, &mut items);
 
-        // Deal with CJK punctuation at line ends.
-        let gb_style = cjk_punct_style(shaped.lang, shaped.region);
-        let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
-            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
+    // Compute the line's width.
+    let width = items.iter().map(Item::natural_width).sum();
 
-        // Usually, we don't want to shape an empty string because:
-        // - We don't want the height of trimmed whitespace in a different font
-        //   to be considered for the line height.
-        // - Even if it's in the same font, its unnecessary.
-        //
-        // There is one exception though. When the whole line is empty, we need
-        // the shaped empty string to make the line the appropriate height. That
-        // is the case exactly if the string is empty and there are no other
-        // items in the line.
-        if hyphen
-            || start + shaped.text.len() > range.end
-            || maybe_adjust_last_glyph
-            || prepend_hyphen
-        {
-            if hyphen || start < range.end || before.is_empty() {
-                let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
-                if hyphen || shy {
-                    reshaped.push_hyphen(engine, p.fallback);
-                }
+    Line { items, width, justify, dash }
+}
 
-                if let Some(last_glyph) = reshaped.glyphs.last() {
-                    if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
-                        // If the last glyph is a CJK punctuation, we want to
-                        // shrink it. See Requirements for Chinese Text Layout,
-                        // Section 3.1.6.3 Compression of punctuation marks at
-                        // line start or line end
-                        let shrink_amount = last_glyph.shrinkability().1;
-                        let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
-                        punct.shrink_right(shrink_amount);
-                        reshaped.width -= shrink_amount.at(reshaped.size);
-                    } else if p.cjk_latin_spacing
-                        && last_glyph.is_cj_script()
-                        && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
-                    {
-                        // If the last glyph is a CJK character adjusted by
-                        // [`add_cjk_latin_spacing`], restore the original
-                        // width.
-                        let shrink_amount =
-                            last_glyph.x_advance - last_glyph.x_offset - Em::one();
-                        let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
-                        glyph.x_advance -= shrink_amount;
-                        glyph.adjustability.shrinkability.1 = Em::zero();
-                        reshaped.width -= shrink_amount.at(reshaped.size);
-                    }
-                }
+/// Collects / reshapes all items for the line with the given `range`.
+///
+/// The `trim` defines an end position to which text items are trimmed. For
+/// example, the `range` may span "hello\n", but the `trim` specifies that the
+/// linebreak is trimmed.
+///
+/// We do not factor the `trim` directly into the `range` because we still want
+/// to keep non-text items after the trim (e.g. tags).
+fn collect_items<'a>(
+    engine: &Engine,
+    p: &'a Preparation,
+    range: Range,
+    trim: usize,
+) -> Items<'a> {
+    let mut items = Items::new();
+    let mut fallback = None;
+
+    // Collect the items for each consecutively ordered run.
+    reorder(p, range.clone(), |subrange, rtl| {
+        let from = items.len();
+        collect_range(engine, p, subrange, trim, &mut items, &mut fallback);
+        if rtl {
+            items.reorder(from);
+        }
+    });
 
-                width += reshaped.width;
-                last = Some(Item::Text(reshaped));
-            }
+    // Trim weak spacing at the start of the line.
+    let prefix = items
+        .iter()
+        .take_while(|item| matches!(item, Item::Absolute(_, true)))
+        .count();
+    if prefix > 0 {
+        items.drain(..prefix);
+    }
 
-            inner = before;
+    // Trim weak spacing at the end of the line.
+    while matches!(items.last(), Some(Item::Absolute(_, true))) {
+        items.pop();
+    }
+
+    // Add fallback text to expand the line height, if necessary.
+    if !items.iter().any(|item| matches!(item, Item::Text(_))) {
+        if let Some(fallback) = fallback {
+            items.push(fallback);
         }
     }
 
-    // Deal with CJ characters at line starts.
-    let text = &p.bidi.text[range.start..end];
-    let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
-        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
+    items
+}
 
-    // Reshape the start item if it's split in half.
-    let mut first = None;
-    if let Some((Item::Text(shaped), after)) = inner.split_first() {
-        // Compute the range we want to shape.
-        let base = expanded.start;
-        let end = range.end.min(base + shaped.text.len());
+/// Calls `f` for the BiDi-reordered ranges of a line.
+fn reorder<F>(p: &Preparation, range: Range, mut f: F)
+where
+    F: FnMut(Range, bool),
+{
+    // If there is nothing bidirectional going on, skip reordering.
+    let Some(bidi) = &p.bidi else {
+        f(range, p.dir == Dir::RTL);
+        return;
+    };
+
+    // The bidi crate panics for empty lines.
+    if range.is_empty() {
+        f(range, p.dir == Dir::RTL);
+        return;
+    }
 
-        // Reshape if necessary.
-        if range.start + shaped.text.len() > end
-            || maybe_adjust_first_glyph
-            || prepend_hyphen
-        {
-            // If the range is empty, we don't want to push an empty text item.
-            if range.start < end {
-                let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
-                width += reshaped.width;
-                first = Some(Item::Text(reshaped));
-            }
+    // Find the paragraph that contains the line.
+    let para = bidi
+        .paragraphs
+        .iter()
+        .find(|para| para.range.contains(&range.start))
+        .unwrap();
 
-            inner = after;
-        }
+    // Compute the reordered ranges in visual order (left to right).
+    let (levels, runs) = bidi.visual_runs(para, range.clone());
+
+    // Call `f` for each run.
+    for run in runs {
+        let rtl = levels[run.start].is_rtl();
+        f(run, rtl)
     }
+}
+
+/// Collects / reshapes all items for the given `subrange` with continuous
+/// direction.
+fn collect_range<'a>(
+    engine: &Engine,
+    p: &'a Preparation,
+    range: Range,
+    trim: usize,
+    items: &mut Items<'a>,
+    fallback: &mut Option<ItemEntry<'a>>,
+) {
+    for (subrange, item) in p.slice(range.clone()) {
+        // All non-text items are just kept, they can't be split.
+        let Item::Text(shaped) = item else {
+            items.push(item);
+            continue;
+        };
 
-    if prepend_hyphen {
-        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
-        if let Some(reshaped) = reshaped {
-            let width_before = reshaped.width;
-            reshaped.prepend_hyphen(engine, p.fallback);
-            width += reshaped.width - width_before;
+        // The intersection range of the item, the subrange, and the line's
+        // trimming.
+        let sliced =
+            range.start.max(subrange.start)..range.end.min(subrange.end).min(trim);
+
+        // Whether the item is split by the line.
+        let split = subrange.start < sliced.start || sliced.end < subrange.end;
+
+        if sliced.is_empty() {
+            // When there is no text, still keep this as a fallback item, which
+            // we can use to force a non-zero line-height when the line doesn't
+            // contain any other text.
+            *fallback = Some(ItemEntry::from(Item::Text(shaped.empty())));
+        } else if split {
+            // When the item is split in half, reshape it.
+            let reshaped = shaped.reshape(engine, sliced);
+            items.push(Item::Text(reshaped));
+        } else {
+            // When the item is fully contained, just keep it.
+            items.push(item);
         }
     }
+}
 
-    if maybe_adjust_first_glyph {
-        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
-        if let Some(reshaped) = reshaped {
-            if let Some(first_glyph) = reshaped.glyphs.first() {
-                if first_glyph.is_cjk_right_aligned_punctuation() {
-                    // If the first glyph is a CJK punctuation, we want to
-                    // shrink it.
-                    let shrink_amount = first_glyph.shrinkability().0;
-                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
-                    glyph.shrink_left(shrink_amount);
-                    let amount_abs = shrink_amount.at(reshaped.size);
-                    reshaped.width -= amount_abs;
-                    width -= amount_abs;
-                } else if p.cjk_latin_spacing
-                    && first_glyph.is_cj_script()
-                    && first_glyph.x_offset > Em::zero()
-                {
-                    // If the first glyph is a CJK character adjusted by
-                    // [`add_cjk_latin_spacing`], restore the original width.
-                    let shrink_amount = first_glyph.x_offset;
-                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
-                    glyph.x_advance -= shrink_amount;
-                    glyph.x_offset = Em::zero();
-                    glyph.adjustability.shrinkability.0 = Em::zero();
-                    let amount_abs = shrink_amount.at(reshaped.size);
-                    reshaped.width -= amount_abs;
-                    width -= amount_abs;
-                }
-            }
-        }
+/// Compress punctuation marks and CJ glyphs at line boundaries.
+///
+/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of
+/// punctuation marks at line start or line end.
+fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) {
+    if text.starts_with(BEGIN_PUNCT_PAT)
+        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script))
+    {
+        adjust_cj_at_line_start(p, items);
+    }
+
+    if text.ends_with(END_PUNCT_PAT)
+        || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script))
+    {
+        adjust_cj_at_line_end(p, items);
+    }
+}
+
+/// Compress a punctuation mark or CJ glyph at the line start.
+fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) {
+    let Some(shaped) = items.first_text_mut() else { return };
+    let Some(glyph) = shaped.glyphs.first() else { return };
+
+    if glyph.is_cjk_right_aligned_punctuation() {
+        // If the first glyph is a CJK punctuation, we want to
+        // shrink it.
+        let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+        let shrink = glyph.shrinkability().0;
+        glyph.shrink_left(shrink);
+        shaped.width -= shrink.at(shaped.size);
+    } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() {
+        // If the first glyph is a CJK character adjusted by
+        // [`add_cjk_latin_spacing`], restore the original width.
+        let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+        let shrink = glyph.x_offset;
+        glyph.x_advance -= shrink;
+        glyph.x_offset = Em::zero();
+        glyph.adjustability.shrinkability.0 = Em::zero();
+        shaped.width -= shrink.at(shaped.size);
+    }
+}
+
+/// Compress a punctuation mark or CJ glyph at the line end.
+fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
+    let Some(shaped) = items.last_text_mut() else { return };
+    let Some(glyph) = shaped.glyphs.last() else { return };
+
+    // Deal with CJK punctuation at line ends.
+    let style = cjk_punct_style(shaped.lang, shaped.region);
+
+    if glyph.is_cjk_left_aligned_punctuation(style) {
+        // If the last glyph is a CJK punctuation, we want to
+        // shrink it.
+        let shrink = glyph.shrinkability().1;
+        let punct = shaped.glyphs.to_mut().last_mut().unwrap();
+        punct.shrink_right(shrink);
+        shaped.width -= shrink.at(shaped.size);
+    } else if p.cjk_latin_spacing
+        && glyph.is_cj_script()
+        && (glyph.x_advance - glyph.x_offset) > Em::one()
+    {
+        // If the last glyph is a CJK character adjusted by
+        // [`add_cjk_latin_spacing`], restore the original width.
+        let shrink = glyph.x_advance - glyph.x_offset - Em::one();
+        let glyph = shaped.glyphs.to_mut().last_mut().unwrap();
+        glyph.x_advance -= shrink;
+        glyph.adjustability.shrinkability.1 = Em::zero();
+        shaped.width -= shrink.at(shaped.size);
     }
+}
 
-    // Measure the inner items.
-    for item in inner {
-        width += item.width();
+/// Whether a hyphen should be inserted at the start of the next line.
+fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
+    // If the predecessor line does not end with a `Dash::Hard`, we shall
+    // not place a hyphen at the start of the next line.
+    if pred_line.dash != Some(Dash::Hard) {
+        return false;
     }
 
-    Line {
-        bidi: &p.bidi,
-        trimmed: range,
-        end,
-        first,
-        inner,
-        last,
-        width,
-        justify,
-        dash,
+    // The hyphen should repeat only in the languages that require that feature.
+    // For more information see the discussion at https://github.com/typst/typst/issues/3235
+    let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
+
+    match shaped.lang {
+        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+        Lang::LOWER_SORBIAN
+        | Lang::CZECH
+        | Lang::CROATIAN
+        | Lang::POLISH
+        | Lang::PORTUGUESE
+        | Lang::SLOVAK => true,
+
+        // In Spanish the hyphen is required only if the word after the hyphen
+        // is not capitalized. Otherwise, the hyphen must not be repeated.
+        //
+        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+        Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()),
+
+        _ => false,
     }
 }
 
@@ -365,18 +417,19 @@ pub fn commit(
     let mut remaining = width - line.width - p.hang;
     let mut offset = Abs::zero();
 
-    // Reorder the line from logical to visual order.
-    let (reordered, starts_rtl) = reorder(line);
-    if !starts_rtl {
+    // We always build the line from left to right. In an LTR paragraph, we must
+    // thus add the hanging indent to the offset. When the paragraph is RTL, the
+    // hanging indent arises naturally due to the line width.
+    if p.dir == Dir::LTR {
         offset += p.hang;
     }
 
     // Handle hanging punctuation to the left.
-    if let Some(Item::Text(text)) = reordered.first() {
+    if let Some(Item::Text(text)) = line.items.first() {
         if let Some(glyph) = text.glyphs.first() {
             if !text.dir.is_positive()
                 && TextElem::overhang_in(text.styles)
-                && (reordered.len() > 1 || text.glyphs.len() > 1)
+                && (line.items.len() > 1 || text.glyphs.len() > 1)
             {
                 let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
                 offset -= amount;
@@ -386,11 +439,11 @@ pub fn commit(
     }
 
     // Handle hanging punctuation to the right.
-    if let Some(Item::Text(text)) = reordered.last() {
+    if let Some(Item::Text(text)) = line.items.last() {
         if let Some(glyph) = text.glyphs.last() {
             if text.dir.is_positive()
                 && TextElem::overhang_in(text.styles)
-                && (reordered.len() > 1 || text.glyphs.len() > 1)
+                && (line.items.len() > 1 || text.glyphs.len() > 1)
             {
                 let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
                 remaining += amount;
@@ -408,16 +461,16 @@ pub fn commit(
     let mut extra_justification = Abs::zero();
 
     let shrinkability = line.shrinkability();
-    let stretch = line.stretchability();
+    let stretchability = line.stretchability();
     if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
         // Attempt to reduce the length of the line, using shrinkability.
         justification_ratio = (remaining / shrinkability).max(-1.0);
         remaining = (remaining + shrinkability).min(Abs::zero());
     } else if line.justify && fr.is_zero() {
         // Attempt to increase the length of the line, using stretchability.
-        if stretch > Abs::zero() {
-            justification_ratio = (remaining / stretch).min(1.0);
-            remaining = (remaining - stretch).max(Abs::zero());
+        if stretchability > Abs::zero() {
+            justification_ratio = (remaining / stretchability).min(1.0);
+            remaining = (remaining - stretchability).max(Abs::zero());
         }
 
         let justifiables = line.justifiables();
@@ -433,7 +486,7 @@ pub fn commit(
 
     // Build the frames and determine the height and baseline.
     let mut frames = vec![];
-    for item in reordered {
+    for item in line.items.iter() {
         let mut push = |offset: &mut Abs, frame: Frame| {
             let width = frame.width();
             top.set_max(frame.baseline());
@@ -460,8 +513,12 @@ pub fn commit(
                 }
             }
             Item::Text(shaped) => {
-                let mut frame =
-                    shaped.build(engine, justification_ratio, extra_justification);
+                let mut frame = shaped.build(
+                    engine,
+                    &p.spans,
+                    justification_ratio,
+                    extra_justification,
+                );
                 frame.post_process(shaped.styles);
                 push(&mut offset, frame);
             }
@@ -499,111 +556,139 @@ pub fn commit(
     Ok(output)
 }
 
-/// Return a line's items in visual order.
-fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
-    let mut reordered = vec![];
+/// How much a character should hang into the end margin.
+///
+/// For more discussion, see:
+/// <https://recoveringphysicist.com/21/>
+fn overhang(c: char) -> f64 {
+    match c {
+        // Dashes.
+        '–' | '—' => 0.2,
+        '-' => 0.55,
+
+        // Punctuation.
+        '.' | ',' => 0.8,
+        ':' | ';' => 0.3,
+
+        // Arabic.
+        '\u{60C}' | '\u{6D4}' => 0.4,
 
-    // The bidi crate doesn't like empty lines.
-    if line.trimmed.is_empty() {
-        return (line.slice(line.trimmed.clone()).collect(), false);
+        _ => 0.0,
     }
+}
 
-    // Find the paragraph that contains the line.
-    let para = line
-        .bidi
-        .paragraphs
-        .iter()
-        .find(|para| para.range.contains(&line.trimmed.start))
-        .unwrap();
+/// A collection of owned or borrowed paragraph items.
+pub struct Items<'a>(Vec<ItemEntry<'a>>);
 
-    // Compute the reordered ranges in visual order (left to right).
-    let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
-    let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
+impl<'a> Items<'a> {
+    /// Create empty items.
+    pub fn new() -> Self {
+        Self(vec![])
+    }
 
-    // Collect the reordered items.
-    for run in runs {
-        // Skip reset L1 runs because handling them would require reshaping
-        // again in some cases.
-        if line.bidi.levels[run.start] != levels[run.start] {
-            continue;
-        }
+    /// Push a new item.
+    pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) {
+        self.0.push(entry.into());
+    }
 
-        let prev = reordered.len();
-        reordered.extend(line.slice(run.clone()));
+    /// Iterate over the items.
+    pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> {
+        self.0.iter().map(|item| &**item)
+    }
 
-        if levels[run.start].is_rtl() {
-            reordered[prev..].reverse();
-        }
+    /// Access the first item.
+    pub fn first(&self) -> Option<&Item<'a>> {
+        self.0.first().map(|item| &**item)
     }
 
-    (reordered, starts_rtl)
-}
+    /// Access the last item.
+    pub fn last(&self) -> Option<&Item<'a>> {
+        self.0.last().map(|item| &**item)
+    }
 
-/// Whether a hyphen should be inserted at the start of the next line.
-fn should_insert_hyphen(pred_line: &Line) -> bool {
-    // If the predecessor line does not end with a Dash::HardHyphen, we shall
-    // not place a hyphen at the start of the next line.
-    if pred_line.dash != Some(Dash::HardHyphen) {
-        return false;
+    /// Access the first item mutably, if it is text.
+    pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+        self.0.first_mut()?.text_mut()
     }
 
-    // If there's a trimmed out space, we needn't repeat the hyphen. That's the
-    // case of a text like "...kebab é a -melhor- comida que existe", where the
-    // hyphens are a kind of emphasis marker.
-    if pred_line.trimmed.end != pred_line.end {
-        return false;
+    /// Access the last item mutably, if it is text.
+    pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+        self.0.last_mut()?.text_mut()
     }
 
-    // The hyphen should repeat only in the languages that require that feature.
-    // For more information see the discussion at https://github.com/typst/typst/issues/3235
-    let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
+    /// Reorder the items starting at the given index to RTL by reversing them.
+    pub fn reorder(&mut self, from: usize) {
+        self.0[from..].reverse()
+    }
+}
 
-    match shape.lang {
-        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
-        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
-        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
-        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
-        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
-        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
-        Lang::LOWER_SORBIAN
-        | Lang::CZECH
-        | Lang::CROATIAN
-        | Lang::POLISH
-        | Lang::PORTUGUESE
-        | Lang::SLOVAK => true,
+impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
+    fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
+        Self(iter.into_iter().collect())
+    }
+}
 
-        // In Spanish the hyphen is required only if the word next to hyphen is
-        // not capitalized. Otherwise, the hyphen must not be repeated.
-        //
-        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
-        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
-        Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
-            .chars()
-            .next()
-            .map(|c| !c.is_uppercase())
-            .unwrap_or(false),
+impl<'a> Deref for Items<'a> {
+    type Target = Vec<ItemEntry<'a>>;
 
-        _ => false,
+    fn deref(&self) -> &Self::Target {
+        &self.0
     }
 }
 
-/// How much a character should hang into the end margin.
-///
-/// For more discussion, see:
-/// <https://recoveringphysicist.com/21/>
-fn overhang(c: char) -> f64 {
-    match c {
-        // Dashes.
-        '–' | '—' => 0.2,
-        '-' => 0.55,
+impl<'a> DerefMut for Items<'a> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
 
-        // Punctuation.
-        '.' | ',' => 0.8,
-        ':' | ';' => 0.3,
+/// Either a borrowed reference to an item or an owned, boxed item.
+pub enum ItemEntry<'a> {
+    Ref(&'a Item<'a>),
+    Box(Box<Item<'a>>),
+}
 
-        // Arabic
-        '\u{60C}' | '\u{6D4}' => 0.4,
+impl<'a> ItemEntry<'a> {
+    fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+        match self {
+            Self::Ref(item) => {
+                let text = item.text()?;
+                *self = Self::Box(Box::new(Item::Text(text.clone())));
+                match self {
+                    Self::Box(item) => item.text_mut(),
+                    _ => unreachable!(),
+                }
+            }
+            Self::Box(item) => item.text_mut(),
+        }
+    }
+}
 
-        _ => 0.0,
+impl<'a> Deref for ItemEntry<'a> {
+    type Target = Item<'a>;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            Self::Ref(item) => item,
+            Self::Box(item) => item,
+        }
+    }
+}
+
+impl Debug for ItemEntry<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        (**self).fmt(f)
+    }
+}
+
+impl<'a> From<&'a Item<'a>> for ItemEntry<'a> {
+    fn from(item: &'a Item<'a>) -> Self {
+        Self::Ref(item)
+    }
+}
+
+impl<'a> From<Item<'a>> for ItemEntry<'a> {
+    fn from(item: Item<'a>) -> Self {
+        Self::Box(Box::new(item))
     }
 }
diff --git a/crates/typst/src/layout/inline/linebreak.rs b/crates/typst/src/layout/inline/linebreak.rs
index 0555c189..dbaa9c59 100644
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@@ -1,6 +1,7 @@
 use std::ops::{Add, Sub};
 
 use icu_properties::maps::CodePointMapData;
+use icu_properties::sets::CodePointSetData;
 use icu_properties::LineBreak;
 use icu_provider::AsDeserializingBufferProvider;
 use icu_provider_adapters::fork::ForkByKeyProvider;
@@ -27,30 +28,33 @@ const MIN_RATIO: f64 = -1.0;
 const MIN_APPROX_RATIO: f64 = -0.5;
 const BOUND_EPS: f64 = 1e-3;
 
+/// The ICU blob data.
+fn blob() -> BlobDataProvider {
+    BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap()
+}
+
 /// The general line break segmenter.
-static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
-    let provider =
-        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
-    LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
-});
+static SEGMENTER: Lazy<LineSegmenter> =
+    Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap());
 
 /// The line break segmenter for Chinese/Japanese text.
 static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
-    let provider =
-        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
     let cj_blob =
         BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT)
             .unwrap();
-    let cj_provider = ForkByKeyProvider::new(cj_blob, provider);
+    let cj_provider = ForkByKeyProvider::new(cj_blob, blob());
     LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
 });
 
 /// The Unicode line break properties for each code point.
 static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
-    let provider =
-        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
-    let deser_provider = provider.as_deserializing();
-    icu_properties::maps::load_line_break(&deser_provider).unwrap()
+    icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
+});
+
+/// The set of Unicode default ignorables.
+static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
+    icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
+        .unwrap()
 });
 
 /// A line break opportunity.
@@ -64,6 +68,37 @@ pub enum Breakpoint {
     Hyphen,
 }
 
+impl Breakpoint {
+    /// Trim a line before this breakpoint.
+    pub fn trim(self, line: &str) -> &str {
+        // Trim default ignorables.
+        let ignorable = DEFAULT_IGNORABLE_DATA.as_borrowed();
+        let line = line.trim_end_matches(|c| ignorable.contains(c));
+
+        match self {
+            // Trim whitespace.
+            Self::Normal => line.trim_end_matches(char::is_whitespace),
+
+            // Trim linebreaks.
+            Self::Mandatory => {
+                let lb = LINEBREAK_DATA.as_borrowed();
+                line.trim_end_matches(|c| {
+                    matches!(
+                        lb.get(c),
+                        LineBreak::MandatoryBreak
+                            | LineBreak::CarriageReturn
+                            | LineBreak::LineFeed
+                            | LineBreak::NextLine
+                    )
+                })
+            }
+
+            // Trim nothing further.
+            Self::Hyphen => line,
+        }
+    }
+}
+
 /// Breaks the paragraph into lines.
 pub fn linebreak<'a>(
     engine: &Engine,
@@ -180,14 +215,11 @@ fn linebreak_optimized_bounded<'a>(
         pred: usize,
         total: Cost,
         line: Line<'a>,
+        end: usize,
     }
 
     // Dynamic programming table.
-    let mut table = vec![Entry {
-        pred: 0,
-        total: 0.0,
-        line: line(engine, p, 0..0, Breakpoint::Mandatory, None),
-    }];
+    let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }];
 
     let mut active = 0;
     let mut prev_end = 0;
@@ -200,7 +232,7 @@ fn linebreak_optimized_bounded<'a>(
         let mut line_lower_bound = None;
 
         for (pred_index, pred) in table.iter().enumerate().skip(active) {
-            let start = pred.line.end;
+            let start = pred.end;
             let unbreakable = prev_end == start;
 
             // If the minimum cost we've established for the line is already
@@ -221,6 +253,7 @@ fn linebreak_optimized_bounded<'a>(
                 width,
                 &pred.line,
                 &attempt,
+                end,
                 breakpoint,
                 unbreakable,
             );
@@ -263,7 +296,7 @@ fn linebreak_optimized_bounded<'a>(
 
             // If this attempt is better than what we had before, take it!
             if best.as_ref().map_or(true, |best| best.total >= total) {
-                best = Some(Entry { pred: pred_index, total, line: attempt });
+                best = Some(Entry { pred: pred_index, total, line: attempt, end });
             }
         }
 
@@ -282,7 +315,7 @@ fn linebreak_optimized_bounded<'a>(
     let mut idx = table.len() - 1;
 
     // This should only happen if our bound was faulty. Which shouldn't happen!
-    if table[idx].line.end != p.bidi.text.len() {
+    if table[idx].end != p.text.len() {
         #[cfg(debug_assertions)]
         panic!("bounded paragraph layout is incomplete");
 
@@ -340,7 +373,7 @@ fn linebreak_optimized_approximate(
     let mut prev_end = 0;
 
     breakpoints(p, |end, breakpoint| {
-        let at_end = end == p.bidi.text.len();
+        let at_end = end == p.text.len();
 
         // Find the optimal predecessor.
         let mut best: Option<Entry> = None;
@@ -362,7 +395,7 @@ fn linebreak_optimized_approximate(
             // make it the desired width. We trim at the end to not take into
             // account trailing spaces. This is, again, only an approximation of
             // the real behaviour of `line`.
-            let trimmed_end = start + p.bidi.text[start..end].trim_end().len();
+            let trimmed_end = start + p.text[start..end].trim_end().len();
             let line_ratio = raw_ratio(
                 p,
                 width,
@@ -428,8 +461,9 @@ fn linebreak_optimized_approximate(
         idx = table[idx].pred;
     }
 
+    let mut pred = Line::empty();
+    let mut start = 0;
     let mut exact = 0.0;
-    let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None);
 
     // The cost that we optimized was only an approximate cost, so the layout we
     // got here is only likely to be good, not guaranteed to be the best. We now
@@ -438,26 +472,36 @@ fn linebreak_optimized_approximate(
     for idx in indices.into_iter().rev() {
         let Entry { end, breakpoint, unbreakable, .. } = table[idx];
 
-        let start = pred.end;
         let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
 
-        let (_, line_cost) =
-            ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
+        let (_, line_cost) = ratio_and_cost(
+            p,
+            metrics,
+            width,
+            &pred,
+            &attempt,
+            end,
+            breakpoint,
+            unbreakable,
+        );
 
-        exact += line_cost;
         pred = attempt;
+        start = end;
+        exact += line_cost;
     }
 
     exact
 }
 
 /// Compute the stretch ratio and cost of a line.
+#[allow(clippy::too_many_arguments)]
 fn ratio_and_cost(
     p: &Preparation,
     metrics: &CostMetrics,
     available_width: Abs,
     pred: &Line,
     attempt: &Line,
+    end: usize,
     breakpoint: Breakpoint,
     unbreakable: bool,
 ) -> (f64, Cost) {
@@ -474,7 +518,7 @@ fn ratio_and_cost(
         metrics,
         breakpoint,
         ratio,
-        attempt.end == p.bidi.text.len(),
+        end == p.text.len(),
         attempt.justify,
         unbreakable,
         pred.dash.is_some() && attempt.dash.is_some(),
@@ -587,7 +631,14 @@ fn raw_cost(
 /// code much simpler and the consumers of this function don't need the
 /// composability and flexibility of external iteration anyway.
 fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
-    let text = p.bidi.text;
+    let text = p.text;
+
+    // Single breakpoint at the end for empty text.
+    if text.is_empty() {
+        f(0, Breakpoint::Mandatory);
+        return;
+    }
+
     let hyphenate = p.hyphenate != Some(false);
     let lb = LINEBREAK_DATA.as_borrowed();
     let segmenter = match p.lang {
@@ -747,8 +798,9 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
 fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
     p.hyphenate
         .or_else(|| {
-            let shaped = p.find(offset)?.text()?;
-            Some(TextElem::hyphenate_in(shaped.styles))
+            let (_, item) = p.get(offset);
+            let styles = item.text()?.styles;
+            Some(TextElem::hyphenate_in(styles))
         })
         .unwrap_or(false)
 }
@@ -756,8 +808,9 @@ fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
 /// The text language at the given offset.
 fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
     let lang = p.lang.or_else(|| {
-        let shaped = p.find(offset)?.text()?;
-        Some(TextElem::lang_in(shaped.styles))
+        let (_, item) = p.get(offset);
+        let styles = item.text()?.styles;
+        Some(TextElem::lang_in(styles))
     })?;
 
     let bytes = lang.as_str().as_bytes().try_into().ok()?;
@@ -813,17 +866,14 @@ struct Estimates {
 impl Estimates {
     /// Compute estimations for approximate Knuth-Plass layout.
     fn compute(p: &Preparation) -> Self {
-        let cap = p.bidi.text.len();
+        let cap = p.text.len();
 
         let mut widths = CummulativeVec::with_capacity(cap);
         let mut stretchability = CummulativeVec::with_capacity(cap);
         let mut shrinkability = CummulativeVec::with_capacity(cap);
         let mut justifiables = CummulativeVec::with_capacity(cap);
 
-        for item in &p.items {
-            let textual_len = item.textual_len();
-            let after = widths.len() + textual_len;
-
+        for (range, item) in p.items.iter() {
             if let Item::Text(shaped) = item {
                 for g in shaped.glyphs.iter() {
                     let byte_len = g.range.len();
@@ -835,13 +885,13 @@ impl Estimates {
                     justifiables.push(byte_len, g.is_justifiable() as usize);
                 }
             } else {
-                widths.push(textual_len, item.width());
+                widths.push(range.len(), item.natural_width());
             }
 
-            widths.adjust(after);
-            stretchability.adjust(after);
-            shrinkability.adjust(after);
-            justifiables.adjust(after);
+            widths.adjust(range.end);
+            stretchability.adjust(range.end);
+            shrinkability.adjust(range.end);
+            justifiables.adjust(range.end);
         }
 
         Self {
@@ -871,11 +921,6 @@ where
         Self { total, summed }
     }
 
-    /// Get the covered byte length.
-    fn len(&self) -> usize {
-        self.summed.len()
-    }
-
     /// Adjust to cover the given byte length.
     fn adjust(&mut self, len: usize) {
         self.summed.resize(len, self.total);
diff --git a/crates/typst/src/layout/inline/prepare.rs b/crates/typst/src/layout/inline/prepare.rs
index 90d8d5a4..59682b2c 100644
--- a/crates/typst/src/layout/inline/prepare.rs
+++ b/crates/typst/src/layout/inline/prepare.rs
@@ -13,16 +13,24 @@ use crate::text::{Costs, Lang, TextElem};
 /// Only when a line break falls onto a text index that is not safe-to-break per
 /// rustybuzz, we have to reshape that portion.
 pub struct Preparation<'a> {
+    /// The paragraph's full text.
+    pub text: &'a str,
     /// Bidirectional text embedding levels for the paragraph.
-    pub bidi: BidiInfo<'a>,
+    ///
+    /// This is `None` if the paragraph is BiDi-uniform (all text has the base direction).
+    pub bidi: Option<BidiInfo<'a>>,
     /// Text runs, spacing and layouted elements.
-    pub items: Vec<Item<'a>>,
+    pub items: Vec<(Range, Item<'a>)>,
+    /// Maps from byte indices to item indices.
+    pub indices: Vec<usize>,
     /// The span mapper.
     pub spans: SpanMapper,
     /// Whether to hyphenate if it's the same for all children.
     pub hyphenate: Option<bool>,
     /// Costs for various layout decisions.
     pub costs: Costs,
+    /// The dominant direction.
+    pub dir: Dir,
     /// The text language if it's the same for all children.
     pub lang: Option<Lang>,
     /// The paragraph's resolved horizontal alignment.
@@ -44,46 +52,18 @@ pub struct Preparation<'a> {
 }
 
 impl<'a> Preparation<'a> {
-    /// Find the item that contains the given `text_offset`.
-    pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
-        let mut cursor = 0;
-        for item in &self.items {
-            let end = cursor + item.textual_len();
-            if (cursor..end).contains(&text_offset) {
-                return Some(item);
-            }
-            cursor = end;
-        }
-        None
+    /// Get the item (with its text range) that contains the given `offset`.
+    pub fn get(&self, offset: usize) -> &(Range, Item<'a>) {
+        let idx = self.indices.get(offset).copied().unwrap_or(0);
+        &self.items[idx]
     }
 
-    /// Return the items that intersect the given `text_range`.
-    ///
-    /// Returns the expanded range around the items and the items.
-    pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
-        let mut cursor = 0;
-        let mut start = 0;
-        let mut end = 0;
-        let mut expanded = text_range.clone();
-
-        for (i, item) in self.items.iter().enumerate() {
-            if cursor <= text_range.start {
-                start = i;
-                expanded.start = cursor;
-            }
-
-            let len = item.textual_len();
-            if cursor < text_range.end || cursor + len <= text_range.end {
-                end = i + 1;
-                expanded.end = cursor + len;
-            } else {
-                break;
-            }
-
-            cursor += len;
-        }
-
-        (expanded, &self.items[start..end])
+    /// Iterate over the items that intersect the given `sliced` range.
+    pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> {
+        let start = self.indices.get(sliced.start).copied().unwrap_or(0);
+        self.items[start..].iter().take_while(move |(range, _)| {
+            range.start < sliced.end || range.end <= sliced.end
+        })
     }
 }
 
@@ -99,42 +79,57 @@ pub fn prepare<'a>(
     spans: SpanMapper,
     styles: StyleChain<'a>,
 ) -> SourceResult<Preparation<'a>> {
-    let bidi = BidiInfo::new(
-        text,
-        match TextElem::dir_in(styles) {
-            Dir::LTR => Some(BidiLevel::ltr()),
-            Dir::RTL => Some(BidiLevel::rtl()),
-            _ => None,
-        },
-    );
+    let dir = TextElem::dir_in(styles);
+    let default_level = match dir {
+        Dir::RTL => BidiLevel::rtl(),
+        _ => BidiLevel::ltr(),
+    };
+
+    let bidi = BidiInfo::new(text, Some(default_level));
+    let is_bidi = bidi
+        .levels
+        .iter()
+        .any(|level| level.is_ltr() != default_level.is_ltr());
 
     let mut cursor = 0;
     let mut items = Vec::with_capacity(segments.len());
 
     // Shape the text to finalize the items.
     for segment in segments {
-        let end = cursor + segment.textual_len();
+        let len = segment.textual_len();
+        let end = cursor + len;
+        let range = cursor..end;
+
         match segment {
             Segment::Text(_, styles) => {
-                shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles);
+                shape_range(&mut items, engine, text, &bidi, range, styles);
             }
-            Segment::Item(item) => items.push(item),
+            Segment::Item(item) => items.push((range, item)),
         }
 
         cursor = end;
     }
 
+    // Build the mapping from byte to item indices.
+    let mut indices = Vec::with_capacity(text.len());
+    for (i, (range, _)) in items.iter().enumerate() {
+        indices.extend(range.clone().map(|_| i));
+    }
+
     let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
     if cjk_latin_spacing {
         add_cjk_latin_spacing(&mut items);
     }
 
     Ok(Preparation {
-        bidi,
+        text,
+        bidi: is_bidi.then_some(bidi),
         items,
+        indices,
         spans,
         hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
         costs: TextElem::costs_in(styles),
+        dir,
         lang: children.shared_get(styles, TextElem::lang_in),
         align: AlignElem::alignment_in(styles).resolve(styles).x,
         justify: ParElem::justify_in(styles),
@@ -150,10 +145,14 @@ pub fn prepare<'a>(
 /// Add some spacing between Han characters and western characters. See
 /// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
 /// in Horizontal Written Mode
-fn add_cjk_latin_spacing(items: &mut [Item]) {
-    let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
+fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) {
+    let mut items = items
+        .iter_mut()
+        .filter(|(_, x)| !matches!(x, Item::Tag(_)))
+        .peekable();
+
     let mut prev: Option<&ShapedGlyph> = None;
-    while let Some(item) = items.next() {
+    while let Some((_, item)) = items.next() {
         let Some(text) = item.text_mut() else {
             prev = None;
             continue;
@@ -168,7 +167,7 @@ fn add_cjk_latin_spacing(items: &mut [Item]) {
             let next = glyphs.peek().map(|n| n as _).or_else(|| {
                 items
                     .peek()
-                    .and_then(|i| i.text())
+                    .and_then(|(_, i)| i.text())
                     .and_then(|shaped| shaped.glyphs.first())
             });
 
diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs
index 44b65391..43dc351a 100644
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -14,7 +14,6 @@ use super::{Item, Range, SpanMapper};
 use crate::engine::Engine;
 use crate::foundations::{Smart, StyleChain};
 use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
-use crate::syntax::Span;
 use crate::text::{
     decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
     TextElem, TextItem,
@@ -27,6 +26,7 @@ use crate::World;
 /// This type contains owned or borrowed shaped text runs, which can be
 /// measured, used to reshape substrings more quickly and converted into a
 /// frame.
+#[derive(Clone)]
 pub struct ShapedText<'a> {
     /// The start of the text in the full paragraph.
     pub base: usize,
@@ -80,8 +80,6 @@ pub struct ShapedGlyph {
     pub safe_to_break: bool,
     /// The first char in this glyph's cluster.
     pub c: char,
-    /// The source code location of the glyph and its byte offset within it.
-    pub span: (Span, u16),
     /// Whether this glyph is justifiable for CJK scripts.
     pub is_justifiable: bool,
     /// The script of the glyph.
@@ -214,6 +212,7 @@ impl<'a> ShapedText<'a> {
     pub fn build(
         &self,
         engine: &Engine,
+        spans: &SpanMapper,
         justification_ratio: f64,
         extra_justification: Abs,
     ) -> Frame {
@@ -268,7 +267,7 @@ impl<'a> ShapedText<'a> {
                     // We may not be able to reach the offset completely if
                     // it exceeds u16, but better to have a roughly correct
                     // span offset than nothing.
-                    let mut span = shaped.span;
+                    let mut span = spans.span_at(shaped.range.start);
                     span.1 = span.1.saturating_add(span_offset.saturating_as());
 
                     // |<---- a Glyph ---->|
@@ -331,7 +330,7 @@ impl<'a> ShapedText<'a> {
     }
 
     /// Measure the top and bottom extent of this text.
-    fn measure(&self, engine: &Engine) -> (Abs, Abs) {
+    pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
         let mut top = Abs::zero();
         let mut bottom = Abs::zero();
 
@@ -409,12 +408,7 @@ impl<'a> ShapedText<'a> {
     /// shaping process if possible.
     ///
     /// The text `range` is relative to the whole paragraph.
-    pub fn reshape(
-        &'a self,
-        engine: &Engine,
-        spans: &SpanMapper,
-        text_range: Range,
-    ) -> ShapedText<'a> {
+    pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
         let text = &self.text[text_range.start - self.base..text_range.end - self.base];
         if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
             #[cfg(debug_assertions)]
@@ -436,7 +430,6 @@ impl<'a> ShapedText<'a> {
                 engine,
                 text_range.start,
                 text,
-                spans,
                 self.styles,
                 self.dir,
                 self.lang,
@@ -445,6 +438,16 @@ impl<'a> ShapedText<'a> {
         }
     }
 
+    /// Derive an empty text run with the same properties as this one.
+    pub fn empty(&self) -> Self {
+        Self {
+            text: "",
+            width: Abs::zero(),
+            glyphs: Cow::Borrowed(&[]),
+            ..*self
+        }
+    }
+
     /// Push a hyphen to end of the text.
     pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
         self.insert_hyphen(engine, fallback, Side::Right)
@@ -493,7 +496,6 @@ impl<'a> ShapedText<'a> {
                 range,
                 safe_to_break: true,
                 c: '-',
-                span: (Span::detached(), 0),
                 is_justifiable: false,
                 script: Script::Common,
             };
@@ -592,11 +594,11 @@ impl Debug for ShapedText<'_> {
 /// Group a range of text by BiDi level and script, shape the runs and generate
 /// items for them.
 pub fn shape_range<'a>(
-    items: &mut Vec<Item<'a>>,
+    items: &mut Vec<(Range, Item<'a>)>,
     engine: &Engine,
+    text: &'a str,
     bidi: &BidiInfo<'a>,
     range: Range,
-    spans: &SpanMapper,
     styles: StyleChain<'a>,
 ) {
     let script = TextElem::script_in(styles);
@@ -604,17 +606,9 @@ pub fn shape_range<'a>(
     let region = TextElem::region_in(styles);
     let mut process = |range: Range, level: BidiLevel| {
         let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
-        let shaped = shape(
-            engine,
-            range.start,
-            &bidi.text[range],
-            spans,
-            styles,
-            dir,
-            lang,
-            region,
-        );
-        items.push(Item::Text(shaped));
+        let shaped =
+            shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
+        items.push((range, Item::Text(shaped)));
     };
 
     let mut prev_level = BidiLevel::ltr();
@@ -625,14 +619,14 @@ pub fn shape_range<'a>(
     // set (rather than inferred from the glyphs), we keep the script at an
     // unchanging `Script::Unknown` so that only level changes cause breaks.
     for i in range.clone() {
-        if !bidi.text.is_char_boundary(i) {
+        if !text.is_char_boundary(i) {
             continue;
         }
 
         let level = bidi.levels[i];
         let curr_script = match script {
             Smart::Auto => {
-                bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+                text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
             }
             Smart::Custom(_) => Script::Unknown,
         };
@@ -668,7 +662,6 @@ fn shape<'a>(
     engine: &Engine,
     base: usize,
     text: &'a str,
-    spans: &SpanMapper,
     styles: StyleChain<'a>,
     dir: Dir,
     lang: Lang,
@@ -677,7 +670,6 @@ fn shape<'a>(
     let size = TextElem::size_in(styles);
     let mut ctx = ShapingContext {
         engine,
-        spans,
         size,
         glyphs: vec![],
         used: vec![],
@@ -717,7 +709,6 @@ fn shape<'a>(
 /// Holds shaping results and metadata common to all shaped segments.
 struct ShapingContext<'a, 'v> {
     engine: &'a Engine<'v>,
-    spans: &'a SpanMapper,
     glyphs: Vec<ShapedGlyph>,
     used: Vec<Font>,
     styles: StyleChain<'a>,
@@ -830,7 +821,6 @@ fn shape_segment<'a>(
                 range: start..end,
                 safe_to_break: !info.unsafe_to_break(),
                 c,
-                span: ctx.spans.span_at(start),
                 is_justifiable: is_justifiable(
                     c,
                     script,
@@ -921,7 +911,6 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
             range: start..end,
             safe_to_break: true,
             c,
-            span: ctx.spans.span_at(start),
             is_justifiable: is_justifiable(
                 c,
                 script,
diff --git a/crates/typst/src/model/par.rs b/crates/typst/src/model/par.rs
index 7f65a00f..2110995f 100644
--- a/crates/typst/src/model/par.rs
+++ b/crates/typst/src/model/par.rs
@@ -18,9 +18,9 @@ use crate::realize::StyleVec;
 ///
 /// # Example
 /// ```example
-/// #show par: set block(spacing: 0.65em)
 /// #set par(
 ///   first-line-indent: 1em,
+///   spacing: 0.65em,
 ///   justify: true,
 /// )
 ///
@@ -115,8 +115,7 @@ pub struct ParElem {
     /// By typographic convention, paragraph breaks are indicated either by some
     /// space between paragraphs or by indented first lines. Consider reducing
     /// the [paragraph spacing]($block.spacing) to the [`leading`]($par.leading)
-    /// when using this property (e.g. using
-    /// `[#show par: set block(spacing: 0.65em)]`).
+    /// when using this property (e.g. using `[#set par(spacing: 0.65em)]`).
     #[ghost]
     pub first_line_indent: Length,
 
diff --git a/docs/guides/guide-for-latex-users.md b/docs/guides/guide-for-latex-users.md
index 1f3caef9..8c3b5601 100644
--- a/docs/guides/guide-for-latex-users.md
+++ b/docs/guides/guide-for-latex-users.md
@@ -593,10 +593,9 @@ The example below
 
 ```typ
 #set page(margin: 1.75in)
-#set par(leading: 0.55em, first-line-indent: 1.8em, justify: true)
+#set par(leading: 0.55em, spacing: 0.55em, first-line-indent: 1.8em, justify: true)
 #set text(font: "New Computer Modern")
 #show raw: set text(font: "New Computer Modern Mono")
-#show par: set block(spacing: 0.55em)
 #show heading: set block(above: 1.4em, below: 1em)
 ```
 
diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md
index 9a7dc373..b63d1776 100644
--- a/docs/reference/syntax.md
+++ b/docs/reference/syntax.md
@@ -120,7 +120,7 @@ a table listing all syntax that is available in code mode:
 | Named function           | `{let f(x) = 2 * x}`          | [Function]($function)              |
 | Set rule                 | `{set text(14pt)}`            | [Styling]($styling/#set-rules)     |
 | Set-if rule              | `{set text(..) if .. }`       | [Styling]($styling/#set-rules)     |
-| Show-set rule            | `{show par: set block(..)}`   | [Styling]($styling/#show-rules)    |
+| Show-set rule            | `{show heading: set block(..)}` | [Styling]($styling/#show-rules)  |
 | Show rule with function  | `{show raw: it => {..}}`      | [Styling]($styling/#show-rules)    |
 | Show-everything rule     | `{show: columns.with(2)}`     | [Styling]($styling/#show-rules)    |
 | Context expression       | `{context text.lang}`         | [Context]($context)                |
diff --git a/tests/ref/bidi-whitespace-reset.png b/tests/ref/bidi-whitespace-reset.png
index 7d64012f..e9973798 100644
--- a/tests/ref/bidi-whitespace-reset.png
+++ b/tests/ref/bidi-whitespace-reset.png
diff --git a/tests/ref/context-compatibility-locate.png b/tests/ref/context-compatibility-locate.png
index 4c8944ab..32516c00 100644
--- a/tests/ref/context-compatibility-locate.png
+++ b/tests/ref/context-compatibility-locate.png
diff --git a/tests/ref/eval-mode.png b/tests/ref/eval-mode.png
index 5edfa62d..94357ff4 100644
--- a/tests/ref/eval-mode.png
+++ b/tests/ref/eval-mode.png
diff --git a/tests/ref/issue-3601-empty-raw.png b/tests/ref/issue-3601-empty-raw.png
new file mode 100644
index 00000000..be5ea8fc
--- /dev/null
+++ b/tests/ref/issue-3601-empty-raw.png
diff --git a/tests/ref/issue-4278-par-trim-before-equation.png b/tests/ref/issue-4278-par-trim-before-equation.png
new file mode 100644
index 00000000..b0553719
--- /dev/null
+++ b/tests/ref/issue-4278-par-trim-before-equation.png
diff --git a/tests/ref/justify-basically-empty.png b/tests/ref/justify-basically-empty.png
new file mode 100644
index 00000000..3d1b50c1
--- /dev/null
+++ b/tests/ref/justify-basically-empty.png
diff --git a/tests/ref/par-metadata-after-trimmed-space.png b/tests/ref/par-metadata-after-trimmed-space.png
new file mode 100644
index 00000000..b0de98ea
--- /dev/null
+++ b/tests/ref/par-metadata-after-trimmed-space.png
diff --git a/tests/ref/par-trailing-whitespace.png b/tests/ref/par-trailing-whitespace.png
new file mode 100644
index 00000000..10c22da5
--- /dev/null
+++ b/tests/ref/par-trailing-whitespace.png
diff --git a/tests/suite/foundations/version.typ b/tests/suite/foundations/version.typ
index bf2cadb1..a4be7f13 100644
--- a/tests/suite/foundations/version.typ
+++ b/tests/suite/foundations/version.typ
@@ -4,7 +4,7 @@
 // Test version constructor.
 
 // Empty.
-#version()
+#test(array(version()), ())
 
 // Plain.
 #test(version(1, 2).major, 1)
diff --git a/tests/suite/layout/spacing.typ b/tests/suite/layout/spacing.typ
index dd0fced5..c32e6c8f 100644
--- a/tests/suite/layout/spacing.typ
+++ b/tests/suite/layout/spacing.typ
@@ -47,14 +47,14 @@ Totally #h() ignored
 Hello #h(2cm, weak: true)
 
 --- issue-4087 ---
-// weak space at the end of the line would be removed.
+// Weak space at the end of the line is removed.
 This is the first line #h(2cm, weak: true) A new line
 
-// non-weak space would be consume a specified width and push next line.
+// Non-weak space consumes the specified width and pushes the following text to the next line.
 This is the first line #h(2cm, weak: false) A new line
 
-// similarly weak space at the beginning of the line would be removed.
-This is the first line\ #h(2cm, weak: true) A new line
+// Similarly, weak space at the beginning of the line is removed.
+This is the first line \ #h(2cm, weak: true) A new line
 
-// non-spacing, on the other hand, is not removed.
-This is the first line\ #h(2cm, weak: false) A new line
+// Non-weak spacing, on the other hand, is not removed.
+This is the first line \ #h(2cm, weak: false) A new line
diff --git a/tests/suite/model/par.typ b/tests/suite/model/par.typ
index f07c4c6c..80bc9f3e 100644
--- a/tests/suite/model/par.typ
+++ b/tests/suite/model/par.typ
@@ -78,3 +78,22 @@ Welcome \ here. Does this work well?
 #set text(dir: rtl)
 لآن وقد أظلم الليل وبدأت النجوم
 تنضخ وجه الطبيعة التي أعْيَتْ من طول ما انبعثت في النهار
+
+--- par-trailing-whitespace ---
+// Ensure that trailing whitespace is laid out as intended.
+#box(fill: aqua, " ")
+
+--- par-empty-metadata ---
+// Check that metadata still works in a zero length paragraph.
+#block(height: 0pt)[#""#metadata(false)<hi>]
+#context test(query(<hi>).first().value, false)
+
+--- par-metadata-after-trimmed-space ---
+// Ensure that metadata doesn't prevent trailing spaces from being trimmed.
+#set par(justify: true, linebreaks: "simple")
+#set text(hyphenate: false)
+Lorem ipsum dolor #metadata(none) nonumy eirmod tempor.
+
+--- issue-4278-par-trim-before-equation ---
+#set par(justify: true)
+#lorem(6) aa $a = c + b$
author	Laurenz <laurmaedje@gmail.com>	2024-07-04 12:57:40 +0200
committer	GitHub <noreply@github.com>	2024-07-04 10:57:40 +0000
commit	0ef672c347f368325313c8bccc4f70e3f1016b0a (patch)
tree	8971d6b305d805b42b55e1e85613e4f4c5ba175d
parent	75246f930b9041c206a8a3c87e6db03bfc9111fd (diff)