summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2024-07-04 12:57:40 +0200
committerGitHub <noreply@github.com>2024-07-04 10:57:40 +0000
commit0ef672c347f368325313c8bccc4f70e3f1016b0a (patch)
tree8971d6b305d805b42b55e1e85613e4f4c5ba175d
parent75246f930b9041c206a8a3c87e6db03bfc9111fd (diff)
Refactor line building (#4497)
-rw-r--r--Cargo.lock5
-rw-r--r--Cargo.toml4
-rw-r--r--crates/typst/src/introspection/mod.rs2
-rw-r--r--crates/typst/src/layout/inline/collect.rs2
-rw-r--r--crates/typst/src/layout/inline/line.rs771
-rw-r--r--crates/typst/src/layout/inline/linebreak.rs141
-rw-r--r--crates/typst/src/layout/inline/prepare.rs111
-rw-r--r--crates/typst/src/layout/inline/shaping.rs55
-rw-r--r--crates/typst/src/model/par.rs5
-rw-r--r--docs/guides/guide-for-latex-users.md3
-rw-r--r--docs/reference/syntax.md2
-rw-r--r--tests/ref/bidi-whitespace-reset.pngbin378 -> 361 bytes
-rw-r--r--tests/ref/context-compatibility-locate.pngbin1514 -> 1530 bytes
-rw-r--r--tests/ref/eval-mode.pngbin850 -> 881 bytes
-rw-r--r--tests/ref/issue-3601-empty-raw.pngbin0 -> 74 bytes
-rw-r--r--tests/ref/issue-4278-par-trim-before-equation.pngbin0 -> 1138 bytes
-rw-r--r--tests/ref/justify-basically-empty.pngbin0 -> 74 bytes
-rw-r--r--tests/ref/par-metadata-after-trimmed-space.pngbin0 -> 1030 bytes
-rw-r--r--tests/ref/par-trailing-whitespace.pngbin0 -> 91 bytes
-rw-r--r--tests/suite/foundations/version.typ2
-rw-r--r--tests/suite/layout/spacing.typ12
-rw-r--r--tests/suite/model/par.typ19
22 files changed, 634 insertions, 500 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3e99ea81..14dd36f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2604,8 +2604,7 @@ dependencies = [
[[package]]
name = "typst-assets"
version = "0.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f13f85360328da54847dd7fefaf272dfa5b6d1fdeb53f32938924c39bf5b2c6c"
+source = "git+https://github.com/typst/typst-assets?rev=4ee794c#4ee794cf8fb98eb67194e757c9820ab8562d853b"
[[package]]
name = "typst-cli"
@@ -2656,7 +2655,7 @@ dependencies = [
[[package]]
name = "typst-dev-assets"
version = "0.11.0"
-source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d9de82b631bc775124a69384c8d860db04#48a924d9de82b631bc775124a69384c8d860db04"
+source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d#48a924d9de82b631bc775124a69384c8d860db04"
[[package]]
name = "typst-docs"
diff --git a/Cargo.toml b/Cargo.toml
index ee50b666..1b5bf0f4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,8 +26,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.11.0" }
typst-syntax = { path = "crates/typst-syntax", version = "0.11.0" }
typst-timing = { path = "crates/typst-timing", version = "0.11.0" }
typst-utils = { path = "crates/typst-utils", version = "0.11.0" }
-typst-assets = "0.11.0"
-typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d9de82b631bc775124a69384c8d860db04" }
+typst-assets = { git = "https://github.com/typst/typst-assets", rev = "4ee794c" }
+typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d" }
az = "1.2"
base64 = "0.22"
bitflags = { version = "2", features = ["serde"] }
diff --git a/crates/typst/src/introspection/mod.rs b/crates/typst/src/introspection/mod.rs
index c9dba244..6c982afb 100644
--- a/crates/typst/src/introspection/mod.rs
+++ b/crates/typst/src/introspection/mod.rs
@@ -116,6 +116,6 @@ impl Tag {
impl Debug for Tag {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "Tag({:?})", self.elem)
+ write!(f, "Tag({:?})", self.elem.elem().name())
}
}
diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs
index 79d0d59f..f1607460 100644
--- a/crates/typst/src/layout/inline/collect.rs
+++ b/crates/typst/src/layout/inline/collect.rs
@@ -79,7 +79,7 @@ impl<'a> Item<'a> {
}
/// The natural layouted width of the item.
- pub fn width(&self) -> Abs {
+ pub fn natural_width(&self) -> Abs {
match self {
Self::Text(shaped) => shaped.width,
Self::Absolute(v, _) => *v,
diff --git a/crates/typst/src/layout/inline/line.rs b/crates/typst/src/layout/inline/line.rs
index 232a1c6b..12162ab1 100644
--- a/crates/typst/src/layout/inline/line.rs
+++ b/crates/typst/src/layout/inline/line.rs
@@ -1,11 +1,18 @@
-use unicode_bidi::BidiInfo;
+use std::fmt::{self, Debug, Formatter};
+use std::ops::{Deref, DerefMut};
use super::*;
use crate::engine::Engine;
-use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
+use crate::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point};
use crate::text::{Lang, TextElem};
use crate::utils::Numeric;
+const SHY: char = '\u{ad}';
+const HYPHEN: char = '-';
+const EN_DASH: char = '–';
+const EM_DASH: char = '—';
+const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks.
+
/// A layouted line, consisting of a sequence of layouted paragraph items that
/// are mostly borrowed from the preparation phase. This type enables you to
/// measure the size of a line in a range before committing to building the
@@ -16,20 +23,9 @@ use crate::utils::Numeric;
/// line, respectively. But even those can partially reuse previous results when
/// the break index is safe-to-break per rustybuzz.
pub struct Line<'a> {
- /// Bidi information about the paragraph.
- pub bidi: &'a BidiInfo<'a>,
- /// The trimmed range the line spans in the paragraph.
- pub trimmed: Range,
- /// The untrimmed end where the line ends.
- pub end: usize,
- /// A reshaped text item if the line sliced up a text item at the start.
- pub first: Option<Item<'a>>,
- /// Inner items which don't need to be reprocessed.
- pub inner: &'a [Item<'a>],
- /// A reshaped text item if the line sliced up a text item at the end. If
- /// there is only one text item, this takes precedence over `first`.
- pub last: Option<Item<'a>>,
- /// The width of the line.
+ /// The items the line is made of.
+ pub items: Items<'a>,
+ /// The exact natural width of the line.
pub width: Abs,
/// Whether the line should be justified.
pub justify: bool,
@@ -39,45 +35,27 @@ pub struct Line<'a> {
}
impl<'a> Line<'a> {
- /// Iterate over the line's items.
- pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
- self.first.iter().chain(self.inner).chain(&self.last)
- }
-
- /// Return items that intersect the given `text_range`.
- pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
- let mut cursor = self.trimmed.start;
- let mut start = 0;
- let mut end = 0;
-
- for (i, item) in self.items().enumerate() {
- if cursor <= text_range.start {
- start = i;
- }
-
- let len = item.textual_len();
- if cursor < text_range.end || cursor + len <= text_range.end {
- end = i + 1;
- } else {
- break;
- }
-
- cursor += len;
+ /// Create an empty line.
+ pub fn empty() -> Self {
+ Self {
+ items: Items::new(),
+ width: Abs::zero(),
+ justify: false,
+ dash: None,
}
-
- self.items().skip(start).take(end - start)
}
/// How many glyphs are in the text where we can insert additional
/// space when encountering underfull lines.
pub fn justifiables(&self) -> usize {
let mut count = 0;
- for shaped in self.items().filter_map(Item::text) {
+ for shaped in self.items.iter().filter_map(Item::text) {
count += shaped.justifiables();
}
+
// CJK character at line end should not be adjusted.
if self
- .items()
+ .items
.last()
.and_then(Item::text)
.map(|s| s.cjk_justifiable_at_last())
@@ -89,19 +67,27 @@ impl<'a> Line<'a> {
count
}
- /// How much can the line stretch
+ /// How much the line can stretch.
pub fn stretchability(&self) -> Abs {
- self.items().filter_map(Item::text).map(|s| s.stretchability()).sum()
+ self.items
+ .iter()
+ .filter_map(Item::text)
+ .map(|s| s.stretchability())
+ .sum()
}
- /// How much can the line shrink
+ /// How much the line can shrink.
pub fn shrinkability(&self) -> Abs {
- self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
+ self.items
+ .iter()
+ .filter_map(Item::text)
+ .map(|s| s.shrinkability())
+ .sum()
}
/// Whether the line has items with negative width.
pub fn has_negative_width_items(&self) -> bool {
- self.items().any(|item| match item {
+ self.items.iter().any(|item| match item {
Item::Absolute(amount, _) => *amount < Abs::zero(),
Item::Frame(frame, _) => frame.width() < Abs::zero(),
_ => false,
@@ -110,7 +96,8 @@ impl<'a> Line<'a> {
/// The sum of fractions in the line.
pub fn fr(&self) -> Fr {
- self.items()
+ self.items
+ .iter()
.filter_map(|item| match item {
Item::Fractional(fr, _) => Some(*fr),
_ => None,
@@ -122,234 +109,299 @@ impl<'a> Line<'a> {
/// A dash at the end of a line.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Dash {
- /// A hyphen added to break a word.
- SoftHyphen,
- /// Regular hyphen, present in a compound word, e.g. beija-flor.
- HardHyphen,
- /// An em dash.
- Long,
- /// An en dash.
- Short,
+ /// A soft hyphen added to break a word.
+ Soft,
+ /// A regular hyphen, present in a compound word, e.g. beija-flor.
+ Hard,
+ /// Another kind of dash. Only relevant for cost computation.
+ Other,
}
/// Create a line which spans the given range.
pub fn line<'a>(
engine: &Engine,
p: &'a Preparation,
- mut range: Range,
+ range: Range,
breakpoint: Breakpoint,
pred: Option<&Line>,
) -> Line<'a> {
- let end = range.end;
- let mut justify =
- p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
+ // The line's full text.
+ let full = &p.text[range.clone()];
+
+ // Whether the line is justified.
+ let justify = full.ends_with(LINE_SEPARATOR)
+ || (p.justify && breakpoint != Breakpoint::Mandatory);
+
+ // Process dashes.
+ let dash = if breakpoint == Breakpoint::Hyphen || full.ends_with(SHY) {
+ Some(Dash::Soft)
+ } else if full.ends_with(HYPHEN) {
+ Some(Dash::Hard)
+ } else if full.ends_with([EN_DASH, EM_DASH]) {
+ Some(Dash::Other)
+ } else {
+ None
+ };
+
+ // Trim the line at the end, if necessary for this breakpoint.
+ let trim = range.start + breakpoint.trim(full).len();
+
+ // Collect the items for the line.
+ let mut items = collect_items(engine, p, range, trim);
+
+ // Add a hyphen at the line start, if a previous dash should be repeated.
+ if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) {
+ if let Some(shaped) = items.first_text_mut() {
+ shaped.prepend_hyphen(engine, p.fallback);
+ }
+ }
- if range.is_empty() {
- return Line {
- bidi: &p.bidi,
- end,
- trimmed: range,
- first: None,
- inner: &[],
- last: None,
- width: Abs::zero(),
- justify,
- dash: None,
- };
+ // Add a hyphen at the line end, if we ended on a soft hyphen.
+ if dash == Some(Dash::Soft) {
+ if let Some(shaped) = items.last_text_mut() {
+ shaped.push_hyphen(engine, p.fallback);
+ }
}
- let prepend_hyphen = pred.map_or(false, should_insert_hyphen);
-
- // Slice out the relevant items.
- let (mut expanded, mut inner) = p.slice(range.clone());
- let mut width = Abs::zero();
-
- // Weak space (`Absolute(_, true)`) is removed at the end of the line
- while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
- inner = before;
- range.end -= 1;
- expanded.end -= 1;
- }
- // Weak space (`Absolute(_, true)`) is removed at the beginning of the line
- while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
- inner = after;
- range.start += 1;
- expanded.end += 1;
- }
-
- // Reshape the last item if it's split in half or hyphenated.
- let mut last = None;
- let mut dash = None;
- if let Some((Item::Text(shaped), before)) = inner.split_last() {
- // Compute the range we want to shape, trimming whitespace at the
- // end of the line.
- let base = expanded.end - shaped.text.len();
- let start = range.start.max(base);
- let text = &p.bidi.text[start..range.end];
- // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
- // we want to trim it too.
- let trimmed = text.trim_end().trim_end_matches('\u{200B}');
- range.end = start + trimmed.len();
-
- // Deal with hyphens, dashes and justification.
- let shy = trimmed.ends_with('\u{ad}');
- let hyphen = breakpoint == Breakpoint::Hyphen;
- dash = if hyphen || shy {
- Some(Dash::SoftHyphen)
- } else if trimmed.ends_with('-') {
- Some(Dash::HardHyphen)
- } else if trimmed.ends_with('–') {
- Some(Dash::Short)
- } else if trimmed.ends_with('—') {
- Some(Dash::Long)
- } else {
- None
- };
- justify |= text.ends_with('\u{2028}');
+ // Deal with CJ characters at line boundaries.
+ adjust_cj_at_line_boundaries(p, full, &mut items);
- // Deal with CJK punctuation at line ends.
- let gb_style = cjk_punct_style(shaped.lang, shaped.region);
- let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
- || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
+ // Compute the line's width.
+ let width = items.iter().map(Item::natural_width).sum();
- // Usually, we don't want to shape an empty string because:
- // - We don't want the height of trimmed whitespace in a different font
- // to be considered for the line height.
- // - Even if it's in the same font, its unnecessary.
- //
- // There is one exception though. When the whole line is empty, we need
- // the shaped empty string to make the line the appropriate height. That
- // is the case exactly if the string is empty and there are no other
- // items in the line.
- if hyphen
- || start + shaped.text.len() > range.end
- || maybe_adjust_last_glyph
- || prepend_hyphen
- {
- if hyphen || start < range.end || before.is_empty() {
- let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
- if hyphen || shy {
- reshaped.push_hyphen(engine, p.fallback);
- }
+ Line { items, width, justify, dash }
+}
- if let Some(last_glyph) = reshaped.glyphs.last() {
- if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
- // If the last glyph is a CJK punctuation, we want to
- // shrink it. See Requirements for Chinese Text Layout,
- // Section 3.1.6.3 Compression of punctuation marks at
- // line start or line end
- let shrink_amount = last_glyph.shrinkability().1;
- let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
- punct.shrink_right(shrink_amount);
- reshaped.width -= shrink_amount.at(reshaped.size);
- } else if p.cjk_latin_spacing
- && last_glyph.is_cj_script()
- && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
- {
- // If the last glyph is a CJK character adjusted by
- // [`add_cjk_latin_spacing`], restore the original
- // width.
- let shrink_amount =
- last_glyph.x_advance - last_glyph.x_offset - Em::one();
- let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
- glyph.x_advance -= shrink_amount;
- glyph.adjustability.shrinkability.1 = Em::zero();
- reshaped.width -= shrink_amount.at(reshaped.size);
- }
- }
+/// Collects / reshapes all items for the line with the given `range`.
+///
+/// The `trim` defines an end position to which text items are trimmed. For
+/// example, the `range` may span "hello\n", but the `trim` specifies that the
+/// linebreak is trimmed.
+///
+/// We do not factor the `trim` directly into the `range` because we still want
+/// to keep non-text items after the trim (e.g. tags).
+fn collect_items<'a>(
+ engine: &Engine,
+ p: &'a Preparation,
+ range: Range,
+ trim: usize,
+) -> Items<'a> {
+ let mut items = Items::new();
+ let mut fallback = None;
+
+ // Collect the items for each consecutively ordered run.
+ reorder(p, range.clone(), |subrange, rtl| {
+ let from = items.len();
+ collect_range(engine, p, subrange, trim, &mut items, &mut fallback);
+ if rtl {
+ items.reorder(from);
+ }
+ });
- width += reshaped.width;
- last = Some(Item::Text(reshaped));
- }
+ // Trim weak spacing at the start of the line.
+ let prefix = items
+ .iter()
+ .take_while(|item| matches!(item, Item::Absolute(_, true)))
+ .count();
+ if prefix > 0 {
+ items.drain(..prefix);
+ }
- inner = before;
+ // Trim weak spacing at the end of the line.
+ while matches!(items.last(), Some(Item::Absolute(_, true))) {
+ items.pop();
+ }
+
+ // Add fallback text to expand the line height, if necessary.
+ if !items.iter().any(|item| matches!(item, Item::Text(_))) {
+ if let Some(fallback) = fallback {
+ items.push(fallback);
}
}
- // Deal with CJ characters at line starts.
- let text = &p.bidi.text[range.start..end];
- let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
- || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
+ items
+}
- // Reshape the start item if it's split in half.
- let mut first = None;
- if let Some((Item::Text(shaped), after)) = inner.split_first() {
- // Compute the range we want to shape.
- let base = expanded.start;
- let end = range.end.min(base + shaped.text.len());
+/// Calls `f` for the BiDi-reordered ranges of a line.
+fn reorder<F>(p: &Preparation, range: Range, mut f: F)
+where
+ F: FnMut(Range, bool),
+{
+ // If there is nothing bidirectional going on, skip reordering.
+ let Some(bidi) = &p.bidi else {
+ f(range, p.dir == Dir::RTL);
+ return;
+ };
+
+ // The bidi crate panics for empty lines.
+ if range.is_empty() {
+ f(range, p.dir == Dir::RTL);
+ return;
+ }
- // Reshape if necessary.
- if range.start + shaped.text.len() > end
- || maybe_adjust_first_glyph
- || prepend_hyphen
- {
- // If the range is empty, we don't want to push an empty text item.
- if range.start < end {
- let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
- width += reshaped.width;
- first = Some(Item::Text(reshaped));
- }
+ // Find the paragraph that contains the line.
+ let para = bidi
+ .paragraphs
+ .iter()
+ .find(|para| para.range.contains(&range.start))
+ .unwrap();
- inner = after;
- }
+ // Compute the reordered ranges in visual order (left to right).
+ let (levels, runs) = bidi.visual_runs(para, range.clone());
+
+ // Call `f` for each run.
+ for run in runs {
+ let rtl = levels[run.start].is_rtl();
+ f(run, rtl)
}
+}
+
+/// Collects / reshapes all items for the given `subrange` with continuous
+/// direction.
+fn collect_range<'a>(
+ engine: &Engine,
+ p: &'a Preparation,
+ range: Range,
+ trim: usize,
+ items: &mut Items<'a>,
+ fallback: &mut Option<ItemEntry<'a>>,
+) {
+ for (subrange, item) in p.slice(range.clone()) {
+ // All non-text items are just kept, they can't be split.
+ let Item::Text(shaped) = item else {
+ items.push(item);
+ continue;
+ };
- if prepend_hyphen {
- let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
- if let Some(reshaped) = reshaped {
- let width_before = reshaped.width;
- reshaped.prepend_hyphen(engine, p.fallback);
- width += reshaped.width - width_before;
+ // The intersection range of the item, the subrange, and the line's
+ // trimming.
+ let sliced =
+ range.start.max(subrange.start)..range.end.min(subrange.end).min(trim);
+
+ // Whether the item is split by the line.
+ let split = subrange.start < sliced.start || sliced.end < subrange.end;
+
+ if sliced.is_empty() {
+ // When there is no text, still keep this as a fallback item, which
+ // we can use to force a non-zero line-height when the line doesn't
+ // contain any other text.
+ *fallback = Some(ItemEntry::from(Item::Text(shaped.empty())));
+ } else if split {
+ // When the item is split in half, reshape it.
+ let reshaped = shaped.reshape(engine, sliced);
+ items.push(Item::Text(reshaped));
+ } else {
+ // When the item is fully contained, just keep it.
+ items.push(item);
}
}
+}
- if maybe_adjust_first_glyph {
- let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
- if let Some(reshaped) = reshaped {
- if let Some(first_glyph) = reshaped.glyphs.first() {
- if first_glyph.is_cjk_right_aligned_punctuation() {
- // If the first glyph is a CJK punctuation, we want to
- // shrink it.
- let shrink_amount = first_glyph.shrinkability().0;
- let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
- glyph.shrink_left(shrink_amount);
- let amount_abs = shrink_amount.at(reshaped.size);
- reshaped.width -= amount_abs;
- width -= amount_abs;
- } else if p.cjk_latin_spacing
- && first_glyph.is_cj_script()
- && first_glyph.x_offset > Em::zero()
- {
- // If the first glyph is a CJK character adjusted by
- // [`add_cjk_latin_spacing`], restore the original width.
- let shrink_amount = first_glyph.x_offset;
- let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
- glyph.x_advance -= shrink_amount;
- glyph.x_offset = Em::zero();
- glyph.adjustability.shrinkability.0 = Em::zero();
- let amount_abs = shrink_amount.at(reshaped.size);
- reshaped.width -= amount_abs;
- width -= amount_abs;
- }
- }
- }
+/// Add spacing around punctuation marks for CJ glyphs at line boundaries.
+///
+/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of
+/// punctuation marks at line start or line end.
+fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) {
+ if text.starts_with(BEGIN_PUNCT_PAT)
+ || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script))
+ {
+ adjust_cj_at_line_start(p, items);
+ }
+
+ if text.ends_with(END_PUNCT_PAT)
+ || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script))
+ {
+ adjust_cj_at_line_end(p, items);
+ }
+}
+
+/// Add spacing around punctuation marks for CJ glyphs at the line start.
+fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) {
+ let Some(shaped) = items.first_text_mut() else { return };
+ let Some(glyph) = shaped.glyphs.first() else { return };
+
+ if glyph.is_cjk_right_aligned_punctuation() {
+ // If the first glyph is a CJK punctuation, we want to
+ // shrink it.
+ let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+ let shrink = glyph.shrinkability().0;
+ glyph.shrink_left(shrink);
+ shaped.width -= shrink.at(shaped.size);
+ } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() {
+ // If the first glyph is a CJK character adjusted by
+ // [`add_cjk_latin_spacing`], restore the original width.
+ let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
+ let shrink = glyph.x_offset;
+ glyph.x_advance -= shrink;
+ glyph.x_offset = Em::zero();
+ glyph.adjustability.shrinkability.0 = Em::zero();
+ shaped.width -= shrink.at(shaped.size);
+ }
+}
+
+/// Add spacing around punctuation marks for CJ glyphs at the line end.
+fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
+ let Some(shaped) = items.last_text_mut() else { return };
+ let Some(glyph) = shaped.glyphs.last() else { return };
+
+ // Deal with CJK punctuation at line ends.
+ let style = cjk_punct_style(shaped.lang, shaped.region);
+
+ if glyph.is_cjk_left_aligned_punctuation(style) {
+ // If the last glyph is a CJK punctuation, we want to
+ // shrink it.
+ let shrink = glyph.shrinkability().1;
+ let punct = shaped.glyphs.to_mut().last_mut().unwrap();
+ punct.shrink_right(shrink);
+ shaped.width -= shrink.at(shaped.size);
+ } else if p.cjk_latin_spacing
+ && glyph.is_cj_script()
+ && (glyph.x_advance - glyph.x_offset) > Em::one()
+ {
+ // If the last glyph is a CJK character adjusted by
+ // [`add_cjk_latin_spacing`], restore the original width.
+ let shrink = glyph.x_advance - glyph.x_offset - Em::one();
+ let glyph = shaped.glyphs.to_mut().last_mut().unwrap();
+ glyph.x_advance -= shrink;
+ glyph.adjustability.shrinkability.1 = Em::zero();
+ shaped.width -= shrink.at(shaped.size);
}
+}
- // Measure the inner items.
- for item in inner {
- width += item.width();
+/// Whether a hyphen should be inserted at the start of the next line.
+fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
+ // If the predecessor line does not end with a `Dash::Hard`, we shall
+ // not place a hyphen at the start of the next line.
+ if pred_line.dash != Some(Dash::Hard) {
+ return false;
}
- Line {
- bidi: &p.bidi,
- trimmed: range,
- end,
- first,
- inner,
- last,
- width,
- justify,
- dash,
+ // The hyphen should repeat only in the languages that require that feature.
+ // For more information see the discussion at https://github.com/typst/typst/issues/3235
+ let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
+
+ match shaped.lang {
+ // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+ // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+ // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+ // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+ // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+ // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+ Lang::LOWER_SORBIAN
+ | Lang::CZECH
+ | Lang::CROATIAN
+ | Lang::POLISH
+ | Lang::PORTUGUESE
+ | Lang::SLOVAK => true,
+
+ // In Spanish the hyphen is required only if the word next to hyphen is
+ // not capitalized. Otherwise, the hyphen must not be repeated.
+ //
+ // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+ // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+ Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()),
+
+ _ => false,
}
}
@@ -365,18 +417,19 @@ pub fn commit(
let mut remaining = width - line.width - p.hang;
let mut offset = Abs::zero();
- // Reorder the line from logical to visual order.
- let (reordered, starts_rtl) = reorder(line);
- if !starts_rtl {
+ // We always build the line from left to right. In an LTR paragraph, we must
+ // thus add the hanging indent to the offset. When the paragraph is RTL, the
+ // hanging indent arises naturally due to the line width.
+ if p.dir == Dir::LTR {
offset += p.hang;
}
// Handle hanging punctuation to the left.
- if let Some(Item::Text(text)) = reordered.first() {
+ if let Some(Item::Text(text)) = line.items.first() {
if let Some(glyph) = text.glyphs.first() {
if !text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
- && (reordered.len() > 1 || text.glyphs.len() > 1)
+ && (line.items.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
offset -= amount;
@@ -386,11 +439,11 @@ pub fn commit(
}
// Handle hanging punctuation to the right.
- if let Some(Item::Text(text)) = reordered.last() {
+ if let Some(Item::Text(text)) = line.items.last() {
if let Some(glyph) = text.glyphs.last() {
if text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
- && (reordered.len() > 1 || text.glyphs.len() > 1)
+ && (line.items.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
remaining += amount;
@@ -408,16 +461,16 @@ pub fn commit(
let mut extra_justification = Abs::zero();
let shrinkability = line.shrinkability();
- let stretch = line.stretchability();
+ let stretchability = line.stretchability();
if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
// Attempt to reduce the length of the line, using shrinkability.
justification_ratio = (remaining / shrinkability).max(-1.0);
remaining = (remaining + shrinkability).min(Abs::zero());
} else if line.justify && fr.is_zero() {
// Attempt to increase the length of the line, using stretchability.
- if stretch > Abs::zero() {
- justification_ratio = (remaining / stretch).min(1.0);
- remaining = (remaining - stretch).max(Abs::zero());
+ if stretchability > Abs::zero() {
+ justification_ratio = (remaining / stretchability).min(1.0);
+ remaining = (remaining - stretchability).max(Abs::zero());
}
let justifiables = line.justifiables();
@@ -433,7 +486,7 @@ pub fn commit(
// Build the frames and determine the height and baseline.
let mut frames = vec![];
- for item in reordered {
+ for item in line.items.iter() {
let mut push = |offset: &mut Abs, frame: Frame| {
let width = frame.width();
top.set_max(frame.baseline());
@@ -460,8 +513,12 @@ pub fn commit(
}
}
Item::Text(shaped) => {
- let mut frame =
- shaped.build(engine, justification_ratio, extra_justification);
+ let mut frame = shaped.build(
+ engine,
+ &p.spans,
+ justification_ratio,
+ extra_justification,
+ );
frame.post_process(shaped.styles);
push(&mut offset, frame);
}
@@ -499,111 +556,139 @@ pub fn commit(
Ok(output)
}
-/// Return a line's items in visual order.
-fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
- let mut reordered = vec![];
+/// How much a character should hang into the end margin.
+///
+/// For more discussion, see:
+/// <https://recoveringphysicist.com/21/>
+fn overhang(c: char) -> f64 {
+ match c {
+ // Dashes.
+ '–' | '—' => 0.2,
+ '-' => 0.55,
+
+ // Punctuation.
+ '.' | ',' => 0.8,
+ ':' | ';' => 0.3,
+
+ // Arabic
+ '\u{60C}' | '\u{6D4}' => 0.4,
- // The bidi crate doesn't like empty lines.
- if line.trimmed.is_empty() {
- return (line.slice(line.trimmed.clone()).collect(), false);
+ _ => 0.0,
}
+}
- // Find the paragraph that contains the line.
- let para = line
- .bidi
- .paragraphs
- .iter()
- .find(|para| para.range.contains(&line.trimmed.start))
- .unwrap();
+/// A collection of owned or borrowed paragraph items.
+pub struct Items<'a>(Vec<ItemEntry<'a>>);
- // Compute the reordered ranges in visual order (left to right).
- let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
- let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
+impl<'a> Items<'a> {
+ /// Create empty items.
+ pub fn new() -> Self {
+ Self(vec![])
+ }
- // Collect the reordered items.
- for run in runs {
- // Skip reset L1 runs because handling them would require reshaping
- // again in some cases.
- if line.bidi.levels[run.start] != levels[run.start] {
- continue;
- }
+ /// Push a new item.
+ pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) {
+ self.0.push(entry.into());
+ }
- let prev = reordered.len();
- reordered.extend(line.slice(run.clone()));
+ /// Iterate over the items
+ pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> {
+ self.0.iter().map(|item| &**item)
+ }
- if levels[run.start].is_rtl() {
- reordered[prev..].reverse();
- }
+ /// Access the first item.
+ pub fn first(&self) -> Option<&Item<'a>> {
+ self.0.first().map(|item| &**item)
}
- (reordered, starts_rtl)
-}
+ /// Access the last item.
+ pub fn last(&self) -> Option<&Item<'a>> {
+ self.0.last().map(|item| &**item)
+ }
-/// Whether a hyphen should be inserted at the start of the next line.
-fn should_insert_hyphen(pred_line: &Line) -> bool {
- // If the predecessor line does not end with a Dash::HardHyphen, we shall
- // not place a hyphen at the start of the next line.
- if pred_line.dash != Some(Dash::HardHyphen) {
- return false;
+ /// Access the first item mutably, if it is text.
+ pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ self.0.first_mut()?.text_mut()
}
- // If there's a trimmed out space, we needn't repeat the hyphen. That's the
- // case of a text like "...kebab é a -melhor- comida que existe", where the
- // hyphens are a kind of emphasis marker.
- if pred_line.trimmed.end != pred_line.end {
- return false;
+ /// Access the last item mutably, if it is text.
+ pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ self.0.last_mut()?.text_mut()
}
- // The hyphen should repeat only in the languages that require that feature.
- // For more information see the discussion at https://github.com/typst/typst/issues/3235
- let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
+ /// Reorder the items starting at the given index to RTL.
+ pub fn reorder(&mut self, from: usize) {
+ self.0[from..].reverse()
+ }
+}
- match shape.lang {
- // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
- // - Czech: see https://prirucka.ujc.cas.cz/?id=164
- // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
- // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
- // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
- // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
- Lang::LOWER_SORBIAN
- | Lang::CZECH
- | Lang::CROATIAN
- | Lang::POLISH
- | Lang::PORTUGUESE
- | Lang::SLOVAK => true,
+impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
+ fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
+ Self(iter.into_iter().collect())
+ }
+}
- // In Spanish the hyphen is required only if the word next to hyphen is
- // not capitalized. Otherwise, the hyphen must not be repeated.
- //
- // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
- // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
- Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
- .chars()
- .next()
- .map(|c| !c.is_uppercase())
- .unwrap_or(false),
+impl<'a> Deref for Items<'a> {
+ type Target = Vec<ItemEntry<'a>>;
- _ => false,
+ fn deref(&self) -> &Self::Target {
+ &self.0
}
}
-/// How much a character should hang into the end margin.
-///
-/// For more discussion, see:
-/// <https://recoveringphysicist.com/21/>
-fn overhang(c: char) -> f64 {
- match c {
- // Dashes.
- '–' | '—' => 0.2,
- '-' => 0.55,
+impl<'a> DerefMut for Items<'a> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
- // Punctuation.
- '.' | ',' => 0.8,
- ':' | ';' => 0.3,
+/// A reference to or a boxed item.
+pub enum ItemEntry<'a> {
+ Ref(&'a Item<'a>),
+ Box(Box<Item<'a>>),
+}
- // Arabic
- '\u{60C}' | '\u{6D4}' => 0.4,
+impl<'a> ItemEntry<'a> {
+ fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+ match self {
+ Self::Ref(item) => {
+ let text = item.text()?;
+ *self = Self::Box(Box::new(Item::Text(text.clone())));
+ match self {
+ Self::Box(item) => item.text_mut(),
+ _ => unreachable!(),
+ }
+ }
+ Self::Box(item) => item.text_mut(),
+ }
+ }
+}
- _ => 0.0,
+impl<'a> Deref for ItemEntry<'a> {
+ type Target = Item<'a>;
+
+ fn deref(&self) -> &Self::Target {
+ match self {
+ Self::Ref(item) => item,
+ Self::Box(item) => item,
+ }
+ }
+}
+
+impl Debug for ItemEntry<'_> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ (**self).fmt(f)
+ }
+}
+
+impl<'a> From<&'a Item<'a>> for ItemEntry<'a> {
+ fn from(item: &'a Item<'a>) -> Self {
+ Self::Ref(item)
+ }
+}
+
+impl<'a> From<Item<'a>> for ItemEntry<'a> {
+ fn from(item: Item<'a>) -> Self {
+ Self::Box(Box::new(item))
}
}
diff --git a/crates/typst/src/layout/inline/linebreak.rs b/crates/typst/src/layout/inline/linebreak.rs
index 0555c189..dbaa9c59 100644
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@@ -1,6 +1,7 @@
use std::ops::{Add, Sub};
use icu_properties::maps::CodePointMapData;
+use icu_properties::sets::CodePointSetData;
use icu_properties::LineBreak;
use icu_provider::AsDeserializingBufferProvider;
use icu_provider_adapters::fork::ForkByKeyProvider;
@@ -27,30 +28,33 @@ const MIN_RATIO: f64 = -1.0;
const MIN_APPROX_RATIO: f64 = -0.5;
const BOUND_EPS: f64 = 1e-3;
+/// The ICU blob data, wrapped in a provider built from `typst_assets::icu::ICU`.
+fn blob() -> BlobDataProvider {
+ BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap() // panics if the provider cannot be constructed
+}
+
/// The general line break segmenter.
-static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
- let provider =
- BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
- LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
-});
+static SEGMENTER: Lazy<LineSegmenter> =
+ Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap());
/// The line break segmenter for Chinese/Japanese text.
static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
- let provider =
- BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
let cj_blob =
BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT)
.unwrap();
- let cj_provider = ForkByKeyProvider::new(cj_blob, provider);
+ let cj_provider = ForkByKeyProvider::new(cj_blob, blob());
LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
});
/// The Unicode line break properties for each code point.
static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
- let provider =
- BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
- let deser_provider = provider.as_deserializing();
- icu_properties::maps::load_line_break(&deser_provider).unwrap()
+ icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
+});
+
+/// The set of Unicode default ignorable code points, lazily loaded from the ICU blob.
+static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
+ icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
+ .unwrap()
});
/// A line break opportunity.
@@ -64,6 +68,37 @@ pub enum Breakpoint {
Hyphen,
}
+impl Breakpoint {
+ /// Trim the end of the line that precedes this breakpoint and return the remaining prefix.
+ pub fn trim(self, line: &str) -> &str {
+ // Trim trailing default ignorables; this runs once, before the per-kind trimming below.
+ let ignorable = DEFAULT_IGNORABLE_DATA.as_borrowed();
+ let line = line.trim_end_matches(|c| ignorable.contains(c));
+
+ match self {
+ // Trim trailing whitespace (as defined by `char::is_whitespace`).
+ Self::Normal => line.trim_end_matches(char::is_whitespace),
+
+ // Trim trailing chars of the mandatory-break, CR, LF and NEL line break classes.
+ Self::Mandatory => {
+ let lb = LINEBREAK_DATA.as_borrowed();
+ line.trim_end_matches(|c| {
+ matches!(
+ lb.get(c),
+ LineBreak::MandatoryBreak
+ | LineBreak::CarriageReturn
+ | LineBreak::LineFeed
+ | LineBreak::NextLine
+ )
+ })
+ }
+
+ // Nothing besides the default ignorables is trimmed before a hyphen.
+ Self::Hyphen => line,
+ }
+ }
+}
+
/// Breaks the paragraph into lines.
pub fn linebreak<'a>(
engine: &Engine,
@@ -180,14 +215,11 @@ fn linebreak_optimized_bounded<'a>(
pred: usize,
total: Cost,
line: Line<'a>,
+ end: usize,
}
// Dynamic programming table.
- let mut table = vec![Entry {
- pred: 0,
- total: 0.0,
- line: line(engine, p, 0..0, Breakpoint::Mandatory, None),
- }];
+ let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }];
let mut active = 0;
let mut prev_end = 0;
@@ -200,7 +232,7 @@ fn linebreak_optimized_bounded<'a>(
let mut line_lower_bound = None;
for (pred_index, pred) in table.iter().enumerate().skip(active) {
- let start = pred.line.end;
+ let start = pred.end;
let unbreakable = prev_end == start;
// If the minimum cost we've established for the line is already
@@ -221,6 +253,7 @@ fn linebreak_optimized_bounded<'a>(
width,
&pred.line,
&attempt,
+ end,
breakpoint,
unbreakable,
);
@@ -263,7 +296,7 @@ fn linebreak_optimized_bounded<'a>(
// If this attempt is better than what we had before, take it!
if best.as_ref().map_or(true, |best| best.total >= total) {
- best = Some(Entry { pred: pred_index, total, line: attempt });
+ best = Some(Entry { pred: pred_index, total, line: attempt, end });
}
}
@@ -282,7 +315,7 @@ fn linebreak_optimized_bounded<'a>(
let mut idx = table.len() - 1;
// This should only happen if our bound was faulty. Which shouldn't happen!
- if table[idx].line.end != p.bidi.text.len() {
+ if table[idx].end != p.text.len() {
#[cfg(debug_assertions)]
panic!("bounded paragraph layout is incomplete");
@@ -340,7 +373,7 @@ fn linebreak_optimized_approximate(
let mut prev_end = 0;
breakpoints(p, |end, breakpoint| {
- let at_end = end == p.bidi.text.len();
+ let at_end = end == p.text.len();
// Find the optimal predecessor.
let mut best: Option<Entry> = None;
@@ -362,7 +395,7 @@ fn linebreak_optimized_approximate(
// make it the desired width. We trim at the end to not take into
// account trailing spaces. This is, again, only an approximation of
// the real behaviour of `line`.
- let trimmed_end = start + p.bidi.text[start..end].trim_end().len();
+ let trimmed_end = start + p.text[start..end].trim_end().len();
let line_ratio = raw_ratio(
p,
width,
@@ -428,8 +461,9 @@ fn linebreak_optimized_approximate(
idx = table[idx].pred;
}
+ let mut pred = Line::empty();
+ let mut start = 0;
let mut exact = 0.0;
- let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None);
// The cost that we optimized was only an approximate cost, so the layout we
// got here is only likely to be good, not guaranteed to be the best. We now
@@ -438,26 +472,36 @@ fn linebreak_optimized_approximate(
for idx in indices.into_iter().rev() {
let Entry { end, breakpoint, unbreakable, .. } = table[idx];
- let start = pred.end;
let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
- let (_, line_cost) =
- ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
+ let (_, line_cost) = ratio_and_cost(
+ p,
+ metrics,
+ width,
+ &pred,
+ &attempt,
+ end,
+ breakpoint,
+ unbreakable,
+ );
- exact += line_cost;
pred = attempt;
+ start = end;
+ exact += line_cost;
}
exact
}
/// Compute the stretch ratio and cost of a line.
+#[allow(clippy::too_many_arguments)]
fn ratio_and_cost(
p: &Preparation,
metrics: &CostMetrics,
available_width: Abs,
pred: &Line,
attempt: &Line,
+ end: usize,
breakpoint: Breakpoint,
unbreakable: bool,
) -> (f64, Cost) {
@@ -474,7 +518,7 @@ fn ratio_and_cost(
metrics,
breakpoint,
ratio,
- attempt.end == p.bidi.text.len(),
+ end == p.text.len(),
attempt.justify,
unbreakable,
pred.dash.is_some() && attempt.dash.is_some(),
@@ -587,7 +631,14 @@ fn raw_cost(
/// code much simpler and the consumers of this function don't need the
/// composability and flexibility of external iteration anyway.
fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
- let text = p.bidi.text;
+ let text = p.text;
+
+ // Single breakpoint at the end for empty text.
+ if text.is_empty() {
+ f(0, Breakpoint::Mandatory);
+ return;
+ }
+
let hyphenate = p.hyphenate != Some(false);
let lb = LINEBREAK_DATA.as_borrowed();
let segmenter = match p.lang {
@@ -747,8 +798,9 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
p.hyphenate
.or_else(|| {
- let shaped = p.find(offset)?.text()?;
- Some(TextElem::hyphenate_in(shaped.styles))
+ let (_, item) = p.get(offset);
+ let styles = item.text()?.styles;
+ Some(TextElem::hyphenate_in(styles))
})
.unwrap_or(false)
}
@@ -756,8 +808,9 @@ fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
/// The text language at the given offset.
fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
let lang = p.lang.or_else(|| {
- let shaped = p.find(offset)?.text()?;
- Some(TextElem::lang_in(shaped.styles))
+ let (_, item) = p.get(offset);
+ let styles = item.text()?.styles;
+ Some(TextElem::lang_in(styles))
})?;
let bytes = lang.as_str().as_bytes().try_into().ok()?;
@@ -813,17 +866,14 @@ struct Estimates {
impl Estimates {
/// Compute estimations for approximate Knuth-Plass layout.
fn compute(p: &Preparation) -> Self {
- let cap = p.bidi.text.len();
+ let cap = p.text.len();
let mut widths = CummulativeVec::with_capacity(cap);
let mut stretchability = CummulativeVec::with_capacity(cap);
let mut shrinkability = CummulativeVec::with_capacity(cap);
let mut justifiables = CummulativeVec::with_capacity(cap);
- for item in &p.items {
- let textual_len = item.textual_len();
- let after = widths.len() + textual_len;
-
+ for (range, item) in p.items.iter() {
if let Item::Text(shaped) = item {
for g in shaped.glyphs.iter() {
let byte_len = g.range.len();
@@ -835,13 +885,13 @@ impl Estimates {
justifiables.push(byte_len, g.is_justifiable() as usize);
}
} else {
- widths.push(textual_len, item.width());
+ widths.push(range.len(), item.natural_width());
}
- widths.adjust(after);
- stretchability.adjust(after);
- shrinkability.adjust(after);
- justifiables.adjust(after);
+ widths.adjust(range.end);
+ stretchability.adjust(range.end);
+ shrinkability.adjust(range.end);
+ justifiables.adjust(range.end);
}
Self {
@@ -871,11 +921,6 @@ where
Self { total, summed }
}
- /// Get the covered byte length.
- fn len(&self) -> usize {
- self.summed.len()
- }
-
/// Adjust to cover the given byte length.
fn adjust(&mut self, len: usize) {
self.summed.resize(len, self.total);
diff --git a/crates/typst/src/layout/inline/prepare.rs b/crates/typst/src/layout/inline/prepare.rs
index 90d8d5a4..59682b2c 100644
--- a/crates/typst/src/layout/inline/prepare.rs
+++ b/crates/typst/src/layout/inline/prepare.rs
@@ -13,16 +13,24 @@ use crate::text::{Costs, Lang, TextElem};
/// Only when a line break falls onto a text index that is not safe-to-break per
/// rustybuzz, we have to reshape that portion.
pub struct Preparation<'a> {
+ /// The paragraph's full text.
+ pub text: &'a str,
/// Bidirectional text embedding levels for the paragraph.
- pub bidi: BidiInfo<'a>,
+ ///
+ /// This is `None` if the paragraph is BiDi-uniform (i.e. all text runs in the base direction).
+ pub bidi: Option<BidiInfo<'a>>,
/// Text runs, spacing and layouted elements.
- pub items: Vec<Item<'a>>,
+ pub items: Vec<(Range, Item<'a>)>,
+ /// Maps from byte indices to item indices.
+ pub indices: Vec<usize>,
/// The span mapper.
pub spans: SpanMapper,
/// Whether to hyphenate if it's the same for all children.
pub hyphenate: Option<bool>,
/// Costs for various layout decisions.
pub costs: Costs,
+ /// The dominant direction.
+ pub dir: Dir,
/// The text language if it's the same for all children.
pub lang: Option<Lang>,
/// The paragraph's resolved horizontal alignment.
@@ -44,46 +52,18 @@ pub struct Preparation<'a> {
}
impl<'a> Preparation<'a> {
- /// Find the item that contains the given `text_offset`.
- pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
- let mut cursor = 0;
- for item in &self.items {
- let end = cursor + item.textual_len();
- if (cursor..end).contains(&text_offset) {
- return Some(item);
- }
- cursor = end;
- }
- None
+ /// Get the item (together with its text range) that contains the given byte `offset`.
+ pub fn get(&self, offset: usize) -> &(Range, Item<'a>) {
+ let idx = self.indices.get(offset).copied().unwrap_or(0); // offsets not covered by `indices` fall back to item 0
+ &self.items[idx] // NOTE(review): this indexing panics if `items` is empty — confirm callers rule that out
}
- /// Return the items that intersect the given `text_range`.
- ///
- /// Returns the expanded range around the items and the items.
- pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
- let mut cursor = 0;
- let mut start = 0;
- let mut end = 0;
- let mut expanded = text_range.clone();
-
- for (i, item) in self.items.iter().enumerate() {
- if cursor <= text_range.start {
- start = i;
- expanded.start = cursor;
- }
-
- let len = item.textual_len();
- if cursor < text_range.end || cursor + len <= text_range.end {
- end = i + 1;
- expanded.end = cursor + len;
- } else {
- break;
- }
-
- cursor += len;
- }
-
- (expanded, &self.items[start..end])
+ /// Iterate over the items that intersect the given `sliced` byte range.
+ pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> {
+ let start = self.indices.get(sliced.start).copied().unwrap_or(0); // item at `sliced.start`; item 0 if not covered by `indices`
+ self.items[start..].iter().take_while(move |(range, _)| {
+ range.start < sliced.end || range.end <= sliced.end // second clause also admits zero-length items located at `sliced.end`
+ })
}
}
@@ -99,42 +79,57 @@ pub fn prepare<'a>(
spans: SpanMapper,
styles: StyleChain<'a>,
) -> SourceResult<Preparation<'a>> {
- let bidi = BidiInfo::new(
- text,
- match TextElem::dir_in(styles) {
- Dir::LTR => Some(BidiLevel::ltr()),
- Dir::RTL => Some(BidiLevel::rtl()),
- _ => None,
- },
- );
+ let dir = TextElem::dir_in(styles);
+ let default_level = match dir {
+ Dir::RTL => BidiLevel::rtl(),
+ _ => BidiLevel::ltr(),
+ };
+
+ let bidi = BidiInfo::new(text, Some(default_level));
+ let is_bidi = bidi
+ .levels
+ .iter()
+ .any(|level| level.is_ltr() != default_level.is_ltr());
let mut cursor = 0;
let mut items = Vec::with_capacity(segments.len());
// Shape the text to finalize the items.
for segment in segments {
- let end = cursor + segment.textual_len();
+ let len = segment.textual_len();
+ let end = cursor + len;
+ let range = cursor..end;
+
match segment {
Segment::Text(_, styles) => {
- shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles);
+ shape_range(&mut items, engine, text, &bidi, range, styles);
}
- Segment::Item(item) => items.push(item),
+ Segment::Item(item) => items.push((range, item)),
}
cursor = end;
}
+ // Build the mapping from byte to item indices.
+ let mut indices = Vec::with_capacity(text.len());
+ for (i, (range, _)) in items.iter().enumerate() {
+ indices.extend(range.clone().map(|_| i));
+ }
+
let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
if cjk_latin_spacing {
add_cjk_latin_spacing(&mut items);
}
Ok(Preparation {
- bidi,
+ text,
+ bidi: is_bidi.then_some(bidi),
items,
+ indices,
spans,
hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
costs: TextElem::costs_in(styles),
+ dir,
lang: children.shared_get(styles, TextElem::lang_in),
align: AlignElem::alignment_in(styles).resolve(styles).x,
justify: ParElem::justify_in(styles),
@@ -150,10 +145,14 @@ pub fn prepare<'a>(
/// Add some spacing between Han characters and western characters. See
/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
/// in Horizontal Written Mode
-fn add_cjk_latin_spacing(items: &mut [Item]) {
- let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
+fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) {
+ let mut items = items
+ .iter_mut()
+ .filter(|(_, x)| !matches!(x, Item::Tag(_)))
+ .peekable();
+
let mut prev: Option<&ShapedGlyph> = None;
- while let Some(item) = items.next() {
+ while let Some((_, item)) = items.next() {
let Some(text) = item.text_mut() else {
prev = None;
continue;
@@ -168,7 +167,7 @@ fn add_cjk_latin_spacing(items: &mut [Item]) {
let next = glyphs.peek().map(|n| n as _).or_else(|| {
items
.peek()
- .and_then(|i| i.text())
+ .and_then(|(_, i)| i.text())
.and_then(|shaped| shaped.glyphs.first())
});
diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs
index 44b65391..43dc351a 100644
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -14,7 +14,6 @@ use super::{Item, Range, SpanMapper};
use crate::engine::Engine;
use crate::foundations::{Smart, StyleChain};
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
-use crate::syntax::Span;
use crate::text::{
decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
TextElem, TextItem,
@@ -27,6 +26,7 @@ use crate::World;
/// This type contains owned or borrowed shaped text runs, which can be
/// measured, used to reshape substrings more quickly and converted into a
/// frame.
+#[derive(Clone)]
pub struct ShapedText<'a> {
/// The start of the text in the full paragraph.
pub base: usize,
@@ -80,8 +80,6 @@ pub struct ShapedGlyph {
pub safe_to_break: bool,
/// The first char in this glyph's cluster.
pub c: char,
- /// The source code location of the glyph and its byte offset within it.
- pub span: (Span, u16),
/// Whether this glyph is justifiable for CJK scripts.
pub is_justifiable: bool,
/// The script of the glyph.
@@ -214,6 +212,7 @@ impl<'a> ShapedText<'a> {
pub fn build(
&self,
engine: &Engine,
+ spans: &SpanMapper,
justification_ratio: f64,
extra_justification: Abs,
) -> Frame {
@@ -268,7 +267,7 @@ impl<'a> ShapedText<'a> {
// We may not be able to reach the offset completely if
// it exceeds u16, but better to have a roughly correct
// span offset than nothing.
- let mut span = shaped.span;
+ let mut span = spans.span_at(shaped.range.start);
span.1 = span.1.saturating_add(span_offset.saturating_as());
// |<---- a Glyph ---->|
@@ -331,7 +330,7 @@ impl<'a> ShapedText<'a> {
}
/// Measure the top and bottom extent of this text.
- fn measure(&self, engine: &Engine) -> (Abs, Abs) {
+ pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
let mut top = Abs::zero();
let mut bottom = Abs::zero();
@@ -409,12 +408,7 @@ impl<'a> ShapedText<'a> {
/// shaping process if possible.
///
/// The `text_range` is relative to the whole paragraph.
- pub fn reshape(
- &'a self,
- engine: &Engine,
- spans: &SpanMapper,
- text_range: Range,
- ) -> ShapedText<'a> {
+ pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
#[cfg(debug_assertions)]
@@ -436,7 +430,6 @@ impl<'a> ShapedText<'a> {
engine,
text_range.start,
text,
- spans,
self.styles,
self.dir,
self.lang,
@@ -445,6 +438,16 @@ impl<'a> ShapedText<'a> {
}
}
+ /// Derive an empty text run (no text, zero width, no glyphs) with the same properties as this one.
+ pub fn empty(&self) -> Self {
+ Self {
+ text: "",
+ width: Abs::zero(),
+ glyphs: Cow::Borrowed(&[]),
+ ..*self // every remaining field is taken over from `self` unchanged
+ }
+ }
+
/// Push a hyphen to the end of the text.
pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
self.insert_hyphen(engine, fallback, Side::Right)
@@ -493,7 +496,6 @@ impl<'a> ShapedText<'a> {
range,
safe_to_break: true,
c: '-',
- span: (Span::detached(), 0),
is_justifiable: false,
script: Script::Common,
};
@@ -592,11 +594,11 @@ impl Debug for ShapedText<'_> {
/// Group a range of text by BiDi level and script, shape the runs and generate
/// items for them.
pub fn shape_range<'a>(
- items: &mut Vec<Item<'a>>,
+ items: &mut Vec<(Range, Item<'a>)>,
engine: &Engine,
+ text: &'a str,
bidi: &BidiInfo<'a>,
range: Range,
- spans: &SpanMapper,
styles: StyleChain<'a>,
) {
let script = TextElem::script_in(styles);
@@ -604,17 +606,9 @@ pub fn shape_range<'a>(
let region = TextElem::region_in(styles);
let mut process = |range: Range, level: BidiLevel| {
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
- let shaped = shape(
- engine,
- range.start,
- &bidi.text[range],
- spans,
- styles,
- dir,
- lang,
- region,
- );
- items.push(Item::Text(shaped));
+ let shaped =
+ shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
+ items.push((range, Item::Text(shaped)));
};
let mut prev_level = BidiLevel::ltr();
@@ -625,14 +619,14 @@ pub fn shape_range<'a>(
// set (rather than inferred from the glyphs), we keep the script at an
// unchanging `Script::Unknown` so that only level changes cause breaks.
for i in range.clone() {
- if !bidi.text.is_char_boundary(i) {
+ if !text.is_char_boundary(i) {
continue;
}
let level = bidi.levels[i];
let curr_script = match script {
Smart::Auto => {
- bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+ text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
}
Smart::Custom(_) => Script::Unknown,
};
@@ -668,7 +662,6 @@ fn shape<'a>(
engine: &Engine,
base: usize,
text: &'a str,
- spans: &SpanMapper,
styles: StyleChain<'a>,
dir: Dir,
lang: Lang,
@@ -677,7 +670,6 @@ fn shape<'a>(
let size = TextElem::size_in(styles);
let mut ctx = ShapingContext {
engine,
- spans,
size,
glyphs: vec![],
used: vec![],
@@ -717,7 +709,6 @@ fn shape<'a>(
/// Holds shaping results and metadata common to all shaped segments.
struct ShapingContext<'a, 'v> {
engine: &'a Engine<'v>,
- spans: &'a SpanMapper,
glyphs: Vec<ShapedGlyph>,
used: Vec<Font>,
styles: StyleChain<'a>,
@@ -830,7 +821,6 @@ fn shape_segment<'a>(
range: start..end,
safe_to_break: !info.unsafe_to_break(),
c,
- span: ctx.spans.span_at(start),
is_justifiable: is_justifiable(
c,
script,
@@ -921,7 +911,6 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
range: start..end,
safe_to_break: true,
c,
- span: ctx.spans.span_at(start),
is_justifiable: is_justifiable(
c,
script,
diff --git a/crates/typst/src/model/par.rs b/crates/typst/src/model/par.rs
index 7f65a00f..2110995f 100644
--- a/crates/typst/src/model/par.rs
+++ b/crates/typst/src/model/par.rs
@@ -18,9 +18,9 @@ use crate::realize::StyleVec;
///
/// # Example
/// ```example
-/// #show par: set block(spacing: 0.65em)
/// #set par(
/// first-line-indent: 1em,
+/// spacing: 0.65em,
/// justify: true,
/// )
///
@@ -115,8 +115,7 @@ pub struct ParElem {
/// By typographic convention, paragraph breaks are indicated either by some
/// space between paragraphs or by indented first lines. Consider reducing
/// the [paragraph spacing]($block.spacing) to the [`leading`]($par.leading)
- /// when using this property (e.g. using
- /// `[#show par: set block(spacing: 0.65em)]`).
+ /// when using this property (e.g. using `[#set par(spacing: 0.65em)]`).
#[ghost]
pub first_line_indent: Length,
diff --git a/docs/guides/guide-for-latex-users.md b/docs/guides/guide-for-latex-users.md
index 1f3caef9..8c3b5601 100644
--- a/docs/guides/guide-for-latex-users.md
+++ b/docs/guides/guide-for-latex-users.md
@@ -593,10 +593,9 @@ The example below
```typ
#set page(margin: 1.75in)
-#set par(leading: 0.55em, first-line-indent: 1.8em, justify: true)
+#set par(leading: 0.55em, spacing: 0.55em, first-line-indent: 1.8em, justify: true)
#set text(font: "New Computer Modern")
#show raw: set text(font: "New Computer Modern Mono")
-#show par: set block(spacing: 0.55em)
#show heading: set block(above: 1.4em, below: 1em)
```
diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md
index 9a7dc373..b63d1776 100644
--- a/docs/reference/syntax.md
+++ b/docs/reference/syntax.md
@@ -120,7 +120,7 @@ a table listing all syntax that is available in code mode:
| Named function | `{let f(x) = 2 * x}` | [Function]($function) |
| Set rule | `{set text(14pt)}` | [Styling]($styling/#set-rules) |
| Set-if rule | `{set text(..) if .. }` | [Styling]($styling/#set-rules) |
-| Show-set rule | `{show par: set block(..)}` | [Styling]($styling/#show-rules) |
+| Show-set rule | `{show heading: set block(..)}` | [Styling]($styling/#show-rules) |
| Show rule with function | `{show raw: it => {..}}` | [Styling]($styling/#show-rules) |
| Show-everything rule | `{show: columns.with(2)}` | [Styling]($styling/#show-rules) |
| Context expression | `{context text.lang}` | [Context]($context) |
diff --git a/tests/ref/bidi-whitespace-reset.png b/tests/ref/bidi-whitespace-reset.png
index 7d64012f..e9973798 100644
--- a/tests/ref/bidi-whitespace-reset.png
+++ b/tests/ref/bidi-whitespace-reset.png
Binary files differ
diff --git a/tests/ref/context-compatibility-locate.png b/tests/ref/context-compatibility-locate.png
index 4c8944ab..32516c00 100644
--- a/tests/ref/context-compatibility-locate.png
+++ b/tests/ref/context-compatibility-locate.png
Binary files differ
diff --git a/tests/ref/eval-mode.png b/tests/ref/eval-mode.png
index 5edfa62d..94357ff4 100644
--- a/tests/ref/eval-mode.png
+++ b/tests/ref/eval-mode.png
Binary files differ
diff --git a/tests/ref/issue-3601-empty-raw.png b/tests/ref/issue-3601-empty-raw.png
new file mode 100644
index 00000000..be5ea8fc
--- /dev/null
+++ b/tests/ref/issue-3601-empty-raw.png
Binary files differ
diff --git a/tests/ref/issue-4278-par-trim-before-equation.png b/tests/ref/issue-4278-par-trim-before-equation.png
new file mode 100644
index 00000000..b0553719
--- /dev/null
+++ b/tests/ref/issue-4278-par-trim-before-equation.png
Binary files differ
diff --git a/tests/ref/justify-basically-empty.png b/tests/ref/justify-basically-empty.png
new file mode 100644
index 00000000..3d1b50c1
--- /dev/null
+++ b/tests/ref/justify-basically-empty.png
Binary files differ
diff --git a/tests/ref/par-metadata-after-trimmed-space.png b/tests/ref/par-metadata-after-trimmed-space.png
new file mode 100644
index 00000000..b0de98ea
--- /dev/null
+++ b/tests/ref/par-metadata-after-trimmed-space.png
Binary files differ
diff --git a/tests/ref/par-trailing-whitespace.png b/tests/ref/par-trailing-whitespace.png
new file mode 100644
index 00000000..10c22da5
--- /dev/null
+++ b/tests/ref/par-trailing-whitespace.png
Binary files differ
diff --git a/tests/suite/foundations/version.typ b/tests/suite/foundations/version.typ
index bf2cadb1..a4be7f13 100644
--- a/tests/suite/foundations/version.typ
+++ b/tests/suite/foundations/version.typ
@@ -4,7 +4,7 @@
// Test version constructor.
// Empty.
-#version()
+#test(array(version()), ())
// Plain.
#test(version(1, 2).major, 1)
diff --git a/tests/suite/layout/spacing.typ b/tests/suite/layout/spacing.typ
index dd0fced5..c32e6c8f 100644
--- a/tests/suite/layout/spacing.typ
+++ b/tests/suite/layout/spacing.typ
@@ -47,14 +47,14 @@ Totally #h() ignored
Hello #h(2cm, weak: true)
--- issue-4087 ---
-// weak space at the end of the line would be removed.
+// Weak space at the end of the line is removed.
This is the first line #h(2cm, weak: true) A new line
-// non-weak space would be consume a specified width and push next line.
+// Non-weak space consumes a specified width and pushes to next line.
This is the first line #h(2cm, weak: false) A new line
-// similarly weak space at the beginning of the line would be removed.
-This is the first line\ #h(2cm, weak: true) A new line
+// Similarly, weak space at the beginning of the line is removed.
+This is the first line \ #h(2cm, weak: true) A new line
-// non-spacing, on the other hand, is not removed.
-This is the first line\ #h(2cm, weak: false) A new line
+// Non-weak spacing, on the other hand, is not removed.
+This is the first line \ #h(2cm, weak: false) A new line
diff --git a/tests/suite/model/par.typ b/tests/suite/model/par.typ
index f07c4c6c..80bc9f3e 100644
--- a/tests/suite/model/par.typ
+++ b/tests/suite/model/par.typ
@@ -78,3 +78,22 @@ Welcome \ here. Does this work well?
#set text(dir: rtl)
لآن وقد أظلم الليل وبدأت النجوم
تنضخ وجه الطبيعة التي أعْيَتْ من طول ما انبعثت في النهار
+
+--- par-trailing-whitespace ---
+// Ensure that trailing whitespace layouts as intended.
+#box(fill: aqua, " ")
+
+--- par-empty-metadata ---
+// Check that metadata still works in a zero length paragraph.
+#block(height: 0pt)[#""#metadata(false)<hi>]
+#context test(query(<hi>).first().value, false)
+
+--- par-metadata-after-trimmed-space ---
+// Ensure that metadata doesn't prevent trailing spaces from being trimmed.
+#set par(justify: true, linebreaks: "simple")
+#set text(hyphenate: false)
+Lorem ipsum dolor #metadata(none) nonumy eirmod tempor.
+
+--- issue-4278-par-trim-before-equation ---
+#set par(justify: true)
+#lorem(6) aa $a = c + b$