diff options
| author | Peng Guanwen <pg999w@outlook.com> | 2023-04-13 16:44:39 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-13 10:44:39 +0200 |
| commit | ff16f3fb370a1688e59fbbaf8824302d2b2f1a7b (patch) | |
| tree | 6b62a1a755252b52f6eedf88cd2d5741337d5923 /library/src | |
| parent | 03d2ec9f813cb18c350de78614fcbc269b2dfc96 (diff) | |
Refine linebreak algorithm for better Chinese justification (#701)
Diffstat (limited to 'library/src')
| -rw-r--r-- | library/src/layout/par.rs | 108 | ||||
| -rw-r--r-- | library/src/text/shaping.rs | 113 |
2 files changed, 164 insertions, 57 deletions
diff --git a/library/src/layout/par.rs b/library/src/layout/par.rs index 0ad9e171..17e07cd0 100644 --- a/library/src/layout/par.rs +++ b/library/src/layout/par.rs @@ -457,22 +457,35 @@ impl<'a> Line<'a> { self.items().skip(start).take(end - start) } - /// How many justifiable glyphs the line contains. + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. fn justifiables(&self) -> usize { let mut count = 0; for shaped in self.items().filter_map(Item::text) { count += shaped.justifiables(); } + // CJK character at line end should not be adjusted. + if self + .items() + .last() + .and_then(Item::text) + .map(|s| s.cjk_justifiable_at_last()) + .unwrap_or(false) + { + count -= 1; + } + count } - /// How much of the line is stretchable spaces. - fn stretch(&self) -> Abs { - let mut stretch = Abs::zero(); - for shaped in self.items().filter_map(Item::text) { - stretch += shaped.stretch(); - } - stretch + /// How much can the line stretch + fn stretchability(&self) -> Abs { + self.items().filter_map(Item::text).map(|s| s.stretchability()).sum() + } + + /// How much can the line shrink + fn shrinkability(&self) -> Abs { + self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum() } /// The sum of fractions in the line. @@ -835,10 +848,9 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L // Cost parameters. const HYPH_COST: Cost = 0.5; - const CONSECUTIVE_DASH_COST: Cost = 30.0; + const CONSECUTIVE_DASH_COST: Cost = 300.0; const MAX_COST: Cost = 1_000_000.0; - const MIN_COST: Cost = -MAX_COST; - const MIN_RATIO: f64 = -0.15; + const MIN_RATIO: f64 = -1.0; // Dynamic programming table. let mut active = 0; @@ -864,14 +876,31 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L // Determine how much the line's spaces would need to be stretched // to make it the desired width. let delta = width - attempt.width; - let mut ratio = delta / attempt.stretch(); + // Determine how much stretch are permitted. + let adjust = if delta >= Abs::zero() { + attempt.stretchability() + } else { + attempt.shrinkability() + }; + // Ideally, the ratio should between -1.0 and 1.0, but sometimes a value above 1.0 + // is possible, in which case the line is underfull. + let mut ratio = delta / adjust; + if ratio.is_nan() { + // The line is not stretchable, but it just fits. + // This often happens with monospace fonts and CJK texts. + ratio = 0.0; + } if ratio.is_infinite() { + // The line's not stretchable, we calculate the ratio in another way... ratio = delta / (em / 2.0); + // ...and because it is underfull/overfull, make sure the ratio is at least 1.0. + if ratio > 0.0 { + ratio += 1.0; + } else { + ratio -= 1.0; + } } - // At some point, it doesn't matter any more. - ratio = ratio.min(10.0); - // Determine the cost of the line. let min_ratio = if attempt.justify { MIN_RATIO } else { 0.0 }; let mut cost = if ratio < min_ratio { @@ -883,11 +912,15 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L active = i + 1; MAX_COST } else if mandatory || eof { - // This is a mandatory break and the line is not overfull, so it - // has minimum cost. All breakpoints before this one become - // inactive since no line can span above the mandatory break. + // This is a mandatory break and the line is not overfull, so + // all breakpoints before this one become inactive since no line + // can span above the mandatory break. active = k; - MIN_COST + if attempt.justify { ratio.powi(3).abs() } else { 0.0 } + if attempt.justify { + ratio.powi(3).abs() + } else { + 0.0 + } } else { // Normal line with cost of |ratio^3|. ratio.powi(3).abs() @@ -898,6 +931,12 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L cost += HYPH_COST; } + // In Knuth paper, cost = (1 + 100|r|^3 + p)^2 + a, + // where r is the ratio, p=50 is penaty, and a=3000 is consecutive penaty. + // We divide the whole formula by 10, resulting (0.01 + |r|^3 + p)^2 + a, + // where p=0.5 and a=300 + cost = (0.01 + cost).powi(2); + // Penalize two consecutive dashes (not necessarily hyphens) extra. if attempt.dash && pred.line.dash { cost += CONSECUTIVE_DASH_COST; @@ -1233,13 +1272,32 @@ fn commit( } } - // Determine how much to justify each space. + // Determine how much addtional space is needed. + // The justicication_ratio is for the first step justification, + // extra_justification is for the last step. + // For more info on multi-step justification, see Procedures for Inter- + // Character Space Expansion in W3C document Chinese Layout Requirements. let fr = line.fr(); - let mut justification = Abs::zero(); - if remaining < Abs::zero() || (line.justify && fr.is_zero()) { + let mut justification_ratio = 0.0; + let mut extra_justification = Abs::zero(); + + let shrink = line.shrinkability(); + let stretch = line.stretchability(); + if remaining < Abs::zero() && shrink > Abs::zero() { + // Attempt to reduce the length of the line, using shrinkability. + justification_ratio = (remaining / shrink).max(-1.0); + remaining = (remaining + shrink).min(Abs::zero()); + } else if line.justify && fr.is_zero() { + // Attempt to increase the length of the line, using stretchability. + if stretch > Abs::zero() { + justification_ratio = (remaining / stretch).min(1.0); + remaining = (remaining - stretch).max(Abs::zero()); + } + let justifiables = line.justifiables(); - if justifiables > 0 { - justification = remaining / justifiables as f64; + if justifiables > 0 && remaining > Abs::zero() { + // Underfull line, distribute the extra space. + extra_justification = remaining / justifiables as f64; remaining = Abs::zero(); } } @@ -1275,7 +1333,7 @@ fn commit( } } Item::Text(shaped) => { - let frame = shaped.build(vt, justification); + let frame = shaped.build(vt, justification_ratio, extra_justification); push(&mut offset, frame); } Item::Frame(frame) => { diff --git a/library/src/text/shaping.rs b/library/src/text/shaping.rs index 2dd0cd6d..0e5e0a73 100644 --- a/library/src/text/shaping.rs +++ b/library/src/text/shaping.rs @@ -70,22 +70,42 @@ impl ShapedGlyph { } /// Whether the glyph is justifiable. - /// - /// Typst's basic justification strategy is to stretch all the spaces - /// in a line until the line fills the available width. However, some - /// scripts (notably Chinese and Japanese) don't use spaces. - /// - /// In Japanese typography, the convention is to insert space evenly - /// between all glyphs. I assume it's the same in Chinese. pub fn is_justifiable(&self) -> bool { - self.is_space() || is_spaceless(self.c.script()) + self.is_space() || self.is_cjk() || self.is_cjk_punctuation() + } + + pub fn is_cjk(&self) -> bool { + use Script::*; + matches!(self.c.script(), Hiragana | Katakana | Han) + } + + pub fn is_cjk_punctuation(&self) -> bool { + matches!(self.c, ',' | '。' | '、' | ':' | ';') + } + + /// The stretchability of the character. + pub fn stretchability(&self) -> Em { + let width = self.x_advance; + if self.is_space() { + // The number for spaces is from Knuth-Plass' paper + width / 2.0 + } else { + Em::zero() + } } -} -/// Does this script separate its words using spaces? -fn is_spaceless(script: Script) -> bool { - use Script::*; - matches!(script, Hiragana | Katakana | Han) + /// The shrinkability of the character. + pub fn shrinkability(&self) -> Em { + let width = self.x_advance; + if self.is_space() { + // The number for spaces is from Knuth-Plass' paper + width / 3.0 + } else if self.is_cjk_punctuation() { + width / 2.0 + } else { + Em::zero() + } + } } /// A side you can go toward. @@ -101,7 +121,12 @@ impl<'a> ShapedText<'a> { /// /// The `justification` defines how much extra advance width each /// [justifiable glyph](ShapedGlyph::is_justifiable) will get. - pub fn build(&self, vt: &Vt, justification: Abs) -> Frame { + pub fn build( + &self, + vt: &Vt, + justification_ratio: f64, + extra_justification: Abs, + ) -> Frame { let (top, bottom) = self.measure(vt); let size = Size::new(self.width, top + bottom); @@ -120,19 +145,25 @@ impl<'a> ShapedText<'a> { let pos = Point::new(offset, top + shift - y_offset.at(self.size)); let glyphs = group .iter() - .map(|glyph| Glyph { - id: glyph.glyph_id, - x_advance: glyph.x_advance - + if glyph.is_justifiable() { - frame.size_mut().x += justification; - Em::from_length(justification, self.size) - } else { - Em::zero() - }, - x_offset: glyph.x_offset, - c: glyph.c, - span: glyph.span, - offset: glyph.offset, + .map(|glyph| { + let mut justification = Em::zero(); + if justification_ratio < 0.0 { + justification += glyph.shrinkability() * justification_ratio + } else { + justification += glyph.stretchability() * justification_ratio + } + if glyph.is_justifiable() { + justification += Em::from_length(extra_justification, self.size) + } + frame.size_mut().x += justification.at(self.size); + Glyph { + id: glyph.glyph_id, + x_advance: glyph.x_advance + justification, + x_offset: glyph.x_offset, + c: glyph.c, + span: glyph.span, + offset: glyph.offset, + } }) .collect(); @@ -200,17 +231,35 @@ impl<'a> ShapedText<'a> { (top, bottom) } - /// How many justifiable glyphs the text contains. + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. pub fn justifiables(&self) -> usize { self.glyphs.iter().filter(|g| g.is_justifiable()).count() } - /// The width of the spaces in the text. - pub fn stretch(&self) -> Abs { + /// Whether the last glyph is a CJK character which should not be justified + /// on line end. + pub fn cjk_justifiable_at_last(&self) -> bool { + self.glyphs + .last() + .map(|g| g.is_cjk() || g.is_cjk_punctuation()) + .unwrap_or(false) + } + + /// The stretchability of the text. + pub fn stretchability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.stretchability()) + .sum::<Em>() + .at(self.size) + } + + /// The shrinkability of the text + pub fn shrinkability(&self) -> Abs { self.glyphs .iter() - .filter(|g| g.is_justifiable()) - .map(|g| g.x_advance) + .map(|g| g.shrinkability()) .sum::<Em>() .at(self.size) } |
