diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-05-03 10:33:18 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-05-03 10:33:18 +0200 |
| commit | ad347632ab95e29eb5180b27142f5c264dfc611a (patch) | |
| tree | 2742a33f4c3d800a86e977de04fa2cec7104c43f /library | |
| parent | bcc014c4e177cc4e8cf5ca8c24990908b507c0f8 (diff) | |
Make ligatures copyable and searchable
Fixes #479
Fixes #1040
Diffstat (limited to 'library')
| -rw-r--r-- | library/Cargo.toml | 1 | ||||
| -rw-r--r-- | library/src/layout/par.rs | 6 | ||||
| -rw-r--r-- | library/src/math/fragment.rs | 6 | ||||
| -rw-r--r-- | library/src/text/shaping.rs | 169 |
4 files changed, 93 insertions, 89 deletions
diff --git a/library/Cargo.toml b/library/Cargo.toml index 033058f3..499170cb 100644 --- a/library/Cargo.toml +++ b/library/Cargo.toml @@ -16,6 +16,7 @@ bench = false [dependencies] typst = { path = ".." } +az = "1.2" chinese-number = { version = "0.7.2", default-features = false, features = ["number-to-chinese"] } comemo = "0.2.2" csv = "1" diff --git a/library/src/layout/par.rs b/library/src/layout/par.rs index a6ad647b..0c3a9a3c 100644 --- a/library/src/layout/par.rs +++ b/library/src/layout/par.rs @@ -1139,8 +1139,7 @@ fn line<'a>( // are no other items in the line. if hyphen || start + shaped.text.len() > range.end { if hyphen || start < range.end || before.is_empty() { - let shifted = start - base..range.end - base; - let mut reshaped = shaped.reshape(vt, &p.spans, shifted); + let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end); if hyphen || shy { reshaped.push_hyphen(vt); } @@ -1162,8 +1161,7 @@ fn line<'a>( // Reshape if necessary. if range.start + shaped.text.len() > end { if range.start < end { - let shifted = range.start - base..end - base; - let reshaped = shaped.reshape(vt, &p.spans, shifted); + let reshaped = shaped.reshape(vt, &p.spans, range.start..end); width += reshaped.width; first = Some(Item::Text(reshaped)); } diff --git a/library/src/math/fragment.rs b/library/src/math/fragment.rs index b0991630..40dca347 100644 --- a/library/src/math/fragment.rs +++ b/library/src/math/fragment.rs @@ -222,13 +222,13 @@ impl GlyphFragment { size: self.font_size, fill: self.fill, lang: self.lang, + text: self.c.into(), glyphs: vec![Glyph { id: self.id.0, - c: self.c, x_advance: Em::from_length(self.width, self.font_size), x_offset: Em::zero(), - span: self.span, - offset: 0, + range: 0..self.c.len_utf8() as u16, + span: (self.span, 0), }], }; let size = Size::new(self.width, self.ascent + self.descent); diff --git a/library/src/text/shaping.rs b/library/src/text/shaping.rs index 1e1ccc99..7d5703bc 100644 --- a/library/src/text/shaping.rs +++ b/library/src/text/shaping.rs @@ -1,6 +1,7 @@ use std::ops::Range; use std::str::FromStr; +use az::SaturatingAs; use rustybuzz::{Feature, Tag, UnicodeBuffer}; use typst::font::{Font, FontVariant}; use typst::util::SliceExt; @@ -47,20 +48,18 @@ pub struct ShapedGlyph { pub x_offset: Em, /// The vertical offset of the glyph. pub y_offset: Em, - /// The byte index in the source text where this glyph's cluster starts. A - /// cluster is a sequence of one or multiple glyphs that cannot be - /// separated and must always be treated as a union. - pub cluster: usize, + /// The byte range of this glyph's cluster in the full paragraph. A cluster + /// is a sequence of one or multiple glyphs that cannot be separated and + /// must always be treated as a union. + pub range: Range<usize>, /// Whether splitting the shaping result before this glyph would yield the /// same results as shaping the parts to both sides of `text_index` /// separately. pub safe_to_break: bool, /// The first char in this glyph's cluster. pub c: char, - /// The source code location of the text. - pub span: Span, - /// The offset within the spanned text. - pub offset: u16, + /// The source code location of the glyph and its byte offset within it. + pub span: (Span, u16), } #[derive(Debug, Clone, Default)] @@ -181,6 +180,12 @@ impl<'a> ShapedText<'a> { for ((font, y_offset), group) in self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset)) { + let mut range = group[0].range.clone(); + for glyph in group { + range.start = range.start.min(glyph.range.start); + range.end = range.end.max(glyph.range.end); + } + let pos = Point::new(offset, top + shift - y_offset.at(self.size)); let glyphs = group .iter() @@ -195,8 +200,8 @@ impl<'a> ShapedText<'a> { } else { glyph.stretchability().1 }; - let justification_left = adjustability_left * justification_ratio; + let justification_left = adjustability_left * justification_ratio; let mut justification_right = adjustability_right * justification_ratio; if glyph.is_justifiable() { @@ -206,15 +211,16 @@ impl<'a> ShapedText<'a> { frame.size_mut().x += justification_left.at(self.size) + justification_right.at(self.size); + Glyph { id: glyph.glyph_id, x_advance: glyph.x_advance + justification_left + justification_right, x_offset: glyph.x_offset + justification_left, - c: glyph.c, + range: (glyph.range.start - range.start).saturating_as() + ..(glyph.range.end - range.start).saturating_as(), span: glyph.span, - offset: glyph.offset, } }) .collect(); @@ -224,6 +230,7 @@ impl<'a> ShapedText<'a> { size: self.size, lang, fill: fill.clone(), + text: self.text[range.start - self.base..range.end - self.base].into(), glyphs, }; @@ -318,16 +325,19 @@ impl<'a> ShapedText<'a> { /// Reshape a range of the shaped text, reusing information from this /// shaping process if possible. + /// + /// The text `range` is relative to the whole paragraph. pub fn reshape( &'a self, vt: &Vt, spans: &SpanMapper, text_range: Range<usize>, ) -> ShapedText<'a> { + let text = &self.text[text_range.start - self.base..text_range.end - self.base]; if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) { Self { - base: self.base + text_range.start, - text: &self.text[text_range], + base: text_range.start, + text, dir: self.dir, styles: self.styles, size: self.size, @@ -336,14 +346,7 @@ impl<'a> ShapedText<'a> { glyphs: Cow::Borrowed(glyphs), } } else { - shape( - vt, - self.base + text_range.start, - &self.text[text_range], - spans, - self.styles, - self.dir, - ) + shape(vt, text_range.start, text, spans, self.styles, self.dir) } } @@ -358,7 +361,11 @@ impl<'a> ShapedText<'a> { let ttf = font.ttf(); let glyph_id = ttf.glyph_index('-')?; let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); - let cluster = self.glyphs.last().map(|g| g.cluster).unwrap_or_default(); + let range = self + .glyphs + .last() + .map(|g| g.range.end..g.range.end) + .unwrap_or_default(); self.width += x_advance.at(self.size); self.glyphs.to_mut().push(ShapedGlyph { font, @@ -366,11 +373,10 @@ impl<'a> ShapedText<'a> { x_advance, x_offset: Em::zero(), y_offset: Em::zero(), - cluster, + range, safe_to_break: true, c: '-', - span: Span::detached(), - offset: 0, + span: (Span::detached(), 0), }); Some(()) }); @@ -396,9 +402,9 @@ impl<'a> ShapedText<'a> { // Handle edge cases. let len = self.glyphs.len(); - if text_index == 0 { + if text_index == self.base { return Some(if ltr { 0 } else { len }); - } else if text_index == self.text.len() { + } else if text_index == self.base + self.text.len() { return Some(if ltr { len } else { 0 }); } @@ -406,7 +412,7 @@ impl<'a> ShapedText<'a> { let mut idx = self .glyphs .binary_search_by(|g| { - let ordering = g.cluster.cmp(&text_index); + let ordering = g.range.start.cmp(&text_index); if ltr { ordering } else { @@ -422,7 +428,7 @@ impl<'a> ShapedText<'a> { // Search for the outermost glyph with the text index. while let Some(next) = next(idx, 1) { - if self.glyphs.get(next).map_or(true, |g| g.cluster != text_index) { + if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) { break; } idx = next; @@ -444,7 +450,6 @@ impl Debug for ShapedText<'_> { /// Holds shaping results and metadata common to all shaped segments. struct ShapingContext<'a> { vt: &'a Vt<'a>, - base: usize, spans: &'a SpanMapper, glyphs: Vec<ShapedGlyph>, used: Vec<Font>, @@ -468,7 +473,6 @@ pub fn shape<'a>( let size = TextElem::size_in(styles); let mut ctx = ShapingContext { vt, - base, spans, size, glyphs: vec![], @@ -481,7 +485,7 @@ pub fn shape<'a>( }; if !text.is_empty() { - shape_segment(&mut ctx, 0, text, families(styles)); + shape_segment(&mut ctx, base, text, families(styles)); } track_and_space(&mut ctx); @@ -552,6 +556,7 @@ fn shape_segment( let buffer = rustybuzz::shape(font.rusty(), &ctx.tags, buffer); let infos = buffer.glyph_infos(); let pos = buffer.glyph_positions(); + let ltr = ctx.dir.is_positive(); // Collect the shaped glyphs, doing fallback and shaping parts again with // the next font if necessary. @@ -560,68 +565,66 @@ fn shape_segment( let info = &infos[i]; let cluster = info.cluster as usize; + // Add the glyph to the shaped output. if info.glyph_id != 0 { - // Add the glyph to the shaped output. - // TODO: Don't ignore y_advance. - let (span, offset) = ctx.spans.span_at(ctx.base + cluster); + // Determine the text range of the glyph. + let start = base + cluster; + let end = base + + if ltr { i.checked_add(1) } else { i.checked_sub(1) } + .and_then(|last| infos.get(last)) + .map_or(text.len(), |info| info.cluster as usize); + ctx.glyphs.push(ShapedGlyph { font: font.clone(), glyph_id: info.glyph_id as u16, + // TODO: Don't ignore y_advance. x_advance: font.to_em(pos[i].x_advance), x_offset: font.to_em(pos[i].x_offset), y_offset: font.to_em(pos[i].y_offset), - cluster: base + cluster, + range: start..end, safe_to_break: !info.unsafe_to_break(), c: text[cluster..].chars().next().unwrap(), - span, - offset, + span: ctx.spans.span_at(start), }); } else { - // Determine the source text range for the tofu sequence. - let range = { - // First, search for the end of the tofu sequence. - let k = i; - while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) { - i += 1; - } - - // Then, determine the start and end text index. - // - // Examples: - // Everything is shown in visual order. Tofus are written as "_". - // We want to find out that the tofus span the text `2..6`. - // Note that the clusters are longer than 1 char. - // - // Left-to-right: - // Text: h a l i h a l l o - // Glyphs: A _ _ C E - // Clusters: 0 2 4 6 8 - // k=1 i=2 - // - // Right-to-left: - // Text: O L L A H I L A H - // Glyphs: E C _ _ A - // Clusters: 8 6 4 2 0 - // k=2 i=3 - let ltr = ctx.dir.is_positive(); - let first = if ltr { k } else { i }; - let start = infos[first].cluster as usize; - let last = if ltr { i.checked_add(1) } else { k.checked_sub(1) }; - let end = last - .and_then(|last| infos.get(last)) - .map_or(text.len(), |info| info.cluster as usize); + // First, search for the end of the tofu sequence. + let k = i; + while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) { + i += 1; + } - start..end - }; + // Then, determine the start and end text index for the tofu + // sequence. + // + // Examples: + // Everything is shown in visual order. Tofus are written as "_". + // We want to find out that the tofus span the text `2..6`. + // Note that the clusters are longer than 1 char. + // + // Left-to-right: + // Text: h a l i h a l l o + // Glyphs: A _ _ C E + // Clusters: 0 2 4 6 8 + // k=1 i=2 + // + // Right-to-left: + // Text: O L L A H I L A H + // Glyphs: E C _ _ A + // Clusters: 8 6 4 2 0 + // k=2 i=3 + let start = infos[if ltr { k } else { i }].cluster as usize; + let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) } + .and_then(|last| infos.get(last)) + .map_or(text.len(), |info| info.cluster as usize); // Trim half-baked cluster. - let remove = base + range.start..base + range.end; - while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.cluster)) { + let remove = base + start..base + end; + while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.range.start)) { ctx.glyphs.pop(); } // Recursively shape the tofu sequence with the next family. - shape_segment(ctx, base + range.start, &text[range], families.clone()); + shape_segment(ctx, base + start, &text[start..end], families.clone()); } i += 1; @@ -634,19 +637,18 @@ fn shape_segment( fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) { let x_advance = font.advance(0).unwrap_or_default(); for (cluster, c) in text.char_indices() { - let cluster = base + cluster; - let (span, offset) = ctx.spans.span_at(ctx.base + cluster); + let start = base + cluster; + let end = start + c.len_utf8(); ctx.glyphs.push(ShapedGlyph { font: font.clone(), glyph_id: 0, x_advance, x_offset: Em::zero(), y_offset: Em::zero(), - cluster, + range: start..end, safe_to_break: true, c, - span, - offset, + span: ctx.spans.span_at(start), }); } } @@ -668,7 +670,10 @@ fn track_and_space(ctx: &mut ShapingContext) { glyph.x_advance = spacing.relative_to(glyph.x_advance); } - if glyphs.peek().map_or(false, |next| glyph.cluster != next.cluster) { + if glyphs + .peek() + .map_or(false, |next| glyph.range.start != next.range.start) + { glyph.x_advance += tracking; } } |
