summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2024-09-17 17:35:56 +0200
committer GitHub <noreply@github.com> 2024-09-17 15:35:56 +0000
commit ab8295c07dd1883f9c19d3e075e7a4971fec0de0 (patch)
tree b451ae3577f50a8d9471deea6a1dd1789a9b9b9c
parent 0abd46c3796e18e997e26f94837c76dc446036d0 (diff)
Fix repetition of Thai characters (#4977)
Co-authored-by: Martin Haug <mhaug@live.de>
-rw-r--r-- crates/typst/src/layout/inline/shaping.rs 21
-rw-r--r-- tests/ref/issue-4468-linebreak-thai.png bin 0 -> 247 bytes
-rw-r--r-- tests/ref/linebreak-thai.png bin 6968 -> 6877 bytes
-rw-r--r-- tests/suite/layout/inline/linebreak.typ 8
4 files changed, 18 insertions, 11 deletions
diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs
index 43dc351a..0b4a17d7 100644
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -515,14 +515,14 @@ impl<'a> ShapedText<'a> {
std::mem::swap(&mut start, &mut end);
}
- let left = self.find_safe_to_break(start, Side::Left)?;
- let right = self.find_safe_to_break(end, Side::Right)?;
+ let left = self.find_safe_to_break(start)?;
+ let right = self.find_safe_to_break(end)?;
Some(&self.glyphs[left..right])
}
/// Find the glyph offset matching the text index that is most towards the
- /// given side and safe-to-break.
- fn find_safe_to_break(&self, text_index: usize, towards: Side) -> Option<usize> {
+ /// start of the text and safe-to-break.
+ fn find_safe_to_break(&self, text_index: usize) -> Option<usize> {
let ltr = self.dir.is_positive();
// Handle edge cases.
@@ -542,6 +542,7 @@ impl<'a> ShapedText<'a> {
ordering.reverse()
}
});
+
let mut idx = match found {
Ok(idx) => idx,
Err(idx) => {
@@ -565,13 +566,11 @@ impl<'a> ShapedText<'a> {
}
};
- let next = match towards {
- Side::Left => usize::checked_sub,
- Side::Right => usize::checked_add,
- };
-
- // Search for the outermost glyph with the text index.
- while let Some(next) = next(idx, 1) {
+ // Search for the start-most glyph with the text index. This means
+ // we take empty range glyphs at the start and leave those at the end
+ // for the next line.
+ let dec = if ltr { usize::checked_sub } else { usize::checked_add };
+ while let Some(next) = dec(idx, 1) {
if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
break;
}
diff --git a/tests/ref/issue-4468-linebreak-thai.png b/tests/ref/issue-4468-linebreak-thai.png
new file mode 100644
index 00000000..6257c560
--- /dev/null
+++ b/tests/ref/issue-4468-linebreak-thai.png
Binary files differ
diff --git a/tests/ref/linebreak-thai.png b/tests/ref/linebreak-thai.png
index 8053a212..c31a61bd 100644
--- a/tests/ref/linebreak-thai.png
+++ b/tests/ref/linebreak-thai.png
Binary files differ
diff --git a/tests/suite/layout/inline/linebreak.typ b/tests/suite/layout/inline/linebreak.typ
index 7e959352..8371d76e 100644
--- a/tests/suite/layout/inline/linebreak.typ
+++ b/tests/suite/layout/inline/linebreak.typ
@@ -115,3 +115,11 @@ For info see #link("https://myhost.tld").
// for links because it now splits on word boundaries. We avoid the link markup
// syntax because its show rule interferes.
#"http://creativecommons.org/licenses/by-nc-sa/4.0/"
+
+--- issue-4468-linebreak-thai ---
+// In this bug, empty-range glyphs at line break boundaries could be duplicated.
+// This happens for Thai specifically because it has both
+// - line break opportunities
+// - shaping that results in multiple glyphs in the same cluster
+#set text(font: "Noto Sans Thai")
+#h(85pt) งบิก