summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Biedert <github@ericbiedert.de> 2023-10-22 13:36:33 +0200
committer GitHub <noreply@github.com> 2023-10-22 13:36:33 +0200
commit 3ed6462ee0a63eb3946ba48dc82b525f7c5c1079 (patch)
tree ef8b8950441b47cf93a4c83a59e7063efbcd7196
parent aaac1dbd683501e81bef4d961ed90c6c869030e4 (diff)
Don't hyphenate on no-break characters (#2396)
-rw-r--r-- crates/typst-library/src/layout/par.rs 26
1 files changed, 18 insertions, 8 deletions
diff --git a/crates/typst-library/src/layout/par.rs b/crates/typst-library/src/layout/par.rs
index 9542e37c..5d21c8e9 100644
--- a/crates/typst-library/src/layout/par.rs
+++ b/crates/typst-library/src/layout/par.rs
@@ -1109,7 +1109,7 @@ static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
});
-/// The Unicode line break properties for each code point.
+/// The line break segmenter for Chinese/Japanese text.
static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
let provider = BlobDataProvider::try_new_from_static_blob(ICU_DATA).unwrap();
let cj_blob = BlobDataProvider::try_new_from_static_blob(CJ_LINEBREAK_DATA).unwrap();
@@ -1117,7 +1117,7 @@ static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
});
-/// The line break segmenter for Chinese/Jpanese text.
+/// The Unicode line break properties for each code point.
static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
let provider = BlobDataProvider::try_new_from_static_blob(ICU_DATA).unwrap();
let deser_provider = provider.as_deserializing();
@@ -1170,6 +1170,8 @@ impl Iterator for Breakpoints<'_> {
type Item = (usize, bool, bool);
fn next(&mut self) -> Option<Self::Item> {
+ let lb = LINEBREAK_DATA.as_borrowed();
+
// If we're currently in a hyphenated "word", process the next syllable.
if let Some(syllable) = self.syllables.as_mut().and_then(Iterator::next) {
self.offset += syllable.len();
@@ -1177,18 +1179,26 @@ impl Iterator for Breakpoints<'_> {
self.offset = self.end;
}
- // Filter out hyphenation opportunities where hyphenation was
- // actually disabled.
let hyphen = self.offset < self.end;
- if hyphen && !self.hyphenate(self.offset) {
- return self.next();
+ if hyphen {
+ // Filter out hyphenation opportunities where hyphenation was
+ // actually disabled.
+ if !self.hyphenate(self.offset) {
+ return self.next();
+ }
+
+ // Filter out forbidden hyphenation opportunities.
+ if matches!(
+ syllable.chars().last().map(|c| lb.get(c)),
+ Some(LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ)
+ ) {
+ return self.next();
+ }
}
return Some((self.offset, self.mandatory && !hyphen, hyphen));
}
- let lb = LINEBREAK_DATA.as_borrowed();
-
loop {
// Get the next "word".
self.end = self.linebreaks.next()?;