summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2024-07-04 15:27:43 +0200
committer GitHub <noreply@github.com> 2024-07-04 13:27:43 +0000
commit 129a4d600c1860beb7ec2ae52a9186088f6f043d (patch)
tree 8b510cdba4160f37103401bd6dc3fed94d7bbf19
parent 0ef672c347f368325313c8bccc4f70e3f1016b0a (diff)
Fix hyphenation outside of words (#4498)
-rw-r--r-- crates/typst/src/layout/inline/linebreak.rs 88
-rw-r--r-- tests/ref/hyphenate-outside-of-words.png bin 0 -> 1011 bytes
-rw-r--r-- tests/suite/layout/inline/hyphenate.typ 10
3 files changed, 57 insertions, 41 deletions
diff --git a/crates/typst/src/layout/inline/linebreak.rs b/crates/typst/src/layout/inline/linebreak.rs
index dbaa9c59..9deaa92a 100644
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@@ -1,6 +1,6 @@
use std::ops::{Add, Sub};
-use icu_properties::maps::CodePointMapData;
+use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed};
use icu_properties::sets::CodePointSetData;
use icu_properties::LineBreak;
use icu_provider::AsDeserializingBufferProvider;
@@ -8,6 +8,7 @@ use icu_provider_adapters::fork::ForkByKeyProvider;
use icu_provider_blob::BlobDataProvider;
use icu_segmenter::LineSegmenter;
use once_cell::sync::Lazy;
+use unicode_segmentation::UnicodeSegmentation;
use super::*;
use crate::engine::Engine;
@@ -630,7 +631,7 @@ fn raw_cost(
/// This is an internal instead of an external iterator because it makes the
/// code much simpler and the consumers of this function don't need the
/// composability and flexibility of external iteration anyway.
-fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
+fn breakpoints(p: &Preparation, mut f: impl FnMut(usize, Breakpoint)) {
let text = p.text;
// Single breakpoint at the end for empty text.
@@ -661,7 +662,7 @@ fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint))
}
}
- // Get the UAX #14 linebreak opportunities.
+ // Get the next UAX #14 linebreak opportunity.
let Some(point) = iter.next() else { break };
// Skip breakpoint if there is no char before it. icu4x generates one
@@ -686,46 +687,13 @@ fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint))
};
// Hyphenate between the last and current breakpoint.
- 'hyphenate: {
- if !hyphenate {
- break 'hyphenate;
- }
-
- // Extract a hyphenatable "word".
- let word = &text[last..point].trim_end_matches(|c: char| !c.is_alphabetic());
- if word.is_empty() {
- break 'hyphenate;
- }
-
- let end = last + word.len();
+ if hyphenate {
let mut offset = last;
-
- // Determine the language to hyphenate this word in.
- let Some(lang) = lang_at(p, last) else { break 'hyphenate };
-
- for syllable in hypher::hyphenate(word, lang) {
- // Don't hyphenate after the final syllable.
- offset += syllable.len();
- if offset == end {
- continue;
+ for segment in text[last..point].split_word_bounds() {
+ if !segment.is_empty() && segment.chars().all(char::is_alphabetic) {
+ hyphenations(p, &lb, offset, segment, &mut f);
}
-
- // Filter out hyphenation opportunities where hyphenation was
- // actually disabled.
- if !hyphenate_at(p, offset) {
- continue;
- }
-
- // Filter out forbidden hyphenation opportunities.
- if matches!(
- syllable.chars().next_back().map(|c| lb.get(c)),
- Some(LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ)
- ) {
- continue;
- }
-
- // Call `f` for the word-internal hyphenation opportunity.
- f(offset, Breakpoint::Hyphen);
+ offset += segment.len();
}
}
@@ -736,6 +704,44 @@ fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint))
}
}
+/// Generate breakpoints for hyphenations within a word, calling `f` once per permitted opportunity.
+fn hyphenations(
+ p: &Preparation,
+ lb: &CodePointMapDataBorrowed<LineBreak>,
+ mut offset: usize, // Byte offset where `word` starts; advanced past each syllable below.
+ word: &str,
+ mut f: impl FnMut(usize, Breakpoint),
+) {
+ let Some(lang) = lang_at(p, offset) else { return }; // Bail out if `lang_at` yields nothing here.
+ let end = offset + word.len(); // Offset just past the word's last byte.
+
+ for syllable in hypher::hyphenate(word, lang) {
+ offset += syllable.len();
+
+ // Don't hyphenate after the final syllable: that offset is the end of the word, not inside it.
+ if offset == end {
+ continue;
+ }
+
+ // Filter out hyphenation opportunities where hyphenation was actually
+ // disabled.
+ if !hyphenate_at(p, offset) {
+ continue;
+ }
+
+ // Filter out forbidden opportunities: syllables ending in a Glue, WordJoiner, or ZWJ character.
+ if matches!(
+ syllable.chars().next_back().map(|c| lb.get(c)),
+ Some(LineBreak::Glue | LineBreak::WordJoiner | LineBreak::ZWJ)
+ ) {
+ continue;
+ }
+
+ // Call `f` with the offset of the word-internal hyphenation opportunity.
+ f(offset, Breakpoint::Hyphen);
+ }
+}
+
/// Produce linebreak opportunities for a link.
fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
#[derive(PartialEq)]
diff --git a/tests/ref/hyphenate-outside-of-words.png b/tests/ref/hyphenate-outside-of-words.png
new file mode 100644
index 00000000..57b11ed8
--- /dev/null
+++ b/tests/ref/hyphenate-outside-of-words.png
Binary files differ
diff --git a/tests/suite/layout/inline/hyphenate.typ b/tests/suite/layout/inline/hyphenate.typ
index c366b38f..debce1da 100644
--- a/tests/suite/layout/inline/hyphenate.typ
+++ b/tests/suite/layout/inline/hyphenate.typ
@@ -50,6 +50,16 @@ It's a #emph[Tree]beard.
#set text(hyphenate: true)
#h(6pt) networks, the rest.
+--- hyphenate-outside-of-words ---
+// More tests for hyphenation of non-words.
+#set text(hyphenate: true)
+#block(width: 0pt, "doesn't")
+#block(width: 0pt, "(OneNote)")
+#block(width: 0pt, "(present)")
+
+#set text(lang: "de")
+#block(width: 0pt, "(bzw.)")
+
--- hyphenate-pt-repeat-hyphen-natural-word-breaking ---
// The word breaker naturally breaks arco-da-velha at arco-/-da-velha,
// so we shall repeat the hyphen, even though hyphenate is set to false.