summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-03-08 19:49:26 +0100
committerLaurenz <laurmaedje@gmail.com>2022-03-08 19:49:26 +0100
commitb71113d37a29bab5c7dc4b501c33ee9afbdb8213 (patch)
tree9cab01b9e7f5930651d587473dc24468a4a5eaa1
parentd24c7030d819fd6c6e8e73663ddfe42047cca397 (diff)
Hyphenation
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml1
-rw-r--r--src/library/text/mod.rs2
-rw-r--r--src/library/text/par.rs76
-rw-r--r--src/library/text/shaping.rs31
-rw-r--r--tests/ref/text/hyphenate.pngbin0 -> 4979 bytes
-rw-r--r--tests/ref/text/justify.pngbin11615 -> 11709 bytes
-rw-r--r--tests/typ/text/hyphenate.typ14
-rw-r--r--tests/typ/text/justify.typ11
9 files changed, 128 insertions, 14 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 4a6d4f0e..b4fd7b72 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -241,6 +241,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
+name = "hypher"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29349e08e99b98d0e16a0ca738d181d5c73431a9a46b78918318c4bc9b10106"
+
+[[package]]
name = "iai"
version = "0.1.1"
source = "git+https://github.com/reknih/iai#3f0f92736408ebce6545808b98e0cb2aea89b7dd"
@@ -806,6 +812,7 @@ dependencies = [
"either",
"flate2",
"fxhash",
+ "hypher",
"iai",
"image",
"kurbo",
diff --git a/Cargo.toml b/Cargo.toml
index 3971b971..ed265b1a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ serde = { version = "1", features = ["derive"] }
typed-arena = "2"
# Text and font handling
+hypher = "0.1"
kurbo = "0.8"
ttf-parser = "0.12"
rustybuzz = "0.4"
diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs
index 0df59007..1ce3518c 100644
--- a/src/library/text/mod.rs
+++ b/src/library/text/mod.rs
@@ -1,3 +1,5 @@
+//! Text shaping and paragraph layout.
+
mod deco;
mod link;
mod par;
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index 812231c2..70cac1be 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -27,12 +27,17 @@ pub enum ParChild {
#[class]
impl ParNode {
+ /// An ISO 639-1 language code.
+ pub const LANG: Option<EcoString> = None;
/// The direction for text and inline objects.
pub const DIR: Dir = Dir::LTR;
/// How to align text and inline objects in their line.
pub const ALIGN: Align = Align::Left;
/// Whether to justify text in its line.
pub const JUSTIFY: bool = false;
+ /// Whether to hyphenate text to improve line breaking. When `auto`, words
+ /// will be hyphenated if and only if justification is enabled.
+ pub const HYPHENATE: Smart<bool> = Smart::Auto;
/// The spacing between lines (dependent on scaled font size).
pub const LEADING: Linear = Relative::new(0.65).into();
/// The extra spacing between paragraphs (dependent on scaled font size).
@@ -49,13 +54,14 @@ impl ParNode {
}
fn set(args: &mut Args, styles: &mut StyleMap) -> TypResult<()> {
+ let lang = args.named::<Option<EcoString>>("lang")?;
+
let mut dir =
- args.named("lang")?
- .map(|iso: EcoString| match iso.to_lowercase().as_str() {
- "ar" | "he" | "fa" | "ur" | "ps" | "yi" => Dir::RTL,
- "en" | "fr" | "de" => Dir::LTR,
- _ => Dir::LTR,
- });
+ lang.clone().flatten().map(|iso| match iso.to_lowercase().as_str() {
+ "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
+ | "yi" => Dir::RTL,
+ _ => Dir::LTR,
+ });
if let Some(Spanned { v, span }) = args.named::<Spanned<Dir>>("dir")? {
if v.axis() != SpecAxis::Horizontal {
@@ -74,9 +80,11 @@ impl ParNode {
dir.map(|dir| dir.start().into())
};
+ styles.set_opt(Self::LANG, lang);
styles.set_opt(Self::DIR, dir);
styles.set_opt(Self::ALIGN, align);
styles.set_opt(Self::JUSTIFY, args.named("justify")?);
+ styles.set_opt(Self::HYPHENATE, args.named("hyphenate")?);
styles.set_opt(Self::LEADING, args.named("leading")?);
styles.set_opt(Self::SPACING, args.named("spacing")?);
styles.set_opt(Self::INDENT, args.named("indent")?);
@@ -137,7 +145,7 @@ impl Layout for ParNode {
let par = ParLayout::new(ctx, self, bidi, regions, &styles)?;
// Break the paragraph into lines.
- let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x);
+ let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x, styles);
// Stack the lines into one frame per region.
Ok(stack_lines(&ctx.fonts, lines, regions, styles))
@@ -278,6 +286,7 @@ impl<'a> ParLayout<'a> {
fonts: &mut FontStore,
mut range: Range,
mandatory: bool,
+ hyphen: bool,
) -> LineLayout<'a> {
// Find the items which bound the text range.
let last_idx = self.find(range.end.saturating_sub(1)).unwrap();
@@ -308,7 +317,10 @@ impl<'a> ParLayout<'a> {
// empty string.
if !shifted.is_empty() || rest.is_empty() {
// Reshape that part.
- let reshaped = shaped.reshape(fonts, shifted);
+ let mut reshaped = shaped.reshape(fonts, shifted);
+ if hyphen {
+ reshaped.push_hyphen(fonts);
+ }
last = Some(ParItem::Text(reshaped));
}
@@ -524,6 +536,7 @@ fn break_into_lines<'a>(
fonts: &mut FontStore,
par: &'a ParLayout<'a>,
width: Length,
+ styles: StyleChain,
) -> Vec<LineLayout<'a>> {
// The already determined lines and the current line attempt.
let mut lines = vec![];
@@ -531,9 +544,9 @@ fn break_into_lines<'a>(
let mut last = None;
// Find suitable line breaks.
- for (end, mandatory) in LineBreakIterator::new(&par.bidi.text) {
+ for (end, mandatory, hyphen) in breakpoints(&par.bidi.text, styles) {
// Compute the line and its size.
- let mut line = par.line(fonts, start .. end, mandatory);
+ let mut line = par.line(fonts, start .. end, mandatory, hyphen);
// If the line doesn't fit anymore, we push the last fitting attempt
// into the stack and rebuild the line from its end. The resulting
@@ -542,7 +555,7 @@ fn break_into_lines<'a>(
if let Some((last_line, last_end)) = last.take() {
lines.push(last_line);
start = last_end;
- line = par.line(fonts, start .. end, mandatory);
+ line = par.line(fonts, start .. end, mandatory, hyphen);
}
}
@@ -565,6 +578,47 @@ fn break_into_lines<'a>(
lines
}
+/// Determine all possible points in the text where lines can be broken.
+fn breakpoints<'a>(
+ text: &'a str,
+ styles: StyleChain,
+) -> impl Iterator<Item = (usize, bool, bool)> + 'a {
+ let mut lang = None;
+ if styles.get(ParNode::HYPHENATE).unwrap_or(styles.get(ParNode::JUSTIFY)) {
+ lang = styles
+ .get_ref(ParNode::LANG)
+ .as_ref()
+ .and_then(|iso| iso.as_bytes().try_into().ok())
+ .and_then(hypher::Lang::from_iso);
+ }
+
+ let breaks = LineBreakIterator::new(text);
+ let mut last = 0;
+
+ if let Some(lang) = lang {
+ Either::Left(breaks.flat_map(move |(end, mandatory)| {
+ let word = &text[last .. end];
+ let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic());
+ let suffix = last + trimmed.len();
+ let mut start = std::mem::replace(&mut last, end);
+ if trimmed.is_empty() {
+ Either::Left([(end, mandatory, false)].into_iter())
+ } else {
+ Either::Right(hypher::hyphenate(trimmed, lang).map(move |syllable| {
+ start += syllable.len();
+ if start == suffix {
+ start = end;
+ }
+ let hyphen = start < end;
+ (start, mandatory && !hyphen, hyphen)
+ }))
+ }
+ }))
+ } else {
+ Either::Right(breaks.map(|(e, m)| (e, m, false)))
+ }
+}
+
/// Combine the lines into one frame per region.
fn stack_lines(
fonts: &FontStore,
diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs
index 26c8daf3..b467abf7 100644
--- a/src/library/text/shaping.rs
+++ b/src/library/text/shaping.rs
@@ -135,6 +135,34 @@ impl<'a> ShapedText<'a> {
}
}
+ /// Push a hyphen to the end of the text.
+ pub fn push_hyphen(&mut self, fonts: &mut FontStore) {
+ // Take the hyphen glyph from the first font family that provides one and
+ // append it to the shaped glyphs.
+ let size = self.styles.get(TextNode::SIZE).abs;
+ let variant = variant(self.styles);
+ families(self.styles).find_map(|family| {
+ // Allow hyphens to overhang a bit.
+ const INSET: f64 = 0.4;
+ let face_id = fonts.select(family, variant)?;
+ let face = fonts.get(face_id);
+ let ttf = face.ttf();
+ let glyph_id = ttf.glyph_index('-')?;
+ let x_advance = face.to_em(ttf.glyph_hor_advance(glyph_id)?);
+ self.size.x += INSET * x_advance.resolve(size);
+ self.glyphs.to_mut().push(ShapedGlyph {
+ face_id,
+ glyph_id: glyph_id.0,
+ x_advance,
+ x_offset: Em::zero(),
+ text_index: self.text.len(),
+ safe_to_break: true,
+ is_space: false,
+ });
+ Some(())
+ });
+ }
+
/// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break.
fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
@@ -531,8 +559,9 @@ fn measure(
if glyphs.is_empty() {
// When there are no glyphs, we just use the vertical metrics of the
// first available font.
+ let variant = variant(styles);
for family in families(styles) {
- if let Some(face_id) = fonts.select(family, variant(styles)) {
+ if let Some(face_id) = fonts.select(family, variant) {
expand(fonts.get(face_id));
break;
}
diff --git a/tests/ref/text/hyphenate.png b/tests/ref/text/hyphenate.png
new file mode 100644
index 00000000..050cab12
--- /dev/null
+++ b/tests/ref/text/hyphenate.png
Binary files differ
diff --git a/tests/ref/text/justify.png b/tests/ref/text/justify.png
index 26787af9..38141bdc 100644
--- a/tests/ref/text/justify.png
+++ b/tests/ref/text/justify.png
Binary files differ
diff --git a/tests/typ/text/hyphenate.typ b/tests/typ/text/hyphenate.typ
new file mode 100644
index 00000000..d6f44477
--- /dev/null
+++ b/tests/typ/text/hyphenate.typ
@@ -0,0 +1,14 @@
+// Test hyphenation.
+
+---
+#set page(width: 70pt)
+#set par(lang: "en", hyphenate: true)
+Warm welcomes to Typst.
+
+#h(6pt) networks, the rest.
+
+---
+#set page(width: 60pt)
+#set par(lang: "el", hyphenate: true)
+διαμερίσματα. \
+λατρευτός
diff --git a/tests/typ/text/justify.typ b/tests/typ/text/justify.typ
index 7b8a2829..eb8feb61 100644
--- a/tests/typ/text/justify.typ
+++ b/tests/typ/text/justify.typ
@@ -1,9 +1,16 @@
---
-#set par(indent: 14pt, spacing: 0pt, leading: 5pt, justify: true)
+#set page(width: 180pt)
+#set par(
+ lang: "en",
+ justify: true,
+ indent: 14pt,
+ spacing: 0pt,
+ leading: 5pt,
+)
This text is justified, meaning that spaces are stretched so that the text
-forms as "block" with flush edges at both sides.
+forms a "block" with flush edges at both sides.
First line indents and hyphenation play nicely with justified text.