summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock1
-rw-r--r--Cargo.toml1
-rw-r--r--fonts/IBMPlexSansDevanagari-Regular.ttfbin0 -> 350804 bytes
-rw-r--r--src/library/text/par.rs54
-rw-r--r--src/library/text/shaping.rs11
-rw-r--r--src/util/mod.rs30
-rw-r--r--tests/ref/text/shaping.pngbin17929 -> 2640 bytes
-rw-r--r--tests/ref/text/tracking-spacing.pngbin6474 -> 6483 bytes
-rw-r--r--tests/typ/text/shaping.typ18
9 files changed, 70 insertions, 45 deletions
diff --git a/Cargo.lock b/Cargo.lock
index dcf7bfdf..2341a52e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -873,6 +873,7 @@ dependencies = [
"typed-arena",
"typst-macros",
"unicode-bidi",
+ "unicode-script",
"unicode-segmentation",
"unicode-xid",
"usvg",
diff --git a/Cargo.toml b/Cargo.toml
index e30971bd..fa7449af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ rustybuzz = "0.4"
unicode-bidi = "0.3.5"
unicode-segmentation = "1"
unicode-xid = "0.2"
+unicode-script = "0.5"
xi-unicode = "0.3"
# Raster and vector graphics handling
diff --git a/fonts/IBMPlexSansDevanagari-Regular.ttf b/fonts/IBMPlexSansDevanagari-Regular.ttf
new file mode 100644
index 00000000..5d7c8f0f
--- /dev/null
+++ b/fonts/IBMPlexSansDevanagari-Regular.ttf
Binary files differ
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index 57ced189..6eb3da66 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -1,13 +1,14 @@
use std::sync::Arc;
use unicode_bidi::{BidiInfo, Level};
+use unicode_script::{Script, UnicodeScript};
use xi_unicode::LineBreakIterator;
use super::{shape, Lang, ShapedText, TextNode};
use crate::font::FontStore;
use crate::library::layout::Spacing;
use crate::library::prelude::*;
-use crate::util::{ArcExt, EcoString, SliceExt};
+use crate::util::{ArcExt, EcoString};
/// Arrange text, spacing and inline-level nodes into a paragraph.
#[derive(Hash)]
@@ -437,23 +438,46 @@ fn prepare<'a>(
_ => None,
});
- let mut items = vec![];
let mut cursor = 0;
+ let mut items = vec![];
// Layout the children and collect them into items.
for (segment, styles) in segments {
+ let end = cursor + segment.len();
match segment {
- Segment::Text(len) => {
- // TODO: Also split by script.
- let mut start = cursor;
- for (level, count) in bidi.levels[cursor .. cursor + len].group() {
- let end = start + count;
- let text = &bidi.text[start .. end];
+ Segment::Text(_) => {
+ let mut process = |text, level: Level| {
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
let shaped = shape(&mut ctx.fonts, text, styles, dir);
items.push(Item::Text(shaped));
- start = end;
+ };
+
+ let mut prev_level = Level::ltr();
+ let mut prev_script = Script::Unknown;
+
+ // Group by embedding level and script.
+ for i in cursor .. end {
+ if !text.is_char_boundary(i) {
+ continue;
+ }
+
+ let level = bidi.levels[i];
+ let script =
+ text[i ..].chars().next().map_or(Script::Unknown, |c| c.script());
+
+ if level != prev_level || !is_compatible(script, prev_script) {
+ if cursor < i {
+ process(&text[cursor .. i], prev_level);
+ }
+ cursor = i;
+ prev_level = level;
+ prev_script = script;
+ } else if is_generic_script(prev_script) {
+ prev_script = script;
+ }
}
+
+ process(&text[cursor .. end], prev_level);
}
Segment::Spacing(spacing) => match spacing {
Spacing::Relative(v) => {
@@ -482,12 +506,22 @@ fn prepare<'a>(
}
}
- cursor += segment.len();
+ cursor = end;
}
Ok(Preparation { bidi, items, styles, children: &par.0 })
}
+/// Whether this is a generic script (unknown, common or inherited) rather than a specific one.
+fn is_generic_script(script: Script) -> bool {
+ matches!(script, Script::Unknown | Script::Common | Script::Inherited)
+}
+
+/// Whether these scripts can be part of the same shape run: either is generic or both are equal.
+fn is_compatible(a: Script, b: Script) -> bool {
+ is_generic_script(a) || is_generic_script(b) || a == b
+}
+
/// Find suitable linebreaks.
fn linebreak<'a>(
p: &'a Preparation<'a>,
diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs
index 0a480c83..72f86a38 100644
--- a/src/library/text/shaping.rs
+++ b/src/library/text/shaping.rs
@@ -12,7 +12,6 @@ use crate::util::SliceExt;
/// This type contains owned or borrowed shaped text runs, which can be
/// measured, used to reshape substrings more quickly and converted into a
/// frame.
-#[derive(Debug, Clone)]
pub struct ShapedText<'a> {
/// The text that was shaped.
pub text: &'a str,
@@ -269,11 +268,13 @@ impl<'a> ShapedText<'a> {
// RTL needs offset one because the left side of the range should be
// exclusive and the right side inclusive, contrary to the normal
// behaviour of ranges.
- if !ltr {
- idx += 1;
- }
+ self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize)
+ }
+}
- self.glyphs[idx].safe_to_break.then(|| idx)
+impl Debug for ShapedText<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.text.fmt(f)
}
}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index e42d0664..d898f545 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -103,12 +103,6 @@ where
/// Additional methods for slices.
pub trait SliceExt<T> {
- /// Find consecutive runs of the same elements in a slice and yield for
- /// each such run the element and number of times it appears.
- fn group(&self) -> Group<'_, T>
- where
- T: PartialEq;
-
/// Split a slice into consecutive runs with the same key and yield for
/// each such run the key and the slice of elements with that key.
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
@@ -118,35 +112,11 @@ pub trait SliceExt<T> {
}
impl<T> SliceExt<T> for [T] {
- fn group(&self) -> Group<'_, T> {
- Group { slice: self }
- }
-
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
GroupByKey { slice: self, f }
}
}
-/// This struct is created by [`SliceExt::group`].
-pub struct Group<'a, T> {
- slice: &'a [T],
-}
-
-impl<'a, T> Iterator for Group<'a, T>
-where
- T: PartialEq,
-{
- type Item = (&'a T, usize);
-
- fn next(&mut self) -> Option<Self::Item> {
- let mut iter = self.slice.iter();
- let first = iter.next()?;
- let count = 1 + iter.take_while(|&t| t == first).count();
- self.slice = &self.slice[count ..];
- Some((first, count))
- }
-}
-
/// This struct is created by [`SliceExt::group_by_key`].
pub struct GroupByKey<'a, T, F> {
slice: &'a [T],
diff --git a/tests/ref/text/shaping.png b/tests/ref/text/shaping.png
index a95f44e0..3d3e611d 100644
--- a/tests/ref/text/shaping.png
+++ b/tests/ref/text/shaping.png
Binary files differ
diff --git a/tests/ref/text/tracking-spacing.png b/tests/ref/text/tracking-spacing.png
index ec130c99..8e6db3cc 100644
--- a/tests/ref/text/tracking-spacing.png
+++ b/tests/ref/text/tracking-spacing.png
Binary files differ
diff --git a/tests/typ/text/shaping.typ b/tests/typ/text/shaping.typ
new file mode 100644
index 00000000..51199090
--- /dev/null
+++ b/tests/typ/text/shaping.typ
@@ -0,0 +1,18 @@
+// Test shaping quirks.
+
+---
+// Test separation by script.
+ABCअपार्टमेंट
+
+// This is what it should look like.
+अपार्टमेंट
+
+// This (without the spaces) is how it would look
+// if we didn't separate by script.
+अ पा र् ट में ट
+
+---
+// Test that RTL safe-to-break doesn't panic even though newline
+// doesn't exist in shaping output.
+#set text(dir: rtl, "Noto Serif Hebrew")
+\ ט