9 files changed, 70 insertions, 45 deletions
diff --git a/Cargo.lock b/Cargo.lock
index dcf7bfdf..2341a52e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -873,6 +873,7 @@ dependencies = [
  "typed-arena",
  "typst-macros",
  "unicode-bidi",
+ "unicode-script",
  "unicode-segmentation",
  "unicode-xid",
  "usvg",
diff --git a/Cargo.toml b/Cargo.toml
index e30971bd..fa7449af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ rustybuzz = "0.4"
 unicode-bidi = "0.3.5"
 unicode-segmentation = "1"
 unicode-xid = "0.2"
+unicode-script = "0.5"
 xi-unicode = "0.3"
 
 # Raster and vector graphics handling
diff --git a/fonts/IBMPlexSansDevanagari-Regular.ttf b/fonts/IBMPlexSansDevanagari-Regular.ttf
new file mode 100644
index 00000000..5d7c8f0f
--- /dev/null
+++ b/fonts/IBMPlexSansDevanagari-Regular.ttf
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index 57ced189..6eb3da66 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -1,13 +1,14 @@
 use std::sync::Arc;
 
 use unicode_bidi::{BidiInfo, Level};
+use unicode_script::{Script, UnicodeScript};
 use xi_unicode::LineBreakIterator;
 
 use super::{shape, Lang, ShapedText, TextNode};
 use crate::font::FontStore;
 use crate::library::layout::Spacing;
 use crate::library::prelude::*;
-use crate::util::{ArcExt, EcoString, SliceExt};
+use crate::util::{ArcExt, EcoString};
 
 /// Arrange text, spacing and inline-level nodes into a paragraph.
 #[derive(Hash)]
@@ -437,23 +438,46 @@ fn prepare<'a>(
         _ => None,
     });
 
-    let mut items = vec![];
     let mut cursor = 0;
+    let mut items = vec![];
 
     // Layout the children and collect them into items.
     for (segment, styles) in segments {
+        let end = cursor + segment.len();
         match segment {
-            Segment::Text(len) => {
-                // TODO: Also split by script.
-                let mut start = cursor;
-                for (level, count) in bidi.levels[cursor .. cursor + len].group() {
-                    let end = start + count;
-                    let text = &bidi.text[start .. end];
+            Segment::Text(_) => {
+                let mut process = |text, level: Level| {
                     let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
                     let shaped = shape(&mut ctx.fonts, text, styles, dir);
                     items.push(Item::Text(shaped));
-                    start = end;
+                };
+
+                let mut prev_level = Level::ltr();
+                let mut prev_script = Script::Unknown;
+
+                // Group by embedding level and script.
+                for i in cursor .. end {
+                    if !text.is_char_boundary(i) {
+                        continue;
+                    }
+
+                    let level = bidi.levels[i];
+                    let script =
+                        text[i ..].chars().next().map_or(Script::Unknown, |c| c.script());
+
+                    if level != prev_level || !is_compatible(script, prev_script) {
+                        if cursor < i {
+                            process(&text[cursor .. i], prev_level);
+                        }
+                        cursor = i;
+                        prev_level = level;
+                        prev_script = script;
+                    } else if is_generic_script(prev_script) {
+                        prev_script = script;
+                    }
                 }
+
+                process(&text[cursor .. end], prev_level);
             }
             Segment::Spacing(spacing) => match spacing {
                 Spacing::Relative(v) => {
@@ -482,12 +506,22 @@ fn prepare<'a>(
             }
         }
 
-        cursor += segment.len();
+        cursor = end;
     }
 
     Ok(Preparation { bidi, items, styles, children: &par.0 })
 }
 
+/// Whether the script is generic (unknown, common or inherited) instead of specific.
+fn is_generic_script(script: Script) -> bool {
+    matches!(script, Script::Unknown | Script::Common | Script::Inherited)
+}
+
+/// Whether two scripts can share a shape run: equal, or at least one is generic.
+fn is_compatible(a: Script, b: Script) -> bool {
+    is_generic_script(a) || is_generic_script(b) || a == b
+}
+
 /// Find suitable linebreaks.
 fn linebreak<'a>(
     p: &'a Preparation<'a>,
diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs
index 0a480c83..72f86a38 100644
--- a/src/library/text/shaping.rs
+++ b/src/library/text/shaping.rs
@@ -12,7 +12,6 @@ use crate::util::SliceExt;
 /// This type contains owned or borrowed shaped text runs, which can be
 /// measured, used to reshape substrings more quickly and converted into a
 /// frame.
-#[derive(Debug, Clone)]
 pub struct ShapedText<'a> {
     /// The text that was shaped.
     pub text: &'a str,
@@ -269,11 +268,13 @@ impl<'a> ShapedText<'a> {
         // RTL needs offset one because the left side of the range should be
         // exclusive and the right side inclusive, contrary to the normal
         // behaviour of ranges.
-        if !ltr {
-            idx += 1;
-        }
+        self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize)
+    }
+}
 
-        self.glyphs[idx].safe_to_break.then(|| idx)
+impl Debug for ShapedText<'_> {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        self.text.fmt(f) // Only the `text` field is formatted.
     }
 }
 
diff --git a/src/util/mod.rs b/src/util/mod.rs
index e42d0664..d898f545 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -103,12 +103,6 @@ where
 
 /// Additional methods for slices.
 pub trait SliceExt<T> {
-    /// Find consecutive runs of the same elements in a slice and yield for
-    /// each such run the element and number of times it appears.
-    fn group(&self) -> Group<'_, T>
-    where
-        T: PartialEq;
-
     /// Split a slice into consecutive runs with the same key and yield for
     /// each such run the key and the slice of elements with that key.
     fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
@@ -118,35 +112,11 @@ pub trait SliceExt<T> {
 }
 
 impl<T> SliceExt<T> for [T] {
-    fn group(&self) -> Group<'_, T> {
-        Group { slice: self }
-    }
-
     fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
         GroupByKey { slice: self, f }
     }
 }
 
-/// This struct is created by [`SliceExt::group`].
-pub struct Group<'a, T> {
-    slice: &'a [T],
-}
-
-impl<'a, T> Iterator for Group<'a, T>
-where
-    T: PartialEq,
-{
-    type Item = (&'a T, usize);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut iter = self.slice.iter();
-        let first = iter.next()?;
-        let count = 1 + iter.take_while(|&t| t == first).count();
-        self.slice = &self.slice[count ..];
-        Some((first, count))
-    }
-}
-
 /// This struct is created by [`SliceExt::group_by_key`].
 pub struct GroupByKey<'a, T, F> {
     slice: &'a [T],
diff --git a/tests/ref/text/shaping.png b/tests/ref/text/shaping.png
index a95f44e0..3d3e611d 100644
--- a/tests/ref/text/shaping.png
+++ b/tests/ref/text/shaping.png
diff --git a/tests/ref/text/tracking-spacing.png b/tests/ref/text/tracking-spacing.png
index ec130c99..8e6db3cc 100644
--- a/tests/ref/text/tracking-spacing.png
+++ b/tests/ref/text/tracking-spacing.png
diff --git a/tests/typ/text/shaping.typ b/tests/typ/text/shaping.typ
new file mode 100644
index 00000000..51199090
--- /dev/null
+++ b/tests/typ/text/shaping.typ
@@ -0,0 +1,18 @@
+// Test shaping quirks.
+
+---
+// Test separation by script.
+ABCअपार्टमेंट
+
+// This is how it should look.
+अपार्टमेंट
+
+// This (without the spaces) is how it would look
+// if we didn't separate by script.
+अ पा र् ट में ट
+
+---
+// Test that RTL safe-to-break doesn't panic even though newline
+// doesn't exist in shaping output.
+#set text(dir: rtl, "Noto Serif Hebrew")
+\ ט