summary refs log tree commit diff
path: root/crates
diff options
context:
space:
mode:
Diffstat (limited to 'crates')
-rw-r--r-- crates/typst-syntax/Cargo.toml 1
-rw-r--r-- crates/typst-syntax/src/lexer.rs 13
2 files changed, 12 insertions, 2 deletions
diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml
index 1254e663..681189bd 100644
--- a/crates/typst-syntax/Cargo.toml
+++ b/crates/typst-syntax/Cargo.toml
@@ -23,5 +23,6 @@ serde = { workspace = true }
tracing = { workspace = true }
unicode-ident = { workspace = true }
unicode-math-class = { workspace = true }
+unicode-script = { workspace = true }
unicode-segmentation = { workspace = true }
unscanny = { workspace = true }
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index c702551c..ffe53145 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -1,5 +1,6 @@
use ecow::{eco_format, EcoString};
use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;
@@ -343,10 +344,18 @@ impl Lexer<'_> {
}
fn in_word(&self) -> bool {
- let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let wordy = |c: Option<char>| {
+ c.map_or(false, |c| {
+ c.is_alphanumeric()
+ && !matches!(
+ c.script(),
+ Script::Han | Script::Hiragana | Script::Katakana
+ )
+ })
+ };
let prev = self.s.scout(-2);
let next = self.s.peek();
- alphanum(prev) && alphanum(next)
+ wordy(prev) && wordy(next)
}
fn space_or_end(&self) -> bool {