diff options
Diffstat (limited to 'crates')
| -rw-r--r-- | crates/typst-syntax/Cargo.toml | 1 |
| -rw-r--r-- | crates/typst-syntax/src/lexer.rs | 13 |
2 files changed, 12 insertions, 2 deletions
diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml
index 1254e663..681189bd 100644
--- a/crates/typst-syntax/Cargo.toml
+++ b/crates/typst-syntax/Cargo.toml
@@ -23,5 +23,6 @@ serde = { workspace = true }
 tracing = { workspace = true }
 unicode-ident = { workspace = true }
 unicode-math-class = { workspace = true }
+unicode-script = { workspace = true }
 unicode-segmentation = { workspace = true }
 unscanny = { workspace = true }
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index c702551c..ffe53145 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -1,5 +1,6 @@
 use ecow::{eco_format, EcoString};
 use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_script::{Script, UnicodeScript};
 use unicode_segmentation::UnicodeSegmentation;
 use unscanny::Scanner;
 
@@ -343,10 +344,18 @@ impl Lexer<'_> {
     }
 
     fn in_word(&self) -> bool {
-        let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+        let wordy = |c: Option<char>| {
+            c.map_or(false, |c| {
+                c.is_alphanumeric()
+                    && !matches!(
+                        c.script(),
+                        Script::Han | Script::Hiragana | Script::Katakana
+                    )
+            })
+        };
         let prev = self.s.scout(-2);
         let next = self.s.peek();
-        alphanum(prev) && alphanum(next)
+        wordy(prev) && wordy(next)
     }
 
     fn space_or_end(&self) -> bool {
