diff options
| author | Peng Guanwen <pg999w@outlook.com> | 2023-11-15 22:01:15 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-15 15:01:15 +0100 |
| commit | f4a81091f72299296fb2be7511b08c3cb0e8fa57 (patch) | |
| tree | 7935e1eebf9e4ff1f361e71435becd5801f8eb51 /crates/typst-syntax | |
| parent | 50ea3b4f1651ca6558c118edebbd68d7ef7a93d7 (diff) | |
Lexer change: Allow emphasis in CJK text without spaces (#2648)
Diffstat (limited to 'crates/typst-syntax')
| -rw-r--r-- | crates/typst-syntax/Cargo.toml | 1 | ||||
| -rw-r--r-- | crates/typst-syntax/src/lexer.rs | 13 |
2 files changed, 12 insertions, 2 deletions
```diff
diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml
index 1254e663..681189bd 100644
--- a/crates/typst-syntax/Cargo.toml
+++ b/crates/typst-syntax/Cargo.toml
@@ -23,5 +23,6 @@ serde = { workspace = true }
 tracing = { workspace = true }
 unicode-ident = { workspace = true }
 unicode-math-class = { workspace = true }
+unicode-script = { workspace = true }
 unicode-segmentation = { workspace = true }
 unscanny = { workspace = true }
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index c702551c..ffe53145 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -1,5 +1,6 @@
 use ecow::{eco_format, EcoString};
 use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_script::{Script, UnicodeScript};
 use unicode_segmentation::UnicodeSegmentation;
 use unscanny::Scanner;
 
@@ -343,10 +344,18 @@ impl Lexer<'_> {
     }
 
     fn in_word(&self) -> bool {
-        let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+        let wordy = |c: Option<char>| {
+            c.map_or(false, |c| {
+                c.is_alphanumeric()
+                    && !matches!(
+                        c.script(),
+                        Script::Han | Script::Hiragana | Script::Katakana
+                    )
+            })
+        };
         let prev = self.s.scout(-2);
         let next = self.s.peek();
-        alphanum(prev) && alphanum(next)
+        wordy(prev) && wordy(next)
     }
 
     fn space_or_end(&self) -> bool {
```
