summary | refs | log | tree | commit | diff
path: root/crates/typst-syntax/src/lexer.rs
diff options
context:
space:
mode:
author: Peng Guanwen <pg999w@outlook.com> 2023-11-15 22:01:15 +0800
committer: GitHub <noreply@github.com> 2023-11-15 15:01:15 +0100
commit: f4a81091f72299296fb2be7511b08c3cb0e8fa57 (patch)
tree: 7935e1eebf9e4ff1f361e71435becd5801f8eb51 /crates/typst-syntax/src/lexer.rs
parent: 50ea3b4f1651ca6558c118edebbd68d7ef7a93d7 (diff)
Lexer change: Allow emphasis in CJK text without spaces (#2648)
Diffstat (limited to 'crates/typst-syntax/src/lexer.rs')
-rw-r--r-- crates/typst-syntax/src/lexer.rs 13
1 files changed, 11 insertions, 2 deletions
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index c702551c..ffe53145 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -1,5 +1,6 @@
use ecow::{eco_format, EcoString};
use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;
@@ -343,10 +344,18 @@ impl Lexer<'_> {
}
fn in_word(&self) -> bool {
- let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let wordy = |c: Option<char>| {
+ c.map_or(false, |c| {
+ c.is_alphanumeric()
+ && !matches!(
+ c.script(),
+ Script::Han | Script::Hiragana | Script::Katakana
+ )
+ })
+ };
let prev = self.s.scout(-2);
let next = self.s.peek();
- alphanum(prev) && alphanum(next)
+ wordy(prev) && wordy(next)
}
fn space_or_end(&self) -> bool {