diff options
| author | Peng Guanwen <pg999w@outlook.com> | 2023-11-15 22:01:15 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-15 15:01:15 +0100 |
| commit | f4a81091f72299296fb2be7511b08c3cb0e8fa57 (patch) | |
| tree | 7935e1eebf9e4ff1f361e71435becd5801f8eb51 | |
| parent | 50ea3b4f1651ca6558c118edebbd68d7ef7a93d7 (diff) | |
Lexer change: Allow emphasis in CJK text without spaces (#2648)
| Mode | File | Changes |
|---|---|---|
| -rw-r--r-- | Cargo.lock | 1 |
| -rw-r--r-- | assets/fonts/NotoSerifCJKsc-Bold.otf | bin 0 -> 1385940 bytes |
| -rw-r--r-- | assets/fonts/NotoSerifCJKtc-Bold.otf | bin 0 -> 1617780 bytes |
| -rw-r--r-- | crates/typst-syntax/Cargo.toml | 1 |
| -rw-r--r-- | crates/typst-syntax/src/lexer.rs | 13 |
| -rw-r--r-- | tests/ref/text/emphasis.png | bin 8575 -> 16982 bytes |
| -rw-r--r-- | tests/ref/text/lang-with-region.png | bin 1389 -> 1392 bytes |
| -rw-r--r-- | tests/typ/text/emphasis.typ | 7 |
8 files changed, 20 insertions, 2 deletions
@@ -3117,6 +3117,7 @@ dependencies = [
  "tracing",
  "unicode-ident",
  "unicode-math-class",
+ "unicode-script",
  "unicode-segmentation",
  "unscanny",
 ]
diff --git a/assets/fonts/NotoSerifCJKsc-Bold.otf b/assets/fonts/NotoSerifCJKsc-Bold.otf
Binary files differ
new file mode 100644
index 00000000..c291490e
--- /dev/null
+++ b/assets/fonts/NotoSerifCJKsc-Bold.otf
diff --git a/assets/fonts/NotoSerifCJKtc-Bold.otf b/assets/fonts/NotoSerifCJKtc-Bold.otf
Binary files differ
new file mode 100644
index 00000000..73a2135e
--- /dev/null
+++ b/assets/fonts/NotoSerifCJKtc-Bold.otf
diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml
index 1254e663..681189bd 100644
--- a/crates/typst-syntax/Cargo.toml
+++ b/crates/typst-syntax/Cargo.toml
@@ -23,5 +23,6 @@ serde = { workspace = true }
 tracing = { workspace = true }
 unicode-ident = { workspace = true }
 unicode-math-class = { workspace = true }
+unicode-script = { workspace = true }
 unicode-segmentation = { workspace = true }
 unscanny = { workspace = true }
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index c702551c..ffe53145 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -1,5 +1,6 @@
 use ecow::{eco_format, EcoString};
 use unicode_ident::{is_xid_continue, is_xid_start};
+use unicode_script::{Script, UnicodeScript};
 use unicode_segmentation::UnicodeSegmentation;
 use unscanny::Scanner;
 
@@ -343,10 +344,18 @@ impl Lexer<'_> {
     }
 
     fn in_word(&self) -> bool {
-        let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+        let wordy = |c: Option<char>| {
+            c.map_or(false, |c| {
+                c.is_alphanumeric()
+                    && !matches!(
+                        c.script(),
+                        Script::Han | Script::Hiragana | Script::Katakana
+                    )
+            })
+        };
         let prev = self.s.scout(-2);
         let next = self.s.peek();
-        alphanum(prev) && alphanum(next)
+        wordy(prev) && wordy(next)
     }
 
     fn space_or_end(&self) -> bool {
diff --git a/tests/ref/text/emphasis.png b/tests/ref/text/emphasis.png
Binary files differ
index 333f7006..c19f6ebb 100644
--- a/tests/ref/text/emphasis.png
+++ b/tests/ref/text/emphasis.png
diff --git a/tests/ref/text/lang-with-region.png b/tests/ref/text/lang-with-region.png
Binary files differ
index 097f105d..c7753104 100644
--- a/tests/ref/text/lang-with-region.png
+++ b/tests/ref/text/lang-with-region.png
diff --git a/tests/typ/text/emphasis.typ b/tests/typ/text/emphasis.typ
index fd04c8e7..93913dcf 100644
--- a/tests/typ/text/emphasis.typ
+++ b/tests/typ/text/emphasis.typ
@@ -7,6 +7,13 @@ _Emphasized and *strong* words!_
 // Inside of a word it's a normal underscore or star.
 hello_world Nutzer*innen
 
+// CJK characters will not need spaces.
+中文一般使用*粗体*或者_楷体_来表示强调。
+
+日本語では、*太字*や_斜体_を使って強調します。
+
+中文中混有*Strong*和_Empasis_。
+
 // Can contain paragraph in nested content block.
 _Still #[
 
