summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Wenzhuo Liu <mgt@oi-wiki.org> 2023-10-22 19:45:51 +0800
committer: GitHub <noreply@github.com> 2023-10-22 13:45:51 +0200
commit: b97ee93b8f8c3554d583e467a32494d78fde3f99 (patch)
tree: 2e6a023f8875e1fbad7b0b08a2cdb9a76571c95f
parent: b4e8a2afe3a8b351bcadc02235b8fed1a82c213c (diff)
Support adjustment of more CJK punctuation (#2456)
-rw-r--r-- crates/typst-library/src/layout/par.rs 7
-rw-r--r-- crates/typst-library/src/text/shaping.rs 22
-rw-r--r-- tests/ref/layout/cjk-punctuation-adjustment.png bin 0 -> 26949 bytes
-rw-r--r-- tests/typ/layout/cjk-punctuation-adjustment.typ 38
4 files changed, 56 insertions, 11 deletions
diff --git a/crates/typst-library/src/layout/par.rs b/crates/typst-library/src/layout/par.rs
index 5d21c8e9..bb280069 100644
--- a/crates/typst-library/src/layout/par.rs
+++ b/crates/typst-library/src/layout/par.rs
@@ -17,7 +17,7 @@ use crate::math::EquationElem;
use crate::prelude::*;
use crate::text::{
char_is_cjk_script, is_gb_style, shape, LinebreakElem, Quoter, Quotes, ShapedGlyph,
- ShapedText, SmartquoteElem, SpaceElem, TextElem,
+ ShapedText, SmartquoteElem, SpaceElem, TextElem, BEGIN_PUNCT_PAT, END_PUNCT_PAT,
};
/// Arranges text, spacing and inline-level elements into a paragraph.
@@ -1287,11 +1287,6 @@ fn line<'a>(
let end = range.end;
let mut justify = p.justify && end < p.bidi.text.len() && !mandatory;
- // The CJK punctuation that can appear at the beginning or end of a line.
- const BEGIN_PUNCT_PAT: &[char] = &['“', '‘', '《', '(', '『', '「'];
- const END_PUNCT_PAT: &[char] =
- &['”', '’', ',', '。', '、', ':', ';', '》', ')', '』', '」'];
-
if range.is_empty() {
return Line {
bidi: &p.bidi,
diff --git a/crates/typst-library/src/text/shaping.rs b/crates/typst-library/src/text/shaping.rs
index 39122837..0cfffce0 100644
--- a/crates/typst-library/src/text/shaping.rs
+++ b/crates/typst-library/src/text/shaping.rs
@@ -1001,6 +1001,14 @@ fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
}
}
+// The CJK punctuation that can appear at the beginning or end of a line.
+pub(crate) const BEGIN_PUNCT_PAT: &[char] =
+ &['“', '‘', '《', '〈', '(', '『', '「', '【', '〖', '〔', '[', '{'];
+pub(crate) const END_PUNCT_PAT: &[char] = &[
+ '”', '’', ',', '.', '。', '、', ':', ';', '》', '〉', ')', '』', '」', '】',
+ '〗', '〕', ']', '}', '?', '!',
+];
+
/// Whether the glyph is a space.
#[inline]
fn is_space(c: char) -> bool {
@@ -1035,11 +1043,15 @@ fn is_cjk_left_aligned_punctuation(
return true;
}
- if gb_style && matches!(c, ',' | '。' | '、' | ':' | ';') {
+ if gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';' | '!' | '?')
+ {
+ // In GB style, exclamations and question marks are also left aligned and can be adjusted.
+ // Note that they are not adjustable in other styles.
return true;
}
- matches!(c, '》' | ')' | '』' | '」')
+ // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
+ matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}')
}
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
@@ -1054,14 +1066,14 @@ fn is_cjk_right_aligned_punctuation(
if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() {
return true;
}
-
- matches!(c, '《' | '(' | '『' | '「')
+ // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
+ matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{')
}
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
#[inline]
fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool {
- if !gb_style && matches!(c, ',' | '。' | '、' | ':' | ';') {
+ if !gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') {
return true;
}
diff --git a/tests/ref/layout/cjk-punctuation-adjustment.png b/tests/ref/layout/cjk-punctuation-adjustment.png
new file mode 100644
index 00000000..71179751
--- /dev/null
+++ b/tests/ref/layout/cjk-punctuation-adjustment.png
Binary files differ
diff --git a/tests/typ/layout/cjk-punctuation-adjustment.typ b/tests/typ/layout/cjk-punctuation-adjustment.typ
new file mode 100644
index 00000000..0f1f2894
--- /dev/null
+++ b/tests/typ/layout/cjk-punctuation-adjustment.typ
@@ -0,0 +1,38 @@
+#set page(width: 15em)
+
+// In the following example, the space between 》! and ? should be squeezed,
+// because zh-CN follows GB style.
+#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
+原来,你也玩《原神》!?
+
+// However, in the following example, the space between 》! and ? should not be squeezed,
+// because zh-TW does not follow GB style.
+#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC")
+原來,你也玩《原神》! ?
+---
+
+#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
+《书名〈章节〉》 // the space between 〉 and 》 should be squeezed
+
+〔茸毛〕:很细的毛 // the space between 〕 and : should be squeezed
+
+---
+#set page(width: 21em)
+#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
+
+// These examples contain extensive use of Chinese punctuation marks,
+// from 《Which parentheses should be used when applying parentheses?》.
+// link: https://archive.md/2bb1N
+
+
+(〔中〕医、〔中〕药、技)系列评审
+
+(长三角[长江三角洲])(GB/T 16159—2012《汉语拼音正词法基本规则》)
+
+【爱因斯坦(Albert Einstein)】物理学家
+
+〔(2009)民申字第1622号〕
+
+“江南海北长相忆,浅水深山独掩扉。”([唐]刘长卿《会赦后酬主簿所问》)
+
+参看1378页〖象形文字〗。(《现代汉语词典》修订本)