summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-04-13 13:07:45 +0200
committerLaurenz <laurmaedje@gmail.com>2022-04-13 13:08:46 +0200
commit2279c26543f7edde910fd89a3f8f0710c67249db (patch)
treee9dfaa89954a4e61ffe2ca7a35be251946b14ad4
parente8dd842c6699c665225c03e904a40887f8424e30 (diff)
Smart quotes
Co-Authored-By: Martin Haug <mhaug@live.de>
-rw-r--r--src/library/text/mod.rs4
-rw-r--r--src/library/text/par.rs30
-rw-r--r--src/library/text/quotes.rs146
-rw-r--r--src/library/text/raw.rs1
-rw-r--r--tests/ref/code/closure.pngbin801 -> 520 bytes
-rw-r--r--tests/ref/code/include.pngbin48417 -> 47947 bytes
-rw-r--r--tests/ref/layout/columns.pngbin109097 -> 122766 bytes
-rw-r--r--tests/ref/text/basic.pngbin58937 -> 58344 bytes
-rw-r--r--tests/ref/text/escape.pngbin13675 -> 13729 bytes
-rw-r--r--tests/ref/text/hyphenate.pngbin20764 -> 20776 bytes
-rw-r--r--tests/ref/text/justify.pngbin29296 -> 29352 bytes
-rw-r--r--tests/ref/text/quotes.pngbin0 -> 59067 bytes
-rw-r--r--tests/ref/text/tracking-spacing.pngbin6483 -> 6479 bytes
-rw-r--r--tests/typ/code/closure.typ7
-rw-r--r--tests/typ/text/escape.typ3
-rw-r--r--tests/typ/text/quotes.typ54
16 files changed, 237 insertions, 8 deletions
diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs
index 1d750689..a25b2827 100644
--- a/src/library/text/mod.rs
+++ b/src/library/text/mod.rs
@@ -3,12 +3,14 @@
mod deco;
mod link;
mod par;
+mod quotes;
mod raw;
mod shaping;
pub use deco::*;
pub use link::*;
pub use par::*;
+pub use quotes::*;
pub use raw::*;
pub use shaping::*;
@@ -72,6 +74,8 @@ impl TextNode {
 /// will be hyphenated if and only if justification is enabled.
#[property(resolve)]
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
+ /// Whether to apply smart quotes.
+ pub const SMART_QUOTES: bool = true;
/// Whether to apply kerning ("kern").
pub const KERNING: bool = true;
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index cf7dc4a9..8dcbfeb3 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level};
use unicode_script::{Script, UnicodeScript};
use xi_unicode::LineBreakIterator;
-use super::{shape, Lang, ShapedText, TextNode};
+use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
use crate::font::FontStore;
use crate::library::layout::Spacing;
use crate::library::prelude::*;
@@ -386,9 +386,11 @@ fn collect<'a>(
styles: &'a StyleChain<'a>,
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
let mut full = String::new();
+ let mut quoter = Quoter::new();
let mut segments = vec![];
+ let mut iter = par.0.iter().peekable();
- for (child, map) in par.0.iter() {
+ while let Some((child, map)) = iter.next() {
let styles = map.chain(&styles);
let segment = match child {
ParChild::Text(text) => {
@@ -402,7 +404,25 @@ fn collect<'a>(
}
ParChild::Quote(double) => {
let prev = full.len();
- full.push(if *double { '"' } else { '\'' });
+ if styles.get(TextNode::SMART_QUOTES) {
+ // TODO: Also get region.
+ let lang = styles.get(TextNode::LANG);
+ let quotes = lang
+ .as_ref()
+ .map(|lang| Quotes::from_lang(lang.as_str(), ""))
+ .unwrap_or_default();
+
+ let peeked = iter.peek().and_then(|(child, _)| match child {
+ ParChild::Text(text) => text.chars().next(),
+ ParChild::Quote(_) => Some('"'),
+ ParChild::Spacing(_) => Some(SPACING_REPLACE),
+ ParChild::Node(_) => Some(NODE_REPLACE),
+ });
+
+ full.push_str(quoter.quote(&quotes, *double, peeked));
+ } else {
+ full.push(if *double { '"' } else { '\'' });
+ }
Segment::Text(full.len() - prev)
}
ParChild::Spacing(spacing) => {
@@ -415,6 +435,10 @@ fn collect<'a>(
}
};
+ if let Some(last) = full.chars().last() {
+ quoter.last(last);
+ }
+
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
(segments.last_mut(), segment)
{
diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs
new file mode 100644
index 00000000..5f67bdb5
--- /dev/null
+++ b/src/library/text/quotes.rs
@@ -0,0 +1,146 @@
+use crate::parse::is_newline;
+
+/// State machine for smart quote substitution; tracks quote nesting and the previously seen character.
+#[derive(Debug, Clone)]
+pub struct Quoter {
+ /// How many quotes have been opened and not yet closed.
+ quote_depth: usize,
+ /// Whether an opening quote might follow (initially true; updated by `last`).
+ expect_opening: bool,
+ /// Whether the last character was numeric, in which case a quote may become a prime.
+ last_num: bool,
+}
+
+impl Quoter {
+ /// Start quoting with no open quotes, expecting an opening quote first.
+ pub fn new() -> Self {
+ Self {
+ quote_depth: 0,
+ expect_opening: true,
+ last_num: false,
+ }
+ }
+
+ /// Process the last seen character, recording whether an opening quote may follow it and whether it was numeric.
+ pub fn last(&mut self, c: char) {
+ self.expect_opening = is_ignorable(c) || is_opening_bracket(c);
+ self.last_num = c.is_numeric();
+ }
+
+ /// Process and substitute a quote, returning an opening, closing, prime, or fallback string (checked in that order).
+ pub fn quote<'a>(
+ &mut self,
+ quotes: &Quotes<'a>,
+ double: bool,
+ peeked: Option<char>,
+ ) -> &'a str {
+ let peeked = peeked.unwrap_or(' '); // A missing next character is treated like a space.
+ if self.expect_opening {
+ self.quote_depth += 1;
+ quotes.open(double)
+ } else if self.quote_depth > 0
+ && (peeked.is_ascii_punctuation() || is_ignorable(peeked))
+ { // Close only when a quote is open and ASCII punctuation or an ignorable character follows.
+ self.quote_depth -= 1;
+ quotes.close(double)
+ } else if self.last_num { // Directly after a numeric character, emit a prime.
+ quotes.prime(double)
+ } else { // Nothing else matched: use the fallback quote.
+ quotes.fallback(double)
+ }
+ }
+}
+
+impl Default for Quoter {
+ fn default() -> Self { // Same initial state as `Quoter::new`.
+ Self::new()
+ }
+}
+
+fn is_ignorable(c: char) -> bool { // True for Unicode whitespace and for anything `is_newline` accepts.
+ c.is_whitespace() || is_newline(c)
+}
+
+fn is_opening_bracket(c: char) -> bool { // True for an opening parenthesis, brace, or square bracket.
+ matches!(c, '(' | '{' | '[')
+}
+
+/// The set of opening and closing quote strings that plain quotes are substituted with.
+pub struct Quotes<'s> {
+ /// The opening single quote.
+ pub single_open: &'s str,
+ /// The closing single quote.
+ pub single_close: &'s str,
+ /// The opening double quote.
+ pub double_open: &'s str,
+ /// The closing double quote.
+ pub double_close: &'s str,
+}
+
+impl<'s> Quotes<'s> {
+ /// Create a new `Quotes` struct with the defaults for a language and
+ /// region.
+ ///
+ /// The language should be specified as an all-lowercase ISO 639-1 code, the
+ /// region as an all-uppercase ISO 3166-alpha2 code.
+ ///
+ /// Currently, the supported languages are: English, Czech, Danish, German,
+ /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian,
+ /// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French,
+ /// Hungarian, Polish, Romanian, Russian, and Norwegian (codes `no` and
+ /// `nn`). Any other language code has no dedicated entry in the match below.
+ ///
+ /// For unknown languages, the English quotes are used.
+ pub fn from_lang(language: &str, region: &str) -> Self {
+ let (single_open, single_close, double_open, double_close) = match language {
+ "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"), // Must come before the general "de" arm.
+ "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
+ ("‚", "‘", "„", "“")
+ }
+ "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"), // U+00A0 (no-break space) sits inside the guillemets.
+ "bs" | "fi" | "sv" => ("’", "’", "”", "”"),
+ "hu" | "pl" | "ro" => ("’", "’", "„", "”"),
+ "ru" | "no" | "nn" => ("’", "’", "«", "»"),
+ _ => return Self::default(), // Everything else gets the English defaults.
+ };
+
+ Self {
+ single_open,
+ single_close,
+ double_open,
+ double_close,
+ }
+ }
+
+ /// The opening quote: the double variant if `double` is set, the single one otherwise.
+ fn open(&self, double: bool) -> &'s str {
+ if double { self.double_open } else { self.single_open }
+ }
+
+ /// The closing quote: the double variant if `double` is set, the single one otherwise.
+ fn close(&self, double: bool) -> &'s str {
+ if double { self.double_close } else { self.single_close }
+ }
+
+ /// Which character should be used as a prime (double or single); independent of the language.
+ fn prime(&self, double: bool) -> &'static str {
+ if double { "″" } else { "′" }
+ }
+
+ /// Which character should be used as a fallback quote: a straight double quote or a typographic apostrophe.
+ fn fallback(&self, double: bool) -> &'static str {
+ if double { "\"" } else { "’" }
+ }
+}
+
+impl Default for Quotes<'_> {
+ /// Returns the English quotes as default.
+ fn default() -> Self {
+ Self {
+ single_open: "‘",
+ single_close: "’",
+ double_open: "“",
+ double_close: "”",
+ }
+ }
+}
diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs
index d96100af..80b6ef2a 100644
--- a/src/library/text/raw.rs
+++ b/src/library/text/raw.rs
@@ -100,6 +100,7 @@ impl Show for RawNode {
let mut map = StyleMap::new();
map.set(TextNode::OVERHANG, false);
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
+ map.set(TextNode::SMART_QUOTES, false);
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
map.set_family(family.clone(), styles);
diff --git a/tests/ref/code/closure.png b/tests/ref/code/closure.png
index 7d933033..b4c83256 100644
--- a/tests/ref/code/closure.png
+++ b/tests/ref/code/closure.png
Binary files differ
diff --git a/tests/ref/code/include.png b/tests/ref/code/include.png
index 2d5d9ca7..001d7d1e 100644
--- a/tests/ref/code/include.png
+++ b/tests/ref/code/include.png
Binary files differ
diff --git a/tests/ref/layout/columns.png b/tests/ref/layout/columns.png
index 8a65443d..3f471415 100644
--- a/tests/ref/layout/columns.png
+++ b/tests/ref/layout/columns.png
Binary files differ
diff --git a/tests/ref/text/basic.png b/tests/ref/text/basic.png
index e7887f07..bfdf47a2 100644
--- a/tests/ref/text/basic.png
+++ b/tests/ref/text/basic.png
Binary files differ
diff --git a/tests/ref/text/escape.png b/tests/ref/text/escape.png
index 3434d6e0..77cc21f2 100644
--- a/tests/ref/text/escape.png
+++ b/tests/ref/text/escape.png
Binary files differ
diff --git a/tests/ref/text/hyphenate.png b/tests/ref/text/hyphenate.png
index 0560d5b7..48338f58 100644
--- a/tests/ref/text/hyphenate.png
+++ b/tests/ref/text/hyphenate.png
Binary files differ
diff --git a/tests/ref/text/justify.png b/tests/ref/text/justify.png
index d0b6c7bf..396adc77 100644
--- a/tests/ref/text/justify.png
+++ b/tests/ref/text/justify.png
Binary files differ
diff --git a/tests/ref/text/quotes.png b/tests/ref/text/quotes.png
new file mode 100644
index 00000000..d31ae937
--- /dev/null
+++ b/tests/ref/text/quotes.png
Binary files differ
diff --git a/tests/ref/text/tracking-spacing.png b/tests/ref/text/tracking-spacing.png
index 8e6db3cc..69fc1eef 100644
--- a/tests/ref/text/tracking-spacing.png
+++ b/tests/ref/text/tracking-spacing.png
Binary files differ
diff --git a/tests/typ/code/closure.typ b/tests/typ/code/closure.typ
index 5524ba99..29fca404 100644
--- a/tests/typ/code/closure.typ
+++ b/tests/typ/code/closure.typ
@@ -5,11 +5,10 @@
// Don't parse closure directly in content.
// Ref: true
-#let x = "\"hi\""
+#let x = "x"
-// Should output `"hi" => "bye"`.
-#set text(overhang: false)
-#x => "bye"
+// Should output `x => y`.
+#x => y
---
// Basic closure without captures.
diff --git a/tests/typ/text/escape.typ b/tests/typ/text/escape.typ
index 6ec469c1..e03d73e5 100644
--- a/tests/typ/text/escape.typ
+++ b/tests/typ/text/escape.typ
@@ -2,7 +2,8 @@
---
// Escapable symbols.
-\\ \/ \[ \] \{ \} \# \* \_ \= \~ \` \$
+\\ \/ \[ \] \{ \} \# \* \_ \
+\= \~ \` \$ \" \'
// No need to escape.
( ) ; < >
diff --git a/tests/typ/text/quotes.typ b/tests/typ/text/quotes.typ
new file mode 100644
index 00000000..3f0649e8
--- /dev/null
+++ b/tests/typ/text/quotes.typ
@@ -0,0 +1,54 @@
+// Test smart quotes.
+
+---
+#set page(width: 200pt)
+
+// Test simple quotations in various languages.
+#set text(lang: "en")
+"The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.'
+
+#set text(lang: "de")
+"Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz.
+
+#set text(lang: "fr")
+"Le cheval ne mange pas de salade de concombres" est la première phrase jamais prononcée au 'téléphone'.
+
+#set text(lang: "fi")
+"Hevonen ei syö kurkkusalaattia" oli ensimmäinen koskaan 'puhelimessa' lausuttu lause.
+
+#set text(lang: "ro")
+"Calul nu mănâncă salată de castraveți" a fost prima propoziție rostită vreodată la 'telefon'.
+
+#set text(lang: "ru")
+"Лошадь не ест салат из огурцов" - это была первая фраза, сказанная по 'телефону'.
+
+---
+// Test single pair of quotes.
+#set text(lang: "en")
+""
+
+---
+// Test sentences with numbers and apostrophes.
+#set text(lang: "en")
+The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear.
+
+He said "I'm a big fella."
+
+---
+// Test escape sequences.
+The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear.
+
+---
+// Test turning smart quotes off.
+#set text(lang: "en")
+He's told some books contain questionable "example text".
+
+#set text(smart-quotes: false)
+He's told some books contain questionable "example text".
+
+---
+// Test changing properties within text.
+#set text(lang: "en")
+"She suddenly started speaking french: #text(lang: "fr")['Je suis une banane.']" Roman told me.
+
+Some people's thought on this would be #text(smart-quotes: false)["strange."]