summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-04-13 13:07:45 +0200
committerLaurenz <laurmaedje@gmail.com>2022-04-13 13:08:46 +0200
commit2279c26543f7edde910fd89a3f8f0710c67249db (patch)
treee9dfaa89954a4e61ffe2ca7a35be251946b14ad4 /src/library
parente8dd842c6699c665225c03e904a40887f8424e30 (diff)
Smart quotes
Co-Authored-By: Martin Haug <mhaug@live.de>
Diffstat (limited to 'src/library')
-rw-r--r--src/library/text/mod.rs4
-rw-r--r--src/library/text/par.rs30
-rw-r--r--src/library/text/quotes.rs146
-rw-r--r--src/library/text/raw.rs1
4 files changed, 178 insertions, 3 deletions
diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs
index 1d750689..a25b2827 100644
--- a/src/library/text/mod.rs
+++ b/src/library/text/mod.rs
@@ -3,12 +3,14 @@
mod deco;
mod link;
mod par;
+mod quotes;
mod raw;
mod shaping;
pub use deco::*;
pub use link::*;
pub use par::*;
+pub use quotes::*;
pub use raw::*;
pub use shaping::*;
@@ -72,6 +74,8 @@ impl TextNode {
/// will be hyphenated if and only if justification is enabled.
#[property(resolve)]
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
+ /// Whether to apply smart quotes.
+ pub const SMART_QUOTES: bool = true;
/// Whether to apply kerning ("kern").
pub const KERNING: bool = true;
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index cf7dc4a9..8dcbfeb3 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level};
use unicode_script::{Script, UnicodeScript};
use xi_unicode::LineBreakIterator;
-use super::{shape, Lang, ShapedText, TextNode};
+use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
use crate::font::FontStore;
use crate::library::layout::Spacing;
use crate::library::prelude::*;
@@ -386,9 +386,11 @@ fn collect<'a>(
styles: &'a StyleChain<'a>,
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
let mut full = String::new();
+ let mut quoter = Quoter::new();
let mut segments = vec![];
+ let mut iter = par.0.iter().peekable();
- for (child, map) in par.0.iter() {
+ while let Some((child, map)) = iter.next() {
let styles = map.chain(&styles);
let segment = match child {
ParChild::Text(text) => {
@@ -402,7 +404,25 @@ fn collect<'a>(
}
ParChild::Quote(double) => {
let prev = full.len();
- full.push(if *double { '"' } else { '\'' });
+ if styles.get(TextNode::SMART_QUOTES) {
+ // TODO: Also get region.
+ let lang = styles.get(TextNode::LANG);
+ let quotes = lang
+ .as_ref()
+ .map(|lang| Quotes::from_lang(lang.as_str(), ""))
+ .unwrap_or_default();
+
+ let peeked = iter.peek().and_then(|(child, _)| match child {
+ ParChild::Text(text) => text.chars().next(),
+ ParChild::Quote(_) => Some('"'),
+ ParChild::Spacing(_) => Some(SPACING_REPLACE),
+ ParChild::Node(_) => Some(NODE_REPLACE),
+ });
+
+ full.push_str(quoter.quote(&quotes, *double, peeked));
+ } else {
+ full.push(if *double { '"' } else { '\'' });
+ }
Segment::Text(full.len() - prev)
}
ParChild::Spacing(spacing) => {
@@ -415,6 +435,10 @@ fn collect<'a>(
}
};
+ if let Some(last) = full.chars().last() {
+ quoter.last(last);
+ }
+
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
(segments.last_mut(), segment)
{
diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs
new file mode 100644
index 00000000..5f67bdb5
--- /dev/null
+++ b/src/library/text/quotes.rs
@@ -0,0 +1,146 @@
+use crate::parse::is_newline;
+
+/// State machine for smart quote substitution.
+#[derive(Debug, Clone)]
+pub struct Quoter {
+ /// How many quotes have been opened.
+ quote_depth: usize,
+ /// Whether an opening quote might follow.
+ expect_opening: bool,
+ /// Whether the last character was numeric.
+ last_num: bool,
+}
+
+impl Quoter {
+ /// Start quoting.
+ pub fn new() -> Self {
+ Self {
+ quote_depth: 0,
+ expect_opening: true,
+ last_num: false,
+ }
+ }
+
+ /// Process the last seen character.
+ pub fn last(&mut self, c: char) {
+ self.expect_opening = is_ignorable(c) || is_opening_bracket(c);
+ self.last_num = c.is_numeric();
+ }
+
+ /// Process and substitute a quote.
+ pub fn quote<'a>(
+ &mut self,
+ quotes: &Quotes<'a>,
+ double: bool,
+ peeked: Option<char>,
+ ) -> &'a str {
+ let peeked = peeked.unwrap_or(' ');
+ if self.expect_opening {
+ self.quote_depth += 1;
+ quotes.open(double)
+ } else if self.quote_depth > 0
+ && (peeked.is_ascii_punctuation() || is_ignorable(peeked))
+ {
+ self.quote_depth -= 1;
+ quotes.close(double)
+ } else if self.last_num {
+ quotes.prime(double)
+ } else {
+ quotes.fallback(double)
+ }
+ }
+}
+
+impl Default for Quoter {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+fn is_ignorable(c: char) -> bool {
+ c.is_whitespace() || is_newline(c)
+}
+
+fn is_opening_bracket(c: char) -> bool {
+ matches!(c, '(' | '{' | '[')
+}
+
+/// Decides which quotes to substitute smart quotes with.
+pub struct Quotes<'s> {
+ /// The opening single quote.
+ pub single_open: &'s str,
+ /// The closing single quote.
+ pub single_close: &'s str,
+ /// The opening double quote.
+ pub double_open: &'s str,
+ /// The closing double quote.
+ pub double_close: &'s str,
+}
+
+impl<'s> Quotes<'s> {
+ /// Create a new `Quotes` struct with the defaults for a language and
+ /// region.
+ ///
+ /// The language should be specified as an all-lowercase ISO 639-1 code, the
+ /// region as an all-uppercase ISO 3166-alpha2 code.
+ ///
+ /// Currently, the supported languages are: English, Czech, Danish, German,
+ /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian,
+ /// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French,
+ /// Hungarian, Polish, Romanian, Russian, and Norwegian. Japanese and
+ /// Traditional Chinese have no entry yet and use the English quotes.
+ ///
+ /// For unknown languages, the English quotes are used.
+ pub fn from_lang(language: &str, region: &str) -> Self {
+ let (single_open, single_close, double_open, double_close) = match language {
+ "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"),
+ "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
+ ("‚", "‘", "„", "“")
+ }
+ "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"),
+ "bs" | "fi" | "sv" => ("’", "’", "”", "”"),
+ "hu" | "pl" | "ro" => ("’", "’", "„", "”"),
+ "ru" | "no" | "nn" => ("’", "’", "«", "»"),
+ _ => return Self::default(),
+ };
+
+ Self {
+ single_open,
+ single_close,
+ double_open,
+ double_close,
+ }
+ }
+
+ /// The opening quote.
+ fn open(&self, double: bool) -> &'s str {
+ if double { self.double_open } else { self.single_open }
+ }
+
+ /// The closing quote.
+ fn close(&self, double: bool) -> &'s str {
+ if double { self.double_close } else { self.single_close }
+ }
+
+ /// Which character should be used as a prime.
+ fn prime(&self, double: bool) -> &'static str {
+ if double { "″" } else { "′" }
+ }
+
+ /// Which character should be used as a fallback quote.
+ fn fallback(&self, double: bool) -> &'static str {
+ if double { "\"" } else { "’" }
+ }
+}
+
+impl Default for Quotes<'_> {
+ /// Returns the English quotes as default.
+ fn default() -> Self {
+ Self {
+ single_open: "‘",
+ single_close: "’",
+ double_open: "“",
+ double_close: "”",
+ }
+ }
+}
diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs
index d96100af..80b6ef2a 100644
--- a/src/library/text/raw.rs
+++ b/src/library/text/raw.rs
@@ -100,6 +100,7 @@ impl Show for RawNode {
let mut map = StyleMap::new();
map.set(TextNode::OVERHANG, false);
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
+ map.set(TextNode::SMART_QUOTES, false);
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
map.set_family(family.clone(), styles);