From 2279c26543f7edde910fd89a3f8f0710c67249db Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 13 Apr 2022 13:07:45 +0200 Subject: Smart quotes Co-Authored-By: Martin Haug --- src/library/text/mod.rs | 4 ++ src/library/text/par.rs | 30 +++++++++- src/library/text/quotes.rs | 146 +++++++++++++++++++++++++++++++++++++++++++++ src/library/text/raw.rs | 1 + 4 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 src/library/text/quotes.rs (limited to 'src/library') diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs index 1d750689..a25b2827 100644 --- a/src/library/text/mod.rs +++ b/src/library/text/mod.rs @@ -3,12 +3,14 @@ mod deco; mod link; mod par; +mod quotes; mod raw; mod shaping; pub use deco::*; pub use link::*; pub use par::*; +pub use quotes::*; pub use raw::*; pub use shaping::*; @@ -72,6 +74,8 @@ impl TextNode { /// will will be hyphenated if and only if justification is enabled. #[property(resolve)] pub const HYPHENATE: Smart = Smart::Auto; + /// Whether to apply smart quotes. + pub const SMART_QUOTES: bool = true; /// Whether to apply kerning ("kern"). 
pub const KERNING: bool = true; diff --git a/src/library/text/par.rs b/src/library/text/par.rs index cf7dc4a9..8dcbfeb3 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level}; use unicode_script::{Script, UnicodeScript}; use xi_unicode::LineBreakIterator; -use super::{shape, Lang, ShapedText, TextNode}; +use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode}; use crate::font::FontStore; use crate::library::layout::Spacing; use crate::library::prelude::*; @@ -386,9 +386,11 @@ fn collect<'a>( styles: &'a StyleChain<'a>, ) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) { let mut full = String::new(); + let mut quoter = Quoter::new(); let mut segments = vec![]; + let mut iter = par.0.iter().peekable(); - for (child, map) in par.0.iter() { + while let Some((child, map)) = iter.next() { let styles = map.chain(&styles); let segment = match child { ParChild::Text(text) => { @@ -402,7 +404,25 @@ fn collect<'a>( } ParChild::Quote(double) => { let prev = full.len(); - full.push(if *double { '"' } else { '\'' }); + if styles.get(TextNode::SMART_QUOTES) { + // TODO: Also get region. 
+ let lang = styles.get(TextNode::LANG); + let quotes = lang + .as_ref() + .map(|lang| Quotes::from_lang(lang.as_str(), "")) + .unwrap_or_default(); + + let peeked = iter.peek().and_then(|(child, _)| match child { + ParChild::Text(text) => text.chars().next(), + ParChild::Quote(_) => Some('"'), + ParChild::Spacing(_) => Some(SPACING_REPLACE), + ParChild::Node(_) => Some(NODE_REPLACE), + }); + + full.push_str(quoter.quote(&quotes, *double, peeked)); + } else { + full.push(if *double { '"' } else { '\'' }); + } Segment::Text(full.len() - prev) } ParChild::Spacing(spacing) => { @@ -415,6 +435,10 @@ fn collect<'a>( } }; + if let Some(last) = full.chars().last() { + quoter.last(last); + } + if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) = (segments.last_mut(), segment) { diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs new file mode 100644 index 00000000..5f67bdb5 --- /dev/null +++ b/src/library/text/quotes.rs @@ -0,0 +1,146 @@ +use crate::parse::is_newline; + +/// State machine for smart quote substitution. +#[derive(Debug, Clone)] +pub struct Quoter { + /// How many quotes have been opened. + quote_depth: usize, + /// Whether an opening quote might follow. + expect_opening: bool, + /// Whether the last character was numeric. + last_num: bool, +} + +impl Quoter { + /// Start quoting. + pub fn new() -> Self { + Self { + quote_depth: 0, + expect_opening: true, + last_num: false, + } + } + + /// Process the last seen character. + pub fn last(&mut self, c: char) { + self.expect_opening = is_ignorable(c) || is_opening_bracket(c); + self.last_num = c.is_numeric(); + } + + /// Process and substitute a quote. 
+ pub fn quote<'a>( + &mut self, + quotes: &Quotes<'a>, + double: bool, + peeked: Option<char>, + ) -> &'a str { + let peeked = peeked.unwrap_or(' '); + if self.expect_opening { + self.quote_depth += 1; + quotes.open(double) + } else if self.quote_depth > 0 + && (peeked.is_ascii_punctuation() || is_ignorable(peeked)) + { + self.quote_depth -= 1; + quotes.close(double) + } else if self.last_num { + quotes.prime(double) + } else { + quotes.fallback(double) + } + } +} + +impl Default for Quoter { + fn default() -> Self { + Self::new() + } +} + +fn is_ignorable(c: char) -> bool { + c.is_whitespace() || is_newline(c) +} + +fn is_opening_bracket(c: char) -> bool { + matches!(c, '(' | '{' | '[') +} + +/// Decides which quotes to substitute smart quotes with. +pub struct Quotes<'s> { + /// The opening single quote. + pub single_open: &'s str, + /// The closing single quote. + pub single_close: &'s str, + /// The opening double quote. + pub double_open: &'s str, + /// The closing double quote. + pub double_close: &'s str, +} + +impl<'s> Quotes<'s> { + /// Create a new `Quotes` struct with the defaults for a language and + /// region. + /// + /// The language should be specified as an all-lowercase ISO 639-1 code, the + /// region as an all-uppercase ISO 3166-alpha2 code. + /// + /// Currently, the supported languages are: English, Czech, Danish, German, + /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian, + /// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French, + /// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and + /// Norwegian. + /// + /// For unknown languages, the English quotes are used. 
+ pub fn from_lang(language: &str, region: &str) -> Self { + let (single_open, single_close, double_open, double_close) = match language { + "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"), + "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => { + ("‚", "‘", "„", "“") + } + "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"), + "bs" | "fi" | "sv" => ("’", "’", "”", "”"), + "hu" | "pl" | "ro" => ("’", "’", "„", "”"), + "ru" | "no" | "nn" => ("’", "’", "«", "»"), + _ => return Self::default(), + }; + + Self { + single_open, + single_close, + double_open, + double_close, + } + } + + /// The opening quote. + fn open(&self, double: bool) -> &'s str { + if double { self.double_open } else { self.single_open } + } + + /// The closing quote. + fn close(&self, double: bool) -> &'s str { + if double { self.double_close } else { self.single_close } + } + + /// Which character should be used as a prime. + fn prime(&self, double: bool) -> &'static str { + if double { "″" } else { "′" } + } + + /// Which character should be used as a fallback quote. + fn fallback(&self, double: bool) -> &'static str { + if double { "\"" } else { "’" } + } +} + +impl Default for Quotes<'_> { + /// Returns the english quotes as default. + fn default() -> Self { + Self { + single_open: "‘", + single_close: "’", + double_open: "“", + double_close: "”", + } + } +} diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs index d96100af..80b6ef2a 100644 --- a/src/library/text/raw.rs +++ b/src/library/text/raw.rs @@ -100,6 +100,7 @@ impl Show for RawNode { let mut map = StyleMap::new(); map.set(TextNode::OVERHANG, false); map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false))); + map.set(TextNode::SMART_QUOTES, false); if let Smart::Custom(family) = styles.get(Self::FAMILY) { map.set_family(family.clone(), styles); -- cgit v1.2.3