summaryrefslogtreecommitdiff
path: root/crates
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2024-08-28 10:21:21 +0200
committerGitHub <noreply@github.com>2024-08-28 08:21:21 +0000
commitef4482ce4b084aa97155dbde89f02dda3f7fb219 (patch)
tree723fdac6b85dcc2a95d7e88984c14dfefe7e829d /crates
parent4e4c5175e531d99ec6f39d9a77d59d02f2109308 (diff)
Better smartquotes (#4849)
Diffstat (limited to 'crates')
-rw-r--r--crates/typst/src/layout/inline/collect.rs51
-rw-r--r--crates/typst/src/model/quote.rs2
-rw-r--r--crates/typst/src/text/smartquote.rs133
3 files changed, 74 insertions, 112 deletions
diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs
index 53c684d1..624eedf3 100644
--- a/crates/typst/src/layout/inline/collect.rs
+++ b/crates/typst/src/layout/inline/collect.rs
@@ -16,8 +16,6 @@ use crate::utils::Numeric;
// paragraph's full text.
const SPACING_REPLACE: &str = " "; // Space
const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
-const SPACING_REPLACE_CHAR: char = ' ';
-const OBJ_REPLACE_CHAR: char = '\u{FFFC}';
// Unicode BiDi control characters.
const LTR_EMBEDDING: &str = "\u{202A}";
@@ -125,8 +123,8 @@ pub fn collect<'a>(
consecutive: bool,
) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
let mut collector = Collector::new(2 + children.len());
- let mut iter = children.iter(styles).peekable();
let mut locator = locator.split();
+ let mut quoter = SmartQuoter::new();
let outer_dir = TextElem::dir_in(*styles);
let first_line_indent = ParElem::first_line_indent_in(*styles);
@@ -144,7 +142,7 @@ pub fn collect<'a>(
collector.spans.push(1, Span::detached());
}
- while let Some((child, styles)) = iter.next() {
+ for (child, styles) in children.iter(styles) {
let prev_len = collector.full.len();
if child.is::<SpaceElem>() {
@@ -191,32 +189,16 @@ pub fn collect<'a>(
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
let double = elem.double(styles);
if elem.enabled(styles) {
- let quotes = SmartQuotes::new(
+ let quotes = SmartQuotes::get(
elem.quotes(styles),
TextElem::lang_in(styles),
TextElem::region_in(styles),
elem.alternative(styles),
);
- let peeked = iter.peek().and_then(|(child, _)| {
- if let Some(elem) = child.to_packed::<TextElem>() {
- elem.text().chars().find(|c| !is_default_ignorable(*c))
- } else if child.is::<SmartQuoteElem>() {
- Some('"')
- } else if child.is::<SpaceElem>()
- || child.is::<HElem>()
- || child.is::<LinebreakElem>()
- // This is a temporary hack. We should rather skip these
- // and peek at the next child.
- || child.is::<TagElem>()
- {
- Some(SPACING_REPLACE_CHAR)
- } else {
- Some(OBJ_REPLACE_CHAR)
- }
- });
-
- let quote = collector.quoter.quote(&quotes, double, peeked);
- collector.push_quote(quote, styles);
+ let before =
+ collector.full.chars().rev().find(|&c| !is_default_ignorable(c));
+ let quote = quoter.quote(before, &quotes, double);
+ collector.push_text(quote, styles);
} else {
collector.push_text(if double { "\"" } else { "'" }, styles);
}
@@ -261,7 +243,6 @@ struct Collector<'a> {
full: String,
segments: Vec<Segment<'a>>,
spans: SpanMapper,
- quoter: SmartQuoter,
}
impl<'a> Collector<'a> {
@@ -270,13 +251,12 @@ impl<'a> Collector<'a> {
full: String::new(),
segments: Vec::with_capacity(capacity),
spans: SpanMapper::new(),
- quoter: SmartQuoter::new(),
}
}
fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
self.full.push_str(text);
- self.push_segment(Segment::Text(text.len(), styles), false);
+ self.push_segment(Segment::Text(text.len(), styles));
}
fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
@@ -286,24 +266,15 @@ impl<'a> Collector<'a> {
let prev = self.full.len();
f(&mut self.full);
let len = self.full.len() - prev;
- self.push_segment(Segment::Text(len, styles), false);
- }
-
- fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) {
- self.full.push_str(quote);
- self.push_segment(Segment::Text(quote.len(), styles), true);
+ self.push_segment(Segment::Text(len, styles));
}
fn push_item(&mut self, item: Item<'a>) {
self.full.push_str(item.textual());
- self.push_segment(Segment::Item(item), false);
+ self.push_segment(Segment::Item(item));
}
- fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) {
- if let Some(last) = self.full.chars().rev().find(|c| !is_default_ignorable(*c)) {
- self.quoter.last(last, is_quote);
- }
-
+ fn push_segment(&mut self, segment: Segment<'a>) {
if let (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) =
(self.segments.last_mut(), &segment)
{
diff --git a/crates/typst/src/model/quote.rs b/crates/typst/src/model/quote.rs
index 65a809dc..528c0998 100644
--- a/crates/typst/src/model/quote.rs
+++ b/crates/typst/src/model/quote.rs
@@ -159,7 +159,7 @@ impl Show for Packed<QuoteElem> {
let block = self.block(styles);
if self.quotes(styles) == Smart::Custom(true) || !block {
- let quotes = SmartQuotes::new(
+ let quotes = SmartQuotes::get(
SmartQuoteElem::quotes_in(styles),
TextElem::lang_in(styles),
TextElem::region_in(styles),
diff --git a/crates/typst/src/text/smartquote.rs b/crates/typst/src/text/smartquote.rs
index 64fecb76..02c93fd6 100644
--- a/crates/typst/src/text/smartquote.rs
+++ b/crates/typst/src/text/smartquote.rs
@@ -97,69 +97,81 @@ impl PlainText for Packed<SmartQuoteElem> {
}
}
-/// State machine for smart quote substitution.
+/// A smart quote substitutor with zero lookahead.
#[derive(Debug, Clone)]
pub struct SmartQuoter {
- /// How many quotes have been opened.
- quote_depth: usize,
- /// Whether an opening quote might follow.
- expect_opening: bool,
- /// Whether the last character was numeric.
- last_num: bool,
- /// The previous type of quote character, if it was an opening quote.
- prev_quote_type: Option<bool>,
+ /// The number of quotes that have been opened.
+ depth: u8,
+ /// Each bit indicates whether the quote at this nesting depth is a double.
+ /// Maximum supported depth is thus 32.
+ kinds: u32,
}
impl SmartQuoter {
/// Start quoting.
pub fn new() -> Self {
- Self {
- quote_depth: 0,
- expect_opening: true,
- last_num: false,
- prev_quote_type: None,
- }
- }
-
- /// Process the last seen character.
- pub fn last(&mut self, c: char, is_quote: bool) {
- self.expect_opening = is_exterior_to_quote(c) || is_opening_bracket(c);
- self.last_num = c.is_numeric();
- if !is_quote {
- self.prev_quote_type = None;
- }
+ Self { depth: 0, kinds: 0 }
}
- /// Process and substitute a quote.
+ /// Determine which smart quote to substitute given this quoter's nesting
+ /// state and the character immediately preceding the quote.
pub fn quote<'a>(
&mut self,
+ before: Option<char>,
quotes: &SmartQuotes<'a>,
double: bool,
- peeked: Option<char>,
) -> &'a str {
- let peeked = peeked.unwrap_or(' ');
- let mut expect_opening = self.expect_opening;
- if let Some(prev_double) = self.prev_quote_type.take() {
- if double != prev_double {
- expect_opening = true;
- }
+ let opened = self.top();
+ let before = before.unwrap_or(' ');
+
+ // If we are after a number and haven't most recently opened a quote of
+ // this kind, produce a prime. Otherwise, we prefer a closing quote.
+ if before.is_numeric() && opened != Some(double) {
+ return if double { "″" } else { "′" };
}
- if expect_opening {
- self.quote_depth += 1;
- self.prev_quote_type = Some(double);
- quotes.open(double)
- } else if self.quote_depth > 0
- && (peeked.is_ascii_punctuation() || is_exterior_to_quote(peeked))
+ // If we have a single smart quote, didn't recently open a single
+ // quotation, and are after an alphabetic char, interpret this as an
+ // apostrophe.
+ if !double && opened != Some(false) && before.is_alphabetic() {
+ return "’";
+ }
+
+ // If the most recently opened quotation is of this kind and the
+ // previous char does not indicate a nested quotation, close it.
+ if opened == Some(double)
+ && !before.is_whitespace()
+ && !is_newline(before)
+ && !is_opening_bracket(before)
{
- self.quote_depth -= 1;
- quotes.close(double)
- } else if self.last_num {
- quotes.prime(double)
- } else {
- quotes.fallback(double)
+ self.pop();
+ return quotes.close(double);
+ }
+
+ // Otherwise, open a new quotation.
+ self.push(double);
+ quotes.open(double)
+ }
+
+ /// The top of our quotation stack. Returns `Some(double)` for the most
+ /// recently opened quote or `None` if we didn't open one.
+ fn top(&self) -> Option<bool> {
+ self.depth.checked_sub(1).map(|i| (self.kinds >> i) & 1 == 1)
+ }
+
+ /// Push onto the quotation stack.
+ fn push(&mut self, double: bool) {
+ if self.depth < 32 {
+ self.kinds |= (double as u32) << self.depth;
+ self.depth += 1;
}
}
+
+ /// Pop from the quotation stack.
+ fn pop(&mut self) {
+ self.depth -= 1;
+ self.kinds &= (1 << self.depth) - 1;
+ }
}
impl Default for SmartQuoter {
@@ -168,10 +180,7 @@ impl Default for SmartQuoter {
}
}
-fn is_exterior_to_quote(c: char) -> bool {
- c.is_whitespace() || is_newline(c)
-}
-
+/// Whether the character is an opening bracket, parenthesis, or brace.
fn is_opening_bracket(c: char) -> bool {
matches!(c, '(' | '{' | '[')
}
@@ -196,13 +205,13 @@ impl<'s> SmartQuotes<'s> {
/// region as an all-uppercase ISO 3166-alpha2 code.
///
/// Currently, the supported languages are: English, Czech, Danish, German,
- /// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin, Lithuanian,
- /// Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, Swedish, French,
- /// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and
- /// Norwegian.
+ /// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin,
+ /// Lithuanian, Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish,
+ /// Swedish, French, Hungarian, Polish, Romanian, Japanese, Traditional
+ /// Chinese, Russian, and Norwegian.
///
/// For unknown languages, the English quotes are used as fallback.
- pub fn new(
+ pub fn get(
quotes: &'s Smart<SmartQuoteDict>,
lang: Lang,
region: Option<Region>,
@@ -281,24 +290,6 @@ impl<'s> SmartQuotes<'s> {
self.single_close
}
}
-
- /// Which character should be used as a prime.
- pub fn prime(&self, double: bool) -> &'static str {
- if double {
- "″"
- } else {
- "′"
- }
- }
-
- /// Which character should be used as a fallback quote.
- pub fn fallback(&self, double: bool) -> &'static str {
- if double {
- "\""
- } else {
- "’"
- }
- }
}
/// An opening and closing quote.