From b274155c6d36dfe956899e3606b535cc94c8aca9 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 13 Apr 2022 15:30:10 +0200 Subject: Improve language and add region controls --- src/library/text/lang.rs | 55 ++++++++++++++++++++++++++++++++++++--------- src/library/text/mod.rs | 4 +++- src/library/text/par.rs | 4 ++-- src/library/text/quotes.rs | 8 ++++--- src/library/text/shaping.rs | 13 +++++++++++ 5 files changed, 68 insertions(+), 16 deletions(-) (limited to 'src/library') diff --git a/src/library/text/lang.rs b/src/library/text/lang.rs index 343359d1..360827fa 100644 --- a/src/library/text/lang.rs +++ b/src/library/text/lang.rs @@ -1,24 +1,30 @@ use crate::eval::Value; use crate::geom::Dir; -/// A natural language. +/// A code for a natural language. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct Lang([u8; 2]); +pub struct Lang([u8; 3], u8); impl Lang { /// The code for the english language. - pub const ENGLISH: Self = Self(*b"en"); + pub const ENGLISH: Self = Self(*b"en ", 2); - /// Construct a language from a two-byte ISO 639-1 code. + /// Construct a language from a two- or three-byte ISO 639-1/2/3 code. pub fn from_str(iso: &str) -> Option<Self> { - let mut bytes: [u8; 2] = iso.as_bytes().try_into().ok()?; - bytes.make_ascii_lowercase(); - Some(Self(bytes)) + let len = iso.len(); + if matches!(len, 2 ..= 3) && iso.is_ascii() { + let mut bytes = [b' '; 3]; + bytes[.. len].copy_from_slice(iso.as_bytes()); + bytes.make_ascii_lowercase(); + Some(Self(bytes, len as u8)) + } else { + None + } } - /// Return the language code as a string slice. + /// Return the language code as an all lowercase string slice. pub fn as_str(&self) -> &str { - std::str::from_utf8(&self.0).unwrap_or_default() + std::str::from_utf8(&self.0[.. usize::from(self.1)]).unwrap_or_default() } /// The default direction for the language. @@ -35,5 +41,34 @@ castable!
{ Lang, Expected: "string", Value::Str(string) => Self::from_str(&string) - .ok_or("expected two letter language code")?, + .ok_or("expected two or three letter language code (ISO 639-1/2/3)")?, +} + +/// A code for a region somewhere in the world. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Region([u8; 2]); + +impl Region { + /// Construct a region from its two-byte ISO 3166-1 alpha-2 code. + pub fn from_str(iso: &str) -> Option<Self> { + if iso.is_ascii() { + let mut bytes: [u8; 2] = iso.as_bytes().try_into().ok()?; + bytes.make_ascii_uppercase(); + Some(Self(bytes)) + } else { + None + } + } + + /// Return the region code as an all uppercase string slice. + pub fn as_str(&self) -> &str { + std::str::from_utf8(&self.0).unwrap_or_default() + } +} + +castable! { + Region, + Expected: "string", + Value::Str(string) => Self::from_str(&string) + .ok_or("expected two letter region code (ISO 3166-1 alpha-2)")?, } diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs index 636b878c..0eb57339 100644 --- a/src/library/text/mod.rs +++ b/src/library/text/mod.rs @@ -65,8 +65,10 @@ impl TextNode { /// The bottom end of the text bounding box. pub const BOTTOM_EDGE: TextEdge = TextEdge::Metric(VerticalFontMetric::Baseline); - /// An ISO 639-1 language code. + /// An ISO 639-1/2/3 language code. pub const LANG: Lang = Lang::ENGLISH; + /// An ISO 3166-1 alpha-2 region code. + pub const REGION: Option<Region> = None; /// The direction for text and inline objects. When `auto`, the direction is /// automatically inferred from the language. #[property(resolve)] diff --git a/src/library/text/par.rs b/src/library/text/par.rs index 232a5d0f..fc978357 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -406,9 +406,9 @@ fn collect<'a>( ParChild::Quote(double) => { let prev = full.len(); if styles.get(TextNode::SMART_QUOTES) { - // TODO: Also get region.
let lang = styles.get(TextNode::LANG); - let quotes = Quotes::from_lang(lang.as_str(), ""); + let region = styles.get(TextNode::REGION); + let quotes = Quotes::from_lang(lang, region); let peeked = iter.peek().and_then(|(child, _)| match child { ParChild::Text(text) => text.chars().next(), ParChild::Quote(_) => Some('"'), diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs index 5f67bdb5..98402ca4 100644 --- a/src/library/text/quotes.rs +++ b/src/library/text/quotes.rs @@ -1,3 +1,4 @@ +use super::{Lang, Region}; use crate::parse::is_newline; /// State machine for smart quote subtitution. @@ -91,9 +92,10 @@ impl<'s> Quotes<'s> { /// Norwegian. /// /// For unknown languages, the English quotes are used. - pub fn from_lang(language: &str, region: &str) -> Self { - let (single_open, single_close, double_open, double_close) = match language { - "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"), + pub fn from_lang(lang: Lang, region: Option<Region>) -> Self { + let region = region.as_ref().map(Region::as_str); + let (single_open, single_close, double_open, double_close) = match lang.as_str() { + "de" if matches!(region, Some("CH" | "LI")) => ("‹", "›", "«", "»"), "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => { ("‚", "‘", "„", "“") } diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs index 72f86a38..055761df 100644 --- a/src/library/text/shaping.rs +++ b/src/library/text/shaping.rs @@ -1,4 +1,5 @@ use std::ops::Range; +use std::str::FromStr; use rustybuzz::{Feature, UnicodeBuffer}; @@ -372,6 +373,7 @@ fn shape_segment<'a>( // Fill the buffer with our text.
let mut buffer = UnicodeBuffer::new(); buffer.push_str(text); + buffer.set_language(language(ctx.styles)); buffer.set_direction(match ctx.dir { Dir::LTR => rustybuzz::Direction::LeftToRight, Dir::RTL => rustybuzz::Direction::RightToLeft, @@ -613,3 +615,14 @@ fn tags(styles: StyleChain) -> Vec { tags } + +/// Process the language and region of a style chain into a +/// rustybuzz-compatible BCP 47 language. +fn language(styles: StyleChain) -> rustybuzz::Language { + let mut bcp: EcoString = styles.get(TextNode::LANG).as_str().into(); + if let Some(region) = styles.get(TextNode::REGION) { + bcp.push('-'); + bcp.push_str(region.as_str()); + } + rustybuzz::Language::from_str(&bcp).unwrap() +} -- cgit v1.2.3