summaryrefslogtreecommitdiff
path: root/src/library
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-04-13 15:30:10 +0200
committerLaurenz <laurmaedje@gmail.com>2022-04-13 16:01:24 +0200
commitb274155c6d36dfe956899e3606b535cc94c8aca9 (patch)
tree67a439c1a5ecefc1de24099df6e9c2ebb58c1128 /src/library
parentd025854457b4c2d1c2285bd1c5e795edad79a749 (diff)
Improve language and add region controls
Diffstat (limited to 'src/library')
-rw-r--r--src/library/text/lang.rs55
-rw-r--r--src/library/text/mod.rs4
-rw-r--r--src/library/text/par.rs4
-rw-r--r--src/library/text/quotes.rs8
-rw-r--r--src/library/text/shaping.rs13
5 files changed, 68 insertions, 16 deletions
diff --git a/src/library/text/lang.rs b/src/library/text/lang.rs
index 343359d1..360827fa 100644
--- a/src/library/text/lang.rs
+++ b/src/library/text/lang.rs
@@ -1,24 +1,30 @@
use crate::eval::Value;
use crate::geom::Dir;
-/// A natural language.
+/// A code for a natural language.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
-pub struct Lang([u8; 2]);
+pub struct Lang([u8; 3], u8);
impl Lang {
/// The code for the english language.
- pub const ENGLISH: Self = Self(*b"en");
+ pub const ENGLISH: Self = Self(*b"en ", 2);
- /// Construct a language from a two-byte ISO 639-1 code.
+ /// Construct a language from a two- or three-byte ISO 639-1/2/3 code.
pub fn from_str(iso: &str) -> Option<Self> {
- let mut bytes: [u8; 2] = iso.as_bytes().try_into().ok()?;
- bytes.make_ascii_lowercase();
- Some(Self(bytes))
+ let len = iso.len();
+ if matches!(len, 2 ..= 3) && iso.is_ascii() {
+ let mut bytes = [b' '; 3];
+ bytes[.. len].copy_from_slice(iso.as_bytes());
+ bytes.make_ascii_lowercase();
+ Some(Self(bytes, len as u8))
+ } else {
+ None
+ }
}
- /// Return the language code as a string slice.
+ /// Return the language code as an all lowercase string slice.
pub fn as_str(&self) -> &str {
- std::str::from_utf8(&self.0).unwrap_or_default()
+ std::str::from_utf8(&self.0[.. usize::from(self.1)]).unwrap_or_default()
}
/// The default direction for the language.
@@ -35,5 +41,34 @@ castable! {
Lang,
Expected: "string",
Value::Str(string) => Self::from_str(&string)
- .ok_or("expected two letter language code")?,
+ .ok_or("expected two or three letter language code (ISO 639-1/2/3)")?,
+}
+
+/// A code for a region somewhere in the world.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct Region([u8; 2]);
+
+impl Region {
+ /// Construct a region from its two-byte ISO 3166-1 alpha-2 code.
+ pub fn from_str(iso: &str) -> Option<Self> {
+ if iso.is_ascii() {
+ let mut bytes: [u8; 2] = iso.as_bytes().try_into().ok()?;
+ bytes.make_ascii_uppercase();
+ Some(Self(bytes))
+ } else {
+ None
+ }
+ }
+
+ /// Return the region code as an all uppercase string slice.
+ pub fn as_str(&self) -> &str {
+ std::str::from_utf8(&self.0).unwrap_or_default()
+ }
+}
+
+castable! {
+ Region,
+ Expected: "string",
+ Value::Str(string) => Self::from_str(&string)
+ .ok_or("expected two letter region code (ISO 3166-1 alpha-2)")?,
}
diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs
index 636b878c..0eb57339 100644
--- a/src/library/text/mod.rs
+++ b/src/library/text/mod.rs
@@ -65,8 +65,10 @@ impl TextNode {
/// The bottom end of the text bounding box.
pub const BOTTOM_EDGE: TextEdge = TextEdge::Metric(VerticalFontMetric::Baseline);
- /// An ISO 639-1 language code.
+ /// An ISO 639-1/2/3 language code.
pub const LANG: Lang = Lang::ENGLISH;
+ /// An ISO 3166-1 alpha-2 region code.
+ pub const REGION: Option<Region> = None;
/// The direction for text and inline objects. When `auto`, the direction is
/// automatically inferred from the language.
#[property(resolve)]
diff --git a/src/library/text/par.rs b/src/library/text/par.rs
index 232a5d0f..fc978357 100644
--- a/src/library/text/par.rs
+++ b/src/library/text/par.rs
@@ -406,9 +406,9 @@ fn collect<'a>(
ParChild::Quote(double) => {
let prev = full.len();
if styles.get(TextNode::SMART_QUOTES) {
- // TODO: Also get region.
let lang = styles.get(TextNode::LANG);
- let quotes = Quotes::from_lang(lang.as_str(), "");
+ let region = styles.get(TextNode::REGION);
+ let quotes = Quotes::from_lang(lang, region);
let peeked = iter.peek().and_then(|(child, _)| match child {
ParChild::Text(text) => text.chars().next(),
ParChild::Quote(_) => Some('"'),
diff --git a/src/library/text/quotes.rs b/src/library/text/quotes.rs
index 5f67bdb5..98402ca4 100644
--- a/src/library/text/quotes.rs
+++ b/src/library/text/quotes.rs
@@ -1,3 +1,4 @@
+use super::{Lang, Region};
use crate::parse::is_newline;
/// State machine for smart quote subtitution.
@@ -91,9 +92,10 @@ impl<'s> Quotes<'s> {
/// Norwegian.
///
/// For unknown languages, the English quotes are used.
- pub fn from_lang(language: &str, region: &str) -> Self {
- let (single_open, single_close, double_open, double_close) = match language {
- "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"),
+ pub fn from_lang(lang: Lang, region: Option<Region>) -> Self {
+ let region = region.as_ref().map(Region::as_str);
+ let (single_open, single_close, double_open, double_close) = match lang.as_str() {
+ "de" if matches!(region, Some("CH" | "LI")) => ("‹", "›", "«", "»"),
"cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
("‚", "‘", "„", "“")
}
diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs
index 72f86a38..055761df 100644
--- a/src/library/text/shaping.rs
+++ b/src/library/text/shaping.rs
@@ -1,4 +1,5 @@
use std::ops::Range;
+use std::str::FromStr;
use rustybuzz::{Feature, UnicodeBuffer};
@@ -372,6 +373,7 @@ fn shape_segment<'a>(
// Fill the buffer with our text.
let mut buffer = UnicodeBuffer::new();
buffer.push_str(text);
+ buffer.set_language(language(ctx.styles));
buffer.set_direction(match ctx.dir {
Dir::LTR => rustybuzz::Direction::LeftToRight,
Dir::RTL => rustybuzz::Direction::RightToLeft,
@@ -613,3 +615,14 @@ fn tags(styles: StyleChain) -> Vec<Feature> {
tags
}
+
+/// Process the language and region of a style chain into a
+/// rustybuzz-compatible BCP 47 language.
+fn language(styles: StyleChain) -> rustybuzz::Language {
+ let mut bcp: EcoString = styles.get(TextNode::LANG).as_str().into();
+ if let Some(region) = styles.get(TextNode::REGION) {
+ bcp.push('-');
+ bcp.push_str(region.as_str());
+ }
+ rustybuzz::Language::from_str(&bcp).unwrap()
+}