diff options
Diffstat (limited to 'crates/typst-library/src/text')
| -rw-r--r-- | crates/typst-library/src/text/case.rs | 79 | ||||
| -rw-r--r-- | crates/typst-library/src/text/deco.rs | 401 | ||||
| -rw-r--r-- | crates/typst-library/src/text/font/book.rs | 546 | ||||
| -rw-r--r-- | crates/typst-library/src/text/font/color.rs | 627 | ||||
| -rw-r--r-- | crates/typst-library/src/text/font/exceptions.rs | 337 | ||||
| -rw-r--r-- | crates/typst-library/src/text/font/mod.rs | 306 | ||||
| -rw-r--r-- | crates/typst-library/src/text/font/variant.rs | 320 | ||||
| -rw-r--r-- | crates/typst-library/src/text/item.rs | 118 | ||||
| -rw-r--r-- | crates/typst-library/src/text/lang.rs | 317 | ||||
| -rw-r--r-- | crates/typst-library/src/text/linebreak.rs | 46 | ||||
| -rw-r--r-- | crates/typst-library/src/text/lorem.rs | 24 | ||||
| -rw-r--r-- | crates/typst-library/src/text/mod.rs | 1318 | ||||
| -rw-r--r-- | crates/typst-library/src/text/raw.rs | 926 | ||||
| -rw-r--r-- | crates/typst-library/src/text/shift.rs | 210 | ||||
| -rw-r--r-- | crates/typst-library/src/text/smallcaps.rs | 58 | ||||
| -rw-r--r-- | crates/typst-library/src/text/smartquote.rs | 387 | ||||
| -rw-r--r-- | crates/typst-library/src/text/space.rs | 31 |
17 files changed, 6051 insertions, 0 deletions
diff --git a/crates/typst-library/src/text/case.rs b/crates/typst-library/src/text/case.rs new file mode 100644 index 00000000..69dbf5e1 --- /dev/null +++ b/crates/typst-library/src/text/case.rs @@ -0,0 +1,79 @@ +use crate::foundations::{cast, func, Cast, Content, Str}; +use crate::text::TextElem; + +/// Converts a string or content to lowercase. +/// +/// # Example +/// ```example +/// #lower("ABC") \ +/// #lower[*My Text*] \ +/// #lower[already low] +/// ``` +#[func(title = "Lowercase")] +pub fn lower( + /// The text to convert to lowercase. + text: Caseable, +) -> Caseable { + case(text, Case::Lower) +} + +/// Converts a string or content to uppercase. +/// +/// # Example +/// ```example +/// #upper("abc") \ +/// #upper[*my text*] \ +/// #upper[ALREADY HIGH] +/// ``` +#[func(title = "Uppercase")] +pub fn upper( + /// The text to convert to uppercase. + text: Caseable, +) -> Caseable { + case(text, Case::Upper) +} + +/// Change the case of text. +fn case(text: Caseable, case: Case) -> Caseable { + match text { + Caseable::Str(v) => Caseable::Str(case.apply(&v).into()), + Caseable::Content(v) => { + Caseable::Content(v.styled(TextElem::set_case(Some(case)))) + } + } +} + +/// A value whose case can be changed. +pub enum Caseable { + Str(Str), + Content(Content), +} + +cast! { + Caseable, + self => match self { + Self::Str(v) => v.into_value(), + Self::Content(v) => v.into_value(), + }, + v: Str => Self::Str(v), + v: Content => Self::Content(v), +} + +/// A case transformation on text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum Case { + /// Everything is lowercased. + Lower, + /// Everything is uppercased. + Upper, +} + +impl Case { + /// Apply the case to a string. 
+ pub fn apply(self, text: &str) -> String { + match self { + Self::Lower => text.to_lowercase(), + Self::Upper => text.to_uppercase(), + } + } +} diff --git a/crates/typst-library/src/text/deco.rs b/crates/typst-library/src/text/deco.rs new file mode 100644 index 00000000..5da7ecec --- /dev/null +++ b/crates/typst-library/src/text/deco.rs @@ -0,0 +1,401 @@ +use smallvec::smallvec; + +use crate::diag::SourceResult; +use crate::engine::Engine; +use crate::foundations::{elem, Content, Packed, Show, Smart, StyleChain}; +use crate::layout::{Abs, Corners, Length, Rel, Sides}; +use crate::text::{BottomEdge, BottomEdgeMetric, TextElem, TopEdge, TopEdgeMetric}; +use crate::visualize::{Color, FixedStroke, Paint, Stroke}; + +/// Underlines text. +/// +/// # Example +/// ```example +/// This is #underline[important]. +/// ``` +#[elem(Show)] +pub struct UnderlineElem { + /// How to [stroke] the line. + /// + /// If set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// ```example + /// Take #underline( + /// stroke: 1.5pt + red, + /// offset: 2pt, + /// [care], + /// ) + /// ``` + #[resolve] + #[fold] + pub stroke: Smart<Stroke>, + + /// The position of the line relative to the baseline, read from the font + /// tables if `{auto}`. + /// + /// ```example + /// #underline(offset: 5pt)[ + /// The Tale Of A Faraway Line I + /// ] + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. + /// + /// ```example + /// #align(center, + /// underline(extent: 2pt)[Chapter 1] + /// ) + /// ``` + #[resolve] + pub extent: Length, + + /// Whether the line skips sections in which it would collide with the + /// glyphs. + /// + /// ```example + /// This #underline(evade: true)[is great]. + /// This #underline(evade: false)[is less great]. 
+ /// ``` + #[default(true)] + pub evade: bool, + + /// Whether the line is placed behind the content it underlines. + /// + /// ```example + /// #set underline(stroke: (thickness: 1em, paint: maroon, cap: "round")) + /// #underline(background: true)[This is stylized.] \ + /// #underline(background: false)[This is partially hidden.] + /// ``` + #[default(false)] + pub background: bool, + + /// The content to underline. + #[required] + pub body: Content, +} + +impl Show for Packed<UnderlineElem> { + #[typst_macros::time(name = "underline", span = self.span())] + fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().clone().styled(TextElem::set_deco(smallvec![Decoration { + line: DecoLine::Underline { + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + evade: self.evade(styles), + background: self.background(styles), + }, + extent: self.extent(styles), + }]))) + } +} + +/// Adds a line over text. +/// +/// # Example +/// ```example +/// #overline[A line over text.] +/// ``` +#[elem(Show)] +pub struct OverlineElem { + /// How to [stroke] the line. + /// + /// If set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// ```example + /// #set text(fill: olive) + /// #overline( + /// stroke: green.darken(20%), + /// offset: -12pt, + /// [The Forest Theme], + /// ) + /// ``` + #[resolve] + #[fold] + pub stroke: Smart<Stroke>, + + /// The position of the line relative to the baseline. Read from the font + /// tables if `{auto}`. + /// + /// ```example + /// #overline(offset: -1.2em)[ + /// The Tale Of A Faraway Line II + /// ] + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. 
+ /// + /// ```example + /// #set overline(extent: 4pt) + /// #set underline(extent: 4pt) + /// #overline(underline[Typography Today]) + /// ``` + #[resolve] + pub extent: Length, + + /// Whether the line skips sections in which it would collide with the + /// glyphs. + /// + /// ```example + /// #overline( + /// evade: false, + /// offset: -7.5pt, + /// stroke: 1pt, + /// extent: 3pt, + /// [Temple], + /// ) + /// ``` + #[default(true)] + pub evade: bool, + + /// Whether the line is placed behind the content it overlines. + /// + /// ```example + /// #set overline(stroke: (thickness: 1em, paint: maroon, cap: "round")) + /// #overline(background: true)[This is stylized.] \ + /// #overline(background: false)[This is partially hidden.] + /// ``` + #[default(false)] + pub background: bool, + + /// The content to add a line over. + #[required] + pub body: Content, +} + +impl Show for Packed<OverlineElem> { + #[typst_macros::time(name = "overline", span = self.span())] + fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().clone().styled(TextElem::set_deco(smallvec![Decoration { + line: DecoLine::Overline { + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + evade: self.evade(styles), + background: self.background(styles), + }, + extent: self.extent(styles), + }]))) + } +} + +/// Strikes through text. +/// +/// # Example +/// ```example +/// This is #strike[not] relevant. +/// ``` +#[elem(title = "Strikethrough", Show)] +pub struct StrikeElem { + /// How to [stroke] the line. + /// + /// If set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// _Note:_ Please don't use this for real redaction as you can still copy + /// paste the text. + /// + /// ```example + /// This is #strike(stroke: 1.5pt + red)[very stricken through]. \ + /// This is #strike(stroke: 10pt)[redacted]. 
+ /// ``` + #[resolve] + #[fold] + pub stroke: Smart<Stroke>, + + /// The position of the line relative to the baseline. Read from the font + /// tables if `{auto}`. + /// + /// This is useful if you are unhappy with the offset your font provides. + /// + /// ```example + /// #set text(font: "Inria Serif") + /// This is #strike(offset: auto)[low-ish]. \ + /// This is #strike(offset: -3.5pt)[on-top]. + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. + /// + /// ```example + /// This #strike(extent: -2pt)[skips] parts of the word. + /// This #strike(extent: 2pt)[extends] beyond the word. + /// ``` + #[resolve] + pub extent: Length, + + /// Whether the line is placed behind the content. + /// + /// ```example + /// #set strike(stroke: red) + /// #strike(background: true)[This is behind.] \ + /// #strike(background: false)[This is in front.] + /// ``` + #[default(false)] + pub background: bool, + + /// The content to strike through. + #[required] + pub body: Content, +} + +impl Show for Packed<StrikeElem> { + #[typst_macros::time(name = "strike", span = self.span())] + fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().clone().styled(TextElem::set_deco(smallvec![Decoration { + // Note that we do not support evade option for strikethrough. + line: DecoLine::Strikethrough { + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + background: self.background(styles), + }, + extent: self.extent(styles), + }]))) + } +} + +/// Highlights text with a background color. +/// +/// # Example +/// ```example +/// This is #highlight[important]. +/// ``` +#[elem(Show)] +pub struct HighlightElem { + /// The color to highlight the text with. + /// + /// ```example + /// This is #highlight( + /// fill: blue + /// )[highlighted with blue]. 
+ /// ``` + #[default(Some(Color::from_u8(0xFF, 0xFD, 0x11, 0xA1).into()))] + pub fill: Option<Paint>, + + /// The highlight's border color. See the + /// [rectangle's documentation]($rect.stroke) for more details. + /// + /// ```example + /// This is a #highlight( + /// stroke: fuchsia + /// )[stroked highlighting]. + /// ``` + #[resolve] + #[fold] + pub stroke: Sides<Option<Option<Stroke>>>, + + /// The top end of the background rectangle. + /// + /// ```example + /// #set highlight(top-edge: "ascender") + /// #highlight[a] #highlight[aib] + /// + /// #set highlight(top-edge: "x-height") + /// #highlight[a] #highlight[aib] + /// ``` + #[default(TopEdge::Metric(TopEdgeMetric::Ascender))] + pub top_edge: TopEdge, + + /// The bottom end of the background rectangle. + /// + /// ```example + /// #set highlight(bottom-edge: "descender") + /// #highlight[a] #highlight[ap] + /// + /// #set highlight(bottom-edge: "baseline") + /// #highlight[a] #highlight[ap] + /// ``` + #[default(BottomEdge::Metric(BottomEdgeMetric::Descender))] + pub bottom_edge: BottomEdge, + + /// The amount by which to extend the background to the sides beyond + /// (or within if negative) the content. + /// + /// ```example + /// A long #highlight(extent: 4pt)[background]. + /// ``` + #[resolve] + pub extent: Length, + + /// How much to round the highlight's corners. See the + /// [rectangle's documentation]($rect.radius) for more details. + /// + /// ```example + /// Listen #highlight( + /// radius: 5pt, extent: 2pt + /// )[carefully], it will be on the test. + /// ``` + #[resolve] + #[fold] + pub radius: Corners<Option<Rel<Length>>>, + + /// The content that should be highlighted. 
+ #[required] + pub body: Content, +} + +impl Show for Packed<HighlightElem> { + #[typst_macros::time(name = "highlight", span = self.span())] + fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().clone().styled(TextElem::set_deco(smallvec![Decoration { + line: DecoLine::Highlight { + fill: self.fill(styles), + stroke: self + .stroke(styles) + .unwrap_or_default() + .map(|stroke| stroke.map(Stroke::unwrap_or_default)), + top_edge: self.top_edge(styles), + bottom_edge: self.bottom_edge(styles), + radius: self.radius(styles).unwrap_or_default(), + }, + extent: self.extent(styles), + }]))) + } +} + +/// A text decoration. +/// +/// Can be positioned over, under, or on top of text, or highlight the text with +/// a background. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Decoration { + pub line: DecoLine, + pub extent: Abs, +} + +/// A kind of decorative line. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum DecoLine { + Underline { + stroke: Stroke<Abs>, + offset: Smart<Abs>, + evade: bool, + background: bool, + }, + Strikethrough { + stroke: Stroke<Abs>, + offset: Smart<Abs>, + background: bool, + }, + Overline { + stroke: Stroke<Abs>, + offset: Smart<Abs>, + evade: bool, + background: bool, + }, + Highlight { + fill: Option<Paint>, + stroke: Sides<Option<FixedStroke>>, + top_edge: TopEdge, + bottom_edge: BottomEdge, + radius: Corners<Rel<Abs>>, + }, +} diff --git a/crates/typst-library/src/text/font/book.rs b/crates/typst-library/src/text/font/book.rs new file mode 100644 index 00000000..23e27f64 --- /dev/null +++ b/crates/typst-library/src/text/font/book.rs @@ -0,0 +1,546 @@ +use std::cmp::Reverse; +use std::collections::BTreeMap; +use std::fmt::{self, Debug, Formatter}; + +use serde::{Deserialize, Serialize}; +use ttf_parser::{name_id, PlatformId, Tag}; +use unicode_segmentation::UnicodeSegmentation; + +use super::exceptions::find_exception; +use crate::text::{Font, FontStretch, FontStyle, FontVariant, 
FontWeight}; + +/// Metadata about a collection of fonts. +#[derive(Debug, Default, Clone, Hash)] +pub struct FontBook { + /// Maps from lowercased family names to font indices. + families: BTreeMap<String, Vec<usize>>, + /// Metadata about each font in the collection. + infos: Vec<FontInfo>, +} + +impl FontBook { + /// Create a new, empty font book. + pub fn new() -> Self { + Self { families: BTreeMap::new(), infos: vec![] } + } + + /// Create a font book from a collection of font infos. + pub fn from_infos(infos: impl IntoIterator<Item = FontInfo>) -> Self { + let mut book = Self::new(); + for info in infos { + book.push(info); + } + book + } + + /// Create a font book for a collection of fonts. + pub fn from_fonts<'a>(fonts: impl IntoIterator<Item = &'a Font>) -> Self { + Self::from_infos(fonts.into_iter().map(|font| font.info().clone())) + } + + /// Insert metadata into the font book. + pub fn push(&mut self, info: FontInfo) { + let index = self.infos.len(); + let family = info.family.to_lowercase(); + self.families.entry(family).or_default().push(index); + self.infos.push(info); + } + + /// Get the font info for the given index. + pub fn info(&self, index: usize) -> Option<&FontInfo> { + self.infos.get(index) + } + + /// Returns true if the book contains a font family with the given name. + pub fn contains_family(&self, family: &str) -> bool { + self.families.contains_key(family) + } + + /// An ordered iterator over all font families this book knows and details + /// about the fonts that are part of them. + pub fn families( + &self, + ) -> impl Iterator<Item = (&str, impl Iterator<Item = &FontInfo>)> + '_ { + // Since the keys are lowercased, we instead use the family field of the + // first face's info. 
+ self.families.values().map(|ids| { + let family = self.infos[ids[0]].family.as_str(); + let infos = ids.iter().map(|&id| &self.infos[id]); + (family, infos) + }) + } + + /// Try to find a font from the given `family` that matches the given + /// `variant` as closely as possible. + /// + /// The `family` should be all lowercase. + pub fn select(&self, family: &str, variant: FontVariant) -> Option<usize> { + let ids = self.families.get(family)?; + self.find_best_variant(None, variant, ids.iter().copied()) + } + + /// Iterate over all variants of a family. + pub fn select_family(&self, family: &str) -> impl Iterator<Item = usize> + '_ { + self.families + .get(family) + .map(|vec| vec.as_slice()) + .unwrap_or_default() + .iter() + .copied() + } + + /// Try to find and load a fallback font that + /// - is as close as possible to the font `like` (if any) + /// - is as close as possible to the given `variant` + /// - is suitable for shaping the given `text` + pub fn select_fallback( + &self, + like: Option<&FontInfo>, + variant: FontVariant, + text: &str, + ) -> Option<usize> { + // Find the fonts that contain the text's first non-space char ... + let c = text.chars().find(|c| !c.is_whitespace())?; + let ids = self + .infos + .iter() + .enumerate() + .filter(|(_, info)| info.coverage.contains(c as u32)) + .map(|(index, _)| index); + + // ... and find the best variant among them. + self.find_best_variant(like, variant, ids) + } + + /// Find the font in the passed iterator that + /// - is closest to the font `like` (if any) + /// - is closest to the given `variant` + /// + /// To do that we compute a key for all variants and select the one with the + /// minimal key. This key prioritizes: + /// - If `like` is some other font: + /// - Are both fonts (not) monospaced? + /// - Do both fonts (not) have serifs? + /// - How many words do the families share in their prefix? E.g. 
"Noto + /// Sans" and "Noto Sans Arabic" share two words, whereas "IBM Plex + /// Arabic" shares none with "Noto Sans", so prefer "Noto Sans Arabic" + /// if `like` is "Noto Sans". In case there are two equally good + /// matches, we prefer the shorter one because it is less special (e.g. + /// if `like` is "Noto Sans Arabic", we prefer "Noto Sans" over "Noto + /// Sans CJK HK".) + /// - The style (normal / italic / oblique). If we want italic or oblique + /// but it doesn't exist, the other one of the two is still better than + /// normal. + /// - The absolute distance to the target stretch. + /// - The absolute distance to the target weight. + fn find_best_variant( + &self, + like: Option<&FontInfo>, + variant: FontVariant, + ids: impl IntoIterator<Item = usize>, + ) -> Option<usize> { + let mut best = None; + let mut best_key = None; + + for id in ids { + let current = &self.infos[id]; + let key = ( + like.map(|like| { + ( + current.flags.contains(FontFlags::MONOSPACE) + != like.flags.contains(FontFlags::MONOSPACE), + current.flags.contains(FontFlags::SERIF) + != like.flags.contains(FontFlags::SERIF), + Reverse(shared_prefix_words(&current.family, &like.family)), + current.family.len(), + ) + }), + current.variant.style.distance(variant.style), + current.variant.stretch.distance(variant.stretch), + current.variant.weight.distance(variant.weight), + ); + + if best_key.map_or(true, |b| key < b) { + best = Some(id); + best_key = Some(key); + } + } + + best + } +} + +/// Properties of a single font. +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct FontInfo { + /// The typographic font family this font is part of. + pub family: String, + /// Properties that distinguish this font from other fonts in the same + /// family. + pub variant: FontVariant, + /// Properties of the font. + pub flags: FontFlags, + /// The unicode coverage of the font. + pub coverage: Coverage, +} + +bitflags::bitflags! 
{ + /// Bitflags describing characteristics of a font. + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + #[derive(Serialize, Deserialize)] + #[serde(transparent)] + pub struct FontFlags: u32 { + /// All glyphs have the same width. + const MONOSPACE = 1 << 0; + /// Glyphs have short strokes at their stems. + const SERIF = 1 << 1; + } +} + +impl FontInfo { + /// Compute metadata for font at the `index` of the given data. + pub fn new(data: &[u8], index: u32) -> Option<Self> { + let ttf = ttf_parser::Face::parse(data, index).ok()?; + Self::from_ttf(&ttf) + } + + /// Compute metadata for all fonts in the given data. + pub fn iter(data: &[u8]) -> impl Iterator<Item = FontInfo> + '_ { + let count = ttf_parser::fonts_in_collection(data).unwrap_or(1); + (0..count).filter_map(move |index| Self::new(data, index)) + } + + /// Compute metadata for a single ttf-parser face. + pub(super) fn from_ttf(ttf: &ttf_parser::Face) -> Option<Self> { + let ps_name = find_name(ttf, name_id::POST_SCRIPT_NAME); + let exception = ps_name.as_deref().and_then(find_exception); + // We cannot use Name ID 16 "Typographic Family", because for some + // fonts it groups together more than just Style / Weight / Stretch + // variants (e.g. Display variants of Noto fonts) and then some + // variants become inaccessible from Typst. And even though the + // fsSelection bit WWS should help us decide whether that is the + // case, it's wrong for some fonts (e.g. for certain variants of "Noto + // Sans Display"). + // + // So, instead we use Name ID 1 "Family" and trim many common + // suffixes for which we know that they just describe styling (e.g. + // "ExtraBold"). 
+ let family = + exception.and_then(|c| c.family.map(str::to_string)).or_else(|| { + let family = find_name(ttf, name_id::FAMILY)?; + Some(typographic_family(&family).to_string()) + })?; + + let variant = { + let style = exception.and_then(|c| c.style).unwrap_or_else(|| { + let mut full = find_name(ttf, name_id::FULL_NAME).unwrap_or_default(); + full.make_ascii_lowercase(); + + // Some fonts miss the relevant bits for italic or oblique, so + // we also try to infer that from the full name. + let italic = ttf.is_italic() || full.contains("italic"); + let oblique = ttf.is_oblique() + || full.contains("oblique") + || full.contains("slanted"); + + match (italic, oblique) { + (false, false) => FontStyle::Normal, + (true, _) => FontStyle::Italic, + (_, true) => FontStyle::Oblique, + } + }); + + let weight = exception.and_then(|c| c.weight).unwrap_or_else(|| { + let number = ttf.weight().to_number(); + FontWeight::from_number(number) + }); + + let stretch = exception + .and_then(|c| c.stretch) + .unwrap_or_else(|| FontStretch::from_number(ttf.width().to_number())); + + FontVariant { style, weight, stretch } + }; + + // Determine the unicode coverage. + let mut codepoints = vec![]; + for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) { + if subtable.is_unicode() { + subtable.codepoints(|c| codepoints.push(c)); + } + } + + let mut flags = FontFlags::empty(); + flags.set(FontFlags::MONOSPACE, ttf.is_monospaced()); + + // Determine whether this is a serif or sans-serif font. + if let Some(panose) = ttf + .raw_face() + .table(Tag::from_bytes(b"OS/2")) + .and_then(|os2| os2.get(32..45)) + { + if matches!(panose, [2, 2..=10, ..]) { + flags.insert(FontFlags::SERIF); + } + } + + Some(FontInfo { + family, + variant, + flags, + coverage: Coverage::from_vec(codepoints), + }) + } + + /// Whether this is the macOS LastResort font. It can yield tofus with + /// glyph ID != 0. 
+ pub fn is_last_resort(&self) -> bool { + self.family == "LastResort" + } +} + +/// Try to find and decode the name with the given id. +pub(super) fn find_name(ttf: &ttf_parser::Face, name_id: u16) -> Option<String> { + ttf.names().into_iter().find_map(|entry| { + if entry.name_id == name_id { + if let Some(string) = entry.to_string() { + return Some(string); + } + + if entry.platform_id == PlatformId::Macintosh && entry.encoding_id == 0 { + return Some(decode_mac_roman(entry.name)); + } + } + + None + }) +} + +/// Decode mac roman encoded bytes into a string. +fn decode_mac_roman(coded: &[u8]) -> String { + #[rustfmt::skip] + const TABLE: [char; 128] = [ + 'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è', + 'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü', + '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø', + '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø', + '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ', + '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', '\u{fb01}', '\u{fb02}', + '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô', + '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ', + ]; + + fn char_from_mac_roman(code: u8) -> char { + if code < 128 { + code as char + } else { + TABLE[(code - 128) as usize] + } + } + + coded.iter().copied().map(char_from_mac_roman).collect() +} + +/// Trim style naming from a family name and fix bad names. +fn typographic_family(mut family: &str) -> &str { + // Separators between names, modifiers and styles. + const SEPARATORS: [char; 3] = [' ', '-', '_']; + + // Modifiers that can appear in combination with suffixes. + const MODIFIERS: &[&str] = + &["extra", "ext", "ex", "x", "semi", "sem", "sm", "demi", "dem", "ultra"]; + + // Style suffixes. 
+ #[rustfmt::skip] + const SUFFIXES: &[&str] = &[ + "normal", "italic", "oblique", "slanted", + "thin", "th", "hairline", "light", "lt", "regular", "medium", "med", + "md", "bold", "bd", "demi", "extb", "black", "blk", "bk", "heavy", + "narrow", "condensed", "cond", "cn", "cd", "compressed", "expanded", "exp" + ]; + + // Trim spacing and weird leading dots in Apple fonts. + family = family.trim().trim_start_matches('.'); + + // Lowercase the string so that the suffixes match case-insensitively. + let lower = family.to_ascii_lowercase(); + let mut len = usize::MAX; + let mut trimmed = lower.as_str(); + + // Trim style suffixes repeatedly. + while trimmed.len() < len { + len = trimmed.len(); + + // Find style suffix. + let mut t = trimmed; + let mut shortened = false; + while let Some(s) = SUFFIXES.iter().find_map(|s| t.strip_suffix(s)) { + shortened = true; + t = s; + } + + if !shortened { + break; + } + + // Strip optional separator. + if let Some(s) = t.strip_suffix(SEPARATORS) { + trimmed = s; + t = s; + } + + // Also allow an extra modifier, but apply it only if it is separated + // from the text before it (to prevent false positives). + if let Some(t) = MODIFIERS.iter().find_map(|s| t.strip_suffix(s)) { + if let Some(stripped) = t.strip_suffix(SEPARATORS) { + trimmed = stripped; + } + } + } + + // Apply style suffix trimming. + family = &family[..len]; + + family +} + +/// How many words the two strings share in their prefix. +fn shared_prefix_words(left: &str, right: &str) -> usize { + left.unicode_words() + .zip(right.unicode_words()) + .take_while(|(l, r)| l == r) + .count() +} + +/// A compactly encoded set of codepoints. +/// +/// The set is represented by alternating specifications of how many codepoints +/// are not in the set and how many are in the set. 
+/// +/// For example, for the set `{2, 3, 4, 9, 10, 11, 15, 18, 19}`, there are: +/// - 2 codepoints not inside (0, 1) +/// - 3 codepoints inside (2, 3, 4) +/// - 4 codepoints not inside (5, 6, 7, 8) +/// - 3 codepoints inside (9, 10, 11) +/// - 3 codepoints not inside (12, 13, 14) +/// - 1 codepoint inside (15) +/// - 2 codepoints not inside (16, 17) +/// - 2 codepoints inside (18, 19) +/// +/// So the resulting encoding is `[2, 3, 4, 3, 3, 1, 2, 2]`. +#[derive(Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct Coverage(Vec<u32>); + +impl Coverage { + /// Encode a vector of codepoints. + pub fn from_vec(mut codepoints: Vec<u32>) -> Self { + codepoints.sort(); + codepoints.dedup(); + + let mut runs = Vec::new(); + let mut next = 0; + + for c in codepoints { + if let Some(run) = runs.last_mut().filter(|_| c == next) { + *run += 1; + } else { + runs.push(c - next); + runs.push(1); + } + + next = c + 1; + } + + Self(runs) + } + + /// Whether the codepoint is covered. + pub fn contains(&self, c: u32) -> bool { + let mut inside = false; + let mut cursor = 0; + + for &run in &self.0 { + if (cursor..cursor + run).contains(&c) { + return inside; + } + cursor += run; + inside = !inside; + } + + false + } + + /// Iterate over all covered codepoints. 
+ pub fn iter(&self) -> impl Iterator<Item = u32> + '_ { + let mut inside = false; + let mut cursor = 0; + self.0.iter().flat_map(move |run| { + let range = if inside { cursor..cursor + run } else { 0..0 }; + inside = !inside; + cursor += run; + range + }) + } +} + +impl Debug for Coverage { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.pad("Coverage(..)") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_trim_styles() { + assert_eq!(typographic_family("Atma Light"), "Atma"); + assert_eq!(typographic_family("eras bold"), "eras"); + assert_eq!(typographic_family("footlight mt light"), "footlight mt"); + assert_eq!(typographic_family("times new roman"), "times new roman"); + assert_eq!(typographic_family("noto sans mono cond sembd"), "noto sans mono"); + assert_eq!(typographic_family("noto serif SEMCOND sembd"), "noto serif"); + assert_eq!(typographic_family("crimson text"), "crimson text"); + assert_eq!(typographic_family("footlight light"), "footlight"); + assert_eq!(typographic_family("Noto Sans"), "Noto Sans"); + assert_eq!(typographic_family("Noto Sans Light"), "Noto Sans"); + assert_eq!(typographic_family("Noto Sans Semicondensed Heavy"), "Noto Sans"); + assert_eq!(typographic_family("Familx"), "Familx"); + assert_eq!(typographic_family("Font Ultra"), "Font Ultra"); + assert_eq!(typographic_family("Font Ultra Bold"), "Font"); + } + + #[test] + fn test_coverage() { + #[track_caller] + fn test(set: &[u32], runs: &[u32]) { + let coverage = Coverage::from_vec(set.to_vec()); + assert_eq!(coverage.0, runs); + + let max = 5 + set.iter().copied().max().unwrap_or_default(); + for c in 0..max { + assert_eq!(set.contains(&c), coverage.contains(c)); + } + } + + test(&[], &[]); + test(&[0], &[0, 1]); + test(&[1], &[1, 1]); + test(&[0, 1], &[0, 2]); + test(&[0, 1, 3], &[0, 2, 1, 1]); + test( + // {2, 3, 4, 9, 10, 11, 15, 18, 19} + &[18, 19, 2, 4, 9, 11, 15, 3, 3, 10], + &[2, 3, 4, 3, 3, 1, 2, 2], + ) + } + + #[test] + fn test_coverage_iter() 
{ + let codepoints = vec![2, 3, 7, 8, 9, 14, 15, 19, 21]; + let coverage = Coverage::from_vec(codepoints.clone()); + assert_eq!(coverage.iter().collect::<Vec<_>>(), codepoints); + } +} diff --git a/crates/typst-library/src/text/font/color.rs b/crates/typst-library/src/text/font/color.rs new file mode 100644 index 00000000..08f6fe0a --- /dev/null +++ b/crates/typst-library/src/text/font/color.rs @@ -0,0 +1,627 @@ +//! Utilities for color font handling + +use std::io::Read; + +use ttf_parser::{GlyphId, RgbaColor}; +use typst_syntax::Span; +use usvg::tiny_skia_path; +use xmlwriter::XmlWriter; + +use crate::layout::{Abs, Frame, FrameItem, Point, Size}; +use crate::text::{Font, Glyph}; +use crate::visualize::{FixedStroke, Geometry, Image, RasterFormat, VectorFormat}; + +/// Whether this glyph should be rendered via simple outlining instead of via +/// `glyph_frame`. +pub fn should_outline(font: &Font, glyph: &Glyph) -> bool { + let ttf = font.ttf(); + let glyph_id = GlyphId(glyph.id); + (ttf.tables().glyf.is_some() || ttf.tables().cff.is_some()) + && !ttf + .glyph_raster_image(glyph_id, u16::MAX) + .is_some_and(|img| img.format == ttf_parser::RasterImageFormat::PNG) + && !ttf.is_color_glyph(glyph_id) + && ttf.glyph_svg_image(glyph_id).is_none() +} + +/// Returns a frame representing a glyph and whether it is a fallback tofu +/// frame. +/// +/// Should only be called on glyphs for which [`should_outline`] returns false. +/// +/// The glyphs are sized in font units, [`text.item.size`] is not taken into +/// account. +#[comemo::memoize] +pub fn glyph_frame(font: &Font, glyph_id: u16) -> (Frame, bool) { + let upem = Abs::pt(font.units_per_em()); + let glyph_id = GlyphId(glyph_id); + + let mut frame = Frame::soft(Size::splat(upem)); + let mut tofu = false; + + if draw_glyph(&mut frame, font, upem, glyph_id).is_none() + && font.ttf().glyph_index(' ') != Some(glyph_id) + { + // Generate a fallback tofu if the glyph couldn't be drawn, unless it is + // the space glyph. 
Then, an empty frame does the job. (This happens for + // some rare CBDT fonts, which don't define a bitmap for the space, but + // also don't have a glyf or CFF table.) + draw_fallback_tofu(&mut frame, font, upem, glyph_id); + tofu = true; + } + + (frame, tofu) +} + +/// Tries to draw a glyph. +fn draw_glyph( + frame: &mut Frame, + font: &Font, + upem: Abs, + glyph_id: GlyphId, +) -> Option<()> { + let ttf = font.ttf(); + if let Some(raster_image) = ttf + .glyph_raster_image(glyph_id, u16::MAX) + .filter(|img| img.format == ttf_parser::RasterImageFormat::PNG) + { + draw_raster_glyph(frame, font, upem, raster_image) + } else if ttf.is_color_glyph(glyph_id) { + draw_colr_glyph(frame, font, upem, glyph_id) + } else if ttf.glyph_svg_image(glyph_id).is_some() { + draw_svg_glyph(frame, font, upem, glyph_id) + } else { + None + } +} + +/// Draws a fallback tofu box with the advance width of the glyph. +fn draw_fallback_tofu(frame: &mut Frame, font: &Font, upem: Abs, glyph_id: GlyphId) { + let advance = font + .ttf() + .glyph_hor_advance(glyph_id) + .map(|advance| Abs::pt(advance as f64)) + .unwrap_or(upem / 3.0); + let inset = 0.15 * advance; + let height = 0.7 * upem; + let pos = Point::new(inset, upem - height); + let size = Size::new(advance - inset * 2.0, height); + let thickness = upem / 20.0; + let stroke = FixedStroke { thickness, ..Default::default() }; + let shape = Geometry::Rect(size).stroked(stroke); + frame.push(pos, FrameItem::Shape(shape, Span::detached())); +} + +/// Draws a raster glyph in a frame. +/// +/// Supports only PNG images. +fn draw_raster_glyph( + frame: &mut Frame, + font: &Font, + upem: Abs, + raster_image: ttf_parser::RasterGlyphImage, +) -> Option<()> { + let image = + Image::new(raster_image.data.into(), RasterFormat::Png.into(), None).ok()?; + + // Apple Color emoji doesn't provide offset information (or at least + // not in a way ttf-parser understands), so we artificially shift their + // baseline to make it look good. 
+ let y_offset = if font.info().family.to_lowercase() == "apple color emoji" { + 20.0 + } else { + -(raster_image.y as f64) + }; + + let position = Point::new( + upem * raster_image.x as f64 / raster_image.pixels_per_em as f64, + upem * y_offset / raster_image.pixels_per_em as f64, + ); + let aspect_ratio = image.width() / image.height(); + let size = Size::new(upem, upem * aspect_ratio); + frame.push(position, FrameItem::Image(image, size, Span::detached())); + + Some(()) +} + +/// Draws a glyph from the COLR table into the frame. +fn draw_colr_glyph( + frame: &mut Frame, + font: &Font, + upem: Abs, + glyph_id: GlyphId, +) -> Option<()> { + let mut svg = XmlWriter::new(xmlwriter::Options::default()); + + let ttf = font.ttf(); + let width = ttf.global_bounding_box().width() as f64; + let height = ttf.global_bounding_box().height() as f64; + let x_min = ttf.global_bounding_box().x_min as f64; + let y_max = ttf.global_bounding_box().y_max as f64; + let tx = -x_min; + let ty = -y_max; + + svg.start_element("svg"); + svg.write_attribute("xmlns", "http://www.w3.org/2000/svg"); + svg.write_attribute("xmlns:xlink", "http://www.w3.org/1999/xlink"); + svg.write_attribute("width", &width); + svg.write_attribute("height", &height); + svg.write_attribute_fmt("viewBox", format_args!("0 0 {width} {height}")); + + let mut path_buf = String::with_capacity(256); + let gradient_index = 1; + let clip_path_index = 1; + + svg.start_element("g"); + svg.write_attribute_fmt( + "transform", + format_args!("matrix(1 0 0 -1 0 0) matrix(1 0 0 1 {tx} {ty})"), + ); + + let mut glyph_painter = GlyphPainter { + face: ttf, + svg: &mut svg, + path_buf: &mut path_buf, + gradient_index, + clip_path_index, + palette_index: 0, + transform: ttf_parser::Transform::default(), + outline_transform: ttf_parser::Transform::default(), + transforms_stack: vec![ttf_parser::Transform::default()], + }; + + ttf.paint_color_glyph(glyph_id, 0, RgbaColor::new(0, 0, 0, 255), &mut glyph_painter)?; + svg.end_element(); + 
+ let data = svg.end_document().into_bytes(); + + let image = Image::new(data.into(), VectorFormat::Svg.into(), None).ok()?; + + let y_shift = Abs::pt(upem.to_pt() - y_max); + let position = Point::new(Abs::pt(x_min), y_shift); + let size = Size::new(Abs::pt(width), Abs::pt(height)); + frame.push(position, FrameItem::Image(image, size, Span::detached())); + + Some(()) +} + +/// Draws an SVG glyph in a frame. +fn draw_svg_glyph( + frame: &mut Frame, + font: &Font, + upem: Abs, + glyph_id: GlyphId, +) -> Option<()> { + // TODO: Our current conversion of the SVG table works for Twitter Color Emoji, + // but might not work for others. See also: https://github.com/RazrFalcon/resvg/pull/776 + let mut data = font.ttf().glyph_svg_image(glyph_id)?.data; + + // Decompress SVGZ. + let mut decoded = vec![]; + if data.starts_with(&[0x1f, 0x8b]) { + let mut decoder = flate2::read::GzDecoder::new(data); + decoder.read_to_end(&mut decoded).ok()?; + data = &decoded; + } + + // Parse XML. + let xml = std::str::from_utf8(data).ok()?; + let document = roxmltree::Document::parse(xml).ok()?; + + // Parse SVG. + let opts = usvg::Options::default(); + let tree = usvg::Tree::from_xmltree(&document, &opts).ok()?; + + let bbox = tree.root().bounding_box(); + let width = bbox.width() as f64; + let height = bbox.height() as f64; + let left = bbox.left() as f64; + let top = bbox.top() as f64; + + let mut data = tree.to_string(&usvg::WriteOptions::default()); + + // The SVG coordinates and the font coordinates are not the same: the Y axis + // is mirrored. But the origin of the axes are the same (which means that + // the horizontal axis in the SVG document corresponds to the baseline). See + // the reference for more details: + // https://learn.microsoft.com/en-us/typography/opentype/spec/svg#coordinate-systems-and-glyph-metrics + // + // If we used the SVG document as it is, svg2pdf would produce a cropped + // glyph (only what is under the baseline would be visible). 
So we need to + // embed the original SVG in another one that has the exact dimensions of + // the glyph, with a transform to make it fit. We also need to remove the + // viewBox, height and width attributes from the inner SVG, otherwise usvg + // takes into account these values to clip the embedded SVG. + make_svg_unsized(&mut data); + let wrapper_svg = format!( + r#" + <svg + width="{width}" + height="{height}" + viewBox="0 0 {width} {height}" + xmlns="http://www.w3.org/2000/svg"> + <g transform="matrix(1 0 0 1 {tx} {ty})"> + {inner} + </g> + </svg> + "#, + inner = data, + tx = -left, + ty = -top, + ); + + let image = + Image::new(wrapper_svg.into_bytes().into(), VectorFormat::Svg.into(), None) + .ok()?; + + let position = Point::new(Abs::pt(left), Abs::pt(top) + upem); + let size = Size::new(Abs::pt(width), Abs::pt(height)); + frame.push(position, FrameItem::Image(image, size, Span::detached())); + + Some(()) +} + +/// Remove all size specifications (viewBox, width and height attributes) from a +/// SVG document. +fn make_svg_unsized(svg: &mut String) { + let mut viewbox_range = None; + let mut width_range = None; + let mut height_range = None; + + let mut s = unscanny::Scanner::new(svg); + + s.eat_until("<svg"); + s.eat_if("<svg"); + while !s.eat_if('>') && !s.done() { + s.eat_whitespace(); + let start = s.cursor(); + let attr_name = s.eat_until('=').trim(); + // Eat the equal sign and the quote. + s.eat(); + s.eat(); + let mut escaped = false; + while (escaped || !s.eat_if('"')) && !s.done() { + escaped = s.eat() == Some('\\'); + } + match attr_name { + "viewBox" => viewbox_range = Some(start..s.cursor()), + "width" => width_range = Some(start..s.cursor()), + "height" => height_range = Some(start..s.cursor()), + _ => {} + } + } + + // Remove the `viewBox` attribute. + if let Some(range) = viewbox_range { + svg.replace_range(range.clone(), &" ".repeat(range.len())); + } + + // Remove the `width` attribute. 
+ if let Some(range) = width_range { + svg.replace_range(range.clone(), &" ".repeat(range.len())); + } + + // Remove the `height` attribute. + if let Some(range) = height_range { + svg.replace_range(range, ""); + } +} + +struct ColrBuilder<'a>(&'a mut String); + +impl ColrBuilder<'_> { + fn finish(&mut self) { + if !self.0.is_empty() { + self.0.pop(); // remove trailing space + } + } +} + +impl ttf_parser::OutlineBuilder for ColrBuilder<'_> { + fn move_to(&mut self, x: f32, y: f32) { + use std::fmt::Write; + write!(self.0, "M {x} {y} ").unwrap() + } + + fn line_to(&mut self, x: f32, y: f32) { + use std::fmt::Write; + write!(self.0, "L {x} {y} ").unwrap() + } + + fn quad_to(&mut self, x1: f32, y1: f32, x: f32, y: f32) { + use std::fmt::Write; + write!(self.0, "Q {x1} {y1} {x} {y} ").unwrap() + } + + fn curve_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x: f32, y: f32) { + use std::fmt::Write; + write!(self.0, "C {x1} {y1} {x2} {y2} {x} {y} ").unwrap() + } + + fn close(&mut self) { + self.0.push_str("Z ") + } +} + +// NOTE: This is only a best-effort translation of COLR into SVG. It's not feature-complete +// and it's also not possible to make it feature-complete using just raw SVG features. 
+pub(crate) struct GlyphPainter<'a> { + pub(crate) face: &'a ttf_parser::Face<'a>, + pub(crate) svg: &'a mut xmlwriter::XmlWriter, + pub(crate) path_buf: &'a mut String, + pub(crate) gradient_index: usize, + pub(crate) clip_path_index: usize, + pub(crate) palette_index: u16, + pub(crate) transform: ttf_parser::Transform, + pub(crate) outline_transform: ttf_parser::Transform, + pub(crate) transforms_stack: Vec<ttf_parser::Transform>, +} + +impl<'a> GlyphPainter<'a> { + fn write_gradient_stops(&mut self, stops: ttf_parser::colr::GradientStopsIter) { + for stop in stops { + self.svg.start_element("stop"); + self.svg.write_attribute("offset", &stop.stop_offset); + self.write_color_attribute("stop-color", stop.color); + let opacity = f32::from(stop.color.alpha) / 255.0; + self.svg.write_attribute("stop-opacity", &opacity); + self.svg.end_element(); + } + } + + fn write_color_attribute(&mut self, name: &str, color: ttf_parser::RgbaColor) { + self.svg.write_attribute_fmt( + name, + format_args!("rgb({}, {}, {})", color.red, color.green, color.blue), + ); + } + + fn write_transform_attribute(&mut self, name: &str, ts: ttf_parser::Transform) { + if ts.is_default() { + return; + } + + self.svg.write_attribute_fmt( + name, + format_args!("matrix({} {} {} {} {} {})", ts.a, ts.b, ts.c, ts.d, ts.e, ts.f), + ); + } + + fn write_spread_method_attribute( + &mut self, + extend: ttf_parser::colr::GradientExtend, + ) { + self.svg.write_attribute( + "spreadMethod", + match extend { + ttf_parser::colr::GradientExtend::Pad => &"pad", + ttf_parser::colr::GradientExtend::Repeat => &"repeat", + ttf_parser::colr::GradientExtend::Reflect => &"reflect", + }, + ); + } + + fn paint_solid(&mut self, color: ttf_parser::RgbaColor) { + self.svg.start_element("path"); + self.write_color_attribute("fill", color); + let opacity = f32::from(color.alpha) / 255.0; + self.svg.write_attribute("fill-opacity", &opacity); + self.write_transform_attribute("transform", self.outline_transform); + 
self.svg.write_attribute("d", self.path_buf); + self.svg.end_element(); + } + + fn paint_linear_gradient(&mut self, gradient: ttf_parser::colr::LinearGradient<'a>) { + let gradient_id = format!("lg{}", self.gradient_index); + self.gradient_index += 1; + + let gradient_transform = paint_transform(self.outline_transform, self.transform); + + // TODO: We ignore x2, y2. Have to apply them somehow. + // TODO: The way spreadMode works in ttf and svg is a bit different. In SVG, the spreadMode + // will always be applied based on x1/y1 and x2/y2. However, in TTF the spreadMode will + // be applied from the first/last stop. So if we have a gradient with x1=0 x2=1, and + // a stop at x=0.4 and x=0.6, then in SVG we will always see a padding, while in ttf + // we will see the actual spreadMode. We need to account for that somehow. + self.svg.start_element("linearGradient"); + self.svg.write_attribute("id", &gradient_id); + self.svg.write_attribute("x1", &gradient.x0); + self.svg.write_attribute("y1", &gradient.y0); + self.svg.write_attribute("x2", &gradient.x1); + self.svg.write_attribute("y2", &gradient.y1); + self.svg.write_attribute("gradientUnits", &"userSpaceOnUse"); + self.write_spread_method_attribute(gradient.extend); + self.write_transform_attribute("gradientTransform", gradient_transform); + self.write_gradient_stops( + gradient.stops(self.palette_index, self.face.variation_coordinates()), + ); + self.svg.end_element(); + + self.svg.start_element("path"); + self.svg + .write_attribute_fmt("fill", format_args!("url(#{gradient_id})")); + self.write_transform_attribute("transform", self.outline_transform); + self.svg.write_attribute("d", self.path_buf); + self.svg.end_element(); + } + + fn paint_radial_gradient(&mut self, gradient: ttf_parser::colr::RadialGradient<'a>) { + let gradient_id = format!("rg{}", self.gradient_index); + self.gradient_index += 1; + + let gradient_transform = paint_transform(self.outline_transform, self.transform); + + 
self.svg.start_element("radialGradient"); + self.svg.write_attribute("id", &gradient_id); + self.svg.write_attribute("cx", &gradient.x1); + self.svg.write_attribute("cy", &gradient.y1); + self.svg.write_attribute("r", &gradient.r1); + self.svg.write_attribute("fr", &gradient.r0); + self.svg.write_attribute("fx", &gradient.x0); + self.svg.write_attribute("fy", &gradient.y0); + self.svg.write_attribute("gradientUnits", &"userSpaceOnUse"); + self.write_spread_method_attribute(gradient.extend); + self.write_transform_attribute("gradientTransform", gradient_transform); + self.write_gradient_stops( + gradient.stops(self.palette_index, self.face.variation_coordinates()), + ); + self.svg.end_element(); + + self.svg.start_element("path"); + self.svg + .write_attribute_fmt("fill", format_args!("url(#{gradient_id})")); + self.write_transform_attribute("transform", self.outline_transform); + self.svg.write_attribute("d", self.path_buf); + self.svg.end_element(); + } + + fn paint_sweep_gradient(&mut self, _: ttf_parser::colr::SweepGradient<'a>) {} +} + +fn paint_transform( + outline_transform: ttf_parser::Transform, + transform: ttf_parser::Transform, +) -> ttf_parser::Transform { + let outline_transform = tiny_skia_path::Transform::from_row( + outline_transform.a, + outline_transform.b, + outline_transform.c, + outline_transform.d, + outline_transform.e, + outline_transform.f, + ); + + let gradient_transform = tiny_skia_path::Transform::from_row( + transform.a, + transform.b, + transform.c, + transform.d, + transform.e, + transform.f, + ); + + let gradient_transform = outline_transform + .invert() + // In theory, we should error out. But the transform shouldn't ever be uninvertible, so let's ignore it. 
+ .unwrap_or_default() + .pre_concat(gradient_transform); + + ttf_parser::Transform { + a: gradient_transform.sx, + b: gradient_transform.ky, + c: gradient_transform.kx, + d: gradient_transform.sy, + e: gradient_transform.tx, + f: gradient_transform.ty, + } +} + +impl GlyphPainter<'_> { + fn clip_with_path(&mut self, path: &str) { + let clip_id = format!("cp{}", self.clip_path_index); + self.clip_path_index += 1; + + self.svg.start_element("clipPath"); + self.svg.write_attribute("id", &clip_id); + self.svg.start_element("path"); + self.write_transform_attribute("transform", self.outline_transform); + self.svg.write_attribute("d", &path); + self.svg.end_element(); + self.svg.end_element(); + + self.svg.start_element("g"); + self.svg + .write_attribute_fmt("clip-path", format_args!("url(#{clip_id})")); + } +} + +impl<'a> ttf_parser::colr::Painter<'a> for GlyphPainter<'a> { + fn outline_glyph(&mut self, glyph_id: ttf_parser::GlyphId) { + self.path_buf.clear(); + let mut builder = ColrBuilder(self.path_buf); + match self.face.outline_glyph(glyph_id, &mut builder) { + Some(v) => v, + None => return, + }; + builder.finish(); + + // We have to write outline using the current transform. + self.outline_transform = self.transform; + } + + fn push_layer(&mut self, mode: ttf_parser::colr::CompositeMode) { + self.svg.start_element("g"); + + use ttf_parser::colr::CompositeMode; + // TODO: Need to figure out how to represent the other blend modes + // in SVG. 
+ let mode = match mode { + CompositeMode::SourceOver => "normal", + CompositeMode::Screen => "screen", + CompositeMode::Overlay => "overlay", + CompositeMode::Darken => "darken", + CompositeMode::Lighten => "lighten", + CompositeMode::ColorDodge => "color-dodge", + CompositeMode::ColorBurn => "color-burn", + CompositeMode::HardLight => "hard-light", + CompositeMode::SoftLight => "soft-light", + CompositeMode::Difference => "difference", + CompositeMode::Exclusion => "exclusion", + CompositeMode::Multiply => "multiply", + CompositeMode::Hue => "hue", + CompositeMode::Saturation => "saturation", + CompositeMode::Color => "color", + CompositeMode::Luminosity => "luminosity", + _ => "normal", + }; + self.svg.write_attribute_fmt( + "style", + format_args!("mix-blend-mode: {mode}; isolation: isolate"), + ); + } + + fn pop_layer(&mut self) { + self.svg.end_element(); // g + } + + fn push_transform(&mut self, transform: ttf_parser::Transform) { + self.transforms_stack.push(self.transform); + self.transform = ttf_parser::Transform::combine(self.transform, transform); + } + + fn paint(&mut self, paint: ttf_parser::colr::Paint<'a>) { + match paint { + ttf_parser::colr::Paint::Solid(color) => self.paint_solid(color), + ttf_parser::colr::Paint::LinearGradient(lg) => self.paint_linear_gradient(lg), + ttf_parser::colr::Paint::RadialGradient(rg) => self.paint_radial_gradient(rg), + ttf_parser::colr::Paint::SweepGradient(sg) => self.paint_sweep_gradient(sg), + } + } + + fn pop_transform(&mut self) { + if let Some(ts) = self.transforms_stack.pop() { + self.transform = ts + } + } + + fn push_clip(&mut self) { + self.clip_with_path(&self.path_buf.clone()); + } + + fn pop_clip(&mut self) { + self.svg.end_element(); + } + + fn push_clip_box(&mut self, clipbox: ttf_parser::colr::ClipBox) { + let x_min = clipbox.x_min; + let x_max = clipbox.x_max; + let y_min = clipbox.y_min; + let y_max = clipbox.y_max; + + let clip_path = format!( + "M {x_min} {y_min} L {x_max} {y_min} L {x_max} 
/// A map whose keys are PostScript names and whose values are the metadata
/// overrides (family, style, weight, stretch) for the font with that name.
///
/// Fields that an entry leaves unset stay `None`.
static EXCEPTION_MAP: phf::Map<&'static str, Exception> = phf::phf_map! {
    // The old version of Arial-Black, published by Microsoft in 1996 in their
    // "core fonts for the web" project, has a wrong weight of 400.
    // See https://corefonts.sourceforge.net/.
    "Arial-Black" => Exception::new()
        .weight(900),
    // Archivo Narrow is different from Archivo and Archivo Black. Since Archivo Black
    // seems identical to Archivo weight 900, only differentiate between Archivo and
    // Archivo Narrow.
    "ArchivoNarrow-Regular" => Exception::new()
        .family("Archivo Narrow"),
    "ArchivoNarrow-Italic" => Exception::new()
        .family("Archivo Narrow"),
    "ArchivoNarrow-Bold" => Exception::new()
        .family("Archivo Narrow"),
    "ArchivoNarrow-BoldItalic" => Exception::new()
        .family("Archivo Narrow"),
    // Fandol fonts designed for Chinese typesetting.
    // See https://ctan.org/tex-archive/fonts/fandol/.
    "FandolHei-Bold" => Exception::new()
        .weight(700),
    "FandolSong-Bold" => Exception::new()
        .weight(700),
    // Noto fonts
    "NotoNaskhArabicUISemi-Bold" => Exception::new()
        .family("Noto Naskh Arabic UI")
        .weight(600),
    "NotoSansSoraSompengSemi-Bold" => Exception::new()
        .family("Noto Sans Sora Sompeng")
        .weight(600),
    "NotoSans-DisplayBlackItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedBlackItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedBold" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedExtraBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedExtraLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedMediumItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedSemiBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayCondensedThinItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedBlackItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedBold" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedExtraBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedExtraLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedMediumItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedSemiBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraCondensedThinItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayExtraLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayMediumItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedBlackItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedBold" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedExtraBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedExtraLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedLightItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedMediumItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedSemiBoldItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplaySemiCondensedThinItalic" => Exception::new()
        .family("Noto Sans Display"),
    "NotoSans-DisplayThinItalic" => Exception::new()
        .family("Noto Sans Display"),
    // The following three PostScript names are only used in version 2.007 of
    // the font. Other versions, while having different PostScript names,
    // happen to have correct metadata.
    // NOTE(review): this comment originally named the "Noto Sans" font, but
    // the three entries below are Noto Serif Display names; confirm which font
    // is meant.
    "NotoSerif-DisplayCondensedBold" => Exception::new()
        .family("Noto Serif Display"),
    "NotoSerif-DisplayExtraCondensedBold" => Exception::new()
        .family("Noto Serif Display"),
    "NotoSerif-DisplaySemiCondensedBold" => Exception::new()
        .family("Noto Serif Display"),
    // New Computer Modern
    "NewCM08-Book" => Exception::new()
        .family("New Computer Modern 08")
        .weight(450),
    "NewCM08-BookItalic" => Exception::new()
        .family("New Computer Modern 08")
        .weight(450),
    "NewCM08-Italic" => Exception::new()
        .family("New Computer Modern 08"),
    "NewCM08-Regular" => Exception::new()
        .family("New Computer Modern 08"),
    "NewCM10-Bold" => Exception::new()
        .family("New Computer Modern"),
    "NewCM10-BoldItalic" => Exception::new()
        .family("New Computer Modern"),
    "NewCM10-Book" => Exception::new()
        .family("New Computer Modern")
        .weight(450),
    "NewCM10-BookItalic" => Exception::new()
        .family("New Computer Modern")
        .weight(450),
    "NewCM10-Italic" => Exception::new()
        .family("New Computer Modern"),
    "NewCM10-Regular" => Exception::new()
        .family("New Computer Modern"),
    "NewCMMath-Bold" => Exception::new()
        .family("New Computer Modern Math")
        .weight(700),
    "NewCMMath-Book" => Exception::new()
        .family("New Computer Modern Math")
        .weight(450),
    "NewCMMath-Regular" => Exception::new()
        .family("New Computer Modern Math"),
    "NewCMMono10-Bold" => Exception::new()
        .family("New Computer Modern Mono"),
    "NewCMMono10-BoldOblique" => Exception::new()
        .family("New Computer Modern Mono"),
    "NewCMMono10-Book" => Exception::new()
        .family("New Computer Modern Mono")
        .weight(450),
    "NewCMMono10-BookItalic" => Exception::new()
        .family("New Computer Modern Mono")
        .weight(450),
    "NewCMMono10-Italic" => Exception::new()
        .family("New Computer Modern Mono"),
    "NewCMMono10-Regular" => Exception::new()
        .family("New Computer Modern Mono"),
    "NewCMSans08-Book" => Exception::new()
        .family("New Computer Modern Sans 08")
        .weight(450),
    "NewCMSans08-BookOblique" => Exception::new()
        .family("New Computer Modern Sans 08")
        .weight(450),
    "NewCMSans08-Oblique" => Exception::new()
        .family("New Computer Modern Sans 08"),
    "NewCMSans08-Regular" => Exception::new()
        .family("New Computer Modern Sans 08"),
    "NewCMSans10-Bold" => Exception::new()
        .family("New Computer Modern Sans"),
    "NewCMSans10-BoldOblique" => Exception::new()
        .family("New Computer Modern Sans"),
    "NewCMSans10-Book" => Exception::new()
        .family("New Computer Modern Sans")
        .weight(450),
    "NewCMSans10-BookOblique" => Exception::new()
        .family("New Computer Modern Sans")
        .weight(450)
        .style(FontStyle::Oblique),
    "NewCMSans10-Oblique" => Exception::new()
        .family("New Computer Modern Sans")
        .style(FontStyle::Oblique),
    "NewCMSans10-Regular" => Exception::new()
        .family("New Computer Modern Sans"),
    "NewCMUncial08-Bold" => Exception::new()
        .family("New Computer Modern Uncial 08"),
    "NewCMUncial08-Book" => Exception::new()
        .family("New Computer Modern Uncial 08")
        .weight(450),
    "NewCMUncial08-Regular" => Exception::new()
        .family("New Computer Modern Uncial 08"),
    "NewCMUncial10-Bold" => Exception::new()
        .family("New Computer Modern Uncial"),
    "NewCMUncial10-Book" => Exception::new()
        .family("New Computer Modern Uncial")
        .weight(450),
    "NewCMUncial10-Regular" => Exception::new()
        .family("New Computer Modern Uncial"),
    // Latin Modern
    "LMMono8-Regular" => Exception::new()
        .family("Latin Modern Mono 8"),
    "LMMono9-Regular" => Exception::new()
        .family("Latin Modern Mono 9"),
    "LMMono12-Regular" => Exception::new()
        .family("Latin Modern Mono 12"),
    "LMMonoLt10-BoldOblique" => Exception::new()
        .style(FontStyle::Oblique),
    "LMMonoLt10-Regular" => Exception::new()
        .weight(300),
    "LMMonoLt10-Oblique" => Exception::new()
        .weight(300)
        .style(FontStyle::Oblique),
    "LMMonoLtCond10-Regular" => Exception::new()
        .weight(300)
        .stretch(666),
    "LMMonoLtCond10-Oblique" => Exception::new()
        .weight(300)
        .style(FontStyle::Oblique)
        .stretch(666),
    "LMMonoPropLt10-Regular" => Exception::new()
        .weight(300),
    "LMMonoPropLt10-Oblique" => Exception::new()
        .weight(300),
    "LMRoman5-Regular" => Exception::new()
        .family("Latin Modern Roman 5"),
    "LMRoman6-Regular" => Exception::new()
        .family("Latin Modern Roman 6"),
    "LMRoman7-Regular" => Exception::new()
        .family("Latin Modern Roman 7"),
    "LMRoman8-Regular" => Exception::new()
        .family("Latin Modern Roman 8"),
    "LMRoman9-Regular" => Exception::new()
        .family("Latin Modern Roman 9"),
    "LMRoman12-Regular" => Exception::new()
        .family("Latin Modern Roman 12"),
    "LMRoman17-Regular" => Exception::new()
        .family("Latin Modern Roman 17"),
    "LMRoman7-Italic" => Exception::new()
        .family("Latin Modern Roman 7"),
    "LMRoman8-Italic" => Exception::new()
        .family("Latin Modern Roman 8"),
    "LMRoman9-Italic" => Exception::new()
        .family("Latin Modern Roman 9"),
    "LMRoman12-Italic" => Exception::new()
        .family("Latin Modern Roman 12"),
    "LMRoman5-Bold" => Exception::new()
        .family("Latin Modern Roman 5"),
    "LMRoman6-Bold" => Exception::new()
        .family("Latin Modern Roman 6"),
    "LMRoman7-Bold" => Exception::new()
        .family("Latin Modern Roman 7"),
    "LMRoman8-Bold" => Exception::new()
        .family("Latin Modern Roman 8"),
    "LMRoman9-Bold" => Exception::new()
        .family("Latin Modern Roman 9"),
    "LMRoman12-Bold" => Exception::new()
        .family("Latin Modern Roman 12"),
    "LMRomanSlant8-Regular" => Exception::new()
        .family("Latin Modern Roman 8"),
    "LMRomanSlant9-Regular" => Exception::new()
        .family("Latin Modern Roman 9"),
    "LMRomanSlant12-Regular" => Exception::new()
        .family("Latin Modern Roman 12"),
    "LMRomanSlant17-Regular" => Exception::new()
        .family("Latin Modern Roman 17"),
    "LMSans8-Regular" => Exception::new()
        .family("Latin Modern Sans 8"),
    "LMSans9-Regular" => Exception::new()
        .family("Latin Modern Sans 9"),
    "LMSans12-Regular" => Exception::new()
        .family("Latin Modern Sans 12"),
    "LMSans17-Regular" => Exception::new()
        .family("Latin Modern Sans 17"),
    "LMSans8-Oblique" => Exception::new()
        .family("Latin Modern Sans 8"),
    "LMSans9-Oblique" => Exception::new()
        .family("Latin Modern Sans 9"),
    "LMSans12-Oblique" => Exception::new()
        .family("Latin Modern Sans 12"),
    "LMSans17-Oblique" => Exception::new()
        .family("Latin Modern Sans 17"),
    // STKaiti is a set of Kai fonts. Their weight values need to be corrected
    // according to their PostScript names.
    "STKaitiSC-Regular" => Exception::new().weight(400),
    "STKaitiTC-Regular" => Exception::new().weight(400),
    "STKaitiSC-Bold" => Exception::new().weight(700),
    "STKaitiTC-Bold" => Exception::new().weight(700),
    "STKaitiSC-Black" => Exception::new().weight(900),
    "STKaitiTC-Black" => Exception::new().weight(900),
};
/// Parse a font from data and collection index.
///
/// Returns `None` if either `ttf-parser` or `rustybuzz` fails to parse the
/// face at `index`, or if no `FontInfo` can be extracted from it.
pub fn new(data: Bytes, index: u32) -> Option<Self> {
    // Safety:
    // - The slice's location is stable in memory:
    //   - We don't move the underlying vector
    //   - Nobody else can move it since we have a strong ref to the `Arc`.
    // - The internal 'static lifetime is not leaked because it's rewritten
    //   to the self-lifetime in `ttf()`.
    let slice: &'static [u8] =
        unsafe { std::slice::from_raw_parts(data.as_ptr(), data.len()) };

    // Both faces borrow from the same byte slice.
    let ttf = ttf_parser::Face::parse(slice, index).ok()?;
    let rusty = rustybuzz::Face::from_slice(slice, index)?;
    let metrics = FontMetrics::from_ttf(&ttf);
    let info = FontInfo::from_ttf(&ttf)?;

    // `data` is stored next to the faces that point into it.
    Some(Self(Arc::new(Repr { data, index, info, metrics, ttf, rusty })))
}
+ &self.0.rusty + } + + /// Resolve the top and bottom edges of text. + pub fn edges( + &self, + top_edge: TopEdge, + bottom_edge: BottomEdge, + font_size: Abs, + bounds: TextEdgeBounds, + ) -> (Abs, Abs) { + let cell = OnceCell::new(); + let bbox = |gid, f: fn(ttf_parser::Rect) -> i16| { + cell.get_or_init(|| self.ttf().glyph_bounding_box(GlyphId(gid))) + .map(|bbox| self.to_em(f(bbox)).at(font_size)) + .unwrap_or_default() + }; + + let top = match top_edge { + TopEdge::Metric(metric) => match metric.try_into() { + Ok(metric) => self.metrics().vertical(metric).at(font_size), + Err(_) => match bounds { + TextEdgeBounds::Zero => Abs::zero(), + TextEdgeBounds::Frame(frame) => frame.ascent(), + TextEdgeBounds::Glyph(gid) => bbox(gid, |b| b.y_max), + }, + }, + TopEdge::Length(length) => length.at(font_size), + }; + + let bottom = match bottom_edge { + BottomEdge::Metric(metric) => match metric.try_into() { + Ok(metric) => -self.metrics().vertical(metric).at(font_size), + Err(_) => match bounds { + TextEdgeBounds::Zero => Abs::zero(), + TextEdgeBounds::Frame(frame) => frame.descent(), + TextEdgeBounds::Glyph(gid) => -bbox(gid, |b| b.y_min), + }, + }, + BottomEdge::Length(length) => -length.at(font_size), + }; + + (top, bottom) + } +} + +impl Hash for Font { + fn hash<H: Hasher>(&self, state: &mut H) { + self.0.data.hash(state); + self.0.index.hash(state); + } +} + +impl Debug for Font { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Font({}, {:?})", self.info().family, self.info().variant) + } +} + +impl Eq for Font {} + +impl PartialEq for Font { + fn eq(&self, other: &Self) -> bool { + self.0.data == other.0.data && self.0.index == other.0.index + } +} + +/// Metrics of a font. +#[derive(Debug, Copy, Clone)] +pub struct FontMetrics { + /// How many font units represent one em unit. + pub units_per_em: f64, + /// The distance from the baseline to the typographic ascender. + pub ascender: Em, + /// The approximate height of uppercase letters. 
+ pub cap_height: Em, + /// The approximate height of non-ascending lowercase letters. + pub x_height: Em, + /// The distance from the baseline to the typographic descender. + pub descender: Em, + /// Recommended metrics for a strikethrough line. + pub strikethrough: LineMetrics, + /// Recommended metrics for an underline. + pub underline: LineMetrics, + /// Recommended metrics for an overline. + pub overline: LineMetrics, +} + +impl FontMetrics { + /// Extract the font's metrics. + pub fn from_ttf(ttf: &ttf_parser::Face) -> Self { + let units_per_em = f64::from(ttf.units_per_em()); + let to_em = |units| Em::from_units(units, units_per_em); + + let ascender = to_em(ttf.typographic_ascender().unwrap_or(ttf.ascender())); + let cap_height = ttf.capital_height().filter(|&h| h > 0).map_or(ascender, to_em); + let x_height = ttf.x_height().filter(|&h| h > 0).map_or(ascender, to_em); + let descender = to_em(ttf.typographic_descender().unwrap_or(ttf.descender())); + let strikeout = ttf.strikeout_metrics(); + let underline = ttf.underline_metrics(); + + let strikethrough = LineMetrics { + position: strikeout.map_or(Em::new(0.25), |s| to_em(s.position)), + thickness: strikeout + .or(underline) + .map_or(Em::new(0.06), |s| to_em(s.thickness)), + }; + + let underline = LineMetrics { + position: underline.map_or(Em::new(-0.2), |s| to_em(s.position)), + thickness: underline + .or(strikeout) + .map_or(Em::new(0.06), |s| to_em(s.thickness)), + }; + + let overline = LineMetrics { + position: cap_height + Em::new(0.1), + thickness: underline.thickness, + }; + + Self { + units_per_em, + ascender, + cap_height, + x_height, + descender, + strikethrough, + underline, + overline, + } + } + + /// Look up a vertical metric. 
+ pub fn vertical(&self, metric: VerticalFontMetric) -> Em { + match metric { + VerticalFontMetric::Ascender => self.ascender, + VerticalFontMetric::CapHeight => self.cap_height, + VerticalFontMetric::XHeight => self.x_height, + VerticalFontMetric::Baseline => Em::zero(), + VerticalFontMetric::Descender => self.descender, + } + } +} + +/// Metrics for a decorative line. +#[derive(Debug, Copy, Clone)] +pub struct LineMetrics { + /// The vertical offset of the line from the baseline. Positive goes + /// upwards, negative downwards. + pub position: Em, + /// The thickness of the line. + pub thickness: Em, +} + +/// Identifies a vertical metric of a font. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum VerticalFontMetric { + /// The font's ascender, which typically exceeds the height of all glyphs. + Ascender, + /// The approximate height of uppercase letters. + CapHeight, + /// The approximate height of non-ascending lowercase letters. + XHeight, + /// The baseline on which the letters rest. + Baseline, + /// The font's descender, which typically exceeds the depth of all glyphs. + Descender, +} + +/// Defines how to resolve a `Bounds` text edge. +#[derive(Debug, Copy, Clone)] +pub enum TextEdgeBounds<'a> { + /// Set the bounds to zero. + Zero, + /// Use the bounding box of the given glyph for the bounds. + Glyph(u16), + /// Use the dimension of the given frame for the bounds. + Frame(&'a Frame), +} diff --git a/crates/typst-library/src/text/font/variant.rs b/crates/typst-library/src/text/font/variant.rs new file mode 100644 index 00000000..c7a00fb3 --- /dev/null +++ b/crates/typst-library/src/text/font/variant.rs @@ -0,0 +1,320 @@ +use std::fmt::{self, Debug, Formatter}; + +use ecow::EcoString; +use serde::{Deserialize, Serialize}; + +use crate::foundations::{cast, Cast, IntoValue, Repr}; +use crate::layout::Ratio; + +/// Properties that distinguish a font from other fonts in the same family. 
+#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[derive(Serialize, Deserialize)] +pub struct FontVariant { + /// The style of the font (normal / italic / oblique). + pub style: FontStyle, + /// How heavy the font is (100 - 900). + pub weight: FontWeight, + /// How condensed or expanded the font is (0.5 - 2.0). + pub stretch: FontStretch, +} + +impl FontVariant { + /// Create a variant from its three components. + pub fn new(style: FontStyle, weight: FontWeight, stretch: FontStretch) -> Self { + Self { style, weight, stretch } + } +} + +impl Debug for FontVariant { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}-{:?}-{:?}", self.style, self.weight, self.stretch) + } +} + +/// The style of a font. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[derive(Serialize, Deserialize, Cast)] +#[serde(rename_all = "kebab-case")] +pub enum FontStyle { + /// The default, typically upright style. + Normal, + /// A cursive style with custom letterform. + Italic, + /// Just a slanted version of the normal style. + Oblique, +} + +impl FontStyle { + /// The conceptual distance between the styles, expressed as a number. + pub fn distance(self, other: Self) -> u16 { + if self == other { + 0 + } else if self != Self::Normal && other != Self::Normal { + 1 + } else { + 2 + } + } +} + +impl Default for FontStyle { + fn default() -> Self { + Self::Normal + } +} + +impl From<usvg::FontStyle> for FontStyle { + fn from(style: usvg::FontStyle) -> Self { + match style { + usvg::FontStyle::Normal => Self::Normal, + usvg::FontStyle::Italic => Self::Italic, + usvg::FontStyle::Oblique => Self::Oblique, + } + } +} + +/// The weight of a font. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[derive(Serialize, Deserialize)] +#[serde(transparent)] +pub struct FontWeight(pub(super) u16); + +/// Font weight names and numbers. 
+/// See `<https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face/font-weight#common_weight_name_mapping>` +impl FontWeight { + /// Thin weight (100). + pub const THIN: Self = Self(100); + + /// Extra light weight (200). + pub const EXTRALIGHT: Self = Self(200); + + /// Light weight (300). + pub const LIGHT: Self = Self(300); + + /// Regular weight (400). + pub const REGULAR: Self = Self(400); + + /// Medium weight (500). + pub const MEDIUM: Self = Self(500); + + /// Semibold weight (600). + pub const SEMIBOLD: Self = Self(600); + + /// Bold weight (700). + pub const BOLD: Self = Self(700); + + /// Extrabold weight (800). + pub const EXTRABOLD: Self = Self(800); + + /// Black weight (900). + pub const BLACK: Self = Self(900); + + /// Create a font weight from a number between 100 and 900, clamping it if + /// necessary. + pub fn from_number(weight: u16) -> Self { + Self(weight.clamp(100, 900)) + } + + /// The number between 100 and 900. + pub fn to_number(self) -> u16 { + self.0 + } + + /// Add (or remove) weight, saturating at the boundaries of 100 and 900. + pub fn thicken(self, delta: i16) -> Self { + Self((self.0 as i16).saturating_add(delta).clamp(100, 900) as u16) + } + + /// The absolute number distance between this and another font weight. + pub fn distance(self, other: Self) -> u16 { + (self.0 as i16 - other.0 as i16).unsigned_abs() + } +} + +impl Default for FontWeight { + fn default() -> Self { + Self::REGULAR + } +} + +impl Debug for FontWeight { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From<fontdb::Weight> for FontWeight { + fn from(weight: fontdb::Weight) -> Self { + Self::from_number(weight.0) + } +} + +cast! 
{ + FontWeight, + self => IntoValue::into_value(match self { + FontWeight::THIN => "thin", + FontWeight::EXTRALIGHT => "extralight", + FontWeight::LIGHT => "light", + FontWeight::REGULAR => "regular", + FontWeight::MEDIUM => "medium", + FontWeight::SEMIBOLD => "semibold", + FontWeight::BOLD => "bold", + FontWeight::EXTRABOLD => "extrabold", + FontWeight::BLACK => "black", + _ => return self.to_number().into_value(), + }), + v: i64 => Self::from_number(v.clamp(0, u16::MAX as i64) as u16), + /// Thin weight (100). + "thin" => Self::THIN, + /// Extra light weight (200). + "extralight" => Self::EXTRALIGHT, + /// Light weight (300). + "light" => Self::LIGHT, + /// Regular weight (400). + "regular" => Self::REGULAR, + /// Medium weight (500). + "medium" => Self::MEDIUM, + /// Semibold weight (600). + "semibold" => Self::SEMIBOLD, + /// Bold weight (700). + "bold" => Self::BOLD, + /// Extrabold weight (800). + "extrabold" => Self::EXTRABOLD, + /// Black weight (900). + "black" => Self::BLACK, +} + +/// The width of a font. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[derive(Serialize, Deserialize)] +#[serde(transparent)] +pub struct FontStretch(pub(super) u16); + +impl FontStretch { + /// Ultra-condensed stretch (50%). + pub const ULTRA_CONDENSED: Self = Self(500); + + /// Extra-condensed stretch (62.5%). + pub const EXTRA_CONDENSED: Self = Self(625); + + /// Condensed stretch (75%). + pub const CONDENSED: Self = Self(750); + + /// Semi-condensed stretch (87.5%). + pub const SEMI_CONDENSED: Self = Self(875); + + /// Normal stretch (100%). + pub const NORMAL: Self = Self(1000); + + /// Semi-expanded stretch (112.5%). + pub const SEMI_EXPANDED: Self = Self(1125); + + /// Expanded stretch (125%). + pub const EXPANDED: Self = Self(1250); + + /// Extra-expanded stretch (150%). + pub const EXTRA_EXPANDED: Self = Self(1500); + + /// Ultra-expanded stretch (200%). 
+ pub const ULTRA_EXPANDED: Self = Self(2000); + + /// Create a font stretch from a ratio between 0.5 and 2.0, clamping it if + /// necessary. + pub fn from_ratio(ratio: Ratio) -> Self { + Self((ratio.get().clamp(0.5, 2.0) * 1000.0) as u16) + } + + /// Create a font stretch from an OpenType-style number between 1 and 9, + /// clamping it if necessary. + pub fn from_number(stretch: u16) -> Self { + match stretch { + 0 | 1 => Self::ULTRA_CONDENSED, + 2 => Self::EXTRA_CONDENSED, + 3 => Self::CONDENSED, + 4 => Self::SEMI_CONDENSED, + 5 => Self::NORMAL, + 6 => Self::SEMI_EXPANDED, + 7 => Self::EXPANDED, + 8 => Self::EXTRA_EXPANDED, + _ => Self::ULTRA_EXPANDED, + } + } + + /// The ratio between 0.5 and 2.0 corresponding to this stretch. + pub fn to_ratio(self) -> Ratio { + Ratio::new(self.0 as f64 / 1000.0) + } + + /// Round to one of the pre-defined variants. + pub fn round(self) -> Self { + match self.0 { + ..=562 => Self::ULTRA_CONDENSED, + 563..=687 => Self::EXTRA_CONDENSED, + 688..=812 => Self::CONDENSED, + 813..=937 => Self::SEMI_CONDENSED, + 938..=1062 => Self::NORMAL, + 1063..=1187 => Self::SEMI_EXPANDED, + 1188..=1374 => Self::EXPANDED, + 1375..=1749 => Self::EXTRA_EXPANDED, + 1750.. => Self::ULTRA_EXPANDED, + } + } + + /// The absolute ratio distance between this and another font stretch. 
+ pub fn distance(self, other: Self) -> Ratio { + (self.to_ratio() - other.to_ratio()).abs() + } +} + +impl Default for FontStretch { + fn default() -> Self { + Self::NORMAL + } +} + +impl Repr for FontStretch { + fn repr(&self) -> EcoString { + self.to_ratio().repr() + } +} + +impl From<usvg::FontStretch> for FontStretch { + fn from(stretch: usvg::FontStretch) -> Self { + match stretch { + usvg::FontStretch::UltraCondensed => Self::ULTRA_CONDENSED, + usvg::FontStretch::ExtraCondensed => Self::EXTRA_CONDENSED, + usvg::FontStretch::Condensed => Self::CONDENSED, + usvg::FontStretch::SemiCondensed => Self::SEMI_CONDENSED, + usvg::FontStretch::Normal => Self::NORMAL, + usvg::FontStretch::SemiExpanded => Self::SEMI_EXPANDED, + usvg::FontStretch::Expanded => Self::EXPANDED, + usvg::FontStretch::ExtraExpanded => Self::EXTRA_EXPANDED, + usvg::FontStretch::UltraExpanded => Self::ULTRA_EXPANDED, + } + } +} + +cast! { + FontStretch, + self => self.to_ratio().into_value(), + v: Ratio => Self::from_ratio(v), +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_font_weight_distance() { + let d = |a, b| FontWeight(a).distance(FontWeight(b)); + assert_eq!(d(500, 200), 300); + assert_eq!(d(500, 500), 0); + assert_eq!(d(500, 900), 400); + assert_eq!(d(10, 100), 90); + } + + #[test] + fn test_font_stretch_debug() { + assert_eq!(FontStretch::EXPANDED.repr(), "125%") + } +} diff --git a/crates/typst-library/src/text/item.rs b/crates/typst-library/src/text/item.rs new file mode 100644 index 00000000..ed559aec --- /dev/null +++ b/crates/typst-library/src/text/item.rs @@ -0,0 +1,118 @@ +use std::fmt::{self, Debug, Formatter}; +use std::ops::Range; + +use ecow::EcoString; +use typst_syntax::Span; + +use crate::layout::{Abs, Em}; +use crate::text::{is_default_ignorable, Font, Lang, Region}; +use crate::visualize::{FixedStroke, Paint}; + +/// A run of shaped text. +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct TextItem { + /// The font the glyphs are contained in. 
+ pub font: Font, + /// The font size. + pub size: Abs, + /// Glyph color. + pub fill: Paint, + /// Glyph stroke. + pub stroke: Option<FixedStroke>, + /// The natural language of the text. + pub lang: Lang, + /// The region of the text. + pub region: Option<Region>, + /// The item's plain text. + pub text: EcoString, + /// The glyphs. The number of glyphs may be different from the number of + /// characters in the plain text due to e.g. ligatures. + pub glyphs: Vec<Glyph>, +} + +impl TextItem { + /// The width of the text run. + pub fn width(&self) -> Abs { + self.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size) + } +} + +impl Debug for TextItem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str("Text(")?; + self.text.fmt(f)?; + f.write_str(")") + } +} + +/// A glyph in a run of shaped text. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Glyph { + /// The glyph's index in the font. + pub id: u16, + /// The advance width of the glyph. + pub x_advance: Em, + /// The horizontal offset of the glyph. + pub x_offset: Em, + /// The range of the glyph in its item's text. The range's length may + /// be more than one due to multi-byte UTF-8 encoding or ligatures. + pub range: Range<u16>, + /// The source code location of the text. + pub span: (Span, u16), +} + +impl Glyph { + /// The range of the glyph in its item's text. + pub fn range(&self) -> Range<usize> { + usize::from(self.range.start)..usize::from(self.range.end) + } +} + +/// A slice of a [`TextItem`]. +pub struct TextItemView<'a> { + /// The whole item this is a part of + pub item: &'a TextItem, + /// The glyphs of this slice + pub glyph_range: Range<usize>, +} + +impl<'a> TextItemView<'a> { + /// Build a TextItemView for the whole contents of a TextItem. + pub fn full(text: &'a TextItem) -> Self { + Self::from_glyph_range(text, 0..text.glyphs.len()) + } + + /// Build a new [`TextItemView`] from a [`TextItem`] and a range of glyphs. 
+ pub fn from_glyph_range(text: &'a TextItem, glyph_range: Range<usize>) -> Self { + TextItemView { item: text, glyph_range } + } + + /// Returns the glyphs of the slice. + /// + /// Note that the ranges are not remapped. They still point into the + /// original text. + pub fn glyphs(&self) -> &[Glyph] { + &self.item.glyphs[self.glyph_range.clone()] + } + + /// The plain text for the given glyph from `glyphs()`. This is an + /// approximation since glyphs do not correspond 1-1 with codepoints. + pub fn glyph_text(&self, glyph: &Glyph) -> EcoString { + // Trim default ignorables which might have ended up in the glyph's + // cluster. Keep interior ones so that joined emojis work. All of this + // is a hack and needs to be reworked. See + // https://github.com/typst/typst/pull/5099 + self.item.text[glyph.range()] + .trim_matches(is_default_ignorable) + .into() + } + + /// The total width of this text slice. + pub fn width(&self) -> Abs { + self.glyphs() + .iter() + .map(|g| g.x_advance) + .sum::<Em>() + .at(self.item.size) + } +} diff --git a/crates/typst-library/src/text/lang.rs b/crates/typst-library/src/text/lang.rs new file mode 100644 index 00000000..64ab1a7c --- /dev/null +++ b/crates/typst-library/src/text/lang.rs @@ -0,0 +1,317 @@ +use std::collections::HashMap; +use std::str::FromStr; + +use ecow::{eco_format, EcoString}; + +use crate::diag::Hint; +use crate::foundations::{cast, StyleChain}; +use crate::layout::Dir; +use crate::text::TextElem; + +macro_rules! 
translation { + ($lang:literal) => { + ($lang, include_str!(concat!("../../translations/", $lang, ".txt"))) + }; +} + +const TRANSLATIONS: [(&str, &str); 36] = [ + translation!("ar"), + translation!("ca"), + translation!("cs"), + translation!("da"), + translation!("de"), + translation!("en"), + translation!("es"), + translation!("et"), + translation!("fi"), + translation!("fr"), + translation!("gl"), + translation!("gr"), + translation!("he"), + translation!("hu"), + translation!("is"), + translation!("it"), + translation!("ja"), + translation!("la"), + translation!("nb"), + translation!("nl"), + translation!("nn"), + translation!("pl"), + translation!("pt-PT"), + translation!("pt"), + translation!("ro"), + translation!("ru"), + translation!("sl"), + translation!("sq"), + translation!("sr"), + translation!("sv"), + translation!("tl"), + translation!("tr"), + translation!("ua"), + translation!("vi"), + translation!("zh-TW"), + translation!("zh"), +]; + +/// An identifier for a natural language. 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Lang([u8; 3], u8); + +impl Lang { + pub const ALBANIAN: Self = Self(*b"sq ", 2); + pub const ARABIC: Self = Self(*b"ar ", 2); + pub const BOKMÅL: Self = Self(*b"nb ", 2); + pub const CATALAN: Self = Self(*b"ca ", 2); + pub const CHINESE: Self = Self(*b"zh ", 2); + pub const CROATIAN: Self = Self(*b"hr ", 2); + pub const CZECH: Self = Self(*b"cs ", 2); + pub const DANISH: Self = Self(*b"da ", 2); + pub const DUTCH: Self = Self(*b"nl ", 2); + pub const ENGLISH: Self = Self(*b"en ", 2); + pub const ESTONIAN: Self = Self(*b"et ", 2); + pub const FILIPINO: Self = Self(*b"tl ", 2); + pub const FINNISH: Self = Self(*b"fi ", 2); + pub const FRENCH: Self = Self(*b"fr ", 2); + pub const GALICIAN: Self = Self(*b"gl ", 2); + pub const GERMAN: Self = Self(*b"de ", 2); + pub const GREEK: Self = Self(*b"gr ", 2); + pub const HEBREW: Self = Self(*b"he ", 2); + pub const HUNGARIAN: Self = Self(*b"hu ", 2); + pub const ICELANDIC: Self = Self(*b"is ", 2); + pub const ITALIAN: Self = Self(*b"it ", 2); + pub const JAPANESE: Self = Self(*b"ja ", 2); + pub const LATIN: Self = Self(*b"la ", 2); + pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3); + pub const NYNORSK: Self = Self(*b"nn ", 2); + pub const POLISH: Self = Self(*b"pl ", 2); + pub const PORTUGUESE: Self = Self(*b"pt ", 2); + pub const ROMANIAN: Self = Self(*b"ro ", 2); + pub const RUSSIAN: Self = Self(*b"ru ", 2); + pub const SERBIAN: Self = Self(*b"sr ", 2); + pub const SLOVAK: Self = Self(*b"sk ", 2); + pub const SLOVENIAN: Self = Self(*b"sl ", 2); + pub const SPANISH: Self = Self(*b"es ", 2); + pub const SWEDISH: Self = Self(*b"sv ", 2); + pub const TURKISH: Self = Self(*b"tr ", 2); + pub const UKRAINIAN: Self = Self(*b"ua ", 2); + pub const VIETNAMESE: Self = Self(*b"vi ", 2); + + /// Return the language code as an all lowercase string slice. 
+ pub fn as_str(&self) -> &str { + std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default() + } + + /// The default direction for the language. + pub fn dir(self) -> Dir { + match self.as_str() { + "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur" + | "yi" => Dir::RTL, + _ => Dir::LTR, + } + } +} + +impl FromStr for Lang { + type Err = &'static str; + + /// Construct a language from a two- or three-byte ISO 639-1/2/3 code. + fn from_str(iso: &str) -> Result<Self, Self::Err> { + let len = iso.len(); + if matches!(len, 2..=3) && iso.is_ascii() { + let mut bytes = [b' '; 3]; + bytes[..len].copy_from_slice(iso.as_bytes()); + bytes.make_ascii_lowercase(); + Ok(Self(bytes, len as u8)) + } else { + Err("expected two or three letter language code (ISO 639-1/2/3)") + } + } +} + +cast! { + Lang, + self => self.as_str().into_value(), + string: EcoString => { + let result = Self::from_str(&string); + if result.is_err() { + if let Some((lang, region)) = string.split_once('-') { + if Lang::from_str(lang).is_ok() && Region::from_str(region).is_ok() { + return result + .hint(eco_format!( + "you should leave only \"{}\" in the `lang` parameter and specify \"{}\" in the `region` parameter", + lang, region, + )); + } + } + } + + result? + } +} + +/// An identifier for a region somewhere in the world. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Region([u8; 2]); + +impl Region { + /// Return the region code as an all uppercase string slice. + pub fn as_str(&self) -> &str { + std::str::from_utf8(&self.0).unwrap_or_default() + } +} + +impl PartialEq<&str> for Region { + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +impl FromStr for Region { + type Err = &'static str; + + /// Construct a region from its two-byte ISO 3166-1 alpha-2 code. 
+ fn from_str(iso: &str) -> Result<Self, Self::Err> { + if iso.len() == 2 && iso.is_ascii() { + let mut bytes: [u8; 2] = iso.as_bytes().try_into().unwrap(); + bytes.make_ascii_uppercase(); + Ok(Self(bytes)) + } else { + Err("expected two letter region code (ISO 3166-1 alpha-2)") + } + } +} + +cast! { + Region, + self => self.as_str().into_value(), + string: EcoString => Self::from_str(&string)?, +} + +/// An ISO 15924-type script identifier. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct WritingScript([u8; 4], u8); + +impl WritingScript { + /// Return the script as an all lowercase string slice. + pub fn as_str(&self) -> &str { + std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default() + } + + /// Return the script code as four raw bytes, padded with trailing + /// spaces if the code is shorter. + pub fn as_bytes(&self) -> &[u8; 4] { + &self.0 + } +} + +impl FromStr for WritingScript { + type Err = &'static str; + + /// Construct a script from its ISO 15924 code. + fn from_str(iso: &str) -> Result<Self, Self::Err> { + let len = iso.len(); + if matches!(len, 3..=4) && iso.is_ascii() { + let mut bytes = [b' '; 4]; + bytes[..len].copy_from_slice(iso.as_bytes()); + bytes.make_ascii_lowercase(); + Ok(Self(bytes, len as u8)) + } else { + Err("expected three or four letter script code (ISO 15924 or 'math')") + } + } +} + +cast! { + WritingScript, + self => self.as_str().into_value(), + string: EcoString => Self::from_str(&string)?, +} + +/// The name with which an element is referenced. +pub trait LocalName { + /// The key of an element in order to get its localized name. + const KEY: &'static str; + + /// Get the name in the given language and (optionally) region. + fn local_name(lang: Lang, region: Option<Region>) -> &'static str { + localized_str(lang, region, Self::KEY) + } + + /// Gets the local name from the style chain. 
+ fn local_name_in(styles: StyleChain) -> &'static str + where + Self: Sized, + { + Self::local_name(TextElem::lang_in(styles), TextElem::region_in(styles)) + } +} + +/// Retrieves the localized string for a given language and region. +/// +/// The key is looked up for the language + region first, then for the +/// language alone, and finally falls back silently to English. +/// +/// # Panics +/// Panics if the key is found in none of these bundles, or if one of +/// the consulted translation files is malformed. +#[comemo::memoize] +pub fn localized_str(lang: Lang, region: Option<Region>, key: &str) -> &'static str { + let lang_region_bundle = parse_language_bundle(lang, region).unwrap(); + if let Some(str) = lang_region_bundle.get(key) { + return str; + } + let lang_bundle = parse_language_bundle(lang, None).unwrap(); + if let Some(str) = lang_bundle.get(key) { + return str; + } + let english_bundle = parse_language_bundle(Lang::ENGLISH, None).unwrap(); + english_bundle.get(key).unwrap() +} + +/// Parses the translation file for a given language and region. +/// Only returns an error if the language file is malformed. 
+#[comemo::memoize] +fn parse_language_bundle( + lang: Lang, + region: Option<Region>, +) -> Result<HashMap<&'static str, &'static str>, &'static str> { + let language_tuple = TRANSLATIONS.iter().find(|it| it.0 == lang_str(lang, region)); + let Some((_lang_name, language_file)) = language_tuple else { + return Ok(HashMap::new()); + }; + + let mut bundle = HashMap::new(); + let lines = language_file.trim().lines(); + for line in lines { + if line.trim().starts_with('#') { + continue; + } + let (key, val) = line + .split_once('=') + .ok_or("malformed translation file: line without \"=\"")?; + let (key, val) = (key.trim(), val.trim()); + if val.is_empty() { + return Err("malformed translation file: empty translation value"); + } + let duplicate = bundle.insert(key.trim(), val.trim()); + if duplicate.is_some() { + return Err("malformed translation file: duplicate key"); + } + } + Ok(bundle) +} + +/// Convert language + region to a string to be able to get a file name. +fn lang_str(lang: Lang, region: Option<Region>) -> EcoString { + EcoString::from(lang.as_str()) + + region.map_or_else(EcoString::new, |r| EcoString::from("-") + r.as_str()) +} + +#[cfg(test)] +mod tests { + use typst_utils::option_eq; + + use super::*; + + #[test] + fn test_region_option_eq() { + let region = Some(Region([b'U', b'S'])); + assert!(option_eq(region, "US")); + assert!(!option_eq(region, "AB")); + } +} diff --git a/crates/typst-library/src/text/linebreak.rs b/crates/typst-library/src/text/linebreak.rs new file mode 100644 index 00000000..0519e1c4 --- /dev/null +++ b/crates/typst-library/src/text/linebreak.rs @@ -0,0 +1,46 @@ +use typst_utils::singleton; + +use crate::foundations::{elem, Content, NativeElement}; + +/// Inserts a line break. +/// +/// Advances the paragraph to the next line. A single trailing line break at the +/// end of a paragraph is ignored, but more than one creates additional empty +/// lines. 
+/// +/// # Example +/// ```example +/// *Date:* 26.12.2022 \ +/// *Topic:* Infrastructure Test \ +/// *Severity:* High \ +/// ``` +/// +/// # Syntax +/// This function also has dedicated syntax: To insert a line break, simply write +/// a backslash followed by whitespace. This always creates an unjustified +/// break. +#[elem(title = "Line Break")] +pub struct LinebreakElem { + /// Whether to justify the line before the break. + /// + /// This is useful if you found a better line break opportunity in your + /// justified text than Typst did. + /// + /// ```example + /// #set par(justify: true) + /// #let jb = linebreak(justify: true) + /// + /// I have manually tuned the #jb + /// line breaks in this paragraph #jb + /// for an _interesting_ result. #jb + /// ``` + #[default(false)] + pub justify: bool, +} + +impl LinebreakElem { + /// Get the globally shared linebreak element. + pub fn shared() -> &'static Content { + singleton!(Content, LinebreakElem::new().pack()) + } +} diff --git a/crates/typst-library/src/text/lorem.rs b/crates/typst-library/src/text/lorem.rs new file mode 100644 index 00000000..5d01a550 --- /dev/null +++ b/crates/typst-library/src/text/lorem.rs @@ -0,0 +1,24 @@ +use crate::foundations::{func, Str}; + +/// Creates blind text. +/// +/// This function yields a Latin-like _Lorem Ipsum_ blind text with the given +/// number of words. The sequence of words generated by the function is always +/// the same but randomly chosen. As usual for blind texts, it does not make any +/// sense. Use it as a placeholder to try layouts. +/// +/// # Example +/// ```example +/// = Blind Text +/// #lorem(30) +/// +/// = More Blind Text +/// #lorem(15) +/// ``` +#[func(keywords = ["Blind Text"])] +pub fn lorem( + /// The length of the blind text in words. 
+ words: usize, +) -> Str { + lipsum::lipsum(words).replace("--", "–").into() +} diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs new file mode 100644 index 00000000..acf100b5 --- /dev/null +++ b/crates/typst-library/src/text/mod.rs @@ -0,0 +1,1318 @@ +//! Text handling. + +mod case; +mod deco; +mod font; +mod item; +mod lang; +mod linebreak; +#[path = "lorem.rs"] +mod lorem_; +mod raw; +mod shift; +#[path = "smallcaps.rs"] +mod smallcaps_; +mod smartquote; +mod space; + +pub use self::case::*; +pub use self::deco::*; +pub use self::font::*; +pub use self::item::*; +pub use self::lang::*; +pub use self::linebreak::*; +pub use self::lorem_::*; +pub use self::raw::*; +pub use self::shift::*; +pub use self::smallcaps_::*; +pub use self::smartquote::*; +pub use self::space::*; + +use std::fmt::{self, Debug, Formatter}; + +use ecow::{eco_format, EcoString}; +use icu_properties::sets::CodePointSetData; +use icu_provider::AsDeserializingBufferProvider; +use icu_provider_blob::BlobDataProvider; +use once_cell::sync::Lazy; +use rustybuzz::Feature; +use smallvec::SmallVec; +use ttf_parser::Tag; +use typst_syntax::Spanned; + +use crate::diag::{bail, warning, HintedStrResult, SourceResult}; +use crate::engine::Engine; +use crate::foundations::{ + cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict, + Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Repr, Resolve, + Scope, Set, Smart, StyleChain, +}; +use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel}; +use crate::model::ParElem; +use crate::visualize::{Color, Paint, RelativeTo, Stroke}; +use crate::World; + +/// Text styling. +/// +/// The [text function]($text) is of particular interest. +#[category] +pub static TEXT: Category; + +/// Hook up all `text` definitions. 
+pub(super) fn define(global: &mut Scope) { + global.category(TEXT); + global.define_elem::<TextElem>(); + global.define_elem::<LinebreakElem>(); + global.define_elem::<SmartQuoteElem>(); + global.define_elem::<SubElem>(); + global.define_elem::<SuperElem>(); + global.define_elem::<UnderlineElem>(); + global.define_elem::<OverlineElem>(); + global.define_elem::<StrikeElem>(); + global.define_elem::<HighlightElem>(); + global.define_elem::<SmallcapsElem>(); + global.define_elem::<RawElem>(); + global.define_func::<lower>(); + global.define_func::<upper>(); + global.define_func::<lorem>(); +} + +/// Customizes the look and layout of text in a variety of ways. +/// +/// This function is used frequently, both with set rules and directly. While +/// the set rule is often the simpler choice, calling the `text` function +/// directly can be useful when passing text as an argument to another function. +/// +/// # Example +/// ```example +/// #set text(18pt) +/// With a set rule. +/// +/// #emph(text(blue)[ +/// With a function call. +/// ]) +/// ``` +#[elem(Debug, Construct, PlainText, Repr)] +pub struct TextElem { + /// A font family name or priority list of font family names. + /// + /// When processing text, Typst tries all specified font families in order + /// until it finds a font that has the necessary glyphs. In the example + /// below, the font `Inria Serif` is preferred, but since it does not + /// contain Arabic glyphs, the arabic text uses `Noto Sans Arabic` instead. + /// + /// The collection of available fonts differs by platform: + /// + /// - In the web app, you can see the list of available fonts by clicking on + /// the "Ag" button. You can provide additional fonts by uploading `.ttf` + /// or `.otf` files into your project. They will be discovered + /// automatically. The priority is: project fonts > server fonts. 
+ /// + /// - Locally, Typst uses your installed system fonts or embedded fonts in + /// the CLI, which are `Libertinus Serif`, `New Computer Modern`, + /// `New Computer Modern Math`, and `DejaVu Sans Mono`. In addition, you + /// can use the `--font-path` argument or `TYPST_FONT_PATHS` environment + /// variable to add directories that should be scanned for fonts. The + /// priority is: `--font-path` > system fonts > embedded fonts. Run + /// `typst fonts` to see the fonts that Typst has discovered on your + /// system. Note that you can pass the `--ignore-system-fonts` parameter + /// to the CLI to ensure Typst won't search for system fonts. + /// + /// ```example + /// #set text(font: "PT Sans") + /// This is sans-serif. + /// + /// #set text(font: ( + /// "Inria Serif", + /// "Noto Sans Arabic", + /// )) + /// + /// This is Latin. \ + /// هذا عربي. + /// ``` + #[parse({ + let font_list: Option<Spanned<FontList>> = args.named("font")?; + if let Some(list) = &font_list { + check_font_list(engine, list); + } + font_list.map(|font_list| font_list.v) + })] + #[default(FontList(vec![FontFamily::new("Libertinus Serif")]))] + #[borrowed] + #[ghost] + pub font: FontList, + + /// Whether to allow last resort font fallback when the primary font list + /// contains no match. This lets Typst search through all available fonts + /// for the most similar one that has the necessary glyphs. + /// + /// _Note:_ Currently, there are no warnings when fallback is disabled and + /// no glyphs are found. Instead, your text shows up in the form of "tofus": + /// Small boxes that indicate the lack of an appropriate glyph. In the + /// future, you will be able to instruct Typst to issue warnings so you know + /// something is up. + /// + /// ```example + /// #set text(font: "Inria Serif") + /// هذا عربي + /// + /// #set text(fallback: false) + /// هذا عربي + /// ``` + #[default(true)] + #[ghost] + pub fallback: bool, + + /// The desired font style. 
+ /// + /// When an italic style is requested and only an oblique one is available, + /// it is used. Similarly, the other way around, an italic style can stand + /// in for an oblique one. When neither an italic nor an oblique style is + /// available, Typst selects the normal style. Since most fonts are only + /// available either in an italic or oblique style, the difference between + /// italic and oblique style is rarely observable. + /// + /// If you want to emphasize your text, you should do so using the [emph] + /// function instead. This makes it easy to adapt the style later if you + /// change your mind about how to signify the emphasis. + /// + /// ```example + /// #text(font: "Libertinus Serif", style: "italic")[Italic] + /// #text(font: "DejaVu Sans", style: "oblique")[Oblique] + /// ``` + #[ghost] + pub style: FontStyle, + + /// The desired thickness of the font's glyphs. Accepts an integer between + /// `{100}` and `{900}` or one of the predefined weight names. When the + /// desired weight is not available, Typst selects the font from the family + /// that is closest in weight. + /// + /// If you want to strongly emphasize your text, you should do so using the + /// [strong] function instead. This makes it easy to adapt the style later + /// if you change your mind about how to signify the strong emphasis. + /// + /// ```example + /// #set text(font: "IBM Plex Sans") + /// + /// #text(weight: "light")[Light] \ + /// #text(weight: "regular")[Regular] \ + /// #text(weight: "medium")[Medium] \ + /// #text(weight: 500)[Medium] \ + /// #text(weight: "bold")[Bold] + /// ``` + #[ghost] + pub weight: FontWeight, + + /// The desired width of the glyphs. Accepts a ratio between `{50%}` and + /// `{200%}`. When the desired width is not available, Typst selects the + /// font from the family that is closest in stretch. This will only stretch + /// the text if a condensed or expanded version of the font is available. 
+ /// + /// If you want to adjust the amount of space between characters instead of + /// stretching the glyphs themselves, use the [`tracking`]($text.tracking) + /// property instead. + /// + /// ```example + /// #text(stretch: 75%)[Condensed] \ + /// #text(stretch: 100%)[Normal] + /// ``` + #[ghost] + pub stretch: FontStretch, + + /// The size of the glyphs. This value forms the basis of the `em` unit: + /// `{1em}` is equivalent to the font size. + /// + /// You can also give the font size itself in `em` units. Then, it is + /// relative to the previous font size. + /// + /// ```example + /// #set text(size: 20pt) + /// very #text(1.5em)[big] text + /// ``` + #[parse(args.named_or_find("size")?)] + #[fold] + #[default(TextSize(Abs::pt(11.0).into()))] + #[resolve] + #[ghost] + pub size: TextSize, + + /// The glyph fill paint. + /// + /// ```example + /// #set text(fill: red) + /// This text is red. + /// ``` + #[parse({ + let paint: Option<Spanned<Paint>> = args.named_or_find("fill")?; + if let Some(paint) = &paint { + if paint.v.relative() == Smart::Custom(RelativeTo::Self_) { + bail!( + paint.span, + "gradients and patterns on text must be relative to the parent"; + hint: "make sure to set `relative: auto` on your text fill" + ); + } + } + paint.map(|paint| paint.v) + })] + #[default(Color::BLACK.into())] + #[ghost] + pub fill: Paint, + + /// How to stroke the text. + /// + /// ```example + /// #text(stroke: 0.5pt + red)[Stroked] + /// ``` + #[resolve] + #[ghost] + pub stroke: Option<Stroke>, + + /// The amount of space that should be added between characters. + /// + /// ```example + /// #set text(tracking: 1.5pt) + /// Distant text. + /// ``` + #[resolve] + #[ghost] + pub tracking: Length, + + /// The amount of space between words. + /// + /// Can be given as an absolute length, but also relative to the width of + /// the space character in the font. 
+ /// + /// If you want to adjust the amount of space between characters rather than + /// words, use the [`tracking`]($text.tracking) property instead. + /// + /// ```example + /// #set text(spacing: 200%) + /// Text with distant words. + /// ``` + #[resolve] + #[default(Rel::one())] + #[ghost] + pub spacing: Rel<Length>, + + /// Whether to automatically insert spacing between CJK and Latin characters. + /// + /// ```example + /// #set text(cjk-latin-spacing: auto) + /// 第4章介绍了基本的API。 + /// + /// #set text(cjk-latin-spacing: none) + /// 第4章介绍了基本的API。 + /// ``` + #[ghost] + pub cjk_latin_spacing: Smart<Option<Never>>, + + /// An amount to shift the text baseline by. + /// + /// ```example + /// A #text(baseline: 3pt)[lowered] + /// word. + /// ``` + #[resolve] + #[ghost] + pub baseline: Length, + + /// Whether certain glyphs can hang over into the margin in justified text. + /// This can make justification visually more pleasing. + /// + /// ```example + /// #set par(justify: true) + /// This justified text has a hyphen in + /// the paragraph's first line. Hanging + /// the hyphen slightly into the margin + /// results in a clearer paragraph edge. + /// + /// #set text(overhang: false) + /// This justified text has a hyphen in + /// the paragraph's first line. Hanging + /// the hyphen slightly into the margin + /// results in a clearer paragraph edge. + /// ``` + #[default(true)] + #[ghost] + pub overhang: bool, + + /// The top end of the conceptual frame around the text used for layout and + /// positioning. This affects the size of containers that hold text. 
+ /// + /// ```example + /// #set rect(inset: 0pt) + /// #set text(size: 20pt) + /// + /// #set text(top-edge: "ascender") + /// #rect(fill: aqua)[Typst] + /// + /// #set text(top-edge: "cap-height") + /// #rect(fill: aqua)[Typst] + /// ``` + #[default(TopEdge::Metric(TopEdgeMetric::CapHeight))] + #[ghost] + pub top_edge: TopEdge, + + /// The bottom end of the conceptual frame around the text used for layout + /// and positioning. This affects the size of containers that hold text. + /// + /// ```example + /// #set rect(inset: 0pt) + /// #set text(size: 20pt) + /// + /// #set text(bottom-edge: "baseline") + /// #rect(fill: aqua)[Typst] + /// + /// #set text(bottom-edge: "descender") + /// #rect(fill: aqua)[Typst] + /// ``` + #[default(BottomEdge::Metric(BottomEdgeMetric::Baseline))] + #[ghost] + pub bottom_edge: BottomEdge, + + /// An [ISO 639-1/2/3 language code.](https://en.wikipedia.org/wiki/ISO_639) + /// + /// Setting the correct language affects various parts of Typst: + /// + /// - The text processing pipeline can make more informed choices. + /// - Hyphenation will use the correct patterns for the language. + /// - [Smart quotes]($smartquote) turn into the correct quotes for the + /// language. + /// - And all other things which are language-aware. + /// + /// ```example + /// #set text(lang: "de") + /// #outline() + /// + /// = Einleitung + /// In diesem Dokument, ... + /// ``` + #[default(Lang::ENGLISH)] + #[ghost] + pub lang: Lang, + + /// An [ISO 3166-1 alpha-2 region code.](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) + /// + /// This lets the text processing pipeline make more informed choices. + #[ghost] + pub region: Option<Region>, + + /// The OpenType writing script. + /// + /// The combination of `{lang}` and `{script}` determines how font features, + /// such as glyph substitution, are implemented. 
Frequently the value is a + /// modified (all-lowercase) ISO 15924 script identifier, and the `math` + /// writing script is used for features appropriate for mathematical + /// symbols. + /// + /// When set to `{auto}`, the default and recommended setting, an + /// appropriate script is chosen for each block of characters sharing a + /// common Unicode script property. + /// + /// ```example + /// #set text( + /// font: "Libertinus Serif", + /// size: 20pt, + /// ) + /// + /// #let scedilla = [Ş] + /// #scedilla // S with a cedilla + /// + /// #set text(lang: "ro", script: "latn") + /// #scedilla // S with a subscript comma + /// + /// #set text(lang: "ro", script: "grek") + /// #scedilla // S with a cedilla + /// ``` + #[ghost] + pub script: Smart<WritingScript>, + + /// The dominant direction for text and inline objects. Possible values are: + /// + /// - `{auto}`: Automatically infer the direction from the `lang` property. + /// - `{ltr}`: Layout text from left to right. + /// - `{rtl}`: Layout text from right to left. + /// + /// When writing in right-to-left scripts like Arabic or Hebrew, you should + /// set the [text language]($text.lang) or direction. While individual runs + /// of text are automatically layouted in the correct direction, setting the + /// dominant direction gives the bidirectional reordering algorithm the + /// necessary information to correctly place punctuation and inline objects. + /// Furthermore, setting the direction affects the alignment values `start` + /// and `end`, which are equivalent to `left` and `right` in `ltr` text and + /// the other way around in `rtl` text. + /// + /// If you set this to `rtl` and experience bugs or in some way bad looking + /// output, please get in touch with us through the + /// [Forum](https://forum.typst.app/), + /// [Discord server](https://discord.gg/2uDybryKPe), + /// or our [contact form](https://typst.app/contact). + /// + /// ```example + /// #set text(dir: rtl) + /// هذا عربي. 
+ /// ``` + #[resolve] + #[ghost] + pub dir: TextDir, + + /// Whether to hyphenate text to improve line breaking. When `{auto}`, text + /// will be hyphenated if and only if justification is enabled. + /// + /// Setting the [text language]($text.lang) ensures that the correct + /// hyphenation patterns are used. + /// + /// ```example + /// #set page(width: 200pt) + /// + /// #set par(justify: true) + /// This text illustrates how + /// enabling hyphenation can + /// improve justification. + /// + /// #set text(hyphenate: false) + /// This text illustrates how + /// enabling hyphenation can + /// improve justification. + /// ``` + #[resolve] + #[ghost] + pub hyphenate: Hyphenate, + + /// The "cost" of various choices when laying out text. A higher cost means + /// the layout engine will make the choice less often. Costs are specified + /// as a ratio of the default cost, so `{50%}` will make text layout twice + /// as eager to make a given choice, while `{200%}` will make it half as + /// eager. + /// + /// Currently, the following costs can be customized: + /// - `hyphenation`: splitting a word across multiple lines + /// - `runt`: ending a paragraph with a line with a single word + /// - `widow`: leaving a single line of paragraph on the next page + /// - `orphan`: leaving a single line of paragraph on the previous page + /// + /// Hyphenation is generally avoided by placing the whole word on the next + /// line, so a higher hyphenation cost can result in awkward justification + /// spacing. + /// + /// Runts are avoided by placing more or fewer words on previous lines, so a + /// higher runt cost can result in more awkward justification spacing. + /// + /// Text layout prevents widows and orphans by default because they are + /// generally discouraged by style guides. However, in some contexts they + /// are allowed because the prevention method, which moves a line to the + /// next page, can result in an uneven number of lines between pages. 
The + /// `widow` and `orphan` costs allow disabling these modifications. + /// (Currently, `{0%}` allows widows/orphans; anything else, including the + /// default of `{100%}`, prevents them. More nuanced cost specification for + /// these modifications is planned for the future.) + /// + /// ```example + /// #set text(hyphenate: true, size: 11.4pt) + /// #set par(justify: true) + /// + /// #lorem(10) + /// + /// // Set hyphenation to ten times the normal cost. + /// #set text(costs: (hyphenation: 1000%)) + /// + /// #lorem(10) + /// ``` + #[fold] + pub costs: Costs, + + /// Whether to apply kerning. + /// + /// When enabled, specific letter pairings move closer together or further + /// apart for a more visually pleasing result. The example below + /// demonstrates how decreasing the gap between the "T" and "o" results in a + /// more natural look. Setting this to `{false}` disables kerning by turning + /// off the OpenType `kern` font feature. + /// + /// ```example + /// #set text(size: 25pt) + /// Totally + /// + /// #set text(kerning: false) + /// Totally + /// ``` + #[default(true)] + #[ghost] + pub kerning: bool, + + /// Whether to apply stylistic alternates. + /// + /// Sometimes fonts contain alternative glyphs for the same codepoint. + /// Setting this to `{true}` switches to these by enabling the OpenType + /// `salt` font feature. + /// + /// ```example + /// #set text( + /// font: "IBM Plex Sans", + /// size: 20pt, + /// ) + /// + /// 0, a, g, ß + /// + /// #set text(alternates: true) + /// 0, a, g, ß + /// ``` + #[default(false)] + #[ghost] + pub alternates: bool, + + /// Which stylistic sets to apply. Font designers can categorize alternative + /// glyphs forms into stylistic sets. As this value is highly font-specific, + /// you need to consult your font to know which sets are available. 
+ /// + /// This can be set to an integer or an array of integers, all + /// of which must be between `{1}` and `{20}`, enabling the + /// corresponding OpenType feature(s) from `ss01` to `ss20`. + /// Setting this to `{none}` will disable all stylistic sets. + /// + /// ```example + /// #set text(font: "IBM Plex Serif") + /// ß vs #text(stylistic-set: 5)[ß] \ + /// 10 years ago vs #text(stylistic-set: (1, 2, 3))[10 years ago] + /// ``` + #[ghost] + pub stylistic_set: StylisticSets, + + /// Whether standard ligatures are active. + /// + /// Certain letter combinations like "fi" are often displayed as a single + /// merged glyph called a _ligature._ Setting this to `{false}` disables + /// these ligatures by turning off the OpenType `liga` and `clig` font + /// features. + /// + /// ```example + /// #set text(size: 20pt) + /// A fine ligature. + /// + /// #set text(ligatures: false) + /// A fine ligature. + /// ``` + #[default(true)] + #[ghost] + pub ligatures: bool, + + /// Whether ligatures that should be used sparingly are active. Setting this + /// to `{true}` enables the OpenType `dlig` font feature. + #[default(false)] + #[ghost] + pub discretionary_ligatures: bool, + + /// Whether historical ligatures are active. Setting this to `{true}` + /// enables the OpenType `hlig` font feature. + #[default(false)] + #[ghost] + pub historical_ligatures: bool, + + /// Which kind of numbers / figures to select. When set to `{auto}`, the + /// default numbers for the font are used. + /// + /// ```example + /// #set text(font: "Noto Sans", 20pt) + /// #set text(number-type: "lining") + /// Number 9. + /// + /// #set text(number-type: "old-style") + /// Number 9. + /// ``` + #[ghost] + pub number_type: Smart<NumberType>, + + /// The width of numbers / figures. When set to `{auto}`, the default + /// numbers for the font are used. + /// + /// ```example + /// #set text(font: "Noto Sans", 20pt) + /// #set text(number-width: "proportional") + /// A 12 B 34. \ + /// A 56 B 78. 
+ /// + /// #set text(number-width: "tabular") + /// A 12 B 34. \ + /// A 56 B 78. + /// ``` + #[ghost] + pub number_width: Smart<NumberWidth>, + + /// Whether to have a slash through the zero glyph. Setting this to `{true}` + /// enables the OpenType `zero` font feature. + /// + /// ```example + /// 0, #text(slashed-zero: true)[0] + /// ``` + #[default(false)] + #[ghost] + pub slashed_zero: bool, + + /// Whether to turn numbers into fractions. Setting this to `{true}` + /// enables the OpenType `frac` font feature. + /// + /// It is not advisable to enable this property globally as it will mess + /// with all appearances of numbers after a slash (e.g., in URLs). Instead, + /// enable it locally when you want a fraction. + /// + /// ```example + /// 1/2 \ + /// #text(fractions: true)[1/2] + /// ``` + #[default(false)] + #[ghost] + pub fractions: bool, + + /// Raw OpenType features to apply. + /// + /// - If given an array of strings, sets the features identified by the + /// strings to `{1}`. + /// - If given a dictionary mapping to numbers, sets the features + /// identified by the keys to the values. + /// + /// ```example + /// // Enable the `frac` feature manually. + /// #set text(features: ("frac",)) + /// 1/2 + /// ``` + #[fold] + #[ghost] + pub features: FontFeatures, + + /// Content in which all text is styled according to the other arguments. + #[external] + #[required] + pub body: Content, + + /// The text. + #[required] + pub text: EcoString, + + /// The offset of the text in the text syntax node referenced by this + /// element's span. + #[internal] + #[ghost] + pub span_offset: usize, + + /// A delta to apply on the font weight. + #[internal] + #[fold] + #[ghost] + pub delta: WeightDelta, + + /// Whether the font style should be inverted. + #[internal] + #[fold] + #[default(ItalicToggle(false))] + #[ghost] + pub emph: ItalicToggle, + + /// Decorative lines. 
+ #[internal] + #[fold] + #[ghost] + pub deco: SmallVec<[Decoration; 1]>, + + /// A case transformation that should be applied to the text. + #[internal] + #[ghost] + pub case: Option<Case>, + + /// Whether small capital glyphs should be used. ("smcp") + #[internal] + #[default(false)] + #[ghost] + pub smallcaps: bool, +} + +impl TextElem { + /// Create a new packed text element. + pub fn packed(text: impl Into<EcoString>) -> Content { + Self::new(text.into()).pack() + } +} + +impl Debug for TextElem { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Text({})", self.text) + } +} + +impl Repr for TextElem { + fn repr(&self) -> EcoString { + eco_format!("[{}]", self.text) + } +} + +impl Construct for TextElem { + fn construct(engine: &mut Engine, args: &mut Args) -> SourceResult<Content> { + // The text constructor is special: It doesn't create a text element. + // Instead, it leaves the passed argument structurally unchanged, but + // styles all text in it. + let styles = Self::set(engine, args)?; + let body = args.expect::<Content>("body")?; + Ok(body.styled_with_map(styles)) + } +} + +impl PlainText for Packed<TextElem> { + fn plain_text(&self, text: &mut EcoString) { + text.push_str(self.text()); + } +} + +/// A lowercased font family like "arial". +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct FontFamily(EcoString); + +impl FontFamily { + /// Create a named font family variant. + pub fn new(string: &str) -> Self { + Self(string.to_lowercase().into()) + } + + /// The lowercased family name. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl Debug for FontFamily { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +cast! { + FontFamily, + self => self.0.into_value(), + string: EcoString => Self::new(&string), +} + +/// Font family fallback list. 
+#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +pub struct FontList(pub Vec<FontFamily>); + +impl<'a> IntoIterator for &'a FontList { + type IntoIter = std::slice::Iter<'a, FontFamily>; + type Item = &'a FontFamily; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +cast! { + FontList, + self => if self.0.len() == 1 { + self.0.into_iter().next().unwrap().0.into_value() + } else { + self.0.into_value() + }, + family: FontFamily => Self(vec![family]), + values: Array => Self(values.into_iter().map(|v| v.cast()).collect::<HintedStrResult<_>>()?), +} + +/// Resolve a prioritized iterator over the font families. +pub fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone { + const FALLBACKS: &[&str] = &[ + "libertinus serif", + "twitter color emoji", + "noto color emoji", + "apple color emoji", + "segoe ui emoji", + ]; + + let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] }; + TextElem::font_in(styles) + .into_iter() + .map(|family| family.as_str()) + .chain(tail.iter().copied()) +} + +/// Resolve the font variant. +pub fn variant(styles: StyleChain) -> FontVariant { + let mut variant = FontVariant::new( + TextElem::style_in(styles), + TextElem::weight_in(styles), + TextElem::stretch_in(styles), + ); + + let WeightDelta(delta) = TextElem::delta_in(styles); + variant.weight = variant + .weight + .thicken(delta.clamp(i16::MIN as i64, i16::MAX as i64) as i16); + + if TextElem::emph_in(styles).0 { + variant.style = match variant.style { + FontStyle::Normal => FontStyle::Italic, + FontStyle::Italic => FontStyle::Normal, + FontStyle::Oblique => FontStyle::Normal, + } + } + + variant +} + +/// The size of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextSize(pub Length); + +impl Fold for TextSize { + fn fold(self, outer: Self) -> Self { + // Multiply the two linear functions. 
+ Self(Length { + em: Em::new(self.0.em.get() * outer.0.em.get()), + abs: self.0.em.get() * outer.0.abs + self.0.abs, + }) + } +} + +impl Resolve for TextSize { + type Output = Abs; + + fn resolve(self, styles: StyleChain) -> Self::Output { + self.0.resolve(styles) + } +} + +cast! { + TextSize, + self => self.0.into_value(), + v: Length => Self(v), +} + +/// Specifies the top edge of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum TopEdge { + /// An edge specified via font metrics or bounding box. + Metric(TopEdgeMetric), + /// An edge specified as a length. + Length(Length), +} + +cast! { + TopEdge, + self => match self { + Self::Metric(metric) => metric.into_value(), + Self::Length(length) => length.into_value(), + }, + v: TopEdgeMetric => Self::Metric(v), + v: Length => Self::Length(v), +} + +/// Metrics that describe the top edge of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum TopEdgeMetric { + /// The font's ascender, which typically exceeds the height of all glyphs. + Ascender, + /// The approximate height of uppercase letters. + CapHeight, + /// The approximate height of non-ascending lowercase letters. + XHeight, + /// The baseline on which the letters rest. + Baseline, + /// The top edge of the glyph's bounding box. + Bounds, +} + +impl TryInto<VerticalFontMetric> for TopEdgeMetric { + type Error = (); + + fn try_into(self) -> Result<VerticalFontMetric, Self::Error> { + match self { + Self::Ascender => Ok(VerticalFontMetric::Ascender), + Self::CapHeight => Ok(VerticalFontMetric::CapHeight), + Self::XHeight => Ok(VerticalFontMetric::XHeight), + Self::Baseline => Ok(VerticalFontMetric::Baseline), + _ => Err(()), + } + } +} + +/// Specifies the top edge of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum BottomEdge { + /// An edge specified via font metrics or bounding box. + Metric(BottomEdgeMetric), + /// An edge specified as a length. + Length(Length), +} + +cast! 
{ + BottomEdge, + self => match self { + Self::Metric(metric) => metric.into_value(), + Self::Length(length) => length.into_value(), + }, + v: BottomEdgeMetric => Self::Metric(v), + v: Length => Self::Length(v), +} + +/// Metrics that describe the bottom edge of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum BottomEdgeMetric { + /// The baseline on which the letters rest. + Baseline, + /// The font's descender, which typically exceeds the depth of all glyphs. + Descender, + /// The bottom edge of the glyph's bounding box. + Bounds, +} + +impl TryInto<VerticalFontMetric> for BottomEdgeMetric { + type Error = (); + + fn try_into(self) -> Result<VerticalFontMetric, Self::Error> { + match self { + Self::Baseline => Ok(VerticalFontMetric::Baseline), + Self::Descender => Ok(VerticalFontMetric::Descender), + _ => Err(()), + } + } +} + +/// The direction of text and inline objects in their line. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextDir(pub Smart<Dir>); + +cast! { + TextDir, + self => self.0.into_value(), + v: Smart<Dir> => { + if v.is_custom_and(|dir| dir.axis() == Axis::Y) { + bail!("text direction must be horizontal"); + } + Self(v) + }, +} + +impl Resolve for TextDir { + type Output = Dir; + + fn resolve(self, styles: StyleChain) -> Self::Output { + match self.0 { + Smart::Auto => TextElem::lang_in(styles).dir(), + Smart::Custom(dir) => dir, + } + } +} + +/// Whether to hyphenate text. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct Hyphenate(pub Smart<bool>); + +cast! { + Hyphenate, + self => self.0.into_value(), + v: Smart<bool> => Self(v), +} + +impl Resolve for Hyphenate { + type Output = bool; + + fn resolve(self, styles: StyleChain) -> Self::Output { + match self.0 { + Smart::Auto => ParElem::justify_in(styles), + Smart::Custom(v) => v, + } + } +} + +/// A set of stylistic sets to enable. 
+#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Hash)] +pub struct StylisticSets(u32); + +impl StylisticSets { + /// Converts this set into a Typst array of values. + pub fn into_array(self) -> Array { + self.sets().map(IntoValue::into_value).collect() + } + + /// Returns whether this set contains a particular stylistic set. + pub fn has(self, ss: u8) -> bool { + self.0 & (1 << (ss as u32)) != 0 + } + + /// Returns an iterator over all stylistic sets to enable. + pub fn sets(self) -> impl Iterator<Item = u8> { + (1..=20).filter(move |i| self.has(*i)) + } +} + +cast! { + StylisticSets, + self => self.into_array().into_value(), + _: NoneValue => Self(0), + v: i64 => match v { + 1 ..= 20 => Self(1 << (v as u32)), + _ => bail!("stylistic set must be between 1 and 20"), + }, + v: Vec<i64> => { + let mut flags = 0; + for i in v { + match i { + 1 ..= 20 => flags |= 1 << (i as u32), + _ => bail!("stylistic set must be between 1 and 20"), + } + } + Self(flags) + }, +} + +/// Which kind of numbers / figures to select. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum NumberType { + /// Numbers that fit well with capital text (the OpenType `lnum` + /// font feature). + Lining, + /// Numbers that fit well into a flow of upper- and lowercase text (the + /// OpenType `onum` font feature). + OldStyle, +} + +/// The width of numbers / figures. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum NumberWidth { + /// Numbers with glyph-specific widths (the OpenType `pnum` font feature). + Proportional, + /// Numbers of equal width (the OpenType `tnum` font feature). + Tabular, +} + +/// OpenType font features settings. +#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +pub struct FontFeatures(pub Vec<(Tag, u32)>); + +cast! 
{ + FontFeatures, + self => self.0 + .into_iter() + .map(|(tag, num)| { + let bytes = tag.to_bytes(); + let key = std::str::from_utf8(&bytes).unwrap_or_default(); + (key.into(), num.into_value()) + }) + .collect::<Dict>() + .into_value(), + values: Array => Self(values + .into_iter() + .map(|v| { + let tag = v.cast::<EcoString>()?; + Ok((Tag::from_bytes_lossy(tag.as_bytes()), 1)) + }) + .collect::<HintedStrResult<_>>()?), + values: Dict => Self(values + .into_iter() + .map(|(k, v)| { + let num = v.cast::<u32>()?; + let tag = Tag::from_bytes_lossy(k.as_bytes()); + Ok((tag, num)) + }) + .collect::<HintedStrResult<_>>()?), +} + +impl Fold for FontFeatures { + fn fold(self, outer: Self) -> Self { + Self(self.0.fold(outer.0)) + } +} + +/// Collect the OpenType features to apply. +pub fn features(styles: StyleChain) -> Vec<Feature> { + let mut tags = vec![]; + let mut feat = |tag: &[u8; 4], value: u32| { + tags.push(Feature::new(Tag::from_bytes(tag), value, ..)); + }; + + // Features that are on by default in Harfbuzz are only added if disabled. + if !TextElem::kerning_in(styles) { + feat(b"kern", 0); + } + + // Features that are off by default in Harfbuzz are only added if enabled. 
+ if TextElem::smallcaps_in(styles) { + feat(b"smcp", 1); + } + + if TextElem::alternates_in(styles) { + feat(b"salt", 1); + } + + for set in TextElem::stylistic_set_in(styles).sets() { + let storage = [b's', b's', b'0' + set / 10, b'0' + set % 10]; + feat(&storage, 1); + } + + if !TextElem::ligatures_in(styles) { + feat(b"liga", 0); + feat(b"clig", 0); + } + + if TextElem::discretionary_ligatures_in(styles) { + feat(b"dlig", 1); + } + + if TextElem::historical_ligatures_in(styles) { + feat(b"hlig", 1); + } + + match TextElem::number_type_in(styles) { + Smart::Auto => {} + Smart::Custom(NumberType::Lining) => feat(b"lnum", 1), + Smart::Custom(NumberType::OldStyle) => feat(b"onum", 1), + } + + match TextElem::number_width_in(styles) { + Smart::Auto => {} + Smart::Custom(NumberWidth::Proportional) => feat(b"pnum", 1), + Smart::Custom(NumberWidth::Tabular) => feat(b"tnum", 1), + } + + if TextElem::slashed_zero_in(styles) { + feat(b"zero", 1); + } + + if TextElem::fractions_in(styles) { + feat(b"frac", 1); + } + + for (tag, value) in TextElem::features_in(styles).0 { + tags.push(Feature::new(tag, value, ..)) + } + + tags +} + +/// A toggle that turns on and off alternatingly if folded. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct ItalicToggle(pub bool); + +impl Fold for ItalicToggle { + fn fold(self, outer: Self) -> Self { + Self(self.0 ^ outer.0) + } +} + +/// A delta that is summed up when folded. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct WeightDelta(pub i64); + +impl Fold for WeightDelta { + fn fold(self, outer: Self) -> Self { + Self(outer.0 + self.0) + } +} + +/// Costs for various layout decisions. +/// +/// Costs are updated (prioritizing the later value) when folded. 
+#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub struct Costs { + hyphenation: Option<Ratio>, + runt: Option<Ratio>, + widow: Option<Ratio>, + orphan: Option<Ratio>, +} + +impl Costs { + #[must_use] + pub fn hyphenation(&self) -> Ratio { + self.hyphenation.unwrap_or(Ratio::one()) + } + + #[must_use] + pub fn runt(&self) -> Ratio { + self.runt.unwrap_or(Ratio::one()) + } + + #[must_use] + pub fn widow(&self) -> Ratio { + self.widow.unwrap_or(Ratio::one()) + } + + #[must_use] + pub fn orphan(&self) -> Ratio { + self.orphan.unwrap_or(Ratio::one()) + } +} + +impl Fold for Costs { + #[inline] + fn fold(self, outer: Self) -> Self { + Self { + hyphenation: self.hyphenation.or(outer.hyphenation), + runt: self.runt.or(outer.runt), + widow: self.widow.or(outer.widow), + orphan: self.orphan.or(outer.orphan), + } + } +} + +cast! { + Costs, + self => dict![ + "hyphenation" => self.hyphenation(), + "runt" => self.runt(), + "widow" => self.widow(), + "orphan" => self.orphan(), + ].into_value(), + mut v: Dict => { + let ret = Self { + hyphenation: v.take("hyphenation").ok().map(|v| v.cast()).transpose()?, + runt: v.take("runt").ok().map(|v| v.cast()).transpose()?, + widow: v.take("widow").ok().map(|v| v.cast()).transpose()?, + orphan: v.take("orphan").ok().map(|v| v.cast()).transpose()?, + }; + v.finish(&["hyphenation", "runt", "widow", "orphan"])?; + ret + }, +} + +/// Whether a codepoint is Unicode `Default_Ignorable`. +pub fn is_default_ignorable(c: char) -> bool { + /// The set of Unicode default ignorables. + static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| { + icu_properties::sets::load_default_ignorable_code_point( + &BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU) + .unwrap() + .as_deserializing(), + ) + .unwrap() + }); + DEFAULT_IGNORABLE_DATA.as_borrowed().contains(c) +} + +/// Checks for font families that are not available. 
///
/// Emits a warning through `engine.sink` for every family in `list` that the
/// font book does not contain. The family `"linux libertine"` always gets a
/// dedicated migration warning, with extra hints when it is installed.
fn check_font_list(engine: &mut Engine, list: &Spanned<FontList>) {
    let book = engine.world.book();
    for family in &list.v {
        let found = book.contains_family(family.as_str());
        if family.as_str() == "linux libertine" {
            let mut warning = warning!(
                list.span,
                "Typst's default font has changed from Linux Libertine to its successor Libertinus Serif";
                hint: "please set the font to `\"Libertinus Serif\"` instead"
            );

            if found {
                warning.hint(
                    "Linux Libertine is available on your system - \
                     you can ignore this warning if you are sure you want to use it",
                );
                warning.hint("this warning will be removed in Typst 0.13");
            }

            engine.sink.warn(warning);
        } else if !found {
            engine.sink.warn(warning!(
                list.span,
                "unknown font family: {}",
                family.as_str(),
            ));
        }
    }
}

// diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs
// new file mode 100644
// index 00000000..5ce77348
// --- /dev/null
// +++ b/crates/typst-library/src/text/raw.rs
// @@ -0,0 +1,926 @@
use std::hash::Hash;
use std::ops::Range;
use std::sync::Arc;

use ecow::{eco_format, EcoString, EcoVec};
use once_cell::sync::Lazy;
use once_cell::unsync::Lazy as UnsyncLazy;
use syntect::highlighting::{self as synt, Theme};
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
use unicode_segmentation::UnicodeSegmentation;

use super::Lang;
use crate::diag::{At, FileError, HintedStrResult, SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{
    cast, elem, scope, Args, Array, Bytes, Content, Fold, NativeElement, Packed,
    PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, Value,
};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::model::{Figurable, ParElem};
use crate::text::{
    FontFamily, FontList, Hyphenate, LinebreakElem, LocalName, TextElem, TextSize,
};
use crate::visualize::Color;
use crate::World;

// Shorthand for highlighter closures.
//
// `StyleFn` is called with (line index, current node, byte range into the
// highlighted code, style) and returns the styled piece. `LineFn` is called
// with (line index, byte range of the line, pieces collected for that line).
type StyleFn<'a> =
    &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
// Value of the `theme` argument: `auto`, `none`, or a theme file path.
type ThemeArgType = Smart<Option<EcoString>>;

/// Raw text with optional syntax highlighting.
///
/// Displays the text verbatim and in a monospace font. This is typically used
/// to embed computer code into your document.
///
/// # Example
/// ````example
/// Adding `rbx` to `rcx` gives
/// the desired result.
///
/// What is ```rust fn main()``` in Rust
/// would be ```c int main()``` in C.
///
/// ```rust
/// fn main() {
///     println!("Hello World!");
/// }
/// ```
///
/// This has ``` `backticks` ``` in it
/// (but the spaces are trimmed). And
/// ``` here``` the leading space is
/// also trimmed.
/// ````
///
/// You can also construct a [`raw`] element programmatically from a string (and
/// provide the language tag via the optional [`lang`]($raw.lang) argument).
/// ```example
/// #raw("fn " + "main() {}", lang: "rust")
/// ```
///
/// # Syntax
/// This function also has dedicated syntax. You can enclose text in 1 or 3+
/// backticks (`` ` ``) to make it raw. Two backticks produce empty raw text.
/// This works both in markup and code.
///
/// When you use three or more backticks, you can additionally specify a
/// language tag for syntax highlighting directly after the opening backticks.
/// Within raw blocks, everything (except for the language tag, if applicable)
/// is rendered as is, in particular, there are no escape sequences.
///
/// The language tag is an identifier that directly follows the opening
/// backticks only if there are three or more backticks. If your text starts
/// with something that looks like an identifier, but no syntax highlighting is
/// needed, start the text with a single space (which will be trimmed) or use
/// the single backtick syntax. If your text should start or end with a
/// backtick, put a space before or after it (it will be trimmed).
#[elem(
    scope,
    title = "Raw Text / Code",
    Synthesize,
    Show,
    ShowSet,
    LocalName,
    Figurable,
    PlainText
)]
pub struct RawElem {
    /// The raw text.
    ///
    /// You can also use raw blocks creatively to create custom syntaxes for
    /// your automations.
    ///
    /// ````example
    /// // Parse numbers in raw blocks with the
    /// // `mydsl` tag and sum them up.
    /// #show raw.where(lang: "mydsl"): it => {
    ///   let sum = 0
    ///   for part in it.text.split("+") {
    ///     sum += int(part.trim())
    ///   }
    ///   sum
    /// }
    ///
    /// ```mydsl
    /// 1 + 2 + 3 + 4 + 5
    /// ```
    /// ````
    #[required]
    pub text: RawContent,

    /// Whether the raw text is displayed as a separate block.
    ///
    /// In markup mode, using one-backtick notation makes this `{false}`.
    /// Using three-backtick notation makes it `{true}` if the enclosed content
    /// contains at least one line break.
    ///
    /// ````example
    /// // Display inline code in a small box
    /// // that retains the correct baseline.
    /// #show raw.where(block: false): box.with(
    ///   fill: luma(240),
    ///   inset: (x: 3pt, y: 0pt),
    ///   outset: (y: 3pt),
    ///   radius: 2pt,
    /// )
    ///
    /// // Display block code in a larger block
    /// // with more padding.
    /// #show raw.where(block: true): block.with(
    ///   fill: luma(240),
    ///   inset: 10pt,
    ///   radius: 4pt,
    /// )
    ///
    /// With `rg`, you can search through your files quickly.
    /// This example searches the current directory recursively
    /// for the text `Hello World`:
    ///
    /// ```bash
    /// rg "Hello World"
    /// ```
    /// ````
    #[default(false)]
    pub block: bool,

    /// The language to syntax-highlight in.
    ///
    /// Apart from typical language tags known from Markdown, this supports the
    /// `{"typ"}`, `{"typc"}`, and `{"typm"}` tags for
    /// [Typst markup]($reference/syntax/#markup),
    /// [Typst code]($reference/syntax/#code), and
    /// [Typst math]($reference/syntax/#math), respectively.
    ///
    /// ````example
    /// ```typ
    /// This is *Typst!*
    /// ```
    ///
    /// This is ```typ also *Typst*```, but inline!
    /// ````
    #[borrowed]
    pub lang: Option<EcoString>,

    /// The horizontal alignment that each line in a raw block should have.
    /// This option is ignored if this is not a raw block (if specified
    /// `block: false` or single backticks were used in markup mode).
    ///
    /// By default, this is set to `{start}`, meaning that raw text is
    /// aligned towards the start of the text direction inside the block
    /// by default, regardless of the current context's alignment (allowing
    /// you to center the raw block itself without centering the text inside
    /// it, for example).
    ///
    /// ````example
    /// #set raw(align: center)
    ///
    /// ```typc
    /// let f(x) = x
    /// code = "centered"
    /// ```
    /// ````
    #[default(HAlignment::Start)]
    pub align: HAlignment,

    /// One or multiple additional syntax definitions to load. The syntax
    /// definitions should be in the
    /// [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html).
    ///
    /// ````example
    /// #set raw(syntaxes: "SExpressions.sublime-syntax")
    ///
    /// ```sexp
    /// (defun factorial (x)
    ///   (if (zerop x)
    ///     ; with a comment
    ///     1
    ///     (* x (factorial (- x 1)))))
    /// ```
    /// ````
    #[parse(
        let (syntaxes, syntaxes_data) = parse_syntaxes(engine, args)?;
        syntaxes
    )]
    #[fold]
    pub syntaxes: SyntaxPaths,

    /// The raw file buffers of syntax definition files.
    #[internal]
    #[parse(syntaxes_data)]
    #[fold]
    pub syntaxes_data: Vec<Bytes>,

    /// The theme to use for syntax highlighting. Theme files should be in the
    /// [`tmTheme` file format](https://www.sublimetext.com/docs/color_schemes_tmtheme.html).
    ///
    /// Applying a theme only affects the color of specifically highlighted
    /// text. It does not consider the theme's foreground and background
    /// properties, so that you retain control over the color of raw text. You
    /// can apply the foreground color yourself with the [`text`] function and
    /// the background with a [filled block]($block.fill). You could also use
    /// the [`xml`] function to extract these properties from the theme.
    ///
    /// Additionally, you can set the theme to `{none}` to disable highlighting.
    ///
    /// ````example
    /// #set raw(theme: "halcyon.tmTheme")
    /// #show raw: it => block(
    ///   fill: rgb("#1d2433"),
    ///   inset: 8pt,
    ///   radius: 5pt,
    ///   text(fill: rgb("#a2aabc"), it)
    /// )
    ///
    /// ```typ
    /// = Chapter 1
    /// #let hi = "Hello World"
    /// ```
    /// ````
    #[parse(
        let (theme_path, theme_data) = parse_theme(engine, args)?;
        theme_path
    )]
    #[borrowed]
    pub theme: ThemeArgType,

    /// The raw file buffer of syntax theme file.
    #[internal]
    #[parse(theme_data.map(Some))]
    #[borrowed]
    pub theme_data: Option<Bytes>,

    /// The size for a tab stop in spaces. A tab is replaced with enough spaces to
    /// align with the next multiple of the size.
    ///
    /// ````example
    /// #set raw(tab-size: 8)
    /// ```tsv
    /// Year Month Day
    /// 2000 2 3
    /// 2001 2 1
    /// 2002 3 10
    /// ```
    /// ````
    #[default(2)]
    pub tab_size: usize,

    /// The stylized lines of raw text.
    ///
    /// Made accessible for the [`raw.line` element]($raw.line).
    /// Allows more styling control in `show` rules.
    #[synthesized]
    pub lines: Vec<Packed<RawLine>>,
}

#[scope]
impl RawElem {
    #[elem]
    type RawLine;
}

impl RawElem {
    /// The supported language names and tags.
    ///
    /// Lists every syntax in `RAW_SYNTAXES` with its file extensions, followed
    /// by the three built-in Typst entries.
    pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> {
        RAW_SYNTAXES
            .syntaxes()
            .iter()
            .map(|syntax| {
                (
                    syntax.name.as_str(),
                    syntax.file_extensions.iter().map(|s| s.as_str()).collect(),
                )
            })
            .chain([
                ("Typst", vec!["typ"]),
                ("Typst (code)", vec!["typc"]),
                ("Typst (math)", vec!["typm"]),
            ])
            .collect()
    }
}

impl Synthesize for Packed<RawElem> {
    /// Highlights the text and stores the result in the synthesized `lines`
    /// field.
    fn synthesize(&mut self, _: &mut Engine, styles: StyleChain) -> SourceResult<()> {
        let seq = self.highlight(styles);
        self.push_lines(seq);
        Ok(())
    }
}

impl Packed<RawElem> {
    /// Splits the raw text into lines and turns each into a `RawLine`.
    ///
    /// Three paths exist: the Typst highlighter for the tags `typ`, `typst`,
    /// `typc` and `typm`; syntect for any other tag a syntax is found for; and
    /// plain, unstyled lines when the theme is `none` or no syntax matches.
    #[comemo::memoize]
    fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
        let elem = self.as_ref();
        let lines = preprocess(elem.text(), styles, self.span());

        let count = lines.len() as i64;
        // The tag is lowercased; without a tag, "txt" is looked up instead.
        let lang = elem
            .lang(styles)
            .as_ref()
            .as_ref()
            .map(|s| s.to_lowercase())
            .or(Some("txt".into()));

        // User-supplied syntaxes are only built if the bundled set has no
        // match. The `unwrap` relies on `parse_syntaxes` having already loaded
        // the same files successfully.
        let extra_syntaxes = UnsyncLazy::new(|| {
            load_syntaxes(&elem.syntaxes(styles), &elem.syntaxes_data(styles)).unwrap()
        });
        // Fallback: one plain `TextElem` per line, numbered from 1.
        let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
            lines.into_iter().enumerate().map(|(i, (line, line_span))| {
                Packed::new(RawLine::new(
                    i as i64 + 1,
                    count,
                    line.clone(),
                    TextElem::packed(line).spanned(line_span),
                ))
                .spanned(line_span)
            })
        };

        // The `unwrap`s rely on `parse_theme` having stored the theme data
        // alongside the path and having parsed it once already.
        let theme = elem.theme(styles).as_ref().as_ref().map(|theme_path| {
            theme_path.as_ref().map(|path| {
                load_theme(path, elem.theme_data(styles).as_ref().as_ref().unwrap())
                    .unwrap()
            })
        });
        let theme: &Theme = match theme {
            Smart::Auto => &RAW_THEME,
            Smart::Custom(Some(ref theme)) => theme,
            // `theme: none` disables highlighting altogether.
            Smart::Custom(None) => return non_highlighted_result(lines).collect(),
        };
        let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);

        let mut seq = vec![];
        if matches!(lang.as_deref(), Some("typ" | "typst" | "typc" | "typm")) {
            // The Typst parser works on one string, so the lines are joined
            // with "\n" and split again by `ThemedHighlighter`.
            let text =
                lines.iter().map(|(s, _)| s.clone()).collect::<Vec<_>>().join("\n");
            let root = match lang.as_deref() {
                Some("typc") => typst_syntax::parse_code(&text),
                Some("typm") => typst_syntax::parse_math(&text),
                _ => typst_syntax::parse(&text),
            };

            ThemedHighlighter::new(
                &text,
                LinkedNode::new(&root),
                synt::Highlighter::new(theme),
                &mut |i, _, range, style| {
                    // Find span and start of line.
                    // Note: Dedent is already applied to the text
                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
                    // NOTE(review): when no '\n' precedes the piece (first
                    // line) this yields 0 even if `range.start > 0`, whereas
                    // the syntect branch below accumulates the byte column.
                    // Possibly `range.start` was intended; confirm.
                    let span_offset = text[..range.start]
                        .rfind('\n')
                        .map_or(0, |i| range.start - (i + 1));
                    styled(&text[range], foreground, style, span, span_offset)
                },
                &mut |i, range, line| {
                    let span = lines.get(i).map_or_else(Span::detached, |l| l.1);
                    seq.push(
                        Packed::new(RawLine::new(
                            (i + 1) as i64,
                            count,
                            EcoString::from(&text[range]),
                            Content::sequence(line.drain(..)),
                        ))
                        .spanned(span),
                    );
                },
            )
            .highlight();
        } else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
            // Bundled syntaxes take precedence over user-supplied ones.
            RAW_SYNTAXES
                .find_syntax_by_token(&token)
                .map(|syntax| (&*RAW_SYNTAXES, syntax))
                .or_else(|| {
                    extra_syntaxes
                        .find_syntax_by_token(&token)
                        .map(|syntax| (&**extra_syntaxes, syntax))
                })
        }) {
            let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
            for (i, (line, line_span)) in lines.into_iter().enumerate() {
                let mut line_content = vec![];
                // Byte offset of the current piece within its line.
                let mut span_offset = 0;
                // A line syntect fails on contributes no pieces
                // (`into_iter().flatten()` on the result).
                for (style, piece) in highlighter
                    .highlight_line(line.as_str(), syntax_set)
                    .into_iter()
                    .flatten()
                {
                    line_content.push(styled(
                        piece,
                        foreground,
                        style,
                        line_span,
                        span_offset,
                    ));
                    span_offset += piece.len();
                }

                seq.push(
                    Packed::new(RawLine::new(
                        i as i64 + 1,
                        count,
                        line,
                        Content::sequence(line_content),
                    ))
                    .spanned(line_span),
                );
            }
        } else {
            seq.extend(non_highlighted_result(lines));
        };

        seq
    }
}

impl Show for Packed<RawElem> {
    /// Joins the synthesized lines with line breaks; for block-level raw text
    /// the result is aligned and wrapped in a `BlockElem`.
    #[typst_macros::time(name = "raw", span = self.span())]
    fn show(&self, _: &mut Engine, styles: StyleChain) -> SourceResult<Content> {
        let lines = self.lines().map(|v| v.as_slice()).unwrap_or_default();

        // n lines plus n - 1 line breaks.
        let mut seq = EcoVec::with_capacity((2 * lines.len()).saturating_sub(1));
        for (i, line) in lines.iter().enumerate() {
            if i != 0 {
                seq.push(LinebreakElem::shared().clone());
            }

            seq.push(line.clone().pack());
        }

        let mut realized = Content::sequence(seq);
        if self.block(styles) {
            // Align the text before inserting it into the block.
            realized = realized.aligned(self.align(styles).into());
            realized = BlockElem::new()
                .with_body(Some(BlockBody::Content(realized)))
                .pack()
                .spanned(self.span());
        }

        Ok(realized)
    }
}

impl ShowSet for Packed<RawElem> {
    /// Default text styles for raw text: no overhang, English language, no
    /// hyphenation, 0.8em size, the "DejaVu Sans Mono" font and, for blocks,
    /// no paragraph shrinking.
    fn show_set(&self, styles: StyleChain) -> Styles {
        let mut out = Styles::new();
        out.set(TextElem::set_overhang(false));
        out.set(TextElem::set_lang(Lang::ENGLISH));
        out.set(TextElem::set_hyphenate(Hyphenate(Smart::Custom(false))));
        out.set(TextElem::set_size(TextSize(Em::new(0.8).into())));
        out.set(TextElem::set_font(FontList(vec![FontFamily::new("DejaVu Sans Mono")])));
        if self.block(styles) {
            out.set(ParElem::set_shrink(false));
        }
        out
    }
}

impl LocalName for Packed<RawElem> {
    const KEY: &'static str = "raw";
}

impl Figurable for Packed<RawElem> {}

impl PlainText for Packed<RawElem> {
    /// Appends the unhighlighted source text.
    fn plain_text(&self, text: &mut EcoString) {
        text.push_str(&self.text().get());
    }
}

/// The content of the raw text.
#[derive(Debug, Clone, Hash, PartialEq)]
pub enum RawContent {
    /// From a string.
    Text(EcoString),
    /// From lines of text.
    Lines(EcoVec<(EcoString, Span)>),
}

impl RawContent {
    /// Returns or synthesizes the text content of the raw text.
    ///
    /// `Lines` are joined with "\n"; zero lines give an empty string.
    fn get(&self) -> EcoString {
        match self.clone() {
            RawContent::Text(text) => text,
            RawContent::Lines(lines) => {
                let mut lines = lines.into_iter().map(|(s, _)| s);
                // With at most one line there is nothing to join.
                if lines.len() <= 1 {
                    lines.next().unwrap_or_default()
                } else {
                    lines.collect::<Vec<_>>().join("\n").into()
                }
            }
        }
    }
}

// Casts to a value as the joined text; a string casts to the `Text` variant.
cast! {
    RawContent,
    self => self.get().into_value(),
    v: EcoString => Self::Text(v),
}

/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by [`raw`] elements.
///
/// It allows you to access various properties of the line, such as the line
/// number, the raw non-highlighted text, the highlighted text, and whether it
/// is the first or last line of the raw block.
#[elem(name = "line", title = "Raw Text / Code Line", Show, PlainText)]
pub struct RawLine {
    /// The line number of the raw line inside of the raw block, starts at 1.
    #[required]
    pub number: i64,

    /// The total number of lines in the raw block.
    #[required]
    pub count: i64,

    /// The line of raw text.
    #[required]
    pub text: EcoString,

    /// The highlighted raw text.
    #[required]
    pub body: Content,
}

impl Show for Packed<RawLine> {
    /// A line shows as its highlighted body.
    #[typst_macros::time(name = "raw.line", span = self.span())]
    fn show(&self, _: &mut Engine, _styles: StyleChain) -> SourceResult<Content> {
        Ok(self.body().clone())
    }
}

impl PlainText for Packed<RawLine> {
    /// Appends the unhighlighted text of the line.
    fn plain_text(&self, text: &mut EcoString) {
        text.push_str(self.text());
    }
}

/// Wrapper struct for the state required to highlight typst code.
struct ThemedHighlighter<'a> {
    /// The code being highlighted.
    code: &'a str,
    /// The current node being highlighted.
    node: LinkedNode<'a>,
    /// The highlighter.
    highlighter: synt::Highlighter<'a>,
    /// The current scopes.
    scopes: Vec<syntect::parsing::Scope>,
    /// The current highlighted line.
    current_line: Vec<Content>,
    /// The range of the current line.
    range: Range<usize>,
    /// The current line number.
    line: usize,
    /// The function to style a piece of text.
    style_fn: StyleFn<'a>,
    /// The function to append a line.
    line_fn: LineFn<'a>,
}

impl<'a> ThemedHighlighter<'a> {
    /// Creates a highlighter positioned at `top`, on line 0, with an empty
    /// scope stack and an empty current line.
    pub fn new(
        code: &'a str,
        top: LinkedNode<'a>,
        highlighter: synt::Highlighter<'a>,
        style_fn: StyleFn<'a>,
        line_fn: LineFn<'a>,
    ) -> Self {
        Self {
            code,
            node: top,
            highlighter,
            range: 0..0,
            scopes: Vec::new(),
            current_line: Vec::new(),
            line: 0,
            style_fn,
            line_fn,
        }
    }

    /// Walks the tree and then emits the last line if it has pending pieces.
    pub fn highlight(&mut self) {
        self.highlight_inner();

        if !self.current_line.is_empty() {
            (self.line_fn)(
                self.line,
                self.range.start..self.code.len(),
                &mut self.current_line,
            );

            self.current_line.clear();
        }
    }

    /// Highlights the current node: a node without children is split at
    /// newlines and styled; any children are then visited recursively.
    fn highlight_inner(&mut self) {
        if self.node.children().len() == 0 {
            let style = self.highlighter.style_for_stack(&self.scopes);
            let segment = &self.code[self.node.range()];

            // Bytes of `segment` consumed so far. The `+ 1` below counts each
            // line separator as one byte (the only caller joins with "\n").
            let mut len = 0;
            for (i, line) in split_newlines(segment).into_iter().enumerate() {
                if i != 0 {
                    // A newline was crossed: emit the finished line (without
                    // its terminator) and start the next one after it.
                    (self.line_fn)(
                        self.line,
                        self.range.start..self.range.end + len - 1,
                        &mut self.current_line,
                    );
                    self.range.start = self.range.end + len;
                    self.line += 1;
                }

                let offset = self.node.range().start + len;
                let token_range = offset..(offset + line.len());
                self.current_line.push((self.style_fn)(
                    self.line,
                    &self.node,
                    token_range,
                    style,
                ));

                len += line.len() + 1;
            }

            self.range.end += segment.len();
        }

        // `self.node` is reassigned inside the loop and not restored; the
        // children iterator was created from the node before the reassignment.
        for child in self.node.children() {
            let mut scopes = self.scopes.clone();
            if let Some(tag) = typst_syntax::highlight(&child) {
                scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap())
            }

            // Install the extended stack for the child, then swap the previous
            // stack back in.
            std::mem::swap(&mut scopes, &mut self.scopes);
            self.node = child;
            self.highlight_inner();
            std::mem::swap(&mut scopes, &mut self.scopes);
        }
    }
}

/// Turns raw content into `(line, span)` pairs with tabs expanded.
///
/// Content that already consists of tab-free lines is returned unchanged.
/// Otherwise the joined text has its tabs expanded and is split at newlines,
/// with every resulting line carrying the element's `span`.
fn preprocess(
    text: &RawContent,
    styles: StyleChain,
    span: Span,
) -> EcoVec<(EcoString, Span)> {
    if let RawContent::Lines(lines) = text {
        if lines.iter().all(|(s, _)| !s.contains('\t')) {
            return lines.clone();
        }
    }

    let mut text = text.get();
    if text.contains('\t') {
        let tab_size = RawElem::tab_size_in(styles);
        text = align_tabs(&text, tab_size);
    }
    split_newlines(&text)
        .into_iter()
        .map(|line| (line.into(), span))
        .collect()
}

/// Style a piece of text with a syntect style.
///
/// A fill is only applied when the style's foreground differs from
/// `foreground`, and a span offset only when it is non-zero.
fn styled(
    piece: &str,
    foreground: synt::Color,
    style: synt::Style,
    span: Span,
    span_offset: usize,
) -> Content {
    let mut body = TextElem::packed(piece).spanned(span);

    if span_offset > 0 {
        body = body.styled(TextElem::set_span_offset(span_offset));
    }

    if style.foreground != foreground {
        body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
    }

    if style.font_style.contains(synt::FontStyle::BOLD) {
        body = body.strong().spanned(span);
    }

    if style.font_style.contains(synt::FontStyle::ITALIC) {
        body = body.emph().spanned(span);
    }

    if style.font_style.contains(synt::FontStyle::UNDERLINE) {
        body = body.underlined().spanned(span);
    }

    body
}

/// Converts a syntect color into a Typst color from its 8-bit RGBA channels.
fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> Color {
    Color::from_u8(r, g, b, a)
}

/// Converts a Typst color into a syntect color via its 8-bit RGBA channels.
fn to_syn(color: Color) -> synt::Color {
    let [r, g, b, a] = color.to_rgb().to_vec4_u8();
    synt::Color { r, g, b, a }
}

/// A list of raw syntax file paths.
#[derive(Debug, Default, Clone, PartialEq, Hash)]
pub struct SyntaxPaths(Vec<EcoString>);

// Accepts a single path or an array of paths.
cast! {
    SyntaxPaths,
    self => self.0.into_value(),
    v: EcoString => Self(vec![v]),
    v: Array => Self(v.into_iter().map(Value::cast).collect::<HintedStrResult<_>>()?),
}

impl Fold for SyntaxPaths {
    /// Delegates to the `Fold` implementation of the inner `Vec`.
    fn fold(self, outer: Self) -> Self {
        Self(self.0.fold(outer.0))
    }
}

/// Load a syntax set from a list of syntax file paths.
+#[comemo::memoize] +#[typst_macros::time(name = "load syntaxes")] +fn load_syntaxes(paths: &SyntaxPaths, bytes: &[Bytes]) -> StrResult<Arc<SyntaxSet>> { + let mut out = SyntaxSetBuilder::new(); + + // We might have multiple sublime-syntax/yaml files + for (path, bytes) in paths.0.iter().zip(bytes.iter()) { + let src = std::str::from_utf8(bytes).map_err(FileError::from)?; + out.add(SyntaxDefinition::load_from_str(src, false, None).map_err(|err| { + eco_format!("failed to parse syntax file `{path}` ({err})") + })?); + } + + Ok(Arc::new(out.build())) +} + +/// Function to parse the syntaxes argument. +/// Much nicer than having it be part of the `element` macro. +fn parse_syntaxes( + engine: &mut Engine, + args: &mut Args, +) -> SourceResult<(Option<SyntaxPaths>, Option<Vec<Bytes>>)> { + let Some(Spanned { v: paths, span }) = + args.named::<Spanned<SyntaxPaths>>("syntaxes")? + else { + return Ok((None, None)); + }; + + // Load syntax files. + let data = paths + .0 + .iter() + .map(|path| { + let id = span.resolve_path(path).at(span)?; + engine.world.file(id).at(span) + }) + .collect::<SourceResult<Vec<Bytes>>>()?; + + // Check that parsing works. + let _ = load_syntaxes(&paths, &data).at(span)?; + + Ok((Some(paths), Some(data))) +} + +#[comemo::memoize] +#[typst_macros::time(name = "load theme")] +fn load_theme(path: &str, bytes: &Bytes) -> StrResult<Arc<synt::Theme>> { + let mut cursor = std::io::Cursor::new(bytes.as_slice()); + + synt::ThemeSet::load_from_reader(&mut cursor) + .map(Arc::new) + .map_err(|err| eco_format!("failed to parse theme file `{path}` ({err})")) +} + +/// Function to parse the theme argument. +/// Much nicer than having it be part of the `element` macro. +fn parse_theme( + engine: &mut Engine, + args: &mut Args, +) -> SourceResult<(Option<ThemeArgType>, Option<Bytes>)> { + let Some(Spanned { v: path, span }) = args.named::<Spanned<ThemeArgType>>("theme")? + else { + // Argument `theme` not found. 
+ return Ok((None, None)); + }; + + let Smart::Custom(path) = path else { + // Argument `theme` is `auto`. + return Ok((Some(Smart::Auto), None)); + }; + + let Some(path) = path else { + // Argument `theme` is `none`. + return Ok((Some(Smart::Custom(None)), None)); + }; + + // Load theme file. + let id = span.resolve_path(&path).at(span)?; + let data = engine.world.file(id).at(span)?; + + // Check that parsing works. + let _ = load_theme(&path, &data).at(span)?; + + Ok((Some(Smart::Custom(Some(path))), Some(data))) +} + +/// The syntect syntax definitions. +/// +/// Syntax set is generated from the syntaxes from the `bat` project +/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes> +pub static RAW_SYNTAXES: Lazy<syntect::parsing::SyntaxSet> = + Lazy::new(two_face::syntax::extra_no_newlines); + +/// The default theme used for syntax highlighting. +pub static RAW_THEME: Lazy<synt::Theme> = Lazy::new(|| synt::Theme { + name: Some("Typst Light".into()), + author: Some("The Typst Project Developers".into()), + settings: synt::ThemeSettings::default(), + scopes: vec![ + item("comment", Some("#8a8a8a"), None), + item("constant.character.escape", Some("#1d6c76"), None), + item("markup.bold", None, Some(synt::FontStyle::BOLD)), + item("markup.italic", None, Some(synt::FontStyle::ITALIC)), + item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)), + item("markup.raw", Some("#818181"), None), + item("string.other.math.typst", None, None), + item("punctuation.definition.math", Some("#298e0d"), None), + item("keyword.operator.math", Some("#1d6c76"), None), + item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)), + item( + "markup.heading.typst", + None, + Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE), + ), + item("punctuation.definition.list", Some("#8b41b1"), None), + item("markup.list.term", None, Some(synt::FontStyle::BOLD)), + item("entity.name.label, markup.other.reference", Some("#1d6c76"), None), + item("keyword, 
constant.language, variable.language", Some("#d73a49"), None), + item("storage.type, storage.modifier", Some("#d73a49"), None), + item("constant", Some("#b60157"), None), + item("string", Some("#298e0d"), None), + item("entity.name, variable.function, support", Some("#4b69c6"), None), + item("support.macro", Some("#16718d"), None), + item("meta.annotation", Some("#301414"), None), + item("entity.other, meta.interpolation", Some("#8b41b1"), None), + item("meta.diff.range", Some("#8b41b1"), None), + item("markup.inserted, meta.diff.header.to-file", Some("#298e0d"), None), + item("markup.deleted, meta.diff.header.from-file", Some("#d73a49"), None), + ], +}); + +/// Create a syntect theme item. +fn item( + scope: &str, + color: Option<&str>, + font_style: Option<synt::FontStyle>, +) -> synt::ThemeItem { + synt::ThemeItem { + scope: scope.parse().unwrap(), + style: synt::StyleModifier { + foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())), + background: None, + font_style, + }, + } +} + +/// Replace tabs with spaces to align with multiples of `tab_size`. 
+fn align_tabs(text: &str, tab_size: usize) -> EcoString { + let replacement = " ".repeat(tab_size); + let divisor = tab_size.max(1); + let amount = text.chars().filter(|&c| c == '\t').count(); + + let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size); + let mut column = 0; + + for grapheme in text.graphemes(true) { + match grapheme { + "\t" => { + let required = tab_size - column % divisor; + res.push_str(&replacement[..required]); + column += required; + } + "\n" => { + res.push_str(grapheme); + column = 0; + } + _ => { + res.push_str(grapheme); + column += 1; + } + } + } + + res +} diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs new file mode 100644 index 00000000..003ecf47 --- /dev/null +++ b/crates/typst-library/src/text/shift.rs @@ -0,0 +1,210 @@ +use ecow::EcoString; + +use crate::diag::SourceResult; +use crate::engine::Engine; +use crate::foundations::{elem, Content, Packed, SequenceElem, Show, StyleChain}; +use crate::layout::{Em, Length}; +use crate::text::{variant, SpaceElem, TextElem, TextSize}; +use crate::World; + +/// Renders text in subscript. +/// +/// The text is rendered smaller and its baseline is lowered. +/// +/// # Example +/// ```example +/// Revenue#sub[yearly] +/// ``` +#[elem(title = "Subscript", Show)] +pub struct SubElem { + /// Whether to prefer the dedicated subscript characters of the font. + /// + /// If this is enabled, Typst first tries to transform the text to subscript + /// codepoints. If that fails, it falls back to rendering lowered and shrunk + /// normal letters. + /// + /// ```example + /// N#sub(typographic: true)[1] + /// N#sub(typographic: false)[1] + /// ``` + #[default(true)] + pub typographic: bool, + + /// The baseline shift for synthetic subscripts. Does not apply if + /// `typographic` is true and the font has subscript codepoints for the + /// given `body`. 
+ #[default(Em::new(0.2).into())] + pub baseline: Length, + + /// The font size for synthetic subscripts. Does not apply if + /// `typographic` is true and the font has subscript codepoints for the + /// given `body`. + #[default(TextSize(Em::new(0.6).into()))] + pub size: TextSize, + + /// The text to display in subscript. + #[required] + pub body: Content, +} + +impl Show for Packed<SubElem> { + #[typst_macros::time(name = "sub", span = self.span())] + fn show(&self, engine: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + let body = self.body().clone(); + + if self.typographic(styles) { + if let Some(text) = convert_script(&body, true) { + if is_shapable(engine, &text, styles) { + return Ok(TextElem::packed(text)); + } + } + }; + + Ok(body + .styled(TextElem::set_baseline(self.baseline(styles))) + .styled(TextElem::set_size(self.size(styles)))) + } +} + +/// Renders text in superscript. +/// +/// The text is rendered smaller and its baseline is raised. +/// +/// # Example +/// ```example +/// 1#super[st] try! +/// ``` +#[elem(title = "Superscript", Show)] +pub struct SuperElem { + /// Whether to prefer the dedicated superscript characters of the font. + /// + /// If this is enabled, Typst first tries to transform the text to + /// superscript codepoints. If that fails, it falls back to rendering + /// raised and shrunk normal letters. + /// + /// ```example + /// N#super(typographic: true)[1] + /// N#super(typographic: false)[1] + /// ``` + #[default(true)] + pub typographic: bool, + + /// The baseline shift for synthetic superscripts. Does not apply if + /// `typographic` is true and the font has superscript codepoints for the + /// given `body`. + #[default(Em::new(-0.5).into())] + pub baseline: Length, + + /// The font size for synthetic superscripts. Does not apply if + /// `typographic` is true and the font has superscript codepoints for the + /// given `body`. 
+ #[default(TextSize(Em::new(0.6).into()))] + pub size: TextSize, + + /// The text to display in superscript. + #[required] + pub body: Content, +} + +impl Show for Packed<SuperElem> { + #[typst_macros::time(name = "super", span = self.span())] + fn show(&self, engine: &mut Engine, styles: StyleChain) -> SourceResult<Content> { + let body = self.body().clone(); + + if self.typographic(styles) { + if let Some(text) = convert_script(&body, false) { + if is_shapable(engine, &text, styles) { + return Ok(TextElem::packed(text)); + } + } + }; + + Ok(body + .styled(TextElem::set_baseline(self.baseline(styles))) + .styled(TextElem::set_size(self.size(styles)))) + } +} + +/// Find and transform the text contained in `content` to the given script kind +/// if and only if it only consists of `Text`, `Space`, and `Empty` leaves. +fn convert_script(content: &Content, sub: bool) -> Option<EcoString> { + if content.is::<SpaceElem>() { + Some(' '.into()) + } else if let Some(elem) = content.to_packed::<TextElem>() { + if sub { + elem.text().chars().map(to_subscript_codepoint).collect() + } else { + elem.text().chars().map(to_superscript_codepoint).collect() + } + } else if let Some(sequence) = content.to_packed::<SequenceElem>() { + sequence + .children + .iter() + .map(|item| convert_script(item, sub)) + .collect() + } else { + None + } +} + +/// Checks whether the first retrievable family contains all code points of the +/// given string. +fn is_shapable(engine: &Engine, text: &str, styles: StyleChain) -> bool { + let world = engine.world; + for family in TextElem::font_in(styles) { + if let Some(font) = world + .book() + .select(family.as_str(), variant(styles)) + .and_then(|id| world.font(id)) + { + return text.chars().all(|c| font.ttf().glyph_index(c).is_some()); + } + } + + false +} + +/// Convert a character to its corresponding Unicode superscript. 
fn to_superscript_codepoint(c: char) -> Option<char> {
    // Superscript digits, indexed by numeric value. They are not contiguous in
    // Unicode: ¹, ² and ³ live in Latin-1, the rest in the U+2070 block, so a
    // table is used instead of code point arithmetic.
    const DIGITS: [char; 10] = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];

    // Remaining characters that have a superscript form. The minus entry is
    // the minus sign U+2212, not the ASCII hyphen. A space maps to itself so
    // that multi-word input stays convertible.
    const OTHERS: &[(char, char)] = &[
        ('+', '⁺'),
        ('−', '⁻'),
        ('=', '⁼'),
        ('(', '⁽'),
        (')', '⁾'),
        ('n', 'ⁿ'),
        ('i', 'ⁱ'),
        (' ', ' '),
    ];

    // `to_digit(10)` only accepts the ASCII digits '0'..='9'.
    if let Some(value) = c.to_digit(10) {
        return DIGITS.get(value as usize).copied();
    }

    OTHERS
        .iter()
        .find(|&&(plain, _)| plain == c)
        .map(|&(_, raised)| raised)
}

/// Maps a character to its Unicode subscript counterpart.
///
/// Returns `None` for any character without a dedicated subscript code point.
/// A plain space is passed through unchanged.
fn to_subscript_codepoint(c: char) -> Option<char> {
    // Non-digit characters that have a subscript form. The minus entry is the
    // minus sign U+2212, not the ASCII hyphen.
    const OTHERS: &[(char, char)] = &[
        ('+', '₊'),
        ('−', '₋'),
        ('=', '₌'),
        ('(', '₍'),
        (')', '₎'),
        ('a', 'ₐ'),
        ('e', 'ₑ'),
        ('o', 'ₒ'),
        ('x', 'ₓ'),
        ('h', 'ₕ'),
        ('k', 'ₖ'),
        ('l', 'ₗ'),
        ('m', 'ₘ'),
        ('n', 'ₙ'),
        ('p', 'ₚ'),
        ('s', 'ₛ'),
        ('t', 'ₜ'),
        (' ', ' '),
    ];

    match c.to_digit(10) {
        // Subscript digits are contiguous, starting at '₀'.
        Some(value) => char::from_u32('₀' as u32 + value),
        None => OTHERS
            .iter()
            .find(|&&(plain, _)| plain == c)
            .map(|&(_, lowered)| lowered),
    }
}
+/// In those cases, you can use a show-set rule to customize the appearance of +/// the text in smallcaps: +/// +/// ```typ +/// #show smallcaps: set text(font: "Latin Modern Roman Caps") +/// ``` +/// +/// In the future, this function will support synthesizing smallcaps from normal +/// letters, but this is not yet implemented. +/// +/// # Smallcaps headings +/// You can use a [show rule]($styling/#show-rules) to apply smallcaps +/// formatting to all your headings. In the example below, we also center-align +/// our headings and disable the standard bold font. +/// +/// ```example +/// #set par(justify: true) +/// #set heading(numbering: "I.") +/// +/// #show heading: smallcaps +/// #show heading: set align(center) +/// #show heading: set text( +/// weight: "regular" +/// ) +/// +/// = Introduction +/// #lorem(40) +/// ``` +#[elem(title = "Small Capitals", Show)] +pub struct SmallcapsElem { + /// The content to display in small capitals. + #[required] + pub body: Content, +} + +impl Show for Packed<SmallcapsElem> { + #[typst_macros::time(name = "smallcaps", span = self.span())] + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult<Content> { + Ok(self.body().clone().styled(TextElem::set_smallcaps(true))) + } +} diff --git a/crates/typst-library/src/text/smartquote.rs b/crates/typst-library/src/text/smartquote.rs new file mode 100644 index 00000000..5917550d --- /dev/null +++ b/crates/typst-library/src/text/smartquote.rs @@ -0,0 +1,387 @@ +use ecow::EcoString; +use typst_syntax::is_newline; +use unicode_segmentation::UnicodeSegmentation; + +use crate::diag::{bail, HintedStrResult, StrResult}; +use crate::foundations::{ + array, cast, dict, elem, Array, Dict, FromValue, Packed, PlainText, Smart, Str, +}; +use crate::layout::Dir; +use crate::text::{Lang, Region}; + +/// A language-aware quote that reacts to its context. +/// +/// Automatically turns into an appropriate opening or closing quote based on +/// the active [text language]($text.lang). 
+/// +/// # Example +/// ```example +/// "This is in quotes." +/// +/// #set text(lang: "de") +/// "Das ist in Anführungszeichen." +/// +/// #set text(lang: "fr") +/// "C'est entre guillemets." +/// ``` +/// +/// # Syntax +/// This function also has dedicated syntax: The normal quote characters +/// (`'` and `"`). Typst automatically makes your quotes smart. +#[elem(name = "smartquote", PlainText)] +pub struct SmartQuoteElem { + /// Whether this should be a double quote. + #[default(true)] + pub double: bool, + + /// Whether smart quotes are enabled. + /// + /// To disable smartness for a single quote, you can also escape it with a + /// backslash. + /// + /// ```example + /// #set smartquote(enabled: false) + /// + /// These are "dumb" quotes. + /// ``` + #[default(true)] + pub enabled: bool, + + /// Whether to use alternative quotes. + /// + /// Does nothing for languages that don't have alternative quotes, or if + /// explicit quotes were set. + /// + /// ```example + /// #set text(lang: "de") + /// #set smartquote(alternative: true) + /// + /// "Das ist in anderen Anführungszeichen." + /// ``` + #[default(false)] + pub alternative: bool, + + /// The quotes to use. + /// + /// - When set to `{auto}`, the appropriate single quotes for the + /// [text language]($text.lang) will be used. This is the default. + /// - Custom quotes can be passed as a string, array, or dictionary of either + /// - [string]($str): a string consisting of two characters containing the + /// opening and closing double quotes (characters here refer to Unicode + /// grapheme clusters) + /// - [array]: an array containing the opening and closing double quotes + /// - [dictionary]: an array containing the double and single quotes, each + /// specified as either `{auto}`, string, or array + /// + /// ```example + /// #set text(lang: "de") + /// 'Das sind normale Anführungszeichen.' + /// + /// #set smartquote(quotes: "()") + /// "Das sind eigene Anführungszeichen." 
+ /// + /// #set smartquote(quotes: (single: ("[[", "]]"), double: auto)) + /// 'Das sind eigene Anführungszeichen.' + /// ``` + #[borrowed] + pub quotes: Smart<SmartQuoteDict>, +} + +impl PlainText for Packed<SmartQuoteElem> { + fn plain_text(&self, text: &mut EcoString) { + if self.double.unwrap_or(true) { + text.push_str("\""); + } else { + text.push_str("'"); + } + } +} + +/// A smart quote substitutor with zero lookahead. +#[derive(Debug, Clone)] +pub struct SmartQuoter { + /// The amount of quotes that have been opened. + depth: u8, + /// Each bit indicates whether the quote at this nesting depth is a double. + /// Maximum supported depth is thus 32. + kinds: u32, +} + +impl SmartQuoter { + /// Start quoting. + pub fn new() -> Self { + Self { depth: 0, kinds: 0 } + } + + /// Determine which smart quote to substitute given this quoter's nesting + /// state and the character immediately preceding the quote. + pub fn quote<'a>( + &mut self, + before: Option<char>, + quotes: &SmartQuotes<'a>, + double: bool, + ) -> &'a str { + let opened = self.top(); + let before = before.unwrap_or(' '); + + // If we are after a number and haven't most recently opened a quote of + // this kind, produce a prime. Otherwise, we prefer a closing quote. + if before.is_numeric() && opened != Some(double) { + return if double { "″" } else { "′" }; + } + + // If we have a single smart quote, didn't recently open a single + // quotation, and are after an alphabetic char or an object (e.g. a + // math equation), interpret this as an apostrophe. + if !double + && opened != Some(false) + && (before.is_alphabetic() || before == '\u{FFFC}') + { + return "’"; + } + + // If the most recently opened quotation is of this kind and the + // previous char does not indicate a nested quotation, close it. 
+ if opened == Some(double) + && !before.is_whitespace() + && !is_newline(before) + && !is_opening_bracket(before) + { + self.pop(); + return quotes.close(double); + } + + // Otherwise, open a new the quotation. + self.push(double); + quotes.open(double) + } + + /// The top of our quotation stack. Returns `Some(double)` for the most + /// recently opened quote or `None` if we didn't open one. + fn top(&self) -> Option<bool> { + self.depth.checked_sub(1).map(|i| (self.kinds >> i) & 1 == 1) + } + + /// Push onto the quotation stack. + fn push(&mut self, double: bool) { + if self.depth < 32 { + self.kinds |= (double as u32) << self.depth; + self.depth += 1; + } + } + + /// Pop from the quotation stack. + fn pop(&mut self) { + self.depth -= 1; + self.kinds &= (1 << self.depth) - 1; + } +} + +impl Default for SmartQuoter { + fn default() -> Self { + Self::new() + } +} + +/// Whether the character is an opening bracket, parenthesis, or brace. +fn is_opening_bracket(c: char) -> bool { + matches!(c, '(' | '{' | '[') +} + +/// Decides which quotes to substitute smart quotes with. +pub struct SmartQuotes<'s> { + /// The opening single quote. + pub single_open: &'s str, + /// The closing single quote. + pub single_close: &'s str, + /// The opening double quote. + pub double_open: &'s str, + /// The closing double quote. + pub double_close: &'s str, +} + +impl<'s> SmartQuotes<'s> { + /// Create a new `Quotes` struct with the given quotes, optionally falling + /// back to the defaults for a language and region. + /// + /// The language should be specified as an all-lowercase ISO 639-1 code, the + /// region as an all-uppercase ISO 3166-alpha2 code. 
+ /// + /// Currently, the supported languages are: English, Czech, Danish, German, + /// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin, + /// Lithuanian, Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, + /// Swedish, French, Swiss French, Hungarian, Polish, Romanian, Japanese, + /// Traditional Chinese, Russian, Norwegian, and Hebrew. + /// + /// For unknown languages, the English quotes are used as fallback. + pub fn get( + quotes: &'s Smart<SmartQuoteDict>, + lang: Lang, + region: Option<Region>, + alternative: bool, + ) -> Self { + let region = region.as_ref().map(Region::as_str); + + let default = ("‘", "’", "“", "”"); + let low_high = ("‚", "‘", "„", "“"); + + let (single_open, single_close, double_open, double_close) = match lang.as_str() { + "de" if matches!(region, Some("CH" | "LI")) => match alternative { + false => ("‹", "›", "«", "»"), + true => low_high, + }, + "fr" if matches!(region, Some("CH")) => match alternative { + false => ("‹\u{202F}", "\u{202F}›", "«\u{202F}", "\u{202F}»"), + true => default, + }, + "cs" | "da" | "de" | "sk" | "sl" if alternative => ("›", "‹", "»", "«"), + "cs" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => low_high, + "da" => ("‘", "’", "“", "”"), + "fr" | "ru" if alternative => default, + "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"), + "fi" | "sv" if alternative => ("’", "’", "»", "»"), + "bs" | "fi" | "sv" => ("’", "’", "”", "”"), + "it" if alternative => default, + "la" if alternative => ("“", "”", "«\u{202F}", "\u{202F}»"), + "it" | "la" => ("“", "”", "«", "»"), + "es" if matches!(region, Some("ES") | None) => ("“", "”", "«", "»"), + "hu" | "pl" | "ro" => ("’", "’", "„", "”"), + "no" | "nb" | "nn" if alternative => low_high, + "ru" | "no" | "nb" | "nn" | "ua" => ("’", "’", "«", "»"), + "gr" => ("‘", "’", "«", "»"), + "he" => ("’", "’", "”", "”"), + _ if lang.dir() == Dir::RTL => ("’", "‘", "”", "“"), + _ => default, + }; + + fn inner_or_default<'s>( + quotes: Smart<&'s 
SmartQuoteDict>, + f: impl FnOnce(&'s SmartQuoteDict) -> Smart<&'s SmartQuoteSet>, + default: [&'s str; 2], + ) -> [&'s str; 2] { + match quotes.and_then(f) { + Smart::Auto => default, + Smart::Custom(SmartQuoteSet { open, close }) => { + [open, close].map(|s| s.as_str()) + } + } + } + + let quotes = quotes.as_ref(); + let [single_open, single_close] = + inner_or_default(quotes, |q| q.single.as_ref(), [single_open, single_close]); + let [double_open, double_close] = + inner_or_default(quotes, |q| q.double.as_ref(), [double_open, double_close]); + + Self { + single_open, + single_close, + double_open, + double_close, + } + } + + /// The opening quote. + pub fn open(&self, double: bool) -> &'s str { + if double { + self.double_open + } else { + self.single_open + } + } + + /// The closing quote. + pub fn close(&self, double: bool) -> &'s str { + if double { + self.double_close + } else { + self.single_close + } + } +} + +/// An opening and closing quote. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct SmartQuoteSet { + open: EcoString, + close: EcoString, +} + +cast! 
{ + SmartQuoteSet, + self => array![self.open, self.close].into_value(), + value: Array => { + let [open, close] = array_to_set(value)?; + Self { open, close } + }, + value: Str => { + let [open, close] = str_to_set(value.as_str())?; + Self { open, close } + }, +} + +fn str_to_set(value: &str) -> StrResult<[EcoString; 2]> { + let mut iter = value.graphemes(true); + match (iter.next(), iter.next(), iter.next()) { + (Some(open), Some(close), None) => Ok([open.into(), close.into()]), + _ => { + let count = value.graphemes(true).count(); + bail!( + "expected 2 characters, found {count} character{}", + if count > 1 { "s" } else { "" } + ); + } + } +} + +fn array_to_set(value: Array) -> HintedStrResult<[EcoString; 2]> { + let value = value.as_slice(); + if value.len() != 2 { + bail!( + "expected 2 quotes, found {} quote{}", + value.len(), + if value.len() > 1 { "s" } else { "" } + ); + } + + let open: EcoString = value[0].clone().cast()?; + let close: EcoString = value[1].clone().cast()?; + + Ok([open, close]) +} + +/// A dict of single and double quotes. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct SmartQuoteDict { + double: Smart<SmartQuoteSet>, + single: Smart<SmartQuoteSet>, +} + +cast! { + SmartQuoteDict, + self => dict! { "double" => self.double, "single" => self.single }.into_value(), + mut value: Dict => { + let keys = ["double", "single"]; + + let double = value + .take("double") + .ok() + .map(FromValue::from_value) + .transpose()? + .unwrap_or(Smart::Auto); + let single = value + .take("single") + .ok() + .map(FromValue::from_value) + .transpose()? 
+ .unwrap_or(Smart::Auto); + + value.finish(&keys)?; + + Self { single, double } + }, + value: SmartQuoteSet => Self { + double: Smart::Custom(value), + single: Smart::Auto, + }, +} diff --git a/crates/typst-library/src/text/space.rs b/crates/typst-library/src/text/space.rs new file mode 100644 index 00000000..38a55482 --- /dev/null +++ b/crates/typst-library/src/text/space.rs @@ -0,0 +1,31 @@ +use ecow::EcoString; +use typst_utils::singleton; + +use crate::foundations::{ + elem, Content, NativeElement, Packed, PlainText, Repr, Unlabellable, +}; + +/// A text space. +#[elem(Unlabellable, PlainText, Repr)] +pub struct SpaceElem {} + +impl SpaceElem { + /// Get the globally shared space element. + pub fn shared() -> &'static Content { + singleton!(Content, SpaceElem::new().pack()) + } +} + +impl Repr for SpaceElem { + fn repr(&self) -> EcoString { + "[ ]".into() + } +} + +impl Unlabellable for Packed<SpaceElem> {} + +impl PlainText for Packed<SpaceElem> { + fn plain_text(&self, text: &mut EcoString) { + text.push(' '); + } +} |
