diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-07-02 19:59:52 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-07-02 20:07:43 +0200 |
| commit | ebfdb1dafa430786db10dad2ef7d5467c1bdbed1 (patch) | |
| tree | 2bbc24ddb4124c4bb14dec0e536129d4de37b056 /crates/typst-library/src/text | |
| parent | 3ab19185093d7709f824b95b979060ce125389d8 (diff) | |
Move everything into `crates/` directory
Diffstat (limited to 'crates/typst-library/src/text')
| -rw-r--r-- | crates/typst-library/src/text/deco.rs | 420 | ||||
| -rw-r--r-- | crates/typst-library/src/text/misc.rs | 330 | ||||
| -rw-r--r-- | crates/typst-library/src/text/mod.rs | 769 | ||||
| -rw-r--r-- | crates/typst-library/src/text/quotes.rs | 209 | ||||
| -rw-r--r-- | crates/typst-library/src/text/raw.rs | 398 | ||||
| -rw-r--r-- | crates/typst-library/src/text/shaping.rs | 973 | ||||
| -rw-r--r-- | crates/typst-library/src/text/shift.rs | 229 |
7 files changed, 3328 insertions, 0 deletions
diff --git a/crates/typst-library/src/text/deco.rs b/crates/typst-library/src/text/deco.rs new file mode 100644 index 00000000..9ec4ca32 --- /dev/null +++ b/crates/typst-library/src/text/deco.rs @@ -0,0 +1,420 @@ +use kurbo::{BezPath, Line, ParamCurve}; +use ttf_parser::{GlyphId, OutlineBuilder}; + +use super::TextElem; +use crate::prelude::*; + +/// Underlines text. +/// +/// ## Example { #example } +/// ```example +/// This is #underline[important]. +/// ``` +/// +/// Display: Underline +/// Category: text +#[element(Show)] +pub struct UnderlineElem { + /// How to stroke the line. + /// + /// See the [line's documentation]($func/line.stroke) for more details. If + /// set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// ```example + /// Take #underline( + /// stroke: 1.5pt + red, + /// offset: 2pt, + /// [care], + /// ) + /// ``` + #[resolve] + #[fold] + pub stroke: Smart<PartialStroke>, + + /// The position of the line relative to the baseline, read from the font + /// tables if `{auto}`. + /// + /// ```example + /// #underline(offset: 5pt)[ + /// The Tale Of A Faraway Line I + /// ] + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. + /// + /// ```example + /// #align(center, + /// underline(extent: 2pt)[Chapter 1] + /// ) + /// ``` + #[resolve] + pub extent: Length, + + /// Whether the line skips sections in which it would collide with the + /// glyphs. + /// + /// ```example + /// This #underline(evade: true)[is great]. + /// This #underline(evade: false)[is less great]. + /// ``` + #[default(true)] + pub evade: bool, + + /// The content to underline. 
+ #[required] + pub body: Content, +} + +impl Show for UnderlineElem { + #[tracing::instrument(name = "UnderlineElem::show", skip_all)] + fn show(&self, _: &mut Vt, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().styled(TextElem::set_deco(Decoration { + line: DecoLine::Underline, + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + extent: self.extent(styles), + evade: self.evade(styles), + }))) + } +} + +/// Adds a line over text. +/// +/// ## Example { #example } +/// ```example +/// #overline[A line over text.] +/// ``` +/// +/// Display: Overline +/// Category: text +#[element(Show)] +pub struct OverlineElem { + /// How to stroke the line. + /// + /// See the [line's documentation]($func/line.stroke) for more details. If + /// set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// ```example + /// #set text(fill: olive) + /// #overline( + /// stroke: green.darken(20%), + /// offset: -12pt, + /// [The Forest Theme], + /// ) + /// ``` + #[resolve] + #[fold] + pub stroke: Smart<PartialStroke>, + + /// The position of the line relative to the baseline. Read from the font + /// tables if `{auto}`. + /// + /// ```example + /// #overline(offset: -1.2em)[ + /// The Tale Of A Faraway Line II + /// ] + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. + /// + /// ```example + /// #set overline(extent: 4pt) + /// #set underline(extent: 4pt) + /// #overline(underline[Typography Today]) + /// ``` + #[resolve] + pub extent: Length, + + /// Whether the line skips sections in which it would collide with the + /// glyphs. + /// + /// ```example + /// #overline( + /// evade: false, + /// offset: -7.5pt, + /// stroke: 1pt, + /// extent: 3pt, + /// [Temple], + /// ) + /// ``` + #[default(true)] + pub evade: bool, + + /// The content to add a line over. 
+ #[required] + pub body: Content, +} + +impl Show for OverlineElem { + #[tracing::instrument(name = "OverlineElem::show", skip_all)] + fn show(&self, _: &mut Vt, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().styled(TextElem::set_deco(Decoration { + line: DecoLine::Overline, + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + extent: self.extent(styles), + evade: self.evade(styles), + }))) + } +} + +/// Strikes through text. +/// +/// ## Example { #example } +/// ```example +/// This is #strike[not] relevant. +/// ``` +/// +/// Display: Strikethrough +/// Category: text +#[element(Show)] +pub struct StrikeElem { + /// How to stroke the line. + /// + /// See the [line's documentation]($func/line.stroke) for more details. If + /// set to `{auto}`, takes on the text's color and a thickness defined in + /// the current font. + /// + /// _Note:_ Please don't use this for real redaction as you can still + /// copy paste the text. + /// + /// ```example + /// This is #strike(stroke: 1.5pt + red)[very stricken through]. \ + /// This is #strike(stroke: 10pt)[redacted]. + /// ``` + #[resolve] + #[fold] + pub stroke: Smart<PartialStroke>, + + /// The position of the line relative to the baseline. Read from the font + /// tables if `{auto}`. + /// + /// This is useful if you are unhappy with the offset your font provides. + /// + /// ```example + /// #set text(font: "Inria Serif") + /// This is #strike(offset: auto)[low-ish]. \ + /// This is #strike(offset: -3.5pt)[on-top]. + /// ``` + #[resolve] + pub offset: Smart<Length>, + + /// The amount by which to extend the line beyond (or within if negative) + /// the content. + /// + /// ```example + /// This #strike(extent: -2pt)[skips] parts of the word. + /// This #strike(extent: 2pt)[extends] beyond the word. + /// ``` + #[resolve] + pub extent: Length, + + /// The content to strike through. 
+ #[required] + pub body: Content, +} + +impl Show for StrikeElem { + #[tracing::instrument(name = "StrikeElem::show", skip_all)] + fn show(&self, _: &mut Vt, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().styled(TextElem::set_deco(Decoration { + line: DecoLine::Strikethrough, + stroke: self.stroke(styles).unwrap_or_default(), + offset: self.offset(styles), + extent: self.extent(styles), + evade: false, + }))) + } +} + +/// Defines a line that is positioned over, under or on top of text. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Decoration { + pub line: DecoLine, + pub stroke: PartialStroke<Abs>, + pub offset: Smart<Abs>, + pub extent: Abs, + pub evade: bool, +} + +impl Fold for Decoration { + type Output = Vec<Self>; + + fn fold(self, mut outer: Self::Output) -> Self::Output { + outer.insert(0, self); + outer + } +} + +cast! { + type Decoration: "decoration", +} + +/// A kind of decorative line. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum DecoLine { + Underline, + Strikethrough, + Overline, +} + +/// Add line decorations to a single run of shaped text. 
+pub(super) fn decorate( + frame: &mut Frame, + deco: &Decoration, + text: &TextItem, + shift: Abs, + pos: Point, + width: Abs, +) { + let font_metrics = text.font.metrics(); + let metrics = match deco.line { + DecoLine::Strikethrough => font_metrics.strikethrough, + DecoLine::Overline => font_metrics.overline, + DecoLine::Underline => font_metrics.underline, + }; + + let offset = deco.offset.unwrap_or(-metrics.position.at(text.size)) - shift; + let stroke = deco.stroke.clone().unwrap_or(Stroke { + paint: text.fill.clone(), + thickness: metrics.thickness.at(text.size), + ..Stroke::default() + }); + + let gap_padding = 0.08 * text.size; + let min_width = 0.162 * text.size; + + let start = pos.x - deco.extent; + let end = pos.x + (width + 2.0 * deco.extent); + + let mut push_segment = |from: Abs, to: Abs| { + let origin = Point::new(from, pos.y + offset); + let target = Point::new(to - from, Abs::zero()); + + if target.x >= min_width || !deco.evade { + let shape = Geometry::Line(target).stroked(stroke.clone()); + frame.push(origin, FrameItem::Shape(shape, Span::detached())); + } + }; + + if !deco.evade { + push_segment(start, end); + return; + } + + let line = Line::new( + kurbo::Point::new(pos.x.to_raw(), offset.to_raw()), + kurbo::Point::new((pos.x + width).to_raw(), offset.to_raw()), + ); + + let mut x = pos.x; + let mut intersections = vec![]; + + for glyph in text.glyphs.iter() { + let dx = glyph.x_offset.at(text.size) + x; + let mut builder = + BezPathBuilder::new(font_metrics.units_per_em, text.size, dx.to_raw()); + + let bbox = text.font.ttf().outline_glyph(GlyphId(glyph.id), &mut builder); + let path = builder.finish(); + + x += glyph.x_advance.at(text.size); + + // Only do the costly segments intersection test if the line + // intersects the bounding box. 
+ let intersect = bbox.map_or(false, |bbox| { + let y_min = -text.font.to_em(bbox.y_max).at(text.size); + let y_max = -text.font.to_em(bbox.y_min).at(text.size); + offset >= y_min && offset <= y_max + }); + + if intersect { + // Find all intersections of segments with the line. + intersections.extend( + path.segments() + .flat_map(|seg| seg.intersect_line(line)) + .map(|is| Abs::raw(line.eval(is.line_t).x)), + ); + } + } + + // Add start and end points, taking padding into account. + intersections.push(start - gap_padding); + intersections.push(end + gap_padding); + // When emitting the decorative line segments, we move from left to + // right. The intersections are not necessarily in this order, yet. + intersections.sort(); + + for edge in intersections.windows(2) { + let l = edge[0]; + let r = edge[1]; + + // If we are too close, don't draw the segment + if r - l < gap_padding { + continue; + } else { + push_segment(l + gap_padding, r - gap_padding); + } + } +} + +/// Builds a kurbo [`BezPath`] for a glyph. 
+struct BezPathBuilder { + path: BezPath, + units_per_em: f64, + font_size: Abs, + x_offset: f64, +} + +impl BezPathBuilder { + fn new(units_per_em: f64, font_size: Abs, x_offset: f64) -> Self { + Self { + path: BezPath::new(), + units_per_em, + font_size, + x_offset, + } + } + + fn finish(self) -> BezPath { + self.path + } + + fn p(&self, x: f32, y: f32) -> kurbo::Point { + kurbo::Point::new(self.s(x) + self.x_offset, -self.s(y)) + } + + fn s(&self, v: f32) -> f64 { + Em::from_units(v, self.units_per_em).at(self.font_size).to_raw() + } +} + +impl OutlineBuilder for BezPathBuilder { + fn move_to(&mut self, x: f32, y: f32) { + self.path.move_to(self.p(x, y)); + } + + fn line_to(&mut self, x: f32, y: f32) { + self.path.line_to(self.p(x, y)); + } + + fn quad_to(&mut self, x1: f32, y1: f32, x: f32, y: f32) { + self.path.quad_to(self.p(x1, y1), self.p(x, y)); + } + + fn curve_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x: f32, y: f32) { + self.path.curve_to(self.p(x1, y1), self.p(x2, y2), self.p(x, y)); + } + + fn close(&mut self) { + self.path.close_path(); + } +} diff --git a/crates/typst-library/src/text/misc.rs b/crates/typst-library/src/text/misc.rs new file mode 100644 index 00000000..811b027e --- /dev/null +++ b/crates/typst-library/src/text/misc.rs @@ -0,0 +1,330 @@ +use super::TextElem; +use crate::prelude::*; + +/// A text space. +/// +/// Display: Space +/// Category: text +#[element(Behave, Unlabellable, PlainText)] +pub struct SpaceElem {} + +impl Behave for SpaceElem { + fn behaviour(&self) -> Behaviour { + Behaviour::Weak(2) + } +} + +impl Unlabellable for SpaceElem {} + +impl PlainText for SpaceElem { + fn plain_text(&self, text: &mut EcoString) { + text.push(' '); + } +} + +/// Inserts a line break. +/// +/// Advances the paragraph to the next line. A single trailing line break at the +/// end of a paragraph is ignored, but more than one creates additional empty +/// lines. 
+/// +/// ## Example { #example } +/// ```example +/// *Date:* 26.12.2022 \ +/// *Topic:* Infrastructure Test \ +/// *Severity:* High \ +/// ``` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax: To insert a line break, simply write +/// a backslash followed by whitespace. This always creates an unjustified +/// break. +/// +/// Display: Line Break +/// Category: text +#[element(Behave)] +pub struct LinebreakElem { + /// Whether to justify the line before the break. + /// + /// This is useful if you found a better line break opportunity in your + /// justified text than Typst did. + /// + /// ```example + /// #set par(justify: true) + /// #let jb = linebreak(justify: true) + /// + /// I have manually tuned the #jb + /// line breaks in this paragraph #jb + /// for an _interesting_ result. #jb + /// ``` + #[default(false)] + pub justify: bool, +} + +impl Behave for LinebreakElem { + fn behaviour(&self) -> Behaviour { + Behaviour::Destructive + } +} + +/// Strongly emphasizes content by increasing the font weight. +/// +/// Increases the current font weight by a given `delta`. +/// +/// ## Example { #example } +/// ```example +/// This is *strong.* \ +/// This is #strong[too.] \ +/// +/// #show strong: set text(red) +/// And this is *evermore.* +/// ``` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax: To strongly emphasize content, +/// simply enclose it in stars/asterisks (`*`). Note that this only works at +/// word boundaries. To strongly emphasize part of a word, you have to use the +/// function. +/// +/// Display: Strong Emphasis +/// Category: text +#[element(Show)] +pub struct StrongElem { + /// The delta to apply on the font weight. + /// + /// ```example + /// #set strong(delta: 0) + /// No *effect!* + /// ``` + #[default(300)] + pub delta: i64, + + /// The content to strongly emphasize. 
+ #[required] + pub body: Content, +} + +impl Show for StrongElem { + #[tracing::instrument(name = "StrongElem::show", skip_all)] + fn show(&self, _: &mut Vt, styles: StyleChain) -> SourceResult<Content> { + Ok(self.body().styled(TextElem::set_delta(Delta(self.delta(styles))))) + } +} + +/// A delta that is summed up when folded. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct Delta(pub i64); + +cast! { + Delta, + self => self.0.into_value(), + v: i64 => Self(v), +} + +impl Fold for Delta { + type Output = i64; + + fn fold(self, outer: Self::Output) -> Self::Output { + outer + self.0 + } +} + +/// Emphasizes content by setting it in italics. +/// +/// - If the current [text style]($func/text.style) is `{"normal"}`, +/// this turns it into `{"italic"}`. +/// - If it is already `{"italic"}` or `{"oblique"}`, +/// it turns it back to `{"normal"}`. +/// +/// ## Example { #example } +/// ```example +/// This is _emphasized._ \ +/// This is #emph[too.] +/// +/// #show emph: it => { +/// text(blue, it.body) +/// } +/// +/// This is _emphasized_ differently. +/// ``` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax: To emphasize content, simply +/// enclose it in underscores (`_`). Note that this only works at word +/// boundaries. To emphasize part of a word, you have to use the function. +/// +/// Display: Emphasis +/// Category: text +#[element(Show)] +pub struct EmphElem { + /// The content to emphasize. + #[required] + pub body: Content, +} + +impl Show for EmphElem { + #[tracing::instrument(name = "EmphElem::show", skip(self))] + fn show(&self, _: &mut Vt, _: StyleChain) -> SourceResult<Content> { + Ok(self.body().styled(TextElem::set_emph(Toggle))) + } +} + +/// A toggle that turns on and off alternatingly if folded. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct Toggle; + +cast! 
{ + Toggle, + self => Value::None, + _: Value => Self, +} + +impl Fold for Toggle { + type Output = bool; + + fn fold(self, outer: Self::Output) -> Self::Output { + !outer + } +} + +/// Converts text or content to lowercase. +/// +/// ## Example { #example } +/// ```example +/// #lower("ABC") \ +/// #lower[*My Text*] \ +/// #lower[already low] +/// ``` +/// +/// Display: Lowercase +/// Category: text +#[func] +pub fn lower( + /// The text to convert to lowercase. + text: Caseable, +) -> Caseable { + case(text, Case::Lower) +} + +/// Converts text or content to uppercase. +/// +/// ## Example { #example } +/// ```example +/// #upper("abc") \ +/// #upper[*my text*] \ +/// #upper[ALREADY HIGH] +/// ``` +/// +/// Display: Uppercase +/// Category: text +#[func] +pub fn upper( + /// The text to convert to uppercase. + text: Caseable, +) -> Caseable { + case(text, Case::Upper) +} + +/// Change the case of text. +fn case(text: Caseable, case: Case) -> Caseable { + match text { + Caseable::Str(v) => Caseable::Str(case.apply(&v).into()), + Caseable::Content(v) => { + Caseable::Content(v.styled(TextElem::set_case(Some(case)))) + } + } +} + +/// A value whose case can be changed. +pub enum Caseable { + Str(Str), + Content(Content), +} + +cast! { + Caseable, + self => match self { + Self::Str(v) => v.into_value(), + Self::Content(v) => v.into_value(), + }, + v: Str => Self::Str(v), + v: Content => Self::Content(v), +} + +/// A case transformation on text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum Case { + /// Everything is lowercased. + Lower, + /// Everything is uppercased. + Upper, +} + +impl Case { + /// Apply the case to a string. + pub fn apply(self, text: &str) -> String { + match self { + Self::Lower => text.to_lowercase(), + Self::Upper => text.to_uppercase(), + } + } +} + +/// Displays text in small capitals. +/// +/// _Note:_ This enables the OpenType `smcp` feature for the font. Not all fonts +/// support this feature. 
Sometimes smallcaps are part of a dedicated font and +/// sometimes they are not available at all. In the future, this function will +/// support selecting a dedicated smallcaps font as well as synthesizing +/// smallcaps from normal letters, but this is not yet implemented. +/// +/// ## Example { #example } +/// ```example +/// #set par(justify: true) +/// #set heading(numbering: "I.") +/// +/// #show heading: it => { +/// set block(below: 10pt) +/// set text(weight: "regular") +/// align(center, smallcaps(it)) +/// } +/// +/// = Introduction +/// #lorem(40) +/// ``` +/// +/// Display: Small Capitals +/// Category: text +#[func] +pub fn smallcaps( + /// The text to display to small capitals. + body: Content, +) -> Content { + body.styled(TextElem::set_smallcaps(true)) +} + +/// Creates blind text. +/// +/// This function yields a Latin-like _Lorem Ipsum_ blind text with the given +/// number of words. The sequence of words generated by the function is always +/// the same but randomly chosen. As usual for blind texts, it does not make any +/// sense. Use it as a placeholder to try layouts. +/// +/// ## Example { #example } +/// ```example +/// = Blind Text +/// #lorem(30) +/// +/// = More Blind Text +/// #lorem(15) +/// ``` +/// +/// Display: Blind Text +/// Category: text +#[func] +pub fn lorem( + /// The length of the blind text in words. + words: usize, +) -> Str { + lipsum::lipsum(words).replace("--", "–").into() +} diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs new file mode 100644 index 00000000..ff8cbad8 --- /dev/null +++ b/crates/typst-library/src/text/mod.rs @@ -0,0 +1,769 @@ +//! Text handling. 
+ +mod deco; +mod misc; +mod quotes; +mod raw; +mod shaping; +mod shift; + +pub use self::deco::*; +pub use self::misc::*; +pub use self::quotes::*; +pub use self::raw::*; +pub use self::shaping::*; +pub use self::shift::*; + +use rustybuzz::Tag; +use typst::font::{FontMetrics, FontStretch, FontStyle, FontWeight, VerticalFontMetric}; + +use crate::layout::ParElem; +use crate::prelude::*; + +/// Hook up all text definitions. +pub(super) fn define(global: &mut Scope) { + global.define("text", TextElem::func()); + global.define("linebreak", LinebreakElem::func()); + global.define("smartquote", SmartQuoteElem::func()); + global.define("strong", StrongElem::func()); + global.define("emph", EmphElem::func()); + global.define("lower", lower_func()); + global.define("upper", upper_func()); + global.define("smallcaps", smallcaps_func()); + global.define("sub", SubElem::func()); + global.define("super", SuperElem::func()); + global.define("underline", UnderlineElem::func()); + global.define("strike", StrikeElem::func()); + global.define("overline", OverlineElem::func()); + global.define("raw", RawElem::func()); + global.define("lorem", lorem_func()); +} + +/// Customizes the look and layout of text in a variety of ways. +/// +/// This function is used frequently, both with set rules and directly. While +/// the set rule is often the simpler choice, calling the `text` function +/// directly can be useful when passing text as an argument to another function. +/// +/// ## Example { #example } +/// ```example +/// #set text(18pt) +/// With a set rule. +/// +/// #emph(text(blue)[ +/// With a function call. +/// ]) +/// ``` +/// +/// Display: Text +/// Category: text +#[element(Construct, PlainText)] +pub struct TextElem { + /// A prioritized sequence of font families. + /// + /// When processing text, Typst tries all specified font families in order + /// until it finds a font that has the necessary glyphs. 
In the example + /// below, the font `Inria Serif` is preferred, but since it does not + /// contain Arabic glyphs, the arabic text uses `Noto Sans Arabic` instead. + /// + /// ```example + /// #set text(font: ( + /// "Inria Serif", + /// "Noto Sans Arabic", + /// )) + /// + /// This is Latin. \ + /// هذا عربي. + /// + /// ``` + #[default(FontList(vec![FontFamily::new("Linux Libertine")]))] + pub font: FontList, + + /// Whether to allow last resort font fallback when the primary font list + /// contains no match. This lets Typst search through all available fonts + /// for the most similar one that has the necessary glyphs. + /// + /// _Note:_ Currently, there are no warnings when fallback is disabled and + /// no glyphs are found. Instead, your text shows up in the form of "tofus": + /// Small boxes that indicate the lack of an appropriate glyph. In the + /// future, you will be able to instruct Typst to issue warnings so you know + /// something is up. + /// + /// ```example + /// #set text(font: "Inria Serif") + /// هذا عربي + /// + /// #set text(fallback: false) + /// هذا عربي + /// ``` + #[default(true)] + pub fallback: bool, + + /// The desired font style. + /// + /// When an italic style is requested and only an oblique one is available, + /// it is used. Similarly, the other way around, an italic style can stand + /// in for an oblique one. When neither an italic nor an oblique style is + /// available, Typst selects the normal style. Since most fonts are only + /// available either in an italic or oblique style, the difference between + /// italic and oblique style is rarely observable. + /// + /// If you want to emphasize your text, you should do so using the + /// [emph]($func/emph) function instead. This makes it easy to adapt the + /// style later if you change your mind about how to signify the emphasis. 
+ /// + /// ```example + /// #text(font: "Linux Libertine", style: "italic")[Italic] + /// #text(font: "DejaVu Sans", style: "oblique")[Oblique] + /// ``` + pub style: FontStyle, + + /// The desired thickness of the font's glyphs. Accepts an integer between + /// `{100}` and `{900}` or one of the predefined weight names. When the + /// desired weight is not available, Typst selects the font from the family + /// that is closest in weight. + /// + /// If you want to strongly emphasize your text, you should do so using the + /// [strong]($func/strong) function instead. This makes it easy to adapt the + /// style later if you change your mind about how to signify the strong + /// emphasis. + /// + /// ```example + /// #set text(font: "IBM Plex Sans") + /// + /// #text(weight: "light")[Light] \ + /// #text(weight: "regular")[Regular] \ + /// #text(weight: "medium")[Medium] \ + /// #text(weight: 500)[Medium] \ + /// #text(weight: "bold")[Bold] + /// ``` + pub weight: FontWeight, + + /// The desired width of the glyphs. Accepts a ratio between `{50%}` and + /// `{200%}`. When the desired weight is not available, Typst selects the + /// font from the family that is closest in stretch. This will only stretch + /// the text if a condensed or expanded version of the font is available. + /// + /// If you want to adjust the amount of space between characters instead of + /// stretching the glyphs itself, use the [`tracking`]($func/text.tracking) + /// property instead. + /// + /// ```example + /// #text(stretch: 75%)[Condensed] \ + /// #text(stretch: 100%)[Normal] + /// ``` + pub stretch: FontStretch, + + /// The size of the glyphs. This value forms the basis of the `em` unit: + /// `{1em}` is equivalent to the font size. + /// + /// You can also give the font size itself in `em` units. Then, it is + /// relative to the previous font size. 
+ /// + /// ```example + /// #set text(size: 20pt) + /// very #text(1.5em)[big] text + /// ``` + #[parse(args.named_or_find("size")?)] + #[fold] + #[default(Abs::pt(11.0))] + pub size: TextSize, + + /// The glyph fill color. + /// + /// ```example + /// #set text(fill: red) + /// This text is red. + /// ``` + #[parse(args.named_or_find("fill")?)] + #[default(Color::BLACK.into())] + pub fill: Paint, + + /// The amount of space that should be added between characters. + /// + /// ```example + /// #set text(tracking: 1.5pt) + /// Distant text. + /// ``` + #[resolve] + pub tracking: Length, + + /// The amount of space between words. + /// + /// Can be given as an absolute length, but also relative to the width of + /// the space character in the font. + /// + /// If you want to adjust the amount of space between characters rather than + /// words, use the [`tracking`]($func/text.tracking) property instead. + /// + /// ```example + /// #set text(spacing: 200%) + /// Text with distant words. + /// ``` + #[resolve] + #[default(Rel::one())] + pub spacing: Rel<Length>, + + /// An amount to shift the text baseline by. + /// + /// ```example + /// A #text(baseline: 3pt)[lowered] + /// word. + /// ``` + #[resolve] + pub baseline: Length, + + /// Whether certain glyphs can hang over into the margin in justified text. + /// This can make justification visually more pleasing. + /// + /// ```example + /// #set par(justify: true) + /// This justified text has a hyphen in + /// the paragraph's first line. Hanging + /// the hyphen slightly into the margin + /// results in a clearer paragraph edge. + /// + /// #set text(overhang: false) + /// This justified text has a hyphen in + /// the paragraph's first line. Hanging + /// the hyphen slightly into the margin + /// results in a clearer paragraph edge. + /// ``` + #[default(true)] + pub overhang: bool, + + /// The top end of the conceptual frame around the text used for layout and + /// positioning. 
This affects the size of containers that hold text. + /// + /// ```example + /// #set rect(inset: 0pt) + /// #set text(size: 20pt) + /// + /// #set text(top-edge: "ascender") + /// #rect(fill: aqua)[Typst] + /// + /// #set text(top-edge: "cap-height") + /// #rect(fill: aqua)[Typst] + /// ``` + #[default(TextEdge::Metric(VerticalFontMetric::CapHeight))] + pub top_edge: TextEdge, + + /// The bottom end of the conceptual frame around the text used for layout + /// and positioning. This affects the size of containers that hold text. + /// + /// ```example + /// #set rect(inset: 0pt) + /// #set text(size: 20pt) + /// + /// #set text(bottom-edge: "baseline") + /// #rect(fill: aqua)[Typst] + /// + /// #set text(bottom-edge: "descender") + /// #rect(fill: aqua)[Typst] + /// ``` + #[default(TextEdge::Metric(VerticalFontMetric::Baseline))] + pub bottom_edge: TextEdge, + + /// An [ISO 639-1/2/3 language code.](https://en.wikipedia.org/wiki/ISO_639) + /// + /// Setting the correct language affects various parts of Typst: + /// + /// - The text processing pipeline can make more informed choices. + /// - Hyphenation will use the correct patterns for the language. + /// - [Smart quotes]($func/smartquote) turns into the correct quotes for the + /// language. + /// - And all other things which are language-aware. + /// + /// ```example + /// #set text(lang: "de") + /// #outline() + /// + /// = Einleitung + /// In diesem Dokument, ... + /// ``` + #[default(Lang::ENGLISH)] + pub lang: Lang, + + /// An [ISO 3166-1 alpha-2 region code.](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) + /// + /// This lets the text processing pipeline make more informed choices. + pub region: Option<Region>, + + /// The dominant direction for text and inline objects. Possible values are: + /// + /// - `{auto}`: Automatically infer the direction from the `lang` property. + /// - `{ltr}`: Layout text from left to right. + /// - `{rtl}`: Layout text from right to left. 
+ /// + /// When writing in right-to-left scripts like Arabic or Hebrew, you should + /// set the [text language]($func/text.lang) or direction. While individual + /// runs of text are automatically layouted in the correct direction, + /// setting the dominant direction gives the bidirectional reordering + /// algorithm the necessary information to correctly place punctuation and + /// inline objects. Furthermore, setting the direction affects the alignment + /// values `start` and `end`, which are equivalent to `left` and `right` in + /// `ltr` text and the other way around in `rtl` text. + /// + /// If you set this to `rtl` and experience bugs or in some way bad looking + /// output, please do get in touch with us through the + /// [contact form](https://typst.app/contact) or our + /// [Discord server]($community/#discord)! + /// + /// ```example + /// #set text(dir: rtl) + /// هذا عربي. + /// ``` + #[resolve] + pub dir: TextDir, + + /// Whether to hyphenate text to improve line breaking. When `{auto}`, text + /// will be hyphenated if and only if justification is enabled. + /// + /// Setting the [text language]($func/text.lang) ensures that the correct + /// hyphenation patterns are used. + /// + /// ```example + /// #set page(width: 200pt) + /// + /// #set par(justify: true) + /// This text illustrates how + /// enabling hyphenation can + /// improve justification. + /// + /// #set text(hyphenate: false) + /// This text illustrates how + /// enabling hyphenation can + /// improve justification. + /// ``` + #[resolve] + pub hyphenate: Hyphenate, + + /// Whether to apply kerning. + /// + /// When enabled, specific letter pairings move closer together or further + /// apart for a more visually pleasing result. The example below + /// demonstrates how decreasing the gap between the "T" and "o" results in a + /// more natural look. Setting this to `{false}` disables kerning by turning + /// off the OpenType `kern` font feature. 
+ /// + /// ```example + /// #set text(size: 25pt) + /// Totally + /// + /// #set text(kerning: false) + /// Totally + /// ``` + #[default(true)] + pub kerning: bool, + + /// Whether to apply stylistic alternates. + /// + /// Sometimes fonts contain alternative glyphs for the same codepoint. + /// Setting this to `{true}` switches to these by enabling the OpenType + /// `salt` font feature. + /// + /// ```example + /// #set text( + /// font: "IBM Plex Sans", + /// size: 20pt, + /// ) + /// + /// 0, a, g, ß + /// + /// #set text(alternates: true) + /// 0, a, g, ß + /// ``` + #[default(false)] + pub alternates: bool, + + /// Which stylistic set to apply. Font designers can categorize alternative + /// glyphs forms into stylistic sets. As this value is highly font-specific, + /// you need to consult your font to know which sets are available. When set + /// to an integer between `{1}` and `{20}`, enables the corresponding + /// OpenType font feature from `ss01`, ..., `ss20`. + pub stylistic_set: Option<StylisticSet>, + + /// Whether standard ligatures are active. + /// + /// Certain letter combinations like "fi" are often displayed as a single + /// merged glyph called a _ligature._ Setting this to `{false}` disables + /// these ligatures by turning off the OpenType `liga` and `clig` font + /// features. + /// + /// ```example + /// #set text(size: 20pt) + /// A fine ligature. + /// + /// #set text(ligatures: false) + /// A fine ligature. + /// ``` + #[default(true)] + pub ligatures: bool, + + /// Whether ligatures that should be used sparingly are active. Setting this + /// to `{true}` enables the OpenType `dlig` font feature. + #[default(false)] + pub discretionary_ligatures: bool, + + /// Whether historical ligatures are active. Setting this to `{true}` + /// enables the OpenType `hlig` font feature. + #[default(false)] + pub historical_ligatures: bool, + + /// Which kind of numbers / figures to select. 
When set to `{auto}`, the + /// default numbers for the font are used. + /// + /// ```example + /// #set text(font: "Noto Sans", 20pt) + /// #set text(number-type: "lining") + /// Number 9. + /// + /// #set text(number-type: "old-style") + /// Number 9. + /// ``` + pub number_type: Smart<NumberType>, + + /// The width of numbers / figures. When set to `{auto}`, the default + /// numbers for the font are used. + /// + /// ```example + /// #set text(font: "Noto Sans", 20pt) + /// #set text(number-width: "proportional") + /// A 12 B 34. \ + /// A 56 B 78. + /// + /// #set text(number-width: "tabular") + /// A 12 B 34. \ + /// A 56 B 78. + /// ``` + pub number_width: Smart<NumberWidth>, + + /// Whether to have a slash through the zero glyph. Setting this to `{true}` + /// enables the OpenType `zero` font feature. + /// + /// ```example + /// 0, #text(slashed-zero: true)[0] + /// ``` + #[default(false)] + pub slashed_zero: bool, + + /// Whether to turn numbers into fractions. Setting this to `{true}` + /// enables the OpenType `frac` font feature. + /// + /// It is not advisable to enable this property globally as it will mess + /// with all appearances of numbers after a slash (e.g., in URLs). Instead, + /// enable it locally when you want a fraction. + /// + /// ```example + /// 1/2 \ + /// #text(fractions: true)[1/2] + /// ``` + #[default(false)] + pub fractions: bool, + + /// Raw OpenType features to apply. + /// + /// - If given an array of strings, sets the features identified by the + /// strings to `{1}`. + /// - If given a dictionary mapping to numbers, sets the features + /// identified by the keys to the values. + /// + /// ```example + /// // Enable the `frac` feature manually. + /// #set text(features: ("frac",)) + /// 1/2 + /// ``` + #[fold] + pub features: FontFeatures, + + /// Content in which all text is styled according to the other arguments. + #[external] + #[required] + pub body: Content, + + /// The text. 
+ #[internal] + #[required] + pub text: EcoString, + + /// A delta to apply on the font weight. + #[internal] + #[fold] + pub delta: Delta, + + /// Whether the font style should be inverted. + #[internal] + #[fold] + #[default(false)] + pub emph: Toggle, + + /// Decorative lines. + #[internal] + #[fold] + pub deco: Decoration, + + /// A case transformation that should be applied to the text. + #[internal] + pub case: Option<Case>, + + /// Whether small capital glyphs should be used. ("smcp") + #[internal] + #[default(false)] + pub smallcaps: bool, +} + +impl TextElem { + /// Create a new packed text element. + pub fn packed(text: impl Into<EcoString>) -> Content { + Self::new(text.into()).pack() + } +} + +impl Construct for TextElem { + fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> { + // The text constructor is special: It doesn't create a text element. + // Instead, it leaves the passed argument structurally unchanged, but + // styles all text in it. + let styles = Self::set(args)?; + let body = args.expect::<Content>("body")?; + Ok(body.styled_with_map(styles)) + } +} + +impl PlainText for TextElem { + fn plain_text(&self, text: &mut EcoString) { + text.push_str(&self.text()); + } +} + +/// A lowercased font family like "arial". +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct FontFamily(EcoString); + +impl FontFamily { + /// Create a named font family variant. + pub fn new(string: &str) -> Self { + Self(string.to_lowercase().into()) + } + + /// The lowercased family name. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl Debug for FontFamily { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +cast! { + FontFamily, + self => self.0.into_value(), + string: EcoString => Self::new(&string), +} + +/// Font family fallback list. 
+#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +pub struct FontList(pub Vec<FontFamily>); + +impl IntoIterator for FontList { + type IntoIter = std::vec::IntoIter<FontFamily>; + type Item = FontFamily; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +cast! { + FontList, + self => if self.0.len() == 1 { + self.0.into_iter().next().unwrap().0.into_value() + } else { + self.0.into_value() + }, + family: FontFamily => Self(vec![family]), + values: Array => Self(values.into_iter().map(|v| v.cast()).collect::<StrResult<_>>()?), +} + +/// The size of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextSize(pub Length); + +impl Fold for TextSize { + type Output = Abs; + + fn fold(self, outer: Self::Output) -> Self::Output { + self.0.em.at(outer) + self.0.abs + } +} + +cast! { + TextSize, + self => self.0.into_value(), + v: Length => Self(v), +} + +/// Specifies the bottom or top edge of text. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum TextEdge { + /// An edge specified using one of the well-known font metrics. + Metric(VerticalFontMetric), + /// An edge specified as a length. + Length(Length), +} + +impl TextEdge { + /// Resolve the value of the text edge given a font's metrics. + pub fn resolve(self, styles: StyleChain, metrics: &FontMetrics) -> Abs { + match self { + Self::Metric(metric) => metrics.vertical(metric).resolve(styles), + Self::Length(length) => length.resolve(styles), + } + } +} + +cast! { + TextEdge, + self => match self { + Self::Metric(metric) => metric.into_value(), + Self::Length(length) => length.into_value(), + }, + v: VerticalFontMetric => Self::Metric(v), + v: Length => Self::Length(v), +} + +/// The direction of text and inline objects in their line. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct TextDir(pub Smart<Dir>); + +cast! 
{ + TextDir, + self => self.0.into_value(), + v: Smart<Dir> => { + if v.map_or(false, |dir| dir.axis() == Axis::Y) { + bail!("text direction must be horizontal"); + } + Self(v) + }, +} + +impl Resolve for TextDir { + type Output = Dir; + + fn resolve(self, styles: StyleChain) -> Self::Output { + match self.0 { + Smart::Auto => TextElem::lang_in(styles).dir(), + Smart::Custom(dir) => dir, + } + } +} + +/// Whether to hyphenate text. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct Hyphenate(pub Smart<bool>); + +cast! { + Hyphenate, + self => self.0.into_value(), + v: Smart<bool> => Self(v), +} + +impl Resolve for Hyphenate { + type Output = bool; + + fn resolve(self, styles: StyleChain) -> Self::Output { + match self.0 { + Smart::Auto => ParElem::justify_in(styles), + Smart::Custom(v) => v, + } + } +} + +/// A stylistic set in a font. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct StylisticSet(u8); + +impl StylisticSet { + /// Create a new set, clamping to 1-20. + pub fn new(index: u8) -> Self { + Self(index.clamp(1, 20)) + } + + /// Get the value, guaranteed to be 1-20. + pub fn get(self) -> u8 { + self.0 + } +} + +cast! { + StylisticSet, + self => self.0.into_value(), + v: i64 => match v { + 1 ..= 20 => Self::new(v as u8), + _ => bail!("stylistic set must be between 1 and 20"), + }, +} + +/// Which kind of numbers / figures to select. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum NumberType { + /// Numbers that fit well with capital text (the OpenType `lnum` + /// font feature). + Lining, + /// Numbers that fit well into a flow of upper- and lowercase text (the + /// OpenType `onum` font feature). + OldStyle, +} + +/// The width of numbers / figures. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum NumberWidth { + /// Numbers with glyph-specific widths (the OpenType `pnum` font feature). + Proportional, + /// Numbers of equal width (the OpenType `tnum` font feature). 
+ Tabular, +} + +/// OpenType font features settings. +#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +pub struct FontFeatures(pub Vec<(Tag, u32)>); + +cast! { + FontFeatures, + self => self.0 + .into_iter() + .map(|(tag, num)| { + let bytes = tag.to_bytes(); + let key = std::str::from_utf8(&bytes).unwrap_or_default(); + (key.into(), num.into_value()) + }) + .collect::<Dict>() + .into_value(), + values: Array => Self(values + .into_iter() + .map(|v| { + let tag = v.cast::<EcoString>()?; + Ok((Tag::from_bytes_lossy(tag.as_bytes()), 1)) + }) + .collect::<StrResult<_>>()?), + values: Dict => Self(values + .into_iter() + .map(|(k, v)| { + let num = v.cast::<u32>()?; + let tag = Tag::from_bytes_lossy(k.as_bytes()); + Ok((tag, num)) + }) + .collect::<StrResult<_>>()?), +} + +impl Fold for FontFeatures { + type Output = Self; + + fn fold(mut self, outer: Self::Output) -> Self::Output { + self.0.extend(outer.0); + self + } +} diff --git a/crates/typst-library/src/text/quotes.rs b/crates/typst-library/src/text/quotes.rs new file mode 100644 index 00000000..d0798064 --- /dev/null +++ b/crates/typst-library/src/text/quotes.rs @@ -0,0 +1,209 @@ +use typst::syntax::is_newline; + +use crate::prelude::*; + +/// A language-aware quote that reacts to its context. +/// +/// Automatically turns into an appropriate opening or closing quote based on +/// the active [text language]($func/text.lang). +/// +/// ## Example { #example } +/// ```example +/// "This is in quotes." +/// +/// #set text(lang: "de") +/// "Das ist in Anführungszeichen." +/// +/// #set text(lang: "fr") +/// "C'est entre guillemets." +/// ``` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax: The normal quote characters +/// (`'` and `"`). Typst automatically makes your quotes smart. +/// +/// Display: Smart Quote +/// Category: text +#[element] +pub struct SmartQuoteElem { + /// Whether this should be a double quote. 
+ #[default(true)] + pub double: bool, + + /// Whether smart quotes are enabled. + /// + /// To disable smartness for a single quote, you can also escape it with a + /// backslash. + /// + /// ```example + /// #set smartquote(enabled: false) + /// + /// These are "dumb" quotes. + /// ``` + #[default(true)] + pub enabled: bool, +} + +/// State machine for smart quote substitution. +#[derive(Debug, Clone)] +pub struct Quoter { + /// How many quotes have been opened. + quote_depth: usize, + /// Whether an opening quote might follow. + expect_opening: bool, + /// Whether the last character was numeric. + last_num: bool, +} + +impl Quoter { + /// Start quoting. + pub fn new() -> Self { + Self { + quote_depth: 0, + expect_opening: true, + last_num: false, + } + } + + /// Process the last seen character. + pub fn last(&mut self, c: char) { + self.expect_opening = is_ignorable(c) || is_opening_bracket(c); + self.last_num = c.is_numeric(); + } + + /// Process and substitute a quote. + pub fn quote<'a>( + &mut self, + quotes: &Quotes<'a>, + double: bool, + peeked: Option<char>, + ) -> &'a str { + let peeked = peeked.unwrap_or(' '); + if self.expect_opening { + self.quote_depth += 1; + quotes.open(double) + } else if self.quote_depth > 0 + && (peeked.is_ascii_punctuation() || is_ignorable(peeked)) + { + self.quote_depth -= 1; + quotes.close(double) + } else if self.last_num { + quotes.prime(double) + } else { + quotes.fallback(double) + } + } +} + +impl Default for Quoter { + fn default() -> Self { + Self::new() + } +} + +fn is_ignorable(c: char) -> bool { + c.is_whitespace() || is_newline(c) +} + +fn is_opening_bracket(c: char) -> bool { + matches!(c, '(' | '{' | '[') +} + +/// Decides which quotes to substitute smart quotes with. +pub struct Quotes<'s> { + /// The opening single quote. + pub single_open: &'s str, + /// The closing single quote. + pub single_close: &'s str, + /// The opening double quote. + pub double_open: &'s str, + /// The closing double quote. 
+ pub double_close: &'s str, +} + +impl<'s> Quotes<'s> { + /// Create a new `Quotes` struct with the defaults for a language and + /// region. + /// + /// The language should be specified as an all-lowercase ISO 639-1 code, the + /// region as an all-uppercase ISO 3166-alpha2 code. + /// + /// Currently, the supported languages are: English, Czech, Danish, German, + /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian, + /// Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, Swedish, French, + /// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and + /// Norwegian. + /// + /// For unknown languages, the English quotes are used. + pub fn from_lang(lang: Lang, region: Option<Region>) -> Self { + let region = region.as_ref().map(Region::as_str); + let (single_open, single_close, double_open, double_close) = match lang.as_str() { + "de" if matches!(region, Some("CH" | "LI")) => ("‹", "›", "«", "»"), + "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => { + ("‚", "‘", "„", "“") + } + "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"), + "bs" | "fi" | "sv" => ("’", "’", "”", "”"), + "es" if matches!(region, Some("ES") | None) => ("“", "”", "«", "»"), + "hu" | "pl" | "ro" => ("’", "’", "„", "”"), + "ru" | "no" | "nb" | "nn" | "ua" => ("’", "’", "«", "»"), + _ if lang.dir() == Dir::RTL => ("’", "‘", "”", "“"), + _ => return Self::default(), + }; + + Self { + single_open, + single_close, + double_open, + double_close, + } + } + + /// The opening quote. + fn open(&self, double: bool) -> &'s str { + if double { + self.double_open + } else { + self.single_open + } + } + + /// The closing quote. + fn close(&self, double: bool) -> &'s str { + if double { + self.double_close + } else { + self.single_close + } + } + + /// Which character should be used as a prime. + fn prime(&self, double: bool) -> &'static str { + if double { + "″" + } else { + "′" + } + } + + /// Which character should be used as a fallback quote. 
+ fn fallback(&self, double: bool) -> &'static str { + if double { + "\"" + } else { + "’" + } + } +} + +impl Default for Quotes<'_> { + /// Returns the english quotes as default. + fn default() -> Self { + Self { + single_open: "‘", + single_close: "’", + double_open: "“", + double_close: "”", + } + } +} diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs new file mode 100644 index 00000000..24f39bb6 --- /dev/null +++ b/crates/typst-library/src/text/raw.rs @@ -0,0 +1,398 @@ +use once_cell::sync::Lazy; +use syntect::highlighting as synt; +use typst::syntax::{self, LinkedNode}; + +use super::{ + FontFamily, FontList, Hyphenate, LinebreakElem, SmartQuoteElem, TextElem, TextSize, +}; +use crate::layout::BlockElem; +use crate::meta::{Figurable, LocalName}; +use crate::prelude::*; + +/// Raw text with optional syntax highlighting. +/// +/// Displays the text verbatim and in a monospace font. This is typically used +/// to embed computer code into your document. +/// +/// ## Example { #example } +/// ````example +/// Adding `rbx` to `rcx` gives +/// the desired result. +/// +/// ```rust +/// fn main() { +/// println!("Hello World!"); +/// } +/// ``` +/// ```` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax. You can enclose text in 1 or 3+ +/// backticks (`` ` ``) to make it raw. Two backticks produce empty raw text. +/// When you use three or more backticks, you can additionally specify a +/// language tag for syntax highlighting directly after the opening backticks. +/// Within raw blocks, everything is rendered as is, in particular, there are no +/// escape sequences. +/// +/// Display: Raw Text / Code +/// Category: text +#[element(Synthesize, Show, Finalize, LocalName, Figurable, PlainText)] +pub struct RawElem { + /// The raw text. + /// + /// You can also use raw blocks creatively to create custom syntaxes for + /// your automations. 
+ /// + /// ````example + /// // Parse numbers in raw blocks with the + /// // `mydsl` tag and sum them up. + /// #show raw.where(lang: "mydsl"): it => { + /// let sum = 0 + /// for part in it.text.split("+") { + /// sum += int(part.trim()) + /// } + /// sum + /// } + /// + /// ```mydsl + /// 1 + 2 + 3 + 4 + 5 + /// ``` + /// ```` + #[required] + pub text: EcoString, + + /// Whether the raw text is displayed as a separate block. + /// + /// In markup mode, using one-backtick notation makes this `{false}`, + /// whereas using three-backtick notation makes it `{true}`. + /// + /// ````example + /// // Display inline code in a small box + /// // that retains the correct baseline. + /// #show raw.where(block: false): box.with( + /// fill: luma(240), + /// inset: (x: 3pt, y: 0pt), + /// outset: (y: 3pt), + /// radius: 2pt, + /// ) + /// + /// // Display block code in a larger block + /// // with more padding. + /// #show raw.where(block: true): block.with( + /// fill: luma(240), + /// inset: 10pt, + /// radius: 4pt, + /// ) + /// + /// With `rg`, you can search through your files quickly. + /// + /// ```bash + /// rg "Hello World" + /// ``` + /// ```` + #[default(false)] + pub block: bool, + + /// The language to syntax-highlight in. + /// + /// Apart from typical language tags known from Markdown, this supports the + /// `{"typ"}` and `{"typc"}` tags for Typst markup and Typst code, + /// respectively. + /// + /// ````example + /// ```typ + /// This is *Typst!* + /// ``` + /// ```` + pub lang: Option<EcoString>, + + /// The horizontal alignment that each line in a raw block should have. + /// This option is ignored if this is not a raw block (if specified + /// `block: false` or single backticks were used in markup mode). 
+ /// + /// By default, this is set to `{start}`, meaning that raw text is + /// aligned towards the start of the text direction inside the block + /// by default, regardless of the current context's alignment (allowing + /// you to center the raw block itself without centering the text inside + /// it, for example). + /// + /// ````example + /// #set raw(align: center) + /// + /// ```typc + /// let f(x) = x + /// code = "centered" + /// ``` + /// ```` + #[default(HorizontalAlign(GenAlign::Start))] + pub align: HorizontalAlign, +} + +impl RawElem { + /// The supported language names and tags. + pub fn languages() -> Vec<(&'static str, Vec<&'static str>)> { + SYNTAXES + .syntaxes() + .iter() + .map(|syntax| { + ( + syntax.name.as_str(), + syntax.file_extensions.iter().map(|s| s.as_str()).collect(), + ) + }) + .chain([("Typst", vec!["typ"]), ("Typst (code)", vec!["typc"])]) + .collect() + } +} + +impl Synthesize for RawElem { + fn synthesize(&mut self, _vt: &mut Vt, styles: StyleChain) -> SourceResult<()> { + self.push_lang(self.lang(styles)); + Ok(()) + } +} + +impl Show for RawElem { + #[tracing::instrument(name = "RawElem::show", skip_all)] + fn show(&self, _: &mut Vt, styles: StyleChain) -> SourceResult<Content> { + let text = self.text(); + let lang = self.lang(styles).as_ref().map(|s| s.to_lowercase()); + let foreground = THEME + .settings + .foreground + .map(to_typst) + .map_or(Color::BLACK, Color::from); + + let mut realized = if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) { + let root = match lang.as_deref() { + Some("typc") => syntax::parse_code(&text), + _ => syntax::parse(&text), + }; + + let mut seq = vec![]; + let highlighter = synt::Highlighter::new(&THEME); + highlight_themed( + &LinkedNode::new(&root), + vec![], + &highlighter, + &mut |node, style| { + seq.push(styled(&text[node.range()], foreground.into(), style)); + }, + ); + + Content::sequence(seq) + } else if let Some(syntax) = + lang.and_then(|token| 
SYNTAXES.find_syntax_by_token(&token)) + { + let mut seq = vec![]; + let mut highlighter = syntect::easy::HighlightLines::new(syntax, &THEME); + for (i, line) in text.lines().enumerate() { + if i != 0 { + seq.push(LinebreakElem::new().pack()); + } + + for (style, piece) in + highlighter.highlight_line(line, &SYNTAXES).into_iter().flatten() + { + seq.push(styled(piece, foreground.into(), style)); + } + } + + Content::sequence(seq) + } else { + TextElem::packed(text) + }; + + if self.block(styles) { + // Align the text before inserting it into the block. + realized = realized.aligned(Axes::with_x(Some(self.align(styles).into()))); + realized = BlockElem::new().with_body(Some(realized)).pack(); + } + + Ok(realized) + } +} + +impl Finalize for RawElem { + fn finalize(&self, realized: Content, _: StyleChain) -> Content { + let mut styles = Styles::new(); + styles.set(TextElem::set_overhang(false)); + styles.set(TextElem::set_hyphenate(Hyphenate(Smart::Custom(false)))); + styles.set(TextElem::set_size(TextSize(Em::new(0.8).into()))); + styles + .set(TextElem::set_font(FontList(vec![FontFamily::new("DejaVu Sans Mono")]))); + styles.set(SmartQuoteElem::set_enabled(false)); + realized.styled_with_map(styles) + } +} + +impl LocalName for RawElem { + fn local_name(&self, lang: Lang, _: Option<Region>) -> &'static str { + match lang { + Lang::ALBANIAN => "List", + Lang::ARABIC => "قائمة", + Lang::BOKMÅL => "Utskrift", + Lang::CHINESE => "代码", + Lang::CZECH => "Seznam", + Lang::DANISH => "Liste", + Lang::DUTCH => "Listing", + Lang::FILIPINO => "Listahan", + Lang::FRENCH => "Liste", + Lang::GERMAN => "Listing", + Lang::ITALIAN => "Codice", + Lang::NYNORSK => "Utskrift", + Lang::POLISH => "Program", + Lang::RUSSIAN => "Листинг", + Lang::SLOVENIAN => "Program", + Lang::SWEDISH => "Listing", + Lang::TURKISH => "Liste", + Lang::UKRAINIAN => "Лістинг", + Lang::VIETNAMESE => "Chương trình", // TODO: This may be wrong. 
+ Lang::ENGLISH | _ => "Listing", + } + } +} + +impl Figurable for RawElem {} + +impl PlainText for RawElem { + fn plain_text(&self, text: &mut EcoString) { + text.push_str(&self.text()); + } +} + +/// Highlight a syntax node in a theme by calling `f` with ranges and their +/// styles. +fn highlight_themed<F>( + node: &LinkedNode, + scopes: Vec<syntect::parsing::Scope>, + highlighter: &synt::Highlighter, + f: &mut F, +) where + F: FnMut(&LinkedNode, synt::Style), +{ + if node.children().len() == 0 { + let style = highlighter.style_for_stack(&scopes); + f(node, style); + return; + } + + for child in node.children() { + let mut scopes = scopes.clone(); + if let Some(tag) = typst::ide::highlight(&child) { + scopes.push(syntect::parsing::Scope::new(tag.tm_scope()).unwrap()) + } + highlight_themed(&child, scopes, highlighter, f); + } +} + +/// Style a piece of text with a syntect style. +fn styled(piece: &str, foreground: Paint, style: synt::Style) -> Content { + let mut body = TextElem::packed(piece); + + let paint = to_typst(style.foreground).into(); + if paint != foreground { + body = body.styled(TextElem::set_fill(paint)); + } + + if style.font_style.contains(synt::FontStyle::BOLD) { + body = body.strong(); + } + + if style.font_style.contains(synt::FontStyle::ITALIC) { + body = body.emph(); + } + + if style.font_style.contains(synt::FontStyle::UNDERLINE) { + body = body.underlined(); + } + + body +} + +fn to_typst(synt::Color { r, g, b, a }: synt::Color) -> RgbaColor { + RgbaColor { r, g, b, a } +} + +fn to_syn(RgbaColor { r, g, b, a }: RgbaColor) -> synt::Color { + synt::Color { r, g, b, a } +} + +/// The syntect syntax definitions. +/// +/// Code for syntax set generation is below. 
The `syntaxes` directory is from +/// <https://github.com/sharkdp/bat/tree/master/assets/syntaxes> +/// +/// ```ignore +/// fn main() { +/// let mut builder = syntect::parsing::SyntaxSet::load_defaults_nonewlines().into_builder(); +/// builder.add_from_folder("syntaxes/02_Extra", false).unwrap(); +/// syntect::dumps::dump_to_file(&builder.build(), "syntect.bin").unwrap(); +/// } +/// ``` +/// +/// The following syntaxes are disabled due to compatibility issues: +/// ```text +/// syntaxes/02_Extra/Assembly (ARM).sublime-syntax +/// syntaxes/02_Extra/Elixir/Regular Expressions (Elixir).sublime-syntax +/// syntaxes/02_Extra/JavaScript (Babel).sublime-syntax +/// syntaxes/02_Extra/LiveScript.sublime-syntax +/// syntaxes/02_Extra/PowerShell.sublime-syntax +/// syntaxes/02_Extra/SCSS_Sass/Syntaxes/Sass.sublime-syntax +/// syntaxes/02_Extra/SLS/SLS.sublime-syntax +/// syntaxes/02_Extra/VimHelp.sublime-syntax +/// syntaxes/02_Extra/cmd-help/syntaxes/cmd-help.sublime-syntax +/// ``` +pub static SYNTAXES: Lazy<syntect::parsing::SyntaxSet> = + Lazy::new(|| syntect::dumps::from_binary(include_bytes!("../../assets/syntect.bin"))); + +/// The default theme used for syntax highlighting. 
+pub static THEME: Lazy<synt::Theme> = Lazy::new(|| synt::Theme { + name: Some("Typst Light".into()), + author: Some("The Typst Project Developers".into()), + settings: synt::ThemeSettings::default(), + scopes: vec![ + item("comment", Some("#8a8a8a"), None), + item("constant.character.escape", Some("#1d6c76"), None), + item("markup.bold", None, Some(synt::FontStyle::BOLD)), + item("markup.italic", None, Some(synt::FontStyle::ITALIC)), + item("markup.underline", None, Some(synt::FontStyle::UNDERLINE)), + item("markup.raw", Some("#818181"), None), + item("string.other.math.typst", None, None), + item("punctuation.definition.math", Some("#298e0d"), None), + item("keyword.operator.math", Some("#1d6c76"), None), + item("markup.heading, entity.name.section", None, Some(synt::FontStyle::BOLD)), + item( + "markup.heading.typst", + None, + Some(synt::FontStyle::BOLD | synt::FontStyle::UNDERLINE), + ), + item("punctuation.definition.list", Some("#8b41b1"), None), + item("markup.list.term", None, Some(synt::FontStyle::BOLD)), + item("entity.name.label, markup.other.reference", Some("#1d6c76"), None), + item("keyword, constant.language, variable.language", Some("#d73a49"), None), + item("storage.type, storage.modifier", Some("#d73a49"), None), + item("constant", Some("#b60157"), None), + item("string", Some("#298e0d"), None), + item("entity.name, variable.function, support", Some("#4b69c6"), None), + item("support.macro", Some("#16718d"), None), + item("meta.annotation", Some("#301414"), None), + item("entity.other, meta.interpolation", Some("#8b41b1"), None), + ], +}); + +/// Create a syntect theme item. 
+fn item( + scope: &str, + color: Option<&str>, + font_style: Option<synt::FontStyle>, +) -> synt::ThemeItem { + synt::ThemeItem { + scope: scope.parse().unwrap(), + style: synt::StyleModifier { + foreground: color.map(|s| to_syn(s.parse::<RgbaColor>().unwrap())), + background: None, + font_style, + }, + } +} diff --git a/crates/typst-library/src/text/shaping.rs b/crates/typst-library/src/text/shaping.rs new file mode 100644 index 00000000..ec8812fe --- /dev/null +++ b/crates/typst-library/src/text/shaping.rs @@ -0,0 +1,973 @@ +use std::borrow::Cow; +use std::ops::Range; +use std::str::FromStr; + +use az::SaturatingAs; +use rustybuzz::{Feature, Tag, UnicodeBuffer}; +use typst::font::{Font, FontStyle, FontVariant}; +use typst::util::SliceExt; +use unicode_script::{Script, UnicodeScript}; + +use super::{decorate, FontFamily, NumberType, NumberWidth, TextElem}; +use crate::layout::SpanMapper; +use crate::prelude::*; + +/// The result of shaping text. +/// +/// This type contains owned or borrowed shaped text runs, which can be +/// measured, used to reshape substrings more quickly and converted into a +/// frame. +pub struct ShapedText<'a> { + /// The start of the text in the full paragraph. + pub base: usize, + /// The text that was shaped. + pub text: &'a str, + /// The text direction. + pub dir: Dir, + /// The text language. + pub lang: Lang, + /// The text region. + pub region: Option<Region>, + /// The text's style properties. + pub styles: StyleChain<'a>, + /// The font variant. + pub variant: FontVariant, + /// The font size. + pub size: Abs, + /// The width of the text's bounding box. + pub width: Abs, + /// The shaped glyphs. + pub glyphs: Cow<'a, [ShapedGlyph]>, +} + +/// A single glyph resulting from shaping. +#[derive(Debug, Clone)] +pub struct ShapedGlyph { + /// The font the glyph is contained in. + pub font: Font, + /// The glyph's index in the font. + pub glyph_id: u16, + /// The advance width of the glyph. 
+ pub x_advance: Em, + /// The horizontal offset of the glyph. + pub x_offset: Em, + /// The vertical offset of the glyph. + pub y_offset: Em, + /// The adjustability of the glyph. + pub adjustability: Adjustability, + /// The byte range of this glyph's cluster in the full paragraph. A cluster + /// is a sequence of one or multiple glyphs that cannot be separated and + /// must always be treated as a union. + /// + /// The range values of the glyphs in a [`ShapedText`] should not + /// overlap with each other, and they should be monotonically + /// increasing (for left-to-right or top-to-bottom text) or + /// monotonically decreasing (for right-to-left or bottom-to-top + /// text). + pub range: Range<usize>, + /// Whether splitting the shaping result before this glyph would yield the + /// same results as shaping the parts to both sides of `text_index` + /// separately. + pub safe_to_break: bool, + /// The first char in this glyph's cluster. + pub c: char, + /// The source code location of the glyph and its byte offset within it. + pub span: (Span, u16), +} + +#[derive(Debug, Clone, Default)] +pub struct Adjustability { + /// The left and right strechability + pub stretchability: (Em, Em), + /// The left and right shrinkability + pub shrinkability: (Em, Em), +} + +impl ShapedGlyph { + /// Whether the glyph is a space. + pub fn is_space(&self) -> bool { + matches!(self.c, ' ' | '\u{00A0}' | ' ') + } + + /// Whether the glyph is justifiable. + pub fn is_justifiable(&self) -> bool { + // GB style is not relevant here. 
+ self.is_space() + || self.is_cjk_script() + || self.is_cjk_left_aligned_punctuation(true) + || self.is_cjk_right_aligned_punctuation() + || self.is_cjk_center_aligned_punctuation(true) + } + + pub fn is_cjk_script(&self) -> bool { + use Script::*; + // U+30FC: Katakana-Hiragana Prolonged Sound Mark + matches!(self.c.script(), Hiragana | Katakana | Han) || self.c == '\u{30FC}' + } + + pub fn is_cjk_punctuation(&self) -> bool { + self.is_cjk_left_aligned_punctuation(true) + || self.is_cjk_right_aligned_punctuation() + || self.is_cjk_center_aligned_punctuation(true) + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool { + // CJK quotation marks shares codepoints with latin quotation marks. + // But only the CJK ones have full width. + if matches!(self.c, '”' | '’') + && self.x_advance + self.stretchability().1 == Em::one() + { + return true; + } + + if gb_style && matches!(self.c, ',' | '。' | '、' | ':' | ';') { + return true; + } + + matches!(self.c, '》' | ')' | '』' | '」') + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_right_aligned_punctuation(&self) -> bool { + // CJK quotation marks shares codepoints with latin quotation marks. + // But only the CJK ones have full width. 
+ if matches!(self.c, '“' | '‘') + && self.x_advance + self.stretchability().0 == Em::one() + { + return true; + } + + matches!(self.c, '《' | '(' | '『' | '「') + } + + /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> + pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool { + if !gb_style && matches!(self.c, ',' | '。' | '、' | ':' | ';') { + return true; + } + + // U+30FB: Katakana Middle Dot + matches!(self.c, '\u{30FB}') + } + + pub fn base_adjustability(&self, gb_style: bool) -> Adjustability { + let width = self.x_advance; + if self.is_space() { + Adjustability { + // The number for spaces is from Knuth-Plass' paper + stretchability: (Em::zero(), width / 2.0), + shrinkability: (Em::zero(), width / 3.0), + } + } else if self.is_cjk_left_aligned_punctuation(gb_style) { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (Em::zero(), width / 2.0), + } + } else if self.is_cjk_right_aligned_punctuation() { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (width / 2.0, Em::zero()), + } + } else if self.is_cjk_center_aligned_punctuation(gb_style) { + Adjustability { + stretchability: (Em::zero(), Em::zero()), + shrinkability: (width / 4.0, width / 4.0), + } + } else { + Adjustability::default() + } + } + + /// The stretchability of the character. + pub fn stretchability(&self) -> (Em, Em) { + self.adjustability.stretchability + } + + /// The shrinkability of the character. + pub fn shrinkability(&self) -> (Em, Em) { + self.adjustability.shrinkability + } + + /// Shrink the width of glyph on the left side. + pub fn shrink_left(&mut self, amount: Em) { + self.x_offset -= amount; + self.x_advance -= amount; + self.adjustability.shrinkability.0 -= amount; + self.adjustability.stretchability.0 += amount; + } + + /// Shrink the width of glyph on the right side. 
pub fn shrink_right(&mut self, amount: Em) {
    // Unlike a left-side shrink, the glyph's `x_offset` is left untouched;
    // only the advance is reduced. The amount is moved from the right
    // shrinkability to the right stretchability.
    self.x_advance -= amount;
    self.adjustability.shrinkability.1 -= amount;
    self.adjustability.stretchability.1 += amount;
}
} // Closes the `impl` block that was opened before this chunk.

/// A side you can go toward.
enum Side {
    /// To the left-hand side.
    Left,
    /// To the right-hand side.
    Right,
}

impl<'a> ShapedText<'a> {
    /// Build the shaped text's frame.
    ///
    /// `justification_ratio` scales each glyph's per-side adjustability: the
    /// shrinkability is used when the ratio is negative and the
    /// stretchability otherwise. `extra_justification` is additionally
    /// added to the right side of every
    /// [justifiable glyph](ShapedGlyph::is_justifiable).
    ///
    /// The frame starts out at the text's unjustified width and is widened
    /// by the adjustment applied to each glyph.
    pub fn build(
        &self,
        vt: &Vt,
        justification_ratio: f64,
        extra_justification: Abs,
    ) -> Frame {
        let (top, bottom) = self.measure(vt);
        let size = Size::new(self.width, top + bottom);

        let mut offset = Abs::zero();
        let mut frame = Frame::new(size);
        frame.set_baseline(top);

        let shift = TextElem::baseline_in(self.styles);
        let lang = TextElem::lang_in(self.styles);
        let decos = TextElem::deco_in(self.styles);
        let fill = TextElem::fill_in(self.styles);

        // Consecutive glyphs sharing a font and vertical offset become one
        // text item.
        for ((font, y_offset), group) in
            self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
        {
            // The text range covered by the whole group.
            let mut range = group[0].range.clone();
            for glyph in group {
                range.start = range.start.min(glyph.range.start);
                range.end = range.end.max(glyph.range.end);
            }

            let pos = Point::new(offset, top + shift - y_offset.at(self.size));
            let glyphs = group
                .iter()
                .map(|glyph| {
                    let adjustability_left = if justification_ratio < 0.0 {
                        glyph.shrinkability().0
                    } else {
                        glyph.stretchability().0
                    };
                    let adjustability_right = if justification_ratio < 0.0 {
                        glyph.shrinkability().1
                    } else {
                        glyph.stretchability().1
                    };

                    let justification_left = adjustability_left * justification_ratio;
                    let mut justification_right =
                        adjustability_right * justification_ratio;
                    if glyph.is_justifiable() {
                        justification_right +=
                            Em::from_length(extra_justification, self.size)
                    }

                    // Side effect inside the `map`: grow the frame by what
                    // this glyph gained (or lost) through justification.
                    frame.size_mut().x += justification_left.at(self.size)
                        + justification_right.at(self.size);

                    Glyph {
                        id: glyph.glyph_id,
                        x_advance: glyph.x_advance
                            + justification_left
                            + justification_right,
                        x_offset: glyph.x_offset + justification_left,
                        // Glyph ranges are stored relative to the start of
                        // the item's text.
                        range: (glyph.range.start - range.start).saturating_as()
                            ..(glyph.range.end - range.start).saturating_as(),
                        span: glyph.span,
                    }
                })
                .collect();

            let item = TextItem {
                font,
                size: self.size,
                lang,
                fill: fill.clone(),
                // `range` is paragraph-relative, `self.text` starts at
                // `self.base`.
                text: self.text[range.start - self.base..range.end - self.base].into(),
                glyphs,
            };

            // Remember the layer before decorating so that the text item is
            // inserted at that layer index after the decorations are added.
            let layer = frame.layer();
            let width = item.width();

            // Apply line decorations.
            for deco in &decos {
                decorate(&mut frame, deco, &item, shift, pos, width);
            }

            frame.insert(layer, pos, FrameItem::Text(item));
            offset += width;
        }

        // Apply metadata.
        frame.meta(self.styles, false);

        frame
    }

    /// Measure the top and bottom extent of this text.
    ///
    /// Returns `(top, bottom)`, each the maximum over all fonts used by the
    /// glyphs, or taken from the first available font family if there are
    /// no glyphs.
    fn measure(&self, vt: &Vt) -> (Abs, Abs) {
        let mut top = Abs::zero();
        let mut bottom = Abs::zero();

        let top_edge = TextElem::top_edge_in(self.styles);
        let bottom_edge = TextElem::bottom_edge_in(self.styles);

        // Expand top and bottom by reading the font's vertical metrics.
        let mut expand = |font: &Font| {
            let metrics = font.metrics();
            top.set_max(top_edge.resolve(self.styles, metrics));
            bottom.set_max(-bottom_edge.resolve(self.styles, metrics));
        };

        if self.glyphs.is_empty() {
            // When there are no glyphs, we just use the vertical metrics of the
            // first available font.
            let world = vt.world;
            for family in families(self.styles) {
                if let Some(font) = world
                    .book()
                    .select(family.as_str(), self.variant)
                    .and_then(|id| world.font(id))
                {
                    expand(&font);
                    break;
                }
            }
        } else {
            for g in self.glyphs.iter() {
                expand(&g.font);
            }
        }

        (top, bottom)
    }

    /// How many glyphs are in the text where we can insert additional
    /// space when encountering underfull lines.
+ pub fn justifiables(&self) -> usize { + self.glyphs.iter().filter(|g| g.is_justifiable()).count() + } + + /// Whether the last glyph is a CJK character which should not be justified + /// on line end. + pub fn cjk_justifiable_at_last(&self) -> bool { + self.glyphs + .last() + .map(|g| g.is_cjk_script() || g.is_cjk_punctuation()) + .unwrap_or(false) + } + + /// The stretchability of the text. + pub fn stretchability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.stretchability().0 + g.stretchability().1) + .sum::<Em>() + .at(self.size) + } + + /// The shrinkability of the text + pub fn shrinkability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.shrinkability().0 + g.shrinkability().1) + .sum::<Em>() + .at(self.size) + } + + /// Reshape a range of the shaped text, reusing information from this + /// shaping process if possible. + /// + /// The text `range` is relative to the whole paragraph. + pub fn reshape( + &'a self, + vt: &Vt, + spans: &SpanMapper, + text_range: Range<usize>, + ) -> ShapedText<'a> { + let text = &self.text[text_range.start - self.base..text_range.end - self.base]; + if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) { + #[cfg(debug_assertions)] + assert_all_glyphs_in_range(glyphs, text, text_range.clone()); + Self { + base: text_range.start, + text, + dir: self.dir, + lang: self.lang, + region: self.region, + styles: self.styles, + size: self.size, + variant: self.variant, + width: glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size), + glyphs: Cow::Borrowed(glyphs), + } + } else { + shape( + vt, + text_range.start, + text, + spans, + self.styles, + self.dir, + self.lang, + self.region, + ) + } + } + + /// Push a hyphen to end of the text. 
pub fn push_hyphen(&mut self, vt: &Vt) {
    // Use the first font family that resolves to a font containing a `-`
    // glyph with a horizontal advance. If no family qualifies, nothing is
    // pushed; the `find_map` result is deliberately discarded.
    families(self.styles).find_map(|family| {
        let world = vt.world;
        let font = world
            .book()
            .select(family.as_str(), self.variant)
            .and_then(|id| world.font(id))?;
        let ttf = font.ttf();
        let glyph_id = ttf.glyph_index('-')?;
        let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
        // The hyphen covers an empty text range at the end of the last
        // glyph (or the default range if there are no glyphs).
        let range = self
            .glyphs
            .last()
            .map(|g| g.range.end..g.range.end)
            .unwrap_or_default();
        self.width += x_advance.at(self.size);
        self.glyphs.to_mut().push(ShapedGlyph {
            font,
            glyph_id: glyph_id.0,
            x_advance,
            x_offset: Em::zero(),
            y_offset: Em::zero(),
            adjustability: Adjustability::default(),
            range,
            safe_to_break: true,
            c: '-',
            span: (Span::detached(), 0),
        });
        Some(())
    });
}

/// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break.
///
/// Returns `None` if either end cannot be mapped to a safe-to-break glyph
/// boundary.
fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
    let Range { mut start, mut end } = text_range;
    // For a non-positive direction the text-order start and end are swapped
    // before they are looked up as the left and right glyph boundary.
    if !self.dir.is_positive() {
        std::mem::swap(&mut start, &mut end);
    }

    let left = self.find_safe_to_break(start, Side::Left)?;
    let right = self.find_safe_to_break(end, Side::Right)?;
    Some(&self.glyphs[left..right])
}

/// Find the glyph offset matching the text index that is most towards the
/// given side and safe-to-break.
///
/// Returns `None` if no glyph starts at `text_index` or if the glyph that
/// was found is not safe to break at.
fn find_safe_to_break(&self, text_index: usize, towards: Side) -> Option<usize> {
    let ltr = self.dir.is_positive();

    // Handle edge cases.
    let len = self.glyphs.len();
    if text_index == self.base {
        return Some(if ltr { 0 } else { len });
    } else if text_index == self.base + self.text.len() {
        return Some(if ltr { len } else { 0 });
    }

    // Find any glyph with the text index.
    let mut idx = self
        .glyphs
        .binary_search_by(|g| {
            let ordering = g.range.start.cmp(&text_index);
            // The comparison is flipped for a non-positive direction.
            if ltr {
                ordering
            } else {
                ordering.reverse()
            }
        })
        .ok()?;

    // Stepping function: one glyph to the left or to the right, `None` on
    // overflow/underflow.
    let next = match towards {
        Side::Left => usize::checked_sub,
        Side::Right => usize::checked_add,
    };

    // Search for the outermost glyph with the text index.
    while let Some(next) = next(idx, 1) {
        if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
            break;
        }
        idx = next;
    }

    // RTL needs offset one because the left side of the range should be
    // exclusive and the right side inclusive, contrary to the normal
    // behaviour of ranges.
    self.glyphs[idx].safe_to_break.then_some(idx + usize::from(!ltr))
}
}

impl Debug for ShapedText<'_> {
    /// Formats the shaped text as its underlying string.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        self.text.fmt(f)
    }
}

/// Holds shaping results and metadata common to all shaped segments.
struct ShapingContext<'a, 'v> {
    vt: &'a Vt<'v>,
    spans: &'a SpanMapper,
    // The glyphs shaped so far.
    glyphs: Vec<ShapedGlyph>,
    // The fonts currently in use by the active (possibly nested) shaping
    // attempts; pushed and popped by `shape_segment`.
    used: Vec<Font>,
    styles: StyleChain<'a>,
    size: Abs,
    variant: FontVariant,
    // The OpenType features to apply when shaping.
    tags: Vec<rustybuzz::Feature>,
    // Whether font fallback is enabled.
    fallback: bool,
    dir: Dir,
}

/// Shape text into [`ShapedText`].
#[allow(clippy::too_many_arguments)]
pub fn shape<'a>(
    vt: &Vt,
    base: usize,
    text: &'a str,
    spans: &SpanMapper,
    styles: StyleChain<'a>,
    dir: Dir,
    lang: Lang,
    region: Option<Region>,
) -> ShapedText<'a> {
    let size = TextElem::size_in(styles);
    let mut ctx = ShapingContext {
        vt,
        spans,
        size,
        glyphs: vec![],
        used: vec![],
        styles,
        variant: variant(styles),
        tags: tags(styles),
        fallback: TextElem::fallback_in(styles),
        dir,
    };

    if !text.is_empty() {
        shape_segment(&mut ctx, base, text, families(styles));
    }

    // Post-process the raw glyphs: first tracking and spacing, then the
    // per-glyph adjustability.
    track_and_space(&mut ctx);
    calculate_adjustability(&mut ctx, lang, region);

    #[cfg(debug_assertions)]
    assert_all_glyphs_in_range(&ctx.glyphs, text, base..(base + text.len()));
    #[cfg(debug_assertions)]
    assert_glyph_ranges_in_order(&ctx.glyphs, dir);

    ShapedText {
        base,
        text,
        dir,
        lang,
        region,
        styles,
        variant: ctx.variant,
        size,
        width: ctx.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size),
        glyphs: Cow::Owned(ctx.glyphs),
    }
}

/// Shape text with font fallback using the `families` iterator.
///
/// The text is shaped with the next usable font from `families`. Every run
/// of glyphs the font cannot provide (glyph id 0) is shaped again
/// recursively with the remaining families. `base` is the offset of `text`
/// within the whole paragraph; it is added to every cluster index to
/// produce the glyph ranges.
fn shape_segment(
    ctx: &mut ShapingContext,
    base: usize,
    text: &str,
    mut families: impl Iterator<Item = FontFamily> + Clone,
) {
    // Fonts don't have newlines and tabs.
    if text.chars().all(|c| c == '\n' || c == '\t') {
        return;
    }

    // Find the next available family.
    let world = ctx.vt.world;
    let book = world.book();
    let mut selection = families.find_map(|family| {
        book.select(family.as_str(), ctx.variant)
            .and_then(|id| world.font(id))
            .filter(|font| !ctx.used.contains(font))
    });

    // Do font fallback if the families are exhausted and fallback is enabled.
    if selection.is_none() && ctx.fallback {
        let first = ctx.used.first().map(Font::info);
        selection = book
            .select_fallback(first, ctx.variant, text)
            .and_then(|id| world.font(id))
            .filter(|font| !ctx.used.contains(font));
    }

    // Extract the font id or shape notdef glyphs if we couldn't find any font.
    let Some(font) = selection else {
        if let Some(font) = ctx.used.first().cloned() {
            shape_tofus(ctx, base, text, font);
        }
        return;
    };

    // Mark the font as in use so that nested fallback attempts skip it.
    ctx.used.push(font.clone());

    // Fill the buffer with our text.
    let mut buffer = UnicodeBuffer::new();
    buffer.push_str(text);
    buffer.set_language(language(ctx.styles));
    buffer.set_direction(match ctx.dir {
        Dir::LTR => rustybuzz::Direction::LeftToRight,
        Dir::RTL => rustybuzz::Direction::RightToLeft,
        _ => unimplemented!("vertical text layout"),
    });

    // Shape!
    let buffer = rustybuzz::shape(font.rusty(), &ctx.tags, buffer);
    let infos = buffer.glyph_infos();
    let pos = buffer.glyph_positions();
    let ltr = ctx.dir.is_positive();

    // Collect the shaped glyphs, doing fallback and shaping parts again with
    // the next font if necessary.
    let mut i = 0;
    while i < infos.len() {
        let info = &infos[i];
        let cluster = info.cluster as usize;

        // Add the glyph to the shaped output.
        if info.glyph_id != 0 {
            // Determine the text range of the glyph. It ends where the
            // cluster of the neighbouring glyph in text order begins, or at
            // the end of the text if there is no such glyph.
            let start = base + cluster;
            let end = base
                + if ltr { i.checked_add(1) } else { i.checked_sub(1) }
                    .and_then(|last| infos.get(last))
                    .map_or(text.len(), |info| info.cluster as usize);

            ctx.glyphs.push(ShapedGlyph {
                font: font.clone(),
                glyph_id: info.glyph_id as u16,
                // TODO: Don't ignore y_advance.
                x_advance: font.to_em(pos[i].x_advance),
                x_offset: font.to_em(pos[i].x_offset),
                y_offset: font.to_em(pos[i].y_offset),
                adjustability: Adjustability::default(),
                range: start..end,
                safe_to_break: !info.unsafe_to_break(),
                c: text[cluster..].chars().next().unwrap(),
                span: ctx.spans.span_at(start),
            });
        } else {
            // First, search for the end of the tofu sequence.
            let k = i;
            while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
                i += 1;
            }

            // Then, determine the start and end text index for the tofu
            // sequence.
            //
            // Examples:
            // Everything is shown in visual order. Tofus are written as "_".
            // We want to find out that the tofus span the text `2..6`.
            // Note that the clusters are longer than 1 char.
            //
            // Left-to-right:
            // Text:     h a l i h a l l o
            // Glyphs:   A   _   _   C   E
            // Clusters: 0   2   4   6   8
            //              k=1 i=2
            //
            // Right-to-left:
            // Text:     O L L A H I L A H
            // Glyphs:   E   C   _   _   A
            // Clusters: 8   6   4   2   0
            //                  k=2 i=3
            let start = infos[if ltr { k } else { i }].cluster as usize;
            let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) }
                .and_then(|last| infos.get(last))
                .map_or(text.len(), |info| info.cluster as usize);

            // Trim half-baked cluster.
            let remove = base + start..base + end;
            while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.range.start)) {
                ctx.glyphs.pop();
            }

            // Recursively shape the tofu sequence with the next family.
            shape_segment(ctx, base + start, &text[start..end], families.clone());
        }

        i += 1;
    }

    // The font is free to be selected again by sibling segments.
    ctx.used.pop();
}

/// Shape the text with tofus from the given font.
+fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) { + let x_advance = font.advance(0).unwrap_or_default(); + let add_glyph = |(cluster, c): (usize, char)| { + let start = base + cluster; + let end = start + c.len_utf8(); + ctx.glyphs.push(ShapedGlyph { + font: font.clone(), + glyph_id: 0, + x_advance, + x_offset: Em::zero(), + y_offset: Em::zero(), + adjustability: Adjustability::default(), + range: start..end, + safe_to_break: true, + c, + span: ctx.spans.span_at(start), + }); + }; + if ctx.dir.is_positive() { + text.char_indices().for_each(add_glyph); + } else { + text.char_indices().rev().for_each(add_glyph); + } +} + +/// Apply tracking and spacing to the shaped glyphs. +fn track_and_space(ctx: &mut ShapingContext) { + let tracking = Em::from_length(TextElem::tracking_in(ctx.styles), ctx.size); + let spacing = + TextElem::spacing_in(ctx.styles).map(|abs| Em::from_length(abs, ctx.size)); + + let mut glyphs = ctx.glyphs.iter_mut().peekable(); + while let Some(glyph) = glyphs.next() { + // Make non-breaking space same width as normal space. + if glyph.c == '\u{00A0}' { + glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default(); + } + + if glyph.is_space() { + glyph.x_advance = spacing.relative_to(glyph.x_advance); + } + + if glyphs + .peek() + .map_or(false, |next| glyph.range.start != next.range.start) + { + glyph.x_advance += tracking; + } + } +} + +pub fn is_gb_style(lang: Lang, region: Option<Region>) -> bool { + // Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation, + // while zh-HK and zh-TW use alternative style. We default to use GB-style. + !(lang == Lang::CHINESE + && matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK"))) +} + +/// Calculate stretchability and shrinkability of each glyph, +/// and CJK punctuation adjustments according to Chinese Layout Requirements. 
+fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) { + let gb_style = is_gb_style(lang, region); + + for glyph in &mut ctx.glyphs { + glyph.adjustability = glyph.base_adjustability(gb_style); + } + + let mut glyphs = ctx.glyphs.iter_mut().peekable(); + while let Some(glyph) = glyphs.next() { + // Only GB style needs further adjustment. + if glyph.is_cjk_punctuation() && !gb_style { + continue; + } + + // Now we apply consecutive punctuation adjustment, specified in Chinese Layout + // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout + // Requirements, section 3.1 Line Composition Rules for Punctuation Marks + let Some(next) = glyphs.peek_mut() else { continue }; + let width = glyph.x_advance; + let delta = width / 2.0; + if glyph.is_cjk_punctuation() + && next.is_cjk_punctuation() + && (glyph.shrinkability().1 + next.shrinkability().0) >= delta + { + let left_delta = glyph.shrinkability().1.min(delta); + glyph.shrink_right(left_delta); + next.shrink_left(delta - left_delta); + } + } +} + +/// Difference between non-breaking and normal space. +fn nbsp_delta(font: &Font) -> Option<Em> { + let space = font.ttf().glyph_index(' ')?.0; + let nbsp = font.ttf().glyph_index('\u{00A0}')?.0; + Some(font.advance(nbsp)? - font.advance(space)?) +} + +/// Resolve the font variant. +pub fn variant(styles: StyleChain) -> FontVariant { + let mut variant = FontVariant::new( + TextElem::style_in(styles), + TextElem::weight_in(styles), + TextElem::stretch_in(styles), + ); + + let delta = TextElem::delta_in(styles); + variant.weight = variant + .weight + .thicken(delta.clamp(i16::MIN as i64, i16::MAX as i64) as i16); + + if TextElem::emph_in(styles) { + variant.style = match variant.style { + FontStyle::Normal => FontStyle::Italic, + FontStyle::Italic => FontStyle::Normal, + FontStyle::Oblique => FontStyle::Normal, + } + } + + variant +} + +/// Resolve a prioritized iterator over the font families. 
+pub fn families(styles: StyleChain) -> impl Iterator<Item = FontFamily> + Clone { + const FALLBACKS: &[&str] = &[ + "linux libertine", + "twitter color emoji", + "noto color emoji", + "apple color emoji", + "segoe ui emoji", + ]; + + let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] }; + TextElem::font_in(styles) + .into_iter() + .chain(tail.iter().copied().map(FontFamily::new)) +} + +/// Collect the tags of the OpenType features to apply. +fn tags(styles: StyleChain) -> Vec<Feature> { + let mut tags = vec![]; + let mut feat = |tag, value| { + tags.push(Feature::new(Tag::from_bytes(tag), value, ..)); + }; + + // Features that are on by default in Harfbuzz are only added if disabled. + if !TextElem::kerning_in(styles) { + feat(b"kern", 0); + } + + // Features that are off by default in Harfbuzz are only added if enabled. + if TextElem::smallcaps_in(styles) { + feat(b"smcp", 1); + } + + if TextElem::alternates_in(styles) { + feat(b"salt", 1); + } + + let storage; + if let Some(set) = TextElem::stylistic_set_in(styles) { + storage = [b's', b's', b'0' + set.get() / 10, b'0' + set.get() % 10]; + feat(&storage, 1); + } + + if !TextElem::ligatures_in(styles) { + feat(b"liga", 0); + feat(b"clig", 0); + } + + if TextElem::discretionary_ligatures_in(styles) { + feat(b"dlig", 1); + } + + if TextElem::historical_ligatures_in(styles) { + feat(b"hilg", 1); + } + + match TextElem::number_type_in(styles) { + Smart::Auto => {} + Smart::Custom(NumberType::Lining) => feat(b"lnum", 1), + Smart::Custom(NumberType::OldStyle) => feat(b"onum", 1), + } + + match TextElem::number_width_in(styles) { + Smart::Auto => {} + Smart::Custom(NumberWidth::Proportional) => feat(b"pnum", 1), + Smart::Custom(NumberWidth::Tabular) => feat(b"tnum", 1), + } + + if TextElem::slashed_zero_in(styles) { + feat(b"zero", 1); + } + + if TextElem::fractions_in(styles) { + feat(b"frac", 1); + } + + for (tag, value) in TextElem::features_in(styles).0 { + tags.push(Feature::new(tag, value, ..)) + 
} + + tags +} + +/// Process the language and and region of a style chain into a +/// rustybuzz-compatible BCP 47 language. +fn language(styles: StyleChain) -> rustybuzz::Language { + let mut bcp: EcoString = TextElem::lang_in(styles).as_str().into(); + if let Some(region) = TextElem::region_in(styles) { + bcp.push('-'); + bcp.push_str(region.as_str()); + } + rustybuzz::Language::from_str(&bcp).unwrap() +} + +/// Returns true if all glyphs in `glyphs` have ranges within the range `range`. +#[cfg(debug_assertions)] +fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<usize>) { + if glyphs + .iter() + .any(|g| g.range.start < range.start || g.range.end > range.end) + { + panic!("one or more glyphs in {text:?} fell out of range"); + } +} + +/// Asserts that the ranges of `glyphs` is in the proper order according to `dir`. +/// +/// This asserts instead of returning a bool in order to provide a more informative message when the invariant is violated. +#[cfg(debug_assertions)] +fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) { + if glyphs.is_empty() { + return; + } + + // Iterator::is_sorted and friends are unstable as of Rust 1.70.0 + for i in 0..(glyphs.len() - 1) { + let a = &glyphs[i]; + let b = &glyphs[i + 1]; + let ord = a.range.start.cmp(&b.range.start); + let ord = if dir.is_positive() { ord } else { ord.reverse() }; + if ord == std::cmp::Ordering::Greater { + panic!( + "glyph ranges should be monotonically {}, \ + but found glyphs out of order:\n\n\ + first: {a:#?}\nsecond: {b:#?}", + if dir.is_positive() { "increasing" } else { "decreasing" }, + ); + } + } +} diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs new file mode 100644 index 00000000..65e309e1 --- /dev/null +++ b/crates/typst-library/src/text/shift.rs @@ -0,0 +1,229 @@ +use super::{variant, SpaceElem, TextElem, TextSize}; +use crate::prelude::*; + +/// Renders text in subscript. 
///
/// The text is rendered smaller and its baseline is lowered.
///
/// ## Example { #example }
/// ```example
/// Revenue#sub[yearly]
/// ```
///
/// Display: Subscript
/// Category: text
#[element(Show)]
pub struct SubElem {
    /// Whether to prefer the dedicated subscript characters of the font.
    ///
    /// If this is enabled, Typst first tries to transform the text to subscript
    /// codepoints. If that fails, it falls back to rendering lowered and shrunk
    /// normal letters.
    ///
    /// ```example
    /// N#sub(typographic: true)[1]
    /// N#sub(typographic: false)[1]
    /// ```
    #[default(true)]
    pub typographic: bool,

    /// The baseline shift for synthetic subscripts. Does not apply if
    /// `typographic` is true and the font has subscript codepoints for the
    /// given `body`.
    #[default(Em::new(0.2).into())]
    pub baseline: Length,

    /// The font size for synthetic subscripts. Does not apply if
    /// `typographic` is true and the font has subscript codepoints for the
    /// given `body`.
    #[default(TextSize(Em::new(0.6).into()))]
    pub size: TextSize,

    /// The text to display in subscript.
    #[required]
    pub body: Content,
}

impl Show for SubElem {
    #[tracing::instrument(name = "SubElem::show", skip_all)]
    fn show(&self, vt: &mut Vt, styles: StyleChain) -> SourceResult<Content> {
        let body = self.body();
        let mut transformed = None;
        // Typographic path: only taken if the whole body converts to
        // subscript codepoints (`search_text`) and the converted text passes
        // the `is_shapable` check.
        if self.typographic(styles) {
            if let Some(text) = search_text(&body, true) {
                if is_shapable(vt, &text, styles) {
                    transformed = Some(TextElem::packed(text));
                }
            }
        };

        // Synthetic path: style the original body with the configured
        // baseline shift and font size.
        Ok(transformed.unwrap_or_else(|| {
            body.styled(TextElem::set_baseline(self.baseline(styles)))
                .styled(TextElem::set_size(self.size(styles)))
        }))
    }
}

/// Renders text in superscript.
///
/// The text is rendered smaller and its baseline is raised.
///
/// ## Example { #example }
/// ```example
/// 1#super[st] try!
/// ```
///
/// Display: Superscript
/// Category: text
#[element(Show)]
pub struct SuperElem {
    /// Whether to prefer the dedicated superscript characters of the font.
    ///
    /// If this is enabled, Typst first tries to transform the text to
    /// superscript codepoints. If that fails, it falls back to rendering
    /// raised and shrunk normal letters.
    ///
    /// ```example
    /// N#super(typographic: true)[1]
    /// N#super(typographic: false)[1]
    /// ```
    #[default(true)]
    pub typographic: bool,

    /// The baseline shift for synthetic superscripts. Does not apply if
    /// `typographic` is true and the font has superscript codepoints for the
    /// given `body`.
    #[default(Em::new(-0.5).into())]
    pub baseline: Length,

    /// The font size for synthetic superscripts. Does not apply if
    /// `typographic` is true and the font has superscript codepoints for the
    /// given `body`.
    #[default(TextSize(Em::new(0.6).into()))]
    pub size: TextSize,

    /// The text to display in superscript.
    #[required]
    pub body: Content,
}

impl Show for SuperElem {
    #[tracing::instrument(name = "SuperElem::show", skip_all)]
    fn show(&self, vt: &mut Vt, styles: StyleChain) -> SourceResult<Content> {
        let body = self.body();
        let mut transformed = None;
        // Typographic path: only taken if the whole body converts to
        // superscript codepoints (`search_text`) and the converted text
        // passes the `is_shapable` check.
        if self.typographic(styles) {
            if let Some(text) = search_text(&body, false) {
                if is_shapable(vt, &text, styles) {
                    transformed = Some(TextElem::packed(text));
                }
            }
        };

        // Synthetic path: style the original body with the configured
        // baseline shift and font size.
        Ok(transformed.unwrap_or_else(|| {
            body.styled(TextElem::set_baseline(self.baseline(styles)))
                .styled(TextElem::set_size(self.size(styles)))
        }))
    }
}

/// Find and transform the text contained in `content` to the given script kind
/// if and only if it only consists of `Text`, `Space`, and `Empty` leaves.
+fn search_text(content: &Content, sub: bool) -> Option<EcoString> { + if content.is::<SpaceElem>() { + Some(' '.into()) + } else if let Some(elem) = content.to::<TextElem>() { + convert_script(&elem.text(), sub) + } else if let Some(children) = content.to_sequence() { + let mut full = EcoString::new(); + for item in children { + match search_text(item, sub) { + Some(text) => full.push_str(&text), + None => return None, + } + } + Some(full) + } else { + None + } +} + +/// Checks whether the first retrievable family contains all code points of the +/// given string. +fn is_shapable(vt: &Vt, text: &str, styles: StyleChain) -> bool { + let world = vt.world; + for family in TextElem::font_in(styles) { + if let Some(font) = world + .book() + .select(family.as_str(), variant(styles)) + .and_then(|id| world.font(id)) + { + return text.chars().all(|c| font.ttf().glyph_index(c).is_some()); + } + } + + false +} + +/// Convert a string to sub- or superscript codepoints if all characters +/// can be mapped to such a codepoint. +fn convert_script(text: &str, sub: bool) -> Option<EcoString> { + let mut result = EcoString::with_capacity(text.len()); + let converter = if sub { to_subscript_codepoint } else { to_superscript_codepoint }; + + for c in text.chars() { + match converter(c) { + Some(c) => result.push(c), + None => return None, + } + } + + Some(result) +} + +/// Convert a character to its corresponding Unicode superscript. +fn to_superscript_codepoint(c: char) -> Option<char> { + char::from_u32(match c { + '0' => 0x2070, + '1' => 0x00B9, + '2' => 0x00B2, + '3' => 0x00B3, + '4'..='9' => 0x2070 + (c as u32 + 4 - '4' as u32), + '+' => 0x207A, + '-' => 0x207B, + '=' => 0x207C, + '(' => 0x207D, + ')' => 0x207E, + 'n' => 0x207F, + 'i' => 0x2071, + ' ' => 0x0020, + _ => return None, + }) +} + +/// Convert a character to its corresponding Unicode subscript. 
fn to_subscript_codepoint(c: char) -> Option<char> {
    Some(match c {
        // The subscript digits are contiguous, starting at U+2080.
        '0'..='9' => return char::from_u32(0x2080 + (c as u32 - '0' as u32)),
        '+' => '\u{208A}',
        '-' => '\u{208B}',
        '=' => '\u{208C}',
        '(' => '\u{208D}',
        ')' => '\u{208E}',
        'a' => '\u{2090}',
        'e' => '\u{2091}',
        'o' => '\u{2092}',
        'x' => '\u{2093}',
        'h' => '\u{2095}',
        'k' => '\u{2096}',
        'l' => '\u{2097}',
        'm' => '\u{2098}',
        'n' => '\u{2099}',
        'p' => '\u{209A}',
        's' => '\u{209B}',
        't' => '\u{209C}',
        // A space maps to itself.
        ' ' => ' ',
        // No subscript counterpart.
        _ => return None,
    })
}
