summaryrefslogtreecommitdiff
path: root/library/src/text
diff options
context:
space:
mode:
Diffstat (limited to 'library/src/text')
-rw-r--r-- library/src/text/deco.rs 248
-rw-r--r-- library/src/text/link.rs 102
-rw-r--r-- library/src/text/mod.rs 555
-rw-r--r-- library/src/text/par.rs 1259
-rw-r--r-- library/src/text/quotes.rs 149
-rw-r--r-- library/src/text/raw.rs 206
-rw-r--r-- library/src/text/shaping.rs 655
-rw-r--r-- library/src/text/shift.rs 188
8 files changed, 3362 insertions, 0 deletions
diff --git a/library/src/text/deco.rs b/library/src/text/deco.rs
new file mode 100644
index 00000000..cd3acef5
--- /dev/null
+++ b/library/src/text/deco.rs
@@ -0,0 +1,248 @@
+use kurbo::{BezPath, Line, ParamCurve};
+use ttf_parser::{GlyphId, OutlineBuilder};
+
+use super::TextNode;
+use crate::prelude::*;
+
+/// Typeset underlined, stricken-through or overlined text; the const parameter `L` picks the line kind.
+#[derive(Debug, Hash)]
+pub struct DecoNode<const L: DecoLine>(pub Content);
+
+/// Typeset underlined text (a [`DecoNode`] with `L = UNDERLINE`).
+pub type UnderlineNode = DecoNode<UNDERLINE>;
+
+/// Typeset stricken-through text (a [`DecoNode`] with `L = STRIKETHROUGH`).
+pub type StrikethroughNode = DecoNode<STRIKETHROUGH>;
+
+/// Typeset overlined text (a [`DecoNode`] with `L = OVERLINE`).
+pub type OverlineNode = DecoNode<OVERLINE>;
+
+#[node(Show)]
+impl<const L: DecoLine> DecoNode<L> {
+ /// How to stroke the line. If `auto`, `decorate` uses the text fill as
+ /// the paint and reads the thickness from the font's metrics.
+ #[property(shorthand, resolve, fold)]
+ pub const STROKE: Smart<PartialStroke> = Smart::Auto;
+ /// Position of the line relative to the baseline, read from the font tables
+ /// if `auto`.
+ #[property(resolve)]
+ pub const OFFSET: Smart<Length> = Smart::Auto;
+ /// Amount that the line will be longer or shorter than its associated text.
+ #[property(resolve)]
+ pub const EXTENT: Length = Length::zero();
+ /// Whether the line skips sections in which it would collide
+ /// with the glyphs. Does not apply to strikethrough.
+ pub const EVADE: bool = true;
+
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("body")?).pack()) // the "body" argument is required
+ }
+}
+
+impl<const L: DecoLine> Show for DecoNode<L> {
+    /// Re-pack this node around its body after calling `unguard(sel)` on it.
+    fn unguard_parts(&self, sel: Selector) -> Content {
+        let body = self.0.unguard(sel);
+        Self(body).pack()
+    }
+
+    /// Look up a field by name; only `"body"` is available.
+    fn field(&self, name: &str) -> Option<Value> {
+        if name == "body" {
+            Some(Value::Content(self.0.clone()))
+        } else {
+            None
+        }
+    }
+
+    /// Style the body with a [`Decoration`] of kind `L`, configured from the
+    /// node's properties in `styles`.
+    fn realize(
+        &self,
+        _: Tracked<dyn World>,
+        styles: StyleChain,
+    ) -> SourceResult<Content> {
+        let decoration = Decoration {
+            line: L,
+            stroke: styles.get(Self::STROKE).unwrap_or_default(),
+            offset: styles.get(Self::OFFSET),
+            extent: styles.get(Self::EXTENT),
+            evade: styles.get(Self::EVADE),
+        };
+        let body = self.0.clone();
+        Ok(body.styled(TextNode::DECO, decoration))
+    }
+}
+
+/// Defines a line that is positioned over, under or on top of text.
+///
+/// For more details, see [`DecoNode`].
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub struct Decoration {
+ pub line: DecoLine, // which line to draw: UNDERLINE, STRIKETHROUGH or OVERLINE
+ pub stroke: PartialStroke<Abs>, // stroke settings; `decorate` supplies text fill / font thickness as fallback
+ pub offset: Smart<Abs>, // vertical position; when `Auto`, `decorate` takes it from the font metrics
+ pub extent: Abs, // how far the line reaches beyond the text run
+ pub evade: bool, // whether to leave gaps where the line would cross glyph outlines
+}
+
+/// A kind of decorative line, stored as a `usize` and used as the const parameter of `DecoNode`.
+pub type DecoLine = usize;
+
+/// A line under text; `decorate` also falls back to underline metrics for any other, unlisted value.
+pub const UNDERLINE: DecoLine = 0;
+
+/// A line through text; `decorate` never applies glyph evasion to this kind.
+pub const STRIKETHROUGH: DecoLine = 1;
+
+/// A line over text.
+pub const OVERLINE: DecoLine = 2;
+
+/// Add line decorations to a single run of shaped text.
+///
+/// Pushes one or more stroked line shapes into `frame`:
+/// - `deco` selects the line kind, stroke, offset, extent and whether the
+///   line evades glyphs (strikethrough never evades).
+/// - `text` provides the font, size, fill and glyphs of the run.
+/// - `shift` is subtracted from the line's vertical offset.
+/// - `pos` is the origin of the run and `width` its horizontal extent.
+///
+/// The line starts `deco.extent` before the run and ends `deco.extent`
+/// after it. When evading, the line is split into segments around every
+/// glyph outline it would cross.
+pub fn decorate(
+    frame: &mut Frame,
+    deco: &Decoration,
+    text: &Text,
+    shift: Abs,
+    pos: Point,
+    width: Abs,
+) {
+    let font_metrics = text.font.metrics();
+    let metrics = match deco.line {
+        STRIKETHROUGH => font_metrics.strikethrough,
+        OVERLINE => font_metrics.overline,
+        // `UNDERLINE` and any unknown line kind.
+        _ => font_metrics.underline,
+    };
+
+    let evade = deco.evade && deco.line != STRIKETHROUGH;
+    let offset = deco.offset.unwrap_or(-metrics.position.at(text.size)) - shift;
+    let stroke = deco.stroke.unwrap_or(Stroke {
+        paint: text.fill,
+        thickness: metrics.thickness.at(text.size),
+    });
+
+    // Padding around each glyph intersection and the minimum length a
+    // segment needs to be drawn at all while evading.
+    let gap_padding = 0.08 * text.size;
+    let min_width = 0.162 * text.size;
+
+    // The line sticks out by `extent` on *both* sides of the run. (The end
+    // used to be `pos.x + width + 2 * extent`, which overshot on the right.)
+    let mut start = pos.x - deco.extent;
+    let end = pos.x + width + deco.extent;
+
+    let mut push_segment = |from: Abs, to: Abs| {
+        let origin = Point::new(from, pos.y + offset);
+        let target = Point::new(to - from, Abs::zero());
+
+        // Tiny leftover slivers between glyphs are dropped when evading.
+        if target.x >= min_width || !evade {
+            let shape = Geometry::Line(target).stroked(stroke);
+            frame.push(origin, Element::Shape(shape));
+        }
+    };
+
+    if !evade {
+        push_segment(start, end);
+        return;
+    }
+
+    // The decoration line in the glyphs' coordinate space (baseline at y = 0),
+    // used for intersection tests against the outlines.
+    let line = Line::new(
+        kurbo::Point::new(pos.x.to_raw(), offset.to_raw()),
+        kurbo::Point::new((pos.x + width).to_raw(), offset.to_raw()),
+    );
+
+    let mut x = pos.x;
+    let mut intersections = vec![];
+
+    for glyph in text.glyphs.iter() {
+        let dx = glyph.x_offset.at(text.size) + x;
+        let mut builder =
+            BezPathBuilder::new(font_metrics.units_per_em, text.size, dx.to_raw());
+
+        let bbox = text.font.ttf().outline_glyph(GlyphId(glyph.id), &mut builder);
+        let path = builder.finish();
+
+        x += glyph.x_advance.at(text.size);
+
+        // Only do the costly segments intersection test if the line
+        // intersects the bounding box.
+        if bbox.map_or(false, |bbox| {
+            let y_min = -text.font.to_em(bbox.y_max).at(text.size);
+            let y_max = -text.font.to_em(bbox.y_min).at(text.size);
+            offset >= y_min && offset <= y_max
+        }) {
+            // Find all intersections of segments with the line.
+            intersections.extend(
+                path.segments()
+                    .flat_map(|seg| seg.intersect_line(line))
+                    .map(|is| Abs::raw(line.eval(is.line_t).x)),
+            );
+        }
+    }
+
+    // When emitting the decorative line segments, we move from left to
+    // right. The intersections are not necessarily in this order, yet.
+    intersections.sort();
+
+    // Consecutive intersections are treated as the left and right edge of a
+    // gap that the line must skip.
+    for gap in intersections.chunks_exact(2) {
+        let l = gap[0] - gap_padding;
+        let r = gap[1] + gap_padding;
+
+        if start >= end {
+            break;
+        }
+
+        // The gap begins at or before the current position: just skip past it.
+        if start >= l {
+            start = r;
+            continue;
+        }
+
+        push_segment(start, l);
+        start = r;
+    }
+
+    if start < end {
+        push_segment(start, end);
+    }
+}
+
+/// Builds a kurbo [`BezPath`] for a glyph, converting font units to raw lengths and flipping the y-axis.
+struct BezPathBuilder {
+ path: BezPath, // the outline accumulated so far
+ units_per_em: f64, // the font's units per em, used to scale outline coordinates
+ font_size: Abs, // the size the em values are resolved at
+ x_offset: f64, // horizontal shift added to every x coordinate
+}
+
+impl BezPathBuilder {
+    /// Create a builder with an empty path and the given scaling parameters.
+    fn new(units_per_em: f64, font_size: Abs, x_offset: f64) -> Self {
+        let path = BezPath::new();
+        Self { path, units_per_em, font_size, x_offset }
+    }
+
+    /// Consume the builder and hand out the accumulated path.
+    fn finish(self) -> BezPath {
+        let Self { path, .. } = self;
+        path
+    }
+
+    /// Map a point from font units into the path's coordinate space: x is
+    /// scaled and shifted by `x_offset`, y is scaled and negated.
+    fn p(&self, x: f32, y: f32) -> kurbo::Point {
+        let px = self.s(x) + self.x_offset;
+        let py = -self.s(y);
+        kurbo::Point::new(px, py)
+    }
+
+    /// Scale a single font-unit value to a raw length at the font size.
+    fn s(&self, v: f32) -> f64 {
+        let em = Em::from_units(v, self.units_per_em);
+        em.at(self.font_size).to_raw()
+    }
+}
+
+/// Forwards each outline command to the inner path after mapping all
+/// coordinates through [`BezPathBuilder::p`].
+impl OutlineBuilder for BezPathBuilder {
+    fn move_to(&mut self, x: f32, y: f32) {
+        let to = self.p(x, y);
+        self.path.move_to(to);
+    }
+
+    fn line_to(&mut self, x: f32, y: f32) {
+        let to = self.p(x, y);
+        self.path.line_to(to);
+    }
+
+    fn quad_to(&mut self, x1: f32, y1: f32, x: f32, y: f32) {
+        let ctrl = self.p(x1, y1);
+        let to = self.p(x, y);
+        self.path.quad_to(ctrl, to);
+    }
+
+    fn curve_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x: f32, y: f32) {
+        let ctrl1 = self.p(x1, y1);
+        let ctrl2 = self.p(x2, y2);
+        let to = self.p(x, y);
+        self.path.curve_to(ctrl1, ctrl2, to);
+    }
+
+    fn close(&mut self) {
+        self.path.close_path();
+    }
+}
diff --git a/library/src/text/link.rs b/library/src/text/link.rs
new file mode 100644
index 00000000..82abe5cd
--- /dev/null
+++ b/library/src/text/link.rs
@@ -0,0 +1,102 @@
+use super::TextNode;
+use crate::prelude::*;
+
+/// Link text and other elements to a destination.
+#[derive(Debug, Hash)]
+pub struct LinkNode {
+ /// The destination the link points to.
+ pub dest: Destination,
+ /// The content shown for the link; if `None`, `realize` derives it from the destination.
+ pub body: Option<Content>,
+}
+
+impl LinkNode {
+    /// Build a link to `url` that has no explicit body.
+    pub fn from_url(url: EcoString) -> Self {
+        let dest = Destination::Url(url);
+        Self { dest, body: None }
+    }
+}
+
+#[node(Show)]
+impl LinkNode {
+ /// The fill color of text in the link. Just the surrounding text color
+ /// if `auto`.
+ pub const FILL: Smart<Paint> = Smart::Auto;
+ /// Whether to underline the link. If `auto`, only URL links are underlined (see `finalize`).
+ pub const UNDERLINE: Smart<bool> = Smart::Auto;
+
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ let dest = args.expect::<Destination>("destination")?;
+ let body = match dest {
+ Destination::Url(_) => args.eat()?, // body is optional for URL links
+ Destination::Internal(_) => Some(args.expect("body")?), // internal links require a body
+ };
+ Ok(Self { dest, body }.pack())
+ }
+}
+
+impl Show for LinkNode {
+    /// Re-pack this node with `unguard(sel)` applied to its body, if any.
+    fn unguard_parts(&self, sel: Selector) -> Content {
+        let body = self.body.as_ref().map(|body| body.unguard(sel));
+        Self { dest: self.dest.clone(), body }.pack()
+    }
+
+    /// Look up a field by name: `"url"` yields the destination (a string for
+    /// URLs, an encoded dictionary for internal locations) and `"body"` the
+    /// body content or `none`.
+    fn field(&self, name: &str) -> Option<Value> {
+        match name {
+            "url" => {
+                let dest = match &self.dest {
+                    Destination::Internal(loc) => Value::Dict(loc.encode()),
+                    Destination::Url(url) => Value::Str(url.clone().into()),
+                };
+                Some(dest)
+            }
+            "body" => {
+                let body = match &self.body {
+                    None => Value::None,
+                    Some(body) => Value::Content(body.clone()),
+                };
+                Some(body)
+            }
+            _ => None,
+        }
+    }
+
+    /// Produce the link's content and tag it with the destination.
+    ///
+    /// Without an explicit body, a URL link shows the URL itself with any
+    /// leading `mailto:` / `tel:` removed, and an internal link shows nothing.
+    fn realize(&self, _: Tracked<dyn World>, _: StyleChain) -> SourceResult<Content> {
+        let body = match &self.body {
+            Some(body) => body.clone(),
+            None => match &self.dest {
+                Destination::Internal(_) => Content::empty(),
+                Destination::Url(url) => {
+                    let stripped = ["mailto:", "tel:"]
+                        .iter()
+                        .fold(url.as_str(), |rest, prefix| rest.trim_start_matches(*prefix));
+                    let label: EcoString = if stripped.len() < url.len() {
+                        stripped.into()
+                    } else {
+                        url.clone()
+                    };
+                    TextNode(label).pack()
+                }
+            },
+        };
+        Ok(body.styled(TextNode::LINK, Some(self.dest.clone())))
+    }
+
+    /// Apply the link's fill and underline settings to the realized content.
+    fn finalize(
+        &self,
+        _: Tracked<dyn World>,
+        styles: StyleChain,
+        mut realized: Content,
+    ) -> SourceResult<Content> {
+        let mut map = StyleMap::new();
+        if let Smart::Custom(fill) = styles.get(Self::FILL) {
+            map.set(TextNode::FILL, fill);
+        }
+
+        // By default, only links to URLs are underlined.
+        let underline = match styles.get(Self::UNDERLINE) {
+            Smart::Custom(underline) => underline,
+            Smart::Auto => matches!(&self.dest, Destination::Url(_)),
+        };
+        if underline {
+            realized = realized.underlined();
+        }
+
+        Ok(realized.styled_with_map(map))
+    }
+}
diff --git a/library/src/text/mod.rs b/library/src/text/mod.rs
new file mode 100644
index 00000000..d793f614
--- /dev/null
+++ b/library/src/text/mod.rs
@@ -0,0 +1,555 @@
+//! Text handling and paragraph layout.
+
+mod deco;
+mod link;
+mod par;
+mod quotes;
+mod raw;
+mod shaping;
+mod shift;
+
+pub use deco::*;
+pub use link::*;
+pub use par::*;
+pub use quotes::*;
+pub use raw::*;
+pub use shaping::*;
+pub use shift::*;
+
+use std::borrow::Cow;
+
+use rustybuzz::Tag;
+use typst::font::{FontMetrics, FontStretch, FontStyle, FontWeight, VerticalFontMetric};
+use typst::util::EcoString;
+
+use crate::prelude::*;
+
+/// A single run of text with the same style.
+#[derive(Debug, Clone, Hash)]
+pub struct TextNode(pub EcoString);
+
+#[node]
+impl TextNode {
+ /// A prioritized sequence of font families.
+ #[property(skip, referenced)]
+ pub const FAMILY: FallbackList = FallbackList(vec![FontFamily::new("IBM Plex Sans")]);
+ /// Whether to allow font fallback when the primary font list contains no
+ /// match.
+ pub const FALLBACK: bool = true;
+
+ /// How the font is styled.
+ pub const STYLE: FontStyle = FontStyle::Normal;
+ /// The boldness / thickness of the font's glyphs.
+ pub const WEIGHT: FontWeight = FontWeight::REGULAR;
+ /// The width of the glyphs.
+ pub const STRETCH: FontStretch = FontStretch::NORMAL;
+
+ /// The size of the glyphs.
+ #[property(shorthand, fold)]
+ pub const SIZE: TextSize = Abs::pt(11.0);
+ /// The glyph fill color.
+ #[property(shorthand)]
+ pub const FILL: Paint = Color::BLACK.into();
+ /// The amount of space that should be added between characters.
+ #[property(resolve)]
+ pub const TRACKING: Length = Length::zero();
+ /// The width of spaces relative to the font's space width.
+ #[property(resolve)]
+ pub const SPACING: Rel<Length> = Rel::one();
+ /// The offset of the baseline.
+ #[property(resolve)]
+ pub const BASELINE: Length = Length::zero();
+ /// Whether certain glyphs can hang over into the margin.
+ pub const OVERHANG: bool = true;
+ /// The top end of the text bounding box.
+ pub const TOP_EDGE: TextEdge = TextEdge::Metric(VerticalFontMetric::CapHeight);
+ /// The bottom end of the text bounding box.
+ pub const BOTTOM_EDGE: TextEdge = TextEdge::Metric(VerticalFontMetric::Baseline);
+
+ /// An ISO 639-1/2/3 language code.
+ pub const LANG: Lang = Lang::ENGLISH;
+ /// An ISO 3166-1 alpha-2 region code.
+ pub const REGION: Option<Region> = None;
+ /// The direction for text and inline objects. When `auto`, the direction is
+ /// automatically inferred from the language.
+ #[property(resolve)]
+ pub const DIR: HorizontalDir = HorizontalDir(Smart::Auto);
+ /// Whether to hyphenate text to improve line breaking. When `auto`, words
+ /// will be hyphenated if and only if justification is enabled.
+ #[property(resolve)]
+ pub const HYPHENATE: Hyphenate = Hyphenate(Smart::Auto);
+ /// Whether to apply smart quotes.
+ pub const SMART_QUOTES: bool = true;
+
+ /// Whether to apply kerning ("kern").
+ pub const KERNING: bool = true;
+ /// Whether to apply stylistic alternates. ("salt")
+ pub const ALTERNATES: bool = false;
+ /// Which stylistic set to apply. ("ss01" - "ss20")
+ pub const STYLISTIC_SET: Option<StylisticSet> = None;
+ /// Whether standard ligatures are active. ("liga", "clig")
+ pub const LIGATURES: bool = true;
+ /// Whether ligatures that should be used sparingly are active. ("dlig")
+ pub const DISCRETIONARY_LIGATURES: bool = false;
+ /// Whether historical ligatures are active. ("hlig")
+ pub const HISTORICAL_LIGATURES: bool = false;
+ /// Which kind of numbers / figures to select.
+ pub const NUMBER_TYPE: Smart<NumberType> = Smart::Auto;
+ /// The width of numbers / figures.
+ pub const NUMBER_WIDTH: Smart<NumberWidth> = Smart::Auto;
+ /// Whether to have a slash through the zero glyph. ("zero")
+ pub const SLASHED_ZERO: bool = false;
+ /// Whether to convert fractions. ("frac")
+ pub const FRACTIONS: bool = false;
+ /// Raw OpenType features to apply.
+ #[property(fold)]
+ pub const FEATURES: FontFeatures = FontFeatures(vec![]);
+
+ /// Whether the font weight should be increased by 300.
+ #[property(skip, fold)]
+ pub const BOLD: Toggle = false;
+ /// Whether the font style should be inverted.
+ #[property(skip, fold)]
+ pub const ITALIC: Toggle = false;
+ /// A case transformation that should be applied to the text.
+ #[property(skip)]
+ pub const CASE: Option<Case> = None;
+ /// Whether small capital glyphs should be used. ("smcp")
+ #[property(skip)]
+ pub const SMALLCAPS: bool = false;
+ /// A destination the text should be linked to.
+ #[property(skip, referenced)]
+ pub const LINK: Option<Destination> = None;
+ /// Decorative lines; folding puts the innermost decoration first.
+ #[property(skip, fold)]
+ pub const DECO: Decoration = vec![];
+
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ // The text constructor is special: It doesn't create a text node.
+ // Instead, it leaves the passed argument structurally unchanged, but
+ // styles all text in it.
+ args.expect("body")
+ }
+
+ fn set(...) {
+ if let Some(family) = args.named("family")? {
+ styles.set(Self::FAMILY, family);
+ } else {
+ let mut count = 0; // number of positional string arguments seen
+ let mut content = false; // whether a positional content argument was seen
+ for item in args.items.iter().filter(|item| item.name.is_none()) {
+ if EcoString::is(&item.value) {
+ count += 1;
+ } else if <Content as Cast<Spanned<Value>>>::is(&item.value) {
+ content = true;
+ }
+ }
+
+ // Skip the final string if it's needed as the body.
+ if constructor && !content && count > 0 {
+ count -= 1;
+ }
+
+ if count > 0 {
+ let mut list = Vec::with_capacity(count);
+ for _ in 0 .. count {
+ list.push(args.find()?.unwrap());
+ }
+
+ styles.set(Self::FAMILY, FallbackList(list));
+ }
+ }
+ }
+}
+
+/// A font family name, always stored in lowercase (e.g. "arial").
+#[derive(Clone, Eq, PartialEq, Hash)]
+pub struct FontFamily(EcoString);
+
+impl FontFamily {
+    /// Build a family from `string`, lowercasing it first.
+    pub fn new(string: &str) -> Self {
+        let lowered = string.to_lowercase();
+        Self(EcoString::from(lowered))
+    }
+
+    /// Borrow the stored, lowercased family name.
+    pub fn as_str(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+/// Debug-prints the family exactly like its inner string.
+impl Debug for FontFamily {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let Self(name) = self;
+        Debug::fmt(name, f)
+    }
+}
+
+castable! {
+ FontFamily,
+ Expected: "string",
+ Value::Str(string) => Self::new(&string), // `new` lowercases the name
+}
+
+/// Font family fallback list; `TextNode::FAMILY` documents it as a prioritized sequence.
+#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
+pub struct FallbackList(pub Vec<FontFamily>);
+
+castable! {
+ FallbackList,
+ Expected: "string or array of strings",
+ Value::Str(string) => Self(vec![FontFamily::new(&string)]),
+ Value::Array(values) => Self(values
+ .into_iter()
+ .filter_map(|v| v.cast().ok()) // array entries that fail to cast to a string are silently dropped
+ .map(|string: EcoString| FontFamily::new(&string))
+ .collect()),
+}
+
+/// The size of text, given as a length that may contain an em part.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct TextSize(pub Length);
+
+impl Fold for TextSize {
+    type Output = Abs;
+
+    /// Resolve this size against the `outer` size: the em part is evaluated
+    /// at `outer` and the absolute part is added on top.
+    fn fold(self, outer: Self::Output) -> Self::Output {
+        let Self(length) = self;
+        let relative = length.em.at(outer);
+        relative + length.abs
+    }
+}
+
+castable!(TextSize: Length);
+
+/// Specifies the bottom or top edge of text (see `TextNode::TOP_EDGE` and `TextNode::BOTTOM_EDGE`).
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum TextEdge {
+ /// An edge specified using one of the well-known font metrics.
+ Metric(VerticalFontMetric),
+ /// An edge specified as a length.
+ Length(Length),
+}
+
+impl TextEdge {
+    /// Turn the edge into an absolute length: a metric is looked up in
+    /// `metrics`, an explicit length is used as is; both are resolved
+    /// against `styles`.
+    pub fn resolve(self, styles: StyleChain, metrics: &FontMetrics) -> Abs {
+        match self {
+            Self::Length(length) => length.resolve(styles),
+            Self::Metric(metric) => {
+                let edge = metrics.vertical(metric);
+                edge.resolve(styles)
+            }
+        }
+    }
+}
+
+castable! {
+ TextEdge,
+ Expected: "string or length",
+ Value::Length(v) => Self::Length(v),
+ Value::Str(string) => Self::Metric(match string.as_str() { // metric names are matched exactly
+ "ascender" => VerticalFontMetric::Ascender,
+ "cap-height" => VerticalFontMetric::CapHeight,
+ "x-height" => VerticalFontMetric::XHeight,
+ "baseline" => VerticalFontMetric::Baseline,
+ "descender" => VerticalFontMetric::Descender,
+ _ => Err("unknown font metric")?, // any other string is rejected
+ }),
+}
+
+/// The direction of text and inline objects in their line; `Auto` is resolved from the text language.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct HorizontalDir(pub Smart<Dir>);
+
+castable! {
+ HorizontalDir,
+ Expected: "direction or auto",
+ Value::Auto => Self(Smart::Auto),
+ @dir: Dir => match dir.axis() {
+ Axis::X => Self(Smart::Custom(*dir)),
+ Axis::Y => Err("must be horizontal")?, // vertical directions are rejected
+ },
+}
+
+impl Resolve for HorizontalDir {
+    type Output = Dir;
+
+    /// Use the explicit direction if one is set, otherwise the direction of
+    /// the text language found in `styles`.
+    fn resolve(self, styles: StyleChain) -> Self::Output {
+        if let Smart::Custom(dir) = self.0 {
+            dir
+        } else {
+            styles.get(TextNode::LANG).dir()
+        }
+    }
+}
+
+/// Whether to hyphenate text; `Auto` resolves to the paragraph's justification setting.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct Hyphenate(pub Smart<bool>);
+
+castable! {
+ Hyphenate,
+ Expected: "boolean or auto",
+ Value::Auto => Self(Smart::Auto),
+ Value::Bool(v) => Self(Smart::Custom(v)),
+}
+
+impl Resolve for Hyphenate {
+    type Output = bool;
+
+    /// Use the explicit setting if there is one, otherwise hyphenate exactly
+    /// when the paragraph is justified.
+    fn resolve(self, styles: StyleChain) -> Self::Output {
+        if let Smart::Custom(hyphenate) = self.0 {
+            hyphenate
+        } else {
+            styles.get(ParNode::JUSTIFY)
+        }
+    }
+}
+
+/// The index of a stylistic set in a font, always within 1-20.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct StylisticSet(u8);
+
+impl StylisticSet {
+    /// Create a new set; out-of-range indices are clamped into 1-20.
+    pub fn new(index: u8) -> Self {
+        let clamped = index.max(1).min(20);
+        Self(clamped)
+    }
+
+    /// The set's index, guaranteed to lie within 1-20.
+    pub fn get(self) -> u8 {
+        let Self(index) = self;
+        index
+    }
+}
+
+castable! {
+ StylisticSet,
+ Expected: "integer",
+ Value::Int(v) => match v {
+ 1 ..= 20 => Self::new(v as u8), // the cast cannot truncate: `v` is within 1..=20 here
+ _ => Err("must be between 1 and 20")?,
+ },
+}
+
+/// Which kind of numbers / figures to select; cast from the strings "lining" and "old-style".
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum NumberType {
+ /// Numbers that fit well with capital text. ("lnum")
+ Lining,
+ /// Numbers that fit well into a flow of upper- and lowercase text. ("onum")
+ OldStyle,
+}
+
+castable! {
+ NumberType,
+ Expected: "string",
+ Value::Str(string) => match string.as_str() {
+ "lining" => Self::Lining,
+ "old-style" => Self::OldStyle,
+ _ => Err(r#"expected "lining" or "old-style""#)?,
+ },
+}
+
+/// The width of numbers / figures; cast from the strings "proportional" and "tabular".
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum NumberWidth {
+ /// Number widths are glyph specific. ("pnum")
+ Proportional,
+ /// All numbers are of equal width / monospaced. ("tnum")
+ Tabular,
+}
+
+castable! {
+ NumberWidth,
+ Expected: "string",
+ Value::Str(string) => match string.as_str() {
+ "proportional" => Self::Proportional,
+ "tabular" => Self::Tabular,
+ _ => Err(r#"expected "proportional" or "tabular""#)?,
+ },
+}
+
+/// OpenType font features settings, stored as `(tag, value)` pairs.
+#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
+pub struct FontFeatures(pub Vec<(Tag, u32)>);
+
+castable! {
+ FontFeatures,
+ Expected: "array of strings or dictionary mapping tags to integers",
+ Value::Array(values) => Self(values
+ .into_iter()
+ .filter_map(|v| v.cast().ok()) // non-string entries are silently skipped
+ .map(|string: EcoString| (Tag::from_bytes_lossy(string.as_bytes()), 1)) // a listed tag gets the value 1
+ .collect()),
+ Value::Dict(values) => Self(values
+ .into_iter()
+ .filter_map(|(k, v)| {
+ let tag = Tag::from_bytes_lossy(k.as_bytes());
+ let num = v.cast::<i64>().ok()?.try_into().ok()?; // entries whose value is not an integer fitting `u32` are skipped
+ Some((tag, num))
+ })
+ .collect()),
+}
+
+impl Fold for FontFeatures {
+    type Output = Self;
+
+    /// Concatenate both lists, with this (inner) list's entries first.
+    fn fold(self, outer: Self::Output) -> Self::Output {
+        let Self(mut features) = self;
+        let Self(inherited) = outer;
+        features.extend(inherited);
+        Self(features)
+    }
+}
+
+/// A text space; the node carries no data.
+#[derive(Debug, Clone, Hash)]
+pub struct SpaceNode;
+
+#[node]
+impl SpaceNode {
+ fn construct(_: &mut Vm, _: &mut Args) -> SourceResult<Content> {
+ Ok(Self.pack()) // takes no arguments
+ }
+}
+
+/// A line break; `justify` is the flag given as the named `justify` argument.
+#[derive(Debug, Clone, Hash)]
+pub struct LinebreakNode {
+ pub justify: bool,
+}
+
+#[node]
+impl LinebreakNode {
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ let justify = args.named("justify")?.unwrap_or(false); // defaults to `false` when not given
+ Ok(Self { justify }.pack())
+ }
+}
+
+/// A smart quote; `double` distinguishes double from single quotes.
+#[derive(Debug, Clone, Hash)]
+pub struct SmartQuoteNode {
+ pub double: bool,
+}
+
+#[node]
+impl SmartQuoteNode {
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ let double = args.named("double")?.unwrap_or(true); // defaults to a double quote
+ Ok(Self { double }.pack())
+ }
+}
+
+/// Convert a string or content to lowercase.
+pub fn lower(_: &mut Vm, args: &mut Args) -> SourceResult<Value> {
+    let target = Case::Lower;
+    case(target, args)
+}
+
+/// Convert a string or content to uppercase.
+pub fn upper(_: &mut Vm, args: &mut Args) -> SourceResult<Value> {
+    let target = Case::Upper;
+    case(target, args)
+}
+
+/// Change the case of text.
+///
+/// A string argument is converted right away; content is instead styled with
+/// the case so its text is transformed later. Any other value is an error
+/// reported at the argument's span.
+fn case(case: Case, args: &mut Args) -> SourceResult<Value> {
+    let Spanned { v: value, span } = args.expect("string or content")?;
+    let converted = match value {
+        Value::Str(string) => Value::Str(case.apply(&string).into()),
+        Value::Content(content) => {
+            Value::Content(content.styled(TextNode::CASE, Some(case)))
+        }
+        other => bail!(span, "expected string or content, found {}", other.type_name()),
+    };
+    Ok(converted)
+}
+
+/// A case transformation on text.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum Case {
+    /// Everything is uppercased.
+    Upper,
+    /// Everything is lowercased.
+    Lower,
+}
+
+impl Case {
+    /// Return a copy of `text` converted to this case.
+    pub fn apply(self, text: &str) -> String {
+        match self {
+            Self::Lower => str::to_lowercase(text),
+            Self::Upper => str::to_uppercase(text),
+        }
+    }
+}
+
+/// Display text in small capitals by styling the given content accordingly.
+pub fn smallcaps(_: &mut Vm, args: &mut Args) -> SourceResult<Value> {
+    let styled = args.expect::<Content>("content")?.styled(TextNode::SMALLCAPS, true);
+    Ok(Value::Content(styled))
+}
+
+/// Strong content, rendered in boldface by default (via the `TextNode::BOLD` toggle).
+#[derive(Debug, Hash)]
+pub struct StrongNode(pub Content);
+
+#[node(Show)]
+impl StrongNode {
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("body")?).pack()) // the "body" argument is required
+ }
+}
+
+impl Show for StrongNode {
+    /// Re-pack this node around its body after calling `unguard(sel)` on it.
+    fn unguard_parts(&self, sel: Selector) -> Content {
+        let body = self.0.unguard(sel);
+        Self(body).pack()
+    }
+
+    /// Look up a field by name; only `"body"` is available.
+    fn field(&self, name: &str) -> Option<Value> {
+        if name == "body" {
+            Some(Value::Content(self.0.clone()))
+        } else {
+            None
+        }
+    }
+
+    /// Flip the bold toggle for the body.
+    fn realize(&self, _: Tracked<dyn World>, _: StyleChain) -> SourceResult<Content> {
+        let body = self.0.clone();
+        Ok(body.styled(TextNode::BOLD, Toggle))
+    }
+}
+
+/// Emphasized content, rendered with an italic font by default (via the `TextNode::ITALIC` toggle).
+#[derive(Debug, Hash)]
+pub struct EmphNode(pub Content);
+
+#[node(Show)]
+impl EmphNode {
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("body")?).pack()) // the "body" argument is required
+ }
+}
+
+impl Show for EmphNode {
+    /// Re-pack this node around its body after calling `unguard(sel)` on it.
+    fn unguard_parts(&self, sel: Selector) -> Content {
+        let body = self.0.unguard(sel);
+        Self(body).pack()
+    }
+
+    /// Look up a field by name; only `"body"` is available.
+    fn field(&self, name: &str) -> Option<Value> {
+        if name == "body" {
+            Some(Value::Content(self.0.clone()))
+        } else {
+            None
+        }
+    }
+
+    /// Flip the italic toggle for the body.
+    fn realize(&self, _: Tracked<dyn World>, _: StyleChain) -> SourceResult<Content> {
+        let body = self.0.clone();
+        Ok(body.styled(TextNode::ITALIC, Toggle))
+    }
+}
+
+/// A toggle that turns on and off alternatingly if folded: each fold negates the outer value.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct Toggle;
+
+impl Fold for Toggle {
+ type Output = bool;
+
+ fn fold(self, outer: Self::Output) -> Self::Output {
+ !outer // nesting two toggles therefore restores the original state
+ }
+}
+
+impl Fold for Decoration {
+    type Output = Vec<Self>;
+
+    /// Put this decoration in front of all decorations collected so far.
+    fn fold(self, outer: Self::Output) -> Self::Output {
+        let mut folded = Vec::with_capacity(outer.len() + 1);
+        folded.push(self);
+        folded.extend(outer);
+        folded
+    }
+}
diff --git a/library/src/text/par.rs b/library/src/text/par.rs
new file mode 100644
index 00000000..95371e1a
--- /dev/null
+++ b/library/src/text/par.rs
@@ -0,0 +1,1259 @@
+use std::cmp::Ordering;
+
+use typst::util::EcoString;
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
+use unicode_script::{Script, UnicodeScript};
+use xi_unicode::LineBreakIterator;
+
+use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
+use crate::layout::Spacing;
+use crate::prelude::*;
+
+/// Arrange text, spacing and inline-level nodes into a paragraph; holds the styled children.
+#[derive(Hash)]
+pub struct ParNode(pub StyleVec<ParChild>);
+
+/// A uniformly styled atomic piece of a paragraph.
+#[derive(Hash, PartialEq)]
+pub enum ParChild {
+ /// A chunk of text.
+ Text(EcoString),
+ /// A single or double smart quote (`double` selects which).
+ Quote { double: bool },
+ /// Horizontal spacing between other children.
+ Spacing(Spacing),
+ /// Arbitrary inline-level content.
+ Inline(Content),
+}
+
+#[node(LayoutBlock)]
+impl ParNode {
+ /// The spacing between lines.
+ #[property(resolve)]
+ pub const LEADING: Length = Em::new(0.65).into();
+ /// The extra spacing between paragraphs.
+ #[property(resolve)]
+ pub const SPACING: Length = Em::new(1.2).into();
+ /// The indent the first line of a consecutive paragraph should have.
+ #[property(resolve)]
+ pub const INDENT: Length = Length::zero();
+ /// Whether to allow paragraph spacing when there is paragraph indent.
+ pub const SPACING_AND_INDENT: bool = false;
+
+ /// How to align text and inline objects in their line.
+ #[property(resolve)]
+ pub const ALIGN: HorizontalAlign = HorizontalAlign(GenAlign::Start);
+ /// Whether to justify text in its line.
+ pub const JUSTIFY: bool = false;
+ /// How to determine line breaks.
+ pub const LINEBREAKS: Smart<Linebreaks> = Smart::Auto;
+
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ // The paragraph constructor is special: It doesn't create a paragraph
+ // node. Instead, it just ensures that the passed content lives in a
+ // separate paragraph and styles it.
+ Ok(Content::sequence(vec![
+ ParbreakNode.pack(),
+ args.expect("body")?,
+ ParbreakNode.pack(),
+ ]))
+ }
+}
+
+impl LayoutBlock for ParNode {
+    /// Lay out the paragraph in four steps: collect, prepare, break into
+    /// lines, stack into frames.
+    fn layout_block(
+        &self,
+        world: Tracked<dyn World>,
+        regions: &Regions,
+        styles: StyleChain,
+    ) -> SourceResult<Vec<Frame>> {
+        // Gather the children's text into a single string (needed for BiDi
+        // analysis) together with the segments it was built from.
+        let (full_text, segments) = collect(self, &styles);
+
+        // Run BiDi analysis and build a representation that supports line
+        // breaking without layouting each and every line from scratch.
+        let prepared = prepare(world, self, &full_text, segments, regions, styles)?;
+
+        // Decide where the lines break, using the first region's width.
+        let available = regions.first.x;
+        let lines = linebreak(&prepared, world, available);
+
+        // Stack the lines into one frame per region.
+        stack(&prepared, world, &lines, regions)
+    }
+}
+
+/// Debug-prints as `Par ` followed by the debug output of the children.
+impl Debug for ParNode {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let Self(children) = self;
+        f.write_str("Par ")?;
+        Debug::fmt(children, f)
+    }
+}
+
+/// Debug-prints each child kind in a compact form; spacing and inline
+/// content are printed through their own `Debug` implementations.
+impl Debug for ParChild {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        match self {
+            Self::Inline(inline) => Debug::fmt(inline, f),
+            Self::Spacing(kind) => write!(f, "{:?}", kind),
+            Self::Quote { double } => write!(f, "Quote({double})"),
+            Self::Text(text) => write!(f, "Text({:?})", text),
+        }
+    }
+}
+
+/// Only two spacing children are comparable; every other pairing is unordered.
+impl PartialOrd for ParChild {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        if let (Self::Spacing(lhs), Self::Spacing(rhs)) = (self, other) {
+            lhs.partial_cmp(rhs)
+        } else {
+            None
+        }
+    }
+}
+
+/// A horizontal alignment: a `GenAlign` that the cast below only accepts on the x-axis.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct HorizontalAlign(pub GenAlign);
+
+castable! {
+ HorizontalAlign,
+ Expected: "alignment",
+ @align: GenAlign => match align.axis() {
+ Axis::X => Self(*align),
+ Axis::Y => Err("must be horizontal")?, // vertical alignments are rejected
+ },
+}
+
+impl Resolve for HorizontalAlign {
+    type Output = Align;
+
+    /// Resolve the wrapped generic alignment against `styles`.
+    fn resolve(self, styles: StyleChain) -> Self::Output {
+        let Self(align) = self;
+        align.resolve(styles)
+    }
+}
+
+/// How to determine line breaks in a paragraph; cast from the strings "simple" and "optimized".
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum Linebreaks {
+ /// Determine the linebreaks in a simple first-fit style.
+ Simple,
+ /// Optimize the linebreaks for the whole paragraph.
+ Optimized,
+}
+
+castable! {
+ Linebreaks,
+ Expected: "string",
+ Value::Str(string) => match string.as_str() {
+ "simple" => Self::Simple,
+ "optimized" => Self::Optimized,
+ _ => Err(r#"expected "simple" or "optimized""#)?,
+ },
+}
+
+/// A paragraph break; the node carries no data.
+#[derive(Debug, Clone, Hash)]
+pub struct ParbreakNode;
+
+#[node]
+impl ParbreakNode {
+ fn construct(_: &mut Vm, _: &mut Args) -> SourceResult<Content> {
+ Ok(Self.pack()) // takes no arguments
+ }
+}
+
+/// Repeats content to fill a line; wraps the content to repeat.
+#[derive(Debug, Hash)]
+pub struct RepeatNode(pub Content);
+
+#[node(LayoutInline)]
+impl RepeatNode {
+ fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("body")?).pack()) // the "body" argument is required
+ }
+}
+
+impl LayoutInline for RepeatNode {
+    /// Lay out the wrapped content once by delegating to its own inline
+    /// layout; this method itself performs no repetition.
+    fn layout_inline(
+        &self,
+        world: Tracked<dyn World>,
+        regions: &Regions,
+        styles: StyleChain,
+    ) -> SourceResult<Vec<Frame>> {
+        let Self(body) = self;
+        body.layout_inline(world, regions, styles)
+    }
+}
+
+/// Range of a substring of text, in byte offsets.
+type Range = std::ops::Range<usize>;
+
+// The characters by which spacing and inline content (nodes) are replaced in
+// the paragraph's full text.
+const SPACING_REPLACE: char = ' '; // Space
+const NODE_REPLACE: char = '\u{FFFC}'; // Object Replacement Character
+
+/// A paragraph representation in which children are already layouted and text
+/// is already preshaped.
+///
+/// In many cases, we can directly reuse these results when constructing a line.
+/// Only when a line break falls onto a text index that is not safe-to-break per
+/// rustybuzz, we have to reshape that portion.
+struct Preparation<'a> {
+ /// Bidirectional text embedding levels for the paragraph.
+ bidi: BidiInfo<'a>,
+ /// Text runs, spacing and layouted nodes, in text order (`find` and `slice` walk them by text length).
+ items: Vec<Item<'a>>,
+ /// The styles shared by all children.
+ styles: StyleChain<'a>,
+ /// Whether to hyphenate, if it's the same for all children (`None` otherwise).
+ hyphenate: Option<bool>,
+ /// The text language, if it's the same for all children (`None` otherwise).
+ lang: Option<Lang>,
+ /// The resolved leading between lines.
+ leading: Abs,
+ /// The paragraph's resolved alignment.
+ align: Align,
+ /// Whether to justify the paragraph.
+ justify: bool,
+}
+
+impl<'a> Preparation<'a> {
+ /// Find the item that contains the given `text_offset`, or `None` if the offset is past all items.
+ fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
+ let mut cursor = 0; // text offset at which the current item starts
+ for item in &self.items {
+ let end = cursor + item.len();
+ if (cursor .. end).contains(&text_offset) {
+ return Some(item);
+ }
+ cursor = end;
+ }
+ None
+ }
+
+ /// Return the items that intersect the given `text_range`.
+ ///
+ /// Returns the expanded range around the items and the items.
+ fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
+ let mut cursor = 0; // text offset at which the current item starts
+ let mut start = 0; // index of the first item in the result
+ let mut end = 0; // index one past the last item in the result
+ let mut expanded = text_range.clone();
+
+ for (i, item) in self.items.iter().enumerate() {
+ if cursor <= text_range.start { // keeps updating, so the last item starting at or before the range start wins
+ start = i;
+ expanded.start = cursor;
+ }
+
+ let len = item.len();
+ if cursor < text_range.end || cursor + len <= text_range.end { // item begins inside the range or ends within it
+ end = i + 1;
+ expanded.end = cursor + len;
+ } else {
+ break; // this item lies past the range; stop scanning
+ }
+
+ cursor += len;
+ }
+
+ (expanded, &self.items[start .. end])
+ }
+}
+
+/// A segment of one or multiple collapsed paragraph children.
+#[derive(Debug, Copy, Clone)]
+enum Segment<'a> {
+ /// One or multiple collapsed text or text-equivalent children. Stores how
+ /// long the segment is (in bytes of the full text string).
+ Text(usize),
+ /// Horizontal spacing between other segments (`SPACING_REPLACE` in the text).
+ Spacing(Spacing),
+ /// Arbitrary inline-level content (`NODE_REPLACE` in the text).
+ Inline(&'a Content),
+}
+
+impl Segment<'_> {
+    /// How many bytes this segment occupies in the full text string.
+    fn len(&self) -> usize {
+        // Non-text segments are represented by a single placeholder char.
+        let placeholder = match self {
+            Self::Text(len) => return *len,
+            Self::Spacing(_) => SPACING_REPLACE,
+            Self::Inline(_) => NODE_REPLACE,
+        };
+        placeholder.len_utf8()
+    }
+}
+
+/// A prepared item in a paragraph layout, produced from a `Segment`.
+#[derive(Debug)]
+enum Item<'a> {
+ /// A shaped text run with consistent style and direction.
+ Text(ShapedText<'a>),
+ /// Absolute spacing between other items.
+ Absolute(Abs),
+ /// Fractional spacing between other items.
+ Fractional(Fr),
+ /// Laid-out inline-level content.
+ Frame(Frame),
+ /// A repeating node that fills the remaining space in a line.
+ Repeat(&'a RepeatNode, StyleChain<'a>),
+}
+
+impl<'a> Item<'a> {
+    /// Borrow the shaped text if this is a text item, `None` otherwise.
+    fn text(&self) -> Option<&ShapedText<'a>> {
+        if let Self::Text(shaped) = self {
+            Some(shaped)
+        } else {
+            None
+        }
+    }
+
+    /// The number of bytes the item covers in the paragraph's full text.
+    fn len(&self) -> usize {
+        // Everything but text is represented by a single placeholder char.
+        let placeholder = match self {
+            Self::Text(shaped) => return shaped.text.len(),
+            Self::Absolute(_) | Self::Fractional(_) => SPACING_REPLACE,
+            Self::Frame(_) | Self::Repeat(..) => NODE_REPLACE,
+        };
+        placeholder.len_utf8()
+    }
+
+    /// The width the item naturally takes up when laid out.
+    fn width(&self) -> Abs {
+        match self {
+            Self::Fractional(_) | Self::Repeat(..) => Abs::zero(),
+            Self::Absolute(amount) => *amount,
+            Self::Frame(frame) => frame.width(),
+            Self::Text(shaped) => shaped.width,
+        }
+    }
+}
+
+/// A laid-out line, consisting of a sequence of laid-out paragraph items that
+/// are mostly borrowed from the preparation phase. This type enables you to
+/// measure the size of a line in a range before committing to building the
+/// line's frame.
+///
+/// At most two paragraph items must be created individually for this line: The
+/// first and last one since they may be broken apart by the start or end of the
+/// line, respectively. But even those can partially reuse previous results when
+/// the break index is safe-to-break per rustybuzz.
+struct Line<'a> {
+ /// Bidi information about the paragraph.
+ bidi: &'a BidiInfo<'a>,
+ /// The trimmed range the line spans in the paragraph.
+ trimmed: Range,
+ /// The untrimmed end where the line ends.
+ end: usize,
+ /// A reshaped text item if the line sliced up a text item at the start.
+ first: Option<Item<'a>>,
+ /// Inner items which don't need to be reprocessed.
+ inner: &'a [Item<'a>],
+ /// A reshaped text item if the line sliced up a text item at the end. If
+ /// there is only one text item, this takes precedence over `first`.
+ last: Option<Item<'a>>,
+ /// The width of the line.
+ width: Abs,
+ /// Whether the line should be justified.
+ justify: bool,
+ /// Whether the line ends with a hyphen or dash, either naturally or through
+ /// hyphenation.
+ dash: bool,
+}
+
+impl<'a> Line<'a> {
+    /// Walk the line's items in logical order: the reshaped first item, the
+    /// borrowed inner items and the reshaped last item.
+    fn items(&self) -> impl Iterator<Item = &Item<'a>> {
+        self.first.iter().chain(self.inner.iter()).chain(self.last.iter())
+    }
+
+    /// Yield the items of the line that overlap the given `text_range`.
+    fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
+        let (mut first, mut past) = (0, 0);
+        let mut pos = self.trimmed.start;
+
+        for (idx, item) in self.items().enumerate() {
+            // The last item that begins at or before the range start wins.
+            if pos <= text_range.start {
+                first = idx;
+            }
+
+            // Stop at the first item that neither begins before the range
+            // end nor ends within it.
+            let next = pos + item.len();
+            if pos >= text_range.end && next > text_range.end {
+                break;
+            }
+
+            past = idx + 1;
+            pos = next;
+        }
+
+        self.items().skip(first).take(past - first)
+    }
+
+    /// The number of justifiable glyphs across all text items of the line.
+    fn justifiables(&self) -> usize {
+        self.items()
+            .filter_map(Item::text)
+            .map(|shaped| shaped.justifiables())
+            .sum()
+    }
+
+    /// The combined width of the stretchable spaces in the line.
+    fn stretch(&self) -> Abs {
+        self.items()
+            .filter_map(Item::text)
+            .fold(Abs::zero(), |total, shaped| total + shaped.stretch())
+    }
+
+    /// The total of all fractions in the line. A repeat counts as one
+    /// fraction.
+    fn fr(&self) -> Fr {
+        let fraction = |item: &Item<'a>| match item {
+            Item::Repeat(..) => Some(Fr::one()),
+            Item::Fractional(fr) => Some(*fr),
+            _ => None,
+        };
+        self.items().filter_map(fraction).sum()
+    }
+}
+
+/// Collect all text of the paragraph into one string and split it into styled
+/// segments. This also applies case transformations and smart quotes.
+fn collect<'a>(
+ par: &'a ParNode,
+ styles: &'a StyleChain<'a>,
+) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
+ let mut full = String::new();
+ let mut quoter = Quoter::new();
+ let mut segments = vec![];
+ let mut iter = par.0.iter().peekable(); // Peekable so quotes can look at the next child.
+
+ while let Some((child, map)) = iter.next() {
+ let styles = map.chain(styles);
+ let segment = match child {
+ ParChild::Text(text) => {
+ let prev = full.len(); // Length before pushing, to measure the segment.
+ if let Some(case) = styles.get(TextNode::CASE) {
+ full.push_str(&case.apply(text));
+ } else {
+ full.push_str(text);
+ }
+ Segment::Text(full.len() - prev)
+ }
+ &ParChild::Quote { double } => {
+ let prev = full.len();
+ if styles.get(TextNode::SMART_QUOTES) {
+ let lang = styles.get(TextNode::LANG);
+ let region = styles.get(TextNode::REGION);
+ let quotes = Quotes::from_lang(lang, region);
+ let peeked = iter.peek().and_then(|(child, _)| match child {
+ ParChild::Text(text) => text.chars().next(),
+ ParChild::Quote { .. } => Some('"'),
+ ParChild::Spacing(_) => Some(SPACING_REPLACE),
+ ParChild::Inline(_) => Some(NODE_REPLACE),
+ });
+
+ full.push_str(quoter.quote(&quotes, double, peeked));
+ } else {
+ full.push(if double { '"' } else { '\'' });
+ }
+ Segment::Text(full.len() - prev)
+ }
+ &ParChild::Spacing(spacing) => {
+ full.push(SPACING_REPLACE);
+ Segment::Spacing(spacing)
+ }
+ ParChild::Inline(inline) => {
+ full.push(NODE_REPLACE);
+ Segment::Inline(inline)
+ }
+ };
+
+ if let Some(last) = full.chars().last() { // Tell the quoter what precedes the next quote.
+ quoter.last(last);
+ }
+
+ if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
+ (segments.last_mut(), segment)
+ {
+ if *last_styles == styles { // Merge adjacent text segments with equal styles.
+ *last_len += len;
+ continue;
+ }
+ }
+
+ segments.push((segment, styles));
+ }
+
+ (full, segments)
+}
+
+/// Prepare paragraph layout by shaping the whole paragraph and laying out all
+/// contained inline-level content.
+fn prepare<'a>(
+ world: Tracked<dyn World>,
+ par: &'a ParNode,
+ text: &'a str,
+ segments: Vec<(Segment<'a>, StyleChain<'a>)>,
+ regions: &Regions,
+ styles: StyleChain<'a>,
+) -> SourceResult<Preparation<'a>> {
+ let bidi = BidiInfo::new(text, match styles.get(TextNode::DIR) {
+ Dir::LTR => Some(BidiLevel::ltr()),
+ Dir::RTL => Some(BidiLevel::rtl()),
+ _ => None,
+ });
+
+ let mut cursor = 0; // Byte offset of the current segment in `text`.
+ let mut items = vec![];
+
+ // Shape / lay out the children and collect them into items.
+ for (segment, styles) in segments {
+ let end = cursor + segment.len();
+ match segment {
+ Segment::Text(_) => {
+ shape_range(&mut items, world, &bidi, cursor .. end, styles);
+ }
+ Segment::Spacing(spacing) => match spacing {
+ Spacing::Relative(v) => {
+ let resolved = v.resolve(styles).relative_to(regions.base.x);
+ items.push(Item::Absolute(resolved));
+ }
+ Spacing::Fractional(v) => {
+ items.push(Item::Fractional(v));
+ }
+ },
+ Segment::Inline(inline) => {
+ if let Some(repeat) = inline.downcast::<RepeatNode>() {
+ items.push(Item::Repeat(repeat, styles)); // Laid out later, per line.
+ } else {
+ let size = Size::new(regions.first.x, regions.base.y);
+ let pod = Regions::one(size, regions.base, Axes::splat(false));
+ let mut frame = inline.layout_inline(world, &pod, styles)?.remove(0); // Only the first frame is used.
+ frame.translate(Point::with_y(styles.get(TextNode::BASELINE)));
+ frame.apply_role(Role::GenericInline);
+ items.push(Item::Frame(frame));
+ }
+ }
+ }
+
+ cursor = end;
+ }
+
+ Ok(Preparation {
+ bidi,
+ items,
+ styles,
+ hyphenate: shared_get(styles, &par.0, TextNode::HYPHENATE),
+ lang: shared_get(styles, &par.0, TextNode::LANG),
+ leading: styles.get(ParNode::LEADING),
+ align: styles.get(ParNode::ALIGN),
+ justify: styles.get(ParNode::JUSTIFY),
+ })
+}
+
+/// Group a range of text by BiDi level and script, shape the runs and push one
+/// text item per run onto `items`.
+fn shape_range<'a>(
+ items: &mut Vec<Item<'a>>,
+ world: Tracked<dyn World>,
+ bidi: &BidiInfo<'a>,
+ range: Range,
+ styles: StyleChain<'a>,
+) {
+ let mut process = |text, level: BidiLevel| {
+ let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
+ let shaped = shape(world, text, styles, dir);
+ items.push(Item::Text(shaped));
+ };
+
+ let mut prev_level = BidiLevel::ltr();
+ let mut prev_script = Script::Unknown;
+ let mut cursor = range.start; // Start of the run that is currently being grouped.
+
+ // Group by embedding level and script.
+ for i in cursor .. range.end {
+ if !bidi.text.is_char_boundary(i) { // Only look at the first byte of each character.
+ continue;
+ }
+
+ let level = bidi.levels[i];
+ let script =
+ bidi.text[i ..].chars().next().map_or(Script::Unknown, |c| c.script());
+
+ if level != prev_level || !is_compatible(script, prev_script) {
+ if cursor < i {
+ process(&bidi.text[cursor .. i], prev_level);
+ }
+ cursor = i;
+ prev_level = level;
+ prev_script = script;
+ } else if is_generic_script(prev_script) { // Replace a generic run script by the current one.
+ prev_script = script;
+ }
+ }
+
+ process(&bidi.text[cursor .. range.end], prev_level); // Shape what remains after the last boundary.
+}
+
+/// Whether the script is one of the generic classes (unknown, common or
+/// inherited) rather than a specific script.
+fn is_generic_script(script: Script) -> bool {
+    match script {
+        Script::Unknown | Script::Common | Script::Inherited => true,
+        _ => false,
+    }
+}
+
+/// Whether two scripts can be part of the same shape run. That is the case
+/// if they are equal or if at least one of them is generic.
+fn is_compatible(a: Script, b: Script) -> bool {
+    a == b || is_generic_script(a) || is_generic_script(b)
+}
+
+/// Read a style property from `styles`, but only if none of the paragraph's
+/// children sets it in its own style map. Otherwise, the value is not shared
+/// and `None` is returned.
+fn shared_get<'a, K: Key<'a>>(
+    styles: StyleChain<'a>,
+    children: &StyleVec<ParChild>,
+    key: K,
+) -> Option<K::Output> {
+    let overridden = children.styles().any(|map| map.contains(key));
+    if overridden {
+        None
+    } else {
+        Some(styles.get(key))
+    }
+}
+
+/// Find suitable linebreaks.
+///
+/// Uses the configured line breaking mode. If none is configured, justified
+/// paragraphs are broken in optimized style and all others in simple style.
+fn linebreak<'a>(
+    p: &'a Preparation<'a>,
+    world: Tracked<dyn World>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    let fallback = || match p.styles.get(ParNode::JUSTIFY) {
+        true => Linebreaks::Optimized,
+        false => Linebreaks::Simple,
+    };
+
+    let mode = p.styles.get(ParNode::LINEBREAKS).unwrap_or_else(fallback);
+    if let Linebreaks::Simple = mode {
+        linebreak_simple(p, world, width)
+    } else {
+        linebreak_optimized(p, world, width)
+    }
+}
+
+/// Perform line breaking in simple first-fit style. This means that we build
+/// lines greedily, always taking the longest possible line. This may lead to
+/// very unbalanced lines, but is fast and simple.
+fn linebreak_simple<'a>(
+ p: &'a Preparation<'a>,
+ world: Tracked<dyn World>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ let mut lines = vec![];
+ let mut start = 0; // Text offset at which the current line starts.
+ let mut last = None; // The last attempt that still fit, with its end offset.
+
+ for (end, mandatory, hyphen) in breakpoints(p) {
+ // Compute the line and its size.
+ let mut attempt = line(p, world, start .. end, mandatory, hyphen);
+
+ // If the line doesn't fit anymore, we push the last fitting attempt
+ // into the stack and rebuild the line from the attempt's end. The
+ // resulting line cannot be broken up further.
+ if !width.fits(attempt.width) {
+ if let Some((last_attempt, last_end)) = last.take() {
+ lines.push(last_attempt);
+ start = last_end;
+ attempt = line(p, world, start .. end, mandatory, hyphen);
+ }
+ }
+
+ // Finish the current line if there is a mandatory line break (i.e.
+ // due to "\n") or if the line doesn't fit horizontally already
+ // since then no shorter line will be possible.
+ if mandatory || !width.fits(attempt.width) {
+ lines.push(attempt);
+ start = end;
+ last = None;
+ } else {
+ last = Some((attempt, end));
+ }
+ }
+
+ if let Some((line, _)) = last { // Flush the final pending attempt.
+ lines.push(line);
+ }
+
+ lines
+}
+
+/// Perform line breaking in optimized Knuth-Plass style. Here, we use more
+/// context to determine the line breaks than in the simple first-fit style. For
+/// example, we might choose to cut a line short even though there is still a
+/// bit of space to improve the fit of one of the following lines. The
+/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
+/// very tight or very loose fit has a higher cost than one that is just right.
+/// Ending a line with a hyphen incurs extra cost and ending two successive
+/// lines with hyphens even more.
+///
+/// To find the layout with the minimal total cost the algorithm uses dynamic
+/// programming: For each possible breakpoint it determines the optimal
+/// paragraph layout _up to that point_. It walks over all possible start points
+/// for a line ending at that point and finds the one for which the cost of the
+/// line plus the cost of the optimal paragraph up to the start point (already
+/// computed and stored in the dynamic programming table) is minimal. The final
+/// result is simply the layout determined for the last breakpoint at the end of
+/// text.
+fn linebreak_optimized<'a>(
+ p: &'a Preparation<'a>,
+ world: Tracked<dyn World>,
+ width: Abs,
+) -> Vec<Line<'a>> {
+ /// The cost of a line or paragraph layout.
+ type Cost = f64;
+
+ /// An entry in the dynamic programming table.
+ struct Entry<'a> {
+ pred: usize, // Table index of the entry for the preceding line.
+ total: Cost, // Cost of this line plus all of its predecessors.
+ line: Line<'a>,
+ }
+
+ // Cost parameters.
+ const HYPH_COST: Cost = 0.5;
+ const CONSECUTIVE_DASH_COST: Cost = 30.0;
+ const MAX_COST: Cost = 1_000_000.0;
+ const MIN_COST: Cost = -MAX_COST;
+ const MIN_RATIO: f64 = -0.15;
+
+ // Dynamic programming table, seeded with an empty line at the text start.
+ let mut active = 0;
+ let mut table = vec![Entry {
+ pred: 0,
+ total: 0.0,
+ line: line(p, world, 0 .. 0, false, false),
+ }];
+
+ let em = p.styles.get(TextNode::SIZE);
+
+ for (end, mandatory, hyphen) in breakpoints(p) {
+ let k = table.len();
+ let eof = end == p.bidi.text.len();
+ let mut best: Option<Entry> = None;
+
+ // Find the optimal predecessor among the still active entries.
+ for (i, pred) in table.iter_mut().enumerate().skip(active) {
+ // Lay out the line.
+ let start = pred.line.end;
+ let attempt = line(p, world, start .. end, mandatory, hyphen);
+
+ // Determine how much the line's spaces would need to be stretched
+ // to make it the desired width.
+ let delta = width - attempt.width;
+ let mut ratio = delta / attempt.stretch();
+ if ratio.is_infinite() { // Nothing to stretch: measure against half an em instead.
+ ratio = delta / (em / 2.0);
+ }
+
+ // At some point, it doesn't matter any more.
+ ratio = ratio.min(10.0);
+
+ // Determine the cost of the line.
+ let min_ratio = if attempt.justify { MIN_RATIO } else { 0.0 };
+ let mut cost = if ratio < min_ratio {
+ // The line is overfull. This is the case if
+ // - justification is on, but we'd need to shrink too much
+ // - justification is off and the line just doesn't fit
+ // Since any longer line will also be overfull, we can deactivate
+ // this breakpoint.
+ active = i + 1;
+ MAX_COST
+ } else if mandatory || eof {
+ // This is a mandatory break and the line is not overfull, so it
+ // has minimum cost. All breakpoints before this one become
+ // inactive since no line can span above the mandatory break.
+ active = k;
+ MIN_COST + if attempt.justify { ratio.powi(3).abs() } else { 0.0 }
+ } else {
+ // Normal line with cost of |ratio^3|.
+ ratio.powi(3).abs()
+ };
+
+ // Penalize hyphens.
+ if hyphen {
+ cost += HYPH_COST;
+ }
+
+ // Penalize two consecutive dashes (not necessarily hyphens) extra.
+ if attempt.dash && pred.line.dash {
+ cost += CONSECUTIVE_DASH_COST;
+ }
+
+ // The total cost of this line and its chain of predecessors.
+ let total = pred.total + cost;
+
+ // If this attempt is at least as good as what we had before, take it!
+ if best.as_ref().map_or(true, |best| best.total >= total) {
+ best = Some(Entry { pred: i, total, line: attempt });
+ }
+ }
+
+ table.push(best.unwrap());
+ }
+
+ // Retrace the best path, from the last entry back to the seed entry.
+ let mut lines = vec![];
+ let mut idx = table.len() - 1;
+ while idx != 0 {
+ table.truncate(idx + 1);
+ let entry = table.pop().unwrap();
+ lines.push(entry.line);
+ idx = entry.pred;
+ }
+
+ lines.reverse();
+ lines
+}
+
+/// Determine all possible points in the text where lines can be broken.
+///
+/// Returns for each breakpoint the text index, whether the break is mandatory
+/// (after `\n`) and whether a hyphen is required (when breaking inside of a
+/// word).
+fn breakpoints<'a>(p: &'a Preparation) -> Breakpoints<'a> {
+ Breakpoints {
+ p,
+ linebreaks: LineBreakIterator::new(p.bidi.text),
+ syllables: None, // No word is being hyphenated yet.
+ offset: 0,
+ suffix: 0,
+ end: 0,
+ mandatory: false,
+ }
+}
+
+/// An iterator over the line break opportunities in a text.
+struct Breakpoints<'a> {
+ /// The prepared paragraph whose text is broken.
+ p: &'a Preparation<'a>,
+ /// The inner iterator over the Unicode line break opportunities.
+ linebreaks: LineBreakIterator<'a>,
+ /// Iterator over the syllables of the current word.
+ syllables: Option<hypher::Syllables<'a>>,
+ /// The current text offset.
+ offset: usize,
+ /// The trimmed end of the current word.
+ suffix: usize,
+ /// The untrimmed end of the current word.
+ end: usize,
+ /// Whether the break after the current word is mandatory.
+ mandatory: bool,
+}
+
+impl Iterator for Breakpoints<'_> {
+ type Item = (usize, bool, bool); // (text index, mandatory, hyphen)
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // If we're currently in a hyphenated "word", process the next syllable.
+ if let Some(syllable) = self.syllables.as_mut().and_then(Iterator::next) {
+ self.offset += syllable.len();
+ if self.offset == self.suffix { // Last syllable: jump to the untrimmed word end.
+ self.offset = self.end;
+ }
+
+ // Filter out hyphenation opportunities where hyphenation was
+ // actually disabled.
+ let hyphen = self.offset < self.end;
+ if hyphen && !self.hyphenate(self.offset) {
+ return self.next();
+ }
+
+ return Some((self.offset, self.mandatory && !hyphen, hyphen));
+ }
+
+ // Get the next "word". Iteration ends once there is none.
+ (self.end, self.mandatory) = self.linebreaks.next()?;
+
+ // Hyphenate the next word, unless hyphenation is off for all children.
+ if self.p.hyphenate != Some(false) {
+ if let Some(lang) = self.lang(self.offset) {
+ let word = &self.p.bidi.text[self.offset .. self.end];
+ let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic());
+ if !trimmed.is_empty() {
+ self.suffix = self.offset + trimmed.len();
+ self.syllables = Some(hypher::hyphenate(trimmed, lang));
+ return self.next();
+ }
+ }
+ }
+
+ self.offset = self.end;
+ Some((self.end, self.mandatory, false))
+ }
+}
+
+impl Breakpoints<'_> {
+    /// Whether hyphenation is enabled at the given offset.
+    ///
+    /// Uses the paragraph-wide setting if there is one and otherwise the
+    /// styles of the text item at the offset. If there is no text item at
+    /// the offset, hyphenation counts as disabled.
+    fn hyphenate(&self, offset: usize) -> bool {
+        match self.p.hyphenate {
+            Some(shared) => shared,
+            None => self
+                .p
+                .find(offset)
+                .and_then(|item| item.text())
+                .map_or(false, |shaped| shaped.styles.get(TextNode::HYPHENATE)),
+        }
+    }
+
+    /// The text language at the given offset, as a `hypher` language.
+    ///
+    /// Uses the paragraph-wide language if there is one and otherwise the
+    /// styles of the text item at the offset. Returns `None` if there is no
+    /// text item at the offset or if the language code cannot be converted.
+    fn lang(&self, offset: usize) -> Option<hypher::Lang> {
+        let lang = match self.p.lang {
+            Some(shared) => shared,
+            None => self.p.find(offset)?.text()?.styles.get(TextNode::LANG),
+        };
+
+        let code = lang.as_str().as_bytes().try_into().ok()?;
+        hypher::Lang::from_iso(code)
+    }
+}
+
+/// Create a line which spans the given range of the paragraph's text.
+fn line<'a>(
+ p: &'a Preparation,
+ world: Tracked<dyn World>,
+ mut range: Range,
+ mandatory: bool,
+ hyphen: bool,
+) -> Line<'a> {
+ let end = range.end; // The untrimmed end; `range.end` may be trimmed below.
+ let mut justify = p.justify && end < p.bidi.text.len() && !mandatory;
+
+ if range.is_empty() {
+ return Line {
+ bidi: &p.bidi,
+ end,
+ trimmed: range,
+ first: None,
+ inner: &[],
+ last: None,
+ width: Abs::zero(),
+ justify,
+ dash: false,
+ };
+ }
+
+ // Slice out the relevant items.
+ let (expanded, mut inner) = p.slice(range.clone());
+ let mut width = Abs::zero();
+
+ // Reshape the last item if it's split in half or hyphenated.
+ let mut last = None;
+ let mut dash = false;
+ if let Some((Item::Text(shaped), before)) = inner.split_last() {
+ // Compute the range we want to shape, trimming whitespace at the
+ // end of the line.
+ let base = expanded.end - shaped.text.len();
+ let start = range.start.max(base);
+ let text = &p.bidi.text[start .. range.end];
+ let trimmed = text.trim_end();
+ range.end = start + trimmed.len();
+
+ // Deal with hyphens, dashes and justification.
+ let shy = trimmed.ends_with('\u{ad}');
+ dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']);
+ justify |= text.ends_with('\u{2028}');
+
+ // Usually, we don't want to shape an empty string because:
+ // - We don't want the height of trimmed whitespace in a different
+ // font to be considered for the line height.
+ // - Even if it's in the same font, it's unnecessary.
+ //
+ // There is one exception though. When the whole line is empty, we
+ // need the shaped empty string to make the line the appropriate
+ // height. That is the case exactly if the string is empty and there
+ // are no other items in the line.
+ if hyphen || start + shaped.text.len() > range.end {
+ if hyphen || start < range.end || before.is_empty() {
+ let shifted = start - base .. range.end - base; // Relative to the item's own text.
+ let mut reshaped = shaped.reshape(world, shifted);
+ if hyphen || shy {
+ reshaped.push_hyphen(world);
+ }
+ width += reshaped.width;
+ last = Some(Item::Text(reshaped));
+ }
+
+ inner = before;
+ }
+ }
+
+ // Reshape the start item if it's split in half.
+ let mut first = None;
+ if let Some((Item::Text(shaped), after)) = inner.split_first() {
+ // Compute the range we want to shape.
+ let base = expanded.start;
+ let end = range.end.min(base + shaped.text.len());
+
+ // Reshape if necessary.
+ if range.start + shaped.text.len() > end {
+ if range.start < end {
+ let shifted = range.start - base .. end - base;
+ let reshaped = shaped.reshape(world, shifted);
+ width += reshaped.width;
+ first = Some(Item::Text(reshaped));
+ }
+
+ inner = after;
+ }
+ }
+
+ // Measure the inner items.
+ for item in inner {
+ width += item.width();
+ }
+
+ Line {
+ bidi: &p.bidi,
+ trimmed: range,
+ end,
+ first,
+ inner,
+ last,
+ width,
+ justify,
+ dash,
+ }
+}
+
+/// Combine laid-out lines into one frame per region.
+fn stack(
+ p: &Preparation,
+ world: Tracked<dyn World>,
+ lines: &[Line],
+ regions: &Regions,
+) -> SourceResult<Vec<Frame>> {
+ // Determine the paragraph's width: Full width of the region if we
+ // should expand or there's fractional spacing, fit-to-width otherwise.
+ let mut width = regions.first.x;
+ if !regions.expand.x && lines.iter().all(|line| line.fr().is_zero()) {
+ width = lines.iter().map(|line| line.width).max().unwrap_or_default();
+ }
+
+ // State for final frame building.
+ let mut regions = regions.clone();
+ let mut finished = vec![];
+ let mut first = true; // Whether the next line is the first in its frame.
+ let mut output = Frame::new(Size::with_x(width));
+ output.apply_role(Role::Paragraph);
+
+ // Stack the lines into one frame per region.
+ for line in lines {
+ let frame = commit(p, world, line, &regions, width)?;
+ let height = frame.size().y;
+
+ while !regions.first.y.fits(height) && !regions.in_last() { // Move on until the line fits.
+ finished.push(output);
+ output = Frame::new(Size::with_x(width));
+ output.apply_role(Role::Paragraph);
+ regions.next();
+ first = true;
+ }
+
+ if !first { // Leading only goes between lines, not above the first one.
+ output.size_mut().y += p.leading;
+ }
+
+ let pos = Point::with_y(output.height());
+ output.size_mut().y += height;
+ output.push_frame(pos, frame);
+
+ regions.first.y -= height + p.leading;
+ first = false;
+ }
+
+ finished.push(output);
+ Ok(finished)
+}
+
+/// Commit to a line and build its frame with the given `width`.
+fn commit(
+ p: &Preparation,
+ world: Tracked<dyn World>,
+ line: &Line,
+ regions: &Regions,
+ width: Abs,
+) -> SourceResult<Frame> {
+ let mut remaining = width - line.width; // Space left over after the natural line width.
+ let mut offset = Abs::zero();
+
+ // Reorder the line from logical to visual order.
+ let reordered = reorder(line);
+
+ // Handle hanging punctuation to the left.
+ if let Some(Item::Text(text)) = reordered.first() {
+ if let Some(glyph) = text.glyphs.first() {
+ if !text.dir.is_positive()
+ && text.styles.get(TextNode::OVERHANG)
+ && (reordered.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ offset -= amount;
+ remaining += amount;
+ }
+ }
+ }
+
+ // Handle hanging punctuation to the right.
+ if let Some(Item::Text(text)) = reordered.last() {
+ if let Some(glyph) = text.glyphs.last() {
+ if text.dir.is_positive()
+ && text.styles.get(TextNode::OVERHANG)
+ && (reordered.len() > 1 || text.glyphs.len() > 1)
+ {
+ let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+ remaining += amount;
+ }
+ }
+ }
+
+ // Determine how much to justify each space. This also applies to overfull
+ // lines, for which the resulting justification is negative.
+ let fr = line.fr();
+ let mut justification = Abs::zero();
+ if remaining < Abs::zero() || (line.justify && fr.is_zero()) {
+ let justifiables = line.justifiables();
+ if justifiables > 0 {
+ justification = remaining / justifiables as f64;
+ remaining = Abs::zero();
+ }
+ }
+
+ let mut top = Abs::zero(); // Largest extent above the baseline.
+ let mut bottom = Abs::zero(); // Largest extent below the baseline.
+
+ // Build the frames and determine the height and baseline.
+ let mut frames = vec![];
+ for item in reordered {
+ let mut push = |offset: &mut Abs, frame: Frame| {
+ let width = frame.width();
+ top.set_max(frame.baseline());
+ bottom.set_max(frame.size().y - frame.baseline());
+ frames.push((*offset, frame));
+ *offset += width;
+ };
+
+ match item {
+ Item::Absolute(v) => {
+ offset += *v;
+ }
+ Item::Fractional(v) => {
+ offset += v.share(fr, remaining);
+ }
+ Item::Text(shaped) => {
+ let frame = shaped.build(world, justification);
+ push(&mut offset, frame);
+ }
+ Item::Frame(frame) => {
+ push(&mut offset, frame.clone());
+ }
+ Item::Repeat(repeat, styles) => {
+ let before = offset;
+ let fill = Fr::one().share(fr, remaining);
+ let size = Size::new(fill, regions.base.y);
+ let pod = Regions::one(size, regions.base, Axes::new(false, false));
+ let frame = repeat.layout_inline(world, &pod, *styles)?.remove(0);
+ let width = frame.width();
+ let count = (fill / width).floor(); // How many whole copies fit into the fill.
+ let remaining = fill % width;
+ let apart = remaining / (count - 1.0); // NOTE(review): the divisor is zero when `count` is 1.
+ if count == 1.0 { // A single copy is positioned by the paragraph's alignment.
+ offset += p.align.position(remaining);
+ }
+ if width > Abs::zero() {
+ for _ in 0 .. (count as usize).min(1000) { // At most 1000 copies are placed.
+ push(&mut offset, frame.clone());
+ offset += apart;
+ }
+ }
+ offset = before + fill;
+ }
+ }
+ }
+
+ // Remaining space is distributed now.
+ if !fr.is_zero() {
+ remaining = Abs::zero();
+ }
+
+ let size = Size::new(width, top + bottom);
+ let mut output = Frame::new(size);
+ output.set_baseline(top);
+
+ // Construct the line's frame, aligning all item frames on the baseline.
+ for (offset, frame) in frames {
+ let x = offset + p.align.position(remaining);
+ let y = top - frame.baseline();
+ output.push_frame(Point::new(x, y), frame);
+ }
+
+ Ok(output)
+}
+
+/// Return a line's items in visual (left-to-right) order.
+fn reorder<'a>(line: &'a Line<'a>) -> Vec<&Item<'a>> {
+ let mut reordered = vec![];
+
+ // The bidi crate doesn't like empty lines.
+ if line.trimmed.is_empty() {
+ return line.slice(line.trimmed.clone()).collect();
+ }
+
+ // Find the bidi paragraph that contains the start of the line.
+ let para = line
+ .bidi
+ .paragraphs
+ .iter()
+ .find(|para| para.range.contains(&line.trimmed.start))
+ .unwrap();
+
+ // Compute the reordered ranges in visual order (left to right).
+ let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
+
+ // Collect the reordered items.
+ for run in runs {
+ // Skip reset L1 runs because handling them would require reshaping
+ // again in some cases.
+ if line.bidi.levels[run.start] != levels[run.start] {
+ continue;
+ }
+
+ let prev = reordered.len();
+ reordered.extend(line.slice(run.clone()));
+
+ if levels[run.start].is_rtl() { // Items of a right-to-left run are flipped.
+ reordered[prev ..].reverse();
+ }
+ }
+
+ reordered
+}
+
+/// The fraction of its advance width by which a character should hang into
+/// the margin. Characters that should not hang at all yield zero.
+///
+/// For more discussion, see:
+/// https://recoveringphysicist.com/21/
+fn overhang(c: char) -> f64 {
+    match c {
+        // Ideographic comma and full stop.
+        '\u{3001}' | '\u{3002}' => 1.0,
+
+        // Latin punctuation and the hyphen.
+        '.' | ',' => 0.8,
+        '-' => 0.55,
+
+        // Arabic comma and full stop.
+        '\u{60C}' | '\u{6D4}' => 0.4,
+
+        // Colons and the longer dashes.
+        ':' | ';' => 0.3,
+        '–' | '—' => 0.2,
+
+        _ => 0.0,
+    }
+}
diff --git a/library/src/text/quotes.rs b/library/src/text/quotes.rs
new file mode 100644
index 00000000..ab4d3f9d
--- /dev/null
+++ b/library/src/text/quotes.rs
@@ -0,0 +1,149 @@
+use typst::syntax::is_newline;
+
+use super::{Lang, Region};
+
+/// State machine for smart quote substitution.
+#[derive(Debug, Clone)]
+pub struct Quoter {
+ /// How many quotes have been opened and not yet closed.
+ quote_depth: usize,
+ /// Whether an opening quote might follow.
+ expect_opening: bool,
+ /// Whether the last character was numeric.
+ last_num: bool,
+}
+
+impl Quoter {
+    /// Create a quoter in its initial state: no quotes are open and an
+    /// opening quote is expected.
+    pub fn new() -> Self {
+        Self {
+            quote_depth: 0,
+            expect_opening: true,
+            last_num: false,
+        }
+    }
+
+    /// Record the character that was seen last. It determines how the next
+    /// quote is substituted.
+    pub fn last(&mut self, c: char) {
+        self.last_num = c.is_numeric();
+        self.expect_opening = is_opening_bracket(c) || is_ignorable(c);
+    }
+
+    /// Substitute a quote and update the state accordingly.
+    ///
+    /// `peeked` is the character following the quote, if any. A missing
+    /// character is treated like a space.
+    pub fn quote<'a>(
+        &mut self,
+        quotes: &Quotes<'a>,
+        double: bool,
+        peeked: Option<char>,
+    ) -> &'a str {
+        if self.expect_opening {
+            self.quote_depth += 1;
+            return quotes.open(double);
+        }
+
+        let next = peeked.unwrap_or(' ');
+        let closes = self.quote_depth > 0
+            && (next.is_ascii_punctuation() || is_ignorable(next));
+
+        if closes {
+            self.quote_depth -= 1;
+            quotes.close(double)
+        } else if self.last_num {
+            quotes.prime(double)
+        } else {
+            quotes.fallback(double)
+        }
+    }
+}
+
+impl Default for Quoter {
+    /// The default quoter is a freshly started one.
+    fn default() -> Self {
+        Quoter::new()
+    }
+}
+
+/// Whether the character is whitespace or a newline.
+fn is_ignorable(c: char) -> bool {
+    if c.is_whitespace() {
+        return true;
+    }
+    is_newline(c)
+}
+
+/// Whether the character is an opening parenthesis, brace or bracket.
+fn is_opening_bracket(c: char) -> bool {
+    ['(', '{', '['].contains(&c)
+}
+
+/// Decides which quotes to substitute smart quotes with.
+pub struct Quotes<'s> {
+ /// The opening single quote.
+ pub single_open: &'s str,
+ /// The closing single quote.
+ pub single_close: &'s str,
+ /// The opening double quote.
+ pub double_open: &'s str,
+ /// The closing double quote.
+ pub double_close: &'s str,
+}
+
+impl<'s> Quotes<'s> {
+ /// Create a new `Quotes` struct with the defaults for a language and
+ /// region.
+ ///
+ /// The language should be specified as an all-lowercase ISO 639-1 code, the
+ /// region as an all-uppercase ISO 3166-alpha2 code.
+ ///
+ /// Currently, the supported languages are: English, Czech, Danish, German,
+ /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian,
+ /// Latvian, Slovak, Slovenian, Bosnian, Finnish, Swedish, French,
+ /// Hungarian, Polish, Romanian, Russian, and Norwegian (language codes
+ /// `no` and `nn`).
+ ///
+ /// For unknown languages, the English quotes are used.
+ pub fn from_lang(lang: Lang, region: Option<Region>) -> Self {
+ let region = region.as_ref().map(Region::as_str);
+ let (single_open, single_close, double_open, double_close) = match lang.as_str() {
+ "de" if matches!(region, Some("CH" | "LI")) => ("‹", "›", "«", "»"),
+ "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
+ ("‚", "‘", "„", "“")
+ }
+ "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"),
+ "bs" | "fi" | "sv" => ("’", "’", "”", "”"),
+ "hu" | "pl" | "ro" => ("’", "’", "„", "”"),
+ "ru" | "no" | "nn" => ("’", "’", "«", "»"),
+ _ => return Self::default(),
+ };
+
+ Self {
+ single_open,
+ single_close,
+ double_open,
+ double_close,
+ }
+ }
+
+ /// The opening quote, double or single.
+ fn open(&self, double: bool) -> &'s str {
+ if double { self.double_open } else { self.single_open }
+ }
+
+ /// The closing quote, double or single.
+ fn close(&self, double: bool) -> &'s str {
+ if double { self.double_close } else { self.single_close }
+ }
+
+ /// Which character should be used as a prime, independent of the language.
+ fn prime(&self, double: bool) -> &'static str {
+ if double { "″" } else { "′" }
+ }
+
+ /// Which character should be used as a fallback quote, independent of the language.
+ fn fallback(&self, double: bool) -> &'static str {
+ if double { "\"" } else { "’" }
+ }
+}
+
+impl Default for Quotes<'_> {
+ /// Returns the English quotes as default.
+ fn default() -> Self {
+ Self {
+ single_open: "‘",
+ single_close: "’",
+ double_open: "“",
+ double_close: "”",
+ }
+ }
+}
diff --git a/library/src/text/raw.rs b/library/src/text/raw.rs
new file mode 100644
index 00000000..31f1517e
--- /dev/null
+++ b/library/src/text/raw.rs
@@ -0,0 +1,206 @@
+use once_cell::sync::Lazy;
+use syntect::easy::HighlightLines;
+use syntect::highlighting::{
+ Color, FontStyle, Style, StyleModifier, Theme, ThemeItem, ThemeSettings,
+};
+use syntect::parsing::SyntaxSet;
+use typst::syntax;
+
+use super::{FontFamily, Hyphenate, LinebreakNode, TextNode};
+use crate::layout::{BlockNode, BlockSpacing};
+use crate::prelude::*;
+
+/// Monospaced text with optional syntax highlighting.
+///
+/// Highlighting is chosen from the `LANG` style property when the node is
+/// realized; the node itself only stores the text and the block flag.
+#[derive(Debug, Hash)]
+pub struct RawNode {
+ /// The raw text.
+ pub text: EcoString,
+ /// Whether the node is block-level. Block-level raw is wrapped in a block
+ /// and gets spacing above and below.
+ pub block: bool,
+}
+
+#[node(Show)]
+impl RawNode {
+    /// The language to syntax-highlight in, if any.
+    #[property(referenced)]
+    pub const LANG: Option<EcoString> = None;
+    /// The font family the raw text is set in.
+    #[property(referenced)]
+    pub const FAMILY: FontFamily = FontFamily::new("IBM Plex Mono");
+    /// The spacing above block-level raw.
+    #[property(resolve, shorthand(around))]
+    pub const ABOVE: Option<BlockSpacing> = Some(Ratio::one().into());
+    /// The spacing below block-level raw.
+    #[property(resolve, shorthand(around))]
+    pub const BELOW: Option<BlockSpacing> = Some(Ratio::one().into());
+
+    /// Builds the node from a required `text` argument and an optional named
+    /// `block` argument, which defaults to `false`.
+    fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+        let text = args.expect("text")?;
+        let block = args.named("block")?.unwrap_or(false);
+        Ok(Self { text, block }.pack())
+    }
+}
+
+impl Show for RawNode {
+ /// Returns a packed copy of this node; the selector is ignored.
+ fn unguard_parts(&self, _: Selector) -> Content {
+ Self { text: self.text.clone(), ..*self }.pack()
+ }
+
+ /// Exposes the `text` and `block` fields; any other name yields `None`.
+ fn field(&self, name: &str) -> Option<Value> {
+ match name {
+ "text" => Some(Value::Str(self.text.clone().into())),
+ "block" => Some(Value::Bool(self.block)),
+ _ => None,
+ }
+ }
+
+ /// Builds the content for the raw text.
+ ///
+ /// Three paths exist, selected by the lowercased `LANG` property:
+ /// - `typ`, `typst` and `typc` use typst's own highlighter,
+ /// - any other token known to the syntect syntax set uses syntect,
+ /// - everything else becomes a single plain text node.
+ fn realize(
+ &self,
+ _: Tracked<dyn World>,
+ styles: StyleChain,
+ ) -> SourceResult<Content> {
+ let lang = styles.get(Self::LANG).as_ref().map(|s| s.to_lowercase());
+ // The theme's base foreground (black if unset). Pieces with exactly
+ // this color get no explicit fill in `styled`.
+ let foreground = THEME
+ .settings
+ .foreground
+ .map(Color::from)
+ .unwrap_or(Color::BLACK)
+ .into();
+
+ let mut realized = if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) {
+ // `typc` is parsed as code, the other two as markup.
+ let root = match lang.as_deref() {
+ Some("typc") => syntax::parse_code(&self.text),
+ _ => syntax::parse(&self.text),
+ };
+
+ let mut seq = vec![];
+ syntax::highlight::highlight_themed(&root, &THEME, |range, style| {
+ seq.push(styled(&self.text[range], foreground, style));
+ });
+
+ Content::sequence(seq)
+ } else if let Some(syntax) =
+ lang.and_then(|token| SYNTAXES.find_syntax_by_token(&token))
+ {
+ let mut seq = vec![];
+ let mut highlighter = HighlightLines::new(syntax, &THEME);
+ for (i, line) in self.text.lines().enumerate() {
+ // Lines are separated by explicit, non-justifying linebreaks.
+ if i != 0 {
+ seq.push(LinebreakNode { justify: false }.pack());
+ }
+
+ // A line that fails to highlight contributes no pieces
+ // (the error is dropped by `into_iter().flatten()`).
+ for (style, piece) in
+ highlighter.highlight_line(line, &SYNTAXES).into_iter().flatten()
+ {
+ seq.push(styled(piece, foreground, style));
+ }
+ }
+
+ Content::sequence(seq)
+ } else {
+ TextNode(self.text.clone()).pack()
+ };
+
+ if self.block {
+ realized = BlockNode(realized).pack();
+ }
+
+ // Raw text is shown verbatim: no overhang, hyphenation or smart quotes.
+ let mut map = StyleMap::new();
+ map.set(TextNode::OVERHANG, false);
+ map.set(TextNode::HYPHENATE, Hyphenate(Smart::Custom(false)));
+ map.set(TextNode::SMART_QUOTES, false);
+
+ Ok(realized.styled_with_map(map))
+ }
+
+ /// Applies the raw font family and, for block-level raw, the configured
+ /// spacing above and below.
+ fn finalize(
+ &self,
+ _: Tracked<dyn World>,
+ styles: StyleChain,
+ mut realized: Content,
+ ) -> SourceResult<Content> {
+ let mut map = StyleMap::new();
+ map.set_family(styles.get(Self::FAMILY).clone(), styles);
+
+ if self.block {
+ realized = realized.spaced(styles.get(Self::ABOVE), styles.get(Self::BELOW));
+ }
+
+ Ok(realized.styled_with_map(map))
+ }
+}
+
+/// Turn a highlighted piece of text into content carrying a syntect style.
+///
+/// A fill is only set when the style's color differs from `foreground`. The
+/// bold, italic and underline flags map to strong, emphasized and underlined
+/// content, applied in that order.
+fn styled(piece: &str, foreground: Paint, style: Style) -> Content {
+    let flags = style.font_style;
+    let mut content = TextNode(piece.into()).pack();
+
+    let paint = style.foreground.into();
+    if paint != foreground {
+        content = content.styled(TextNode::FILL, paint);
+    }
+
+    if flags.contains(FontStyle::BOLD) {
+        content = content.strong();
+    }
+    if flags.contains(FontStyle::ITALIC) {
+        content = content.emph();
+    }
+    if flags.contains(FontStyle::UNDERLINE) {
+        content = content.underlined();
+    }
+
+    content
+}
+
+/// The syntect syntax definitions, loaded lazily on first use.
+static SYNTAXES: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
+
+/// The default theme used for syntax highlighting.
+///
+/// Built lazily on first access. Each entry pairs a scope selector with an
+/// optional foreground color (hex) and an optional font style; see `item`.
+#[rustfmt::skip]
+pub static THEME: Lazy<Theme> = Lazy::new(|| Theme {
+ name: Some("Typst Light".into()),
+ author: Some("The Typst Project Developers".into()),
+ settings: ThemeSettings::default(),
+ scopes: vec![
+ item("comment", Some("#8a8a8a"), None),
+ item("constant.character.escape", Some("#1d6c76"), None),
+ item("constant.character.shortcut", Some("#1d6c76"), None),
+ item("markup.bold", None, Some(FontStyle::BOLD)),
+ item("markup.italic", None, Some(FontStyle::ITALIC)),
+ item("markup.underline", None, Some(FontStyle::UNDERLINE)),
+ item("markup.raw", Some("#818181"), None),
+ item("string.other.math.typst", None, None),
+ item("punctuation.definition.math", Some("#298e0d"), None),
+ item("keyword.operator.math", Some("#1d6c76"), None),
+ item("markup.heading, entity.name.section", None, Some(FontStyle::BOLD)),
+ item("markup.heading.typst", None, Some(FontStyle::BOLD | FontStyle::UNDERLINE)),
+ item("punctuation.definition.list", Some("#8b41b1"), None),
+ item("markup.list.term", None, Some(FontStyle::BOLD)),
+ item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
+ item("keyword, constant.language, variable.language", Some("#d73a49"), None),
+ item("storage.type, storage.modifier", Some("#d73a49"), None),
+ item("constant", Some("#b60157"), None),
+ item("string", Some("#298e0d"), None),
+ item("entity.name, variable.function, support", Some("#4b69c6"), None),
+ item("support.macro", Some("#16718d"), None),
+ item("meta.annotation", Some("#301414"), None),
+ item("entity.other, meta.interpolation", Some("#8b41b1"), None),
+ item("invalid", Some("#ff0000"), None),
+ ],
+});
+
+/// Create a syntect theme item from a scope selector, an optional hex color
+/// and an optional font style.
+///
+/// Panics if the scope selector or the color string fails to parse.
+fn item(scope: &str, color: Option<&str>, font_style: Option<FontStyle>) -> ThemeItem {
+    let scope = scope.parse().unwrap();
+    let foreground = color.map(|hex| hex.parse::<RgbaColor>().unwrap().into());
+    let style = StyleModifier { foreground, background: None, font_style };
+    ThemeItem { scope, style }
+}
diff --git a/library/src/text/shaping.rs b/library/src/text/shaping.rs
new file mode 100644
index 00000000..32143862
--- /dev/null
+++ b/library/src/text/shaping.rs
@@ -0,0 +1,655 @@
+use std::ops::Range;
+use std::str::FromStr;
+
+use rustybuzz::{Feature, Tag, UnicodeBuffer};
+use typst::font::{Font, FontVariant};
+use typst::util::SliceExt;
+
+use super::*;
+use crate::prelude::*;
+
+/// The result of shaping text.
+///
+/// This type contains owned or borrowed shaped text runs, which can be
+/// measured, used to reshape substrings more quickly and converted into a
+/// frame.
+pub struct ShapedText<'a> {
+ /// The text that was shaped.
+ pub text: &'a str,
+ /// The text direction.
+ pub dir: Dir,
+ /// The text's style properties.
+ pub styles: StyleChain<'a>,
+ /// The font variant.
+ pub variant: FontVariant,
+ /// The font size.
+ pub size: Abs,
+ /// The width of the text's bounding box, i.e. the sum of all glyph
+ /// advances at `size`.
+ pub width: Abs,
+ /// The shaped glyphs. Owned after a fresh shaping run, borrowed when a
+ /// sub-range of an existing result is reused.
+ pub glyphs: Cow<'a, [ShapedGlyph]>,
+}
+
+/// A single glyph resulting from shaping.
+#[derive(Debug, Clone)]
+pub struct ShapedGlyph {
+ /// The font the glyph is contained in.
+ pub font: Font,
+ /// The glyph's index in the font. Zero denotes the notdef ("tofu") glyph.
+ pub glyph_id: u16,
+ /// The advance width of the glyph.
+ pub x_advance: Em,
+ /// The horizontal offset of the glyph.
+ pub x_offset: Em,
+ /// The vertical offset of the glyph.
+ pub y_offset: Em,
+ /// The byte index in the source text where this glyph's cluster starts. A
+ /// cluster is a sequence of one or multiple glyphs that cannot be
+ /// separated and must always be treated as a union.
+ pub cluster: usize,
+ /// Whether splitting the shaping result before this glyph would yield the
+ /// same results as shaping the parts to both sides of `cluster`
+ /// separately.
+ pub safe_to_break: bool,
+ /// The first char in this glyph's cluster.
+ pub c: char,
+}
+
+impl ShapedGlyph {
+    /// Whether the glyph is a space.
+    ///
+    /// Covers the ASCII space, the no-break space (U+00A0) and the
+    /// ideographic space (U+3000). The latter is written as an escape so it
+    /// cannot silently degrade into a second ASCII space.
+    pub fn is_space(&self) -> bool {
+        matches!(self.c, ' ' | '\u{00A0}' | '\u{3000}')
+    }
+
+    /// Whether the glyph is justifiable.
+    ///
+    /// Besides spaces, this covers the CJK punctuation marks fullwidth comma
+    /// (U+FF0C), ideographic full stop (U+3002) and ideographic comma
+    /// (U+3001). The ASCII comma is deliberately not justifiable.
+    pub fn is_justifiable(&self) -> bool {
+        self.is_space() || matches!(self.c, '\u{FF0C}' | '\u{3002}' | '\u{3001}')
+    }
+}
+
+/// A side you can go toward.
+///
+/// Used when searching for the outermost glyph of a cluster.
+enum Side {
+ /// To the left-hand side.
+ Left,
+ /// To the right-hand side.
+ Right,
+}
+
+impl<'a> ShapedText<'a> {
+ /// Build the shaped text's frame.
+ ///
+ /// The `justification` defines how much extra advance width each
+ /// [justifiable glyph](ShapedGlyph::is_justifiable) will get.
+ pub fn build(&self, world: Tracked<dyn World>, justification: Abs) -> Frame {
+ let (top, bottom) = self.measure(world);
+ let size = Size::new(self.width, top + bottom);
+
+ let mut offset = Abs::zero();
+ let mut frame = Frame::new(size);
+ frame.set_baseline(top);
+
+ let shift = self.styles.get(TextNode::BASELINE);
+ let lang = self.styles.get(TextNode::LANG);
+ let decos = self.styles.get(TextNode::DECO);
+ let fill = self.styles.get(TextNode::FILL);
+ let link = self.styles.get(TextNode::LINK);
+
+ // Consecutive glyphs sharing font and vertical offset form one text
+ // element in the frame.
+ for ((font, y_offset), group) in
+ self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
+ {
+ let pos = Point::new(offset, top + shift + y_offset.at(self.size));
+
+ let glyphs = group
+ .iter()
+ .map(|glyph| Glyph {
+ id: glyph.glyph_id,
+ x_advance: glyph.x_advance
+ + if glyph.is_justifiable() {
+ // Side effect: the frame grows by the justification
+ // amount for every justifiable glyph.
+ frame.size_mut().x += justification;
+ Em::from_length(justification, self.size)
+ } else {
+ Em::zero()
+ },
+ x_offset: glyph.x_offset,
+ c: glyph.c,
+ })
+ .collect();
+
+ let text = Text {
+ font,
+ size: self.size,
+ lang,
+ fill,
+ glyphs,
+ };
+
+ // Remember the layer before decorating so that the text is inserted
+ // at this layer index after the decorations were added.
+ let text_layer = frame.layer();
+ let width = text.width();
+
+ // Apply line decorations.
+ for deco in &decos {
+ decorate(&mut frame, deco, &text, shift, pos, width);
+ }
+
+ frame.insert(text_layer, pos, Element::Text(text));
+ offset += width;
+ }
+
+ // Apply link if it exists.
+ if let Some(dest) = link {
+ frame.link(dest.clone());
+ }
+
+ frame
+ }
+
+ /// Measure the top and bottom extent of this text.
+ ///
+ /// Returns `(top, bottom)`, each the maximum over all fonts in use of the
+ /// resolved top edge and the negated resolved bottom edge; neither is
+ /// ever below zero because both start at zero.
+ fn measure(&self, world: Tracked<dyn World>) -> (Abs, Abs) {
+ let mut top = Abs::zero();
+ let mut bottom = Abs::zero();
+
+ let top_edge = self.styles.get(TextNode::TOP_EDGE);
+ let bottom_edge = self.styles.get(TextNode::BOTTOM_EDGE);
+
+ // Expand top and bottom by reading the font's vertical metrics.
+ let mut expand = |font: &Font| {
+ let metrics = font.metrics();
+ top.set_max(top_edge.resolve(self.styles, metrics));
+ bottom.set_max(-bottom_edge.resolve(self.styles, metrics));
+ };
+
+ if self.glyphs.is_empty() {
+ // When there are no glyphs, we just use the vertical metrics of the
+ // first available font.
+ for family in families(self.styles) {
+ if let Some(font) = world
+ .book()
+ .select(family, self.variant)
+ .and_then(|id| world.font(id))
+ {
+ expand(&font);
+ break;
+ }
+ }
+ } else {
+ for g in self.glyphs.iter() {
+ expand(&g.font);
+ }
+ }
+
+ (top, bottom)
+ }
+
+ /// How many justifiable glyphs the text contains.
+ pub fn justifiables(&self) -> usize {
+ self.glyphs.iter().filter(|g| g.is_justifiable()).count()
+ }
+
+ /// The combined advance width of all justifiable glyphs in the text
+ /// (spaces and the justifiable punctuation), at the text's size.
+ pub fn stretch(&self) -> Abs {
+ self.glyphs
+ .iter()
+ .filter(|g| g.is_justifiable())
+ .map(|g| g.x_advance)
+ .sum::<Em>()
+ .at(self.size)
+ }
+
+ /// Reshape a range of the shaped text, reusing information from this
+ /// shaping process if possible.
+ ///
+ /// `text_range` is a byte range into `self.text`. If both ends are safe to
+ /// break, the existing glyphs are borrowed; otherwise the substring is
+ /// shaped from scratch.
+ pub fn reshape(
+ &'a self,
+ world: Tracked<dyn World>,
+ text_range: Range<usize>,
+ ) -> ShapedText<'a> {
+ if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
+ Self {
+ text: &self.text[text_range],
+ dir: self.dir,
+ styles: self.styles,
+ size: self.size,
+ variant: self.variant,
+ width: glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size),
+ glyphs: Cow::Borrowed(glyphs),
+ }
+ } else {
+ shape(world, &self.text[text_range], self.styles, self.dir)
+ }
+ }
+
+ /// Push a hyphen to end of the text.
+ ///
+ /// Uses the first family whose font has a `-` glyph with a horizontal
+ /// advance. If no family qualifies, nothing is pushed.
+ pub fn push_hyphen(&mut self, world: Tracked<dyn World>) {
+ families(self.styles).find_map(|family| {
+ let font = world
+ .book()
+ .select(family, self.variant)
+ .and_then(|id| world.font(id))?;
+ let ttf = font.ttf();
+ let glyph_id = ttf.glyph_index('-')?;
+ let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
+ // The hyphen joins the cluster of the last glyph (0 if there is none).
+ let cluster = self.glyphs.last().map(|g| g.cluster).unwrap_or_default();
+ self.width += x_advance.at(self.size);
+ self.glyphs.to_mut().push(ShapedGlyph {
+ font,
+ glyph_id: glyph_id.0,
+ x_advance,
+ x_offset: Em::zero(),
+ y_offset: Em::zero(),
+ cluster,
+ safe_to_break: true,
+ c: '-',
+ });
+ Some(())
+ });
+ }
+
+ /// Find the subslice of glyphs that represent the given text range if both
+ /// sides are safe to break.
+ fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
+ let Range { mut start, mut end } = text_range;
+ // For a non-positive direction the ends are swapped before the lookup.
+ if !self.dir.is_positive() {
+ std::mem::swap(&mut start, &mut end);
+ }
+
+ let left = self.find_safe_to_break(start, Side::Left)?;
+ let right = self.find_safe_to_break(end, Side::Right)?;
+ Some(&self.glyphs[left .. right])
+ }
+
+ /// Find the glyph offset matching the text index that is most towards the
+ /// given side and safe-to-break.
+ ///
+ /// Returns `None` if no glyph's cluster equals `text_index` or if the
+ /// glyph found is not safe to break at.
+ fn find_safe_to_break(&self, text_index: usize, towards: Side) -> Option<usize> {
+ let ltr = self.dir.is_positive();
+
+ // Handle edge cases.
+ let len = self.glyphs.len();
+ if text_index == 0 {
+ return Some(if ltr { 0 } else { len });
+ } else if text_index == self.text.len() {
+ return Some(if ltr { len } else { 0 });
+ }
+
+ // Find any glyph with the text index.
+ let mut idx = self
+ .glyphs
+ .binary_search_by(|g| {
+ let ordering = g.cluster.cmp(&text_index);
+ if ltr { ordering } else { ordering.reverse() }
+ })
+ .ok()?;
+
+ // Step function for walking the glyph slice towards the wanted side;
+ // the checked variants stop the walk at index zero.
+ let next = match towards {
+ Side::Left => usize::checked_sub,
+ Side::Right => usize::checked_add,
+ };
+
+ // Search for the outermost glyph with the text index.
+ while let Some(next) = next(idx, 1) {
+ if self.glyphs.get(next).map_or(true, |g| g.cluster != text_index) {
+ break;
+ }
+ idx = next;
+ }
+
+ // RTL needs offset one because the left side of the range should be
+ // exclusive and the right side inclusive, contrary to the normal
+ // behaviour of ranges.
+ self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize)
+ }
+}
+
+impl Debug for ShapedText<'_> {
+    /// Debug-formats only the text that was shaped.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        Debug::fmt(self.text, f)
+    }
+}
+
+/// Holds shaping results and metadata common to all shaped segments.
+struct ShapingContext<'a> {
+ /// Access to the font book and fonts.
+ world: Tracked<'a, dyn World>,
+ /// The glyphs shaped so far.
+ glyphs: Vec<ShapedGlyph>,
+ /// The fonts tried on the current fallback path; a font in this list is
+ /// not selected again while it is on the path.
+ used: Vec<Font>,
+ /// The style chain the text is shaped with.
+ styles: StyleChain<'a>,
+ /// The font size.
+ size: Abs,
+ /// The resolved font variant.
+ variant: FontVariant,
+ /// The OpenType features passed to the shaper.
+ tags: Vec<rustybuzz::Feature>,
+ /// Whether font fallback beyond the configured families is enabled.
+ fallback: bool,
+ /// The text direction.
+ dir: Dir,
+}
+
+/// Shape text into [`ShapedText`].
+///
+/// Empty text yields a result without glyphs. Tracking and spacing are
+/// applied after shaping, and the width is the sum of the final advances.
+pub fn shape<'a>(
+    world: Tracked<dyn World>,
+    text: &'a str,
+    styles: StyleChain<'a>,
+    dir: Dir,
+) -> ShapedText<'a> {
+    let size = styles.get(TextNode::SIZE);
+
+    let mut ctx = ShapingContext {
+        world,
+        size,
+        glyphs: Vec::new(),
+        used: Vec::new(),
+        styles,
+        variant: variant(styles),
+        tags: tags(styles),
+        fallback: styles.get(TextNode::FALLBACK),
+        dir,
+    };
+
+    if !text.is_empty() {
+        shape_segment(&mut ctx, 0, text, families(styles));
+    }
+
+    track_and_space(&mut ctx);
+
+    // Take the parts of the context that end up in the result.
+    let ShapingContext { glyphs, variant, .. } = ctx;
+    let width = glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size);
+
+    ShapedText {
+        text,
+        dir,
+        styles,
+        variant,
+        size,
+        width,
+        glyphs: Cow::Owned(glyphs),
+    }
+}
+
+/// Shape text with font fallback using the `families` iterator.
+///
+/// `base` is the byte offset of `text` within the full text being shaped; it
+/// is added to every cluster index. Runs of glyphs the chosen font cannot
+/// provide are shaped again recursively with the remaining families.
+fn shape_segment<'a>(
+ ctx: &mut ShapingContext,
+ base: usize,
+ text: &str,
+ mut families: impl Iterator<Item = &'a str> + Clone,
+) {
+ // Fonts don't have newlines and tabs.
+ if text.chars().all(|c| c == '\n' || c == '\t') {
+ return;
+ }
+
+ // Find the next available family.
+ let book = ctx.world.book();
+ let mut selection = families.find_map(|family| {
+ book.select(family, ctx.variant)
+ .and_then(|id| ctx.world.font(id))
+ .filter(|font| !ctx.used.contains(font))
+ });
+
+ // Do font fallback if the families are exhausted and fallback is enabled.
+ if selection.is_none() && ctx.fallback {
+ let first = ctx.used.first().map(Font::info);
+ selection = book
+ .select_fallback(first, ctx.variant, text)
+ .and_then(|id| ctx.world.font(id))
+ .filter(|font| !ctx.used.contains(font));
+ }
+
+ // Extract the font id or shape notdef glyphs if we couldn't find any font.
+ let font = if let Some(font) = selection {
+ font
+ } else {
+ // Tofus come from the first font tried on this path; if no font was
+ // tried at all, nothing is emitted for this segment.
+ if let Some(font) = ctx.used.first().cloned() {
+ shape_tofus(ctx, base, text, font);
+ }
+ return;
+ };
+
+ // Mark the font as used for the duration of this call (popped at the end).
+ ctx.used.push(font.clone());
+
+ // Fill the buffer with our text.
+ let mut buffer = UnicodeBuffer::new();
+ buffer.push_str(text);
+ buffer.set_language(language(ctx.styles));
+ buffer.set_direction(match ctx.dir {
+ Dir::LTR => rustybuzz::Direction::LeftToRight,
+ Dir::RTL => rustybuzz::Direction::RightToLeft,
+ _ => unimplemented!("vertical text layout"),
+ });
+
+ // Shape!
+ let buffer = rustybuzz::shape(font.rusty(), &ctx.tags, buffer);
+ let infos = buffer.glyph_infos();
+ let pos = buffer.glyph_positions();
+
+ // Collect the shaped glyphs, doing fallback and shaping parts again with
+ // the next font if necessary.
+ let mut i = 0;
+ while i < infos.len() {
+ let info = &infos[i];
+ let cluster = info.cluster as usize;
+
+ if info.glyph_id != 0 {
+ // Add the glyph to the shaped output.
+ // TODO: Don't ignore y_advance.
+ ctx.glyphs.push(ShapedGlyph {
+ font: font.clone(),
+ glyph_id: info.glyph_id as u16,
+ x_advance: font.to_em(pos[i].x_advance),
+ x_offset: font.to_em(pos[i].x_offset),
+ y_offset: font.to_em(pos[i].y_offset),
+ cluster: base + cluster,
+ safe_to_break: !info.unsafe_to_break(),
+ c: text[cluster ..].chars().next().unwrap(),
+ });
+ } else {
+ // Determine the source text range for the tofu sequence.
+ let range = {
+ // First, search for the end of the tofu sequence.
+ let k = i;
+ while infos.get(i + 1).map_or(false, |info| info.glyph_id == 0) {
+ i += 1;
+ }
+
+ // Then, determine the start and end text index.
+ //
+ // Examples:
+ // Everything is shown in visual order. Tofus are written as "_".
+ // We want to find out that the tofus span the text `2..6`.
+ // Note that the clusters are longer than 1 char.
+ //
+ // Left-to-right:
+ // Text: h a l i h a l l o
+ // Glyphs: A _ _ C E
+ // Clusters: 0 2 4 6 8
+ // k=1 i=2
+ //
+ // Right-to-left:
+ // Text: O L L A H I L A H
+ // Glyphs: E C _ _ A
+ // Clusters: 8 6 4 2 0
+ // k=2 i=3
+ let ltr = ctx.dir.is_positive();
+ let first = if ltr { k } else { i };
+ let start = infos[first].cluster as usize;
+ let last = if ltr { i.checked_add(1) } else { k.checked_sub(1) };
+ // Without a neighbouring glyph on that side, the tofu run extends
+ // to the end of the text.
+ let end = last
+ .and_then(|last| infos.get(last))
+ .map_or(text.len(), |info| info.cluster as usize);
+
+ start .. end
+ };
+
+ // Trim half-baked cluster.
+ let remove = base + range.start .. base + range.end;
+ while ctx.glyphs.last().map_or(false, |g| remove.contains(&g.cluster)) {
+ ctx.glyphs.pop();
+ }
+
+ // Recursively shape the tofu sequence with the next family.
+ shape_segment(ctx, base + range.start, &text[range], families.clone());
+ }
+
+ i += 1;
+ }
+
+ ctx.used.pop();
+}
+
+/// Shape the text with tofus from the given font.
+///
+/// Emits one notdef glyph (id 0) per char of `text`, each forming its own
+/// cluster at `base` plus the char's byte offset.
+fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
+    let x_advance = font.advance(0).unwrap_or_default();
+    let tofus = text.char_indices().map(|(offset, c)| ShapedGlyph {
+        font: font.clone(),
+        glyph_id: 0,
+        x_advance,
+        x_offset: Em::zero(),
+        y_offset: Em::zero(),
+        cluster: base + offset,
+        safe_to_break: true,
+        c,
+    });
+    ctx.glyphs.extend(tofus);
+}
+
+/// Apply tracking and spacing to the shaped glyphs.
+///
+/// Spacing rescales the advance of space glyphs; tracking is added to the
+/// last glyph of every cluster except the final one.
+fn track_and_space(ctx: &mut ShapingContext) {
+ let tracking = Em::from_length(ctx.styles.get(TextNode::TRACKING), ctx.size);
+ let spacing = ctx
+ .styles
+ .get(TextNode::SPACING)
+ .map(|abs| Em::from_length(abs, ctx.size));
+
+ let mut glyphs = ctx.glyphs.iter_mut().peekable();
+ while let Some(glyph) = glyphs.next() {
+ // Make non-breaking space same width as normal space.
+ if glyph.c == '\u{00A0}' {
+ glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default();
+ }
+
+ if glyph.is_space() {
+ glyph.x_advance = spacing.relative_to(glyph.x_advance);
+ }
+
+ // Only add tracking where the next glyph starts a new cluster; the very
+ // last glyph has no successor and gets none.
+ if glyphs.peek().map_or(false, |next| glyph.cluster != next.cluster) {
+ glyph.x_advance += tracking;
+ }
+ }
+}
+
+/// How much wider the font's non-breaking space is than its normal space.
+///
+/// Returns `None` if the font lacks either glyph or either advance.
+fn nbsp_delta(font: &Font) -> Option<Em> {
+    let ttf = font.ttf();
+    let space = ttf.glyph_index(' ')?.0;
+    let nbsp = ttf.glyph_index('\u{00A0}')?.0;
+    let nbsp_advance = font.advance(nbsp)?;
+    let space_advance = font.advance(space)?;
+    Some(nbsp_advance - space_advance)
+}
+
+/// Resolve the font variant with `BOLD` and `ITALIC` factored in.
+///
+/// `BOLD` thickens the configured weight by 300. `ITALIC` toggles the style:
+/// normal becomes italic, while italic and oblique become normal.
+pub fn variant(styles: StyleChain) -> FontVariant {
+    let style = styles.get(TextNode::STYLE);
+    let weight = styles.get(TextNode::WEIGHT);
+    let stretch = styles.get(TextNode::STRETCH);
+    let mut resolved = FontVariant::new(style, weight, stretch);
+
+    if styles.get(TextNode::BOLD) {
+        resolved.weight = resolved.weight.thicken(300);
+    }
+
+    if styles.get(TextNode::ITALIC) {
+        resolved.style = match resolved.style {
+            FontStyle::Normal => FontStyle::Italic,
+            FontStyle::Italic | FontStyle::Oblique => FontStyle::Normal,
+        };
+    }
+
+    resolved
+}
+
+/// Resolve a prioritized iterator over the font families.
+///
+/// The configured families come first. If fallback is enabled, a fixed list
+/// of fallback families is appended.
+fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone {
+    const FALLBACKS: &[&str] = &[
+        "ibm plex sans",
+        "twitter color emoji",
+        "noto color emoji",
+        "apple color emoji",
+        "segoe ui emoji",
+    ];
+
+    let fallbacks = if styles.get(TextNode::FALLBACK) { FALLBACKS } else { &[] };
+    let configured = styles
+        .get(TextNode::FAMILY)
+        .0
+        .iter()
+        .map(|family| family.as_str());
+
+    configured.chain(fallbacks.iter().copied())
+}
+
+/// Collect the tags of the OpenType features to apply.
+///
+/// Features the shaper enables by default are only listed (with value 0)
+/// when the corresponding style disables them; features that are off by
+/// default are only listed (with value 1) when enabled. Raw features from
+/// `TextNode::FEATURES` are appended last.
+fn tags(styles: StyleChain) -> Vec<Feature> {
+    let mut tags = vec![];
+    let mut feat = |tag, value| {
+        tags.push(Feature::new(Tag::from_bytes(tag), value, ..));
+    };
+
+    // Features that are on by default in Harfbuzz are only added if disabled.
+    if !styles.get(TextNode::KERNING) {
+        feat(b"kern", 0);
+    }
+
+    // Features that are off by default in Harfbuzz are only added if enabled.
+    if styles.get(TextNode::SMALLCAPS) {
+        feat(b"smcp", 1);
+    }
+
+    if styles.get(TextNode::ALTERNATES) {
+        feat(b"salt", 1);
+    }
+
+    // Declared outside the `if` so the tag bytes outlive the `feat` call.
+    let storage;
+    if let Some(set) = styles.get(TextNode::STYLISTIC_SET) {
+        // Stylistic sets are tagged `ss` followed by two decimal digits.
+        storage = [b's', b's', b'0' + set.get() / 10, b'0' + set.get() % 10];
+        feat(&storage, 1);
+    }
+
+    if !styles.get(TextNode::LIGATURES) {
+        feat(b"liga", 0);
+        feat(b"clig", 0);
+    }
+
+    if styles.get(TextNode::DISCRETIONARY_LIGATURES) {
+        feat(b"dlig", 1);
+    }
+
+    // The registered OpenType tag for historical ligatures is `hlig`.
+    if styles.get(TextNode::HISTORICAL_LIGATURES) {
+        feat(b"hlig", 1);
+    }
+
+    match styles.get(TextNode::NUMBER_TYPE) {
+        Smart::Auto => {}
+        Smart::Custom(NumberType::Lining) => feat(b"lnum", 1),
+        Smart::Custom(NumberType::OldStyle) => feat(b"onum", 1),
+    }
+
+    match styles.get(TextNode::NUMBER_WIDTH) {
+        Smart::Auto => {}
+        Smart::Custom(NumberWidth::Proportional) => feat(b"pnum", 1),
+        Smart::Custom(NumberWidth::Tabular) => feat(b"tnum", 1),
+    }
+
+    if styles.get(TextNode::SLASHED_ZERO) {
+        feat(b"zero", 1);
+    }
+
+    if styles.get(TextNode::FRACTIONS) {
+        feat(b"frac", 1);
+    }
+
+    // User-specified raw features come last.
+    for (tag, value) in styles.get(TextNode::FEATURES).0 {
+        tags.push(Feature::new(tag, value, ..))
+    }
+
+    tags
+}
+
+/// Process the language and region of a style chain into a
+/// rustybuzz-compatible BCP 47 language.
+///
+/// The result is the language code, followed by `-` and the region if one
+/// is set.
+fn language(styles: StyleChain) -> rustybuzz::Language {
+    let lang = styles.get(TextNode::LANG);
+    let mut tag = EcoString::from(lang.as_str());
+    if let Some(region) = styles.get(TextNode::REGION) {
+        tag.push('-');
+        tag.push_str(region.as_str());
+    }
+    rustybuzz::Language::from_str(&tag).unwrap()
+}
diff --git a/library/src/text/shift.rs b/library/src/text/shift.rs
new file mode 100644
index 00000000..e5f142dd
--- /dev/null
+++ b/library/src/text/shift.rs
@@ -0,0 +1,188 @@
+use typst::model::SequenceNode;
+use typst::util::EcoString;
+
+use super::{variant, SpaceNode, TextNode, TextSize};
+use crate::prelude::*;
+
+/// Sub or superscript text.
+///
+/// The text is rendered smaller and its baseline is shifted (raised for
+/// superscripts, lowered for subscripts). To provide the best typography
+/// possible, we first try to transform the text to dedicated sub- or
+/// superscript codepoints. If that fails, we fall back to rendering shrunk
+/// normal letters with a shifted baseline.
+///
+/// The const parameter `S` selects the script kind (`SUPERSCRIPT` or
+/// `SUBSCRIPT`).
+#[derive(Debug, Hash)]
+pub struct ShiftNode<const S: ScriptKind>(pub Content);
+
+/// Shift the text into superscript.
+pub type SuperNode = ShiftNode<SUPERSCRIPT>;
+
+/// Shift the text into subscript.
+pub type SubNode = ShiftNode<SUBSCRIPT>;
+
+#[node(Show)]
+impl<const S: ScriptKind> ShiftNode<S> {
+    /// Whether to prefer the dedicated sub- and superscript characters of the
+    /// font.
+    pub const TYPOGRAPHIC: bool = true;
+    /// The baseline shift for synthetic sub- and superscripts: half an em up
+    /// for superscripts, a fifth of an em down for subscripts.
+    pub const BASELINE: Length =
+        Em::new(if S == SUPERSCRIPT { -0.5 } else { 0.2 }).into();
+    /// The font size for synthetic sub- and superscripts.
+    pub const SIZE: TextSize = TextSize(Em::new(0.6).into());
+
+    /// Builds the node from the required `body` argument.
+    fn construct(_: &mut Vm, args: &mut Args) -> SourceResult<Content> {
+        let body = args.expect("body")?;
+        Ok(Self(body).pack())
+    }
+}
+
+impl<const S: ScriptKind> Show for ShiftNode<S> {
+    /// Returns a packed copy of this node; the selector is ignored.
+    fn unguard_parts(&self, _: Selector) -> Content {
+        let body = self.0.clone();
+        Self(body).pack()
+    }
+
+    /// Exposes the `body` field; any other name yields `None`.
+    fn field(&self, name: &str) -> Option<Value> {
+        (name == "body").then(|| Value::Content(self.0.clone()))
+    }
+
+    /// Realizes the shifted text.
+    ///
+    /// If typographic scripts are enabled, the body converts fully to script
+    /// codepoints and the font can display them, the converted text is used.
+    /// Otherwise the body is restyled with the synthetic baseline and size.
+    fn realize(
+        &self,
+        world: Tracked<dyn World>,
+        styles: StyleChain,
+    ) -> SourceResult<Content> {
+        let typographic = if styles.get(Self::TYPOGRAPHIC) {
+            search_text(&self.0, S)
+                .filter(|text| is_shapable(world, text, styles))
+                .map(|text| TextNode(text).pack())
+        } else {
+            None
+        };
+
+        Ok(match typographic {
+            Some(content) => content,
+            None => {
+                let mut map = StyleMap::new();
+                map.set(TextNode::BASELINE, styles.get(Self::BASELINE));
+                map.set(TextNode::SIZE, styles.get(Self::SIZE));
+                self.0.clone().styled_with_map(map)
+            }
+        })
+    }
+}
+
+/// Find and transform the text contained in `content` to the given script kind
+/// if and only if it only consists of `Text`, `Space`, and `Empty` leaf nodes.
+///
+/// Sequences are searched recursively; a single child that is not
+/// convertible makes the whole result `None`.
+fn search_text(content: &Content, mode: ScriptKind) -> Option<EcoString> {
+    if content.is_empty() {
+        return Some(EcoString::new());
+    }
+
+    if content.is::<SpaceNode>() {
+        return Some(' '.into());
+    }
+
+    if let Some(text) = content.downcast::<TextNode>() {
+        return convert_script(&text.0, mode);
+    }
+
+    let seq = content.downcast::<SequenceNode>()?;
+    let mut combined = EcoString::new();
+    for child in seq.0.iter() {
+        combined.push_str(&search_text(child, mode)?);
+    }
+    Some(combined)
+}
+
+/// Checks whether the first retrievable family contains all code points of the
+/// given string.
+///
+/// Only the first family that resolves to a font is consulted. If none
+/// resolves, the text counts as not shapable.
+fn is_shapable(world: Tracked<dyn World>, text: &str, styles: StyleChain) -> bool {
+    styles
+        .get(TextNode::FAMILY)
+        .0
+        .iter()
+        .find_map(|family| {
+            world
+                .book()
+                .select(family.as_str(), variant(styles))
+                .and_then(|id| world.font(id))
+        })
+        .map_or(false, |font| {
+            text.chars().all(|c| font.ttf().glyph_index(c).is_some())
+        })
+}
+
+/// Convert a string to sub- or superscript codepoints if all characters
+/// can be mapped to such a codepoint.
+///
+/// `SUPERSCRIPT` selects the superscript table; every other kind is treated
+/// as subscript.
+fn convert_script(text: &str, mode: ScriptKind) -> Option<EcoString> {
+    let convert: fn(char) -> Option<char> = if mode == SUPERSCRIPT {
+        to_superscript_codepoint
+    } else {
+        to_subscript_codepoint
+    };
+
+    let mut converted = EcoString::with_capacity(text.len());
+    for c in text.chars() {
+        // Bail out on the first character without a script counterpart.
+        converted.push(convert(c)?);
+    }
+
+    Some(converted)
+}
+
+/// Convert a character to its corresponding Unicode superscript.
+///
+/// Supports the digits, `+`, `-`, `=`, parentheses, `n` and `i`; a space is
+/// passed through unchanged. Any other character yields `None`.
+fn to_superscript_codepoint(c: char) -> Option<char> {
+    let code: u32 = match c {
+        // The superscripts one to three live in the Latin-1 block.
+        '1' => 0x00B9,
+        '2' => 0x00B2,
+        '3' => 0x00B3,
+        // The remaining digits sit at their value's offset from U+2070.
+        '0' | '4' ..= '9' => 0x2070 + (c as u32 - '0' as u32),
+        '+' => 0x207A,
+        '-' => 0x207B,
+        '=' => 0x207C,
+        '(' => 0x207D,
+        ')' => 0x207E,
+        'n' => 0x207F,
+        'i' => 0x2071,
+        ' ' => 0x0020,
+        _ => return None,
+    };
+    char::from_u32(code)
+}
+
+/// Convert a character to its corresponding Unicode subscript.
+///
+/// Supports the digits, `+`, `-`, `=`, parentheses and a handful of
+/// lowercase letters; a space is passed through unchanged. Any other
+/// character yields `None`.
+fn to_subscript_codepoint(c: char) -> Option<char> {
+    let code: u32 = match c {
+        // All subscript digits are contiguous starting at U+2080.
+        '0' ..= '9' => 0x2080 + (c as u32 - '0' as u32),
+        '+' => 0x208A,
+        '-' => 0x208B,
+        '=' => 0x208C,
+        '(' => 0x208D,
+        ')' => 0x208E,
+        'a' => 0x2090,
+        'e' => 0x2091,
+        'o' => 0x2092,
+        'x' => 0x2093,
+        'h' => 0x2095,
+        'k' => 0x2096,
+        'l' => 0x2097,
+        'm' => 0x2098,
+        'n' => 0x2099,
+        'p' => 0x209A,
+        's' => 0x209B,
+        't' => 0x209C,
+        ' ' => 0x0020,
+        _ => return None,
+    };
+    char::from_u32(code)
+}
+
+/// A category of script.
+///
+/// Used as the const generic parameter of `ShiftNode`.
+pub type ScriptKind = usize;
+
+/// Text that is rendered smaller and raised, also known as superior.
+const SUPERSCRIPT: ScriptKind = 0;
+
+/// Text that is rendered smaller and lowered, also known as inferior.
+const SUBSCRIPT: ScriptKind = 1;