summaryrefslogtreecommitdiff
path: root/crates/typst-utils
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2024-05-13 17:25:43 +0200
committerGitHub <noreply@github.com>2024-05-13 15:25:43 +0000
commit95cd6adf24cb14ede8896fbb0c610432960f4f3a (patch)
treebade59888a5268987eb4fae2dc30ddeaf28ad193 /crates/typst-utils
parent7b656b3deb5f05e6e56d666622643a5cde8c435a (diff)
Factor out `typst-utils` crate (#4125)
Diffstat (limited to 'crates/typst-utils')
-rw-r--r--crates/typst-utils/Cargo.toml22
-rw-r--r--crates/typst-utils/src/bitset.rs104
-rw-r--r--crates/typst-utils/src/deferred.rs53
-rw-r--r--crates/typst-utils/src/fat.rs55
-rw-r--r--crates/typst-utils/src/hash.rs163
-rw-r--r--crates/typst-utils/src/lib.rs245
-rw-r--r--crates/typst-utils/src/macros.rs49
-rw-r--r--crates/typst-utils/src/pico.rs80
-rw-r--r--crates/typst-utils/src/scalar.rs201
9 files changed, 972 insertions, 0 deletions
diff --git a/crates/typst-utils/Cargo.toml b/crates/typst-utils/Cargo.toml
new file mode 100644
index 00000000..ba75e399
--- /dev/null
+++ b/crates/typst-utils/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "typst-utils"
+description = "Utilities for Typst."
+version = { workspace = true }
+rust-version = { workspace = true }
+authors = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+categories = { workspace = true }
+keywords = { workspace = true }
+readme = { workspace = true }
+
+[dependencies]
+once_cell = { workspace = true }
+siphasher = { workspace = true }
+portable-atomic = { workspace = true }
+rayon = { workspace = true }
+
+[lints]
+workspace = true
diff --git a/crates/typst-utils/src/bitset.rs b/crates/typst-utils/src/bitset.rs
new file mode 100644
index 00000000..cbac7a1e
--- /dev/null
+++ b/crates/typst-utils/src/bitset.rs
@@ -0,0 +1,104 @@
+use std::fmt::{self, Debug, Formatter};
+
/// Efficiently stores a set of numbers which are expected to be very small
/// (< 32/64 depending on the architecture).
///
/// Inserting a very small value is very cheap while inserting a large one may
/// be very expensive.
#[derive(Clone, PartialEq, Hash)]
pub struct BitSet {
    /// Stores the values below `BITS` as a bit mask.
    low: usize,
    /// Stores the values at or above `BITS`, chunked into further bit masks.
    /// The extra `Box` keeps the inline size of the `BitSet` down.
    #[allow(clippy::box_collection)]
    hi: Option<Box<Vec<usize>>>,
}

/// The number of bits per chunk (one machine word).
const BITS: usize = usize::BITS as usize;

impl BitSet {
    /// Creates a new empty bit set.
    pub fn new() -> Self {
        Self { low: 0, hi: None }
    }

    /// Inserts a number into the set.
    pub fn insert(&mut self, value: usize) {
        if value < BITS {
            self.low |= 1 << value;
        } else {
            // Values in `BITS..2 * BITS` land in chunk 0, and so on.
            let (chunk, within) = (value / BITS - 1, value % BITS);
            let chunks = self.hi.get_or_insert_with(Default::default);
            if chunks.len() <= chunk {
                chunks.resize(chunk + 1, 0);
            }
            chunks[chunk] |= 1 << within;
        }
    }

    /// Whether a number is present in the set.
    pub fn contains(&self, value: usize) -> bool {
        if value < BITS {
            self.low & (1 << value) != 0
        } else {
            let (chunk, within) = (value / BITS - 1, value % BITS);
            // A missing or too-short high part just means the value was
            // never inserted.
            self.hi
                .as_ref()
                .and_then(|chunks| chunks.get(chunk))
                .is_some_and(|bits| bits & (1 << within) != 0)
        }
    }
}

impl Default for BitSet {
    fn default() -> Self {
        Self::new()
    }
}

impl Debug for BitSet {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // One implicit chunk for `low` plus all allocated high chunks.
        let chunks = 1 + self.hi.as_ref().map_or(0, |chunks| chunks.len());
        let mut list = f.debug_list();
        for value in 0..chunks * BITS {
            if self.contains(value) {
                list.entry(&value);
            }
        }
        list.finish()
    }
}
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bitset() {
        let mut set = BitSet::new();

        // Nothing is contained in an empty set.
        assert!(!set.contains(0));
        assert!(!set.contains(5));

        // Insert low values as well as values that live in high chunks.
        for value in [0, 1, 5, 64, 105, 208] {
            set.insert(value);
        }

        // Inserted values are present, their neighbors are not.
        assert!(set.contains(0));
        assert!(set.contains(1));
        assert!(!set.contains(2));
        assert!(set.contains(5));
        assert!(!set.contains(63));
        assert!(set.contains(64));
        assert!(!set.contains(65));
        assert!(!set.contains(104));
        assert!(set.contains(105));
        assert!(!set.contains(106));
        assert!(set.contains(208));
        assert!(!set.contains(209));

        // Debug output lists the contained values in ascending order.
        assert_eq!(format!("{set:?}"), "[0, 1, 5, 64, 105, 208]");
    }
}
diff --git a/crates/typst-utils/src/deferred.rs b/crates/typst-utils/src/deferred.rs
new file mode 100644
index 00000000..46cd6e88
--- /dev/null
+++ b/crates/typst-utils/src/deferred.rs
@@ -0,0 +1,53 @@
+use std::sync::Arc;
+
+use once_cell::sync::OnceCell;
+
+/// A value that is lazily executed on another thread.
+///
+/// Execution will be started in the background and can be waited on.
+pub struct Deferred<T>(Arc<OnceCell<T>>);
+
+impl<T: Send + Sync + 'static> Deferred<T> {
+ /// Creates a new deferred value.
+ ///
+ /// The closure will be called on a secondary thread such that the value
+ /// can be initialized in parallel.
+ pub fn new<F>(f: F) -> Self
+ where
+ F: FnOnce() -> T + Send + Sync + 'static,
+ {
+ let inner = Arc::new(OnceCell::new());
+ let cloned = Arc::clone(&inner);
+ rayon::spawn(move || {
+ // Initialize the value if it hasn't been initialized yet.
+ // We do this to avoid panicking in case it was set externally.
+ cloned.get_or_init(f);
+ });
+ Self(inner)
+ }
+
+ /// Waits on the value to be initialized.
+ ///
+ /// If the value has already been initialized, this will return
+ /// immediately. Otherwise, this will block until the value is
+ /// initialized in another thread.
+ pub fn wait(&self) -> &T {
+ // Fast path if the value is already available. We don't want to yield
+ // to rayon in that case.
+ if let Some(value) = self.0.get() {
+ return value;
+ }
+
+ // Ensure that we yield to give the deferred value a chance to compute
+ // single-threaded platforms (for WASM compatibility).
+ while let Some(rayon::Yield::Executed) = rayon::yield_now() {}
+
+ self.0.wait()
+ }
+}
+
+impl<T> Clone for Deferred<T> {
+ fn clone(&self) -> Self {
+ Self(Arc::clone(&self.0))
+ }
+}
diff --git a/crates/typst-utils/src/fat.rs b/crates/typst-utils/src/fat.rs
new file mode 100644
index 00000000..d3c9bb20
--- /dev/null
+++ b/crates/typst-utils/src/fat.rs
@@ -0,0 +1,55 @@
+//! Fat pointer handling.
+//!
+//! This assumes the memory representation of fat pointers. Although it is not
+//! guaranteed by Rust, it's improbable that it will change. Still, when the
+//! pointer metadata APIs are stable, we should definitely move to them:
+//! <https://github.com/rust-lang/rust/issues/81513>
+
+use std::alloc::Layout;
+use std::mem;
+
/// Create a fat pointer from a data address and a vtable address.
///
/// # Safety
/// Must only be called when `T` is a `dyn Trait`. The data address must point
/// to a value whose type implements the trait of `T` and the `vtable` must
/// have been extracted with [`vtable`].
#[track_caller]
pub unsafe fn from_raw_parts<T: ?Sized>(data: *const (), vtable: *const ()) -> *const T {
    // Guard (in debug builds) against `*const T` not having the assumed
    // two-word layout.
    debug_assert_eq!(Layout::new::<*const T>(), Layout::new::<FatPointer>());
    let fat = FatPointer { data, vtable };
    mem::transmute_copy::<FatPointer, *const T>(&fat)
}

/// Create a mutable fat pointer from a data address and a vtable address.
///
/// # Safety
/// Must only be called when `T` is a `dyn Trait`. The data address must point
/// to a value whose type implements the trait of `T` and the `vtable` must
/// have been extracted with [`vtable`].
#[track_caller]
pub unsafe fn from_raw_parts_mut<T: ?Sized>(data: *mut (), vtable: *const ()) -> *mut T {
    debug_assert_eq!(Layout::new::<*mut T>(), Layout::new::<FatPointer>());
    let fat = FatPointer { data, vtable };
    mem::transmute_copy::<FatPointer, *mut T>(&fat)
}

/// Extract the address to a trait object's vtable.
///
/// # Safety
/// Must only be called when `T` is a `dyn Trait`.
#[track_caller]
pub unsafe fn vtable<T: ?Sized>(ptr: *const T) -> *const () {
    debug_assert_eq!(Layout::new::<*const T>(), Layout::new::<FatPointer>());
    mem::transmute_copy::<*const T, FatPointer>(&ptr).vtable
}

/// The memory representation of a trait object pointer.
///
/// Although this is not guaranteed by Rust, it's improbable that it will
/// change.
#[repr(C)]
struct FatPointer {
    /// The address of the pointee's data.
    data: *const (),
    /// The address of the vtable for the pointee's concrete type.
    vtable: *const (),
}
diff --git a/crates/typst-utils/src/hash.rs b/crates/typst-utils/src/hash.rs
new file mode 100644
index 00000000..82bb76af
--- /dev/null
+++ b/crates/typst-utils/src/hash.rs
@@ -0,0 +1,163 @@
+use std::any::Any;
+use std::fmt::{self, Debug};
+use std::hash::{Hash, Hasher};
+use std::ops::{Deref, DerefMut};
+
+use portable_atomic::{AtomicU128, Ordering};
+use siphasher::sip128::{Hasher128, SipHasher13};
+
+/// A wrapper type with lazily-computed hash.
+///
+/// This is useful if you want to pass large values of `T` to memoized
+/// functions. Especially recursive structures like trees benefit from
+/// intermediate prehashed nodes.
+///
+/// Note that for a value `v` of type `T`, `hash(v)` is not necessarily equal to
+/// `hash(LazyHash::new(v))`. Writing the precomputed hash into a hasher's
+/// state produces different output than writing the value's parts directly.
+/// However, that seldomly matters as you are typically either dealing with
+/// values of type `T` or with values of type `LazyHash<T>`, not a mix of both.
+///
+/// # Equality
+/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a
+/// hash collision is reduced to an absolute minimum. Therefore, this type
+/// additionally provides `PartialEq` and `Eq` implementations that compare by
+/// hash instead of by value. For this to be correct, your hash implementation
+/// **must feed all information relevant to the `PartialEq` impl to the
+/// hasher.**
+///
+/// # Usage
+/// If the value is expected to be cloned, it is best used inside of an `Arc`
+/// or `Rc` to best re-use the hash once it has been computed.
+pub struct LazyHash<T: ?Sized> {
+ /// The hash for the value.
+ hash: AtomicU128,
+ /// The underlying value.
+ value: T,
+}
+
+impl<T: Default> Default for LazyHash<T> {
+ #[inline]
+ fn default() -> Self {
+ Self::new(Default::default())
+ }
+}
+
+impl<T> LazyHash<T> {
+ /// Wraps an item without pre-computed hash.
+ #[inline]
+ pub fn new(value: T) -> Self {
+ Self { hash: AtomicU128::new(0), value }
+ }
+
+ /// Wrap an item with a pre-computed hash.
+ ///
+ /// **Important:** The hash must be correct for the value. This cannot be
+ /// enforced at compile time, so use with caution.
+ #[inline]
+ pub fn reuse<U: ?Sized>(value: T, existing: &LazyHash<U>) -> Self {
+ LazyHash { hash: AtomicU128::new(existing.load_hash()), value }
+ }
+
+ /// Returns the wrapped value.
+ #[inline]
+ pub fn into_inner(self) -> T {
+ self.value
+ }
+}
+
+impl<T: ?Sized> LazyHash<T> {
+ /// Get the hash, returns zero if not computed yet.
+ #[inline]
+ fn load_hash(&self) -> u128 {
+ self.hash.load(Ordering::SeqCst)
+ }
+}
+
+impl<T: Hash + ?Sized + 'static> LazyHash<T> {
+ /// Get the hash or compute it if not set yet.
+ #[inline]
+ fn load_or_compute_hash(&self) -> u128 {
+ let hash = self.load_hash();
+ if hash == 0 {
+ let hashed = hash_item(&self.value);
+ self.hash.store(hashed, Ordering::SeqCst);
+ hashed
+ } else {
+ hash
+ }
+ }
+
+ /// Reset the hash to zero.
+ #[inline]
+ fn reset_hash(&mut self) {
+ // Because we have a mutable reference, we can skip the atomic
+ *self.hash.get_mut() = 0;
+ }
+}
+
+/// Hash the item.
+#[inline]
+fn hash_item<T: Hash + ?Sized + 'static>(item: &T) -> u128 {
+ // Also hash the TypeId because the type might be converted
+ // through an unsized coercion.
+ let mut state = SipHasher13::new();
+ item.type_id().hash(&mut state);
+ item.hash(&mut state);
+ state.finish128().as_u128()
+}
+
+impl<T: Hash + ?Sized + 'static> Hash for LazyHash<T> {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ state.write_u128(self.load_or_compute_hash());
+ }
+}
+
+impl<T> From<T> for LazyHash<T> {
+ #[inline]
+ fn from(value: T) -> Self {
+ Self::new(value)
+ }
+}
+
+impl<T: Hash + ?Sized + 'static> Eq for LazyHash<T> {}
+
+impl<T: Hash + ?Sized + 'static> PartialEq for LazyHash<T> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ self.load_or_compute_hash() == other.load_or_compute_hash()
+ }
+}
+
+impl<T: ?Sized> Deref for LazyHash<T> {
+ type Target = T;
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ &self.value
+ }
+}
+
+impl<T: Hash + ?Sized + 'static> DerefMut for LazyHash<T> {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ self.reset_hash();
+ &mut self.value
+ }
+}
+
+impl<T: Hash + Clone + 'static> Clone for LazyHash<T> {
+ fn clone(&self) -> Self {
+ Self {
+ hash: AtomicU128::new(self.load_hash()),
+ value: self.value.clone(),
+ }
+ }
+}
+
+impl<T: Debug> Debug for LazyHash<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.value.fmt(f)
+ }
+}
diff --git a/crates/typst-utils/src/lib.rs b/crates/typst-utils/src/lib.rs
new file mode 100644
index 00000000..e0a2c835
--- /dev/null
+++ b/crates/typst-utils/src/lib.rs
@@ -0,0 +1,245 @@
+//! Utilities for Typst.
+
+pub mod fat;
+
+#[macro_use]
+mod macros;
+mod bitset;
+mod deferred;
+mod hash;
+mod pico;
+mod scalar;
+
+pub use self::bitset::BitSet;
+pub use self::deferred::Deferred;
+pub use self::hash::LazyHash;
+pub use self::pico::PicoStr;
+pub use self::scalar::Scalar;
+
+use std::fmt::{Debug, Formatter};
+use std::hash::Hash;
+use std::iter::{Chain, Flatten, Rev};
+use std::num::NonZeroUsize;
+use std::ops::{Add, Deref, Div, Mul, Neg, Sub};
+use std::sync::Arc;
+
+use siphasher::sip128::{Hasher128, SipHasher13};
+
/// Turn a closure into a struct implementing [`Debug`].
pub fn debug<F>(f: F) -> impl Debug
where
    F: Fn(&mut Formatter) -> std::fmt::Result,
{
    /// Adapter that forwards `Debug` formatting to the wrapped closure.
    struct Adapter<F>(F);

    impl<F> Debug for Adapter<F>
    where
        F: Fn(&mut Formatter) -> std::fmt::Result,
    {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            (self.0)(f)
        }
    }

    Adapter(f)
}
+
+/// Calculate a 128-bit siphash of a value.
+pub fn hash128<T: Hash + ?Sized>(value: &T) -> u128 {
+ let mut state = SipHasher13::new();
+ value.hash(&mut state);
+ state.finish128().as_u128()
+}
+
/// An extra constant for [`NonZeroUsize`].
pub trait NonZeroExt {
    /// The number `1`.
    const ONE: Self;
}

impl NonZeroExt for NonZeroUsize {
    const ONE: Self = match Self::new(1) {
        Some(one) => one,
        // One is never zero, so this branch cannot be taken.
        None => unreachable!(),
    };
}

/// Extra methods for [`Arc`].
pub trait ArcExt<T> {
    /// Takes the inner value if there is exactly one strong reference and
    /// clones it otherwise.
    fn take(self) -> T;
}

impl<T: Clone> ArcExt<T> for Arc<T> {
    fn take(self) -> T {
        match Arc::try_unwrap(self) {
            // We were the sole owner, so the value can be moved out directly.
            Ok(value) => value,
            // Other references exist; fall back to cloning the shared value.
            Err(shared) => (*shared).clone(),
        }
    }
}
+
/// Extra methods for [`[T]`](slice).
pub trait SliceExt<T> {
    /// Split a slice into consecutive runs with the same key and yield for
    /// each such run the key and the slice of elements with that key.
    fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
    where
        F: FnMut(&T) -> K,
        K: PartialEq;
}

impl<T> SliceExt<T> for [T] {
    fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
        GroupByKey { slice: self, f }
    }
}

/// This struct is created by [`SliceExt::group_by_key`].
pub struct GroupByKey<'a, T, F> {
    /// The not-yet-grouped remainder of the slice.
    slice: &'a [T],
    /// The key extraction function.
    f: F,
}

impl<'a, T, K, F> Iterator for GroupByKey<'a, T, F>
where
    F: FnMut(&T) -> K,
    K: PartialEq,
{
    type Item = (K, &'a [T]);

    fn next(&mut self) -> Option<Self::Item> {
        let mut elems = self.slice.iter();
        // An empty remainder terminates the iterator here.
        let key = (self.f)(elems.next()?);
        // Count how many further elements share the first element's key.
        let len = 1 + elems.take_while(|t| (self.f)(t) == key).count();
        let (group, rest) = self.slice.split_at(len);
        self.slice = rest;
        Some((key, group))
    }
}
+
/// Adapter for reversing iterators conditionally.
pub trait MaybeReverseIter {
    type RevIfIter;

    /// Reverse this iterator (apply .rev()) based on some condition.
    fn rev_if(self, condition: bool) -> Self::RevIfIter
    where
        Self: Sized;
}

impl<I: Iterator + DoubleEndedIterator> MaybeReverseIter for I {
    type RevIfIter =
        Chain<Flatten<std::option::IntoIter<I>>, Flatten<std::option::IntoIter<Rev<I>>>>;

    fn rev_if(self, condition: bool) -> Self::RevIfIter
    where
        Self: Sized,
    {
        // Exactly one of the two options is populated. Chaining the two
        // flattened options thus yields either the forward or the reversed
        // iterator, while giving both cases a single concrete return type.
        let (forward, backward) =
            if condition { (None, Some(self.rev())) } else { (Some(self), None) };
        forward.into_iter().flatten().chain(backward.into_iter().flatten())
    }
}

/// Check if the [`Option`]-wrapped L is same to R.
pub fn option_eq<L, R>(left: Option<L>, other: R) -> bool
where
    L: PartialEq<R>,
{
    // `None` never equals anything.
    left.is_some_and(|v| v == other)
}
+
/// A container around a static reference that is cheap to clone and hash.
///
/// Equality and hashing are based on the reference's address rather than the
/// pointee's value, which keeps both operations O(1) and consistent with each
/// other.
#[derive(Debug)]
pub struct Static<T: 'static>(pub &'static T);

impl<T> Deref for Static<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        self.0
    }
}

impl<T> Copy for Static<T> {}

impl<T> Clone for Static<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Eq for Static<T> {}

impl<T> PartialEq for Static<T> {
    fn eq(&self, other: &Self) -> bool {
        // Compare by address, not by value.
        std::ptr::eq(self.0, other.0)
    }
}

impl<T> Hash for Static<T> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Hash the address, matching the address-based equality above.
        state.write_usize(self.0 as *const _ as _);
    }
}
+
/// Generic access to a structure's components.
pub trait Get<Index> {
    /// The structure's component type.
    type Component;

    /// Borrow the component for the specified index.
    fn get_ref(&self, index: Index) -> &Self::Component;

    /// Borrow the component for the specified index mutably.
    fn get_mut(&mut self, index: Index) -> &mut Self::Component;

    /// Convenience method for setting a component.
    fn set(&mut self, index: Index, component: Self::Component) {
        *self.get_mut(index) = component;
    }

    /// Convenience method for getting a copy of a component.
    fn get(self, index: Index) -> Self::Component
    where
        Self: Sized,
        Self::Component: Copy,
    {
        *self.get_ref(index)
    }
}
+
/// A numeric type.
pub trait Numeric:
    Sized
    + Debug
    + Copy
    + PartialEq
    + Neg<Output = Self>
    + Add<Output = Self>
    + Sub<Output = Self>
    + Mul<f64, Output = Self>
    + Div<f64, Output = Self>
{
    /// The identity element for addition.
    fn zero() -> Self;

    /// Whether `self` consists only of finite parts.
    fn is_finite(self) -> bool;

    /// Whether `self` is zero.
    fn is_zero(self) -> bool {
        self == Self::zero()
    }
}

/// Round a float to two decimal places.
pub fn round_2(value: f64) -> f64 {
    // Scale up, round to the nearest integer, and scale back down.
    (value * 100.0).round() / 100.0
}
diff --git a/crates/typst-utils/src/macros.rs b/crates/typst-utils/src/macros.rs
new file mode 100644
index 00000000..dfe0c319
--- /dev/null
+++ b/crates/typst-utils/src/macros.rs
@@ -0,0 +1,49 @@
/// Implement the `Sub` trait based on existing `Neg` and `Add` impls.
///
/// Usage: `sub_impl!(Lhs - Rhs -> Output);` expands to an
/// `impl std::ops::Sub<Rhs> for Lhs` whose output is `Output` and whose body
/// computes `self + -other`.
#[macro_export]
macro_rules! sub_impl {
    ($a:ident - $b:ident -> $c:ident) => {
        impl std::ops::Sub<$b> for $a {
            type Output = $c;

            fn sub(self, other: $b) -> $c {
                // Subtraction is defined as addition of the negation.
                self + -other
            }
        }
    };
}
+
/// Implement an assign trait based on an existing non-assign trait.
///
/// Usage: `assign_impl!(Lhs += Rhs);` (and analogously for `-=`, `*=` and
/// `/=`) expands to the corresponding `std::ops` assign impl that delegates
/// to the non-assign operator.
#[macro_export]
macro_rules! assign_impl {
    ($a:ident += $b:ident) => {
        impl std::ops::AddAssign<$b> for $a {
            fn add_assign(&mut self, other: $b) {
                *self = *self + other;
            }
        }
    };

    ($a:ident -= $b:ident) => {
        impl std::ops::SubAssign<$b> for $a {
            fn sub_assign(&mut self, other: $b) {
                *self = *self - other;
            }
        }
    };

    ($a:ident *= $b:ident) => {
        impl std::ops::MulAssign<$b> for $a {
            fn mul_assign(&mut self, other: $b) {
                *self = *self * other;
            }
        }
    };

    ($a:ident /= $b:ident) => {
        impl std::ops::DivAssign<$b> for $a {
            fn div_assign(&mut self, other: $b) {
                *self = *self / other;
            }
        }
    };
}
diff --git a/crates/typst-utils/src/pico.rs b/crates/typst-utils/src/pico.rs
new file mode 100644
index 00000000..b58d6809
--- /dev/null
+++ b/crates/typst-utils/src/pico.rs
@@ -0,0 +1,80 @@
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::fmt::{self, Debug, Formatter};
+use std::sync::RwLock;
+
+use once_cell::sync::Lazy;
+
+/// The global string interner.
+static INTERNER: Lazy<RwLock<Interner>> =
+ Lazy::new(|| RwLock::new(Interner { to_id: HashMap::new(), from_id: Vec::new() }));
+
+/// A string interner.
+struct Interner {
+ to_id: HashMap<&'static str, PicoStr>,
+ from_id: Vec<&'static str>,
+}
+
+/// An interned string.
+///
+/// The API is purposefully kept small. This is because it might be relatively
+/// slow to look up a string in the interner, so we want to avoid doing it
+/// unnecessarily. For this reason, the user should use the [`PicoStr::resolve`]
+/// method to get the underlying string, such that the lookup is done only once.
+#[derive(Copy, Clone, Eq, PartialEq, Hash)]
+pub struct PicoStr(u32);
+
+impl PicoStr {
+ /// Creates a new interned string.
+ pub fn new(string: &str) -> Self {
+ if let Some(&id) = INTERNER.read().unwrap().to_id.get(string) {
+ return id;
+ }
+
+ let mut interner = INTERNER.write().unwrap();
+ let num = interner.from_id.len().try_into().expect("out of string ids");
+
+ // Create a new entry forever by leaking the string. PicoStr is only
+ // used for strings that aren't created en masse, so it is okay.
+ let id = Self(num);
+ let string = Box::leak(string.to_string().into_boxed_str());
+ interner.to_id.insert(string, id);
+ interner.from_id.push(string);
+ id
+ }
+
+ /// Resolves the interned string.
+ pub fn resolve(&self) -> &'static str {
+ INTERNER.read().unwrap().from_id[self.0 as usize]
+ }
+}
+
+impl Debug for PicoStr {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.resolve().fmt(f)
+ }
+}
+
+impl Ord for PicoStr {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.resolve().cmp(other.resolve())
+ }
+}
+
+impl PartialOrd for PicoStr {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl AsRef<str> for PicoStr {
+ fn as_ref(&self) -> &str {
+ self.resolve()
+ }
+}
+
+impl From<&str> for PicoStr {
+ fn from(value: &str) -> Self {
+ Self::new(value)
+ }
+}
diff --git a/crates/typst-utils/src/scalar.rs b/crates/typst-utils/src/scalar.rs
new file mode 100644
index 00000000..4036c231
--- /dev/null
+++ b/crates/typst-utils/src/scalar.rs
@@ -0,0 +1,201 @@
+use std::cmp::Ordering;
+use std::fmt::{self, Debug, Formatter};
+use std::hash::{Hash, Hasher};
+use std::iter::Sum;
+use std::ops::{
+ Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign,
+};
+
+use crate::Numeric;
+
/// A 64-bit float that implements `Eq`, `Ord` and `Hash`.
///
/// Panics if it's `NaN` during any of those operations.
#[derive(Default, Copy, Clone)]
pub struct Scalar(f64);

impl Scalar {
    /// The scalar containing `0.0`.
    pub const ZERO: Self = Self(0.0);

    /// The scalar containing `1.0`.
    pub const ONE: Self = Self(1.0);

    /// The scalar containing `f64::INFINITY`.
    pub const INFINITY: Self = Self(f64::INFINITY);

    /// Creates a [`Scalar`] with the given value.
    ///
    /// If the value is NaN, then it is set to `0.0` in the result.
    pub const fn new(x: f64) -> Self {
        Self(if is_nan(x) { 0.0 } else { x })
    }

    /// Gets the value of this [`Scalar`].
    pub const fn get(self) -> f64 {
        self.0
    }
}

// We have to detect NaNs this way since `f64::is_nan` isn’t const
// on stable yet:
// ([tracking issue](https://github.com/rust-lang/rust/issues/57241))
#[allow(clippy::unusual_byte_groupings)]
const fn is_nan(x: f64) -> bool {
    // Safety: all bit patterns are valid for u64, and f64 has no padding
    // bits. We cannot use `f64::to_bits` because it is not const.
    let bits = unsafe { std::mem::transmute::<f64, u64>(x) };
    // A float is NaN iff its exponent bits are all ones while its mantissa
    // is non-zero. The first shift drops the sign bit; the second moves the
    // eleven exponent bits into the lowest positions.
    (bits << 1 >> (64 - 12 + 1)) == 0b0_111_1111_1111 && (bits << 12) != 0
}
+
impl Numeric for Scalar {
    /// The additive identity, `0.0`.
    fn zero() -> Self {
        Self(0.0)
    }

    /// Whether the underlying float is neither infinite nor NaN.
    fn is_finite(self) -> bool {
        self.0.is_finite()
    }
}
+
+impl Debug for Scalar {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+impl Eq for Scalar {}
+
+impl PartialEq for Scalar {
+ fn eq(&self, other: &Self) -> bool {
+ assert!(!self.0.is_nan() && !other.0.is_nan(), "float is NaN");
+ self.0 == other.0
+ }
+}
+
+impl PartialEq<f64> for Scalar {
+ fn eq(&self, other: &f64) -> bool {
+ self == &Self(*other)
+ }
+}
+
+impl Ord for Scalar {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.0.partial_cmp(&other.0).expect("float is NaN")
+ }
+}
+
+impl PartialOrd for Scalar {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Hash for Scalar {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ debug_assert!(!self.0.is_nan(), "float is NaN");
+ self.0.to_bits().hash(state);
+ }
+}
+
+impl From<f64> for Scalar {
+ fn from(float: f64) -> Self {
+ Self::new(float)
+ }
+}
+
+impl From<Scalar> for f64 {
+ fn from(scalar: Scalar) -> Self {
+ scalar.0
+ }
+}
+
+impl Neg for Scalar {
+ type Output = Self;
+
+ fn neg(self) -> Self::Output {
+ Self::new(-self.0)
+ }
+}
+
+impl<T: Into<Self>> Add<T> for Scalar {
+ type Output = Self;
+
+ fn add(self, rhs: T) -> Self::Output {
+ Self::new(self.0 + rhs.into().0)
+ }
+}
+
+impl<T: Into<Self>> AddAssign<T> for Scalar {
+ fn add_assign(&mut self, rhs: T) {
+ *self = *self + rhs.into();
+ }
+}
+
+impl<T: Into<Self>> Sub<T> for Scalar {
+ type Output = Self;
+
+ fn sub(self, rhs: T) -> Self::Output {
+ Self::new(self.0 - rhs.into().0)
+ }
+}
+
+impl<T: Into<Self>> SubAssign<T> for Scalar {
+ fn sub_assign(&mut self, rhs: T) {
+ *self = *self - rhs.into();
+ }
+}
+
+impl<T: Into<Self>> Mul<T> for Scalar {
+ type Output = Self;
+
+ fn mul(self, rhs: T) -> Self::Output {
+ Self::new(self.0 * rhs.into().0)
+ }
+}
+
+impl<T: Into<Self>> MulAssign<T> for Scalar {
+ fn mul_assign(&mut self, rhs: T) {
+ *self = *self * rhs.into();
+ }
+}
+
+impl<T: Into<Self>> Div<T> for Scalar {
+ type Output = Self;
+
+ fn div(self, rhs: T) -> Self::Output {
+ Self::new(self.0 / rhs.into().0)
+ }
+}
+
+impl<T: Into<Self>> DivAssign<T> for Scalar {
+ fn div_assign(&mut self, rhs: T) {
+ *self = *self / rhs.into();
+ }
+}
+
+impl<T: Into<Self>> Rem<T> for Scalar {
+ type Output = Self;
+
+ fn rem(self, rhs: T) -> Self::Output {
+ Self::new(self.0 % rhs.into().0)
+ }
+}
+
+impl<T: Into<Self>> RemAssign<T> for Scalar {
+ fn rem_assign(&mut self, rhs: T) {
+ *self = *self % rhs.into();
+ }
+}
+
+impl Sum for Scalar {
+ fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+ Self::new(iter.map(|s| s.0).sum())
+ }
+}
+
+impl<'a> Sum<&'a Self> for Scalar {
+ fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+ Self::new(iter.map(|s| s.0).sum())
+ }
+}