summaryrefslogtreecommitdiff
path: root/src/util
diff options
context:
space:
mode:
Diffstat (limited to 'src/util')
-rw-r--r--src/util/eco.rs412
-rw-r--r--src/util/mod.rs155
2 files changed, 567 insertions, 0 deletions
diff --git a/src/util/eco.rs b/src/util/eco.rs
new file mode 100644
index 00000000..a52a163c
--- /dev/null
+++ b/src/util/eco.rs
@@ -0,0 +1,412 @@
+//! An economical string.
+
+use std::borrow::Borrow;
+use std::cmp::Ordering;
+use std::fmt::{self, Debug, Display, Formatter, Write};
+use std::hash::{Hash, Hasher};
+use std::ops::{Add, AddAssign, Deref};
+use std::rc::Rc;
+
+/// An economical string with inline storage and clone-on-write value semantics.
+#[derive(Clone)]
+pub struct EcoString(Repr);
+
+/// The internal representation. Either:
+/// - inline when below a certain number of bytes,
+/// - or reference-counted on the heap with COW semantics.
+#[derive(Clone)]
+enum Repr {
+ Small { buf: [u8; LIMIT], len: u8 },
+ Large(Rc<String>),
+}
+
+/// The maximum number of bytes that can be stored inline.
+///
+/// The value is chosen such that an `EcoString` fits exactly into 16 bytes
+/// (which are needed anyway due to the `Rc`s alignment, at least on 64-bit
+/// platforms).
+///
+/// Must be at least 4 to hold any char.
+const LIMIT: usize = 14;
+
+impl EcoString {
+ /// Create a new, empty string.
+ pub fn new() -> Self {
+ Self(Repr::Small { buf: [0; LIMIT], len: 0 })
+ }
+
+ /// Create a new, empty string with the given `capacity`.
+ pub fn with_capacity(capacity: usize) -> Self {
+ if capacity <= LIMIT {
+ Self::new()
+ } else {
+ Self(Repr::Large(Rc::new(String::with_capacity(capacity))))
+ }
+ }
+
+ /// Create an instance from an existing string-like type.
+ pub fn from_str<S>(s: S) -> Self
+ where
+ S: AsRef<str> + Into<String>,
+ {
+ let slice = s.as_ref();
+ let len = slice.len();
+ Self(if len <= LIMIT {
+ let mut buf = [0; LIMIT];
+ buf[.. len].copy_from_slice(slice.as_bytes());
+ Repr::Small { buf, len: len as u8 }
+ } else {
+ Repr::Large(Rc::new(s.into()))
+ })
+ }
+
+ /// Whether the string is empty.
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// The length of the string in bytes.
+ pub fn len(&self) -> usize {
+ match &self.0 {
+ Repr::Small { len, .. } => usize::from(*len),
+ Repr::Large(string) => string.len(),
+ }
+ }
+
+ /// A string slice containing the entire string.
+ pub fn as_str(&self) -> &str {
+ self
+ }
+
+ /// Append the given character at the end.
+ pub fn push(&mut self, c: char) {
+ match &mut self.0 {
+ Repr::Small { buf, len } => {
+ let prev = usize::from(*len);
+ if c.len_utf8() == 1 && prev < LIMIT {
+ buf[prev] = c as u8;
+ *len += 1;
+ } else {
+ self.push_str(c.encode_utf8(&mut [0; 4]));
+ }
+ }
+ Repr::Large(rc) => Rc::make_mut(rc).push(c),
+ }
+ }
+
+ /// Append the given string slice at the end.
+ pub fn push_str(&mut self, string: &str) {
+ match &mut self.0 {
+ Repr::Small { buf, len } => {
+ let prev = usize::from(*len);
+ let new = prev + string.len();
+ if new <= LIMIT {
+ buf[prev .. new].copy_from_slice(string.as_bytes());
+ *len = new as u8;
+ } else {
+ let mut spilled = String::with_capacity(new);
+ spilled.push_str(self);
+ spilled.push_str(string);
+ self.0 = Repr::Large(Rc::new(spilled));
+ }
+ }
+ Repr::Large(rc) => Rc::make_mut(rc).push_str(string),
+ }
+ }
+
+ /// Remove the last character from the string.
+ pub fn pop(&mut self) -> Option<char> {
+ let c = self.as_str().chars().rev().next()?;
+ match &mut self.0 {
+ Repr::Small { len, .. } => {
+ *len -= c.len_utf8() as u8;
+ }
+ Repr::Large(rc) => {
+ Rc::make_mut(rc).pop();
+ }
+ }
+ Some(c)
+ }
+
+ /// Clear the string.
+ pub fn clear(&mut self) {
+ match &mut self.0 {
+ Repr::Small { len, .. } => *len = 0,
+ Repr::Large(rc) => {
+ if Rc::strong_count(rc) == 1 {
+ Rc::make_mut(rc).clear();
+ } else {
+ *self = Self::new();
+ }
+ }
+ }
+ }
+
+ /// Repeat this string `n` times.
+ pub fn repeat(&self, n: usize) -> Self {
+ if let Repr::Small { buf, len } = &self.0 {
+ let prev = usize::from(*len);
+ let new = prev.saturating_mul(n);
+ if new <= LIMIT {
+ let src = &buf[.. prev];
+ let mut buf = [0; LIMIT];
+ for i in 0 .. n {
+ buf[prev * i .. prev * (i + 1)].copy_from_slice(src);
+ }
+ return Self(Repr::Small { buf, len: new as u8 });
+ }
+ }
+
+ self.as_str().repeat(n).into()
+ }
+}
+
+impl From<&Self> for EcoString {
+ fn from(s: &Self) -> Self {
+ s.clone()
+ }
+}
+
+impl From<char> for EcoString {
+ fn from(c: char) -> Self {
+ let mut buf = [0; LIMIT];
+ let len = c.encode_utf8(&mut buf).len();
+ Self(Repr::Small { buf, len: len as u8 })
+ }
+}
+
+impl From<&str> for EcoString {
+ fn from(s: &str) -> Self {
+ Self::from_str(s)
+ }
+}
+
+impl From<String> for EcoString {
+ fn from(s: String) -> Self {
+ Self::from_str(s)
+ }
+}
+
+impl From<&String> for EcoString {
+ fn from(s: &String) -> Self {
+ Self::from_str(s)
+ }
+}
+
+impl Deref for EcoString {
+ type Target = str;
+
+ fn deref(&self) -> &str {
+ match &self.0 {
+ Repr::Small { buf, len } => unsafe {
+ std::str::from_utf8_unchecked(&buf[.. usize::from(*len)])
+ },
+ Repr::Large(string) => string.as_str(),
+ }
+ }
+}
+
+impl AsRef<str> for EcoString {
+ fn as_ref(&self) -> &str {
+ self
+ }
+}
+
+impl Borrow<str> for EcoString {
+ fn borrow(&self) -> &str {
+ self
+ }
+}
+
+impl Default for EcoString {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl Display for EcoString {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ Display::fmt(self.as_str(), f)
+ }
+}
+
+impl Debug for EcoString {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ Debug::fmt(self.as_str(), f)
+ }
+}
+
+impl Eq for EcoString {}
+
+impl PartialEq for EcoString {
+ fn eq(&self, other: &Self) -> bool {
+ self.as_str().eq(other.as_str())
+ }
+}
+
+impl PartialEq<str> for EcoString {
+ fn eq(&self, other: &str) -> bool {
+ self.as_str().eq(other)
+ }
+}
+
+impl PartialEq<&str> for EcoString {
+ fn eq(&self, other: &&str) -> bool {
+ self.as_str().eq(*other)
+ }
+}
+
+impl PartialEq<String> for EcoString {
+ fn eq(&self, other: &String) -> bool {
+ self.as_str().eq(other.as_str())
+ }
+}
+
+impl Ord for EcoString {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.as_str().cmp(other.as_str())
+ }
+}
+
+impl PartialOrd for EcoString {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.as_str().partial_cmp(other.as_str())
+ }
+}
+
+impl Add<&Self> for EcoString {
+ type Output = Self;
+
+ fn add(self, rhs: &Self) -> Self::Output {
+ self + rhs.as_str()
+ }
+}
+
+impl Add<&str> for EcoString {
+ type Output = Self;
+
+ fn add(mut self, rhs: &str) -> Self::Output {
+ self.push_str(rhs);
+ self
+ }
+}
+
+impl AddAssign<&str> for EcoString {
+ fn add_assign(&mut self, rhs: &str) {
+ self.push_str(rhs);
+ }
+}
+
+impl Hash for EcoString {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.as_str().hash(state);
+ }
+}
+
+impl Write for EcoString {
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ self.push_str(s);
+ Ok(())
+ }
+
+ fn write_char(&mut self, c: char) -> fmt::Result {
+ self.push(c);
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const ALPH: &str = "abcdefghijklmnopqrstuvwxyz";
+
+ #[test]
+ fn test_str_new() {
+ // Test inline strings.
+ assert_eq!(EcoString::new(), "");
+ assert_eq!(EcoString::from('a'), "a");
+ assert_eq!(EcoString::from('😀'), "😀");
+ assert_eq!(EcoString::from("abc"), "abc");
+
+ // Test around the inline limit.
+ assert_eq!(EcoString::from(&ALPH[.. LIMIT - 1]), ALPH[.. LIMIT - 1]);
+ assert_eq!(EcoString::from(&ALPH[.. LIMIT]), ALPH[.. LIMIT]);
+ assert_eq!(EcoString::from(&ALPH[.. LIMIT + 1]), ALPH[.. LIMIT + 1]);
+
+ // Test heap string.
+ assert_eq!(EcoString::from(ALPH), ALPH);
+ }
+
+ #[test]
+ fn test_str_push() {
+ let mut v = EcoString::new();
+ v.push('a');
+ v.push('b');
+ v.push_str("cd😀");
+ assert_eq!(v, "abcd😀");
+ assert_eq!(v.len(), 8);
+
+ // Test fully filling the inline storage.
+ v.push_str("efghij");
+ assert_eq!(v.len(), LIMIT);
+
+ // Test spilling with `push`.
+ let mut a = v.clone();
+ a.push('k');
+ assert_eq!(a, "abcd😀efghijk");
+ assert_eq!(a.len(), 15);
+
+ // Test spilling with `push_str`.
+ let mut b = v.clone();
+ b.push_str("klmn");
+ assert_eq!(b, "abcd😀efghijklmn");
+ assert_eq!(b.len(), 18);
+
+ // v should be unchanged.
+ assert_eq!(v.len(), LIMIT);
+ }
+
+ #[test]
+ fn test_str_pop() {
+ // Test with inline string.
+ let mut v = EcoString::from("Hello World!");
+ assert_eq!(v.pop(), Some('!'));
+ assert_eq!(v, "Hello World");
+
+ // Remove one-by-one.
+ for _ in 0 .. 10 {
+ v.pop();
+ }
+
+ assert_eq!(v, "H");
+ assert_eq!(v.pop(), Some('H'));
+ assert_eq!(v, "");
+ assert!(v.is_empty());
+
+ // Test with large string.
+ let mut v = EcoString::from(ALPH);
+ assert_eq!(v.pop(), Some('z'));
+ assert_eq!(v.len(), 25);
+ }
+
+ #[test]
+ fn test_str_index() {
+ // Test that we can use the index syntax.
+ let v = EcoString::from("abc");
+ assert_eq!(&v[.. 2], "ab");
+ }
+
+ #[test]
+ fn test_str_repeat() {
+ // Test with empty string.
+ assert_eq!(EcoString::new().repeat(0), "");
+ assert_eq!(EcoString::new().repeat(100), "");
+
+ // Test non-spilling and spilling case.
+ let v = EcoString::from("abc");
+ assert_eq!(v.repeat(0), "");
+ assert_eq!(v.repeat(3), "abcabcabc");
+ assert_eq!(v.repeat(5), "abcabcabcabcabc");
+ }
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
new file mode 100644
index 00000000..2c53bc70
--- /dev/null
+++ b/src/util/mod.rs
@@ -0,0 +1,155 @@
+//! Utilities.
+
+mod eco;
+
+pub use eco::EcoString;
+
+use std::cmp::Ordering;
+use std::ops::Range;
+use std::path::{Component, Path, PathBuf};
+
+/// Additional methods for options.
+pub trait OptionExt<T> {
+ /// Replace `self` with `other` if `self` is `Some`.
+ fn and_set(&mut self, other: Option<T>);
+
+ /// Sets `other` as the value if `self` is `None` or if it contains a value
+ /// larger than `other`.
+ fn set_min(&mut self, other: T)
+ where
+ T: Ord;
+
+ /// Sets `other` as the value if `self` is `None` or if it contains a value
+ /// smaller than `other`.
+ fn set_max(&mut self, other: T)
+ where
+ T: Ord;
+}
+
+impl<T> OptionExt<T> for Option<T> {
+ fn and_set(&mut self, other: Option<T>) {
+ if self.is_some() {
+ *self = other;
+ }
+ }
+
+ fn set_min(&mut self, other: T)
+ where
+ T: Ord,
+ {
+ if self.as_ref().map_or(true, |x| other < *x) {
+ *self = Some(other);
+ }
+ }
+
+ fn set_max(&mut self, other: T)
+ where
+ T: Ord,
+ {
+ if self.as_ref().map_or(true, |x| other > *x) {
+ *self = Some(other);
+ }
+ }
+}
+
+/// Additional methods for slices.
+pub trait SliceExt<T> {
+ /// Split a slice into consecutive groups with the same key.
+ ///
+ /// Returns an iterator of pairs of a key and the group with that key.
+ fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
+ where
+ F: FnMut(&T) -> K,
+ K: PartialEq;
+}
+
+impl<T> SliceExt<T> for [T] {
+ fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
+ where
+ F: FnMut(&T) -> K,
+ K: PartialEq,
+ {
+ GroupByKey { slice: self, f }
+ }
+}
+
+/// This struct is produced by [`SliceExt::group_by_key`].
+pub struct GroupByKey<'a, T, F> {
+ slice: &'a [T],
+ f: F,
+}
+
+impl<'a, T, K, F> Iterator for GroupByKey<'a, T, F>
+where
+ F: FnMut(&T) -> K,
+ K: PartialEq,
+{
+ type Item = (K, &'a [T]);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let first = self.slice.first()?;
+ let key = (self.f)(first);
+
+ let mut i = 1;
+ while self.slice.get(i).map_or(false, |t| (self.f)(t) == key) {
+ i += 1;
+ }
+
+ let (head, tail) = self.slice.split_at(i);
+ self.slice = tail;
+ Some((key, head))
+ }
+}
+
+/// Additional methods for [`Range<usize>`].
+pub trait RangeExt {
+ /// Locate a position relative to a range.
+ ///
+ /// This can be used for binary searching the range that contains the
+ /// position as follows:
+ /// ```
+ /// # use typst::util::RangeExt;
+ /// assert_eq!(
+ /// [1..2, 2..7, 7..10].binary_search_by(|r| r.locate(5)),
+ /// Ok(1),
+ /// );
+ /// ```
+ fn locate(&self, pos: usize) -> Ordering;
+}
+
+impl RangeExt for Range<usize> {
+ fn locate(&self, pos: usize) -> Ordering {
+ if pos < self.start {
+ Ordering::Greater
+ } else if pos < self.end {
+ Ordering::Equal
+ } else {
+ Ordering::Less
+ }
+ }
+}
+
+/// Additional methods for [`Path`].
+pub trait PathExt {
+ /// Lexically normalize a path.
+ fn normalize(&self) -> PathBuf;
+}
+
+impl PathExt for Path {
+ fn normalize(&self) -> PathBuf {
+ let mut out = PathBuf::new();
+ for component in self.components() {
+ match component {
+ Component::CurDir => {}
+ Component::ParentDir => match out.components().next_back() {
+ Some(Component::Normal(_)) => {
+ out.pop();
+ }
+ _ => out.push(component),
+ },
+ _ => out.push(component),
+ }
+ }
+ out
+ }
+}