From a1d47695a2af5afa466c21ad812a1a8212780293 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 23 Feb 2023 12:15:38 +0100 Subject: Switch to ecow --- src/util/buffer.rs | 12 +- src/util/eco.rs | 515 ----------------------------------------------------- src/util/mod.rs | 3 - 3 files changed, 3 insertions(+), 527 deletions(-) delete mode 100644 src/util/eco.rs (limited to 'src/util') diff --git a/src/util/buffer.rs b/src/util/buffer.rs index 766b2084..da12b6cb 100644 --- a/src/util/buffer.rs +++ b/src/util/buffer.rs @@ -6,7 +6,7 @@ use comemo::Prehashed; /// A shared buffer that is cheap to clone and hash. #[derive(Clone, Hash, Eq, PartialEq)] -pub struct Buffer(Prehashed>>); +pub struct Buffer(Arc>>); impl Buffer { /// Return a view into the buffer. @@ -22,19 +22,13 @@ impl Buffer { impl From<&[u8]> for Buffer { fn from(slice: &[u8]) -> Self { - Self(Prehashed::new(Arc::new(slice.to_vec()))) + Self(Arc::new(Prehashed::new(slice.to_vec()))) } } impl From> for Buffer { fn from(vec: Vec) -> Self { - Self(Prehashed::new(Arc::new(vec))) - } -} - -impl From>> for Buffer { - fn from(arc: Arc>) -> Self { - Self(Prehashed::new(arc)) + Self(Arc::new(Prehashed::new(vec))) } } diff --git a/src/util/eco.rs b/src/util/eco.rs deleted file mode 100644 index ceaac2f5..00000000 --- a/src/util/eco.rs +++ /dev/null @@ -1,515 +0,0 @@ -use std::borrow::{Borrow, Cow}; -use std::cmp::Ordering; -use std::fmt::{self, Debug, Display, Formatter, Write}; -use std::hash::{Hash, Hasher}; -use std::ops::{Add, AddAssign, Deref}; -use std::sync::Arc; - -use super::ArcExt; - -/// Create a new [`EcoString`] from a format string. -#[macro_export] -#[doc(hidden)] -macro_rules! __format_eco { - ($($tts:tt)*) => {{ - use std::fmt::Write; - let mut s = $crate::util::EcoString::new(); - write!(s, $($tts)*).unwrap(); - s - }}; -} - -#[doc(inline)] -pub use crate::__format_eco as format_eco; - -/// An economical string with inline storage and clone-on-write semantics. -#[derive(Clone)] -pub struct EcoString(Repr); - -/// The internal representation. Either: -/// - inline when below a certain number of bytes, or -/// - reference-counted on the heap with clone-on-write semantics. -#[derive(Clone)] -enum Repr { - Small { buf: [u8; LIMIT], len: u8 }, - Large(Arc), -} - -/// The maximum number of bytes that can be stored inline. -/// -/// The value is chosen such that an `EcoString` fits exactly into 16 bytes -/// (which are needed anyway due to the `Arc`s alignment, at least on 64-bit -/// platforms). -/// -/// Must be at least 4 to hold any char. -const LIMIT: usize = 14; - -impl EcoString { - /// Create a new, empty string. - pub const fn new() -> Self { - Self(Repr::Small { buf: [0; LIMIT], len: 0 }) - } - - /// Create a new, empty string with the given `capacity`. - pub fn with_capacity(capacity: usize) -> Self { - if capacity <= LIMIT { - Self::new() - } else { - Self(Repr::Large(Arc::new(String::with_capacity(capacity)))) - } - } - - /// Create an instance from an existing string-like type. - pub fn from_str_like(s: S) -> Self - where - S: AsRef + Into, - { - let slice = s.as_ref(); - let len = slice.len(); - Self(if len <= LIMIT { - let mut buf = [0; LIMIT]; - buf[..len].copy_from_slice(slice.as_bytes()); - Repr::Small { buf, len: len as u8 } - } else { - Repr::Large(Arc::new(s.into())) - }) - } - - /// Whether the string is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// The length of the string in bytes. - pub fn len(&self) -> usize { - match &self.0 { - Repr::Small { len, .. } => usize::from(*len), - Repr::Large(string) => string.len(), - } - } - - /// A string slice containing the entire string. - pub fn as_str(&self) -> &str { - self - } - - /// Append the given character at the end. - pub fn push(&mut self, c: char) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - if c.len_utf8() == 1 && prev < LIMIT { - buf[prev] = c as u8; - *len += 1; - } else { - self.push_str(c.encode_utf8(&mut [0; 4])); - } - } - Repr::Large(rc) => Arc::make_mut(rc).push(c), - } - } - - /// Append the given string slice at the end. - pub fn push_str(&mut self, string: &str) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - let new = prev + string.len(); - if new <= LIMIT { - buf[prev..new].copy_from_slice(string.as_bytes()); - *len = new as u8; - } else { - let mut spilled = String::with_capacity(new); - spilled.push_str(self); - spilled.push_str(string); - self.0 = Repr::Large(Arc::new(spilled)); - } - } - Repr::Large(rc) => Arc::make_mut(rc).push_str(string), - } - } - - /// Remove the last character from the string. - pub fn pop(&mut self) -> Option { - let c = self.as_str().chars().rev().next()?; - match &mut self.0 { - Repr::Small { len, .. } => { - *len -= c.len_utf8() as u8; - } - Repr::Large(rc) => { - Arc::make_mut(rc).pop(); - } - } - Some(c) - } - - /// Clear the string. - pub fn clear(&mut self) { - match &mut self.0 { - Repr::Small { len, .. } => *len = 0, - Repr::Large(rc) => { - if Arc::strong_count(rc) == 1 { - Arc::make_mut(rc).clear(); - } else { - *self = Self::new(); - } - } - } - } - - /// Convert the string to lowercase. - pub fn to_lowercase(&self) -> Self { - if let Repr::Small { mut buf, len } = self.0 { - if self.is_ascii() { - buf[..usize::from(len)].make_ascii_lowercase(); - return Self(Repr::Small { buf, len }); - } - } - - self.as_str().to_lowercase().into() - } - - /// Convert the string to uppercase. - pub fn to_uppercase(&self) -> Self { - if let Repr::Small { mut buf, len } = self.0 { - if self.is_ascii() { - buf[..usize::from(len)].make_ascii_uppercase(); - return Self(Repr::Small { buf, len }); - } - } - - self.as_str().to_uppercase().into() - } - - /// Repeat this string `n` times. - pub fn repeat(&self, n: usize) -> Self { - if n == 0 { - return Self::new(); - } - - if let Repr::Small { buf, len } = self.0 { - let prev = usize::from(len); - let new = prev.saturating_mul(n); - if new <= LIMIT { - let src = &buf[..prev]; - let mut buf = [0; LIMIT]; - for i in 0..n { - buf[prev * i..prev * (i + 1)].copy_from_slice(src); - } - return Self(Repr::Small { buf, len: new as u8 }); - } - } - - self.as_str().repeat(n).into() - } -} - -impl Deref for EcoString { - type Target = str; - - fn deref(&self) -> &str { - match &self.0 { - // Safety: - // The buffer contents stem from correct UTF-8 sources: - // - Valid ASCII characters - // - Other string slices - // - Chars that were encoded with char::encode_utf8 - // Furthermore, we still do the bounds-check on the len in case - // it gets corrupted somehow. - Repr::Small { buf, len } => unsafe { - std::str::from_utf8_unchecked(&buf[..usize::from(*len)]) - }, - Repr::Large(string) => string.as_str(), - } - } -} - -impl Default for EcoString { - fn default() -> Self { - Self::new() - } -} - -impl Debug for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Debug::fmt(self.as_str(), f) - } -} - -impl Display for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Display::fmt(self.as_str(), f) - } -} - -impl Eq for EcoString {} - -impl PartialEq for EcoString { - fn eq(&self, other: &Self) -> bool { - self.as_str().eq(other.as_str()) - } -} - -impl PartialEq for EcoString { - fn eq(&self, other: &str) -> bool { - self.as_str().eq(other) - } -} - -impl PartialEq<&str> for EcoString { - fn eq(&self, other: &&str) -> bool { - self.as_str().eq(*other) - } -} - -impl PartialEq for str { - fn eq(&self, other: &EcoString) -> bool { - self.eq(other.as_str()) - } -} - -impl PartialEq for &str { - fn eq(&self, other: &EcoString) -> bool { - (*self).eq(other.as_str()) - } -} - -impl Ord for EcoString { - fn cmp(&self, other: &Self) -> Ordering { - self.as_str().cmp(other.as_str()) - } -} - -impl PartialOrd for EcoString { - fn partial_cmp(&self, other: &Self) -> Option { - self.as_str().partial_cmp(other.as_str()) - } -} - -impl Hash for EcoString { - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl Write for EcoString { - fn write_str(&mut self, s: &str) -> fmt::Result { - self.push_str(s); - Ok(()) - } - - fn write_char(&mut self, c: char) -> fmt::Result { - self.push(c); - Ok(()) - } -} - -impl Add for EcoString { - type Output = Self; - - fn add(mut self, rhs: Self) -> Self::Output { - self += rhs; - self - } -} - -impl AddAssign for EcoString { - fn add_assign(&mut self, rhs: Self) { - self.push_str(rhs.as_str()); - } -} - -impl AsRef for EcoString { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow for EcoString { - fn borrow(&self) -> &str { - self - } -} - -impl From for EcoString { - fn from(c: char) -> Self { - let mut buf = [0; LIMIT]; - let len = c.encode_utf8(&mut buf).len(); - Self(Repr::Small { buf, len: len as u8 }) - } -} - -impl From<&str> for EcoString { - fn from(s: &str) -> Self { - Self::from_str_like(s) - } -} - -impl From for EcoString { - fn from(s: String) -> Self { - Self::from_str_like(s) - } -} - -impl From> for EcoString { - fn from(s: Cow) -> Self { - match s { - Cow::Borrowed(s) => s.into(), - Cow::Owned(s) => s.into(), - } - } -} - -impl FromIterator for EcoString { - fn from_iter>(iter: T) -> Self { - let mut s = Self::new(); - for c in iter { - s.push(c); - } - s - } -} - -impl FromIterator for EcoString { - fn from_iter>(iter: T) -> Self { - let mut s = Self::new(); - for piece in iter { - s.push_str(&piece); - } - s - } -} - -impl Extend for EcoString { - fn extend>(&mut self, iter: T) { - for c in iter { - self.push(c); - } - } -} - -impl From for String { - fn from(s: EcoString) -> Self { - match s.0 { - Repr::Small { .. } => s.as_str().to_owned(), - Repr::Large(rc) => Arc::take(rc), - } - } -} - -impl From<&EcoString> for String { - fn from(s: &EcoString) -> Self { - s.as_str().to_owned() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - const ALPH: &str = "abcdefghijklmnopqrstuvwxyz"; - - #[test] - fn test_str_new() { - // Test inline strings. - assert_eq!(EcoString::new(), ""); - assert_eq!(EcoString::from('a'), "a"); - assert_eq!(EcoString::from('😀'), "😀"); - assert_eq!(EcoString::from("abc"), "abc"); - - // Test around the inline limit. - assert_eq!(EcoString::from(&ALPH[..LIMIT - 1]), ALPH[..LIMIT - 1]); - assert_eq!(EcoString::from(&ALPH[..LIMIT]), ALPH[..LIMIT]); - assert_eq!(EcoString::from(&ALPH[..LIMIT + 1]), ALPH[..LIMIT + 1]); - - // Test heap string. - assert_eq!(EcoString::from(ALPH), ALPH); - } - - #[test] - fn test_str_push() { - let mut v = EcoString::new(); - v.push('a'); - v.push('b'); - v.push_str("cd😀"); - assert_eq!(v, "abcd😀"); - assert_eq!(v.len(), 8); - - // Test fully filling the inline storage. - v.push_str("efghij"); - assert_eq!(v.len(), LIMIT); - - // Test spilling with `push`. - let mut a = v.clone(); - a.push('k'); - assert_eq!(a, "abcd😀efghijk"); - assert_eq!(a.len(), 15); - - // Test spilling with `push_str`. - let mut b = v.clone(); - b.push_str("klmn"); - assert_eq!(b, "abcd😀efghijklmn"); - assert_eq!(b.len(), 18); - - // v should be unchanged. - assert_eq!(v.len(), LIMIT); - } - - #[test] - fn test_str_pop() { - // Test with inline string. - let mut v = EcoString::from("Hello World!"); - assert_eq!(v.pop(), Some('!')); - assert_eq!(v, "Hello World"); - - // Remove one-by-one. - for _ in 0..10 { - v.pop(); - } - - assert_eq!(v, "H"); - assert_eq!(v.pop(), Some('H')); - assert_eq!(v, ""); - assert!(v.is_empty()); - - // Test with large string. - let mut v = EcoString::from(ALPH); - assert_eq!(v.pop(), Some('z')); - assert_eq!(v.len(), 25); - } - - #[test] - fn test_str_index() { - // Test that we can use the index syntax. - let v = EcoString::from("abc"); - assert_eq!(&v[..2], "ab"); - } - - #[test] - fn test_str_case() { - assert_eq!(EcoString::new().to_uppercase(), ""); - assert_eq!(EcoString::from("abc").to_uppercase(), "ABC"); - assert_eq!(EcoString::from("AΣ").to_lowercase(), "aς"); - assert_eq!( - EcoString::from("a").repeat(100).to_uppercase(), - EcoString::from("A").repeat(100) - ); - assert_eq!( - EcoString::from("Ö").repeat(20).to_lowercase(), - EcoString::from("ö").repeat(20) - ); - } - - #[test] - fn test_str_repeat() { - // Test with empty string. - assert_eq!(EcoString::new().repeat(0), ""); - assert_eq!(EcoString::new().repeat(100), ""); - - // Test non-spilling and spilling case. - let v = EcoString::from("abc"); - assert_eq!(v.repeat(0), ""); - assert_eq!(v.repeat(3), "abcabcabc"); - assert_eq!(v.repeat(5), "abcabcabcabcabc"); - } -} diff --git a/src/util/mod.rs b/src/util/mod.rs index b20dd053..ce0ed1aa 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,12 +2,9 @@ pub mod fat; -#[macro_use] -mod eco; mod buffer; pub use buffer::Buffer; -pub use eco::{format_eco, EcoString}; use std::any::TypeId; use std::fmt::{self, Debug, Formatter}; -- cgit v1.2.3