From 312dcd070cf79c1dd5503f90ef10588fe4612108 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 29 Jul 2021 11:35:49 +0200 Subject: Move EcoString and OptionExt into util --- src/eco.rs | 412 ---------------------------------------------- src/eval/dict.rs | 4 +- src/eval/function.rs | 2 +- src/eval/mod.rs | 2 +- src/eval/template.rs | 2 +- src/eval/value.rs | 2 +- src/exec/context.rs | 2 +- src/exec/mod.rs | 2 +- src/layout/incremental.rs | 45 +---- src/layout/mod.rs | 1 + src/layout/par.rs | 3 +- src/lib.rs | 1 - src/library/mod.rs | 2 +- src/parse/mod.rs | 2 +- src/parse/resolve.rs | 2 +- src/syntax/ident.rs | 2 +- src/syntax/mod.rs | 2 +- src/util.rs | 107 ------------ src/util/eco.rs | 412 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/mod.rs | 155 +++++++++++++++++ 20 files changed, 586 insertions(+), 576 deletions(-) delete mode 100644 src/eco.rs delete mode 100644 src/util.rs create mode 100644 src/util/eco.rs create mode 100644 src/util/mod.rs diff --git a/src/eco.rs b/src/eco.rs deleted file mode 100644 index a52a163c..00000000 --- a/src/eco.rs +++ /dev/null @@ -1,412 +0,0 @@ -//! An economical string. - -use std::borrow::Borrow; -use std::cmp::Ordering; -use std::fmt::{self, Debug, Display, Formatter, Write}; -use std::hash::{Hash, Hasher}; -use std::ops::{Add, AddAssign, Deref}; -use std::rc::Rc; - -/// An economical string with inline storage and clone-on-write value semantics. -#[derive(Clone)] -pub struct EcoString(Repr); - -/// The internal representation. Either: -/// - inline when below a certain number of bytes, -/// - or reference-counted on the heap with COW semantics. -#[derive(Clone)] -enum Repr { - Small { buf: [u8; LIMIT], len: u8 }, - Large(Rc), -} - -/// The maximum number of bytes that can be stored inline. -/// -/// The value is chosen such that an `EcoString` fits exactly into 16 bytes -/// (which are needed anyway due to the `Rc`s alignment, at least on 64-bit -/// platforms). -/// -/// Must be at least 4 to hold any char. -const LIMIT: usize = 14; - -impl EcoString { - /// Create a new, empty string. - pub fn new() -> Self { - Self(Repr::Small { buf: [0; LIMIT], len: 0 }) - } - - /// Create a new, empty string with the given `capacity`. - pub fn with_capacity(capacity: usize) -> Self { - if capacity <= LIMIT { - Self::new() - } else { - Self(Repr::Large(Rc::new(String::with_capacity(capacity)))) - } - } - - /// Create an instance from an existing string-like type. - pub fn from_str(s: S) -> Self - where - S: AsRef + Into, - { - let slice = s.as_ref(); - let len = slice.len(); - Self(if len <= LIMIT { - let mut buf = [0; LIMIT]; - buf[.. len].copy_from_slice(slice.as_bytes()); - Repr::Small { buf, len: len as u8 } - } else { - Repr::Large(Rc::new(s.into())) - }) - } - - /// Whether the string is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// The length of the string in bytes. - pub fn len(&self) -> usize { - match &self.0 { - Repr::Small { len, .. } => usize::from(*len), - Repr::Large(string) => string.len(), - } - } - - /// A string slice containing the entire string. - pub fn as_str(&self) -> &str { - self - } - - /// Append the given character at the end. - pub fn push(&mut self, c: char) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - if c.len_utf8() == 1 && prev < LIMIT { - buf[prev] = c as u8; - *len += 1; - } else { - self.push_str(c.encode_utf8(&mut [0; 4])); - } - } - Repr::Large(rc) => Rc::make_mut(rc).push(c), - } - } - - /// Append the given string slice at the end. - pub fn push_str(&mut self, string: &str) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - let new = prev + string.len(); - if new <= LIMIT { - buf[prev .. new].copy_from_slice(string.as_bytes()); - *len = new as u8; - } else { - let mut spilled = String::with_capacity(new); - spilled.push_str(self); - spilled.push_str(string); - self.0 = Repr::Large(Rc::new(spilled)); - } - } - Repr::Large(rc) => Rc::make_mut(rc).push_str(string), - } - } - - /// Remove the last character from the string. - pub fn pop(&mut self) -> Option { - let c = self.as_str().chars().rev().next()?; - match &mut self.0 { - Repr::Small { len, .. } => { - *len -= c.len_utf8() as u8; - } - Repr::Large(rc) => { - Rc::make_mut(rc).pop(); - } - } - Some(c) - } - - /// Clear the string. - pub fn clear(&mut self) { - match &mut self.0 { - Repr::Small { len, .. } => *len = 0, - Repr::Large(rc) => { - if Rc::strong_count(rc) == 1 { - Rc::make_mut(rc).clear(); - } else { - *self = Self::new(); - } - } - } - } - - /// Repeat this string `n` times. - pub fn repeat(&self, n: usize) -> Self { - if let Repr::Small { buf, len } = &self.0 { - let prev = usize::from(*len); - let new = prev.saturating_mul(n); - if new <= LIMIT { - let src = &buf[.. prev]; - let mut buf = [0; LIMIT]; - for i in 0 .. n { - buf[prev * i .. prev * (i + 1)].copy_from_slice(src); - } - return Self(Repr::Small { buf, len: new as u8 }); - } - } - - self.as_str().repeat(n).into() - } -} - -impl From<&Self> for EcoString { - fn from(s: &Self) -> Self { - s.clone() - } -} - -impl From for EcoString { - fn from(c: char) -> Self { - let mut buf = [0; LIMIT]; - let len = c.encode_utf8(&mut buf).len(); - Self(Repr::Small { buf, len: len as u8 }) - } -} - -impl From<&str> for EcoString { - fn from(s: &str) -> Self { - Self::from_str(s) - } -} - -impl From for EcoString { - fn from(s: String) -> Self { - Self::from_str(s) - } -} - -impl From<&String> for EcoString { - fn from(s: &String) -> Self { - Self::from_str(s) - } -} - -impl Deref for EcoString { - type Target = str; - - fn deref(&self) -> &str { - match &self.0 { - Repr::Small { buf, len } => unsafe { - std::str::from_utf8_unchecked(&buf[.. usize::from(*len)]) - }, - Repr::Large(string) => string.as_str(), - } - } -} - -impl AsRef for EcoString { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow for EcoString { - fn borrow(&self) -> &str { - self - } -} - -impl Default for EcoString { - fn default() -> Self { - Self::new() - } -} - -impl Display for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Display::fmt(self.as_str(), f) - } -} - -impl Debug for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Debug::fmt(self.as_str(), f) - } -} - -impl Eq for EcoString {} - -impl PartialEq for EcoString { - fn eq(&self, other: &Self) -> bool { - self.as_str().eq(other.as_str()) - } -} - -impl PartialEq for EcoString { - fn eq(&self, other: &str) -> bool { - self.as_str().eq(other) - } -} - -impl PartialEq<&str> for EcoString { - fn eq(&self, other: &&str) -> bool { - self.as_str().eq(*other) - } -} - -impl PartialEq for EcoString { - fn eq(&self, other: &String) -> bool { - self.as_str().eq(other.as_str()) - } -} - -impl Ord for EcoString { - fn cmp(&self, other: &Self) -> Ordering { - self.as_str().cmp(other.as_str()) - } -} - -impl PartialOrd for EcoString { - fn partial_cmp(&self, other: &Self) -> Option { - self.as_str().partial_cmp(other.as_str()) - } -} - -impl Add<&Self> for EcoString { - type Output = Self; - - fn add(self, rhs: &Self) -> Self::Output { - self + rhs.as_str() - } -} - -impl Add<&str> for EcoString { - type Output = Self; - - fn add(mut self, rhs: &str) -> Self::Output { - self.push_str(rhs); - self - } -} - -impl AddAssign<&str> for EcoString { - fn add_assign(&mut self, rhs: &str) { - self.push_str(rhs); - } -} - -impl Hash for EcoString { - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl Write for EcoString { - fn write_str(&mut self, s: &str) -> fmt::Result { - self.push_str(s); - Ok(()) - } - - fn write_char(&mut self, c: char) -> fmt::Result { - self.push(c); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - const ALPH: &str = "abcdefghijklmnopqrstuvwxyz"; - - #[test] - fn test_str_new() { - // Test inline strings. - assert_eq!(EcoString::new(), ""); - assert_eq!(EcoString::from('a'), "a"); - assert_eq!(EcoString::from('😀'), "😀"); - assert_eq!(EcoString::from("abc"), "abc"); - - // Test around the inline limit. - assert_eq!(EcoString::from(&ALPH[.. LIMIT - 1]), ALPH[.. LIMIT - 1]); - assert_eq!(EcoString::from(&ALPH[.. LIMIT]), ALPH[.. LIMIT]); - assert_eq!(EcoString::from(&ALPH[.. LIMIT + 1]), ALPH[.. LIMIT + 1]); - - // Test heap string. - assert_eq!(EcoString::from(ALPH), ALPH); - } - - #[test] - fn test_str_push() { - let mut v = EcoString::new(); - v.push('a'); - v.push('b'); - v.push_str("cd😀"); - assert_eq!(v, "abcd😀"); - assert_eq!(v.len(), 8); - - // Test fully filling the inline storage. - v.push_str("efghij"); - assert_eq!(v.len(), LIMIT); - - // Test spilling with `push`. - let mut a = v.clone(); - a.push('k'); - assert_eq!(a, "abcd😀efghijk"); - assert_eq!(a.len(), 15); - - // Test spilling with `push_str`. - let mut b = v.clone(); - b.push_str("klmn"); - assert_eq!(b, "abcd😀efghijklmn"); - assert_eq!(b.len(), 18); - - // v should be unchanged. - assert_eq!(v.len(), LIMIT); - } - - #[test] - fn test_str_pop() { - // Test with inline string. - let mut v = EcoString::from("Hello World!"); - assert_eq!(v.pop(), Some('!')); - assert_eq!(v, "Hello World"); - - // Remove one-by-one. - for _ in 0 .. 10 { - v.pop(); - } - - assert_eq!(v, "H"); - assert_eq!(v.pop(), Some('H')); - assert_eq!(v, ""); - assert!(v.is_empty()); - - // Test with large string. - let mut v = EcoString::from(ALPH); - assert_eq!(v.pop(), Some('z')); - assert_eq!(v.len(), 25); - } - - #[test] - fn test_str_index() { - // Test that we can use the index syntax. - let v = EcoString::from("abc"); - assert_eq!(&v[.. 2], "ab"); - } - - #[test] - fn test_str_repeat() { - // Test with empty string. - assert_eq!(EcoString::new().repeat(0), ""); - assert_eq!(EcoString::new().repeat(100), ""); - - // Test non-spilling and spilling case. - let v = EcoString::from("abc"); - assert_eq!(v.repeat(0), ""); - assert_eq!(v.repeat(3), "abcabcabc"); - assert_eq!(v.repeat(5), "abcabcabcabcabc"); - } -} diff --git a/src/eval/dict.rs b/src/eval/dict.rs index bf99ea17..c1c3f505 100644 --- a/src/eval/dict.rs +++ b/src/eval/dict.rs @@ -5,7 +5,7 @@ use std::ops::{Add, AddAssign}; use std::rc::Rc; use super::Value; -use crate::eco::EcoString; +use crate::util::EcoString; /// Create a new [`Dict`] from key-value pairs. #[macro_export] @@ -13,7 +13,7 @@ macro_rules! dict { ($($key:expr => $value:expr),* $(,)?) => {{ #[allow(unused_mut)] let mut map = std::collections::BTreeMap::new(); - $(map.insert($crate::eco::EcoString::from($key), $crate::eval::Value::from($value));)* + $(map.insert($crate::util::EcoString::from($key), $crate::eval::Value::from($value));)* $crate::eval::Dict::from_map(map) }}; } diff --git a/src/eval/function.rs b/src/eval/function.rs index daa5ff1e..ca447a48 100644 --- a/src/eval/function.rs +++ b/src/eval/function.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use std::rc::Rc; use super::{Cast, EvalContext, Value}; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::syntax::{Span, Spanned}; /// An evaluatable function. diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 5b00819b..682a3855 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -26,7 +26,7 @@ use std::path::Path; use std::rc::Rc; use crate::diag::{Diag, DiagSet, Pass}; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::geom::{Angle, Fractional, Length, Relative}; use crate::image::ImageCache; use crate::loading::{FileId, Loader}; diff --git a/src/eval/template.rs b/src/eval/template.rs index 4d003998..0be2492b 100644 --- a/src/eval/template.rs +++ b/src/eval/template.rs @@ -4,7 +4,7 @@ use std::ops::{Add, Deref}; use std::rc::Rc; use super::Value; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::exec::ExecContext; use crate::syntax::{Expr, SyntaxTree}; diff --git a/src/eval/value.rs b/src/eval/value.rs index 4b3cd807..b7fdcbc2 100644 --- a/src/eval/value.rs +++ b/src/eval/value.rs @@ -5,7 +5,7 @@ use std::rc::Rc; use super::{ops, Array, Dict, Function, Template, TemplateFunc}; use crate::color::{Color, RgbaColor}; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::exec::ExecContext; use crate::geom::{Angle, Fractional, Length, Linear, Relative}; use crate::syntax::Spanned; diff --git a/src/exec/context.rs b/src/exec/context.rs index 725907f2..3a3eb702 100644 --- a/src/exec/context.rs +++ b/src/exec/context.rs @@ -3,7 +3,7 @@ use std::rc::Rc; use super::{Exec, ExecWithMap, State}; use crate::diag::{Diag, DiagSet, Pass}; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::eval::{ExprMap, Template}; use crate::geom::{Align, Dir, Gen, GenAxis, Length, Linear, Sides, Size}; use crate::layout::{ diff --git a/src/exec/mod.rs b/src/exec/mod.rs index 8b769def..ff4faa22 100644 --- a/src/exec/mod.rs +++ b/src/exec/mod.rs @@ -9,7 +9,7 @@ pub use state::*; use std::fmt::Write; use crate::diag::Pass; -use crate::eco::EcoString; +use crate::util::EcoString; use crate::eval::{ExprMap, Template, TemplateFunc, TemplateNode, TemplateTree, Value}; use crate::geom::{Dir, Gen}; use crate::layout::{LayoutTree, StackChild, StackNode}; diff --git a/src/layout/incremental.rs b/src/layout/incremental.rs index cbd55330..352434ed 100644 --- a/src/layout/incremental.rs +++ b/src/layout/incremental.rs @@ -269,46 +269,9 @@ impl Constraints { let current = regions.current.to_spec(); let base = regions.base.to_spec(); - self.exact.horizontal.set_if_some(current.horizontal); - self.exact.vertical.set_if_some(current.vertical); - self.base.horizontal.set_if_some(base.horizontal); - self.base.vertical.set_if_some(base.vertical); - } -} - -/// Extends length-related options by providing convenience methods for setting -/// minimum and maximum lengths on them, even if they are `None`. -pub trait OptionExt { - /// Sets `other` as the value if `self` is `None` or if it contains a - /// value larger than `other`. - fn set_min(&mut self, other: Length); - - /// Sets `other` as the value if `self` is `None` or if it contains a - /// value smaller than `other`. - fn set_max(&mut self, other: Length); - - /// Sets `other` as the value if `self` is `Some`. - fn set_if_some(&mut self, other: Length); -} - -impl OptionExt for Option { - fn set_min(&mut self, other: Length) { - match self { - Some(x) => x.set_min(other), - None => *self = Some(other), - } - } - - fn set_max(&mut self, other: Length) { - match self { - Some(x) => x.set_max(other), - None => *self = Some(other), - } - } - - fn set_if_some(&mut self, other: Length) { - if self.is_some() { - *self = Some(other); - } + self.exact.horizontal.and_set(Some(current.horizontal)); + self.exact.vertical.and_set(Some(current.vertical)); + self.base.horizontal.and_set(Some(base.horizontal)); + self.base.vertical.and_set(Some(base.vertical)); } } diff --git a/src/layout/mod.rs b/src/layout/mod.rs index 0f88d150..56e0687a 100644 --- a/src/layout/mod.rs +++ b/src/layout/mod.rs @@ -32,6 +32,7 @@ use std::rc::Rc; use crate::font::FontCache; use crate::geom::*; use crate::image::ImageCache; +use crate::util::OptionExt; use crate::Context; /// Layout a tree into a collection of frames. diff --git a/src/layout/par.rs b/src/layout/par.rs index 56847b9b..03d7efd5 100644 --- a/src/layout/par.rs +++ b/src/layout/par.rs @@ -5,9 +5,8 @@ use unicode_bidi::{BidiInfo, Level}; use xi_unicode::LineBreakIterator; use super::*; -use crate::eco::EcoString; use crate::exec::TextState; -use crate::util::{RangeExt, SliceExt}; +use crate::util::{EcoString, RangeExt, SliceExt}; type Range = std::ops::Range; diff --git a/src/lib.rs b/src/lib.rs index 5c477248..da006333 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,6 @@ pub mod diag; #[macro_use] pub mod eval; pub mod color; -pub mod eco; pub mod exec; pub mod export; pub mod font; diff --git a/src/library/mod.rs b/src/library/mod.rs index 3c1d752d..213106d6 100644 --- a/src/library/mod.rs +++ b/src/library/mod.rs @@ -17,12 +17,12 @@ use std::fmt::{self, Display, Formatter}; use std::rc::Rc; use crate::color::{Color, RgbaColor}; -use crate::eco::EcoString; use crate::eval::{EvalContext, FuncArgs, Scope, Template, Type, Value}; use crate::exec::{Exec, FontFamily}; use crate::font::{FontStyle, FontWeight, VerticalFontMetric}; use crate::geom::*; use crate::syntax::Spanned; +use crate::util::EcoString; /// Construct a scope containing all standard library definitions. pub fn new() -> Scope { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 38a489b2..a94345a8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -15,8 +15,8 @@ pub use tokens::*; use std::rc::Rc; use crate::diag::Pass; -use crate::eco::EcoString; use crate::syntax::*; +use crate::util::EcoString; /// Parse a string of source code. pub fn parse(src: &str) -> Pass { diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index c3676215..f97d5383 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,6 +1,6 @@ use super::{is_newline, Scanner}; -use crate::eco::EcoString; use crate::syntax::{Ident, RawNode, Span}; +use crate::util::EcoString; /// Resolve all escape sequences in a string. pub fn resolve_string(string: &str) -> EcoString { diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs index b1328bbe..462361a5 100644 --- a/src/syntax/ident.rs +++ b/src/syntax/ident.rs @@ -3,7 +3,7 @@ use std::ops::Deref; use unicode_xid::UnicodeXID; use super::Span; -use crate::eco::EcoString; +use crate::util::EcoString; /// An unicode identifier with a few extra permissible characters. /// diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 895a5bc5..9a4ca708 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -13,7 +13,7 @@ pub use node::*; pub use span::*; pub use token::*; -use crate::eco::EcoString; +use crate::util::EcoString; /// The abstract syntax tree. /// diff --git a/src/util.rs b/src/util.rs deleted file mode 100644 index 8a8c04b6..00000000 --- a/src/util.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! Utilities. - -use std::cmp::Ordering; -use std::ops::Range; -use std::path::{Component, Path, PathBuf}; - -/// Additional methods for slices. -pub trait SliceExt { - /// Split a slice into consecutive groups with the same key. - /// - /// Returns an iterator of pairs of a key and the group with that key. - fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> - where - F: FnMut(&T) -> K, - K: PartialEq; -} - -impl SliceExt for [T] { - fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> - where - F: FnMut(&T) -> K, - K: PartialEq, - { - GroupByKey { slice: self, f } - } -} - -/// This struct is produced by [`SliceExt::group_by_key`]. -pub struct GroupByKey<'a, T, F> { - slice: &'a [T], - f: F, -} - -impl<'a, T, K, F> Iterator for GroupByKey<'a, T, F> -where - F: FnMut(&T) -> K, - K: PartialEq, -{ - type Item = (K, &'a [T]); - - fn next(&mut self) -> Option { - let first = self.slice.first()?; - let key = (self.f)(first); - - let mut i = 1; - while self.slice.get(i).map_or(false, |t| (self.f)(t) == key) { - i += 1; - } - - let (head, tail) = self.slice.split_at(i); - self.slice = tail; - Some((key, head)) - } -} - -/// Additional methods for [`Range`]. -pub trait RangeExt { - /// Locate a position relative to a range. - /// - /// This can be used for binary searching the range that contains the - /// position as follows: - /// ``` - /// # use typst::util::RangeExt; - /// assert_eq!( - /// [1..2, 2..7, 7..10].binary_search_by(|r| r.locate(5)), - /// Ok(1), - /// ); - /// ``` - fn locate(&self, pos: usize) -> Ordering; -} - -impl RangeExt for Range { - fn locate(&self, pos: usize) -> Ordering { - if pos < self.start { - Ordering::Greater - } else if pos < self.end { - Ordering::Equal - } else { - Ordering::Less - } - } -} - -/// Additional methods for [`Path`]. -pub trait PathExt { - /// Lexically normalize a path. - fn normalize(&self) -> PathBuf; -} - -impl PathExt for Path { - fn normalize(&self) -> PathBuf { - let mut out = PathBuf::new(); - for component in self.components() { - match component { - Component::CurDir => {} - Component::ParentDir => match out.components().next_back() { - Some(Component::Normal(_)) => { - out.pop(); - } - _ => out.push(component), - }, - _ => out.push(component), - } - } - out - } -} diff --git a/src/util/eco.rs b/src/util/eco.rs new file mode 100644 index 00000000..a52a163c --- /dev/null +++ b/src/util/eco.rs @@ -0,0 +1,412 @@ +//! An economical string. + +use std::borrow::Borrow; +use std::cmp::Ordering; +use std::fmt::{self, Debug, Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; +use std::ops::{Add, AddAssign, Deref}; +use std::rc::Rc; + +/// An economical string with inline storage and clone-on-write value semantics. +#[derive(Clone)] +pub struct EcoString(Repr); + +/// The internal representation. Either: +/// - inline when below a certain number of bytes, +/// - or reference-counted on the heap with COW semantics. +#[derive(Clone)] +enum Repr { + Small { buf: [u8; LIMIT], len: u8 }, + Large(Rc), +} + +/// The maximum number of bytes that can be stored inline. +/// +/// The value is chosen such that an `EcoString` fits exactly into 16 bytes +/// (which are needed anyway due to the `Rc`s alignment, at least on 64-bit +/// platforms). +/// +/// Must be at least 4 to hold any char. +const LIMIT: usize = 14; + +impl EcoString { + /// Create a new, empty string. + pub fn new() -> Self { + Self(Repr::Small { buf: [0; LIMIT], len: 0 }) + } + + /// Create a new, empty string with the given `capacity`. + pub fn with_capacity(capacity: usize) -> Self { + if capacity <= LIMIT { + Self::new() + } else { + Self(Repr::Large(Rc::new(String::with_capacity(capacity)))) + } + } + + /// Create an instance from an existing string-like type. + pub fn from_str(s: S) -> Self + where + S: AsRef + Into, + { + let slice = s.as_ref(); + let len = slice.len(); + Self(if len <= LIMIT { + let mut buf = [0; LIMIT]; + buf[.. len].copy_from_slice(slice.as_bytes()); + Repr::Small { buf, len: len as u8 } + } else { + Repr::Large(Rc::new(s.into())) + }) + } + + /// Whether the string is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// The length of the string in bytes. + pub fn len(&self) -> usize { + match &self.0 { + Repr::Small { len, .. } => usize::from(*len), + Repr::Large(string) => string.len(), + } + } + + /// A string slice containing the entire string. + pub fn as_str(&self) -> &str { + self + } + + /// Append the given character at the end. + pub fn push(&mut self, c: char) { + match &mut self.0 { + Repr::Small { buf, len } => { + let prev = usize::from(*len); + if c.len_utf8() == 1 && prev < LIMIT { + buf[prev] = c as u8; + *len += 1; + } else { + self.push_str(c.encode_utf8(&mut [0; 4])); + } + } + Repr::Large(rc) => Rc::make_mut(rc).push(c), + } + } + + /// Append the given string slice at the end. + pub fn push_str(&mut self, string: &str) { + match &mut self.0 { + Repr::Small { buf, len } => { + let prev = usize::from(*len); + let new = prev + string.len(); + if new <= LIMIT { + buf[prev .. new].copy_from_slice(string.as_bytes()); + *len = new as u8; + } else { + let mut spilled = String::with_capacity(new); + spilled.push_str(self); + spilled.push_str(string); + self.0 = Repr::Large(Rc::new(spilled)); + } + } + Repr::Large(rc) => Rc::make_mut(rc).push_str(string), + } + } + + /// Remove the last character from the string. + pub fn pop(&mut self) -> Option { + let c = self.as_str().chars().rev().next()?; + match &mut self.0 { + Repr::Small { len, .. } => { + *len -= c.len_utf8() as u8; + } + Repr::Large(rc) => { + Rc::make_mut(rc).pop(); + } + } + Some(c) + } + + /// Clear the string. + pub fn clear(&mut self) { + match &mut self.0 { + Repr::Small { len, .. } => *len = 0, + Repr::Large(rc) => { + if Rc::strong_count(rc) == 1 { + Rc::make_mut(rc).clear(); + } else { + *self = Self::new(); + } + } + } + } + + /// Repeat this string `n` times. + pub fn repeat(&self, n: usize) -> Self { + if let Repr::Small { buf, len } = &self.0 { + let prev = usize::from(*len); + let new = prev.saturating_mul(n); + if new <= LIMIT { + let src = &buf[.. prev]; + let mut buf = [0; LIMIT]; + for i in 0 .. n { + buf[prev * i .. prev * (i + 1)].copy_from_slice(src); + } + return Self(Repr::Small { buf, len: new as u8 }); + } + } + + self.as_str().repeat(n).into() + } +} + +impl From<&Self> for EcoString { + fn from(s: &Self) -> Self { + s.clone() + } +} + +impl From for EcoString { + fn from(c: char) -> Self { + let mut buf = [0; LIMIT]; + let len = c.encode_utf8(&mut buf).len(); + Self(Repr::Small { buf, len: len as u8 }) + } +} + +impl From<&str> for EcoString { + fn from(s: &str) -> Self { + Self::from_str(s) + } +} + +impl From for EcoString { + fn from(s: String) -> Self { + Self::from_str(s) + } +} + +impl From<&String> for EcoString { + fn from(s: &String) -> Self { + Self::from_str(s) + } +} + +impl Deref for EcoString { + type Target = str; + + fn deref(&self) -> &str { + match &self.0 { + Repr::Small { buf, len } => unsafe { + std::str::from_utf8_unchecked(&buf[.. usize::from(*len)]) + }, + Repr::Large(string) => string.as_str(), + } + } +} + +impl AsRef for EcoString { + fn as_ref(&self) -> &str { + self + } +} + +impl Borrow for EcoString { + fn borrow(&self) -> &str { + self + } +} + +impl Default for EcoString { + fn default() -> Self { + Self::new() + } +} + +impl Display for EcoString { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Display::fmt(self.as_str(), f) + } +} + +impl Debug for EcoString { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Debug::fmt(self.as_str(), f) + } +} + +impl Eq for EcoString {} + +impl PartialEq for EcoString { + fn eq(&self, other: &Self) -> bool { + self.as_str().eq(other.as_str()) + } +} + +impl PartialEq for EcoString { + fn eq(&self, other: &str) -> bool { + self.as_str().eq(other) + } +} + +impl PartialEq<&str> for EcoString { + fn eq(&self, other: &&str) -> bool { + self.as_str().eq(*other) + } +} + +impl PartialEq for EcoString { + fn eq(&self, other: &String) -> bool { + self.as_str().eq(other.as_str()) + } +} + +impl Ord for EcoString { + fn cmp(&self, other: &Self) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for EcoString { + fn partial_cmp(&self, other: &Self) -> Option { + self.as_str().partial_cmp(other.as_str()) + } +} + +impl Add<&Self> for EcoString { + type Output = Self; + + fn add(self, rhs: &Self) -> Self::Output { + self + rhs.as_str() + } +} + +impl Add<&str> for EcoString { + type Output = Self; + + fn add(mut self, rhs: &str) -> Self::Output { + self.push_str(rhs); + self + } +} + +impl AddAssign<&str> for EcoString { + fn add_assign(&mut self, rhs: &str) { + self.push_str(rhs); + } +} + +impl Hash for EcoString { + fn hash(&self, state: &mut H) { + self.as_str().hash(state); + } +} + +impl Write for EcoString { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } + + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const ALPH: &str = "abcdefghijklmnopqrstuvwxyz"; + + #[test] + fn test_str_new() { + // Test inline strings. + assert_eq!(EcoString::new(), ""); + assert_eq!(EcoString::from('a'), "a"); + assert_eq!(EcoString::from('😀'), "😀"); + assert_eq!(EcoString::from("abc"), "abc"); + + // Test around the inline limit. + assert_eq!(EcoString::from(&ALPH[.. LIMIT - 1]), ALPH[.. LIMIT - 1]); + assert_eq!(EcoString::from(&ALPH[.. LIMIT]), ALPH[.. LIMIT]); + assert_eq!(EcoString::from(&ALPH[.. LIMIT + 1]), ALPH[.. LIMIT + 1]); + + // Test heap string. + assert_eq!(EcoString::from(ALPH), ALPH); + } + + #[test] + fn test_str_push() { + let mut v = EcoString::new(); + v.push('a'); + v.push('b'); + v.push_str("cd😀"); + assert_eq!(v, "abcd😀"); + assert_eq!(v.len(), 8); + + // Test fully filling the inline storage. + v.push_str("efghij"); + assert_eq!(v.len(), LIMIT); + + // Test spilling with `push`. + let mut a = v.clone(); + a.push('k'); + assert_eq!(a, "abcd😀efghijk"); + assert_eq!(a.len(), 15); + + // Test spilling with `push_str`. + let mut b = v.clone(); + b.push_str("klmn"); + assert_eq!(b, "abcd😀efghijklmn"); + assert_eq!(b.len(), 18); + + // v should be unchanged. + assert_eq!(v.len(), LIMIT); + } + + #[test] + fn test_str_pop() { + // Test with inline string. + let mut v = EcoString::from("Hello World!"); + assert_eq!(v.pop(), Some('!')); + assert_eq!(v, "Hello World"); + + // Remove one-by-one. + for _ in 0 .. 10 { + v.pop(); + } + + assert_eq!(v, "H"); + assert_eq!(v.pop(), Some('H')); + assert_eq!(v, ""); + assert!(v.is_empty()); + + // Test with large string. + let mut v = EcoString::from(ALPH); + assert_eq!(v.pop(), Some('z')); + assert_eq!(v.len(), 25); + } + + #[test] + fn test_str_index() { + // Test that we can use the index syntax. + let v = EcoString::from("abc"); + assert_eq!(&v[.. 2], "ab"); + } + + #[test] + fn test_str_repeat() { + // Test with empty string. + assert_eq!(EcoString::new().repeat(0), ""); + assert_eq!(EcoString::new().repeat(100), ""); + + // Test non-spilling and spilling case. + let v = EcoString::from("abc"); + assert_eq!(v.repeat(0), ""); + assert_eq!(v.repeat(3), "abcabcabc"); + assert_eq!(v.repeat(5), "abcabcabcabcabc"); + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs new file mode 100644 index 00000000..2c53bc70 --- /dev/null +++ b/src/util/mod.rs @@ -0,0 +1,155 @@ +//! Utilities. + +mod eco; + +pub use eco::EcoString; + +use std::cmp::Ordering; +use std::ops::Range; +use std::path::{Component, Path, PathBuf}; + +/// Additional methods for options. +pub trait OptionExt { + /// Replace `self` with `other` if `self` is `Some`. + fn and_set(&mut self, other: Option); + + /// Sets `other` as the value if `self` is `None` or if it contains a value + /// larger than `other`. + fn set_min(&mut self, other: T) + where + T: Ord; + + /// Sets `other` as the value if `self` is `None` or if it contains a value + /// smaller than `other`. + fn set_max(&mut self, other: T) + where + T: Ord; +} + +impl OptionExt for Option { + fn and_set(&mut self, other: Option) { + if self.is_some() { + *self = other; + } + } + + fn set_min(&mut self, other: T) + where + T: Ord, + { + if self.as_ref().map_or(true, |x| other < *x) { + *self = Some(other); + } + } + + fn set_max(&mut self, other: T) + where + T: Ord, + { + if self.as_ref().map_or(true, |x| other > *x) { + *self = Some(other); + } + } +} + +/// Additional methods for slices. +pub trait SliceExt { + /// Split a slice into consecutive groups with the same key. + /// + /// Returns an iterator of pairs of a key and the group with that key. + fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> + where + F: FnMut(&T) -> K, + K: PartialEq; +} + +impl SliceExt for [T] { + fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> + where + F: FnMut(&T) -> K, + K: PartialEq, + { + GroupByKey { slice: self, f } + } +} + +/// This struct is produced by [`SliceExt::group_by_key`]. +pub struct GroupByKey<'a, T, F> { + slice: &'a [T], + f: F, +} + +impl<'a, T, K, F> Iterator for GroupByKey<'a, T, F> +where + F: FnMut(&T) -> K, + K: PartialEq, +{ + type Item = (K, &'a [T]); + + fn next(&mut self) -> Option { + let first = self.slice.first()?; + let key = (self.f)(first); + + let mut i = 1; + while self.slice.get(i).map_or(false, |t| (self.f)(t) == key) { + i += 1; + } + + let (head, tail) = self.slice.split_at(i); + self.slice = tail; + Some((key, head)) + } +} + +/// Additional methods for [`Range`]. +pub trait RangeExt { + /// Locate a position relative to a range. + /// + /// This can be used for binary searching the range that contains the + /// position as follows: + /// ``` + /// # use typst::util::RangeExt; + /// assert_eq!( + /// [1..2, 2..7, 7..10].binary_search_by(|r| r.locate(5)), + /// Ok(1), + /// ); + /// ``` + fn locate(&self, pos: usize) -> Ordering; +} + +impl RangeExt for Range { + fn locate(&self, pos: usize) -> Ordering { + if pos < self.start { + Ordering::Greater + } else if pos < self.end { + Ordering::Equal + } else { + Ordering::Less + } + } +} + +/// Additional methods for [`Path`]. +pub trait PathExt { + /// Lexically normalize a path. + fn normalize(&self) -> PathBuf; +} + +impl PathExt for Path { + fn normalize(&self) -> PathBuf { + let mut out = PathBuf::new(); + for component in self.components() { + match component { + Component::CurDir => {} + Component::ParentDir => match out.components().next_back() { + Some(Component::Normal(_)) => { + out.pop(); + } + _ => out.push(component), + }, + _ => out.push(component), + } + } + out + } +} -- cgit v1.2.3