diff options
| author | Laurenz <laurmaedje@gmail.com> | 2024-05-24 23:09:54 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-24 21:09:54 +0000 |
| commit | ea4c64a7997556871934e20be7415cba8ec275a5 (patch) | |
| tree | 69fbd5edc8f337064ee6e13989c24b022db463af /crates/typst-utils | |
| parent | 34f1a232463bafeb3d8d3fa9adeae33c4bb95b23 (diff) | |
Split `BitSet` into two types and make it a bit nicer (#4249)
Diffstat (limited to 'crates/typst-utils')
| -rw-r--r-- | crates/typst-utils/Cargo.toml | 3 | ||||
| -rw-r--r-- | crates/typst-utils/src/bitset.rs | 103 | ||||
| -rw-r--r-- | crates/typst-utils/src/lib.rs | 2 |
3 files changed, 76 insertions, 32 deletions
diff --git a/crates/typst-utils/Cargo.toml b/crates/typst-utils/Cargo.toml index ba75e399..5f828cff 100644 --- a/crates/typst-utils/Cargo.toml +++ b/crates/typst-utils/Cargo.toml @@ -14,9 +14,10 @@ readme = { workspace = true } [dependencies] once_cell = { workspace = true } -siphasher = { workspace = true } portable-atomic = { workspace = true } rayon = { workspace = true } +siphasher = { workspace = true } +thin-vec = { workspace = true } [lints] workspace = true diff --git a/crates/typst-utils/src/bitset.rs b/crates/typst-utils/src/bitset.rs index cbac7a1e..fa57e631 100644 --- a/crates/typst-utils/src/bitset.rs +++ b/crates/typst-utils/src/bitset.rs @@ -1,27 +1,80 @@ use std::fmt::{self, Debug, Formatter}; -/// Efficiently stores a set of numbers which are expected to be very small -/// (< 32/64 depending on the architecture). +use thin_vec::ThinVec; + +/// The number of bits per chunk. +const BITS: usize = usize::BITS as usize; + +/// Stores a set of numbers which are expected to be rather small. +/// +/// Inserting a very small value is cheap while inserting a large one may be +/// very expensive. /// -/// Inserting a very small value is very cheap while inserting a large one may -/// be very expensive. +/// Unless you're managing small numbers yourself, you should likely prefer +/// `SmallBitSet`, which has a bit larger memory size, but does not allocate +/// for small numbers. #[derive(Clone, PartialEq, Hash)] -pub struct BitSet { +pub struct BitSet(ThinVec<usize>); + +impl BitSet { + /// Creates a new empty bit set. + pub fn new() -> Self { + Self(ThinVec::new()) + } + + /// Inserts a number into the set. + pub fn insert(&mut self, value: usize) { + let chunk = value / BITS; + let within = value % BITS; + if chunk >= self.0.len() { + self.0.resize(chunk + 1, 0); + } + self.0[chunk] |= 1 << within; + } + + /// Whether a number is present in the set. + pub fn contains(&self, value: usize) -> bool { + let chunk = value / BITS; + let within = value % BITS; + let Some(bits) = self.0.get(chunk) else { return false }; + (bits & (1 << within)) != 0 + } +} + +impl Default for BitSet { + fn default() -> Self { + Self::new() + } +} + +impl Debug for BitSet { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut list = f.debug_list(); + let chunks = self.0.len(); + for v in 0..chunks * BITS { + if self.contains(v) { + list.entry(&v); + } + } + list.finish() + } +} + +/// Efficiently stores a set of numbers which are expected to be very small. +/// Values `< 32/64` (depending on the architecture) are stored inline, while +/// values larger than that will lead to an allocation. +#[derive(Clone, PartialEq, Hash)] +pub struct SmallBitSet { /// Used to store values < BITS. low: usize, - /// Used to store values > BITS. We have the extra `Box` to keep the memory - /// size of the `BitSet` down. - #[allow(clippy::box_collection)] - hi: Option<Box<Vec<usize>>>, + /// Used to store values > BITS. + hi: BitSet, } -/// The number of bits per chunk. -const BITS: usize = usize::BITS as usize; - -impl BitSet { +impl SmallBitSet { /// Creates a new empty bit set. pub fn new() -> Self { - Self { low: 0, hi: None } + Self { low: 0, hi: BitSet::new() } } /// Inserts a number into the set. @@ -29,13 +82,7 @@ impl BitSet { if value < BITS { self.low |= 1 << value; } else { - let chunk = value / BITS - 1; - let within = value % BITS; - let vec = self.hi.get_or_insert_with(Default::default); - if chunk >= vec.len() { - vec.resize(chunk + 1, 0); - } - vec[chunk] |= 1 << within; + self.hi.insert(value - BITS); } } @@ -44,25 +91,21 @@ impl BitSet { if value < BITS { (self.low & (1 << value)) != 0 } else { - let Some(hi) = &self.hi else { return false }; - let chunk = value / BITS - 1; - let within = value % BITS; - let Some(bits) = hi.get(chunk) else { return false }; - (bits & (1 << within)) != 0 + self.hi.contains(value - BITS) } } } -impl Default for BitSet { +impl Default for SmallBitSet { fn default() -> Self { Self::new() } } -impl Debug for BitSet { +impl Debug for SmallBitSet { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let mut list = f.debug_list(); - let chunks = 1 + self.hi.as_ref().map_or(0, |v| v.len()); + let chunks = 1 + self.hi.0.len(); for v in 0..chunks * BITS { if self.contains(v) { list.entry(&v); @@ -78,7 +121,7 @@ mod tests { #[test] fn test_bitset() { - let mut set = BitSet::new(); + let mut set = SmallBitSet::new(); assert!(!set.contains(0)); assert!(!set.contains(5)); set.insert(0); diff --git a/crates/typst-utils/src/lib.rs b/crates/typst-utils/src/lib.rs index e0a2c835..754fc70d 100644 --- a/crates/typst-utils/src/lib.rs +++ b/crates/typst-utils/src/lib.rs @@ -10,7 +10,7 @@ mod hash; mod pico; mod scalar; -pub use self::bitset::BitSet; +pub use self::bitset::{BitSet, SmallBitSet}; pub use self::deferred::Deferred; pub use self::hash::LazyHash; pub use self::pico::PicoStr; |
