summaryrefslogtreecommitdiff
path: root/crates/typst-library/src/foundations/bytes.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/typst-library/src/foundations/bytes.rs')
-rw-r--r--crates/typst-library/src/foundations/bytes.rs262
1 files changed, 262 insertions, 0 deletions
diff --git a/crates/typst-library/src/foundations/bytes.rs b/crates/typst-library/src/foundations/bytes.rs
new file mode 100644
index 00000000..05fe4763
--- /dev/null
+++ b/crates/typst-library/src/foundations/bytes.rs
@@ -0,0 +1,262 @@
+use std::borrow::Cow;
+use std::fmt::{self, Debug, Formatter};
+use std::ops::{Add, AddAssign, Deref};
+use std::sync::Arc;
+
+use ecow::{eco_format, EcoString};
+use serde::{Serialize, Serializer};
+use typst_utils::LazyHash;
+
+use crate::diag::{bail, StrResult};
+use crate::foundations::{cast, func, scope, ty, Array, Reflect, Repr, Str, Value};
+
+/// A sequence of bytes.
+///
+/// This is conceptually similar to an array of [integers]($int) between `{0}`
+/// and `{255}`, but represented much more efficiently. You can iterate over it
+/// using a [for loop]($scripting/#loops).
+///
+/// You can convert
+/// - a [string]($str) or an [array] of integers to bytes with the [`bytes`]
+///   constructor
+/// - bytes to a string with the [`str`] constructor, with UTF-8 encoding
+/// - bytes to an array of integers with the [`array`] constructor
+///
+/// When [reading]($read) data from a file, you can decide whether to load it
+/// as a string or as raw bytes.
+///
+/// ```example
+/// #bytes((123, 160, 22, 0)) \
+/// #bytes("Hello 😃")
+///
+/// #let data = read(
+///   "rhino.png",
+///   encoding: none,
+/// )
+///
+/// // Magic bytes.
+/// #array(data.slice(0, 4)) \
+/// #str(data.slice(1, 4))
+/// ```
+#[ty(scope, cast)]
+#[derive(Clone, Hash, Eq, PartialEq)]
+// The payload is a `Cow` so that `'static` data can be wrapped without copying
+// (see `from_static`), while data built at runtime is stored as an owned
+// `Vec<u8>`. It sits behind an `Arc`, so cloning `Bytes` does not copy the
+// payload.
+// NOTE(review): `LazyHash` presumably computes the hash on demand and caches
+// it; confirm against `typst_utils`.
+pub struct Bytes(Arc<LazyHash<Cow<'static, [u8]>>>);
+
+impl Bytes {
+    /// Wrap a `'static` byte slice without copying its contents.
+    pub fn from_static(slice: &'static [u8]) -> Self {
+        let borrowed: Cow<'static, [u8]> = Cow::Borrowed(slice);
+        Self(Arc::new(LazyHash::new(borrowed)))
+    }
+
+    /// Whether the buffer contains no bytes.
+    pub fn is_empty(&self) -> bool {
+        self.as_slice().is_empty()
+    }
+
+    /// Borrow the contents as a plain byte slice.
+    pub fn as_slice(&self) -> &[u8] {
+        self.deref()
+    }
+
+    /// Copy the contents into a freshly allocated vector.
+    pub fn to_vec(&self) -> Vec<u8> {
+        self.as_slice().to_vec()
+    }
+
+    /// Turn a possibly negative index into an absolute position, failing with
+    /// the out of bounds error message if it does not fit.
+    fn locate(&self, index: i64) -> StrResult<usize> {
+        match self.locate_opt(index) {
+            Some(position) => Ok(position),
+            None => Err(out_of_bounds(index, self.len())),
+        }
+    }
+
+    /// Turn a possibly negative index into an absolute position, or `None` if
+    /// it falls outside the buffer.
+    ///
+    /// Negative indices count from the back. A position equal to the length is
+    /// accepted.
+    fn locate_opt(&self, index: i64) -> Option<usize> {
+        let len = self.len();
+        let absolute = if index < 0 {
+            (len as i64).checked_add(index)?
+        } else {
+            index
+        };
+        let position = usize::try_from(absolute).ok()?;
+        (position <= len).then_some(position)
+    }
+}
+
+#[scope]
+impl Bytes {
+    /// Converts a value to bytes.
+    ///
+    /// - Strings are encoded in UTF-8.
+    /// - Arrays of integers between `{0}` and `{255}` are converted directly. The
+    ///   dedicated byte representation is much more efficient than the array
+    ///   representation and thus typically used for large byte buffers (e.g. image
+    ///   data).
+    ///
+    /// ```example
+    /// #bytes("Hello 😃") \
+    /// #bytes((123, 160, 22, 0))
+    /// ```
+    #[func(constructor)]
+    pub fn construct(
+        /// The value that should be converted to bytes.
+        value: ToBytes,
+    ) -> Bytes {
+        value.0
+    }
+
+    /// The length in bytes.
+    #[func(title = "Length")]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Returns the byte at the specified index. Returns the default value if
+    /// the index is out of bounds or fails with an error if no default value
+    /// was specified.
+    #[func]
+    pub fn at(
+        &self,
+        /// The index at which to retrieve the byte.
+        index: i64,
+        /// A default value to return if the index is out of bounds.
+        #[named]
+        default: Option<Value>,
+    ) -> StrResult<Value> {
+        // `locate_opt` accepts `index == len`, so the checked `get` is what
+        // rejects the one-past-the-end position here.
+        self.locate_opt(index)
+            .and_then(|i| self.0.get(i).map(|&b| Value::Int(b.into())))
+            .or(default)
+            .ok_or_else(|| out_of_bounds_no_default(index, self.len()))
+    }
+
+    /// Extracts a subslice of the bytes. Fails with an error if the start or end
+    /// index is out of bounds.
+    #[func]
+    pub fn slice(
+        &self,
+        /// The start index (inclusive).
+        start: i64,
+        /// The end index (exclusive). If omitted, the whole slice until the end
+        /// is extracted.
+        #[default]
+        end: Option<i64>,
+        /// The number of items to extract. This is equivalent to passing
+        /// `start + count` as the `end` position. Mutually exclusive with
+        /// `end`.
+        #[named]
+        count: Option<i64>,
+    ) -> StrResult<Bytes> {
+        // Resolve `start` to an absolute position before `count` is applied.
+        // Adding `count` to a raw negative (from-the-back) start could yield
+        // zero or a positive number, which would then be misread as an index
+        // from the front and produce an empty or wrong slice.
+        let start = self.locate(start)?;
+
+        // An explicit `end` takes precedence over `count`. The saturating
+        // addition keeps a huge `count` from overflowing; the saturated value
+        // is subsequently rejected by `locate` as out of bounds.
+        let end = end.or_else(|| count.map(|c| (start as i64).saturating_add(c)));
+
+        // An end before the start yields an empty slice rather than a panic.
+        let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
+        Ok(self.0[start..end].into())
+    }
+}
+
+impl Debug for Bytes {
+    /// Prints only the length, never the contents.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let len = self.len();
+        f.write_fmt(format_args!("Bytes({len})"))
+    }
+}
+
+impl Repr for Bytes {
+    /// Renders as `bytes(<length>)`; the contents are not included.
+    fn repr(&self) -> EcoString {
+        let len = self.len();
+        eco_format!("bytes({len})")
+    }
+}
+
+impl Deref for Bytes {
+    type Target = [u8];
+
+    /// Exposes the stored payload as a byte slice.
+    fn deref(&self) -> &[u8] {
+        let payload: &Cow<'static, [u8]> = &self.0;
+        payload
+    }
+}
+
+impl AsRef<[u8]> for Bytes {
+    /// Same view as [`Bytes::as_slice`].
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
+impl From<&[u8]> for Bytes {
+    /// Copies the borrowed slice into a new, owned buffer.
+    fn from(slice: &[u8]) -> Self {
+        let owned: Cow<'static, [u8]> = Cow::Owned(slice.to_vec());
+        Self(Arc::new(LazyHash::new(owned)))
+    }
+}
+
+impl From<Vec<u8>> for Bytes {
+    /// Takes ownership of the vector without copying its contents.
+    fn from(vec: Vec<u8>) -> Self {
+        let owned: Cow<'static, [u8]> = Cow::Owned(vec);
+        Self(Arc::new(LazyHash::new(owned)))
+    }
+}
+
+impl Add for Bytes {
+    type Output = Self;
+
+    /// Concatenates two buffers by delegating to `+=`.
+    fn add(self, rhs: Self) -> Self::Output {
+        let mut joined = self;
+        joined += rhs;
+        joined
+    }
+}
+
+impl AddAssign for Bytes {
+    /// Appends `rhs` to this buffer, reusing an existing allocation where
+    /// possible.
+    fn add_assign(&mut self, rhs: Self) {
+        // Appending nothing leaves the buffer untouched.
+        if rhs.is_empty() {
+            return;
+        }
+
+        // An empty left-hand side can simply adopt the right-hand side.
+        if self.is_empty() {
+            *self = rhs;
+            return;
+        }
+
+        // Only extend in place when this handle is the sole strong reference
+        // and the payload is already an owned vector.
+        let sole_owner = Arc::strong_count(&self.0) == 1;
+        let owned = matches!(**self.0, Cow::Owned(_));
+        if sole_owner && owned {
+            Arc::make_mut(&mut self.0).to_mut().extend_from_slice(&rhs);
+        } else {
+            // Otherwise build a fresh buffer holding both halves.
+            *self = Self::from([self.as_slice(), rhs.as_slice()].concat());
+        }
+    }
+}
+
+impl Serialize for Bytes {
+    /// Writes the raw bytes for binary formats. Human-readable formats receive
+    /// the `Debug` rendering as a string instead.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            return serializer.serialize_bytes(self);
+        }
+
+        let summary = eco_format!("{self:?}");
+        serializer.serialize_str(&summary)
+    }
+}
+
+/// A value that can be cast to bytes.
+///
+/// Wraps the `Bytes` produced from a string, an array of integers, or
+/// existing bytes; the accepted inputs are listed in the `cast!` rules.
+pub struct ToBytes(Bytes);
+
+cast! {
+ ToBytes,
+ // A string contributes the bytes returned by `as_bytes`.
+ v: Str => Self(v.as_bytes().into()),
+ // An array must consist solely of integers in `0..=255`. The first item
+ // that is out of range or not an integer aborts the conversion with an
+ // error.
+ v: Array => Self(v.iter()
+ .map(|item| match item {
+ Value::Int(byte @ 0..=255) => Ok(*byte as u8),
+ Value::Int(_) => bail!("number must be between 0 and 255"),
+ value => Err(<u8 as Reflect>::error(value)),
+ })
+ .collect::<Result<Vec<u8>, _>>()?
+ .into()
+ ),
+ // Existing bytes are passed through unchanged.
+ v: Bytes => Self(v),
+}
+
+/// The out of bounds access error message.
+///
+/// `index` is the index as requested by the caller (it may be negative) and
+/// `len` is the length of the byte buffer; both are echoed in the message.
+/// Marked `#[cold]` because it is only reached when an index lookup fails.
+#[cold]
+fn out_of_bounds(index: i64, len: usize) -> EcoString {
+ eco_format!("byte index out of bounds (index: {index}, len: {len})")
+}
+
+/// The out of bounds access error message when no default value was given.
+///
+/// `index` is the index as requested by the caller (it may be negative) and
+/// `len` is the length of the byte buffer; both are echoed in the message.
+/// The trailing `\` in the format string joins the two source lines into a
+/// single-line message.
+#[cold]
+fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString {
+ eco_format!(
+ "byte index out of bounds (index: {index}, len: {len}) \
+ and no default value was specified",
+ )
+}