summary | refs | log | tree | commit | diff
path: root/crates/typst-library/src/loading
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2025-01-09 10:34:16 +0100
committer GitHub <noreply@github.com> 2025-01-09 09:34:16 +0000
commit e2b37fef33a92a7086790e04fb133472413c0c0a (patch)
tree a2bdc638482890183414dce18f8f586786154017 /crates/typst-library/src/loading
parent dacd6acd5e73d35c6e7a7a3b144f16ae70d03daa (diff)
Revamp data loading and deprecate `decode` functions (#5671)
Diffstat (limited to 'crates/typst-library/src/loading')
-rw-r--r-- crates/typst-library/src/loading/cbor.rs 30
-rw-r--r-- crates/typst-library/src/loading/csv.rs 109
-rw-r--r-- crates/typst-library/src/loading/json.rs 29
-rw-r--r-- crates/typst-library/src/loading/mod.rs 95
-rw-r--r-- crates/typst-library/src/loading/read.rs 11
-rw-r--r-- crates/typst-library/src/loading/toml.rs 33
-rw-r--r-- crates/typst-library/src/loading/xml.rs 39
-rw-r--r-- crates/typst-library/src/loading/yaml.rs 29
8 files changed, 221 insertions, 154 deletions
diff --git a/crates/typst-library/src/loading/cbor.rs b/crates/typst-library/src/loading/cbor.rs
index a03e5c99..13d55120 100644
--- a/crates/typst-library/src/loading/cbor.rs
+++ b/crates/typst-library/src/loading/cbor.rs
@@ -1,10 +1,10 @@
-use ecow::{eco_format, EcoString};
+use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Bytes, Value};
-use crate::World;
+use crate::loading::{DataSource, Load};
/// Reads structured data from a CBOR file.
///
@@ -21,29 +21,31 @@ use crate::World;
pub fn cbor(
/// The engine.
engine: &mut Engine,
- /// Path to a CBOR file.
+ /// A path to a CBOR file or raw CBOR bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
) -> SourceResult<Value> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- cbor::decode(Spanned::new(data, span))
+ let data = source.load(engine.world)?;
+ ciborium::from_reader(data.as_slice())
+ .map_err(|err| eco_format!("failed to parse CBOR ({err})"))
+ .at(source.span)
}
#[scope]
impl cbor {
/// Reads structured data from CBOR bytes.
+ ///
+ /// This function is deprecated. The [`cbor`] function now accepts bytes
+ /// directly.
#[func(title = "Decode CBOR")]
pub fn decode(
- /// cbor data.
+ /// The engine.
+ engine: &mut Engine,
+ /// CBOR data.
data: Spanned<Bytes>,
) -> SourceResult<Value> {
- let Spanned { v: data, span } = data;
- ciborium::from_reader(data.as_slice())
- .map_err(|err| eco_format!("failed to parse CBOR ({err})"))
- .at(span)
+ cbor(engine, data.map(DataSource::Bytes))
}
/// Encode structured data into CBOR bytes.
diff --git a/crates/typst-library/src/loading/csv.rs b/crates/typst-library/src/loading/csv.rs
index 6822505d..8171c483 100644
--- a/crates/typst-library/src/loading/csv.rs
+++ b/crates/typst-library/src/loading/csv.rs
@@ -4,8 +4,7 @@ use typst_syntax::Spanned;
use crate::diag::{bail, At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
-use crate::loading::Readable;
-use crate::World;
+use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a CSV file.
///
@@ -28,10 +27,10 @@ use crate::World;
pub fn csv(
/// The engine.
engine: &mut Engine,
- /// Path to a CSV file.
+ /// Path to a CSV file or raw CSV bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
/// The delimiter that separates columns in the CSV file.
/// Must be a single ASCII character.
#[named]
@@ -48,17 +47,63 @@ pub fn csv(
#[default(RowType::Array)]
row_type: RowType,
) -> SourceResult<Array> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type)
+ let data = source.load(engine.world)?;
+
+ let mut builder = ::csv::ReaderBuilder::new();
+ let has_headers = row_type == RowType::Dict;
+ builder.has_headers(has_headers);
+ builder.delimiter(delimiter.0 as u8);
+
+ // Counting lines from 1 by default.
+ let mut line_offset: usize = 1;
+ let mut reader = builder.from_reader(data.as_slice());
+ let mut headers: Option<::csv::StringRecord> = None;
+
+ if has_headers {
+ // Counting lines from 2 because we have a header.
+ line_offset += 1;
+ headers = Some(
+ reader
+ .headers()
+ .map_err(|err| format_csv_error(err, 1))
+ .at(source.span)?
+ .clone(),
+ );
+ }
+
+ let mut array = Array::new();
+ for (line, result) in reader.records().enumerate() {
+ // Original solution was to use line from error, but that is
+ // incorrect with `has_headers` set to `false`. See issue:
+ // https://github.com/BurntSushi/rust-csv/issues/184
+ let line = line + line_offset;
+ let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?;
+ let item = if let Some(headers) = &headers {
+ let mut dict = Dict::new();
+ for (field, value) in headers.iter().zip(&row) {
+ dict.insert(field.into(), value.into_value());
+ }
+ dict.into_value()
+ } else {
+ let sub = row.into_iter().map(|field| field.into_value()).collect();
+ Value::Array(sub)
+ };
+ array.push(item);
+ }
+
+ Ok(array)
}
#[scope]
impl csv {
/// Reads structured data from a CSV string/bytes.
+ ///
+ /// This function is deprecated. The [`csv`] function now accepts bytes
+ /// directly.
#[func(title = "Decode CSV")]
pub fn decode(
+ /// The engine.
+ engine: &mut Engine,
/// CSV data.
data: Spanned<Readable>,
/// The delimiter that separates columns in the CSV file.
@@ -77,51 +122,7 @@ impl csv {
#[default(RowType::Array)]
row_type: RowType,
) -> SourceResult<Array> {
- let Spanned { v: data, span } = data;
- let has_headers = row_type == RowType::Dict;
-
- let mut builder = ::csv::ReaderBuilder::new();
- builder.has_headers(has_headers);
- builder.delimiter(delimiter.0 as u8);
-
- // Counting lines from 1 by default.
- let mut line_offset: usize = 1;
- let mut reader = builder.from_reader(data.as_slice());
- let mut headers: Option<::csv::StringRecord> = None;
-
- if has_headers {
- // Counting lines from 2 because we have a header.
- line_offset += 1;
- headers = Some(
- reader
- .headers()
- .map_err(|err| format_csv_error(err, 1))
- .at(span)?
- .clone(),
- );
- }
-
- let mut array = Array::new();
- for (line, result) in reader.records().enumerate() {
- // Original solution was to use line from error, but that is
- // incorrect with `has_headers` set to `false`. See issue:
- // https://github.com/BurntSushi/rust-csv/issues/184
- let line = line + line_offset;
- let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
- let item = if let Some(headers) = &headers {
- let mut dict = Dict::new();
- for (field, value) in headers.iter().zip(&row) {
- dict.insert(field.into(), value.into_value());
- }
- dict.into_value()
- } else {
- let sub = row.into_iter().map(|field| field.into_value()).collect();
- Value::Array(sub)
- };
- array.push(item);
- }
-
- Ok(array)
+ csv(engine, data.map(Readable::into_source), delimiter, row_type)
}
}
diff --git a/crates/typst-library/src/loading/json.rs b/crates/typst-library/src/loading/json.rs
index 597cf4cc..3128d77d 100644
--- a/crates/typst-library/src/loading/json.rs
+++ b/crates/typst-library/src/loading/json.rs
@@ -1,11 +1,10 @@
-use ecow::{eco_format, EcoString};
+use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
-use crate::loading::Readable;
-use crate::World;
+use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a JSON file.
///
@@ -53,29 +52,31 @@ use crate::World;
pub fn json(
/// The engine.
engine: &mut Engine,
- /// Path to a JSON file.
+ /// Path to a JSON file or raw JSON bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
) -> SourceResult<Value> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- json::decode(Spanned::new(Readable::Bytes(data), span))
+ let data = source.load(engine.world)?;
+ serde_json::from_slice(data.as_slice())
+ .map_err(|err| eco_format!("failed to parse JSON ({err})"))
+ .at(source.span)
}
#[scope]
impl json {
/// Reads structured data from a JSON string/bytes.
+ ///
+ /// This function is deprecated. The [`json`] function now accepts bytes
+ /// directly.
#[func(title = "Decode JSON")]
pub fn decode(
+ /// The engine.
+ engine: &mut Engine,
/// JSON data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
- let Spanned { v: data, span } = data;
- serde_json::from_slice(data.as_slice())
- .map_err(|err| eco_format!("failed to parse JSON ({err})"))
- .at(span)
+ json(engine, data.map(Readable::into_source))
}
/// Encodes structured data into a JSON string.
diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs
index 120b3e3a..171ae651 100644
--- a/crates/typst-library/src/loading/mod.rs
+++ b/crates/typst-library/src/loading/mod.rs
@@ -15,6 +15,10 @@ mod xml_;
#[path = "yaml.rs"]
mod yaml_;
+use comemo::Tracked;
+use ecow::EcoString;
+use typst_syntax::Spanned;
+
pub use self::cbor_::*;
pub use self::csv_::*;
pub use self::json_::*;
@@ -23,7 +27,10 @@ pub use self::toml_::*;
pub use self::xml_::*;
pub use self::yaml_::*;
+use crate::diag::{At, SourceResult};
+use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, category, Bytes, Category, Scope, Str};
+use crate::World;
/// Data loading from external files.
///
@@ -44,36 +51,96 @@ pub(super) fn define(global: &mut Scope) {
global.define_func::<xml>();
}
-/// A value that can be read from a file.
+/// Something we can retrieve byte data from.
#[derive(Debug, Clone, PartialEq, Hash)]
-pub enum Readable {
- /// A decoded string.
- Str(Str),
+pub enum DataSource {
+ /// A path to a file.
+ Path(EcoString),
/// Raw bytes.
Bytes(Bytes),
}
-impl Readable {
- pub fn as_slice(&self) -> &[u8] {
- match self {
- Self::Bytes(v) => v,
- Self::Str(v) => v.as_bytes(),
- }
+cast! {
+ DataSource,
+ self => match self {
+ Self::Path(v) => v.into_value(),
+ Self::Bytes(v) => v.into_value(),
+ },
+ v: EcoString => Self::Path(v),
+ v: Bytes => Self::Bytes(v),
+}
+
+/// Loads data from a path or provided bytes.
+pub trait Load {
+ /// Bytes or a list of bytes (if there are multiple sources).
+ type Output;
+
+ /// Load the bytes.
+ fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output>;
+}
+
+impl Load for Spanned<DataSource> {
+ type Output = Bytes;
+
+ fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
+ self.as_ref().load(world)
}
+}
- pub fn as_str(&self) -> Option<&str> {
- match self {
- Self::Str(v) => Some(v.as_str()),
- Self::Bytes(v) => std::str::from_utf8(v).ok(),
+impl Load for Spanned<&DataSource> {
+ type Output = Bytes;
+
+ fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
+ match &self.v {
+ DataSource::Path(path) => {
+ let file_id = self.span.resolve_path(path).at(self.span)?;
+ world.file(file_id).at(self.span)
+ }
+ DataSource::Bytes(bytes) => Ok(bytes.clone()),
}
}
+}
+
+impl Load for Spanned<OneOrMultiple<DataSource>> {
+ type Output = Vec<Bytes>;
+
+ fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
+ self.as_ref().load(world)
+ }
+}
+
+impl Load for Spanned<&OneOrMultiple<DataSource>> {
+ type Output = Vec<Bytes>;
+
+ fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
+ self.v
+ .0
+ .iter()
+ .map(|source| Spanned::new(source, self.span).load(world))
+ .collect()
+ }
+}
+
+/// A value that can be read from a file.
+#[derive(Debug, Clone, PartialEq, Hash)]
+pub enum Readable {
+ /// A decoded string.
+ Str(Str),
+ /// Raw bytes.
+ Bytes(Bytes),
+}
+impl Readable {
pub fn into_bytes(self) -> Bytes {
match self {
Self::Bytes(v) => v,
Self::Str(v) => Bytes::from_string(v),
}
}
+
+ pub fn into_source(self) -> DataSource {
+ DataSource::Bytes(self.into_bytes())
+ }
}
cast! {
diff --git a/crates/typst-library/src/loading/read.rs b/crates/typst-library/src/loading/read.rs
index 23e6e27e..bf363f84 100644
--- a/crates/typst-library/src/loading/read.rs
+++ b/crates/typst-library/src/loading/read.rs
@@ -1,7 +1,7 @@
use ecow::EcoString;
use typst_syntax::Spanned;
-use crate::diag::{At, SourceResult};
+use crate::diag::{At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, Cast};
use crate::loading::Readable;
@@ -42,12 +42,9 @@ pub fn read(
let data = engine.world.file(id).at(span)?;
Ok(match encoding {
None => Readable::Bytes(data),
- Some(Encoding::Utf8) => Readable::Str(
- std::str::from_utf8(&data)
- .map_err(|_| "file is not valid utf-8")
- .at(span)?
- .into(),
- ),
+ Some(Encoding::Utf8) => {
+ Readable::Str(data.to_str().map_err(FileError::from).at(span)?)
+ }
})
}
diff --git a/crates/typst-library/src/loading/toml.rs b/crates/typst-library/src/loading/toml.rs
index 5167703e..e3a01cdd 100644
--- a/crates/typst-library/src/loading/toml.rs
+++ b/crates/typst-library/src/loading/toml.rs
@@ -1,11 +1,10 @@
use ecow::{eco_format, EcoString};
use typst_syntax::{is_newline, Spanned};
-use crate::diag::{At, SourceResult};
+use crate::diag::{At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
-use crate::loading::Readable;
-use crate::World;
+use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a TOML file.
///
@@ -31,32 +30,32 @@ use crate::World;
pub fn toml(
/// The engine.
engine: &mut Engine,
- /// Path to a TOML file.
+ /// A path to a TOML file or raw TOML bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
) -> SourceResult<Value> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- toml::decode(Spanned::new(Readable::Bytes(data), span))
+ let data = source.load(engine.world)?;
+ let raw = data.as_str().map_err(FileError::from).at(source.span)?;
+ ::toml::from_str(raw)
+ .map_err(|err| format_toml_error(err, raw))
+ .at(source.span)
}
#[scope]
impl toml {
/// Reads structured data from a TOML string/bytes.
+ ///
+ /// This function is deprecated. The [`toml`] function now accepts bytes
+ /// directly.
#[func(title = "Decode TOML")]
pub fn decode(
+ /// The engine.
+ engine: &mut Engine,
/// TOML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
- let Spanned { v: data, span } = data;
- let raw = std::str::from_utf8(data.as_slice())
- .map_err(|_| "file is not valid utf-8")
- .at(span)?;
- ::toml::from_str(raw)
- .map_err(|err| format_toml_error(err, raw))
- .at(span)
+ toml(engine, data.map(Readable::into_source))
}
/// Encodes structured data into a TOML string.
diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs
index 3b1a9674..53ec3d93 100644
--- a/crates/typst-library/src/loading/xml.rs
+++ b/crates/typst-library/src/loading/xml.rs
@@ -5,8 +5,7 @@ use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
-use crate::loading::Readable;
-use crate::World;
+use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from an XML file.
///
@@ -60,36 +59,36 @@ use crate::World;
pub fn xml(
/// The engine.
engine: &mut Engine,
- /// Path to an XML file.
+ /// A path to an XML file or raw XML bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
) -> SourceResult<Value> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- xml::decode(Spanned::new(Readable::Bytes(data), span))
+ let data = source.load(engine.world)?;
+ let text = data.as_str().map_err(FileError::from).at(source.span)?;
+ let document = roxmltree::Document::parse_with_options(
+ text,
+ ParsingOptions { allow_dtd: true, ..Default::default() },
+ )
+ .map_err(format_xml_error)
+ .at(source.span)?;
+ Ok(convert_xml(document.root()))
}
#[scope]
impl xml {
/// Reads structured data from an XML string/bytes.
+ ///
+ /// This function is deprecated. The [`xml`] function now accepts bytes
+ /// directly.
#[func(title = "Decode XML")]
pub fn decode(
+ /// The engine.
+ engine: &mut Engine,
/// XML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
- let Spanned { v: data, span } = data;
- let text = std::str::from_utf8(data.as_slice())
- .map_err(FileError::from)
- .at(span)?;
- let document = roxmltree::Document::parse_with_options(
- text,
- ParsingOptions { allow_dtd: true, ..Default::default() },
- )
- .map_err(format_xml_error)
- .at(span)?;
- Ok(convert_xml(document.root()))
+ xml(engine, data.map(Readable::into_source))
}
}
diff --git a/crates/typst-library/src/loading/yaml.rs b/crates/typst-library/src/loading/yaml.rs
index 0e8ca3fb..2eb26be8 100644
--- a/crates/typst-library/src/loading/yaml.rs
+++ b/crates/typst-library/src/loading/yaml.rs
@@ -1,11 +1,10 @@
-use ecow::{eco_format, EcoString};
+use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
-use crate::loading::Readable;
-use crate::World;
+use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a YAML file.
///
@@ -43,29 +42,31 @@ use crate::World;
pub fn yaml(
/// The engine.
engine: &mut Engine,
- /// Path to a YAML file.
+ /// A path to a YAML file or raw YAML bytes.
///
- /// For more details, see the [Paths section]($syntax/#paths).
- path: Spanned<EcoString>,
+ /// For more details about paths, see the [Paths section]($syntax/#paths).
+ source: Spanned<DataSource>,
) -> SourceResult<Value> {
- let Spanned { v: path, span } = path;
- let id = span.resolve_path(&path).at(span)?;
- let data = engine.world.file(id).at(span)?;
- yaml::decode(Spanned::new(Readable::Bytes(data), span))
+ let data = source.load(engine.world)?;
+ serde_yaml::from_slice(data.as_slice())
+ .map_err(|err| eco_format!("failed to parse YAML ({err})"))
+ .at(source.span)
}
#[scope]
impl yaml {
/// Reads structured data from a YAML string/bytes.
+ ///
+ /// This function is deprecated. The [`yaml`] function now accepts bytes
+ /// directly.
#[func(title = "Decode YAML")]
pub fn decode(
+ /// The engine.
+ engine: &mut Engine,
/// YAML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
- let Spanned { v: data, span } = data;
- serde_yaml::from_slice(data.as_slice())
- .map_err(|err| eco_format!("failed to parse YAML ({err})"))
- .at(span)
+ yaml(engine, data.map(Readable::into_source))
}
/// Encode structured data into a YAML string.