diff options
| author | Laurenz <laurmaedje@gmail.com> | 2025-01-09 10:34:16 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-09 09:34:16 +0000 |
| commit | e2b37fef33a92a7086790e04fb133472413c0c0a (patch) | |
| tree | a2bdc638482890183414dce18f8f586786154017 /crates/typst-library/src/loading | |
| parent | dacd6acd5e73d35c6e7a7a3b144f16ae70d03daa (diff) | |
Revamp data loading and deprecate `decode` functions (#5671)
Diffstat (limited to 'crates/typst-library/src/loading')
| -rw-r--r-- | crates/typst-library/src/loading/cbor.rs | 30 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/csv.rs | 109 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/json.rs | 29 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/mod.rs | 95 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/read.rs | 11 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/toml.rs | 33 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/xml.rs | 39 | ||||
| -rw-r--r-- | crates/typst-library/src/loading/yaml.rs | 29 |
8 files changed, 221 insertions, 154 deletions
diff --git a/crates/typst-library/src/loading/cbor.rs b/crates/typst-library/src/loading/cbor.rs index a03e5c99..13d55120 100644 --- a/crates/typst-library/src/loading/cbor.rs +++ b/crates/typst-library/src/loading/cbor.rs @@ -1,10 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Bytes, Value}; -use crate::World; +use crate::loading::{DataSource, Load}; /// Reads structured data from a CBOR file. /// @@ -21,29 +21,31 @@ use crate::World; pub fn cbor( /// The engine. engine: &mut Engine, - /// Path to a CBOR file. + /// A path to a CBOR file or raw CBOR bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, ) -> SourceResult<Value> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - cbor::decode(Spanned::new(data, span)) + let data = source.load(engine.world)?; + ciborium::from_reader(data.as_slice()) + .map_err(|err| eco_format!("failed to parse CBOR ({err})")) + .at(source.span) } #[scope] impl cbor { /// Reads structured data from CBOR bytes. + /// + /// This function is deprecated. The [`cbor`] function now accepts bytes + /// directly. #[func(title = "Decode CBOR")] pub fn decode( - /// cbor data. + /// The engine. + engine: &mut Engine, + /// CBOR data. data: Spanned<Bytes>, ) -> SourceResult<Value> { - let Spanned { v: data, span } = data; - ciborium::from_reader(data.as_slice()) - .map_err(|err| eco_format!("failed to parse CBOR ({err})")) - .at(span) + cbor(engine, data.map(DataSource::Bytes)) } /// Encode structured data into CBOR bytes. diff --git a/crates/typst-library/src/loading/csv.rs b/crates/typst-library/src/loading/csv.rs index 6822505d..8171c483 100644 --- a/crates/typst-library/src/loading/csv.rs +++ b/crates/typst-library/src/loading/csv.rs @@ -4,8 +4,7 @@ use typst_syntax::Spanned; use crate::diag::{bail, At, SourceResult}; use crate::engine::Engine; use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a CSV file. /// @@ -28,10 +27,10 @@ use crate::World; pub fn csv( /// The engine. engine: &mut Engine, - /// Path to a CSV file. + /// Path to a CSV file or raw CSV bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, /// The delimiter that separates columns in the CSV file. /// Must be a single ASCII character. #[named] @@ -48,17 +47,63 @@ pub fn csv( #[default(RowType::Array)] row_type: RowType, ) -> SourceResult<Array> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type) + let data = source.load(engine.world)?; + + let mut builder = ::csv::ReaderBuilder::new(); + let has_headers = row_type == RowType::Dict; + builder.has_headers(has_headers); + builder.delimiter(delimiter.0 as u8); + + // Counting lines from 1 by default. + let mut line_offset: usize = 1; + let mut reader = builder.from_reader(data.as_slice()); + let mut headers: Option<::csv::StringRecord> = None; + + if has_headers { + // Counting lines from 2 because we have a header. + line_offset += 1; + headers = Some( + reader + .headers() + .map_err(|err| format_csv_error(err, 1)) + .at(source.span)? + .clone(), + ); + } + + let mut array = Array::new(); + for (line, result) in reader.records().enumerate() { + // Original solution was to use line from error, but that is + // incorrect with `has_headers` set to `false`. See issue: + // https://github.com/BurntSushi/rust-csv/issues/184 + let line = line + line_offset; + let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?; + let item = if let Some(headers) = &headers { + let mut dict = Dict::new(); + for (field, value) in headers.iter().zip(&row) { + dict.insert(field.into(), value.into_value()); + } + dict.into_value() + } else { + let sub = row.into_iter().map(|field| field.into_value()).collect(); + Value::Array(sub) + }; + array.push(item); + } + + Ok(array) } #[scope] impl csv { /// Reads structured data from a CSV string/bytes. + /// + /// This function is deprecated. The [`csv`] function now accepts bytes + /// directly. #[func(title = "Decode CSV")] pub fn decode( + /// The engine. + engine: &mut Engine, /// CSV data. data: Spanned<Readable>, /// The delimiter that separates columns in the CSV file. @@ -77,51 +122,7 @@ impl csv { #[default(RowType::Array)] row_type: RowType, ) -> SourceResult<Array> { - let Spanned { v: data, span } = data; - let has_headers = row_type == RowType::Dict; - - let mut builder = ::csv::ReaderBuilder::new(); - builder.has_headers(has_headers); - builder.delimiter(delimiter.0 as u8); - - // Counting lines from 1 by default. - let mut line_offset: usize = 1; - let mut reader = builder.from_reader(data.as_slice()); - let mut headers: Option<::csv::StringRecord> = None; - - if has_headers { - // Counting lines from 2 because we have a header. - line_offset += 1; - headers = Some( - reader - .headers() - .map_err(|err| format_csv_error(err, 1)) - .at(span)? - .clone(), - ); - } - - let mut array = Array::new(); - for (line, result) in reader.records().enumerate() { - // Original solution was to use line from error, but that is - // incorrect with `has_headers` set to `false`. See issue: - // https://github.com/BurntSushi/rust-csv/issues/184 - let line = line + line_offset; - let row = result.map_err(|err| format_csv_error(err, line)).at(span)?; - let item = if let Some(headers) = &headers { - let mut dict = Dict::new(); - for (field, value) in headers.iter().zip(&row) { - dict.insert(field.into(), value.into_value()); - } - dict.into_value() - } else { - let sub = row.into_iter().map(|field| field.into_value()).collect(); - Value::Array(sub) - }; - array.push(item); - } - - Ok(array) + csv(engine, data.map(Readable::into_source), delimiter, row_type) } } diff --git a/crates/typst-library/src/loading/json.rs b/crates/typst-library/src/loading/json.rs index 597cf4cc..3128d77d 100644 --- a/crates/typst-library/src/loading/json.rs +++ b/crates/typst-library/src/loading/json.rs @@ -1,11 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a JSON file. /// @@ -53,29 +52,31 @@ use crate::World; pub fn json( /// The engine. engine: &mut Engine, - /// Path to a JSON file. + /// Path to a JSON file or raw JSON bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, ) -> SourceResult<Value> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - json::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + serde_json::from_slice(data.as_slice()) + .map_err(|err| eco_format!("failed to parse JSON ({err})")) + .at(source.span) } #[scope] impl json { /// Reads structured data from a JSON string/bytes. + /// + /// This function is deprecated. The [`json`] function now accepts bytes + /// directly. #[func(title = "Decode JSON")] pub fn decode( + /// The engine. + engine: &mut Engine, /// JSON data. data: Spanned<Readable>, ) -> SourceResult<Value> { - let Spanned { v: data, span } = data; - serde_json::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse JSON ({err})")) - .at(span) + json(engine, data.map(Readable::into_source)) } /// Encodes structured data into a JSON string. diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs index 120b3e3a..171ae651 100644 --- a/crates/typst-library/src/loading/mod.rs +++ b/crates/typst-library/src/loading/mod.rs @@ -15,6 +15,10 @@ mod xml_; #[path = "yaml.rs"] mod yaml_; +use comemo::Tracked; +use ecow::EcoString; +use typst_syntax::Spanned; + pub use self::cbor_::*; pub use self::csv_::*; pub use self::json_::*; @@ -23,7 +27,10 @@ pub use self::toml_::*; pub use self::xml_::*; pub use self::yaml_::*; +use crate::diag::{At, SourceResult}; +use crate::foundations::OneOrMultiple; use crate::foundations::{cast, category, Bytes, Category, Scope, Str}; +use crate::World; /// Data loading from external files. /// @@ -44,36 +51,96 @@ pub(super) fn define(global: &mut Scope) { global.define_func::<xml>(); } -/// A value that can be read from a file. +/// Something we can retrieve byte data from. #[derive(Debug, Clone, PartialEq, Hash)] -pub enum Readable { - /// A decoded string. - Str(Str), +pub enum DataSource { + /// A path to a file. + Path(EcoString), /// Raw bytes. Bytes(Bytes), } -impl Readable { - pub fn as_slice(&self) -> &[u8] { - match self { - Self::Bytes(v) => v, - Self::Str(v) => v.as_bytes(), - } +cast! { + DataSource, + self => match self { + Self::Path(v) => v.into_value(), + Self::Bytes(v) => v.into_value(), + }, + v: EcoString => Self::Path(v), + v: Bytes => Self::Bytes(v), +} + +/// Loads data from a path or provided bytes. +pub trait Load { + /// Bytes or a list of bytes (if there are multiple sources). + type Output; + + /// Load the bytes. + fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output>; +} + +impl Load for Spanned<DataSource> { + type Output = Bytes; + + fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> { + self.as_ref().load(world) } +} - pub fn as_str(&self) -> Option<&str> { - match self { - Self::Str(v) => Some(v.as_str()), - Self::Bytes(v) => std::str::from_utf8(v).ok(), +impl Load for Spanned<&DataSource> { + type Output = Bytes; + + fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> { + match &self.v { + DataSource::Path(path) => { + let file_id = self.span.resolve_path(path).at(self.span)?; + world.file(file_id).at(self.span) + } + DataSource::Bytes(bytes) => Ok(bytes.clone()), } } +} + +impl Load for Spanned<OneOrMultiple<DataSource>> { + type Output = Vec<Bytes>; + + fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> { + self.as_ref().load(world) + } +} + +impl Load for Spanned<&OneOrMultiple<DataSource>> { + type Output = Vec<Bytes>; + + fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> { + self.v + .0 + .iter() + .map(|source| Spanned::new(source, self.span).load(world)) + .collect() + } +} + +/// A value that can be read from a file. +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum Readable { + /// A decoded string. + Str(Str), + /// Raw bytes. + Bytes(Bytes), +} +impl Readable { pub fn into_bytes(self) -> Bytes { match self { Self::Bytes(v) => v, Self::Str(v) => Bytes::from_string(v), } } + + pub fn into_source(self) -> DataSource { + DataSource::Bytes(self.into_bytes()) + } } cast! { diff --git a/crates/typst-library/src/loading/read.rs b/crates/typst-library/src/loading/read.rs index 23e6e27e..bf363f84 100644 --- a/crates/typst-library/src/loading/read.rs +++ b/crates/typst-library/src/loading/read.rs @@ -1,7 +1,7 @@ use ecow::EcoString; use typst_syntax::Spanned; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, Cast}; use crate::loading::Readable; @@ -42,12 +42,9 @@ pub fn read( let data = engine.world.file(id).at(span)?; Ok(match encoding { None => Readable::Bytes(data), - Some(Encoding::Utf8) => Readable::Str( - std::str::from_utf8(&data) - .map_err(|_| "file is not valid utf-8") - .at(span)? - .into(), - ), + Some(Encoding::Utf8) => { + Readable::Str(data.to_str().map_err(FileError::from).at(span)?) + } }) } diff --git a/crates/typst-library/src/loading/toml.rs b/crates/typst-library/src/loading/toml.rs index 5167703e..e3a01cdd 100644 --- a/crates/typst-library/src/loading/toml.rs +++ b/crates/typst-library/src/loading/toml.rs @@ -1,11 +1,10 @@ use ecow::{eco_format, EcoString}; use typst_syntax::{is_newline, Spanned}; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a TOML file. /// @@ -31,32 +30,32 @@ use crate::World; pub fn toml( /// The engine. engine: &mut Engine, - /// Path to a TOML file. + /// A path to a TOML file or raw TOML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, ) -> SourceResult<Value> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - toml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + let raw = data.as_str().map_err(FileError::from).at(source.span)?; + ::toml::from_str(raw) + .map_err(|err| format_toml_error(err, raw)) + .at(source.span) } #[scope] impl toml { /// Reads structured data from a TOML string/bytes. + /// + /// This function is deprecated. The [`toml`] function now accepts bytes + /// directly. #[func(title = "Decode TOML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// TOML data. data: Spanned<Readable>, ) -> SourceResult<Value> { - let Spanned { v: data, span } = data; - let raw = std::str::from_utf8(data.as_slice()) - .map_err(|_| "file is not valid utf-8") - .at(span)?; - ::toml::from_str(raw) - .map_err(|err| format_toml_error(err, raw)) - .at(span) + toml(engine, data.map(Readable::into_source)) } /// Encodes structured data into a TOML string. diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs index 3b1a9674..53ec3d93 100644 --- a/crates/typst-library/src/loading/xml.rs +++ b/crates/typst-library/src/loading/xml.rs @@ -5,8 +5,7 @@ use typst_syntax::Spanned; use crate::diag::{format_xml_like_error, At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from an XML file. /// @@ -60,36 +59,36 @@ use crate::World; pub fn xml( /// The engine. engine: &mut Engine, - /// Path to an XML file. + /// A path to an XML file or raw XML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, ) -> SourceResult<Value> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - xml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + let text = data.as_str().map_err(FileError::from).at(source.span)?; + let document = roxmltree::Document::parse_with_options( + text, + ParsingOptions { allow_dtd: true, ..Default::default() }, + ) + .map_err(format_xml_error) + .at(source.span)?; + Ok(convert_xml(document.root())) } #[scope] impl xml { /// Reads structured data from an XML string/bytes. + /// + /// This function is deprecated. The [`xml`] function now accepts bytes + /// directly. #[func(title = "Decode XML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// XML data. data: Spanned<Readable>, ) -> SourceResult<Value> { - let Spanned { v: data, span } = data; - let text = std::str::from_utf8(data.as_slice()) - .map_err(FileError::from) - .at(span)?; - let document = roxmltree::Document::parse_with_options( - text, - ParsingOptions { allow_dtd: true, ..Default::default() }, - ) - .map_err(format_xml_error) - .at(span)?; - Ok(convert_xml(document.root())) + xml(engine, data.map(Readable::into_source)) } } diff --git a/crates/typst-library/src/loading/yaml.rs b/crates/typst-library/src/loading/yaml.rs index 0e8ca3fb..2eb26be8 100644 --- a/crates/typst-library/src/loading/yaml.rs +++ b/crates/typst-library/src/loading/yaml.rs @@ -1,11 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a YAML file. /// @@ -43,29 +42,31 @@ use crate::World; pub fn yaml( /// The engine. engine: &mut Engine, - /// Path to a YAML file. + /// A path to a YAML file or raw YAML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned<EcoString>, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned<DataSource>, ) -> SourceResult<Value> { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - yaml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + serde_yaml::from_slice(data.as_slice()) + .map_err(|err| eco_format!("failed to parse YAML ({err})")) + .at(source.span) } #[scope] impl yaml { /// Reads structured data from a YAML string/bytes. + /// + /// This function is deprecated. The [`yaml`] function now accepts bytes + /// directly. #[func(title = "Decode YAML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// YAML data. data: Spanned<Readable>, ) -> SourceResult<Value> { - let Spanned { v: data, span } = data; - serde_yaml::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse YAML ({err})")) - .at(span) + yaml(engine, data.map(Readable::into_source)) } /// Encode structured data into a YAML string. |
