diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-07-02 19:59:52 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-07-02 20:07:43 +0200 |
| commit | ebfdb1dafa430786db10dad2ef7d5467c1bdbed1 (patch) | |
| tree | 2bbc24ddb4124c4bb14dec0e536129d4de37b056 /crates/typst-library/src/compute/data.rs | |
| parent | 3ab19185093d7709f824b95b979060ce125389d8 (diff) | |
Move everything into `crates/` directory
Diffstat (limited to 'crates/typst-library/src/compute/data.rs')
| -rw-r--r-- | crates/typst-library/src/compute/data.rs | 492 |
1 files changed, 492 insertions, 0 deletions
diff --git a/crates/typst-library/src/compute/data.rs b/crates/typst-library/src/compute/data.rs new file mode 100644 index 00000000..6e3a298e --- /dev/null +++ b/crates/typst-library/src/compute/data.rs @@ -0,0 +1,492 @@ +use typst::diag::{format_xml_like_error, FileError}; +use typst::eval::Datetime; + +use crate::prelude::*; + +/// Reads plain text from a file. +/// +/// The file will be read and returned as a string. +/// +/// ## Example { #example } +/// ```example +/// #let text = read("data.html") +/// +/// An example for a HTML file:\ +/// #raw(text, lang: "html") +/// ``` +/// +/// Display: Read +/// Category: data-loading +#[func] +pub fn read( + /// Path to a file. + path: Spanned<EcoString>, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Str> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + let text = std::str::from_utf8(&data) + .map_err(|_| "file is not valid utf-8") + .at(span)?; + Ok(text.into()) +} + +/// Reads structured data from a CSV file. +/// +/// The CSV file will be read and parsed into a 2-dimensional array of strings: +/// Each row in the CSV file will be represented as an array of strings, and all +/// rows will be collected into a single array. Header rows will not be +/// stripped. +/// +/// ## Example { #example } +/// ```example +/// #let results = csv("data.csv") +/// +/// #table( +/// columns: 2, +/// [*Condition*], [*Result*], +/// ..results.flatten(), +/// ) +/// ``` +/// +/// Display: CSV +/// Category: data-loading +#[func] +pub fn csv( + /// Path to a CSV file. + path: Spanned<EcoString>, + /// The delimiter that separates columns in the CSV file. + /// Must be a single ASCII character. + #[named] + #[default] + delimiter: Delimiter, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Array> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + + let mut builder = csv::ReaderBuilder::new(); + builder.has_headers(false); + builder.delimiter(delimiter.0 as u8); + + let mut reader = builder.from_reader(data.as_slice()); + let mut array = Array::new(); + + for (line, result) in reader.records().enumerate() { + // Original solution use line from error, but that is incorrect with + // `has_headers` set to `false`. See issue: + // https://github.com/BurntSushi/rust-csv/issues/184 + let line = line + 1; // Counting lines from 1 + let row = result.map_err(|err| format_csv_error(err, line)).at(span)?; + let sub = row.into_iter().map(|field| field.into_value()).collect(); + array.push(Value::Array(sub)) + } + + Ok(array) +} + +/// The delimiter to use when parsing CSV files. +pub struct Delimiter(char); + +impl Default for Delimiter { + fn default() -> Self { + Self(',') + } +} + +cast! { + Delimiter, + self => self.0.into_value(), + v: EcoString => { + let mut chars = v.chars(); + let first = chars.next().ok_or("delimiter must not be empty")?; + if chars.next().is_some() { + bail!("delimiter must be a single character"); + } + + if !first.is_ascii() { + bail!("delimiter must be an ASCII character"); + } + + Self(first) + }, +} + +/// Format the user-facing CSV error message. +fn format_csv_error(error: csv::Error, line: usize) -> EcoString { + match error.kind() { + csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(), + csv::ErrorKind::UnequalLengths { expected_len, len, .. } => { + eco_format!( + "failed to parse csv file: found {len} instead of {expected_len} fields in line {line}" + ) + } + _ => "failed to parse csv file".into(), + } +} + +/// Reads structured data from a JSON file. +/// +/// The file must contain a valid JSON object or array. JSON objects will be +/// converted into Typst dictionaries, and JSON arrays will be converted into +/// Typst arrays. Strings and booleans will be converted into the Typst +/// equivalents, `null` will be converted into `{none}`, and numbers will be +/// converted to floats or integers depending on whether they are whole numbers. +/// +/// The function returns a dictionary or an array, depending on the JSON file. +/// +/// The JSON files in the example contain objects with the keys `temperature`, +/// `unit`, and `weather`. +/// +/// ## Example { #example } +/// ```example +/// #let forecast(day) = block[ +/// #box(square( +/// width: 2cm, +/// inset: 8pt, +/// fill: if day.weather == "sunny" { +/// yellow +/// } else { +/// aqua +/// }, +/// align( +/// bottom + right, +/// strong(day.weather), +/// ), +/// )) +/// #h(6pt) +/// #set text(22pt, baseline: -8pt) +/// #day.temperature °#day.unit +/// ] +/// +/// #forecast(json("monday.json")) +/// #forecast(json("tuesday.json")) +/// ``` +/// +/// Display: JSON +/// Category: data-loading +#[func] +pub fn json( + /// Path to a JSON file. + path: Spanned<EcoString>, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Value> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + let value: serde_json::Value = + serde_json::from_slice(&data).map_err(format_json_error).at(span)?; + Ok(convert_json(value)) +} + +/// Convert a JSON value to a Typst value. +fn convert_json(value: serde_json::Value) -> Value { + match value { + serde_json::Value::Null => Value::None, + serde_json::Value::Bool(v) => v.into_value(), + serde_json::Value::Number(v) => match v.as_i64() { + Some(int) => int.into_value(), + None => v.as_f64().unwrap_or(f64::NAN).into_value(), + }, + serde_json::Value::String(v) => v.into_value(), + serde_json::Value::Array(v) => { + v.into_iter().map(convert_json).collect::<Array>().into_value() + } + serde_json::Value::Object(v) => v + .into_iter() + .map(|(key, value)| (key.into(), convert_json(value))) + .collect::<Dict>() + .into_value(), + } +} + +/// Format the user-facing JSON error message. +fn format_json_error(error: serde_json::Error) -> EcoString { + assert!(error.is_syntax() || error.is_eof()); + eco_format!("failed to parse json file: syntax error in line {}", error.line()) +} + +/// Reads structured data from a TOML file. +/// +/// The file must contain a valid TOML table. TOML tables will be +/// converted into Typst dictionaries, and TOML arrays will be converted into +/// Typst arrays. Strings and booleans will be converted into the Typst +/// equivalents and numbers will be converted to floats or integers depending on +/// whether they are whole numbers. For the time being, datetimes will be +/// converted to strings as Typst does not have a built-in datetime yet. +/// +/// The TOML file in the example consists of a table with the keys `title`, +/// `version`, and `authors`. +/// +/// ## Example { #example } +/// ```example +/// #let details = toml("details.toml") +/// +/// Title: #details.title \ +/// Version: #details.version \ +/// Authors: #(details.authors +/// .join(", ", last: " and ")) +/// ``` +/// +/// Display: TOML +/// Category: data-loading +#[func] +pub fn toml( + /// Path to a TOML file. + path: Spanned<EcoString>, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Value> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + + let raw = std::str::from_utf8(&data) + .map_err(|_| "file is not valid utf-8") + .at(span)?; + + let value: toml::Value = toml::from_str(raw).map_err(format_toml_error).at(span)?; + Ok(convert_toml(value)) +} + +/// Convert a TOML value to a Typst value. +fn convert_toml(value: toml::Value) -> Value { + match value { + toml::Value::String(v) => v.into_value(), + toml::Value::Integer(v) => v.into_value(), + toml::Value::Float(v) => v.into_value(), + toml::Value::Boolean(v) => v.into_value(), + toml::Value::Array(v) => { + v.into_iter().map(convert_toml).collect::<Array>().into_value() + } + toml::Value::Table(v) => v + .into_iter() + .map(|(key, value)| (key.into(), convert_toml(value))) + .collect::<Dict>() + .into_value(), + toml::Value::Datetime(v) => match (v.date, v.time) { + (None, None) => Value::None, + (Some(date), None) => { + Datetime::from_ymd(date.year as i32, date.month, date.day).into_value() + } + (None, Some(time)) => { + Datetime::from_hms(time.hour, time.minute, time.second).into_value() + } + (Some(date), Some(time)) => Datetime::from_ymd_hms( + date.year as i32, + date.month, + date.day, + time.hour, + time.minute, + time.second, + ) + .into_value(), + }, + } +} + +/// Format the user-facing TOML error message. +fn format_toml_error(error: toml::de::Error) -> EcoString { + if let Some(range) = error.span() { + eco_format!( + "failed to parse toml file: {}, index {}-{}", + error.message(), + range.start, + range.end + ) + } else { + eco_format!("failed to parse toml file: {}", error.message()) + } +} + +/// Reads structured data from a YAML file. +/// +/// The file must contain a valid YAML object or array. YAML mappings will be +/// converted into Typst dictionaries, and YAML sequences will be converted into +/// Typst arrays. Strings and booleans will be converted into the Typst +/// equivalents, null-values (`null`, `~` or empty ``) will be converted into +/// `{none}`, and numbers will be converted to floats or integers depending on +/// whether they are whole numbers. +/// +/// Note that mapping keys that are not a string cause the entry to be +/// discarded. +/// +/// Custom YAML tags are ignored, though the loaded value will still be +/// present. +/// +/// The function returns a dictionary or value or an array, depending on +/// the YAML file. +/// +/// The YAML files in the example contain objects with authors as keys, +/// each with a sequence of their own submapping with the keys +/// "title" and "published" +/// +/// ## Example { #example } +/// ```example +/// #let bookshelf(contents) = { +/// for (author, works) in contents { +/// author +/// for work in works [ +/// - #work.title (#work.published) +/// ] +/// } +/// } +/// +/// #bookshelf( +/// yaml("scifi-authors.yaml") +/// ) +/// ``` +/// +/// Display: YAML +/// Category: data-loading +#[func] +pub fn yaml( + /// Path to a YAML file. + path: Spanned<EcoString>, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Value> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + let value: serde_yaml::Value = + serde_yaml::from_slice(&data).map_err(format_yaml_error).at(span)?; + Ok(convert_yaml(value)) +} + +/// Convert a YAML value to a Typst value. +fn convert_yaml(value: serde_yaml::Value) -> Value { + match value { + serde_yaml::Value::Null => Value::None, + serde_yaml::Value::Bool(v) => v.into_value(), + serde_yaml::Value::Number(v) => match v.as_i64() { + Some(int) => int.into_value(), + None => v.as_f64().unwrap_or(f64::NAN).into_value(), + }, + serde_yaml::Value::String(v) => v.into_value(), + serde_yaml::Value::Sequence(v) => { + v.into_iter().map(convert_yaml).collect::<Array>().into_value() + } + serde_yaml::Value::Mapping(v) => v + .into_iter() + .map(|(key, value)| (convert_yaml_key(key), convert_yaml(value))) + .filter_map(|(key, value)| key.map(|key| (key, value))) + .collect::<Dict>() + .into_value(), + } +} + +/// Converts an arbitrary YAML mapping key into a Typst Dict Key. +/// Currently it only does so for strings, everything else +/// returns None +fn convert_yaml_key(key: serde_yaml::Value) -> Option<Str> { + match key { + serde_yaml::Value::String(v) => Some(Str::from(v)), + _ => None, + } +} + +/// Format the user-facing YAML error message. +fn format_yaml_error(error: serde_yaml::Error) -> EcoString { + eco_format!("failed to parse yaml file: {}", error.to_string().trim()) +} + +/// Reads structured data from an XML file. +/// +/// The XML file is parsed into an array of dictionaries and strings. XML nodes +/// can be elements or strings. Elements are represented as dictionaries with +/// the the following keys: +/// +/// - `tag`: The name of the element as a string. +/// - `attrs`: A dictionary of the element's attributes as strings. +/// - `children`: An array of the element's child nodes. +/// +/// The XML file in the example contains a root `news` tag with multiple +/// `article` tags. Each article has a `title`, `author`, and `content` tag. The +/// `content` tag contains one or more paragraphs, which are represented as `p` +/// tags. +/// +/// ## Example { #example } +/// ```example +/// #let find-child(elem, tag) = { +/// elem.children +/// .find(e => "tag" in e and e.tag == tag) +/// } +/// +/// #let article(elem) = { +/// let title = find-child(elem, "title") +/// let author = find-child(elem, "author") +/// let pars = find-child(elem, "content") +/// +/// heading(title.children.first()) +/// text(10pt, weight: "medium")[ +/// Published by +/// #author.children.first() +/// ] +/// +/// for p in pars.children { +/// if (type(p) == "dictionary") { +/// parbreak() +/// p.children.first() +/// } +/// } +/// } +/// +/// #let data = xml("example.xml") +/// #for elem in data.first().children { +/// if (type(elem) == "dictionary") { +/// article(elem) +/// } +/// } +/// ``` +/// +/// Display: XML +/// Category: data-loading +#[func] +pub fn xml( + /// Path to an XML file. + path: Spanned<EcoString>, + /// The virtual machine. + vm: &mut Vm, +) -> SourceResult<Value> { + let Spanned { v: path, span } = path; + let id = vm.location().join(&path).at(span)?; + let data = vm.world().file(id).at(span)?; + let text = std::str::from_utf8(&data).map_err(FileError::from).at(span)?; + let document = roxmltree::Document::parse(text).map_err(format_xml_error).at(span)?; + Ok(convert_xml(document.root())) +} + +/// Convert an XML node to a Typst value. +fn convert_xml(node: roxmltree::Node) -> Value { + if node.is_text() { + return node.text().unwrap_or_default().into_value(); + } + + let children: Array = node.children().map(convert_xml).collect(); + if node.is_root() { + return Value::Array(children); + } + + let tag: Str = node.tag_name().name().into(); + let attrs: Dict = node + .attributes() + .map(|attr| (attr.name().into(), attr.value().into_value())) + .collect(); + + Value::Dict(dict! { + "tag" => tag, + "attrs" => attrs, + "children" => children, + }) +} + +/// Format the user-facing XML error message. +fn format_xml_error(error: roxmltree::Error) -> EcoString { + format_xml_like_error("xml file", error) +} |
