summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Niklas Ausborn <niklas@ausborn.de> 2023-12-18 17:14:21 +0100
committer GitHub <noreply@github.com> 2023-12-18 17:14:21 +0100
commit 1f983ced90f1660958eb9ec9854dbbc2b3aa3509 (patch)
tree 14f7660aa4d04c6f4decc32903fa9c78e7f31a08
parent e8e797c18b43559d3dad23452dc48b60d8097729 (diff)
Implement CSV file header rows support. (#2619)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
-rw-r--r-- crates/typst/src/loading/csv.rs 89
-rw-r--r-- tests/typ/compute/data.typ 13
2 files changed, 93 insertions, 9 deletions
diff --git a/crates/typst/src/loading/csv.rs b/crates/typst/src/loading/csv.rs
index 0d009560..101b3812 100644
--- a/crates/typst/src/loading/csv.rs
+++ b/crates/typst/src/loading/csv.rs
@@ -2,7 +2,7 @@ use ecow::{eco_format, EcoString};
use crate::diag::{bail, At, SourceResult};
use crate::engine::Engine;
-use crate::foundations::{cast, func, scope, Array, IntoValue, Value};
+use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
use crate::loading::Readable;
use crate::syntax::Spanned;
use crate::World;
@@ -35,11 +35,21 @@ pub fn csv(
#[named]
#[default]
delimiter: Delimiter,
+ /// How to represent the file's rows.
+ ///
+ /// - If set to `array`, each row is represented as a plain array of
+ /// strings.
+ /// - If set to `dictionary`, each row is represented as a dictionary
+ /// mapping from header keys to strings. This option only makes sense when
+ /// a header row is present in the CSV file.
+ #[named]
+ #[default(RowType::Array)]
+ row_type: RowType,
) -> SourceResult<Array> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
- self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter)
+ self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type)
}
#[scope]
@@ -54,22 +64,59 @@ impl csv {
#[named]
#[default]
delimiter: Delimiter,
+ /// How to represent the file's rows.
+ ///
+ /// - If set to `array`, each row is represented as a plain array of
+ /// strings.
+ /// - If set to `dictionary`, each row is represented as a dictionary
+ /// mapping from header keys to strings. This option only makes sense
+ /// when a header row is present in the CSV file.
+ #[named]
+ #[default(RowType::Array)]
+ row_type: RowType,
) -> SourceResult<Array> {
let Spanned { v: data, span } = data;
+ let has_headers = row_type == RowType::Dict;
+
let mut builder = ::csv::ReaderBuilder::new();
- builder.has_headers(false);
+ builder.has_headers(has_headers);
builder.delimiter(delimiter.0 as u8);
+
+ // Counting lines from 1 by default.
+ let mut line_offset: usize = 1;
let mut reader = builder.from_reader(data.as_slice());
- let mut array = Array::new();
+ let mut headers: Option<::csv::StringRecord> = None;
+
+ if has_headers {
+ // Counting lines from 2 because we have a header.
+ line_offset += 1;
+ headers = Some(
+ reader
+ .headers()
+ .map_err(|err| format_csv_error(err, 1))
+ .at(span)?
+ .clone(),
+ );
+ }
+ let mut array = Array::new();
for (line, result) in reader.records().enumerate() {
- // Original solution use line from error, but that is incorrect with
- // `has_headers` set to `false`. See issue:
+ // Original solution was to use line from error, but that is
+ // incorrect with `has_headers` set to `false`. See issue:
// https://github.com/BurntSushi/rust-csv/issues/184
- let line = line + 1; // Counting lines from 1
+ let line = line + line_offset;
let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
- let sub = row.into_iter().map(|field| field.into_value()).collect();
- array.push(Value::Array(sub))
+ let item = if let Some(headers) = &headers {
+ let mut dict = Dict::new();
+ for (field, value) in headers.iter().zip(&row) {
+ dict.insert(field.into(), value.into_value());
+ }
+ dict.into_value()
+ } else {
+ let sub = row.into_iter().map(|field| field.into_value()).collect();
+ Value::Array(sub)
+ };
+ array.push(item);
}
Ok(array)
@@ -103,6 +150,30 @@ cast! {
},
}
+/// The type of parsed rows.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum RowType {
+ Array,
+ Dict,
+}
+
+cast! {
+ RowType,
+ self => match self {
+ Self::Array => Type::of::<Array>(),
+ Self::Dict => Type::of::<Dict>(),
+ }.into_value(),
+ ty: Type => {
+ if ty == Type::of::<Array>() {
+ Self::Array
+ } else if ty == Type::of::<Dict>() {
+ Self::Dict
+ } else {
+ bail!("expected `array` or `dictionary`");
+ }
+ },
+}
+
/// Format the user-facing CSV error message.
fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
match err.kind() {
diff --git a/tests/typ/compute/data.typ b/tests/typ/compute/data.typ
index c0a6ce50..ae964b3d 100644
--- a/tests/typ/compute/data.typ
+++ b/tests/typ/compute/data.typ
@@ -23,6 +23,14 @@
#table(columns: data.at(0).len(), ..cells)
---
+// Test reading CSV data with dictionary rows enabled.
+#let data = csv("/files/zoo.csv", row-type: dictionary)
+#test(data.len(), 3)
+#test(data.at(0).Name, "Debby")
+#test(data.at(2).Weight, "150kg")
+#test(data.at(1).Species, "Tiger")
+
+---
// Error: 6-16 file not found (searched at typ/compute/nope.csv)
#csv("nope.csv")
@@ -31,6 +39,11 @@
#csv("/files/bad.csv")
---
+// Test error numbering with dictionary rows.
+// Error: 6-22 failed to parse CSV (found 3 instead of 2 fields in line 3)
+#csv("/files/bad.csv", row-type: dictionary)
+
+---
// Test reading JSON data.
#let data = json("/files/zoo.json")
#test(data.len(), 3)