diff options
Diffstat (limited to 'crates/typst-library/src/loading/xml.rs')
| -rw-r--r-- | crates/typst-library/src/loading/xml.rs | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs new file mode 100644 index 00000000..3b1a9674 --- /dev/null +++ b/crates/typst-library/src/loading/xml.rs @@ -0,0 +1,123 @@ +use ecow::EcoString; +use roxmltree::ParsingOptions; +use typst_syntax::Spanned; + +use crate::diag::{format_xml_like_error, At, FileError, SourceResult}; +use crate::engine::Engine; +use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; +use crate::loading::Readable; +use crate::World; + +/// Reads structured data from an XML file. +/// +/// The XML file is parsed into an array of dictionaries and strings. XML nodes +/// can be elements or strings. Elements are represented as dictionaries with +/// the following keys: +/// +/// - `tag`: The name of the element as a string. +/// - `attrs`: A dictionary of the element's attributes as strings. +/// - `children`: An array of the element's child nodes. +/// +/// The XML file in the example contains a root `news` tag with multiple +/// `article` tags. Each article has a `title`, `author`, and `content` tag. The +/// `content` tag contains one or more paragraphs, which are represented as `p` +/// tags. +/// +/// # Example +/// ```example +/// #let find-child(elem, tag) = { +/// elem.children +/// .find(e => "tag" in e and e.tag == tag) +/// } +/// +/// #let article(elem) = { +/// let title = find-child(elem, "title") +/// let author = find-child(elem, "author") +/// let pars = find-child(elem, "content") +/// +/// heading(title.children.first()) +/// text(10pt, weight: "medium")[ +/// Published by +/// #author.children.first() +/// ] +/// +/// for p in pars.children { +/// if (type(p) == "dictionary") { +/// parbreak() +/// p.children.first() +/// } +/// } +/// } +/// +/// #let data = xml("example.xml") +/// #for elem in data.first().children { +/// if (type(elem) == "dictionary") { +/// article(elem) +/// } +/// } +/// ``` +#[func(scope, title = "XML")] +pub fn xml( + /// The engine. + engine: &mut Engine, + /// Path to an XML file. + /// + /// For more details, see the [Paths section]($syntax/#paths). + path: Spanned<EcoString>, +) -> SourceResult<Value> { + let Spanned { v: path, span } = path; + let id = span.resolve_path(&path).at(span)?; + let data = engine.world.file(id).at(span)?; + xml::decode(Spanned::new(Readable::Bytes(data), span)) +} + +#[scope] +impl xml { + /// Reads structured data from an XML string/bytes. + #[func(title = "Decode XML")] + pub fn decode( + /// XML data. + data: Spanned<Readable>, + ) -> SourceResult<Value> { + let Spanned { v: data, span } = data; + let text = std::str::from_utf8(data.as_slice()) + .map_err(FileError::from) + .at(span)?; + let document = roxmltree::Document::parse_with_options( + text, + ParsingOptions { allow_dtd: true, ..Default::default() }, + ) + .map_err(format_xml_error) + .at(span)?; + Ok(convert_xml(document.root())) + } +} + +/// Convert an XML node to a Typst value. +fn convert_xml(node: roxmltree::Node) -> Value { + if node.is_text() { + return node.text().unwrap_or_default().into_value(); + } + + let children: Array = node.children().map(convert_xml).collect(); + if node.is_root() { + return Value::Array(children); + } + + let tag: Str = node.tag_name().name().into(); + let attrs: Dict = node + .attributes() + .map(|attr| (attr.name().into(), attr.value().into_value())) + .collect(); + + Value::Dict(dict! { + "tag" => tag, + "attrs" => attrs, + "children" => children, + }) +} + +/// Format the user-facing XML error message. +fn format_xml_error(error: roxmltree::Error) -> EcoString { + format_xml_like_error("XML", error) +} |
