summaryrefslogtreecommitdiff
path: root/crates/typst-html/src
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2024-12-02 14:19:52 +0100
committerLaurenz <laurmaedje@gmail.com>2024-12-04 10:12:07 +0100
commite0122a5b509d151b7e0197d37a120fd965a055d5 (patch)
tree1045c37c53dc2e08fedc8802cdfec1b244e10dd0 /crates/typst-html/src
parent885c7d96eea73f478faea9877f0dbc40c00b0d7b (diff)
Add HTML export format
Diffstat (limited to 'crates/typst-html/src')
-rw-r--r--crates/typst-html/src/encode.rs104
-rw-r--r--crates/typst-html/src/lib.rs315
2 files changed, 419 insertions, 0 deletions
diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs
new file mode 100644
index 00000000..d4ff83d6
--- /dev/null
+++ b/crates/typst-html/src/encode.rs
@@ -0,0 +1,104 @@
+use std::fmt::Write;
+
+use typst_library::diag::{bail, At, SourceResult, StrResult};
+use typst_library::foundations::Repr;
+use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode};
+use typst_library::layout::Frame;
+use typst_syntax::Span;
+
+/// Serializes an [`HtmlDocument`] into a string of HTML.
+///
+/// The output starts with the `<!DOCTYPE html>` preamble, immediately followed
+/// by the encoded root element. Returns an error if encoding the root element
+/// fails.
+pub fn html(document: &HtmlDocument) -> SourceResult<String> {
+    let mut writer = Writer { buf: String::from("<!DOCTYPE html>") };
+    write_element(&mut writer, &document.root)?;
+    let Writer { buf } = writer;
+    Ok(buf)
+}
+
+/// Accumulates the encoded HTML output while a document is being written.
+struct Writer {
+ /// The HTML text produced so far.
+ buf: String,
+}
+
+/// Writes a single HTML node into the writer.
+///
+/// Text and elements are encoded (which may fail), frames are embedded as SVG,
+/// and introspection tags produce no output at all.
+fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
+    match node {
+        HtmlNode::Element(element) => write_element(w, element),
+        HtmlNode::Text(text, span) => write_text(w, text, *span),
+        HtmlNode::Frame(frame) => {
+            write_frame(w, frame);
+            Ok(())
+        }
+        HtmlNode::Tag(_) => Ok(()),
+    }
+}
+
+/// Writes plain text into the writer, escaping where necessary.
+///
+/// Characters that are valid in normal element text are copied verbatim; all
+/// others go through `write_escape`. An escaping failure is reported at `span`.
+fn write_text(w: &mut Writer, text: &str, span: Span) -> SourceResult<()> {
+    text.chars().try_for_each(|c| {
+        if !charsets::is_valid_in_normal_element_text(c) {
+            return write_escape(w, c).at(span);
+        }
+        w.buf.push(c);
+        Ok(())
+    })
+}
+
+/// Writes one element, including its attributes and children, into the writer.
+///
+/// Attribute values are always double-quoted; characters that are not valid in
+/// an attribute value are escaped, with failures reported at the element's
+/// span. Void elements get neither children nor a closing tag.
+fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
+    // Opening tag: `<name attr="value" ...>`.
+    w.buf.push('<');
+    w.buf.push_str(&element.tag.resolve());
+
+    for (attr, value) in &element.attrs.0 {
+        w.buf.push(' ');
+        w.buf.push_str(&attr.resolve());
+        w.buf.push_str("=\"");
+        for c in value.chars() {
+            if !charsets::is_valid_in_attribute_value(c) {
+                write_escape(w, c).at(element.span)?;
+            } else {
+                w.buf.push(c);
+            }
+        }
+        w.buf.push('"');
+    }
+
+    w.buf.push('>');
+
+    // Only non-void elements have content and a closing tag.
+    if !tag::is_void(element.tag) {
+        for child in &element.children {
+            write_node(w, child)?;
+        }
+
+        w.buf.push_str("</");
+        w.buf.push_str(&element.tag.resolve());
+        w.buf.push('>');
+    }
+
+    Ok(())
+}
+
+/// Writes the escaped form of a character into the writer.
+///
+/// The five characters `&`, `<`, `>`, `"` and `'` use their named character
+/// references. Any other character accepted by `charsets::is_w3c_text_char`
+/// (except carriage return) is written as a hexadecimal numeric reference.
+/// Everything else is rejected with an error.
+fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
+    // See <https://html.spec.whatwg.org/multipage/syntax.html#syntax-charref>
+    let named = match c {
+        '&' => Some("&amp;"),
+        '<' => Some("&lt;"),
+        '>' => Some("&gt;"),
+        '"' => Some("&quot;"),
+        '\'' => Some("&apos;"),
+        _ => None,
+    };
+
+    if let Some(entity) = named {
+        w.buf.push_str(entity);
+    } else if charsets::is_w3c_text_char(c) && c != '\r' {
+        write!(w.buf, "&#x{:x};", c as u32).unwrap();
+    } else {
+        bail!("the character {} cannot be encoded in HTML", c.repr());
+    }
+
+    Ok(())
+}
+
+/// Writes a laid out frame into the writer as inline SVG.
+fn write_frame(w: &mut Writer, frame: &Frame) {
+    let rendered = typst_svg::svg_frame(frame);
+    // FIXME: This string replacement is obviously a hack.
+    let patched =
+        rendered.replace("<svg class", "<svg style=\"overflow: visible;\" class");
+    w.buf.push_str(&patched);
+}
diff --git a/crates/typst-html/src/lib.rs b/crates/typst-html/src/lib.rs
new file mode 100644
index 00000000..ffd8e250
--- /dev/null
+++ b/crates/typst-html/src/lib.rs
@@ -0,0 +1,315 @@
+//! Typst's HTML exporter.
+
+mod encode;
+
+pub use self::encode::html;
+
+use comemo::{Track, Tracked, TrackedMut};
+use typst_library::diag::{bail, warning, At, SourceResult};
+use typst_library::engine::{Engine, Route, Sink, Traced};
+use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
+use typst_library::html::{
+ attr, tag, FrameElem, HtmlDocument, HtmlElem, HtmlElement, HtmlNode,
+};
+use typst_library::introspection::{
+ Introspector, Locator, LocatorLink, SplitLocator, TagElem,
+};
+use typst_library::layout::{Abs, Axes, BoxElem, Region, Size};
+use typst_library::model::{DocumentInfo, ParElem};
+use typst_library::routines::{Arenas, Pair, RealizationKind, Routines};
+use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
+use typst_library::World;
+use typst_syntax::Span;
+
+/// Produce an HTML document from content.
+///
+/// Root-level realization runs first; the realized elements are then turned
+/// into HTML. This function only unpacks the engine into its tracked parts and
+/// forwards them to the memoized implementation.
+#[typst_macros::time(name = "html document")]
+pub fn html_document(
+    engine: &mut Engine,
+    content: &Content,
+    styles: StyleChain,
+) -> SourceResult<HtmlDocument> {
+    let sink = TrackedMut::reborrow_mut(&mut engine.sink);
+    let route = engine.route.track();
+    html_document_impl(
+        engine.routines,
+        engine.world,
+        engine.introspector,
+        engine.traced,
+        sink,
+        route,
+        content,
+        styles,
+    )
+}
+
+/// The internal implementation of `html_document`.
+///
+/// Realizes `content` at the document level, converts the realized children
+/// into HTML nodes, wraps them in a root element and builds an introspector
+/// for the result. The call is memoized via `comemo`.
+#[comemo::memoize]
+#[allow(clippy::too_many_arguments)]
+fn html_document_impl(
+ routines: &Routines,
+ world: Tracked<dyn World + '_>,
+ introspector: Tracked<Introspector>,
+ traced: Tracked<Traced>,
+ sink: TrackedMut<Sink>,
+ route: Tracked<Route>,
+ content: &Content,
+ styles: StyleChain,
+) -> SourceResult<HtmlDocument> {
+ // The document starts from a fresh root locator.
+ let mut locator = Locator::root().split();
+ // Reassemble an engine from the individually tracked parts.
+ let mut engine = Engine {
+ routines,
+ world,
+ introspector,
+ traced,
+ sink,
+ route: Route::extend(route).unnested(),
+ };
+
+ // Mark the external styles as "outside" so that they are valid at the page
+ // level.
+ let styles = styles.to_map().outside();
+ let styles = StyleChain::new(&styles);
+
+ // Realize the content; document metadata is collected into `info` along the
+ // way.
+ let arenas = Arenas::default();
+ let mut info = DocumentInfo::default();
+ let children = (engine.routines.realize)(
+ RealizationKind::HtmlDocument(&mut info),
+ &mut engine,
+ &mut locator,
+ &arenas,
+ content,
+ styles,
+ )?;
+
+ // Convert the realized children to HTML nodes, root them, and index the
+ // resulting tree.
+ let output = handle_list(&mut engine, &mut locator, children.iter().copied())?;
+ let root = root_element(output, &info)?;
+ let introspector = Introspector::html(&root);
+
+ Ok(HtmlDocument { info, root, introspector })
+}
+
+/// Produce HTML nodes from content.
+///
+/// Unpacks the engine and the locator into their tracked parts and forwards
+/// them to the memoized implementation.
+#[typst_macros::time(name = "html fragment")]
+pub fn html_fragment(
+    engine: &mut Engine,
+    content: &Content,
+    locator: Locator,
+    styles: StyleChain,
+) -> SourceResult<Vec<HtmlNode>> {
+    let sink = TrackedMut::reborrow_mut(&mut engine.sink);
+    let route = engine.route.track();
+    let tracked_locator = locator.track();
+    html_fragment_impl(
+        engine.routines,
+        engine.world,
+        engine.introspector,
+        engine.traced,
+        sink,
+        route,
+        content,
+        tracked_locator,
+        styles,
+    )
+}
+
+/// The cached, internal implementation of [`html_fragment`].
+///
+/// Realizes `content` as an HTML fragment and converts the realized children
+/// into HTML nodes. Fails at the content's span if the route's HTML depth
+/// check fails.
+#[comemo::memoize]
+#[allow(clippy::too_many_arguments)]
+fn html_fragment_impl(
+ routines: &Routines,
+ world: Tracked<dyn World + '_>,
+ introspector: Tracked<Introspector>,
+ traced: Tracked<Traced>,
+ sink: TrackedMut<Sink>,
+ route: Tracked<Route>,
+ content: &Content,
+ locator: Tracked<Locator>,
+ styles: StyleChain,
+) -> SourceResult<Vec<HtmlNode>> {
+ // Continue from the caller's tracked locator via a link.
+ let link = LocatorLink::new(locator);
+ let mut locator = Locator::link(&link).split();
+ // Reassemble an engine from the individually tracked parts.
+ let mut engine = Engine {
+ routines,
+ world,
+ introspector,
+ traced,
+ sink,
+ route: Route::extend(route),
+ };
+
+ // Check the nesting depth before doing any work.
+ engine.route.check_html_depth().at(content.span())?;
+
+ let arenas = Arenas::default();
+ let children = (engine.routines.realize)(
+ RealizationKind::HtmlFragment,
+ &mut engine,
+ &mut locator,
+ &arenas,
+ content,
+ styles,
+ )?;
+
+ handle_list(&mut engine, &mut locator, children.iter().copied())
+}
+
+/// Converts a sequence of realized children into HTML nodes.
+///
+/// Each `(content, styles)` pair is passed to `handle` in order; conversion
+/// stops at the first error.
+fn handle_list<'a>(
+    engine: &mut Engine,
+    locator: &mut SplitLocator,
+    children: impl IntoIterator<Item = Pair<'a>>,
+) -> SourceResult<Vec<HtmlNode>> {
+    let mut nodes = Vec::new();
+    children
+        .into_iter()
+        .try_for_each(|(child, styles)| handle(engine, child, locator, styles, &mut nodes))?;
+    Ok(nodes)
+}
+
+/// Convert a child into HTML node(s).
+///
+/// Dispatches on the element type of `child` and appends the resulting
+/// node(s) to `output`. Elements without a dedicated branch are skipped and a
+/// warning is emitted through the engine's sink.
+fn handle(
+ engine: &mut Engine,
+ child: &Content,
+ locator: &mut SplitLocator,
+ styles: StyleChain,
+ output: &mut Vec<HtmlNode>,
+) -> SourceResult<()> {
+ if let Some(elem) = child.to_packed::<TagElem>() {
+ // Introspection tags are carried along as-is.
+ output.push(HtmlNode::Tag(elem.tag.clone()));
+ } else if let Some(elem) = child.to_packed::<HtmlElem>() {
+ // An explicit HTML element: convert its body (if any) into children.
+ let mut children = vec![];
+ if let Some(body) = elem.body(styles) {
+ children = html_fragment(engine, body, locator.next(&elem.span()), styles)?;
+ }
+ if tag::is_void(elem.tag) && !children.is_empty() {
+ bail!(elem.span(), "HTML void elements may not have children");
+ }
+ let element = HtmlElement {
+ tag: elem.tag,
+ attrs: elem.attrs(styles).clone(),
+ children,
+ span: elem.span(),
+ };
+ output.push(element.into());
+ } else if let Some(elem) = child.to_packed::<ParElem>() {
+ // Paragraphs become `<p>` elements wrapping their converted children.
+ let children = handle_list(engine, locator, elem.children.iter(&styles))?;
+ output.push(
+ HtmlElement::new(tag::p)
+ .with_children(children)
+ .spanned(elem.span())
+ .into(),
+ );
+ } else if let Some(elem) = child.to_packed::<BoxElem>() {
+ // FIXME: Very incomplete and hacky, but makes boxes kind of fulfill
+ // their purpose for now.
+ if let Some(body) = elem.body(styles) {
+ let children =
+ html_fragment(engine, body, locator.next(&elem.span()), styles)?;
+ output.extend(children);
+ }
+ } else if child.is::<SpaceElem>() {
+ output.push(HtmlNode::text(' ', child.span()));
+ } else if let Some(elem) = child.to_packed::<TextElem>() {
+ output.push(HtmlNode::text(elem.text.clone(), elem.span()));
+ } else if let Some(elem) = child.to_packed::<LinebreakElem>() {
+ output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
+ } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
+ // Smart quotes are emitted as plain straight quote characters.
+ output.push(HtmlNode::text(
+ if elem.double(styles) { '"' } else { '\'' },
+ child.span(),
+ ));
+ } else if let Some(elem) = child.to_packed::<FrameElem>() {
+ // Lay out the body with the paged target into a region of infinite
+ // size that expands on neither axis, and embed the resulting frame.
+ let locator = locator.next(&elem.span());
+ let style = TargetElem::set_target(Target::Paged).wrap();
+ let frame = (engine.routines.layout_frame)(
+ engine,
+ &elem.body,
+ locator,
+ styles.chain(&style),
+ Region::new(Size::splat(Abs::inf()), Axes::splat(false)),
+ )?;
+ output.push(HtmlNode::Frame(frame));
+ } else {
+ // Anything else is dropped, but the user is told about it.
+ engine.sink.warn(warning!(
+ child.span(),
+ "{} was ignored during HTML export",
+ child.elem().name()
+ ));
+ }
+ Ok(())
+}
+
+/// Ensures the output is rooted in an `<html>` element.
+///
+/// A user-provided `<html>` element is returned unchanged. Otherwise, the
+/// user's `<body>` (or a fresh `<body>` wrapping the loose nodes) is placed in
+/// a new `<html>` element next to a generated `<head>`.
+fn root_element(output: Vec<HtmlNode>, info: &DocumentInfo) -> SourceResult<HtmlElement> {
+    let kind = classify_output(output)?;
+    let body = match kind {
+        OutputKind::Html(html) => return Ok(html),
+        OutputKind::Body(element) => element,
+        OutputKind::Leafs(nodes) => HtmlElement::new(tag::body).with_children(nodes),
+    };
+    let head = head_element(info);
+    let root = HtmlElement::new(tag::html).with_children(vec![head.into(), body.into()]);
+    Ok(root)
+}
+
+/// Builds the `<head>` element for a document.
+///
+/// It always contains the charset and viewport `<meta>` elements. A `<title>`
+/// and a description `<meta>` are appended when the document info provides
+/// them.
+fn head_element(info: &DocumentInfo) -> HtmlElement {
+    let charset = HtmlElement::new(tag::meta).with_attr(attr::charset, "utf-8");
+    let viewport = HtmlElement::new(tag::meta)
+        .with_attr(attr::name, "viewport")
+        .with_attr(attr::content, "width=device-width, initial-scale=1");
+
+    let mut nodes: Vec<HtmlNode> = vec![charset.into(), viewport.into()];
+
+    if let Some(title) = &info.title {
+        let text = HtmlNode::Text(title.clone(), Span::detached());
+        let element = HtmlElement::new(tag::title).with_children(vec![text]);
+        nodes.push(element.into());
+    }
+
+    if let Some(description) = &info.description {
+        let element = HtmlElement::new(tag::meta)
+            .with_attr(attr::name, "description")
+            .with_attr(attr::content, description.clone());
+        nodes.push(element.into());
+    }
+
+    HtmlElement::new(tag::head).with_children(nodes)
+}
+
+/// Determine which kind of output the user generated.
+///
+/// - A single `<html>` element yields [`OutputKind::Html`].
+/// - A single `<body>` element yields [`OutputKind::Body`].
+/// - An `<html>` or `<body>` element next to other nodes is an error, reported
+///   at that element's span.
+/// - Anything else is returned as [`OutputKind::Leafs`].
+///
+/// `HtmlNode::Tag` nodes are introspection markers rather than content, so
+/// they are not counted when deciding whether an element stands alone.
+fn classify_output(mut output: Vec<HtmlNode>) -> SourceResult<OutputKind> {
+    // Count only nodes that are real content. Counting tags as well would
+    // reject a lone `<html>`/`<body>` element merely because it is accompanied
+    // by introspection tags.
+    let count = output
+        .iter()
+        .filter(|node| !matches!(node, HtmlNode::Tag(_)))
+        .count();
+
+    for node in &mut output {
+        let HtmlNode::Element(elem) = node else { continue };
+        let tag = elem.tag;
+        // Moves the element out of the list, leaving a placeholder behind. The
+        // list is dropped right after, so the placeholder is never observed.
+        let mut take = || std::mem::replace(elem, HtmlElement::new(tag::html));
+        match (tag, count) {
+            (tag::html, 1) => return Ok(OutputKind::Html(take())),
+            (tag::body, 1) => return Ok(OutputKind::Body(take())),
+            (tag::html | tag::body, _) => bail!(
+                elem.span,
+                "`{}` element must be the only element in the document",
+                elem.tag
+            ),
+            _ => {}
+        }
+    }
+    Ok(OutputKind::Leafs(output))
+}
+
+/// What kinds of output the user generated.
+enum OutputKind {
+ /// The user generated their own `<html>` element. We do not need to supply
+ /// one.
+ Html(HtmlElement),
+ /// The user generated their own `<body>` element. We do not need to supply
+ /// one, but need to supply the `<html>` element.
+ Body(HtmlElement),
+ /// The user generated leafs which we wrap in a `<body>` and `<html>`.
+ Leafs(Vec<HtmlNode>),
+}