diff options
| author | Laurenz <laurmaedje@gmail.com> | 2025-06-23 15:54:52 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2025-06-23 15:59:22 +0200 |
| commit | bf8ef2a4a5ffa9c30fce9fc254ffcf982634e4c6 (patch) | |
| tree | ead60969ba9f14d0c06de5a0ba5327ae54b565b1 /crates/typst-html/src/encode.rs | |
| parent | c2e2fd99f69665e2361a1129dd04121a5b2c61a2 (diff) | |
Properly handle raw text elements
Diffstat (limited to 'crates/typst-html/src/encode.rs')
| -rw-r--r-- | crates/typst-html/src/encode.rs | 110 |
1 files changed, 108 insertions, 2 deletions
diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index 758bf0b9..adcb6e03 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -2,7 +2,9 @@ use std::fmt::Write; use typst_library::diag::{bail, At, SourceResult, StrResult}; use typst_library::foundations::Repr; -use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag}; +use typst_library::html::{ + attr, charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag, +}; use typst_library::layout::Frame; use typst_syntax::Span; @@ -95,7 +97,9 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { return Ok(()); } - if !element.children.is_empty() { + if tag::is_raw(element.tag) { + write_raw(w, element)?; + } else if !element.children.is_empty() { write_children(w, element)?; } @@ -157,6 +161,108 @@ fn starts_with_newline(element: &HtmlElement) -> bool { false } +/// Encodes the contents of a raw text element. +fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { + let text = collect_raw_text(element)?; + + if let Some(closing) = find_closing_tag(&text, element.tag) { + bail!( + element.span, + "HTML raw text element cannot contain its own closing tag"; + hint: "the sequence `{closing}` appears in the raw text", + ) + } + + let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep }; + match mode { + RawMode::Keep => { + w.buf.push_str(&text); + } + RawMode::Wrap => { + w.buf.push('\n'); + w.buf.push_str(&text); + write_indent(w); + } + RawMode::Indent => { + w.level += 1; + for line in text.lines() { + write_indent(w); + w.buf.push_str(line); + } + w.level -= 1; + write_indent(w); + } + } + + Ok(()) +} + +/// Collects the textual contents of a raw text element. 
+fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> { + let mut output = String::new(); + for c in &element.children { + match c { + HtmlNode::Tag(_) => continue, + HtmlNode::Text(text, _) => output.push_str(text), + HtmlNode::Element(_) | HtmlNode::Frame(_) => { + let span = match c { + HtmlNode::Element(child) => child.span, + _ => element.span, + }; + bail!(span, "HTML raw text element cannot have non-text children") + } + }; + } + Ok(output) +} + +/// Finds a closing sequence for the given tag in the text, if it exists. +/// +/// See HTML spec § 13.1.2.6. +fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> { + let s = tag.resolve(); + let len = s.len(); + text.match_indices("</").find_map(|(i, _)| { + let rest = &text[i + 2..]; + let disallowed = rest.len() >= len + && rest[..len].eq_ignore_ascii_case(&s) + && rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']); + disallowed.then(|| &text[i..i + 2 + len]) + }) +} + +/// How to format the contents of a raw text element. +enum RawMode { + /// Just don't touch it. + Keep, + /// Newline after the opening and newline + indent before the closing tag. + Wrap, + /// Newlines after opening and before closing tag and each line indented. + Indent, +} + +impl RawMode { + fn of(element: &HtmlElement, text: &str) -> Self { + match element.tag { + tag::script + if !element.attrs.0.iter().any(|(attr, value)| { + *attr == attr::r#type && value != "text/javascript" + }) => + { + // Template literals can be multi-line, so indent may change + // the semantics of the JavaScript. + if text.contains('`') { + Self::Wrap + } else { + Self::Indent + } + } + tag::style => Self::Indent, + _ => Self::Keep, + } + } +} + /// Whether we are allowed to add an extra newline at the start and end of the /// element's contents. /// |
