summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crates/typst-html/src/encode.rs 52
-rw-r--r-- crates/typst-library/src/html/dom.rs 94
-rw-r--r-- tests/ref/html/basic-table.html 22
-rw-r--r-- tests/ref/html/block-html.html 8
-rw-r--r-- tests/ref/html/box-html.html 4
-rw-r--r-- tests/ref/html/enum-start.html 3
-rw-r--r-- tests/ref/html/heading-html-basic.html 28
-rw-r--r-- tests/ref/html/link-basic.html 16
-rw-r--r-- tests/ref/html/quote-attribution-link.html 8
-rw-r--r-- tests/ref/html/quote-nesting-html.html 4
-rw-r--r-- tests/ref/html/quote-plato.html 16
11 files changed, 135 insertions, 120 deletions
diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs
index 71422a0f..612f923f 100644
--- a/crates/typst-html/src/encode.rs
+++ b/crates/typst-html/src/encode.rs
@@ -2,7 +2,7 @@ use std::fmt::Write;
use typst_library::diag::{bail, At, SourceResult, StrResult};
use typst_library::foundations::Repr;
-use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode};
+use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag};
use typst_library::layout::Frame;
use typst_syntax::Span;
@@ -20,10 +20,11 @@ pub fn html(document: &HtmlDocument) -> SourceResult<String> {
#[derive(Default)]
struct Writer {
+ /// The output buffer.
buf: String,
- /// current indentation level
+ /// The current indentation level.
level: usize,
- /// pretty printing enabled?
+ /// Whether pretty printing is enabled.
pretty: bool,
}
@@ -88,26 +89,32 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
let pretty = w.pretty;
if !element.children.is_empty() {
- w.pretty &= is_pretty(element);
+ let pretty_inside = allows_pretty_inside(element.tag)
+ && element.children.iter().any(|node| match node {
+ HtmlNode::Element(child) => wants_pretty_around(child.tag),
+ _ => false,
+ });
+
+ w.pretty &= pretty_inside;
let mut indent = w.pretty;
w.level += 1;
for c in &element.children {
- let pretty_child = match c {
+ let pretty_around = match c {
HtmlNode::Tag(_) => continue,
- HtmlNode::Element(element) => is_pretty(element),
+ HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag),
HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
};
- if core::mem::take(&mut indent) || pretty_child {
+ if core::mem::take(&mut indent) || pretty_around {
write_indent(w);
}
write_node(w, c)?;
- indent = pretty_child;
+ indent = pretty_around;
}
w.level -= 1;
- write_indent(w)
+ write_indent(w);
}
w.pretty = pretty;
@@ -118,12 +125,27 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
Ok(())
}
-/// Whether the element should be pretty-printed.
-fn is_pretty(element: &HtmlElement) -> bool {
- matches!(
- element.tag,
- tag::meta | tag::table | tag::thead | tag::tbody | tag::tfoot | tag::tr
- ) || tag::is_block_by_default(element.tag)
+/// Whether we are allowed to add an extra newline at the start and end of the
+/// element's contents.
+///
+/// Technically, users can change CSS `display` properties such that the
+/// insertion of whitespace may actually impact the visual output. For example,
+/// <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how adding CSS
+/// rules to `<p>` can make it sensitive to whitespace. For this reason, we
+/// should also respect the `style` tag in the future.
+fn allows_pretty_inside(tag: HtmlTag) -> bool {
+ (tag::is_block_by_default(tag) && tag != tag::pre)
+ || tag::is_tabular_by_default(tag)
+ || tag == tag::li
+}
+
+/// Whether newlines should be added before and after the element if the parent
+/// allows it.
+///
+/// In contrast to `allows_pretty_inside`, which is purely spec-driven, this is
+/// more subjective and depends on preference.
+fn wants_pretty_around(tag: HtmlTag) -> bool {
+ allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre
}
/// Escape a character.
diff --git a/crates/typst-library/src/html/dom.rs b/crates/typst-library/src/html/dom.rs
index 2acd839d..1b725d54 100644
--- a/crates/typst-library/src/html/dom.rs
+++ b/crates/typst-library/src/html/dom.rs
@@ -475,17 +475,55 @@ pub mod tag {
wbr
}
+ /// Whether this is a void tag whose associated element may not have
+ /// children.
+ pub fn is_void(tag: HtmlTag) -> bool {
+ matches!(
+ tag,
+ self::area
+ | self::base
+ | self::br
+ | self::col
+ | self::embed
+ | self::hr
+ | self::img
+ | self::input
+ | self::link
+ | self::meta
+ | self::param
+ | self::source
+ | self::track
+ | self::wbr
+ )
+ }
+
+ /// Whether this is a tag containing raw text.
+ pub fn is_raw(tag: HtmlTag) -> bool {
+ matches!(tag, self::script | self::style)
+ }
+
+ /// Whether this is a tag containing escapable raw text.
+ pub fn is_escapable_raw(tag: HtmlTag) -> bool {
+ matches!(tag, self::textarea | self::title)
+ }
+
+ /// Whether an element is considered metadata.
+ pub fn is_metadata(tag: HtmlTag) -> bool {
+ matches!(
+ tag,
+ self::base
+ | self::link
+ | self::meta
+ | self::noscript
+ | self::script
+ | self::style
+ | self::template
+ | self::title
+ )
+ }
+
/// Whether nodes with the tag have the CSS property `display: block` by
/// default.
- ///
- /// If this is true, then pretty-printing can insert spaces around such
- /// nodes and around the contents of such nodes.
- ///
- /// However, when users change the properties of such tags via CSS, the
- /// insertion of whitespace may actually impact the visual output; for
- /// example, <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how
- /// adding CSS rules to `<p>` can make it sensitive to whitespace. In such
- /// cases, users should disable pretty-printing.
pub fn is_block_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
@@ -572,37 +610,23 @@ pub mod tag {
)
}
- /// Whether this is a void tag whose associated element may not have a
- /// children.
- pub fn is_void(tag: HtmlTag) -> bool {
+ /// Whether nodes with the tag have the CSS property `display: table(-.*)?`
+ /// by default.
+ pub fn is_tabular_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
- self::area
- | self::base
- | self::br
+ self::table
+ | self::thead
+ | self::tbody
+ | self::tfoot
+ | self::tr
+ | self::th
+ | self::td
+ | self::caption
| self::col
- | self::embed
- | self::hr
- | self::img
- | self::input
- | self::link
- | self::meta
- | self::param
- | self::source
- | self::track
- | self::wbr
+ | self::colgroup
)
}
-
- /// Whether this is a tag containing raw text.
- pub fn is_raw(tag: HtmlTag) -> bool {
- matches!(tag, self::script | self::style)
- }
-
- /// Whether this is a tag containing escapable raw text.
- pub fn is_escapable_raw(tag: HtmlTag) -> bool {
- matches!(tag, self::textarea | self::title)
- }
}
/// Predefined constants for HTML attributes.
diff --git a/tests/ref/html/basic-table.html b/tests/ref/html/basic-table.html
index 6ba1864e..189a5b31 100644
--- a/tests/ref/html/basic-table.html
+++ b/tests/ref/html/basic-table.html
@@ -8,26 +8,36 @@
<table>
<thead>
<tr>
- <th>The</th><th>first</th><th>and</th>
+ <th>The</th>
+ <th>first</th>
+ <th>and</th>
</tr>
<tr>
- <th>the</th><th>second</th><th>row</th>
+ <th>the</th>
+ <th>second</th>
+ <th>row</th>
</tr>
</thead>
<tbody>
<tr>
- <td>Foo</td><td rowspan="2">Baz</td><td>Bar</td>
+ <td>Foo</td>
+ <td rowspan="2">Baz</td>
+ <td>Bar</td>
</tr>
<tr>
- <td>1</td><td>2</td>
+ <td>1</td>
+ <td>2</td>
</tr>
<tr>
- <td colspan="2">3</td><td>4</td>
+ <td colspan="2">3</td>
+ <td>4</td>
</tr>
</tbody>
<tfoot>
<tr>
- <td>The</td><td>last</td><td>row</td>
+ <td>The</td>
+ <td>last</td>
+ <td>row</td>
</tr>
</tfoot>
</table>
diff --git a/tests/ref/html/block-html.html b/tests/ref/html/block-html.html
index 98d971b8..d1716c6d 100644
--- a/tests/ref/html/block-html.html
+++ b/tests/ref/html/block-html.html
@@ -5,11 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <p>
- Paragraph
- </p>
- <div>
- Div
- </div>
+ <p>Paragraph</p>
+ <div>Div</div>
</body>
</html>
diff --git a/tests/ref/html/box-html.html b/tests/ref/html/box-html.html
index 5c970a6b..b2a26533 100644
--- a/tests/ref/html/box-html.html
+++ b/tests/ref/html/box-html.html
@@ -5,8 +5,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <p>
- Text <span style="display: inline-block;">Span</span>.
- </p>
+ <p>Text <span style="display: inline-block;">Span</span>.</p>
</body>
</html>
diff --git a/tests/ref/html/enum-start.html b/tests/ref/html/enum-start.html
index 8a4ff37f..fc9b3c06 100644
--- a/tests/ref/html/enum-start.html
+++ b/tests/ref/html/enum-start.html
@@ -6,7 +6,8 @@
</head>
<body>
<ol start="3">
- <li>Skipping</li><li>Ahead</li>
+ <li>Skipping</li>
+ <li>Ahead</li>
</ol>
</body>
</html>
diff --git a/tests/ref/html/heading-html-basic.html b/tests/ref/html/heading-html-basic.html
index 56b1e32b..54a22faf 100644
--- a/tests/ref/html/heading-html-basic.html
+++ b/tests/ref/html/heading-html-basic.html
@@ -5,26 +5,12 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <h2>
- Level 1
- </h2>
- <h3>
- Level 2
- </h3>
- <h4>
- Level 3
- </h4>
- <h5>
- Level 4
- </h5>
- <h6>
- Level 5
- </h6>
- <div role="heading" aria-level="7">
- Level 6
- </div>
- <div role="heading" aria-level="8">
- Level 7
- </div>
+ <h2>Level 1</h2>
+ <h3>Level 2</h3>
+ <h4>Level 3</h4>
+ <h5>Level 4</h5>
+ <h6>Level 5</h6>
+ <div role="heading" aria-level="7">Level 6</div>
+ <div role="heading" aria-level="8">Level 7</div>
</body>
</html>
diff --git a/tests/ref/html/link-basic.html b/tests/ref/html/link-basic.html
index 5d998667..89cb54db 100644
--- a/tests/ref/html/link-basic.html
+++ b/tests/ref/html/link-basic.html
@@ -5,17 +5,9 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <p>
- <a href="https://example.com/">https://example.com/</a>
- </p>
- <p>
- <a href="https://typst.org/">Some text text text</a>
- </p>
- <p>
- This link appears <a href="https://google.com/">in the middle of</a> a paragraph.
- </p>
- <p>
- Contact <a href="mailto:hi@typst.app">hi@typst.app</a> or call <a href="tel:123">123</a> for more information.
- </p>
+ <p><a href="https://example.com/">https://example.com/</a></p>
+ <p><a href="https://typst.org/">Some text text text</a></p>
+ <p>This link appears <a href="https://google.com/">in the middle of</a> a paragraph.</p>
+ <p>Contact <a href="mailto:hi@typst.app">hi@typst.app</a> or call <a href="tel:123">123</a> for more information.</p>
</body>
</html>
diff --git a/tests/ref/html/quote-attribution-link.html b/tests/ref/html/quote-attribution-link.html
index 4da8b47f..753807db 100644
--- a/tests/ref/html/quote-attribution-link.html
+++ b/tests/ref/html/quote-attribution-link.html
@@ -5,11 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <blockquote cite="https://typst.app/home">
- Compose papers faster
- </blockquote>
- <p>
- — <a href="https://typst.app/home">typst.com</a>
- </p>
+ <blockquote cite="https://typst.app/home"> Compose papers faster </blockquote>
+ <p>— <a href="https://typst.app/home">typst.com</a></p>
</body>
</html>
diff --git a/tests/ref/html/quote-nesting-html.html b/tests/ref/html/quote-nesting-html.html
index c652bd97..6b05a94a 100644
--- a/tests/ref/html/quote-nesting-html.html
+++ b/tests/ref/html/quote-nesting-html.html
@@ -5,8 +5,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <p>
- When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.
- </p>
+ <p>When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.</p>
</body>
</html>
diff --git a/tests/ref/html/quote-plato.html b/tests/ref/html/quote-plato.html
index fc052d10..f516adc2 100644
--- a/tests/ref/html/quote-plato.html
+++ b/tests/ref/html/quote-plato.html
@@ -5,17 +5,9 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
- <blockquote>
- … ἔοικα γοῦν τούτου γε σμικρῷ τινι αὐτῷ τούτῳ σοφώτερος εἶναι, ὅτι ἃ μὴ οἶδα οὐδὲ οἴομαι εἰδέναι.
- </blockquote>
- <p>
- — Plato
- </p>
- <blockquote>
- … I seem, then, in just this little thing to be wiser than this man at any rate, that what I do not know I do not think I know either.
- </blockquote>
- <p>
- — from the Henry Cary literal translation of 1897
- </p>
+ <blockquote> … ἔοικα γοῦν τούτου γε σμικρῷ τινι αὐτῷ τούτῳ σοφώτερος εἶναι, ὅτι ἃ μὴ οἶδα οὐδὲ οἴομαι εἰδέναι. </blockquote>
+ <p>— Plato</p>
+ <blockquote> … I seem, then, in just this little thing to be wiser than this man at any rate, that what I do not know I do not think I know either. </blockquote>
+ <p>— from the Henry Cary literal translation of 1897</p>
</body>
</html>