diff options
| -rw-r--r-- | crates/typst-html/src/encode.rs | 52 | ||||
| -rw-r--r-- | crates/typst-library/src/html/dom.rs | 94 | ||||
| -rw-r--r-- | tests/ref/html/basic-table.html | 22 | ||||
| -rw-r--r-- | tests/ref/html/block-html.html | 8 | ||||
| -rw-r--r-- | tests/ref/html/box-html.html | 4 | ||||
| -rw-r--r-- | tests/ref/html/enum-start.html | 3 | ||||
| -rw-r--r-- | tests/ref/html/heading-html-basic.html | 28 | ||||
| -rw-r--r-- | tests/ref/html/link-basic.html | 16 | ||||
| -rw-r--r-- | tests/ref/html/quote-attribution-link.html | 8 | ||||
| -rw-r--r-- | tests/ref/html/quote-nesting-html.html | 4 | ||||
| -rw-r--r-- | tests/ref/html/quote-plato.html | 16 |
11 files changed, 135 insertions, 120 deletions
diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index 71422a0f..612f923f 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -2,7 +2,7 @@ use std::fmt::Write; use typst_library::diag::{bail, At, SourceResult, StrResult}; use typst_library::foundations::Repr; -use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode}; +use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag}; use typst_library::layout::Frame; use typst_syntax::Span; @@ -20,10 +20,11 @@ pub fn html(document: &HtmlDocument) -> SourceResult<String> { #[derive(Default)] struct Writer { + /// The output buffer. buf: String, - /// current indentation level + /// The current indentation level level: usize, - /// pretty printing enabled? + /// Whether pretty printing is enabled. pretty: bool, } @@ -88,26 +89,32 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { let pretty = w.pretty; if !element.children.is_empty() { - w.pretty &= is_pretty(element); + let pretty_inside = allows_pretty_inside(element.tag) + && element.children.iter().any(|node| match node { + HtmlNode::Element(child) => wants_pretty_around(child.tag), + _ => false, + }); + + w.pretty &= pretty_inside; let mut indent = w.pretty; w.level += 1; for c in &element.children { - let pretty_child = match c { + let pretty_around = match c { HtmlNode::Tag(_) => continue, - HtmlNode::Element(element) => is_pretty(element), + HtmlNode::Element(child) => w.pretty && wants_pretty_around(child.tag), HtmlNode::Text(..) | HtmlNode::Frame(_) => false, }; - if core::mem::take(&mut indent) || pretty_child { + if core::mem::take(&mut indent) || pretty_around { write_indent(w); } write_node(w, c)?; - indent = pretty_child; + indent = pretty_around; } w.level -= 1; - write_indent(w) + write_indent(w); } w.pretty = pretty; @@ -118,12 +125,27 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { Ok(()) } -/// Whether the element should be pretty-printed. -fn is_pretty(element: &HtmlElement) -> bool { - matches!( - element.tag, - tag::meta | tag::table | tag::thead | tag::tbody | tag::tfoot | tag::tr - ) || tag::is_block_by_default(element.tag) +/// Whether we are allowed to add an extra newline at the start and end of the +/// element's contents. +/// +/// Technically, users can change CSS `display` properties such that the +/// insertion of whitespace may actually impact the visual output. For example, +/// <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how adding CSS +/// rules to `<p>` can make it sensitive to whitespace. For this reason, we +/// should also respect the `style` tag in the future. +fn allows_pretty_inside(tag: HtmlTag) -> bool { + (tag::is_block_by_default(tag) && tag != tag::pre) + || tag::is_tabular_by_default(tag) + || tag == tag::li +} + +/// Whether newlines should be added before and after the element if the parent +/// allows it. +/// +/// In contrast to `allows_pretty_inside`, which is purely spec-driven, this is +/// more subjective and depends on preference. +fn wants_pretty_around(tag: HtmlTag) -> bool { + allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre } /// Escape a character. diff --git a/crates/typst-library/src/html/dom.rs b/crates/typst-library/src/html/dom.rs index 2acd839d..1b725d54 100644 --- a/crates/typst-library/src/html/dom.rs +++ b/crates/typst-library/src/html/dom.rs @@ -475,17 +475,55 @@ pub mod tag { wbr } + /// Whether this is a void tag whose associated element may not have a + /// children. + pub fn is_void(tag: HtmlTag) -> bool { + matches!( + tag, + self::area + | self::base + | self::br + | self::col + | self::embed + | self::hr + | self::img + | self::input + | self::link + | self::meta + | self::param + | self::source + | self::track + | self::wbr + ) + } + + /// Whether this is a tag containing raw text. + pub fn is_raw(tag: HtmlTag) -> bool { + matches!(tag, self::script | self::style) + } + + /// Whether this is a tag containing escapable raw text. + pub fn is_escapable_raw(tag: HtmlTag) -> bool { + matches!(tag, self::textarea | self::title) + } + + /// Whether an element is considered metadata. + pub fn is_metadata(tag: HtmlTag) -> bool { + matches!( + tag, + self::base + | self::link + | self::meta + | self::noscript + | self::script + | self::style + | self::template + | self::title + ) + } + /// Whether nodes with the tag have the CSS property `display: block` by /// default. - /// - /// If this is true, then pretty-printing can insert spaces around such - /// nodes and around the contents of such nodes. - /// - /// However, when users change the properties of such tags via CSS, the - /// insertion of whitespace may actually impact the visual output; for - /// example, <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how - /// adding CSS rules to `<p>` can make it sensitive to whitespace. In such - /// cases, users should disable pretty-printing. pub fn is_block_by_default(tag: HtmlTag) -> bool { matches!( tag, @@ -572,37 +610,23 @@ pub mod tag { ) } - /// Whether this is a void tag whose associated element may not have a - /// children. - pub fn is_void(tag: HtmlTag) -> bool { + /// Whether nodes with the tag have the CSS property `display: table(-.*)?` + /// by default. + pub fn is_tabular_by_default(tag: HtmlTag) -> bool { matches!( tag, - self::area - | self::base - | self::br + self::table + | self::thead + | self::tbody + | self::tfoot + | self::tr + | self::th + | self::td + | self::caption | self::col - | self::embed - | self::hr - | self::img - | self::input - | self::link - | self::meta - | self::param - | self::source - | self::track - | self::wbr + | self::colgroup ) } - - /// Whether this is a tag containing raw text. - pub fn is_raw(tag: HtmlTag) -> bool { - matches!(tag, self::script | self::style) - } - - /// Whether this is a tag containing escapable raw text. - pub fn is_escapable_raw(tag: HtmlTag) -> bool { - matches!(tag, self::textarea | self::title) - } } /// Predefined constants for HTML attributes. diff --git a/tests/ref/html/basic-table.html b/tests/ref/html/basic-table.html index 6ba1864e..189a5b31 100644 --- a/tests/ref/html/basic-table.html +++ b/tests/ref/html/basic-table.html @@ -8,26 +8,36 @@ <table> <thead> <tr> - <th>The</th><th>first</th><th>and</th> + <th>The</th> + <th>first</th> + <th>and</th> </tr> <tr> - <th>the</th><th>second</th><th>row</th> + <th>the</th> + <th>second</th> + <th>row</th> </tr> </thead> <tbody> <tr> - <td>Foo</td><td rowspan="2">Baz</td><td>Bar</td> + <td>Foo</td> + <td rowspan="2">Baz</td> + <td>Bar</td> </tr> <tr> - <td>1</td><td>2</td> + <td>1</td> + <td>2</td> </tr> <tr> - <td colspan="2">3</td><td>4</td> + <td colspan="2">3</td> + <td>4</td> </tr> </tbody> <tfoot> <tr> - <td>The</td><td>last</td><td>row</td> + <td>The</td> + <td>last</td> + <td>row</td> </tr> </tfoot> </table> diff --git a/tests/ref/html/block-html.html b/tests/ref/html/block-html.html index 98d971b8..d1716c6d 100644 --- a/tests/ref/html/block-html.html +++ b/tests/ref/html/block-html.html @@ -5,11 +5,7 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <p> - Paragraph - </p> - <div> - Div - </div> + <p>Paragraph</p> + <div>Div</div> </body> </html> diff --git a/tests/ref/html/box-html.html b/tests/ref/html/box-html.html index 5c970a6b..b2a26533 100644 --- a/tests/ref/html/box-html.html +++ b/tests/ref/html/box-html.html @@ -5,8 +5,6 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <p> - Text <span style="display: inline-block;">Span</span>. - </p> + <p>Text <span style="display: inline-block;">Span</span>.</p> </body> </html> diff --git a/tests/ref/html/enum-start.html b/tests/ref/html/enum-start.html index 8a4ff37f..fc9b3c06 100644 --- a/tests/ref/html/enum-start.html +++ b/tests/ref/html/enum-start.html @@ -6,7 +6,8 @@ </head> <body> <ol start="3"> - <li>Skipping</li><li>Ahead</li> + <li>Skipping</li> + <li>Ahead</li> </ol> </body> </html> diff --git a/tests/ref/html/heading-html-basic.html b/tests/ref/html/heading-html-basic.html index 56b1e32b..54a22faf 100644 --- a/tests/ref/html/heading-html-basic.html +++ b/tests/ref/html/heading-html-basic.html @@ -5,26 +5,12 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <h2> - Level 1 - </h2> - <h3> - Level 2 - </h3> - <h4> - Level 3 - </h4> - <h5> - Level 4 - </h5> - <h6> - Level 5 - </h6> - <div role="heading" aria-level="7"> - Level 6 - </div> - <div role="heading" aria-level="8"> - Level 7 - </div> + <h2>Level 1</h2> + <h3>Level 2</h3> + <h4>Level 3</h4> + <h5>Level 4</h5> + <h6>Level 5</h6> + <div role="heading" aria-level="7">Level 6</div> + <div role="heading" aria-level="8">Level 7</div> </body> </html> diff --git a/tests/ref/html/link-basic.html b/tests/ref/html/link-basic.html index 5d998667..89cb54db 100644 --- a/tests/ref/html/link-basic.html +++ b/tests/ref/html/link-basic.html @@ -5,17 +5,9 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <p> - <a href="https://example.com/">https://example.com/</a> - </p> - <p> - <a href="https://typst.org/">Some text text text</a> - </p> - <p> - This link appears <a href="https://google.com/">in the middle of</a> a paragraph. - </p> - <p> - Contact <a href="mailto:hi@typst.app">hi@typst.app</a> or call <a href="tel:123">123</a> for more information. - </p> + <p><a href="https://example.com/">https://example.com/</a></p> + <p><a href="https://typst.org/">Some text text text</a></p> + <p>This link appears <a href="https://google.com/">in the middle of</a> a paragraph.</p> + <p>Contact <a href="mailto:hi@typst.app">hi@typst.app</a> or call <a href="tel:123">123</a> for more information.</p> </body> </html> diff --git a/tests/ref/html/quote-attribution-link.html b/tests/ref/html/quote-attribution-link.html index 4da8b47f..753807db 100644 --- a/tests/ref/html/quote-attribution-link.html +++ b/tests/ref/html/quote-attribution-link.html @@ -5,11 +5,7 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <blockquote cite="https://typst.app/home"> - Compose papers faster - </blockquote> - <p> - — <a href="https://typst.app/home">typst.com</a> - </p> + <blockquote cite="https://typst.app/home"> Compose papers faster </blockquote> + <p>— <a href="https://typst.app/home">typst.com</a></p> </body> </html> diff --git a/tests/ref/html/quote-nesting-html.html b/tests/ref/html/quote-nesting-html.html index c652bd97..6b05a94a 100644 --- a/tests/ref/html/quote-nesting-html.html +++ b/tests/ref/html/quote-nesting-html.html @@ -5,8 +5,6 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <p> - When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused. - </p> + <p>When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.</p> </body> </html> diff --git a/tests/ref/html/quote-plato.html b/tests/ref/html/quote-plato.html index fc052d10..f516adc2 100644 --- a/tests/ref/html/quote-plato.html +++ b/tests/ref/html/quote-plato.html @@ -5,17 +5,9 @@ <meta name="viewport" content="width=device-width, initial-scale=1"> </head> <body> - <blockquote> - … ἔοικα γοῦν τούτου γε σμικρῷ τινι αὐτῷ τούτῳ σοφώτερος εἶναι, ὅτι ἃ μὴ οἶδα οὐδὲ οἴομαι εἰδέναι. - </blockquote> - <p> - — Plato - </p> - <blockquote> - … I seem, then, in just this little thing to be wiser than this man at any rate, that what I do not know I do not think I know either. - </blockquote> - <p> - — from the Henry Cary literal translation of 1897 - </p> + <blockquote> … ἔοικα γοῦν τούτου γε σμικρῷ τινι αὐτῷ τούτῳ σοφώτερος εἶναι, ὅτι ἃ μὴ οἶδα οὐδὲ οἴομαι εἰδέναι. </blockquote> + <p>— Plato</p> + <blockquote> … I seem, then, in just this little thing to be wiser than this man at any rate, that what I do not know I do not think I know either. </blockquote> + <p>— from the Henry Cary literal translation of 1897</p> </body> </html> |
