Tidy up raw blocks 🧹

- Better trimming (only trim at the end if necessary)
- Fixed block-level layouting
- Improved pretty printing
- Flip inline variable to block
- Flip inline variable to display for math formulas
author: Laurenz <laurmaedje@gmail.com> 2021-02-03 21:30:36 +0100
committer: Laurenz <laurmaedje@gmail.com> 2021-02-03 21:34:49 +0100
commit: d86a5e8a1f469dd79abf3137dba77a71fae2a774 (patch)
tree: fc7ab35d999322b9d124e41ab80948df23965d26 /src/syntax
parent: 6fcef9973be4253e5b377251dd9d1921f9738fc1 (diff)
5 files changed, 162 insertions, 99 deletions
diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs
index ebe82199..a681aa32 100644
--- a/src/syntax/expr.rs
+++ b/src/syntax/expr.rs
@@ -62,24 +62,28 @@ impl Pretty for Expr {
     fn pretty(&self, p: &mut Printer) {
         match self {
             Self::None => p.push_str("none"),
-            Self::Ident(v) => p.push_str(&v),
-            Self::Bool(v) => write!(p, "{}", v).unwrap(),
-            Self::Int(v) => p.push_str(itoa::Buffer::new().format(*v)),
-            Self::Float(v) => p.push_str(ryu::Buffer::new().format(*v)),
-            Self::Length(v, u) => write!(p, "{}{}", v, u).unwrap(),
-            Self::Angle(v, u) => write!(p, "{}{}", v, u).unwrap(),
-            Self::Percent(v) => write!(p, "{}%", v).unwrap(),
-            Self::Color(v) => write!(p, "{}", v).unwrap(),
-            // TODO: Debug escapes a bit more than we want (e.g. apostrophes).
-            // We probably need to do the escaping ourselves.
-            Self::Str(v) => write!(p, "{:?}", &v).unwrap(),
+            Self::Ident(v) => v.pretty(p),
+            Self::Bool(v) => v.pretty(p),
+            Self::Int(v) => v.pretty(p),
+            Self::Float(v) => v.pretty(p),
+            Self::Length(v, u) => {
+                write!(p, "{}{}", ryu::Buffer::new().format(*v), u).unwrap();
+            }
+            Self::Angle(v, u) => {
+                write!(p, "{}{}", ryu::Buffer::new().format(*v), u).unwrap();
+            }
+            Self::Percent(v) => {
+                write!(p, "{}%", ryu::Buffer::new().format(*v)).unwrap();
+            }
+            Self::Color(v) => v.pretty(p),
+            Self::Str(v) => v.pretty(p),
             Self::Array(v) => v.pretty(p),
             Self::Dict(v) => v.pretty(p),
             Self::Template(v) => pretty_template(v, p),
             Self::Group(v) => {
-                p.push_str("(");
+                p.push('(');
                 v.v.pretty(p);
-                p.push_str(")");
+                p.push(')');
             }
             Self::Block(v) => v.pretty(p),
             Self::Unary(v) => v.pretty(p),
@@ -98,12 +102,12 @@ pub type ExprArray = SpanVec<Expr>;
 
 impl Pretty for ExprArray {
     fn pretty(&self, p: &mut Printer) {
-        p.push_str("(");
+        p.push('(');
         p.join(self, ", ", |item, p| item.v.pretty(p));
         if self.len() == 1 {
-            p.push_str(",");
+            p.push(',');
         }
-        p.push_str(")");
+        p.push(')');
     }
 }
 
@@ -112,13 +116,13 @@ pub type ExprDict = Vec<Named>;
 
 impl Pretty for ExprDict {
     fn pretty(&self, p: &mut Printer) {
-        p.push_str("(");
+        p.push('(');
         if self.is_empty() {
-            p.push_str(":");
+            p.push(':');
         } else {
             p.join(self, ", ", |named, p| named.pretty(p));
         }
-        p.push_str(")");
+        p.push(')');
     }
 }
 
@@ -133,7 +137,7 @@ pub struct Named {
 
 impl Pretty for Named {
     fn pretty(&self, p: &mut Printer) {
-        p.push_str(&self.name.v);
+        self.name.v.pretty(p);
         p.push_str(": ");
         self.expr.v.pretty(p);
     }
@@ -147,9 +151,9 @@ pub fn pretty_template(template: &ExprTemplate, p: &mut Printer) {
     if let [Spanned { v: Node::Expr(Expr::Call(call)), .. }] = template.as_slice() {
         pretty_func_template(call, p, false)
     } else {
-        p.push_str("[");
+        p.push('[');
         template.pretty(p);
-        p.push_str("]");
+        p.push(']');
     }
 }
 
@@ -167,15 +171,15 @@ pub struct ExprBlock {
 
 impl Pretty for ExprBlock {
     fn pretty(&self, p: &mut Printer) {
-        p.push_str("{");
+        p.push('{');
         if self.exprs.len() > 1 {
-            p.push_str(" ");
+            p.push(' ');
         }
         p.join(&self.exprs, "; ", |expr, p| expr.v.pretty(p));
         if self.exprs.len() > 1 {
-            p.push_str(" ");
+            p.push(' ');
         }
-        p.push_str("}");
+        p.push('}');
     }
 }
 
@@ -192,7 +196,7 @@ impl Pretty for ExprUnary {
     fn pretty(&self, p: &mut Printer) {
         self.op.v.pretty(p);
         if self.op.v == UnOp::Not {
-            p.push_str(" ");
+            p.push(' ');
         }
         self.expr.v.pretty(p);
     }
@@ -258,9 +262,9 @@ pub struct ExprBinary {
 impl Pretty for ExprBinary {
     fn pretty(&self, p: &mut Printer) {
         self.lhs.v.pretty(p);
-        p.push_str(" ");
+        p.push(' ');
         self.op.v.pretty(p);
-        p.push_str(" ");
+        p.push(' ');
         self.rhs.v.pretty(p);
     }
 }
@@ -419,9 +423,9 @@ pub struct ExprCall {
 impl Pretty for ExprCall {
     fn pretty(&self, p: &mut Printer) {
         self.callee.v.pretty(p);
-        p.push_str("(");
+        p.push('(');
         self.args.v.pretty(p);
-        p.push_str(")");
+        p.push(')');
     }
 }
 
@@ -444,7 +448,7 @@ pub fn pretty_func_template(call: &ExprCall, p: &mut Printer, chained: bool) {
     {
         // Previous arguments.
         if !head.is_empty() {
-            p.push_str(" ");
+            p.push(' ');
             p.join(head, ", ", |item, p| item.pretty(p));
         }
 
@@ -458,12 +462,12 @@ pub fn pretty_func_template(call: &ExprCall, p: &mut Printer, chained: bool) {
             template.pretty(p);
         }
     } else if !call.args.v.is_empty() {
-        p.push_str(" ");
+        p.push(' ');
         call.args.v.pretty(p);
     }
 
     // Either end of header or end of body.
-    p.push_str("]");
+    p.push(']');
 }
 
 /// The arguments to a function: `12, draw: false`.
@@ -508,7 +512,7 @@ pub struct ExprLet {
 impl Pretty for ExprLet {
     fn pretty(&self, p: &mut Printer) {
         p.push_str("#let ");
-        p.push_str(&self.pat.v);
+        self.pat.v.pretty(p);
         if let Some(init) = &self.init {
             p.push_str(" = ");
             init.v.pretty(p);
@@ -531,7 +535,7 @@ impl Pretty for ExprIf {
     fn pretty(&self, p: &mut Printer) {
         p.push_str("#if ");
         self.condition.v.pretty(p);
-        p.push_str(" ");
+        p.push(' ');
         self.if_body.v.pretty(p);
         if let Some(expr) = &self.else_body {
             p.push_str(" #else ");
@@ -557,7 +561,7 @@ impl Pretty for ExprFor {
         self.pat.v.pretty(p);
         p.push_str(" #in ");
         self.iter.v.pretty(p);
-        p.push_str(" ");
+        p.push(' ');
         self.body.v.pretty(p);
     }
 }
@@ -574,11 +578,11 @@ pub enum ForPattern {
 impl Pretty for ForPattern {
     fn pretty(&self, p: &mut Printer) {
         match self {
-            Self::Value(v) => p.push_str(&v),
+            Self::Value(v) => v.pretty(p),
             Self::KeyValue(k, v) => {
-                p.push_str(&k);
+                k.pretty(p);
                 p.push_str(", ");
-                p.push_str(&v);
+                v.pretty(p);
             }
         }
     }
diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs
index 3cb47c47..c4cc19bc 100644
--- a/src/syntax/ident.rs
+++ b/src/syntax/ident.rs
@@ -2,6 +2,8 @@ use std::ops::Deref;
 
 use unicode_xid::UnicodeXID;
 
+use crate::pretty::{Pretty, Printer};
+
 /// An Unicode identifier with a few extra permissible characters.
 ///
 /// In addition to what is specified in the [Unicode Standard][uax31], we allow:
@@ -28,6 +30,12 @@ impl Ident {
     }
 }
 
+impl Pretty for Ident {
+    fn pretty(&self, p: &mut Printer) {
+        p.push_str(self.as_str());
+    }
+}
+
 impl AsRef<str> for Ident {
     fn as_ref(&self) -> &str {
         self
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 409e8cbf..2a8c4dbb 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -64,9 +64,18 @@ mod tests {
 
         // Raw.
         roundtrip("``");
-        roundtrip("`lang 1`");
-        test("``` hi```", "`hi`");
-        test("``` ` ```", "```");
+        roundtrip("`nolang 1`");
+        roundtrip("```lang 1```");
+        roundtrip("```lang 1 ```");
+        roundtrip("```hi  line  ```");
+        roundtrip("```py\ndef\n```");
+        roundtrip("```\n line \n```");
+        roundtrip("```\n`\n```");
+        roundtrip("``` ` ```");
+        test("```1 ```", "``");
+        test("``` 1```", "`1`");
+        test("``` 1 ```", "`1 `");
+        test("```` ` ````", "``` ` ```");
     }
 
     #[test]
@@ -77,12 +86,12 @@ mod tests {
         roundtrip("{true}");
         roundtrip("{10}");
         roundtrip("{3.14}");
-        roundtrip("{10pt}");
+        roundtrip("{10.0pt}");
         roundtrip("{14.1deg}");
-        roundtrip("{20%}");
+        roundtrip("{20.0%}");
         roundtrip("{#abcdef}");
         roundtrip(r#"{"hi"}"#);
-        test(r#"{"let's go"}"#, r#"{"let\'s go"}"#);
+        test(r#"{"let's \" go"}"#, r#"{"let's \" go"}"#);
 
         // Arrays.
         roundtrip("{()}");
diff --git a/src/syntax/node.rs b/src/syntax/node.rs
index f7625036..7b6aa728 100644
--- a/src/syntax/node.rs
+++ b/src/syntax/node.rs
@@ -26,9 +26,9 @@ pub enum Node {
 impl Pretty for Node {
     fn pretty(&self, p: &mut Printer) {
         match self {
-            Self::Strong => p.push_str("*"),
-            Self::Emph => p.push_str("_"),
-            Self::Space => p.push_str(" "),
+            Self::Strong => p.push('*'),
+            Self::Emph => p.push('_'),
+            Self::Space => p.push(' '),
             Self::Linebreak => p.push_str(r"\"),
             Self::Parbreak => p.push_str("\n\n"),
             Self::Text(text) => p.push_str(&text),
@@ -46,10 +46,10 @@ impl Pretty for Node {
     }
 }
 
-/// A section heading: `# Introduction`.
+/// A section heading: `= Introduction`.
 #[derive(Debug, Clone, PartialEq)]
 pub struct NodeHeading {
-    /// The section depth (numer of hashtags minus 1, capped at 5).
+    /// The section depth (number of equals signs minus 1, capped at 5).
     pub level: Spanned<u8>,
     /// The contents of the heading.
     pub contents: Tree,
@@ -58,7 +58,7 @@ pub struct NodeHeading {
 impl Pretty for NodeHeading {
     fn pretty(&self, p: &mut Printer) {
         for _ in 0 ..= self.level.v {
-            p.push_str("=");
+            p.push('=');
         }
         self.contents.pretty(p);
     }
@@ -67,8 +67,7 @@ impl Pretty for NodeHeading {
 /// A raw block with optional syntax highlighting: `` `raw` ``.
 ///
 /// Raw blocks start with 1 or 3+ backticks and end with the same number of
-/// backticks. If you want to include a sequence of backticks in a raw block,
-/// simply surround the block with more backticks.
+/// backticks.
 ///
 /// When using at least three backticks, an optional language tag may follow
 /// directly after the backticks. This tag defines which language to
@@ -86,7 +85,7 @@ impl Pretty for NodeHeading {
 ///   ````typst
 ///   ```rust println!("hello!")```;
 ///   ````
-///   - Blocks can span multiple lines.
+/// - Blocks can span multiple lines.
 ///   ````typst
 ///   ```rust
 ///   loop {
@@ -94,34 +93,40 @@ impl Pretty for NodeHeading {
 ///   }
 ///   ```
 ///   ````
-///   - Start with a space to omit the language tag (the space will be trimmed
-///     from the output) and use more backticks to allow backticks in the raw
-///     text.
+/// - Start with a space to omit the language tag (the space will be trimmed
+///   from the output).
 ///   `````typst
-///   ```` This contains ```backticks``` and has no leading & trailing spaces. ````
+///   ```` This has no leading space.````
 ///   `````
+/// - Use more backticks to allow backticks in the raw text.
+///   `````typst
+///   ```` This contains ```backticks```.````
+///   `````
+///
+/// # Trimming
+/// If we always rendered the raw text between the backticks exactly as
+/// given, some things would become cumbersome or impossible to write:
+/// - Typical multiline code blocks (like in the example above) would have an
+///   additional newline before and after the code.
+/// - Multi-line blocks would need to start with a space since a word would be
+///   interpreted as a language tag.
+/// - Text ending with a backtick would be impossible since the backtick would
+///   be interpreted as belonging to the closing backticks.
 ///
-///   # Trimming
-///   If we would always render the raw text between the backticks exactly as
-///   given, a few things would become problematic or even impossible:
-///   - Typical multiline code blocks (like in the example above) would have an
-///     additional newline before and after the code.
-///   - The first word of text wrapped in more than three backticks would always
-///     be interpreted as a language tag which means that text without leading
-///     space would be impossible.
-///   - A single backtick without surrounding spaces could not exist as raw text
-///     since it would be interpreted as belonging to the opening or closing
-///     backticks.
+/// To fix these problems, we sometimes trim a bit of space from blocks with 3+
+/// backticks:
+/// - At the start, we trim a single space or a sequence of whitespace followed
+///   by a newline.
+/// - At the end, we trim
+///   - a single space if the raw text ends with a backtick followed only by
+///     whitespace,
+///   - a newline followed by a sequence of whitespace.
 ///
-///   To fix these problems, we trim blocks with 3+ backticks as follows:
-///   - A single space or a sequence of whitespace followed by a newline at the start.
-///   - A single space or a newline followed by a sequence of whitespace at the end.
+/// You can thus produce a single backtick without surrounding spaces with the
+/// sequence ```` ``` ` ``` ````.
 ///
-///   With these rules, a single raw backtick can be produced by the sequence
-///   ```` ``` ` ``` ````, ```` ``` unhighlighted text ``` ```` has no
-///   surrounding spaces and multiline code blocks don't have extra empty lines.
-///   Note that you can always force leading or trailing whitespace simply by
-///   adding more spaces.
+/// Note that with these rules you can always force leading or trailing
+/// whitespace simply by adding more spaces.
 #[derive(Debug, Clone, PartialEq)]
 pub struct NodeRaw {
     /// An optional identifier specifying the language to syntax-highlight in.
@@ -129,28 +134,65 @@ pub struct NodeRaw {
     /// The lines of raw text, determined as the raw string between the
     /// backticks trimmed according to the above rules and split at newlines.
     pub lines: Vec<String>,
-    /// Whether the element can be layouted inline.
-    ///
-    /// - When true, it will be layouted integrated within the surrounding
-    ///   paragraph.
-    /// - When false, it will be separated into its own paragraph.
-    ///
-    /// Single-backtick blocks are always inline-level. Multi-backtick blocks
-    /// are inline-level when they contain no newlines.
-    pub inline: bool,
+    /// Whether the element is block-level, that is, it has 3+ backticks
+    /// and contains at least one newline.
+    pub block: bool,
 }
 
 impl Pretty for NodeRaw {
     fn pretty(&self, p: &mut Printer) {
-        p.push_str("`");
+        // Find out how many backticks we need.
+        let mut backticks = 1;
+
+        // Language tag and block-level are only possible with 3+ backticks.
+        if self.lang.is_some() || self.block {
+            backticks = 3;
+        }
+
+        // More backticks may be required if there are lots of consecutive
+        // backticks in the lines.
+        let mut count = 0;
+        for line in &self.lines {
+            for c in line.chars() {
+                if c == '`' {
+                    count += 1;
+                    backticks = backticks.max(3).max(count + 1);
+                } else {
+                    count = 0;
+                }
+            }
+        }
+
+        // Starting backticks.
+        for _ in 0 .. backticks {
+            p.push('`');
+        }
+
+        // Language tag.
         if let Some(lang) = &self.lang {
-            p.push_str(&lang);
-            p.push_str(" ");
+            lang.pretty(p);
+        }
+
+        // Start untrimming.
+        if self.block {
+            p.push('\n');
+        } else if backticks >= 3 {
+            p.push(' ');
         }
-        // TODO: Technically, we should handle backticks in the lines by
-        // wrapping with more backticks, and we should add space before the
-        // first and/or after the last line if necessary.
+
+        // The lines.
         p.join(&self.lines, "\n", |line, p| p.push_str(line));
-        p.push_str("`");
+
+        // End untrimming.
+        if self.block {
+            p.push('\n');
+        } else if self.lines.last().map_or(false, |line| line.trim_end().ends_with('`')) {
+            p.push(' ');
+        }
+
+        // Ending backticks.
+        for _ in 0 .. backticks {
+            p.push('`');
+        }
     }
 }
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index c4b9ec8f..5e69a350 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -170,9 +170,9 @@ pub struct TokenRaw<'s> {
 pub struct TokenMath<'s> {
     /// The formula between the dollars.
     pub formula: &'s str,
-    /// Whether the formula was surrounded by one dollar (true) or two dollars
-    /// (false).
-    pub inline: bool,
+    /// Whether the formula is display-level, that is, it is surrounded by
+    /// `$[..]`.
+    pub display: bool,
     /// Whether the closing dollars were present.
     pub terminated: bool,
 }
@@ -243,8 +243,8 @@ impl<'s> Token<'s> {
             Self::Bool(_) => "boolean",
             Self::Int(_) => "integer",
             Self::Float(_) => "float",
-            Self::Length(..) => "length",
-            Self::Angle(..) => "angle",
+            Self::Length(_, _) => "length",
+            Self::Angle(_, _) => "angle",
             Self::Percent(_) => "percentage",
             Self::Color(_) => "color",
             Self::Str(_) => "string",
author	Laurenz <laurmaedje@gmail.com>	2021-02-03 21:30:36 +0100
committer	Laurenz <laurmaedje@gmail.com>	2021-02-03 21:34:49 +0100
commit	d86a5e8a1f469dd79abf3137dba77a71fae2a774 (patch)
tree	fc7ab35d999322b9d124e41ab80948df23965d26 /src/syntax
parent	6fcef9973be4253e5b377251dd9d1921f9738fc1 (diff)