Tidy up raw blocks 🧹

- Better trimming (only trim at the end if necessary)
- Fixed block-level layouting
- Improved pretty printing
- Flip inline variable to block
- Flip inline variable to display for math formulas
author: Laurenz <laurmaedje@gmail.com> 2021-02-03 21:30:36 +0100
committer: Laurenz <laurmaedje@gmail.com> 2021-02-03 21:34:49 +0100
commit: d86a5e8a1f469dd79abf3137dba77a71fae2a774 (patch)
tree: fc7ab35d999322b9d124e41ab80948df23965d26 /src/parse
parent: 6fcef9973be4253e5b377251dd9d1921f9738fc1 (diff)
4 files changed, 60 insertions, 48 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 3fc7d483..3fd2cca5 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -59,14 +59,14 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
         Token::Underscore => Node::Emph,
         Token::Eq => {
             if *at_start {
-                return Some(Node::Heading(heading(p)));
+                return Some(heading(p));
             } else {
-                Node::Text(p.get(p.peek_span()).into())
+                Node::Text(p.peek_src().into())
             }
         }
         Token::Tilde => Node::Text("\u{00A0}".into()),
         Token::Backslash => Node::Linebreak,
-        Token::Raw(t) => Node::Raw(raw(p, t)),
+        Token::Raw(t) => raw(p, t),
         Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)),
 
         // Keywords.
@@ -122,7 +122,7 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
 }
 
 /// Parse a heading.
-fn heading(p: &mut Parser) -> NodeHeading {
+fn heading(p: &mut Parser) -> Node {
     // Count depth.
     let mut level = p.span(|p| {
         p.assert(&[Token::Eq]);
@@ -147,16 +147,16 @@ fn heading(p: &mut Parser) -> NodeHeading {
         }
     }
 
-    NodeHeading { level, contents }
+    Node::Heading(NodeHeading { level, contents })
 }
 
 /// Handle a raw block.
-fn raw(p: &mut Parser, token: TokenRaw) -> NodeRaw {
+fn raw(p: &mut Parser, token: TokenRaw) -> Node {
     let raw = resolve::resolve_raw(token.text, token.backticks);
     if !token.terminated {
         p.diag(error!(p.peek_span().end, "expected backtick(s)"));
     }
-    raw
+    Node::Raw(raw)
 }
 
 /// Handle a unicode escape sequence.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index b7767772..986a36b0 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -243,6 +243,11 @@ impl<'s> Parser<'s> {
         )
     }
 
+    /// Peek at the source of the next token.
+    pub fn peek_src(&self) -> &'s str {
+        self.get(self.peek_span())
+    }
+
     /// Checks whether the next token fulfills a condition.
     ///
     /// Returns `false` if there is no next token.
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 3adbf11f..a5e831da 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -54,13 +54,13 @@ pub fn resolve_raw(text: &str, backticks: usize) -> NodeRaw {
         NodeRaw {
             lang: Ident::new(tag),
             lines,
-            inline: !had_newline,
+            block: had_newline,
         }
     } else {
         NodeRaw {
             lang: None,
             lines: split_lines(text),
-            inline: true,
+            block: false,
         }
     }
 }
@@ -77,10 +77,14 @@ fn split_at_lang_tag(raw: &str) -> (&str, &str) {
 /// Trim raw text and splits it into lines.
 ///
 /// Returns whether at least one newline was contained in `raw`.
-fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
-    // Trims one whitespace at end and start.
-    let raw = raw.strip_prefix(' ').unwrap_or(raw);
-    let raw = raw.strip_suffix(' ').unwrap_or(raw);
+fn trim_and_split_raw(mut raw: &str) -> (Vec<String>, bool) {
+    // Trims one space at the start.
+    raw = raw.strip_prefix(' ').unwrap_or(raw);
+
+    // Trim one space at the end if the last non-whitespace char is a backtick.
+    if raw.trim_end().ends_with('`') {
+        raw = raw.strip_suffix(' ').unwrap_or(raw);
+    }
 
     let mut lines = split_lines(raw);
     let had_newline = lines.len() > 1;
@@ -167,29 +171,29 @@ mod tests {
             backticks: usize,
             lang: Option<&str>,
             lines: &[&str],
-            inline: bool,
+            block: bool,
         ) {
             assert_eq!(resolve_raw(raw, backticks), NodeRaw {
                 lang: lang.map(|id| Ident(id.into())),
                 lines: lines.iter().map(ToString::to_string).collect(),
-                inline,
+                block,
             });
         }
 
         // Just one backtick.
-        test("py",     1, None, &["py"],     true);
-        test("1\n2",   1, None, &["1", "2"], true);
-        test("1\r\n2", 1, None, &["1", "2"], true);
+        test("py",     1, None, &["py"],     false);
+        test("1\n2",   1, None, &["1", "2"], false);
+        test("1\r\n2", 1, None, &["1", "2"], false);
 
         // More than one backtick with lang tag.
-        test("js alert()",     2, Some("js"), &["alert()"],        true);
-        test("py quit(\n\n) ", 3, Some("py"), &["quit(", "", ")"], false);
-        test("♥",              2, None,       &[],                 true);
+        test("js alert()",     2, Some("js"), &["alert()"],        false);
+        test("py quit(\n\n)",  3, Some("py"), &["quit(", "", ")"], true);
+        test("♥",              2, None,       &[],                 false);
 
         // Trimming of whitespace (tested more thoroughly in separate test).
-        test(" a",   2, None, &["a"],  true);
-        test("  a",  2, None, &[" a"], true);
-        test(" \na", 2, None, &["a"],  false);
+        test(" a",   2, None, &["a"],  false);
+        test("  a",  2, None, &[" a"], false);
+        test(" \na", 2, None, &["a"],  true);
     }
 
     #[test]
@@ -203,8 +207,11 @@ mod tests {
         test("  hi",         vec![" hi"]);
         test("\nhi",         vec!["hi"]);
         test("    \n hi",    vec![" hi"]);
-        test("hi ",          vec!["hi"]);
-        test("hi  ",         vec!["hi "]);
+        test("hi` ",         vec!["hi`"]);
+        test("hi`  ",        vec!["hi` "]);
+        test("hi`   ",       vec!["hi`  "]);
+        test("hi ",          vec!["hi "]);
+        test("hi  ",         vec!["hi  "]);
         test("hi\n",         vec!["hi"]);
         test("hi \n   ",     vec!["hi "]);
         test("  \n hi \n  ", vec![" hi "]);
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 405352c3..e3550707 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -254,22 +254,22 @@ impl<'s> Tokens<'s> {
     }
 
     fn math(&mut self) -> Token<'s> {
-        let mut inline = true;
+        let mut display = false;
         if self.s.eat_if('[') {
-            inline = false;
+            display = true;
         }
 
         let start = self.s.index();
 
         let mut escaped = false;
-        let mut dollar = inline;
+        let mut dollar = !display;
 
         let terminated = loop {
             match self.s.eat() {
                 Some('$') if !escaped && dollar => break true,
                 Some(']') if !escaped => dollar = true,
                 Some(c) => {
-                    dollar = inline;
+                    dollar = !display;
                     escaped = c == '\\' && !escaped;
                 }
                 None => break false,
@@ -277,15 +277,15 @@ impl<'s> Tokens<'s> {
         };
 
         let end = self.s.index()
-            - match (terminated, inline) {
+            - match (terminated, display) {
                 (false, _) => 0,
-                (true, true) => 1,
-                (true, false) => 2,
+                (true, false) => 1,
+                (true, true) => 2,
             };
 
         Token::Math(TokenMath {
             formula: self.s.get(start .. end),
-            inline,
+            display,
             terminated,
         })
     }
@@ -470,8 +470,8 @@ mod tests {
         Token::Raw(TokenRaw { text, backticks, terminated })
     }
 
-    const fn Math(formula: &str, inline: bool, terminated: bool) -> Token {
-        Token::Math(TokenMath { formula, inline, terminated })
+    const fn Math(formula: &str, display: bool, terminated: bool) -> Token {
+        Token::Math(TokenMath { formula, display, terminated })
     }
 
     const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
@@ -527,7 +527,7 @@ mod tests {
         ('/', None, "//", LineComment("")),
         ('/', None, "/**/", BlockComment("")),
         ('/', Some(Markup), "*", Star),
-        ('/', Some(Markup), "$ $", Math(" ", true, true)),
+        ('/', Some(Markup), "$ $", Math(" ", false, true)),
         ('/', Some(Markup), r"\\", Text(r"\")),
         ('/', Some(Markup), "#let", Let),
         ('/', Some(Code), "#if", If),
@@ -752,21 +752,21 @@ mod tests {
     #[test]
     fn test_tokenize_math_formulas() {
         // Test basic formula.
-        t!(Markup: "$$"        => Math("", true, true));
-        t!(Markup: "$x$"       => Math("x", true, true));
-        t!(Markup: r"$\\$"     => Math(r"\\", true, true));
-        t!(Markup: "$[x + y]$" => Math("x + y", false, true));
-        t!(Markup: r"$[\\]$"   => Math(r"\\", false, true));
+        t!(Markup: "$$"        => Math("", false, true));
+        t!(Markup: "$x$"       => Math("x", false, true));
+        t!(Markup: r"$\\$"     => Math(r"\\", false, true));
+        t!(Markup: "$[x + y]$" => Math("x + y", true, true));
+        t!(Markup: r"$[\\]$"   => Math(r"\\", true, true));
 
         // Test unterminated.
-        t!(Markup[""]: "$x"      => Math("x", true, false));
-        t!(Markup[""]: "$[x"     => Math("x", false, false));
-        t!(Markup[""]: "$[x]\n$" => Math("x]\n$", false, false));
+        t!(Markup[""]: "$x"      => Math("x", false, false));
+        t!(Markup[""]: "$[x"     => Math("x", true, false));
+        t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, false));
 
         // Test escape sequences.
-        t!(Markup: r"$\$x$"       => Math(r"\$x", true, true));
-        t!(Markup: r"$[\\\]$]$"   => Math(r"\\\]$", false, true));
-        t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", false, false));
+        t!(Markup: r"$\$x$"       => Math(r"\$x", false, true));
+        t!(Markup: r"$[\\\]$]$"   => Math(r"\\\]$", true, true));
+        t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false));
     }
 
     #[test]
author	Laurenz <laurmaedje@gmail.com>	2021-02-03 21:30:36 +0100
committer	Laurenz <laurmaedje@gmail.com>	2021-02-03 21:34:49 +0100
commit	d86a5e8a1f469dd79abf3137dba77a71fae2a774 (patch)
tree	fc7ab35d999322b9d124e41ab80948df23965d26 /src/parse
parent	6fcef9973be4253e5b377251dd9d1921f9738fc1 (diff)