summary refs log tree commit diff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-01-11 11:47:07 +0100
committerLaurenz <laurmaedje@gmail.com>2020-01-11 11:47:07 +0100
commitbd702c2029561a741f48095549a2b6ea97b3a09b (patch)
tree425d1c709bfec80ef043268cbe80eb728ded1225
parentb1e956419d94a0c3876891b3d6a4976cc4a3ab09 (diff)
Span tests
-rw-r--r--src/syntax/span.rs2
-rw-r--r--src/syntax/tokens.rs19
-rw-r--r--tests/parse.rs43
-rw-r--r--tests/parsing/tokens.rs112
4 files changed, 106 insertions, 70 deletions
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index bbb6a206..9bf7cafb 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -71,7 +71,7 @@ debug_display!(Span);
/// A line-column position in source code.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Position {
- /// The 1-indexed line (inclusive).
+ /// The 0-indexed line (inclusive).
pub line: usize,
/// The 0-indexed column (inclusive).
pub column: usize,
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index efcd1fc0..2e9dd35b 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -102,7 +102,7 @@ impl<'s> Iterator for Tokens<'s> {
'*' if second == Some('/') => { self.eat(); StarSlash }
// Whitespace.
- c if c.is_whitespace() => self.parse_whitespace(c),
+ c if c.is_whitespace() => self.parse_whitespace(start),
// Functions.
'[' => { self.set_state(Header); LeftBracket }
@@ -196,20 +196,11 @@ impl<'s> Tokens<'s> {
}, true, 0, -2))
}
- fn parse_whitespace(&mut self, c: char) -> Token<'s> {
- let mut newlines = if is_newline_char(c) { 1 } else { 0 };
- let mut last = c;
-
- self.read_string_until(|n| {
- if is_newline_char(n) && !(last == '\r' && n == '\n') {
- newlines += 1;
- }
-
- last = n;
- !n.is_whitespace()
- }, false, 0, 0);
+ fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
+ self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
+ let end = self.chars.position();
- Whitespace(newlines)
+ Whitespace(end.line - start.line)
}
fn parse_string(&mut self) -> Token<'s> {
diff --git a/tests/parse.rs b/tests/parse.rs
index e00b05d8..14a5b22d 100644
--- a/tests/parse.rs
+++ b/tests/parse.rs
@@ -23,12 +23,31 @@ fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
/// Parses the test syntax.
macro_rules! tokens {
- ($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
+ ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({
#[allow(unused_mut)]
let mut cases = Vec::new();
- $(cases.push(($line, $src, $tokens.to_vec()));)*
+ $(cases.push(($line, $src, tokens!(@$task [$($target)*])));)*
cases
});
+
+ (@t $tokens:expr) => ({
+ Target::Tokenized($tokens.to_vec())
+ });
+
+ (@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({
+ Target::TokenizedSpanned(vec![
+ $(Spanned { v: $t, span: Span {
+ start: Position { line: $sl, column: $sc },
+ end: Position { line: $el, column: $ec },
+ }}),*
+ ])
+ });
+}
+
+#[derive(Debug)]
+enum Target {
+ Tokenized(Vec<Token<'static>>),
+ TokenizedSpanned(Vec<Spanned<Token<'static>>>),
}
fn main() {
@@ -47,11 +66,11 @@ fn main() {
let mut failed = 0;
// Go through all tests in a test file.
- for (line, src, expected) in cases.into_iter() {
- let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
+ for (line, src, target) in cases.into_iter() {
+ let (correct, expected, found) = test_case(src, target);
// Check whether the tokenization works correctly.
- if found == expected {
+ if correct {
okay += 1;
} else {
if failed == 0 {
@@ -82,3 +101,17 @@ fn main() {
std::process::exit(-1);
}
}
+
+fn test_case(src: &str, target: Target) -> (bool, String, String) {
+ match target {
+ Target::Tokenized(tokens) => {
+ let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
+ (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
+ }
+
+ Target::TokenizedSpanned(tokens) => {
+ let found: Vec<_> = tokenize(src).collect();
+ (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
+ }
+ }
+}
diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs
index 4f5474bb..78d891f9 100644
--- a/tests/parsing/tokens.rs
+++ b/tests/parsing/tokens.rs
@@ -1,62 +1,74 @@
// Whitespace.
-"" => []
-" " => [W(0)]
-" " => [W(0)]
-"\t" => [W(0)]
-" \t" => [W(0)]
-"\n" => [W(1)]
-"\n " => [W(1)]
-" \n" => [W(1)]
-" \n " => [W(1)]
-" \n\t \n " => [W(2)]
-"\r\n" => [W(1)]
-" \r\r\n \x0D" => [W(3)]
-"\n\r" => [W(2)]
+t "" => []
+t " " => [W(0)]
+t " " => [W(0)]
+t "\t" => [W(0)]
+t " \t" => [W(0)]
+t "\n" => [W(1)]
+t "\n " => [W(1)]
+t " \n" => [W(1)]
+t " \n " => [W(1)]
+t " \n\t \n " => [W(2)]
+t "\r\n" => [W(1)]
+t " \r\r\n \x0D" => [W(3)]
+t "\n\r" => [W(2)]
// Comments.
-"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
-"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
-"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
-"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
-"/**/" => [BC("")]
-"_/*_/*a*/*/" => [U, BC("_/*a*/")]
-"/*/*/" => [BC("/*/")]
-"abc*/" => [T("abc"), SS]
+t "a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
+t "a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
+t "a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
+t "a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
+t "/**/" => [BC("")]
+t "_/*_/*a*/*/" => [U, BC("_/*a*/")]
+t "/*/*/" => [BC("/*/")]
+t "abc*/" => [T("abc"), SS]
// Header only tokens.
-"[" => [LB]
-"]" => [RB]
-"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
-"[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
-"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
-"[=]" => [LB, EQ, RB]
-"[,]" => [LB, CM, RB]
-"a: b" => [T("a"), T(":"), W(0), T("b")]
-"c=d, " => [T("c"), T("=d"), T(","), W(0)]
-r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
-r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
-"\"hi\"" => [T("\"hi"), T("\"")]
-"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
+t "[" => [LB]
+t "]" => [RB]
+t "[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
+t "[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
+t "[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
+t "[=]" => [LB, EQ, RB]
+t "[,]" => [LB, CM, RB]
+t "a: b" => [T("a"), T(":"), W(0), T("b")]
+t "c=d, " => [T("c"), T("=d"), T(","), W(0)]
+t r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
+t r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
+t "\"hi\"" => [T("\"hi"), T("\"")]
+t "[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
ID("x"), EQ, NUM(1.0), RB]
-"[120%]" => [LB, NUM(1.2), RB]
+t "[120%]" => [LB, NUM(1.2), RB]
// Body only tokens.
-"_*`" => [U, ST, B]
-"[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
-"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
+t "_*`" => [U, ST, B]
+t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
+t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
+t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Escapes.
-r"\[" => [T("[")]
-r"\]" => [T("]")]
-r"\\" => [T(r"\")]
-r"\/" => [T("/")]
-r"\*" => [T("*")]
-r"\_" => [T("_")]
-r"\`" => [T("`")]
+t r"\[" => [T("[")]
+t r"\]" => [T("]")]
+t r"\\" => [T(r"\")]
+t r"\/" => [T("/")]
+t r"\*" => [T("*")]
+t r"\_" => [T("_")]
+t r"\`" => [T("`")]
// Unescapable special symbols.
-r"\:" => [T(r"\"), T(":")]
-r"\=" => [T(r"\"), T("=")]
-r"[\:]" => [LB, T(r"\"), CL, RB]
-r"[\=]" => [LB, T(r"\"), EQ, RB]
-r"[\,]" => [LB, T(r"\"), CM, RB]
+t r"\:" => [T(r"\"), T(":")]
+t r"\=" => [T(r"\"), T("=")]
+t r"[\:]" => [LB, T(r"\"), CL, RB]
+t r"[\=]" => [LB, T(r"\"), EQ, RB]
+t r"[\,]" => [LB, T(r"\"), CM, RB]
+
+// Spans
+ts "hello" => [(0:0, 0:5, T("hello"))]
+ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c"))]
+ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ),
+ (0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)]
+ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)),
+ (0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")),
+ (0:10, 0:11, RB), (0:11, 0:12, ST)]
+ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))]
+ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)]