summary refs log tree commit diff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-01-11 11:47:07 +0100
committerLaurenz <laurmaedje@gmail.com>2020-01-11 11:47:07 +0100
commitbd702c2029561a741f48095549a2b6ea97b3a09b (patch)
tree425d1c709bfec80ef043268cbe80eb728ded1225
parentb1e956419d94a0c3876891b3d6a4976cc4a3ab09 (diff)
Span tests
-rw-r--r--src/syntax/span.rs2
-rw-r--r--src/syntax/tokens.rs19
-rw-r--r--tests/parse.rs43
-rw-r--r--tests/parsing/tokens.rs112
4 files changed, 106 insertions, 70 deletions
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index bbb6a206..9bf7cafb 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -71,7 +71,7 @@ debug_display!(Span);
/// A line-column position in source code.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Position {
- /// The 1-indexed line (inclusive).
+ /// The 0-indexed line (inclusive).
pub line: usize,
/// The 0-indexed column (inclusive).
pub column: usize,
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index efcd1fc0..2e9dd35b 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -102,7 +102,7 @@ impl<'s> Iterator for Tokens<'s> {
'*' if second == Some('/') => { self.eat(); StarSlash }
// Whitespace.
- c if c.is_whitespace() => self.parse_whitespace(c),
+ c if c.is_whitespace() => self.parse_whitespace(start),
// Functions.
'[' => { self.set_state(Header); LeftBracket }
@@ -196,20 +196,11 @@ impl<'s> Tokens<'s> {
}, true, 0, -2))
}
- fn parse_whitespace(&mut self, c: char) -> Token<'s> {
- let mut newlines = if is_newline_char(c) { 1 } else { 0 };
- let mut last = c;
-
- self.read_string_until(|n| {
- if is_newline_char(n) && !(last == '\r' && n == '\n') {
- newlines += 1;
- }
-
- last = n;
- !n.is_whitespace()
- }, false, 0, 0);
+ fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
+ self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
+ let end = self.chars.position();
- Whitespace(newlines)
+ Whitespace(end.line - start.line)
}
fn parse_string(&mut self) -> Token<'s> {
diff --git a/tests/parse.rs b/tests/parse.rs
index e00b05d8..14a5b22d 100644
--- a/tests/parse.rs
+++ b/tests/parse.rs
@@ -23,12 +23,31 @@ fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
/// Parses the test syntax.
macro_rules! tokens {
- ($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
+ ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({
#[allow(unused_mut)]
let mut cases = Vec::new();
- $(cases.push(($line, $src, $tokens.to_vec()));)*
+ $(cases.push(($line, $src, tokens!(@$task [$($target)*])));)*
cases
});
+
+ (@t $tokens:expr) => ({
+ Target::Tokenized($tokens.to_vec())
+ });
+
+ (@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({
+ Target::TokenizedSpanned(vec![
+ $(Spanned { v: $t, span: Span {
+ start: Position { line: $sl, column: $sc },
+ end: Position { line: $el, column: $ec },
+ }}),*
+ ])
+ });
+}
+
+#[derive(Debug)]
+enum Target {
+ Tokenized(Vec<Token<'static>>),
+ TokenizedSpanned(Vec<Spanned<Token<'static>>>),
}
fn main() {
@@ -47,11 +66,11 @@ fn main() {
let mut failed = 0;
// Go through all tests in a test file.
- for (line, src, expected) in cases.into_iter() {
- let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
+ for (line, src, target) in cases.into_iter() {
+ let (correct, expected, found) = test_case(src, target);
// Check whether the tokenization works correctly.
- if found == expected {
+ if correct {
okay += 1;
} else {
if failed == 0 {
@@ -82,3 +101,17 @@ fn main() {
std::process::exit(-1);
}
}
+
+fn test_case(src: &str, target: Target) -> (bool, String, String) {
+ match target {
+ Target::Tokenized(tokens) => {
+ let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
+ (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
+ }
+
+ Target::TokenizedSpanned(tokens) => {
+ let found: Vec<_> = tokenize(src).collect();
+ (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
+ }
+ }
+}
diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs
index 4f5474bb..78d891f9 100644
--- a/tests/parsing/tokens.rs
+++ b/tests/parsing/tokens.rs
@@ -1,62 +1,74 @@
// Whitespace.
-"" => []
-" " => [W(0)]
-" " => [W(0)]
-"\t" => [W(0)]
-" \t" => [W(0)]
-"\n" => [W(1)]
-"\n " => [W(1)]
-" \n" => [W(1)]
-" \n " => [W(1)]
-" \n\t \n " => [W(2)]
-"\r\n" => [W(1)]
-" \r\r\n \x0D" => [W(3)]
-"\n\r" => [W(2)]
+t "" => []
+t " " => [W(0)]
+t " " => [W(0)]
+t "\t" => [W(0)]
+t " \t" => [W(0)]
+t "\n" => [W(1)]
+t "\n " => [W(1)]
+t " \n" => [W(1)]
+t " \n " => [W(1)]
+t " \n\t \n " => [W(2)]
+t "\r\n" => [W(1)]
+t " \r\r\n \x0D" => [W(3)]
+t "\n\r" => [W(2)]
// Comments.
-"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
-"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
-"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
-"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
-"/**/" => [BC("")]
-"_/*_/*a*/*/" => [U, BC("_/*a*/")]
-"/*/*/" => [BC("/*/")]
-"abc*/" => [T("abc"), SS]
+t "a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
+t "a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
+t "a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
+t "a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
+t "/**/" => [BC("")]
+t "_/*_/*a*/*/" => [U, BC("_/*a*/")]
+t "/*/*/" => [BC("/*/")]
+t "abc*/" => [T("abc"), SS]
// Header only tokens.
-"[" => [LB]
-"]" => [RB]
-"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
-"[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
-"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
-"[=]" => [LB, EQ, RB]
-"[,]" => [LB, CM, RB]
-"a: b" => [T("a"), T(":"), W(0), T("b")]
-"c=d, " => [T("c"), T("=d"), T(","), W(0)]
-r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
-r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
-"\"hi\"" => [T("\"hi"), T("\"")]
-"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
+t "[" => [LB]
+t "]" => [RB]
+t "[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
+t "[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
+t "[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
+t "[=]" => [LB, EQ, RB]
+t "[,]" => [LB, CM, RB]
+t "a: b" => [T("a"), T(":"), W(0), T("b")]
+t "c=d, " => [T("c"), T("=d"), T(","), W(0)]
+t r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
+t r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
+t "\"hi\"" => [T("\"hi"), T("\"")]
+t "[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
ID("x"), EQ, NUM(1.0), RB]
-"[120%]" => [LB, NUM(1.2), RB]
+t "[120%]" => [LB, NUM(1.2), RB]
// Body only tokens.
-"_*`" => [U, ST, B]
-"[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
-"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
+t "_*`" => [U, ST, B]
+t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
+t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
+t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Escapes.
-r"\[" => [T("[")]
-r"\]" => [T("]")]
-r"\\" => [T(r"\")]
-r"\/" => [T("/")]
-r"\*" => [T("*")]
-r"\_" => [T("_")]
-r"\`" => [T("`")]
+t r"\[" => [T("[")]
+t r"\]" => [T("]")]
+t r"\\" => [T(r"\")]
+t r"\/" => [T("/")]
+t r"\*" => [T("*")]
+t r"\_" => [T("_")]
+t r"\`" => [T("`")]
// Unescapable special symbols.
-r"\:" => [T(r"\"), T(":")]
-r"\=" => [T(r"\"), T("=")]
-r"[\:]" => [LB, T(r"\"), CL, RB]
-r"[\=]" => [LB, T(r"\"), EQ, RB]
-r"[\,]" => [LB, T(r"\"), CM, RB]
+t r"\:" => [T(r"\"), T(":")]
+t r"\=" => [T(r"\"), T("=")]
+t r"[\:]" => [LB, T(r"\"), CL, RB]
+t r"[\=]" => [LB, T(r"\"), EQ, RB]
+t r"[\,]" => [LB, T(r"\"), CM, RB]
+
+// Spans
+ts "hello" => [(0:0, 0:5, T("hello"))]
+ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c"))]
+ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ),
+ (0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)]
+ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)),
+ (0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")),
+ (0:10, 0:11, RB), (0:11, 0:12, ST)]
+ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))]
+ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)]