summaryrefslogtreecommitdiff
path: root/src/syntax/tokens.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-02-07 22:29:16 +0100
committerLaurenz <laurmaedje@gmail.com>2020-02-07 22:29:16 +0100
commit4e8359385f73e549a563fd356b6858050464991d (patch)
treeb613f3032e623f739b22b24ebf62704f5896ae36 /src/syntax/tokens.rs
parent02dc29d18a7b67edf0eaa5d125be22eec6cfebb7 (diff)
Improve syntax testing framework ♻
... and finally expand a few escape sequences in strings.
Diffstat (limited to 'src/syntax/tokens.rs')
-rw-r--r--src/syntax/tokens.rs106
1 files changed, 61 insertions, 45 deletions
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index f4ea5daf..41acb94f 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -65,6 +65,10 @@ pub enum Token<'s> {
/// A quoted string in a function header: `"..."`.
ExprStr {
/// The string inside the quotes.
+ ///
+ /// _Note_: If the string contains escape sequences these are not yet
+ /// applied to be able to just store a string slice here instead of
+ /// a String. The escaping is done later in the parser.
string: &'s str,
/// Whether the closing quote was present.
terminated: bool
@@ -210,11 +214,13 @@ impl<'s> Iterator for Tokens<'s> {
// Expressions or just strings.
c => {
+ let body = self.mode == Body;
let text = self.read_string_until(|n| {
match n {
c if c.is_whitespace() => true,
- '\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' |
- ',' | '"' | '/' => true,
+ '\\' | '[' | ']' | '/' => true,
+ '*' | '_' | '`' if body => true,
+ ':' | '=' | ',' | '"' if !body => true,
_ => false,
}
}, false, -(c.len_utf8() as isize), 0).0;
@@ -441,18 +447,19 @@ pub fn is_newline_char(character: char) -> bool {
/// Whether this word is a valid identifier.
pub fn is_identifier(string: &str) -> bool {
- let mut chars = string.chars();
+ fn is_extra_allowed(c: char) -> bool {
+ c == '.' || c == '-' || c == '_'
+ }
+ let mut chars = string.chars();
match chars.next() {
- Some('-') => {}
- Some(c) if UnicodeXID::is_xid_start(c) => {}
+ Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {}
_ => return false,
}
while let Some(c) = chars.next() {
match c {
- '.' | '-' => {}
- c if UnicodeXID::is_xid_continue(c) => {}
+ c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {}
_ => return false,
}
}
@@ -460,11 +467,10 @@ pub fn is_identifier(string: &str) -> bool {
true
}
-
#[cfg(test)]
mod tests {
+ use super::super::test::check;
use super::*;
-
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
@@ -481,32 +487,19 @@ mod tests {
/// Test whether the given string tokenizes into the given list of tokens.
macro_rules! t {
- ($m:expr, $s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => {
- let tokens = Tokens::new(Position::ZERO, $s, $m).collect::<Vec<_>>();
- assert_eq!(tokens, vec![$(Spanned {
- span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)),
- v: $t
- }),*]);
-
- };
-
- ($m:expr, $s:expr => [$($t:expr),* $(,)?]) => {
- let tokens = Tokens::new(Position::ZERO, $s, $m)
- .map(Spanned::value)
- .collect::<Vec<_>>();
- assert_eq!(tokens, vec![$($t),*]);
- };
+ ($mode:expr, $source:expr => [$($tokens:tt)*]) => {
+ let (exp, spans) = spanned![vec $($tokens)*];
+ let found = Tokens::new(Position::ZERO, $source, $mode).collect::<Vec<_>>();
+ check($source, exp, found, spans);
+ }
}
- /// Parse a function token.
+ /// Write down a function token compactly.
macro_rules! func {
- ($header:expr, Some(($sl:tt:$sc:tt, $el:tt:$ec:tt, $body:expr)), $terminated:expr) => {
+ ($header:expr, Some($($tokens:tt)*), $terminated:expr) => {
Function {
header: $header,
- body: Some(Spanned {
- span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)),
- v: $body,
- }),
+ body: Some(spanned![item $($tokens)*]),
terminated: $terminated,
}
};
@@ -542,40 +535,63 @@ mod tests {
t!(Body, "_/*_/*a*/*/" => [Underscore, BC("_/*a*/")]);
t!(Body, "/*/*/" => [BC("/*/")]);
t!(Body, "abc*/" => [T("abc"), Invalid("*/")]);
+ t!(Body, "/***/" => [BC("*")]);
+ t!(Body, "/**\\****/*/*/" => [BC("*\\***"), Invalid("*/"), Invalid("*/")]);
+ t!(Body, "/*abc" => [BC("abc")]);
+ }
+
+ #[test]
+ fn tokenize_body_only_tokens() {
+ t!(Body, "_*`" => [Underscore, Star, Backtick]);
+ t!(Body, "***" => [Star, Star, Star]);
+ t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]);
+ t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
+ t!(Header, "_*`" => [Invalid("_*`")]);
}
#[test]
fn tokenize_header_only_tokens() {
- t!(Body, "\"hi\"" => [T("\"hi"), T("\"")]);
- t!(Body, "a: b" => [T("a"), T(":"), S(0), T("b")]);
- t!(Body, "c=d, " => [T("c"), T("=d"), T(","), S(0)]);
- t!(Header, "[" => [func!("", None, false)]);
- t!(Header, "]" => [Invalid("]")]);
+ t!(Body, "a: b" => [T("a:"), S(0), T("b")]);
+ t!(Body, "c=d, " => [T("c=d,"), S(0)]);
t!(Header, "(){}:=," => [LP, RP, LB, RB, Colon, Equals, Comma]);
t!(Header, "a:b" => [Id("a"), Colon, Id("b")]);
- t!(Header, "=" => [Equals]);
- t!(Header, "," => [Comma]);
- t!(Header, r#""hello\"world""# => [Str(r#"hello\"world"#, true)]);
- t!(Header, r#""hi", 12pt"# => [Str("hi", true), Comma, S(0), ExprSize(Size::pt(12.0))]);
t!(Header, "a: true, x=1" => [Id("a"), Colon, S(0), Bool(true), Comma, S(0), Id("x"), Equals, Num(1.0)]);
+ t!(Header, "=3.14" => [Equals, Num(3.14)]);
+ t!(Header, "12.3e5" => [Num(12.3e5)]);
t!(Header, "120%" => [Num(1.2)]);
+ t!(Header, "12e4%" => [Num(1200.0)]);
+ t!(Header, "__main__" => [Id("__main__")]);
+ t!(Header, ".func.box" => [Id(".func.box")]);
+ t!(Header, "--arg, _b, _1" => [Id("--arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")]);
+ t!(Header, "12_pt, 12pt" => [Invalid("12_pt"), Comma, S(0), ExprSize(Size::pt(12.0))]);
+ t!(Header, "1e5in" => [ExprSize(Size::inches(100000.0))]);
+ t!(Header, "2.3cm" => [ExprSize(Size::cm(2.3))]);
+ t!(Header, "02.4mm" => [ExprSize(Size::mm(2.4))]);
+ t!(Header, "2.4.cm" => [Invalid("2.4.cm")]);
t!(Header, "🌓, 🌍," => [Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma]);
}
#[test]
- fn tokenize_body_only_tokens() {
- t!(Body, "_*`" => [Underscore, Star, Backtick]);
- t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]);
- t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
- t!(Header, "_*`" => [Invalid("_"), Invalid("*"), Invalid("`")]);
+ fn tokenize_strings() {
+ t!(Body, "a \"hi\" string" => [T("a"), S(0), T("\"hi\""), S(0), T("string")]);
+ t!(Header, "\"hello" => [Str("hello", false)]);
+ t!(Header, "\"hello world\"" => [Str("hello world", true)]);
+ t!(Header, "\"hello\nworld\"" => [Str("hello\nworld", true)]);
+ t!(Header, r#"1"hello\nworld"false"# => [Num(1.0), Str("hello\\nworld", true), Bool(false)]);
+ t!(Header, r#""a\"bc""# => [Str(r#"a\"bc"#, true)]);
+ t!(Header, r#""a\\"bc""# => [Str(r#"a\\"#, true), Id("bc"), Str("", false)]);
+ t!(Header, r#""a\tbc"# => [Str("a\\tbc", false)]);
+ t!(Header, "\"🌎\"" => [Str("🌎", true)]);
}
#[test]
- fn tokenize_nested_functions() {
+ fn tokenize_functions() {
t!(Body, "[f: [=][*]]" => [func!("f: [=][*]", None, true)]);
t!(Body, "[_][[,],]," => [func!("_", Some((0:3, 0:9, "[,],")), true), T(",")]);
t!(Body, "[=][=][=]" => [func!("=", Some((0:3, 0:6, "=")), true), func!("=", None, true)]);
t!(Body, "[=][[=][=][=]]" => [func!("=", Some((0:3, 0:14, "[=][=][=]")), true)]);
+ t!(Header, "[" => [func!("", None, false)]);
+ t!(Header, "]" => [Invalid("]")]);
}
#[test]