diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-01-10 17:52:37 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2021-01-10 17:52:37 +0100 |
| commit | 9eac62c31a0f75c224cf4d6926e505cf02eafcde (patch) | |
| tree | f1630af3373ce30425ddbc590c06f9419aaa8113 /src/parse | |
| parent | 3b2a28ca8edf61cb1376a095be36c7d006c92d76 (diff) | |
Add lots of new tokens 💪
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 10 | ||||
| -rw-r--r-- | src/parse/resolve.rs | 3 | ||||
| -rw-r--r-- | src/parse/tests.rs | 22 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 725 |
4 files changed, 449 insertions, 311 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 150b5ed1..c03cb63d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -75,7 +75,7 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> { Token::Underscore => Node::Emph, Token::Tilde => Node::Text("\u{00A0}".into()), Token::Backslash => Node::Linebreak, - Token::Hashtag => { + Token::Hash => { if at_start { return Some(Node::Heading(heading(p))); } else { @@ -98,10 +98,10 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> { fn heading(p: &mut Parser) -> NodeHeading { // Count hashtags. let mut level = p.span(|p| { - p.eat_assert(Token::Hashtag); + p.eat_assert(Token::Hash); let mut level = 0u8; - while p.eat_if(Token::Hashtag) { + while p.eat_if(Token::Hash) { level = level.saturating_add(1); } level @@ -240,7 +240,7 @@ fn bracket_body(p: &mut Parser) -> Tree { fn expr(p: &mut Parser) -> Option<Expr> { binops(p, term, |token| match token { Token::Plus => Some(BinOp::Add), - Token::Hyphen => Some(BinOp::Sub), + Token::Hyph => Some(BinOp::Sub), _ => None, }) } @@ -282,7 +282,7 @@ fn binops( /// Parse a factor of the form `-?value`. fn factor(p: &mut Parser) -> Option<Expr> { let op = |token| match token { - Token::Hyphen => Some(UnOp::Neg), + Token::Hyph => Some(UnOp::Neg), _ => None, }; diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index c4afc430..3adbf11f 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -17,6 +17,7 @@ pub fn resolve_string(string: &str) -> String { Some('\\') => out.push('\\'), Some('"') => out.push('"'), Some('n') => out.push('\n'), + Some('r') => out.push('\r'), Some('t') => out.push('\t'), Some('u') if s.eat_if('{') => { // TODO: Feedback if closing brace is missing. 
@@ -137,7 +138,7 @@ mod tests { test(r#"av\u{6797"#, "avζ"); test(r#"a\\"#, "a\\"); test(r#"a\\\nbc"#, "a\\\nbc"); - test(r#"a\tbc"#, "a\tbc"); + test(r#"a\t\r\nbc"#, "a\t\r\nbc"); test(r"π", "π"); test(r"π\", r"π\"); test(r"\π", r"\π"); diff --git a/src/parse/tests.rs b/src/parse/tests.rs index 833d6661..fd8c63ca 100644 --- a/src/parse/tests.rs +++ b/src/parse/tests.rs @@ -226,25 +226,31 @@ fn test_parse_simple_nodes() { #[test] fn test_parse_headings() { // Basics with spans. - t!("#a" - nodes: [S(0..2, Heading(S(0..1, 0), Content![@S(1..2, Text("a"))]))], + t!("# a" + nodes: [S(0..3, Heading(S(0..1, 0), Content![ + @S(1..2, Space), S(2..3, Text("a")) + ]))], spans: true); // Multiple hashtags. - t!("###three" Heading(2, Content![@Text("three")])); + t!("### three" Heading(2, Content![@Space, Text("three")])); t!("###### six" Heading(5, Content![@Space, Text("six")])); // Start of heading. t!("/**/#" Heading(0, Content![@])); - t!("[f][#ok]" Call!("f", Args![Content![Heading(0, Content![@Text("ok")])]])); + t!("[f][# ok]" Call!("f", Args![Content![Heading(0, Content![ + @Space, Text("ok") + ])]])); // End of heading. - t!("#a\nb" Heading(0, Content![@Text("a")]), Space, Text("b")); + t!("# a\nb" Heading(0, Content![@Space, Text("a")]), Space, Text("b")); // Continued heading. - t!("#a{\n1\n}b" Heading(0, Content![@Text("a"), Block(Int(1)), Text("b")])); - t!("#a[f][\n\n]d" Heading(0, Content![@ - Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"), + t!("# a{\n1\n}b" Heading(0, Content![ + @Space, Text("a"), Block(Int(1)), Text("b") + ])); + t!("# a[f][\n\n]d" Heading(0, Content![@ + Space, Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"), ])); // No heading. 
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1e49d1c6..7f162b4c 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,7 +1,7 @@ use std::fmt::{self, Debug, Formatter}; use super::{is_newline, Scanner}; -use crate::geom::LengthUnit; +use crate::geom::{AngularUnit, LengthUnit}; use crate::syntax::*; use TokenMode::*; @@ -62,20 +62,20 @@ impl<'s> Iterator for Tokens<'s> { loop { // Common elements. return Some(match c { + // Functions and blocks. + '[' => Token::LeftBracket, + ']' => Token::RightBracket, + '{' => Token::LeftBrace, + '}' => Token::RightBrace, + // Whitespace. - c if c.is_whitespace() => self.whitespace(c, start), + c if c.is_whitespace() => self.whitespace(c), // Comments. '/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('*') => self.block_comment(), '*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)), - // Functions and blocks. - '[' => Token::LeftBracket, - ']' => Token::RightBracket, - '{' => Token::LeftBrace, - '}' => Token::RightBrace, - _ => break, }); } @@ -86,96 +86,132 @@ impl<'s> Iterator for Tokens<'s> { '*' => Token::Star, '_' => Token::Underscore, '~' => Token::Tilde, - '#' => Token::Hashtag, + '#' => self.hash(start), '`' => self.raw(), - - // Escape sequences. - '\\' => self.escaped(), + '$' => self.math(), + '\\' => self.backslash(), // Plain text. _ => self.text(start), }, Header => match c { - // Syntactic elements in headers. + // Parens. '(' => Token::LeftParen, ')' => Token::RightParen, - ':' => Token::Colon, + + // Length two. + '=' if self.s.eat_if('=') => Token::EqEq, + '!' if self.s.eat_if('=') => Token::BangEq, + '<' if self.s.eat_if('=') => Token::LtEq, + '>' if self.s.eat_if('=') => Token::GtEq, + '+' if self.s.eat_if('=') => Token::PlusEq, + '-' if self.s.eat_if('=') => Token::HyphEq, + '*' if self.s.eat_if('=') => Token::StarEq, + '/' if self.s.eat_if('=') => Token::SlashEq, + '.' 
if self.s.eat_if('.') => Token::Dots, + '=' if self.s.eat_if('>') => Token::Arrow, + + // Length one. ',' => Token::Comma, + ':' => Token::Colon, '|' => Token::Pipe, '+' => Token::Plus, - '-' => Token::Hyphen, + '-' => Token::Hyph, '*' => Token::Star, '/' => Token::Slash, + '=' => Token::Eq, + '<' => Token::Lt, + '>' => Token::Gt, + '?' => Token::Question, + + // Identifiers. + c if is_id_start(c) => self.ident(start), + + // Numbers. + c if c.is_ascii_digit() + || (c == '.' && self.s.check(|n| n.is_ascii_digit())) => + { + self.number(start, c) + } - // Expressions in headers. + // Hex values and strings. '#' => self.hex(), '"' => self.string(), - // Expressions. - c => self.expr(c, start), + _ => Token::Invalid(self.s.eaten_from(start)), }, }) } } impl<'s> Tokens<'s> { - fn whitespace(&mut self, first: char, start: usize) -> Token<'s> { + fn whitespace(&mut self, first: char) -> Token<'s> { // Fast path for just a single space if first == ' ' && !self.s.check(|c| c.is_whitespace()) { - return Token::Space(0); - } - - self.s.jump(start); + Token::Space(0) + } else { + self.s.uneat(); + + // Count the number of newlines. + let mut newlines = 0; + while let Some(c) = self.s.eat_merging_crlf() { + if !c.is_whitespace() { + self.s.uneat(); + break; + } - // Count the number of newlines. - let mut newlines = 0; - while let Some(c) = self.s.eat_merging_crlf() { - if !c.is_whitespace() { - self.s.uneat(); - break; + if is_newline(c) { + newlines += 1; + } } - if is_newline(c) { - newlines += 1; - } + Token::Space(newlines) } - - Token::Space(newlines) - } - - fn line_comment(&mut self) -> Token<'s> { - Token::LineComment(self.s.eat_until(is_newline)) } - fn block_comment(&mut self) -> Token<'s> { - let start = self.s.index(); - - let mut state = '_'; - let mut depth = 1; - - // Find the first `*/` that does not correspond to a nested `/*`. 
+ fn text(&mut self, start: usize) -> Token<'s> { while let Some(c) = self.s.eat() { - state = match (state, c) { - ('*', '/') => { - depth -= 1; - if depth == 0 { - break; - } - '_' - } - ('/', '*') => { - depth += 1; - '_' - } - _ => c, + if match c { + // Whitespace. + c if c.is_whitespace() => true, + // Comments. + '/' if self.s.check(|c| c == '/' || c == '*') => true, + // Parenthesis. + '[' | ']' | '{' | '}' => true, + // Markup. + '*' | '_' | '#' | '~' | '`' => true, + // Escaping. + '\\' => true, + _ => false, + } { + self.s.uneat(); + break; } } - let terminated = depth == 0; - let end = self.s.index() - if terminated { 2 } else { 0 }; + Token::Text(self.s.eaten_from(start)) + } - Token::BlockComment(self.s.get(start .. end)) + fn hash(&mut self, start: usize) -> Token<'s> { + if self.s.check(is_id_start) { + self.s.eat(); + self.s.eat_while(is_id_continue); + match self.s.eaten_from(start) { + "#let" => Token::Let, + "#if" => Token::If, + "#else" => Token::Else, + "#for" => Token::For, + "#in" => Token::In, + "#while" => Token::While, + "#break" => Token::Break, + "#continue" => Token::Continue, + "#return" => Token::Return, + s => Token::Invalid(s), + } + } else { + Token::Hash + } } fn raw(&mut self) -> Token<'s> { @@ -205,7 +241,38 @@ impl<'s> Tokens<'s> { }) } - fn escaped(&mut self) -> Token<'s> { + fn math(&mut self) -> Token<'s> { + let mut dollars = 1; + if self.s.eat_if('$') { + dollars = 2; + } + + let start = self.s.index(); + + let mut found = 0; + let mut escaped = false; + while found < dollars { + match self.s.eat() { + Some('$') if !escaped => found += 1, + Some(c) => { + found = 0; + escaped = c == '\\' && !escaped; + } + None => break, + } + } + + let terminated = found == dollars; + let end = self.s.index() - if terminated { found } else { 0 }; + + Token::Math(TokenMath { + formula: self.s.get(start .. 
end), + inline: dollars == 1, + terminated, + }) + } + + fn backslash(&mut self) -> Token<'s> { if let Some(c) = self.s.peek() { match c { // Backslash and comments. @@ -235,27 +302,79 @@ impl<'s> Tokens<'s> { } } - fn text(&mut self, start: usize) -> Token<'s> { - while let Some(c) = self.s.eat() { - if match c { - // Whitespace. - c if c.is_whitespace() => true, - // Comments. - '/' if self.s.check(|c| c == '/' || c == '*') => true, - // Parenthesis. - '[' | ']' | '{' | '}' => true, - // Markup. - '*' | '_' | '#' | '~' | '`' => true, - // Escaping. - '\\' => true, - _ => false, - } { - self.s.uneat(); - break; + fn ident(&mut self, start: usize) -> Token<'s> { + self.s.eat_while(is_id_continue); + match self.s.eaten_from(start) { + "not" => Token::Not, + "and" => Token::And, + "or" => Token::Or, + "let" => Token::Let, + "if" => Token::If, + "else" => Token::Else, + "for" => Token::For, + "in" => Token::In, + "while" => Token::While, + "break" => Token::Break, + "continue" => Token::Continue, + "return" => Token::Return, + "none" => Token::None, + "true" => Token::Bool(true), + "false" => Token::Bool(false), + id => Token::Ident(id), + } + } + + fn number(&mut self, start: usize, first: char) -> Token<'s> { + // Read the first part (integer or fractional depending on `first`). + self.s.eat_while(|c| c.is_ascii_digit()); + + // Read the fractional part if not already done and present. + if first != '.' && self.s.eat_if('.') { + self.s.eat_while(|c| c.is_ascii_digit()); + } + + // Read the exponent. + if self.s.eat_if('e') || self.s.eat_if('E') { + let _ = self.s.eat_if('+') || self.s.eat_if('-'); + self.s.eat_while(|c| c.is_ascii_digit()); + } + + // Read the suffix. + let suffix_start = self.s.index(); + if !self.s.eat_if('%') { + self.s.eat_while(|c| c.is_ascii_alphanumeric()); + } + + let number = self.s.get(start .. suffix_start); + let suffix = self.s.eaten_from(suffix_start); + let all = self.s.eaten_from(start); + + // Find out whether it is a simple number. 
+ if suffix.is_empty() { + if let Ok(int) = number.parse::<i64>() { + return Token::Int(int); + } else if let Ok(float) = number.parse::<f64>() { + return Token::Float(float); } } - Token::Text(self.s.eaten_from(start)) + // Otherwise parse into the fitting numeric type. + let build = match suffix { + "%" => Token::Percent, + "pt" => |x| Token::Length(x, LengthUnit::Pt), + "mm" => |x| Token::Length(x, LengthUnit::Mm), + "cm" => |x| Token::Length(x, LengthUnit::Cm), + "in" => |x| Token::Length(x, LengthUnit::In), + "rad" => |x| Token::Angle(x, AngularUnit::Rad), + "deg" => |x| Token::Angle(x, AngularUnit::Deg), + _ => return Token::Invalid(all), + }; + + if let Ok(float) = number.parse::<f64>() { + build(float) + } else { + Token::Invalid(all) + } } fn hex(&mut self) -> Token<'s> { @@ -278,64 +397,38 @@ impl<'s> Tokens<'s> { }) } - fn expr(&mut self, first: char, start: usize) -> Token<'s> { - if is_id_start(first) { - self.ident(start) - } else if first.is_ascii_digit() - || (first == '.' && self.s.check(|c| c.is_ascii_digit())) - { - self.number(start) - } else { - Token::Invalid(self.s.eaten_from(start)) - } - } - - fn ident(&mut self, start: usize) -> Token<'s> { - self.s.eat_while(is_id_continue); - let string = self.s.eaten_from(start); - match string { - "none" => Token::None, - "true" => Token::Bool(true), - "false" => Token::Bool(false), - _ => Token::Ident(string), - } + fn line_comment(&mut self) -> Token<'s> { + Token::LineComment(self.s.eat_until(is_newline)) } - fn number(&mut self, start: usize) -> Token<'s> { - self.s.jump(start); - - // Read the integer part. - self.s.eat_while(|c| c.is_ascii_digit()); + fn block_comment(&mut self) -> Token<'s> { + let start = self.s.index(); - // Read the fractional part if present. - if self.s.eat_if('.') { - self.s.eat_while(|c| c.is_ascii_digit()); - } + let mut state = '_'; + let mut depth = 1; - // Read the exponent. 
- if self.s.eat_if('e') || self.s.eat_if('E') { - let _ = self.s.eat_if('+') || self.s.eat_if('-'); - self.s.eat_while(|c| c.is_ascii_digit()); + // Find the first `*/` that does not correspond to a nested `/*`. + while let Some(c) = self.s.eat() { + state = match (state, c) { + ('*', '/') => { + depth -= 1; + if depth == 0 { + break; + } + '_' + } + ('/', '*') => { + depth += 1; + '_' + } + _ => c, + } } - // Read the suffix. - if !self.s.eat_if('%') { - self.s.eat_while(|c| c.is_ascii_alphanumeric()); - } + let terminated = depth == 0; + let end = self.s.index() - if terminated { 2 } else { 0 }; - // Parse into one of the suitable types. - let string = self.s.eaten_from(start); - if let Some(percent) = parse_percent(string) { - Token::Percent(percent) - } else if let Some((val, unit)) = parse_length(string) { - Token::Length(val, unit) - } else if let Ok(int) = string.parse::<i64>() { - Token::Int(int) - } else if let Ok(float) = string.parse::<f64>() { - Token::Float(float) - } else { - Token::Invalid(string) - } + Token::BlockComment(self.s.get(start .. end)) } } @@ -345,40 +438,12 @@ impl Debug for Tokens<'_> { } } -fn parse_percent(string: &str) -> Option<f64> { - string.strip_suffix('%').and_then(|prefix| prefix.parse::<f64>().ok()) -} - -fn parse_length(string: &str) -> Option<(f64, LengthUnit)> { - let len = string.len(); - - // We need at least some number and the unit. - if len <= 2 { - return None; - } - - // We can view the string as bytes since a multibyte UTF-8 char cannot - // have valid ASCII chars as subbytes. - let split = len - 2; - let bytes = string.as_bytes(); - let unit = match &bytes[split ..] { - b"pt" => LengthUnit::Pt, - b"mm" => LengthUnit::Mm, - b"cm" => LengthUnit::Cm, - b"in" => LengthUnit::In, - _ => return None, - }; - - string[.. 
split].parse::<f64>().ok().map(|val| (val, unit)) -} - #[cfg(test)] #[allow(non_snake_case)] mod tests { use super::*; use crate::parse::tests::check; - use LengthUnit::*; use Option::None; use Token::{Ident, *}; @@ -386,6 +451,10 @@ mod tests { Token::Raw(TokenRaw { text, backticks, terminated }) } + fn Math(formula: &str, inline: bool, terminated: bool) -> Token { + Token::Math(TokenMath { formula, inline, terminated }) + } + fn UnicodeEscape(sequence: &str, terminated: bool) -> Token { Token::UnicodeEscape(TokenUnicodeEscape { sequence, terminated }) } @@ -424,6 +493,7 @@ mod tests { // Letter suffixes. ('a', Some(Body), "hello", Text("hello")), ('a', Some(Body), "π", Text("π")), + ('a', Some(Header), "if", If), ('a', Some(Header), "val", Ident("val")), ('a', Some(Header), "Ξ±", Ident("Ξ±")), ('a', Some(Header), "_", Ident("_")), @@ -437,9 +507,10 @@ mod tests { ('/', Some(Body), "*", Star), ('/', Some(Body), "_", Underscore), ('/', Some(Body), r"\\", Text(r"\")), + ('/', Some(Body), "#let", Let), ('/', Some(Header), "(", LeftParen), ('/', Some(Header), ":", Colon), - ('/', Some(Header), "+", Plus), + ('/', Some(Header), "+=", PlusEq), ('/', Some(Header), "#123", Hex("123")), ]; @@ -473,89 +544,129 @@ mod tests { } #[test] - fn test_length_from_str() { - assert_eq!(parse_length("2.5cm"), Some((2.5, Cm))); - assert_eq!(parse_length("1.e+2cm"), Some((100.0, Cm))); - assert_eq!(parse_length("123π"), None); + fn test_tokenize_brackets() { + // Test body. + t!(Body: "[" => LeftBracket); + t!(Body: "]" => RightBracket); + t!(Body: "{" => LeftBrace); + t!(Body: "}" => RightBrace); + t!(Body[" /"]: "(" => Text("(")); + t!(Body[" /"]: ")" => Text(")")); + + // Test header. + t!(Header: "[" => LeftBracket); + t!(Header: "]" => RightBracket); + t!(Header: "{" => LeftBrace); + t!(Header: "}" => RightBrace); + t!(Header: "(" => LeftParen); + t!(Header: ")" => RightParen); } #[test] - fn test_tokenize_whitespace() { - // Test basic whitespace. 
- t!(Both["a1/"]: "" => ); - t!(Both["a1/"]: " " => Space(0)); - t!(Both["a1/"]: " " => Space(0)); - t!(Both["a1/"]: "\t" => Space(0)); - t!(Both["a1/"]: " \t" => Space(0)); - t!(Both["a1/"]: "\u{202F}" => Space(0)); - - // Test newline counting. - t!(Both["a1/"]: "\n" => Space(1)); - t!(Both["a1/"]: "\n " => Space(1)); - t!(Both["a1/"]: " \n" => Space(1)); - t!(Both["a1/"]: " \n " => Space(1)); - t!(Both["a1/"]: "\r\n" => Space(1)); - t!(Both["a1/"]: " \n\t \n " => Space(2)); - t!(Both["a1/"]: "\n\r" => Space(2)); - t!(Both["a1/"]: " \r\r\n \x0D" => Space(3)); + fn test_tokenize_body_symbols() { + // Test markup tokens. + t!(Body[" a1"]: "*" => Star); + t!(Body: "_" => Underscore); + t!(Body["a1/"]: "# " => Hash, Space(0)); + t!(Body: "~" => Tilde); + t!(Body[" "]: r"\" => Backslash); } #[test] - fn test_tokenize_line_comments() { - // Test line comment with no trailing newline. - t!(Both[""]: "//" => LineComment("")); - - // Test line comment ends at newline. - t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1)); - t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1)); - t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1)); - - // Test nested line comments. - t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1)); + fn test_tokenize_header_symbols() { + // Test all symbols. + t!(Header: "," => Comma); + t!(Header: ":" => Colon); + t!(Header: "|" => Pipe); + t!(Header: "+" => Plus); + t!(Header: "-" => Hyph); + t!(Header[" a1"]: "*" => Star); + t!(Header[" a1"]: "/" => Slash); + t!(Header: "=" => Eq); + t!(Header: "==" => EqEq); + t!(Header: "!=" => BangEq); + t!(Header: "<" => Lt); + t!(Header: "<=" => LtEq); + t!(Header: ">" => Gt); + t!(Header: ">=" => GtEq); + t!(Header: "+=" => PlusEq); + t!(Header: "-=" => HyphEq); + t!(Header: "*=" => StarEq); + t!(Header: "/=" => SlashEq); + t!(Header: "?" => Question); + t!(Header: ".." => Dots); + t!(Header: "=>" => Arrow); + + // Test combinations. 
+ t!(Header: "|=>" => Pipe, Arrow); + t!(Header: "<=>" => LtEq, Gt); + t!(Header[" a/"]: "..." => Dots, Invalid(".")); + + // Test hyphen as symbol vs part of identifier. + t!(Header[" /"]: "-1" => Hyph, Int(1)); + t!(Header[" /"]: "-a" => Hyph, Ident("a")); + t!(Header[" /"]: "--1" => Hyph, Hyph, Int(1)); + t!(Header[" /"]: "--_a" => Hyph, Hyph, Ident("_a")); + t!(Header[" /"]: "a-b" => Ident("a-b")); } #[test] - fn test_tokenize_block_comments() { - // Test basic block comments. - t!(Both[""]: "/*" => BlockComment("")); - t!(Both: "/**/" => BlockComment("")); - t!(Both: "/*π*/" => BlockComment("π")); - t!(Both: "/*\n*/" => BlockComment("\n")); + fn test_tokenize_keywords() { + let both = [ + ("let", Let), + ("if", If), + ("else", Else), + ("for", For), + ("in", In), + ("while", While), + ("break", Break), + ("continue", Continue), + ("return", Return), + ]; - // Test depth 1 and 2 nested block comments. - t!(Both: "/* /* */ */" => BlockComment(" /* */ ")); - t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/")); + for &(s, t) in &both { + t!(Header[" "]: s => t); + t!(Body[" "]: format!("#{}", s) => t); + t!(Body[" "]: format!("#{0}#{0}", s) => t, t); + t!(Body[" /"]: format!("# {}", s) => Hash, Space(0), Text(s)); + } - // Test two nested, one unclosed block comments. - t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/")); + let header = [ + ("not", Not), + ("and", And), + ("or", Or), + ("none", Token::None), + ("false", Bool(false)), + ("true", Bool(true)), + ]; - // Test all combinations of up to two following slashes and stars. - t!(Both[""]: "/*" => BlockComment("")); - t!(Both[""]: "/*/" => BlockComment("/")); - t!(Both[""]: "/**" => BlockComment("*")); - t!(Both[""]: "/*//" => BlockComment("//")); - t!(Both[""]: "/*/*" => BlockComment("/*")); - t!(Both[""]: "/**/" => BlockComment("")); - t!(Both[""]: "/***" => BlockComment("**")); + for &(s, t) in &header { + t!(Header[" "]: s => t); + t!(Body[" /"]: s => Text(s)); + } + + // Test invalid case. 
+ t!(Header[" /"]: "None" => Ident("None")); + t!(Header[" /"]: "True" => Ident("True")); + + // Test word that contains keyword. + t!(Body[" "]: "#letter" => Invalid("#letter")); + t!(Header[" /"]: "falser" => Ident("falser")); } #[test] - fn test_tokenize_body_tokens() { - // Test parentheses. - t!(Body: "[" => LeftBracket); - t!(Body: "]" => RightBracket); - t!(Body: "{" => LeftBrace); - t!(Body: "}" => RightBrace); - - // Test markup tokens. - t!(Body[" a1"]: "*" => Star); - t!(Body: "_" => Underscore); - t!(Body: "~" => Tilde); - t!(Body: "#" => Hashtag); - t!(Body[" "]: r"\" => Backslash); + fn test_tokenize_text() { + // Test basic text. + t!(Body[" /"]: "hello" => Text("hello")); + t!(Body[" /"]: "hello-world" => Text("hello-world")); - // Test header symbols. + // Test header symbols in text. + t!(Body[" /"]: "a():\"b" => Text("a():\"b")); t!(Body[" /"]: ":,=|/+-" => Text(":,=|/+-")); + + // Test text ends. + t!(Body[""]: "hello " => Text("hello"), Space(0)); + t!(Body[""]: "hello~" => Text("hello"), Tilde); } #[test] @@ -578,6 +689,21 @@ mod tests { } #[test] + fn test_tokenize_math_formulas() { + // Test basic formula. + t!(Body: "$x$" => Math("x", true, true)); + t!(Body: "$$x + y$$" => Math("x + y", false, true)); + + // Test unterminated. + t!(Body[""]: "$$x" => Math("x", false, false)); + t!(Body[""]: "$$x$\n$" => Math("x$\n$", false, false)); + + // Test escape sequences. + t!(Body: r"$$\\\$$$" => Math(r"\\\$", false, true)); + t!(Body[""]: r"$$ $\\$" => Math(r" $\\$", false, false)); + } + + #[test] fn test_tokenize_escape_sequences() { // Test escapable symbols. t!(Body: r"\\" => Text(r"\")); @@ -614,49 +740,24 @@ mod tests { } #[test] - fn test_tokenize_text() { - // Test basic text. - t!(Body[" /"]: "hello" => Text("hello")); - t!(Body[" /"]: "hello-world" => Text("hello-world")); - - // Test header symbols in text. - t!(Body[" /"]: "a():\"b" => Text("a():\"b")); - - // Test text ends. 
- t!(Body[""]: "hello " => Text("hello"), Space(0)); - t!(Body[""]: "hello~" => Text("hello"), Tilde); - } - - #[test] - fn test_tokenize_header_tokens() { - // Test parentheses. - t!(Header: "[" => LeftBracket); - t!(Header: "]" => RightBracket); - t!(Header: "{" => LeftBrace); - t!(Header: "}" => RightBrace); - t!(Header: "(" => LeftParen); - t!(Header: ")" => RightParen); - - // Test structural tokens. - t!(Header: ":" => Colon); - t!(Header: "," => Comma); - t!(Header: "|" => Pipe); - t!(Header: "+" => Plus); - t!(Header: "-" => Hyphen); - t!(Header[" a1"]: "*" => Star); - t!(Header[" a1"]: "/" => Slash); - - // Test hyphen parsed as symbol. - t!(Header[" /"]: "-1" => Hyphen, Int(1)); - t!(Header[" /"]: "-a" => Hyphen, Ident("a")); - t!(Header[" /"]: "--1" => Hyphen, Hyphen, Int(1)); - t!(Header[" /"]: "--_a" => Hyphen, Hyphen, Ident("_a")); - t!(Header[" /"]: "a-b" => Ident("a-b")); + fn test_tokenize_whitespace() { + // Test basic whitespace. + t!(Both["a1/"]: "" => ); + t!(Both["a1/"]: " " => Space(0)); + t!(Both["a1/"]: " " => Space(0)); + t!(Both["a1/"]: "\t" => Space(0)); + t!(Both["a1/"]: " \t" => Space(0)); + t!(Both["a1/"]: "\u{202F}" => Space(0)); - // Test some operations. - t!(Header[" /"]: "1+3" => Int(1), Plus, Int(3)); - t!(Header[" /"]: "1*3" => Int(1), Star, Int(3)); - t!(Header[" /"]: "1/3" => Int(1), Slash, Int(3)); + // Test newline counting. + t!(Both["a1/"]: "\n" => Space(1)); + t!(Both["a1/"]: "\n " => Space(1)); + t!(Both["a1/"]: " \n" => Space(1)); + t!(Both["a1/"]: " \n " => Space(1)); + t!(Both["a1/"]: "\r\n" => Space(1)); + t!(Both["a1/"]: " \n\t \n " => Space(2)); + t!(Both["a1/"]: "\n\r" => Space(2)); + t!(Both["a1/"]: " \r\r\n \x0D" => Space(3)); } #[test] @@ -677,22 +778,7 @@ mod tests { } #[test] - fn test_tokenize_keywords() { - // Test none. - t!(Header[" /"]: "none" => Token::None); - t!(Header[" /"]: "None" => Ident("None")); - - // Test valid bools. 
- t!(Header[" /"]: "false" => Bool(false)); - t!(Header[" /"]: "true" => Bool(true)); - - // Test invalid bools. - t!(Header[" /"]: "True" => Ident("True")); - t!(Header[" /"]: "falser" => Ident("falser")); - } - - #[test] - fn test_tokenize_numeric_values() { + fn test_tokenize_numeric() { let ints = [("7", 7), ("012", 12)]; let floats = [ (".3", 0.3), @@ -721,6 +807,7 @@ mod tests { } // Test attached numbers. + t!(Header[" /"]: ".2.3" => Float(0.2), Float(0.3)); t!(Header[" /"]: "1.2.3" => Float(1.2), Float(0.3)); t!(Header[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3)); @@ -731,20 +818,20 @@ mod tests { // Combined integers and floats. let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats.iter().copied()); - // Test percentages. - for (s, v) in nums.clone() { - t!(Header[" /"]: format!("{}%", s) => Percent(v)); - } + let suffixes = [ + ("%", Percent as fn(f64) -> Token<'static>), + ("mm", |x| Length(x, LengthUnit::Mm)), + ("pt", |x| Length(x, LengthUnit::Pt)), + ("cm", |x| Length(x, LengthUnit::Cm)), + ("in", |x| Length(x, LengthUnit::In)), + ("rad", |x| Angle(x, AngularUnit::Rad)), + ("deg", |x| Angle(x, AngularUnit::Deg)), + ]; - // Test lengths. - for &unit in &[ - LengthUnit::Mm, - LengthUnit::Pt, - LengthUnit::Cm, - LengthUnit::In, - ] { + // Numeric types. + for &(suffix, build) in &suffixes { for (s, v) in nums.clone() { - t!(Header[" /"]: format!("{}{}", s, unit) => Length(v, unit)); + t!(Header[" /"]: format!("{}{}", s, suffix) => build(v)); } } } @@ -765,6 +852,8 @@ mod tests { t!(Header: "\"hi\"" => Str("hi", true)); t!(Header: "\"hi\nthere\"" => Str("hi\nthere", true)); t!(Header: "\"π\"" => Str("π", true)); + + // Test unterminated. t!(Header[""]: "\"hi" => Str("hi", false)); // Test escaped quote. @@ -773,6 +862,45 @@ mod tests { } #[test] + fn test_tokenize_line_comments() { + // Test line comment with no trailing newline. + t!(Both[""]: "//" => LineComment("")); + + // Test line comment ends at newline. 
+ t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1)); + t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1)); + t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1)); + + // Test nested line comments. + t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1)); + } + + #[test] + fn test_tokenize_block_comments() { + // Test basic block comments. + t!(Both[""]: "/*" => BlockComment("")); + t!(Both: "/**/" => BlockComment("")); + t!(Both: "/*π*/" => BlockComment("π")); + t!(Both: "/*\n*/" => BlockComment("\n")); + + // Test depth 1 and 2 nested block comments. + t!(Both: "/* /* */ */" => BlockComment(" /* */ ")); + t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/")); + + // Test two nested, one unclosed block comments. + t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/")); + + // Test all combinations of up to two following slashes and stars. + t!(Both[""]: "/*" => BlockComment("")); + t!(Both[""]: "/*/" => BlockComment("/")); + t!(Both[""]: "/**" => BlockComment("*")); + t!(Both[""]: "/*//" => BlockComment("//")); + t!(Both[""]: "/*/*" => BlockComment("/*")); + t!(Both[""]: "/**/" => BlockComment("")); + t!(Both[""]: "/***" => BlockComment("**")); + } + + #[test] fn test_tokenize_invalid() { // Test invalidly closed block comments. t!(Both: "*/" => Token::Invalid("*/")); @@ -784,11 +912,14 @@ mod tests { t!(Header: r"\:" => Invalid(r"\"), Colon); t!(Header: "mealβ" => Ident("meal"), Invalid("β")); t!(Header[" /"]: r"\a" => Invalid(r"\"), Ident("a")); - t!(Header[" /"]: ">main" => Invalid(">"), Ident("main")); // Test invalid number suffixes. t!(Header[" /"]: "1foo" => Invalid("1foo")); t!(Header: "1p%" => Invalid("1p"), Invalid("%")); t!(Header: "1%%" => Percent(1.0), Invalid("%")); + + // Test invalid keyword. + t!(Body[" /"]: "#-" => Hash, Text("-")); + t!(Body[" "]: "#do" => Invalid("#do")) } } |
