diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-12-06 12:37:08 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-12-06 12:37:08 +0100 |
| commit | 3ecb0c754bc1777e002a43e4c34b27e676f9a95c (patch) | |
| tree | 49dd299b6671058dd47b7dae84b748f117a962d3 /src/syntax/tokens.rs | |
| parent | c2e458a133772a94009733040b39d58e781af977 (diff) | |
More math syntax
Diffstat (limited to 'src/syntax/tokens.rs')
| -rw-r--r-- | src/syntax/tokens.rs | 581 |
1 files changed, 68 insertions, 513 deletions
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 130ad668..57188096 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -35,14 +35,12 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. - #[inline] pub fn new(text: &'s str, mode: TokenMode) -> Self { Self::with_prefix("", text, mode) } /// Create a new token iterator with the given mode and a prefix to offset /// column calculations. - #[inline] pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self { Self { s: Scanner::new(text), @@ -53,54 +51,46 @@ impl<'s> Tokens<'s> { } /// Get the current token mode. - #[inline] pub fn mode(&self) -> TokenMode { self.mode } /// Change the token mode. - #[inline] pub fn set_mode(&mut self, mode: TokenMode) { self.mode = mode; } /// The index in the string at which the last token ends and next token /// will start. - #[inline] pub fn cursor(&self) -> usize { self.s.cursor() } /// Jump to the given index in the string. - #[inline] pub fn jump(&mut self, index: usize) { self.s.jump(index); } /// The underlying scanner. - #[inline] pub fn scanner(&self) -> Scanner<'s> { self.s } /// Whether the last token was terminated. - #[inline] pub fn terminated(&self) -> bool { self.terminated } /// The column index of a given index in the source string. - #[inline] pub fn column(&self, index: usize) -> usize { column(self.s.string(), index, self.column_offset) } } -impl<'s> Iterator for Tokens<'s> { +impl Iterator for Tokens<'_> { type Item = SyntaxKind; /// Parse the next token in the source code. - #[inline] fn next(&mut self) -> Option<Self::Item> { let start = self.s.cursor(); let c = self.s.eat()?; @@ -124,7 +114,8 @@ impl<'s> Iterator for Tokens<'s> { } } -impl<'s> Tokens<'s> { +/// Shared. +impl Tokens<'_> { fn line_comment(&mut self) -> SyntaxKind { self.s.eat_until(is_newline); if self.s.peek().is_none() { @@ -189,8 +180,9 @@ impl<'s> Tokens<'s> { SyntaxKind::Space { newlines } } +} - #[inline] +impl Tokens<'_> { fn markup(&mut self, start: usize, c: char) -> SyntaxKind { match c { // Blocks. @@ -231,7 +223,6 @@ impl<'s> Tokens<'s> { } } - #[inline] fn text(&mut self, start: usize) -> SyntaxKind { macro_rules! table { ($(|$c:literal)*) => {{ @@ -303,7 +294,11 @@ impl<'s> Tokens<'s> { } fn hash(&mut self, start: usize) -> SyntaxKind { - if self.s.at(is_id_start) { + if self.s.eat_if('{') { + SyntaxKind::LeftBrace + } else if self.s.eat_if('[') { + SyntaxKind::LeftBracket + } else if self.s.at(is_id_start) { let read = self.s.eat_while(is_id_continue); match keyword(read) { Some(keyword) => keyword, @@ -342,8 +337,10 @@ impl<'s> Tokens<'s> { if start < end { self.s.expect(':'); SyntaxKind::Symbol(self.s.get(start..end).into()) - } else { + } else if self.mode == TokenMode::Markup { SyntaxKind::Colon + } else { + SyntaxKind::Atom(":".into()) } } @@ -426,26 +423,25 @@ impl<'s> Tokens<'s> { self.text(start) } - fn label(&mut self) -> SyntaxKind { - let label = self.s.eat_while(is_id_continue); - if self.s.eat_if('>') { - if !label.is_empty() { - SyntaxKind::Label(label.into()) - } else { - SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into()) - } - } else { - self.terminated = false; - SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into()) - } - } - fn reference(&mut self) -> SyntaxKind { SyntaxKind::Ref(self.s.eat_while(is_id_continue).into()) } + fn in_word(&self) -> bool { + let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric()); + let prev = self.s.scout(-2); + let next = self.s.peek(); + alphanumeric(prev) && alphanumeric(next) + } +} + +/// Math. +impl Tokens<'_> { fn math(&mut self, start: usize, c: char) -> SyntaxKind { match c { + // Multi-char things. + '#' => self.hash(start), + // Escape sequences. '\\' => self.backslash(), @@ -456,18 +452,32 @@ impl<'s> Tokens<'s> { '&' => SyntaxKind::Amp, '$' => SyntaxKind::Dollar, - // Brackets. - '{' => SyntaxKind::LeftBrace, - '}' => SyntaxKind::RightBrace, - '[' => SyntaxKind::LeftBracket, - ']' => SyntaxKind::RightBracket, - '(' => SyntaxKind::LeftParen, - ')' => SyntaxKind::RightParen, + // Symbol notation. + ':' => self.colon(), - // Identifiers. + // Strings. + '"' => self.string(), + + // Identifiers and symbol notation. c if is_math_id_start(c) && self.s.at(is_math_id_continue) => { self.s.eat_while(is_math_id_continue); - SyntaxKind::Ident(self.s.from(start).into()) + + let mut symbol = false; + while self.s.eat_if(':') + && !self.s.eat_while(char::is_alphanumeric).is_empty() + { + symbol = true; + } + + if symbol { + SyntaxKind::Symbol(self.s.from(start).into()) + } else { + if self.s.scout(-1) == Some(':') { + self.s.uneat(); + } + + SyntaxKind::Ident(self.s.from(start).into()) + } } // Numbers. @@ -480,7 +490,10 @@ impl<'s> Tokens<'s> { c => SyntaxKind::Atom(c.into()), } } +} +/// Code. +impl Tokens<'_> { fn code(&mut self, start: usize, c: char) -> SyntaxKind { match c { // Blocks. @@ -493,6 +506,9 @@ impl<'s> Tokens<'s> { '(' => SyntaxKind::LeftParen, ')' => SyntaxKind::RightParen, + // Math. + '$' => SyntaxKind::Dollar, + // Labels. '<' if self.s.at(is_id_continue) => self.label(), @@ -619,14 +635,22 @@ impl<'s> Tokens<'s> { } } - fn in_word(&self) -> bool { - let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric()); - let prev = self.s.scout(-2); - let next = self.s.peek(); - alphanumeric(prev) && alphanumeric(next) + fn label(&mut self) -> SyntaxKind { + let label = self.s.eat_while(is_id_continue); + if self.s.eat_if('>') { + if !label.is_empty() { + SyntaxKind::Label(label.into()) + } else { + SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into()) + } + } else { + self.terminated = false; + SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into()) + } } } +/// Try to parse an identifier into a keyword. fn keyword(ident: &str) -> Option<SyntaxKind> { Some(match ident { "not" => SyntaxKind::Not, @@ -652,7 +676,6 @@ fn keyword(ident: &str) -> Option<SyntaxKind> { /// The column index of a given index in the source string, given a column /// offset for the first line. -#[inline] fn column(string: &str, index: usize, offset: usize) -> usize { let mut apply_offset = false; let res = string[..index] @@ -729,471 +752,3 @@ fn is_math_id_start(c: char) -> bool { fn is_math_id_continue(c: char) -> bool { c.is_xid_continue() && c != '_' } - -#[cfg(test)] -#[allow(non_snake_case)] -mod tests { - use super::super::tests::check; - use super::*; - - use ErrorPos::*; - use Option::None; - use SyntaxKind::*; - use TokenMode::{Code, Markup}; - - fn Space(newlines: usize) -> SyntaxKind { - SyntaxKind::Space { newlines } - } - - fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind { - SyntaxKind::Raw(Arc::new(RawFields { - text: text.into(), - lang: lang.map(Into::into), - block, - })) - } - - fn Str(string: &str) -> SyntaxKind { - SyntaxKind::Str(string.into()) - } - - fn Text(string: &str) -> SyntaxKind { - SyntaxKind::Text(string.into()) - } - - fn Ident(ident: &str) -> SyntaxKind { - SyntaxKind::Ident(ident.into()) - } - - fn Error(pos: ErrorPos, message: &str) -> SyntaxKind { - SyntaxKind::Error(pos, message.into()) - } - - /// Building blocks for suffix testing. - /// - /// We extend each test case with a collection of different suffixes to make - /// sure tokens end at the correct position. These suffixes are split into - /// blocks, which can be disabled/enabled per test case. For example, when - /// testing identifiers we disable letter suffixes because these would - /// mingle with the identifiers. - /// - /// Suffix blocks: - /// - ' ': spacing - /// - 'a': letters - /// - '1': numbers - /// - '/': symbols - const BLOCKS: &str = " a1/"; - - // Suffixes described by four-tuples of: - // - // - block the suffix is part of - // - mode in which the suffix is applicable - // - the suffix string - // - the resulting suffix NodeKind - fn suffixes( - ) -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)> { - [ - // Whitespace suffixes. - (' ', None, " ", Space(0)), - (' ', None, "\n", Space(1)), - (' ', None, "\r", Space(1)), - (' ', None, "\r\n", Space(1)), - // Letter suffixes. - ('a', Some(Markup), "hello", Text("hello")), - ('a', Some(Markup), "💚", Text("💚")), - ('a', Some(Code), "val", Ident("val")), - ('a', Some(Code), "α", Ident("α")), - ('a', Some(Code), "_", Ident("_")), - // Number suffixes. - ('1', Some(Code), "2", Int(2)), - ('1', Some(Code), ".2", Float(0.2)), - // Symbol suffixes. - ('/', None, "[", LeftBracket), - ('/', None, "//", LineComment), - ('/', None, "/**/", BlockComment), - ('/', Some(Markup), "*", Star), - ('/', Some(Markup), r"\\", Escape('\\')), - ('/', Some(Markup), "#let", Let), - ('/', Some(Code), "(", LeftParen), - ('/', Some(Code), ":", Colon), - ('/', Some(Code), "+=", PlusEq), - ] - .into_iter() - } - - macro_rules! t { - (Both $($tts:tt)*) => { - t!(Markup $($tts)*); - t!(Code $($tts)*); - }; - ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{ - // Test without suffix. - t!(@$mode: $text => $($token),*); - - // Test with each applicable suffix. - for (block, mode, suffix, ref token) in suffixes() { - let text = $text; - #[allow(unused_variables)] - let blocks = BLOCKS; - $(let blocks = $blocks;)? - assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { - t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token); - } - } - }}; - (@$mode:ident: $text:expr => $($token:expr),*) => {{ - let text = $text; - let found = Tokens::new(&text, $mode).collect::<Vec<_>>(); - let expected = vec![$($token.clone()),*]; - check(&text, found, expected); - }}; - } - - #[test] - fn test_tokenize_brackets() { - // Test in markup. - t!(Markup: "{" => LeftBrace); - t!(Markup: "}" => RightBrace); - t!(Markup: "[" => LeftBracket); - t!(Markup: "]" => RightBracket); - t!(Markup[" /"]: "(" => Text("(")); - t!(Markup[" /"]: ")" => Text(")")); - - // Test in code. - t!(Code: "{" => LeftBrace); - t!(Code: "}" => RightBrace); - t!(Code: "[" => LeftBracket); - t!(Code: "]" => RightBracket); - t!(Code: "(" => LeftParen); - t!(Code: ")" => RightParen); - } - - #[test] - fn test_tokenize_whitespace() { - // Test basic whitespace. - t!(Both["a1/"]: "" => ); - t!(Both["a1/"]: " " => Space(0)); - t!(Both["a1/"]: " " => Space(0)); - t!(Both["a1/"]: "\t" => Space(0)); - t!(Both["a1/"]: " \t" => Space(0)); - t!(Both["a1/"]: "\u{202F}" => Space(0)); - - // Test newline counting. - t!(Both["a1/"]: "\n" => Space(1)); - t!(Both["a1/"]: "\n " => Space(1)); - t!(Both["a1/"]: " \n" => Space(1)); - t!(Both["a1/"]: " \n " => Space(1)); - t!(Both["a1/"]: "\r\n" => Space(1)); - t!(Both["a1/"]: "\r\n\r" => Space(2)); - t!(Both["a1/"]: " \n\t \n " => Space(2)); - t!(Both["a1/"]: "\n\r" => Space(2)); - t!(Both["a1/"]: " \r\r\n \x0D" => Space(3)); - } - - #[test] - fn test_tokenize_text() { - // Test basic text. - t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "reha-world" => Text("reha-world")); - - // Test code symbols in text. - t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b")); - t!(Markup[" /"]: ";,|/+" => Text(";,|/+")); - t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a")); - t!(Markup[" "]: "#123" => Text("#123")); - - // Test text ends. - t!(Markup[""]: "hello " => Text("hello"), Space(0)); - t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}')); - } - - #[test] - fn test_tokenize_escape_sequences() { - // Test escapable symbols. - t!(Markup: r"\\" => Escape('\\')); - t!(Markup: r"\/" => Escape('/')); - t!(Markup: r"\[" => Escape('[')); - t!(Markup: r"\]" => Escape(']')); - t!(Markup: r"\{" => Escape('{')); - t!(Markup: r"\}" => Escape('}')); - t!(Markup: r"\*" => Escape('*')); - t!(Markup: r"\_" => Escape('_')); - t!(Markup: r"\=" => Escape('=')); - t!(Markup: r"\~" => Escape('~')); - t!(Markup: r"\'" => Escape('\'')); - t!(Markup: r#"\""# => Escape('"')); - t!(Markup: r"\`" => Escape('`')); - t!(Markup: r"\$" => Escape('$')); - t!(Markup: r"\#" => Escape('#')); - t!(Markup: r"\a" => Escape('a')); - t!(Markup: r"\u" => Escape('u')); - t!(Markup: r"\1" => Escape('1')); - - // Test basic unicode escapes. - t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); - t!(Markup: r"\u{2603}" => Escape('☃')); - t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); - - // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace")); - t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace")); - t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace); - } - - #[test] - fn test_tokenize_markup_symbols() { - // Test markup tokens. - t!(Markup[" a1"]: "*" => Star); - t!(Markup: "_" => Underscore); - t!(Markup[""]: "===" => Eq, Eq, Eq); - t!(Markup["a1/"]: "= " => Eq, Space(0)); - t!(Markup[" "]: r"\" => Linebreak); - t!(Markup: "~" => Shorthand('\u{00A0}')); - t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}')); - t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}')); - t!(Markup["a1/"]: "- " => Minus, Space(0)); - t!(Markup[" "]: "+" => Plus); - t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap())); - t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a")); - t!(Markup[" /"]: "a1." => Text("a1.")); - } - - #[test] - fn test_tokenize_code_symbols() { - // Test all symbols. - t!(Code: "," => Comma); - t!(Code: ";" => Semicolon); - t!(Code: ":" => Colon); - t!(Code: "+" => Plus); - t!(Code: "-" => Minus); - t!(Code[" a1"]: "*" => Star); - t!(Code[" a1"]: "/" => Slash); - t!(Code[" a/"]: "." => Dot); - t!(Code: "=" => Eq); - t!(Code: "==" => EqEq); - t!(Code: "!=" => ExclEq); - t!(Code[" /"]: "<" => Lt); - t!(Code: "<=" => LtEq); - t!(Code: ">" => Gt); - t!(Code: ">=" => GtEq); - t!(Code: "+=" => PlusEq); - t!(Code: "-=" => HyphEq); - t!(Code: "*=" => StarEq); - t!(Code: "/=" => SlashEq); - t!(Code: ".." => Dots); - t!(Code: "=>" => Arrow); - - // Test combinations. - t!(Code: "<=>" => LtEq, Gt); - t!(Code[" a/"]: "..." => Dots, Dot); - - // Test hyphen as symbol vs part of identifier. - t!(Code[" /"]: "-1" => Minus, Int(1)); - t!(Code[" /"]: "-a" => Minus, Ident("a")); - t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); - t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); - t!(Code[" /"]: "a-b" => Ident("a-b")); - - // Test invalid. - t!(Code: r"\" => Error(Full, "not valid here")); - } - - #[test] - fn test_tokenize_keywords() { - // A list of a few (not all) keywords. - let list = [ - ("not", Not), - ("let", Let), - ("if", If), - ("else", Else), - ("for", For), - ("in", In), - ("import", Import), - ]; - - for (s, t) in list.clone() { - t!(Markup[" "]: format!("#{}", s) => t); - t!(Markup[" "]: format!("#{0}#{0}", s) => t, t); - t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}"))); - } - - for (s, t) in list { - t!(Code[" "]: s => t); - t!(Markup[" /"]: s => Text(s)); - } - - // Test simple identifier. - t!(Markup[" "]: "#letter" => Ident("letter")); - t!(Code[" /"]: "falser" => Ident("falser")); - t!(Code[" /"]: "None" => Ident("None")); - t!(Code[" /"]: "True" => Ident("True")); - } - - #[test] - fn test_tokenize_raw_blocks() { - // Test basic raw block. - t!(Markup: "``" => Raw("", None, false)); - t!(Markup: "`raw`" => Raw("raw", None, false)); - t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); - - // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick")); - - // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false)); - - // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false)); - t!(Markup: "````🚀````" => Raw("", None, false)); - t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks")); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false)); - } - - #[test] - fn test_tokenize_idents() { - // Test valid identifiers. - t!(Code[" /"]: "x" => Ident("x")); - t!(Code[" /"]: "value" => Ident("value")); - t!(Code[" /"]: "__main__" => Ident("__main__")); - t!(Code[" /"]: "_snake_case" => Ident("_snake_case")); - - // Test non-ascii. - t!(Code[" /"]: "α" => Ident("α")); - t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ")); - - // Test hyphen parsed as identifier. - t!(Code[" /"]: "kebab-case" => Ident("kebab-case")); - t!(Code[" /"]: "one-10" => Ident("one-10")); - } - - #[test] - fn test_tokenize_numeric() { - let ints = [("7", 7), ("012", 12)]; - let floats = [ - (".3", 0.3), - ("0.3", 0.3), - ("3.", 3.0), - ("3.0", 3.0), - ("14.3", 14.3), - ("10e2", 1000.0), - ("10e+0", 10.0), - ("10e+1", 100.0), - ("10e-2", 0.1), - ("10.e1", 100.0), - ("10.e-1", 1.0), - (".1e1", 1.0), - ("10E2", 1000.0), - ]; - - // Test integers. - for &(s, v) in &ints { - t!(Code[" /"]: s => Int(v)); - } - - // Test floats. - for &(s, v) in &floats { - t!(Code[" /"]: s => Float(v)); - } - - // Test attached numbers. - t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3)); - t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3)); - t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3)); - - // Test float from too large integer. - let large = i64::MAX as f64 + 1.0; - t!(Code[" /"]: large.to_string() => Float(large)); - - // Combined integers and floats. - let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats); - - let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[ - ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))), - ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))), - ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))), - ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))), - ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))), - ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))), - ("em", |x| Numeric(x, Unit::Em)), - ("fr", |x| Numeric(x, Unit::Fr)), - ("%", |x| Numeric(x, Unit::Percent)), - ]; - - // Numeric types. - for &(suffix, build) in suffixes { - for (s, v) in nums.clone() { - t!(Code[" /"]: format!("{}{}", s, suffix) => build(v)); - } - } - - // Multiple dots close the number. - t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2)); - t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3)); - t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3)); - - // Test invalid. - t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix")); - } - - #[test] - fn test_tokenize_strings() { - // Test basic strings. - t!(Code: "\"hi\"" => Str("hi")); - t!(Code: "\"hi\nthere\"" => Str("hi\nthere")); - t!(Code: "\"🌎\"" => Str("🌎")); - - // Test unterminated. - t!(Code[""]: "\"hi" => Error(End, "expected quote")); - - // Test escaped quote. - t!(Code: r#""a\"bc""# => Str("a\"bc")); - t!(Code[""]: r#""\""# => Error(End, "expected quote")); - } - - #[test] - fn test_tokenize_line_comments() { - // Test line comment with no trailing newline. - t!(Both[""]: "//" => LineComment); - - // Test line comment ends at newline. - t!(Both["a1/"]: "//bc\n" => LineComment, Space(1)); - t!(Both["a1/"]: "// bc \n" => LineComment, Space(1)); - t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1)); - - // Test nested line comments. - t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1)); - } - - #[test] - fn test_tokenize_block_comments() { - // Test basic block comments. - t!(Both[""]: "/*" => BlockComment); - t!(Both: "/**/" => BlockComment); - t!(Both: "/*🏞*/" => BlockComment); - t!(Both: "/*\n*/" => BlockComment); - - // Test depth 1 and 2 nested block comments. - t!(Both: "/* /* */ */" => BlockComment); - t!(Both: "/*/*/**/*/*/" => BlockComment); - - // Test two nested, one unclosed block comments. - t!(Both[""]: "/*/*/**/*/" => BlockComment); - - // Test all combinations of up to two following slashes and stars. - t!(Both[""]: "/*" => BlockComment); - t!(Both[""]: "/*/" => BlockComment); - t!(Both[""]: "/**" => BlockComment); - t!(Both[""]: "/*//" => BlockComment); - t!(Both[""]: "/*/*" => BlockComment); - t!(Both[""]: "/**/" => BlockComment); - t!(Both[""]: "/***" => BlockComment); - - // Test unexpected terminator. - t!(Both: "/*Hi*/*/" => BlockComment, - Error(Full, "unexpected end of block comment")); - } -} |
