summaryrefslogtreecommitdiff
path: root/src/syntax/tokens.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-12-06 12:37:08 +0100
committerLaurenz <laurmaedje@gmail.com>2022-12-06 12:37:08 +0100
commit3ecb0c754bc1777e002a43e4c34b27e676f9a95c (patch)
tree49dd299b6671058dd47b7dae84b748f117a962d3 /src/syntax/tokens.rs
parentc2e458a133772a94009733040b39d58e781af977 (diff)
More math syntax
Diffstat (limited to 'src/syntax/tokens.rs')
-rw-r--r--src/syntax/tokens.rs581
1 files changed, 68 insertions, 513 deletions
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index 130ad668..57188096 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -35,14 +35,12 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
- #[inline]
pub fn new(text: &'s str, mode: TokenMode) -> Self {
Self::with_prefix("", text, mode)
}
/// Create a new token iterator with the given mode and a prefix to offset
/// column calculations.
- #[inline]
pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
Self {
s: Scanner::new(text),
@@ -53,54 +51,46 @@ impl<'s> Tokens<'s> {
}
/// Get the current token mode.
- #[inline]
pub fn mode(&self) -> TokenMode {
self.mode
}
/// Change the token mode.
- #[inline]
pub fn set_mode(&mut self, mode: TokenMode) {
self.mode = mode;
}
/// The index in the string at which the last token ends and next token
/// will start.
- #[inline]
pub fn cursor(&self) -> usize {
self.s.cursor()
}
/// Jump to the given index in the string.
- #[inline]
pub fn jump(&mut self, index: usize) {
self.s.jump(index);
}
/// The underlying scanner.
- #[inline]
pub fn scanner(&self) -> Scanner<'s> {
self.s
}
/// Whether the last token was terminated.
- #[inline]
pub fn terminated(&self) -> bool {
self.terminated
}
/// The column index of a given index in the source string.
- #[inline]
pub fn column(&self, index: usize) -> usize {
column(self.s.string(), index, self.column_offset)
}
}
-impl<'s> Iterator for Tokens<'s> {
+impl Iterator for Tokens<'_> {
type Item = SyntaxKind;
/// Parse the next token in the source code.
- #[inline]
fn next(&mut self) -> Option<Self::Item> {
let start = self.s.cursor();
let c = self.s.eat()?;
@@ -124,7 +114,8 @@ impl<'s> Iterator for Tokens<'s> {
}
}
-impl<'s> Tokens<'s> {
+/// Shared.
+impl Tokens<'_> {
fn line_comment(&mut self) -> SyntaxKind {
self.s.eat_until(is_newline);
if self.s.peek().is_none() {
@@ -189,8 +180,9 @@ impl<'s> Tokens<'s> {
SyntaxKind::Space { newlines }
}
+}
- #[inline]
+impl Tokens<'_> {
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
@@ -231,7 +223,6 @@ impl<'s> Tokens<'s> {
}
}
- #[inline]
fn text(&mut self, start: usize) -> SyntaxKind {
macro_rules! table {
($(|$c:literal)*) => {{
@@ -303,7 +294,11 @@ impl<'s> Tokens<'s> {
}
fn hash(&mut self, start: usize) -> SyntaxKind {
- if self.s.at(is_id_start) {
+ if self.s.eat_if('{') {
+ SyntaxKind::LeftBrace
+ } else if self.s.eat_if('[') {
+ SyntaxKind::LeftBracket
+ } else if self.s.at(is_id_start) {
let read = self.s.eat_while(is_id_continue);
match keyword(read) {
Some(keyword) => keyword,
@@ -342,8 +337,10 @@ impl<'s> Tokens<'s> {
if start < end {
self.s.expect(':');
SyntaxKind::Symbol(self.s.get(start..end).into())
- } else {
+ } else if self.mode == TokenMode::Markup {
SyntaxKind::Colon
+ } else {
+ SyntaxKind::Atom(":".into())
}
}
@@ -426,26 +423,25 @@ impl<'s> Tokens<'s> {
self.text(start)
}
- fn label(&mut self) -> SyntaxKind {
- let label = self.s.eat_while(is_id_continue);
- if self.s.eat_if('>') {
- if !label.is_empty() {
- SyntaxKind::Label(label.into())
- } else {
- SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
- }
- } else {
- self.terminated = false;
- SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
- }
- }
-
fn reference(&mut self) -> SyntaxKind {
SyntaxKind::Ref(self.s.eat_while(is_id_continue).into())
}
+ fn in_word(&self) -> bool {
+ let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.scout(-2);
+ let next = self.s.peek();
+ alphanumeric(prev) && alphanumeric(next)
+ }
+}
+
+/// Math.
+impl Tokens<'_> {
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
+ // Multi-char things.
+ '#' => self.hash(start),
+
// Escape sequences.
'\\' => self.backslash(),
@@ -456,18 +452,32 @@ impl<'s> Tokens<'s> {
'&' => SyntaxKind::Amp,
'$' => SyntaxKind::Dollar,
- // Brackets.
- '{' => SyntaxKind::LeftBrace,
- '}' => SyntaxKind::RightBrace,
- '[' => SyntaxKind::LeftBracket,
- ']' => SyntaxKind::RightBracket,
- '(' => SyntaxKind::LeftParen,
- ')' => SyntaxKind::RightParen,
+ // Symbol notation.
+ ':' => self.colon(),
- // Identifiers.
+ // Strings.
+ '"' => self.string(),
+
+ // Identifiers and symbol notation.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
self.s.eat_while(is_math_id_continue);
- SyntaxKind::Ident(self.s.from(start).into())
+
+ let mut symbol = false;
+ while self.s.eat_if(':')
+ && !self.s.eat_while(char::is_alphanumeric).is_empty()
+ {
+ symbol = true;
+ }
+
+ if symbol {
+ SyntaxKind::Symbol(self.s.from(start).into())
+ } else {
+ if self.s.scout(-1) == Some(':') {
+ self.s.uneat();
+ }
+
+ SyntaxKind::Ident(self.s.from(start).into())
+ }
}
// Numbers.
@@ -480,7 +490,10 @@ impl<'s> Tokens<'s> {
c => SyntaxKind::Atom(c.into()),
}
}
+}
+/// Code.
+impl Tokens<'_> {
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
@@ -493,6 +506,9 @@ impl<'s> Tokens<'s> {
'(' => SyntaxKind::LeftParen,
')' => SyntaxKind::RightParen,
+ // Math.
+ '$' => SyntaxKind::Dollar,
+
// Labels.
'<' if self.s.at(is_id_continue) => self.label(),
@@ -619,14 +635,22 @@ impl<'s> Tokens<'s> {
}
}
- fn in_word(&self) -> bool {
- let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
- let prev = self.s.scout(-2);
- let next = self.s.peek();
- alphanumeric(prev) && alphanumeric(next)
+ fn label(&mut self) -> SyntaxKind {
+ let label = self.s.eat_while(is_id_continue);
+ if self.s.eat_if('>') {
+ if !label.is_empty() {
+ SyntaxKind::Label(label.into())
+ } else {
+ SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
+ }
+ } else {
+ self.terminated = false;
+ SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
+ }
}
}
+/// Try to parse an identifier into a keyword.
fn keyword(ident: &str) -> Option<SyntaxKind> {
Some(match ident {
"not" => SyntaxKind::Not,
@@ -652,7 +676,6 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
/// The column index of a given index in the source string, given a column
/// offset for the first line.
-#[inline]
fn column(string: &str, index: usize, offset: usize) -> usize {
let mut apply_offset = false;
let res = string[..index]
@@ -729,471 +752,3 @@ fn is_math_id_start(c: char) -> bool {
fn is_math_id_continue(c: char) -> bool {
c.is_xid_continue() && c != '_'
}
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
- use super::super::tests::check;
- use super::*;
-
- use ErrorPos::*;
- use Option::None;
- use SyntaxKind::*;
- use TokenMode::{Code, Markup};
-
- fn Space(newlines: usize) -> SyntaxKind {
- SyntaxKind::Space { newlines }
- }
-
- fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
- SyntaxKind::Raw(Arc::new(RawFields {
- text: text.into(),
- lang: lang.map(Into::into),
- block,
- }))
- }
-
- fn Str(string: &str) -> SyntaxKind {
- SyntaxKind::Str(string.into())
- }
-
- fn Text(string: &str) -> SyntaxKind {
- SyntaxKind::Text(string.into())
- }
-
- fn Ident(ident: &str) -> SyntaxKind {
- SyntaxKind::Ident(ident.into())
- }
-
- fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
- SyntaxKind::Error(pos, message.into())
- }
-
- /// Building blocks for suffix testing.
- ///
- /// We extend each test case with a collection of different suffixes to make
- /// sure tokens end at the correct position. These suffixes are split into
- /// blocks, which can be disabled/enabled per test case. For example, when
- /// testing identifiers we disable letter suffixes because these would
- /// mingle with the identifiers.
- ///
- /// Suffix blocks:
- /// - ' ': spacing
- /// - 'a': letters
- /// - '1': numbers
- /// - '/': symbols
- const BLOCKS: &str = " a1/";
-
- // Suffixes described by four-tuples of:
- //
- // - block the suffix is part of
- // - mode in which the suffix is applicable
- // - the suffix string
- // - the resulting suffix NodeKind
- fn suffixes(
- ) -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)> {
- [
- // Whitespace suffixes.
- (' ', None, " ", Space(0)),
- (' ', None, "\n", Space(1)),
- (' ', None, "\r", Space(1)),
- (' ', None, "\r\n", Space(1)),
- // Letter suffixes.
- ('a', Some(Markup), "hello", Text("hello")),
- ('a', Some(Markup), "💚", Text("💚")),
- ('a', Some(Code), "val", Ident("val")),
- ('a', Some(Code), "α", Ident("α")),
- ('a', Some(Code), "_", Ident("_")),
- // Number suffixes.
- ('1', Some(Code), "2", Int(2)),
- ('1', Some(Code), ".2", Float(0.2)),
- // Symbol suffixes.
- ('/', None, "[", LeftBracket),
- ('/', None, "//", LineComment),
- ('/', None, "/**/", BlockComment),
- ('/', Some(Markup), "*", Star),
- ('/', Some(Markup), r"\\", Escape('\\')),
- ('/', Some(Markup), "#let", Let),
- ('/', Some(Code), "(", LeftParen),
- ('/', Some(Code), ":", Colon),
- ('/', Some(Code), "+=", PlusEq),
- ]
- .into_iter()
- }
-
- macro_rules! t {
- (Both $($tts:tt)*) => {
- t!(Markup $($tts)*);
- t!(Code $($tts)*);
- };
- ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
- // Test without suffix.
- t!(@$mode: $text => $($token),*);
-
- // Test with each applicable suffix.
- for (block, mode, suffix, ref token) in suffixes() {
- let text = $text;
- #[allow(unused_variables)]
- let blocks = BLOCKS;
- $(let blocks = $blocks;)?
- assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
- if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
- t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
- }
- }
- }};
- (@$mode:ident: $text:expr => $($token:expr),*) => {{
- let text = $text;
- let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
- let expected = vec![$($token.clone()),*];
- check(&text, found, expected);
- }};
- }
-
- #[test]
- fn test_tokenize_brackets() {
- // Test in markup.
- t!(Markup: "{" => LeftBrace);
- t!(Markup: "}" => RightBrace);
- t!(Markup: "[" => LeftBracket);
- t!(Markup: "]" => RightBracket);
- t!(Markup[" /"]: "(" => Text("("));
- t!(Markup[" /"]: ")" => Text(")"));
-
- // Test in code.
- t!(Code: "{" => LeftBrace);
- t!(Code: "}" => RightBrace);
- t!(Code: "[" => LeftBracket);
- t!(Code: "]" => RightBracket);
- t!(Code: "(" => LeftParen);
- t!(Code: ")" => RightParen);
- }
-
- #[test]
- fn test_tokenize_whitespace() {
- // Test basic whitespace.
- t!(Both["a1/"]: "" => );
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: "\t" => Space(0));
- t!(Both["a1/"]: " \t" => Space(0));
- t!(Both["a1/"]: "\u{202F}" => Space(0));
-
- // Test newline counting.
- t!(Both["a1/"]: "\n" => Space(1));
- t!(Both["a1/"]: "\n " => Space(1));
- t!(Both["a1/"]: " \n" => Space(1));
- t!(Both["a1/"]: " \n " => Space(1));
- t!(Both["a1/"]: "\r\n" => Space(1));
- t!(Both["a1/"]: "\r\n\r" => Space(2));
- t!(Both["a1/"]: " \n\t \n " => Space(2));
- t!(Both["a1/"]: "\n\r" => Space(2));
- t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
- }
-
- #[test]
- fn test_tokenize_text() {
- // Test basic text.
- t!(Markup[" /"]: "hello" => Text("hello"));
- t!(Markup[" /"]: "reha-world" => Text("reha-world"));
-
- // Test code symbols in text.
- t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
- t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
- t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
- t!(Markup[" "]: "#123" => Text("#123"));
-
- // Test text ends.
- t!(Markup[""]: "hello " => Text("hello"), Space(0));
- t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
- }
-
- #[test]
- fn test_tokenize_escape_sequences() {
- // Test escapable symbols.
- t!(Markup: r"\\" => Escape('\\'));
- t!(Markup: r"\/" => Escape('/'));
- t!(Markup: r"\[" => Escape('['));
- t!(Markup: r"\]" => Escape(']'));
- t!(Markup: r"\{" => Escape('{'));
- t!(Markup: r"\}" => Escape('}'));
- t!(Markup: r"\*" => Escape('*'));
- t!(Markup: r"\_" => Escape('_'));
- t!(Markup: r"\=" => Escape('='));
- t!(Markup: r"\~" => Escape('~'));
- t!(Markup: r"\'" => Escape('\''));
- t!(Markup: r#"\""# => Escape('"'));
- t!(Markup: r"\`" => Escape('`'));
- t!(Markup: r"\$" => Escape('$'));
- t!(Markup: r"\#" => Escape('#'));
- t!(Markup: r"\a" => Escape('a'));
- t!(Markup: r"\u" => Escape('u'));
- t!(Markup: r"\1" => Escape('1'));
-
- // Test basic unicode escapes.
- t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
- t!(Markup: r"\u{2603}" => Escape('☃'));
- t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
-
- // Test unclosed unicode escapes.
- t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
- t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
- }
-
- #[test]
- fn test_tokenize_markup_symbols() {
- // Test markup tokens.
- t!(Markup[" a1"]: "*" => Star);
- t!(Markup: "_" => Underscore);
- t!(Markup[""]: "===" => Eq, Eq, Eq);
- t!(Markup["a1/"]: "= " => Eq, Space(0));
- t!(Markup[" "]: r"\" => Linebreak);
- t!(Markup: "~" => Shorthand('\u{00A0}'));
- t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
- t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
- t!(Markup["a1/"]: "- " => Minus, Space(0));
- t!(Markup[" "]: "+" => Plus);
- t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
- t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
- t!(Markup[" /"]: "a1." => Text("a1."));
- }
-
- #[test]
- fn test_tokenize_code_symbols() {
- // Test all symbols.
- t!(Code: "," => Comma);
- t!(Code: ";" => Semicolon);
- t!(Code: ":" => Colon);
- t!(Code: "+" => Plus);
- t!(Code: "-" => Minus);
- t!(Code[" a1"]: "*" => Star);
- t!(Code[" a1"]: "/" => Slash);
- t!(Code[" a/"]: "." => Dot);
- t!(Code: "=" => Eq);
- t!(Code: "==" => EqEq);
- t!(Code: "!=" => ExclEq);
- t!(Code[" /"]: "<" => Lt);
- t!(Code: "<=" => LtEq);
- t!(Code: ">" => Gt);
- t!(Code: ">=" => GtEq);
- t!(Code: "+=" => PlusEq);
- t!(Code: "-=" => HyphEq);
- t!(Code: "*=" => StarEq);
- t!(Code: "/=" => SlashEq);
- t!(Code: ".." => Dots);
- t!(Code: "=>" => Arrow);
-
- // Test combinations.
- t!(Code: "<=>" => LtEq, Gt);
- t!(Code[" a/"]: "..." => Dots, Dot);
-
- // Test hyphen as symbol vs part of identifier.
- t!(Code[" /"]: "-1" => Minus, Int(1));
- t!(Code[" /"]: "-a" => Minus, Ident("a"));
- t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
- t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
- t!(Code[" /"]: "a-b" => Ident("a-b"));
-
- // Test invalid.
- t!(Code: r"\" => Error(Full, "not valid here"));
- }
-
- #[test]
- fn test_tokenize_keywords() {
- // A list of a few (not all) keywords.
- let list = [
- ("not", Not),
- ("let", Let),
- ("if", If),
- ("else", Else),
- ("for", For),
- ("in", In),
- ("import", Import),
- ];
-
- for (s, t) in list.clone() {
- t!(Markup[" "]: format!("#{}", s) => t);
- t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
- t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
- }
-
- for (s, t) in list {
- t!(Code[" "]: s => t);
- t!(Markup[" /"]: s => Text(s));
- }
-
- // Test simple identifier.
- t!(Markup[" "]: "#letter" => Ident("letter"));
- t!(Code[" /"]: "falser" => Ident("falser"));
- t!(Code[" /"]: "None" => Ident("None"));
- t!(Code[" /"]: "True" => Ident("True"));
- }
-
- #[test]
- fn test_tokenize_raw_blocks() {
- // Test basic raw block.
- t!(Markup: "``" => Raw("", None, false));
- t!(Markup: "`raw`" => Raw("raw", None, false));
- t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
-
- // Test special symbols in raw block.
- t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
- t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
-
- // Test separated closing backticks.
- t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
-
- // Test more backticks.
- t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
- t!(Markup: "````🚀````" => Raw("", None, false));
- t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
- t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
- }
-
- #[test]
- fn test_tokenize_idents() {
- // Test valid identifiers.
- t!(Code[" /"]: "x" => Ident("x"));
- t!(Code[" /"]: "value" => Ident("value"));
- t!(Code[" /"]: "__main__" => Ident("__main__"));
- t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));
-
- // Test non-ascii.
- t!(Code[" /"]: "α" => Ident("α"));
- t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));
-
- // Test hyphen parsed as identifier.
- t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
- t!(Code[" /"]: "one-10" => Ident("one-10"));
- }
-
- #[test]
- fn test_tokenize_numeric() {
- let ints = [("7", 7), ("012", 12)];
- let floats = [
- (".3", 0.3),
- ("0.3", 0.3),
- ("3.", 3.0),
- ("3.0", 3.0),
- ("14.3", 14.3),
- ("10e2", 1000.0),
- ("10e+0", 10.0),
- ("10e+1", 100.0),
- ("10e-2", 0.1),
- ("10.e1", 100.0),
- ("10.e-1", 1.0),
- (".1e1", 1.0),
- ("10E2", 1000.0),
- ];
-
- // Test integers.
- for &(s, v) in &ints {
- t!(Code[" /"]: s => Int(v));
- }
-
- // Test floats.
- for &(s, v) in &floats {
- t!(Code[" /"]: s => Float(v));
- }
-
- // Test attached numbers.
- t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
- t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
- t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
-
- // Test float from too large integer.
- let large = i64::MAX as f64 + 1.0;
- t!(Code[" /"]: large.to_string() => Float(large));
-
- // Combined integers and floats.
- let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
-
- let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[
- ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
- ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
- ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
- ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
- ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
- ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
- ("em", |x| Numeric(x, Unit::Em)),
- ("fr", |x| Numeric(x, Unit::Fr)),
- ("%", |x| Numeric(x, Unit::Percent)),
- ];
-
- // Numeric types.
- for &(suffix, build) in suffixes {
- for (s, v) in nums.clone() {
- t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
- }
- }
-
- // Multiple dots close the number.
- t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
- t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
- t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
-
- // Test invalid.
- t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
- }
-
- #[test]
- fn test_tokenize_strings() {
- // Test basic strings.
- t!(Code: "\"hi\"" => Str("hi"));
- t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
- t!(Code: "\"🌎\"" => Str("🌎"));
-
- // Test unterminated.
- t!(Code[""]: "\"hi" => Error(End, "expected quote"));
-
- // Test escaped quote.
- t!(Code: r#""a\"bc""# => Str("a\"bc"));
- t!(Code[""]: r#""\""# => Error(End, "expected quote"));
- }
-
- #[test]
- fn test_tokenize_line_comments() {
- // Test line comment with no trailing newline.
- t!(Both[""]: "//" => LineComment);
-
- // Test line comment ends at newline.
- t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
- t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
- t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
-
- // Test nested line comments.
- t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
- }
-
- #[test]
- fn test_tokenize_block_comments() {
- // Test basic block comments.
- t!(Both[""]: "/*" => BlockComment);
- t!(Both: "/**/" => BlockComment);
- t!(Both: "/*🏞*/" => BlockComment);
- t!(Both: "/*\n*/" => BlockComment);
-
- // Test depth 1 and 2 nested block comments.
- t!(Both: "/* /* */ */" => BlockComment);
- t!(Both: "/*/*/**/*/*/" => BlockComment);
-
- // Test two nested, one unclosed block comments.
- t!(Both[""]: "/*/*/**/*/" => BlockComment);
-
- // Test all combinations of up to two following slashes and stars.
- t!(Both[""]: "/*" => BlockComment);
- t!(Both[""]: "/*/" => BlockComment);
- t!(Both[""]: "/**" => BlockComment);
- t!(Both[""]: "/*//" => BlockComment);
- t!(Both[""]: "/*/*" => BlockComment);
- t!(Both[""]: "/**/" => BlockComment);
- t!(Both[""]: "/***" => BlockComment);
-
- // Test unexpected terminator.
- t!(Both: "/*Hi*/*/" => BlockComment,
- Error(Full, "unexpected end of block comment"));
- }
-}