summary refs log tree commit diff
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2021-01-10 17:52:37 +0100
committer Laurenz <laurmaedje@gmail.com> 2021-01-10 17:52:37 +0100
commit 9eac62c31a0f75c224cf4d6926e505cf02eafcde (patch)
tree f1630af3373ce30425ddbc590c06f9419aaa8113
parent 3b2a28ca8edf61cb1376a095be36c7d006c92d76 (diff)
Add lots of new tokens 🍪
-rw-r--r-- src/parse/mod.rs 10
-rw-r--r-- src/parse/resolve.rs 3
-rw-r--r-- src/parse/tests.rs 22
-rw-r--r-- src/parse/tokens.rs 725
-rw-r--r-- src/syntax/token.rs 225
5 files changed, 606 insertions, 379 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 150b5ed1..c03cb63d 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -75,7 +75,7 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
Token::Underscore => Node::Emph,
Token::Tilde => Node::Text("\u{00A0}".into()),
Token::Backslash => Node::Linebreak,
- Token::Hashtag => {
+ Token::Hash => {
if at_start {
return Some(Node::Heading(heading(p)));
} else {
@@ -98,10 +98,10 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
fn heading(p: &mut Parser) -> NodeHeading {
// Count hashtags.
let mut level = p.span(|p| {
- p.eat_assert(Token::Hashtag);
+ p.eat_assert(Token::Hash);
let mut level = 0u8;
- while p.eat_if(Token::Hashtag) {
+ while p.eat_if(Token::Hash) {
level = level.saturating_add(1);
}
level
@@ -240,7 +240,7 @@ fn bracket_body(p: &mut Parser) -> Tree {
fn expr(p: &mut Parser) -> Option<Expr> {
binops(p, term, |token| match token {
Token::Plus => Some(BinOp::Add),
- Token::Hyphen => Some(BinOp::Sub),
+ Token::Hyph => Some(BinOp::Sub),
_ => None,
})
}
@@ -282,7 +282,7 @@ fn binops(
/// Parse a factor of the form `-?value`.
fn factor(p: &mut Parser) -> Option<Expr> {
let op = |token| match token {
- Token::Hyphen => Some(UnOp::Neg),
+ Token::Hyph => Some(UnOp::Neg),
_ => None,
};
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index c4afc430..3adbf11f 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -17,6 +17,7 @@ pub fn resolve_string(string: &str) -> String {
Some('\\') => out.push('\\'),
Some('"') => out.push('"'),
Some('n') => out.push('\n'),
+ Some('r') => out.push('\r'),
Some('t') => out.push('\t'),
Some('u') if s.eat_if('{') => {
// TODO: Feedback if closing brace is missing.
@@ -137,7 +138,7 @@ mod tests {
test(r#"av\u{6797"#, "avζž—");
test(r#"a\\"#, "a\\");
test(r#"a\\\nbc"#, "a\\\nbc");
- test(r#"a\tbc"#, "a\tbc");
+ test(r#"a\t\r\nbc"#, "a\t\r\nbc");
test(r"🌎", "🌎");
test(r"🌎\", r"🌎\");
test(r"\🌎", r"\🌎");
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
index 833d6661..fd8c63ca 100644
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@@ -226,25 +226,31 @@ fn test_parse_simple_nodes() {
#[test]
fn test_parse_headings() {
// Basics with spans.
- t!("#a"
- nodes: [S(0..2, Heading(S(0..1, 0), Content![@S(1..2, Text("a"))]))],
+ t!("# a"
+ nodes: [S(0..3, Heading(S(0..1, 0), Content![
+ @S(1..2, Space), S(2..3, Text("a"))
+ ]))],
spans: true);
// Multiple hashtags.
- t!("###three" Heading(2, Content![@Text("three")]));
+ t!("### three" Heading(2, Content![@Space, Text("three")]));
t!("###### six" Heading(5, Content![@Space, Text("six")]));
// Start of heading.
t!("/**/#" Heading(0, Content![@]));
- t!("[f][#ok]" Call!("f", Args![Content![Heading(0, Content![@Text("ok")])]]));
+ t!("[f][# ok]" Call!("f", Args![Content![Heading(0, Content![
+ @Space, Text("ok")
+ ])]]));
// End of heading.
- t!("#a\nb" Heading(0, Content![@Text("a")]), Space, Text("b"));
+ t!("# a\nb" Heading(0, Content![@Space, Text("a")]), Space, Text("b"));
// Continued heading.
- t!("#a{\n1\n}b" Heading(0, Content![@Text("a"), Block(Int(1)), Text("b")]));
- t!("#a[f][\n\n]d" Heading(0, Content![@
- Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
+ t!("# a{\n1\n}b" Heading(0, Content![
+ @Space, Text("a"), Block(Int(1)), Text("b")
+ ]));
+ t!("# a[f][\n\n]d" Heading(0, Content![@
+ Space, Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
]));
// No heading.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 1e49d1c6..7f162b4c 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,7 +1,7 @@
use std::fmt::{self, Debug, Formatter};
use super::{is_newline, Scanner};
-use crate::geom::LengthUnit;
+use crate::geom::{AngularUnit, LengthUnit};
use crate::syntax::*;
use TokenMode::*;
@@ -62,20 +62,20 @@ impl<'s> Iterator for Tokens<'s> {
loop {
// Common elements.
return Some(match c {
+ // Functions and blocks.
+ '[' => Token::LeftBracket,
+ ']' => Token::RightBracket,
+ '{' => Token::LeftBrace,
+ '}' => Token::RightBrace,
+
// Whitespace.
- c if c.is_whitespace() => self.whitespace(c, start),
+ c if c.is_whitespace() => self.whitespace(c),
// Comments.
'/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)),
- // Functions and blocks.
- '[' => Token::LeftBracket,
- ']' => Token::RightBracket,
- '{' => Token::LeftBrace,
- '}' => Token::RightBrace,
-
_ => break,
});
}
@@ -86,96 +86,132 @@ impl<'s> Iterator for Tokens<'s> {
'*' => Token::Star,
'_' => Token::Underscore,
'~' => Token::Tilde,
- '#' => Token::Hashtag,
+ '#' => self.hash(start),
'`' => self.raw(),
-
- // Escape sequences.
- '\\' => self.escaped(),
+ '$' => self.math(),
+ '\\' => self.backslash(),
// Plain text.
_ => self.text(start),
},
Header => match c {
- // Syntactic elements in headers.
+ // Parens.
'(' => Token::LeftParen,
')' => Token::RightParen,
- ':' => Token::Colon,
+
+ // Length two.
+ '=' if self.s.eat_if('=') => Token::EqEq,
+ '!' if self.s.eat_if('=') => Token::BangEq,
+ '<' if self.s.eat_if('=') => Token::LtEq,
+ '>' if self.s.eat_if('=') => Token::GtEq,
+ '+' if self.s.eat_if('=') => Token::PlusEq,
+ '-' if self.s.eat_if('=') => Token::HyphEq,
+ '*' if self.s.eat_if('=') => Token::StarEq,
+ '/' if self.s.eat_if('=') => Token::SlashEq,
+ '.' if self.s.eat_if('.') => Token::Dots,
+ '=' if self.s.eat_if('>') => Token::Arrow,
+
+ // Length one.
',' => Token::Comma,
+ ':' => Token::Colon,
'|' => Token::Pipe,
'+' => Token::Plus,
- '-' => Token::Hyphen,
+ '-' => Token::Hyph,
'*' => Token::Star,
'/' => Token::Slash,
+ '=' => Token::Eq,
+ '<' => Token::Lt,
+ '>' => Token::Gt,
+ '?' => Token::Question,
+
+ // Identifiers.
+ c if is_id_start(c) => self.ident(start),
+
+ // Numbers.
+ c if c.is_ascii_digit()
+ || (c == '.' && self.s.check(|n| n.is_ascii_digit())) =>
+ {
+ self.number(start, c)
+ }
- // Expressions in headers.
+ // Hex values and strings.
'#' => self.hex(),
'"' => self.string(),
- // Expressions.
- c => self.expr(c, start),
+ _ => Token::Invalid(self.s.eaten_from(start)),
},
})
}
}
impl<'s> Tokens<'s> {
- fn whitespace(&mut self, first: char, start: usize) -> Token<'s> {
+ fn whitespace(&mut self, first: char) -> Token<'s> {
// Fast path for just a single space
if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
- return Token::Space(0);
- }
-
- self.s.jump(start);
+ Token::Space(0)
+ } else {
+ self.s.uneat();
+
+ // Count the number of newlines.
+ let mut newlines = 0;
+ while let Some(c) = self.s.eat_merging_crlf() {
+ if !c.is_whitespace() {
+ self.s.uneat();
+ break;
+ }
- // Count the number of newlines.
- let mut newlines = 0;
- while let Some(c) = self.s.eat_merging_crlf() {
- if !c.is_whitespace() {
- self.s.uneat();
- break;
+ if is_newline(c) {
+ newlines += 1;
+ }
}
- if is_newline(c) {
- newlines += 1;
- }
+ Token::Space(newlines)
}
-
- Token::Space(newlines)
- }
-
- fn line_comment(&mut self) -> Token<'s> {
- Token::LineComment(self.s.eat_until(is_newline))
}
- fn block_comment(&mut self) -> Token<'s> {
- let start = self.s.index();
-
- let mut state = '_';
- let mut depth = 1;
-
- // Find the first `*/` that does not correspond to a nested `/*`.
+ fn text(&mut self, start: usize) -> Token<'s> {
while let Some(c) = self.s.eat() {
- state = match (state, c) {
- ('*', '/') => {
- depth -= 1;
- if depth == 0 {
- break;
- }
- '_'
- }
- ('/', '*') => {
- depth += 1;
- '_'
- }
- _ => c,
+ if match c {
+ // Whitespace.
+ c if c.is_whitespace() => true,
+ // Comments.
+ '/' if self.s.check(|c| c == '/' || c == '*') => true,
+ // Parenthesis.
+ '[' | ']' | '{' | '}' => true,
+ // Markup.
+ '*' | '_' | '#' | '~' | '`' => true,
+ // Escaping.
+ '\\' => true,
+ _ => false,
+ } {
+ self.s.uneat();
+ break;
}
}
- let terminated = depth == 0;
- let end = self.s.index() - if terminated { 2 } else { 0 };
+ Token::Text(self.s.eaten_from(start))
+ }
- Token::BlockComment(self.s.get(start .. end))
+ fn hash(&mut self, start: usize) -> Token<'s> {
+ if self.s.check(is_id_start) {
+ self.s.eat();
+ self.s.eat_while(is_id_continue);
+ match self.s.eaten_from(start) {
+ "#let" => Token::Let,
+ "#if" => Token::If,
+ "#else" => Token::Else,
+ "#for" => Token::For,
+ "#in" => Token::In,
+ "#while" => Token::While,
+ "#break" => Token::Break,
+ "#continue" => Token::Continue,
+ "#return" => Token::Return,
+ s => Token::Invalid(s),
+ }
+ } else {
+ Token::Hash
+ }
}
fn raw(&mut self) -> Token<'s> {
@@ -205,7 +241,38 @@ impl<'s> Tokens<'s> {
})
}
- fn escaped(&mut self) -> Token<'s> {
+ fn math(&mut self) -> Token<'s> {
+ let mut dollars = 1;
+ if self.s.eat_if('$') {
+ dollars = 2;
+ }
+
+ let start = self.s.index();
+
+ let mut found = 0;
+ let mut escaped = false;
+ while found < dollars {
+ match self.s.eat() {
+ Some('$') if !escaped => found += 1,
+ Some(c) => {
+ found = 0;
+ escaped = c == '\\' && !escaped;
+ }
+ None => break,
+ }
+ }
+
+ let terminated = found == dollars;
+ let end = self.s.index() - if terminated { found } else { 0 };
+
+ Token::Math(TokenMath {
+ formula: self.s.get(start .. end),
+ inline: dollars == 1,
+ terminated,
+ })
+ }
+
+ fn backslash(&mut self) -> Token<'s> {
if let Some(c) = self.s.peek() {
match c {
// Backslash and comments.
@@ -235,27 +302,79 @@ impl<'s> Tokens<'s> {
}
}
- fn text(&mut self, start: usize) -> Token<'s> {
- while let Some(c) = self.s.eat() {
- if match c {
- // Whitespace.
- c if c.is_whitespace() => true,
- // Comments.
- '/' if self.s.check(|c| c == '/' || c == '*') => true,
- // Parenthesis.
- '[' | ']' | '{' | '}' => true,
- // Markup.
- '*' | '_' | '#' | '~' | '`' => true,
- // Escaping.
- '\\' => true,
- _ => false,
- } {
- self.s.uneat();
- break;
+ fn ident(&mut self, start: usize) -> Token<'s> {
+ self.s.eat_while(is_id_continue);
+ match self.s.eaten_from(start) {
+ "not" => Token::Not,
+ "and" => Token::And,
+ "or" => Token::Or,
+ "let" => Token::Let,
+ "if" => Token::If,
+ "else" => Token::Else,
+ "for" => Token::For,
+ "in" => Token::In,
+ "while" => Token::While,
+ "break" => Token::Break,
+ "continue" => Token::Continue,
+ "return" => Token::Return,
+ "none" => Token::None,
+ "true" => Token::Bool(true),
+ "false" => Token::Bool(false),
+ id => Token::Ident(id),
+ }
+ }
+
+ fn number(&mut self, start: usize, first: char) -> Token<'s> {
+ // Read the first part (integer or fractional depending on `first`).
+ self.s.eat_while(|c| c.is_ascii_digit());
+
+ // Read the fractional part if not already done and present.
+ if first != '.' && self.s.eat_if('.') {
+ self.s.eat_while(|c| c.is_ascii_digit());
+ }
+
+ // Read the exponent.
+ if self.s.eat_if('e') || self.s.eat_if('E') {
+ let _ = self.s.eat_if('+') || self.s.eat_if('-');
+ self.s.eat_while(|c| c.is_ascii_digit());
+ }
+
+ // Read the suffix.
+ let suffix_start = self.s.index();
+ if !self.s.eat_if('%') {
+ self.s.eat_while(|c| c.is_ascii_alphanumeric());
+ }
+
+ let number = self.s.get(start .. suffix_start);
+ let suffix = self.s.eaten_from(suffix_start);
+ let all = self.s.eaten_from(start);
+
+ // Find out whether it is a simple number.
+ if suffix.is_empty() {
+ if let Ok(int) = number.parse::<i64>() {
+ return Token::Int(int);
+ } else if let Ok(float) = number.parse::<f64>() {
+ return Token::Float(float);
}
}
- Token::Text(self.s.eaten_from(start))
+ // Otherwise parse into the fitting numeric type.
+ let build = match suffix {
+ "%" => Token::Percent,
+ "pt" => |x| Token::Length(x, LengthUnit::Pt),
+ "mm" => |x| Token::Length(x, LengthUnit::Mm),
+ "cm" => |x| Token::Length(x, LengthUnit::Cm),
+ "in" => |x| Token::Length(x, LengthUnit::In),
+ "rad" => |x| Token::Angle(x, AngularUnit::Rad),
+ "deg" => |x| Token::Angle(x, AngularUnit::Deg),
+ _ => return Token::Invalid(all),
+ };
+
+ if let Ok(float) = number.parse::<f64>() {
+ build(float)
+ } else {
+ Token::Invalid(all)
+ }
}
fn hex(&mut self) -> Token<'s> {
@@ -278,64 +397,38 @@ impl<'s> Tokens<'s> {
})
}
- fn expr(&mut self, first: char, start: usize) -> Token<'s> {
- if is_id_start(first) {
- self.ident(start)
- } else if first.is_ascii_digit()
- || (first == '.' && self.s.check(|c| c.is_ascii_digit()))
- {
- self.number(start)
- } else {
- Token::Invalid(self.s.eaten_from(start))
- }
- }
-
- fn ident(&mut self, start: usize) -> Token<'s> {
- self.s.eat_while(is_id_continue);
- let string = self.s.eaten_from(start);
- match string {
- "none" => Token::None,
- "true" => Token::Bool(true),
- "false" => Token::Bool(false),
- _ => Token::Ident(string),
- }
+ fn line_comment(&mut self) -> Token<'s> {
+ Token::LineComment(self.s.eat_until(is_newline))
}
- fn number(&mut self, start: usize) -> Token<'s> {
- self.s.jump(start);
-
- // Read the integer part.
- self.s.eat_while(|c| c.is_ascii_digit());
+ fn block_comment(&mut self) -> Token<'s> {
+ let start = self.s.index();
- // Read the fractional part if present.
- if self.s.eat_if('.') {
- self.s.eat_while(|c| c.is_ascii_digit());
- }
+ let mut state = '_';
+ let mut depth = 1;
- // Read the exponent.
- if self.s.eat_if('e') || self.s.eat_if('E') {
- let _ = self.s.eat_if('+') || self.s.eat_if('-');
- self.s.eat_while(|c| c.is_ascii_digit());
+ // Find the first `*/` that does not correspond to a nested `/*`.
+ while let Some(c) = self.s.eat() {
+ state = match (state, c) {
+ ('*', '/') => {
+ depth -= 1;
+ if depth == 0 {
+ break;
+ }
+ '_'
+ }
+ ('/', '*') => {
+ depth += 1;
+ '_'
+ }
+ _ => c,
+ }
}
- // Read the suffix.
- if !self.s.eat_if('%') {
- self.s.eat_while(|c| c.is_ascii_alphanumeric());
- }
+ let terminated = depth == 0;
+ let end = self.s.index() - if terminated { 2 } else { 0 };
- // Parse into one of the suitable types.
- let string = self.s.eaten_from(start);
- if let Some(percent) = parse_percent(string) {
- Token::Percent(percent)
- } else if let Some((val, unit)) = parse_length(string) {
- Token::Length(val, unit)
- } else if let Ok(int) = string.parse::<i64>() {
- Token::Int(int)
- } else if let Ok(float) = string.parse::<f64>() {
- Token::Float(float)
- } else {
- Token::Invalid(string)
- }
+ Token::BlockComment(self.s.get(start .. end))
}
}
@@ -345,40 +438,12 @@ impl Debug for Tokens<'_> {
}
}
-fn parse_percent(string: &str) -> Option<f64> {
- string.strip_suffix('%').and_then(|prefix| prefix.parse::<f64>().ok())
-}
-
-fn parse_length(string: &str) -> Option<(f64, LengthUnit)> {
- let len = string.len();
-
- // We need at least some number and the unit.
- if len <= 2 {
- return None;
- }
-
- // We can view the string as bytes since a multibyte UTF-8 char cannot
- // have valid ASCII chars as subbytes.
- let split = len - 2;
- let bytes = string.as_bytes();
- let unit = match &bytes[split ..] {
- b"pt" => LengthUnit::Pt,
- b"mm" => LengthUnit::Mm,
- b"cm" => LengthUnit::Cm,
- b"in" => LengthUnit::In,
- _ => return None,
- };
-
- string[.. split].parse::<f64>().ok().map(|val| (val, unit))
-}
-
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
use super::*;
use crate::parse::tests::check;
- use LengthUnit::*;
use Option::None;
use Token::{Ident, *};
@@ -386,6 +451,10 @@ mod tests {
Token::Raw(TokenRaw { text, backticks, terminated })
}
+ fn Math(formula: &str, inline: bool, terminated: bool) -> Token {
+ Token::Math(TokenMath { formula, inline, terminated })
+ }
+
fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
Token::UnicodeEscape(TokenUnicodeEscape { sequence, terminated })
}
@@ -424,6 +493,7 @@ mod tests {
// Letter suffixes.
('a', Some(Body), "hello", Text("hello")),
('a', Some(Body), "💚", Text("💚")),
+ ('a', Some(Header), "if", If),
('a', Some(Header), "val", Ident("val")),
('a', Some(Header), "α", Ident("α")),
('a', Some(Header), "_", Ident("_")),
@@ -437,9 +507,10 @@ mod tests {
('/', Some(Body), "*", Star),
('/', Some(Body), "_", Underscore),
('/', Some(Body), r"\\", Text(r"\")),
+ ('/', Some(Body), "#let", Let),
('/', Some(Header), "(", LeftParen),
('/', Some(Header), ":", Colon),
- ('/', Some(Header), "+", Plus),
+ ('/', Some(Header), "+=", PlusEq),
('/', Some(Header), "#123", Hex("123")),
];
@@ -473,89 +544,129 @@ mod tests {
}
#[test]
- fn test_length_from_str() {
- assert_eq!(parse_length("2.5cm"), Some((2.5, Cm)));
- assert_eq!(parse_length("1.e+2cm"), Some((100.0, Cm)));
- assert_eq!(parse_length("123🚚"), None);
+ fn test_tokenize_brackets() {
+ // Test body.
+ t!(Body: "[" => LeftBracket);
+ t!(Body: "]" => RightBracket);
+ t!(Body: "{" => LeftBrace);
+ t!(Body: "}" => RightBrace);
+ t!(Body[" /"]: "(" => Text("("));
+ t!(Body[" /"]: ")" => Text(")"));
+
+ // Test header.
+ t!(Header: "[" => LeftBracket);
+ t!(Header: "]" => RightBracket);
+ t!(Header: "{" => LeftBrace);
+ t!(Header: "}" => RightBrace);
+ t!(Header: "(" => LeftParen);
+ t!(Header: ")" => RightParen);
}
#[test]
- fn test_tokenize_whitespace() {
- // Test basic whitespace.
- t!(Both["a1/"]: "" => );
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: "\t" => Space(0));
- t!(Both["a1/"]: " \t" => Space(0));
- t!(Both["a1/"]: "\u{202F}" => Space(0));
-
- // Test newline counting.
- t!(Both["a1/"]: "\n" => Space(1));
- t!(Both["a1/"]: "\n " => Space(1));
- t!(Both["a1/"]: " \n" => Space(1));
- t!(Both["a1/"]: " \n " => Space(1));
- t!(Both["a1/"]: "\r\n" => Space(1));
- t!(Both["a1/"]: " \n\t \n " => Space(2));
- t!(Both["a1/"]: "\n\r" => Space(2));
- t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
+ fn test_tokenize_body_symbols() {
+ // Test markup tokens.
+ t!(Body[" a1"]: "*" => Star);
+ t!(Body: "_" => Underscore);
+ t!(Body["a1/"]: "# " => Hash, Space(0));
+ t!(Body: "~" => Tilde);
+ t!(Body[" "]: r"\" => Backslash);
}
#[test]
- fn test_tokenize_line_comments() {
- // Test line comment with no trailing newline.
- t!(Both[""]: "//" => LineComment(""));
-
- // Test line comment ends at newline.
- t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1));
- t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1));
- t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1));
-
- // Test nested line comments.
- t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1));
+ fn test_tokenize_header_symbols() {
+ // Test all symbols.
+ t!(Header: "," => Comma);
+ t!(Header: ":" => Colon);
+ t!(Header: "|" => Pipe);
+ t!(Header: "+" => Plus);
+ t!(Header: "-" => Hyph);
+ t!(Header[" a1"]: "*" => Star);
+ t!(Header[" a1"]: "/" => Slash);
+ t!(Header: "=" => Eq);
+ t!(Header: "==" => EqEq);
+ t!(Header: "!=" => BangEq);
+ t!(Header: "<" => Lt);
+ t!(Header: "<=" => LtEq);
+ t!(Header: ">" => Gt);
+ t!(Header: ">=" => GtEq);
+ t!(Header: "+=" => PlusEq);
+ t!(Header: "-=" => HyphEq);
+ t!(Header: "*=" => StarEq);
+ t!(Header: "/=" => SlashEq);
+ t!(Header: "?" => Question);
+ t!(Header: ".." => Dots);
+ t!(Header: "=>" => Arrow);
+
+ // Test combinations.
+ t!(Header: "|=>" => Pipe, Arrow);
+ t!(Header: "<=>" => LtEq, Gt);
+ t!(Header[" a/"]: "..." => Dots, Invalid("."));
+
+ // Test hyphen as symbol vs part of identifier.
+ t!(Header[" /"]: "-1" => Hyph, Int(1));
+ t!(Header[" /"]: "-a" => Hyph, Ident("a"));
+ t!(Header[" /"]: "--1" => Hyph, Hyph, Int(1));
+ t!(Header[" /"]: "--_a" => Hyph, Hyph, Ident("_a"));
+ t!(Header[" /"]: "a-b" => Ident("a-b"));
}
#[test]
- fn test_tokenize_block_comments() {
- // Test basic block comments.
- t!(Both[""]: "/*" => BlockComment(""));
- t!(Both: "/**/" => BlockComment(""));
- t!(Both: "/*🏞*/" => BlockComment("🏞"));
- t!(Both: "/*\n*/" => BlockComment("\n"));
+ fn test_tokenize_keywords() {
+ let both = [
+ ("let", Let),
+ ("if", If),
+ ("else", Else),
+ ("for", For),
+ ("in", In),
+ ("while", While),
+ ("break", Break),
+ ("continue", Continue),
+ ("return", Return),
+ ];
- // Test depth 1 and 2 nested block comments.
- t!(Both: "/* /* */ */" => BlockComment(" /* */ "));
- t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/"));
+ for &(s, t) in &both {
+ t!(Header[" "]: s => t);
+ t!(Body[" "]: format!("#{}", s) => t);
+ t!(Body[" "]: format!("#{0}#{0}", s) => t, t);
+ t!(Body[" /"]: format!("# {}", s) => Hash, Space(0), Text(s));
+ }
- // Test two nested, one unclosed block comments.
- t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/"));
+ let header = [
+ ("not", Not),
+ ("and", And),
+ ("or", Or),
+ ("none", Token::None),
+ ("false", Bool(false)),
+ ("true", Bool(true)),
+ ];
- // Test all combinations of up to two following slashes and stars.
- t!(Both[""]: "/*" => BlockComment(""));
- t!(Both[""]: "/*/" => BlockComment("/"));
- t!(Both[""]: "/**" => BlockComment("*"));
- t!(Both[""]: "/*//" => BlockComment("//"));
- t!(Both[""]: "/*/*" => BlockComment("/*"));
- t!(Both[""]: "/**/" => BlockComment(""));
- t!(Both[""]: "/***" => BlockComment("**"));
+ for &(s, t) in &header {
+ t!(Header[" "]: s => t);
+ t!(Body[" /"]: s => Text(s));
+ }
+
+ // Test invalid case.
+ t!(Header[" /"]: "None" => Ident("None"));
+ t!(Header[" /"]: "True" => Ident("True"));
+
+ // Test word that contains keyword.
+ t!(Body[" "]: "#letter" => Invalid("#letter"));
+ t!(Header[" /"]: "falser" => Ident("falser"));
}
#[test]
- fn test_tokenize_body_tokens() {
- // Test parentheses.
- t!(Body: "[" => LeftBracket);
- t!(Body: "]" => RightBracket);
- t!(Body: "{" => LeftBrace);
- t!(Body: "}" => RightBrace);
-
- // Test markup tokens.
- t!(Body[" a1"]: "*" => Star);
- t!(Body: "_" => Underscore);
- t!(Body: "~" => Tilde);
- t!(Body: "#" => Hashtag);
- t!(Body[" "]: r"\" => Backslash);
+ fn test_tokenize_text() {
+ // Test basic text.
+ t!(Body[" /"]: "hello" => Text("hello"));
+ t!(Body[" /"]: "hello-world" => Text("hello-world"));
- // Test header symbols.
+ // Test header symbols in text.
+ t!(Body[" /"]: "a():\"b" => Text("a():\"b"));
t!(Body[" /"]: ":,=|/+-" => Text(":,=|/+-"));
+
+ // Test text ends.
+ t!(Body[""]: "hello " => Text("hello"), Space(0));
+ t!(Body[""]: "hello~" => Text("hello"), Tilde);
}
#[test]
@@ -578,6 +689,21 @@ mod tests {
}
#[test]
+ fn test_tokenize_math_formulas() {
+ // Test basic formula.
+ t!(Body: "$x$" => Math("x", true, true));
+ t!(Body: "$$x + y$$" => Math("x + y", false, true));
+
+ // Test unterminated.
+ t!(Body[""]: "$$x" => Math("x", false, false));
+ t!(Body[""]: "$$x$\n$" => Math("x$\n$", false, false));
+
+ // Test escape sequences.
+ t!(Body: r"$$\\\$$$" => Math(r"\\\$", false, true));
+ t!(Body[""]: r"$$ $\\$" => Math(r" $\\$", false, false));
+ }
+
+ #[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Body: r"\\" => Text(r"\"));
@@ -614,49 +740,24 @@ mod tests {
}
#[test]
- fn test_tokenize_text() {
- // Test basic text.
- t!(Body[" /"]: "hello" => Text("hello"));
- t!(Body[" /"]: "hello-world" => Text("hello-world"));
-
- // Test header symbols in text.
- t!(Body[" /"]: "a():\"b" => Text("a():\"b"));
-
- // Test text ends.
- t!(Body[""]: "hello " => Text("hello"), Space(0));
- t!(Body[""]: "hello~" => Text("hello"), Tilde);
- }
-
- #[test]
- fn test_tokenize_header_tokens() {
- // Test parentheses.
- t!(Header: "[" => LeftBracket);
- t!(Header: "]" => RightBracket);
- t!(Header: "{" => LeftBrace);
- t!(Header: "}" => RightBrace);
- t!(Header: "(" => LeftParen);
- t!(Header: ")" => RightParen);
-
- // Test structural tokens.
- t!(Header: ":" => Colon);
- t!(Header: "," => Comma);
- t!(Header: "|" => Pipe);
- t!(Header: "+" => Plus);
- t!(Header: "-" => Hyphen);
- t!(Header[" a1"]: "*" => Star);
- t!(Header[" a1"]: "/" => Slash);
-
- // Test hyphen parsed as symbol.
- t!(Header[" /"]: "-1" => Hyphen, Int(1));
- t!(Header[" /"]: "-a" => Hyphen, Ident("a"));
- t!(Header[" /"]: "--1" => Hyphen, Hyphen, Int(1));
- t!(Header[" /"]: "--_a" => Hyphen, Hyphen, Ident("_a"));
- t!(Header[" /"]: "a-b" => Ident("a-b"));
+ fn test_tokenize_whitespace() {
+ // Test basic whitespace.
+ t!(Both["a1/"]: "" => );
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: "\t" => Space(0));
+ t!(Both["a1/"]: " \t" => Space(0));
+ t!(Both["a1/"]: "\u{202F}" => Space(0));
- // Test some operations.
- t!(Header[" /"]: "1+3" => Int(1), Plus, Int(3));
- t!(Header[" /"]: "1*3" => Int(1), Star, Int(3));
- t!(Header[" /"]: "1/3" => Int(1), Slash, Int(3));
+ // Test newline counting.
+ t!(Both["a1/"]: "\n" => Space(1));
+ t!(Both["a1/"]: "\n " => Space(1));
+ t!(Both["a1/"]: " \n" => Space(1));
+ t!(Both["a1/"]: " \n " => Space(1));
+ t!(Both["a1/"]: "\r\n" => Space(1));
+ t!(Both["a1/"]: " \n\t \n " => Space(2));
+ t!(Both["a1/"]: "\n\r" => Space(2));
+ t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
#[test]
@@ -677,22 +778,7 @@ mod tests {
}
#[test]
- fn test_tokenize_keywords() {
- // Test none.
- t!(Header[" /"]: "none" => Token::None);
- t!(Header[" /"]: "None" => Ident("None"));
-
- // Test valid bools.
- t!(Header[" /"]: "false" => Bool(false));
- t!(Header[" /"]: "true" => Bool(true));
-
- // Test invalid bools.
- t!(Header[" /"]: "True" => Ident("True"));
- t!(Header[" /"]: "falser" => Ident("falser"));
- }
-
- #[test]
- fn test_tokenize_numeric_values() {
+ fn test_tokenize_numeric() {
let ints = [("7", 7), ("012", 12)];
let floats = [
(".3", 0.3),
@@ -721,6 +807,7 @@ mod tests {
}
// Test attached numbers.
+ t!(Header[" /"]: ".2.3" => Float(0.2), Float(0.3));
t!(Header[" /"]: "1.2.3" => Float(1.2), Float(0.3));
t!(Header[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
@@ -731,20 +818,20 @@ mod tests {
// Combined integers and floats.
let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats.iter().copied());
- // Test percentages.
- for (s, v) in nums.clone() {
- t!(Header[" /"]: format!("{}%", s) => Percent(v));
- }
+ let suffixes = [
+ ("%", Percent as fn(f64) -> Token<'static>),
+ ("mm", |x| Length(x, LengthUnit::Mm)),
+ ("pt", |x| Length(x, LengthUnit::Pt)),
+ ("cm", |x| Length(x, LengthUnit::Cm)),
+ ("in", |x| Length(x, LengthUnit::In)),
+ ("rad", |x| Angle(x, AngularUnit::Rad)),
+ ("deg", |x| Angle(x, AngularUnit::Deg)),
+ ];
- // Test lengths.
- for &unit in &[
- LengthUnit::Mm,
- LengthUnit::Pt,
- LengthUnit::Cm,
- LengthUnit::In,
- ] {
+ // Numeric types.
+ for &(suffix, build) in &suffixes {
for (s, v) in nums.clone() {
- t!(Header[" /"]: format!("{}{}", s, unit) => Length(v, unit));
+ t!(Header[" /"]: format!("{}{}", s, suffix) => build(v));
}
}
}
@@ -765,6 +852,8 @@ mod tests {
t!(Header: "\"hi\"" => Str("hi", true));
t!(Header: "\"hi\nthere\"" => Str("hi\nthere", true));
t!(Header: "\"🌎\"" => Str("🌎", true));
+
+ // Test unterminated.
t!(Header[""]: "\"hi" => Str("hi", false));
// Test escaped quote.
@@ -773,6 +862,45 @@ mod tests {
}
#[test]
+ fn test_tokenize_line_comments() {
+ // Test line comment with no trailing newline.
+ t!(Both[""]: "//" => LineComment(""));
+
+ // Test line comment ends at newline.
+ t!(Both["a1/"]: "//bc\n" => LineComment("bc"), Space(1));
+ t!(Both["a1/"]: "// bc \n" => LineComment(" bc "), Space(1));
+ t!(Both["a1/"]: "//bc\r\n" => LineComment("bc"), Space(1));
+
+ // Test nested line comments.
+ t!(Both["a1/"]: "//a//b\n" => LineComment("a//b"), Space(1));
+ }
+
+ #[test]
+ fn test_tokenize_block_comments() {
+ // Test basic block comments.
+ t!(Both[""]: "/*" => BlockComment(""));
+ t!(Both: "/**/" => BlockComment(""));
+ t!(Both: "/*🏞*/" => BlockComment("🏞"));
+ t!(Both: "/*\n*/" => BlockComment("\n"));
+
+ // Test depth 1 and 2 nested block comments.
+ t!(Both: "/* /* */ */" => BlockComment(" /* */ "));
+ t!(Both: "/*/*/**/*/*/" => BlockComment("/*/**/*/"));
+
+ // Test two nested, one unclosed block comments.
+ t!(Both[""]: "/*/*/**/*/" => BlockComment("/*/**/*/"));
+
+ // Test all combinations of up to two following slashes and stars.
+ t!(Both[""]: "/*" => BlockComment(""));
+ t!(Both[""]: "/*/" => BlockComment("/"));
+ t!(Both[""]: "/**" => BlockComment("*"));
+ t!(Both[""]: "/*//" => BlockComment("//"));
+ t!(Both[""]: "/*/*" => BlockComment("/*"));
+ t!(Both[""]: "/**/" => BlockComment(""));
+ t!(Both[""]: "/***" => BlockComment("**"));
+ }
+
+ #[test]
fn test_tokenize_invalid() {
// Test invalidly closed block comments.
t!(Both: "*/" => Token::Invalid("*/"));
@@ -784,11 +912,14 @@ mod tests {
t!(Header: r"\:" => Invalid(r"\"), Colon);
t!(Header: "meal⌚" => Ident("meal"), Invalid("⌚"));
t!(Header[" /"]: r"\a" => Invalid(r"\"), Ident("a"));
- t!(Header[" /"]: ">main" => Invalid(">"), Ident("main"));
// Test invalid number suffixes.
t!(Header[" /"]: "1foo" => Invalid("1foo"));
t!(Header: "1p%" => Invalid("1p"), Invalid("%"));
t!(Header: "1%%" => Percent(1.0), Invalid("%"));
+
+ // Test invalid keyword.
+ t!(Body[" /"]: "#-" => Hash, Text("-"));
+ t!(Body[" "]: "#do" => Invalid("#do"))
}
}
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index fb50c4ec..261f2104 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -1,68 +1,114 @@
-use crate::geom::LengthUnit;
+use crate::geom::{AngularUnit, LengthUnit};
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
- /// A consecutive non-markup string.
- Text(&'s str),
- /// One or more whitespace characters.
- ///
- /// The contained `usize` denotes the number of newlines that were contained
- /// in the whitespace.
- Space(usize),
-
- /// A line comment with inner string contents `//<str>\n`.
- LineComment(&'s str),
- /// A block comment with inner string contents `/*<str>*/`.
- ///
- /// The comment can contain nested block comments.
- BlockComment(&'s str),
-
- /// A left bracket: `[`.
+ /// A left square bracket: `[`.
LeftBracket,
- /// A right bracket: `]`.
+ /// A right square bracket: `]`.
RightBracket,
- /// A left brace: `{`.
+ /// A left curly brace: `{`.
LeftBrace,
- /// A right brace: `}`.
+ /// A right curly brace: `}`.
RightBrace,
- /// A left parenthesis: `(`.
+ /// A left round parenthesis: `(`.
LeftParen,
- /// A right parenthesis: `)`.
+ /// A right round parenthesis: `)`.
RightParen,
-
- /// A star: `*`.
+ /// An asterisk: `*`.
Star,
/// An underscore: `_`.
Underscore,
+ /// A hashtag: `#`.
+ Hash,
/// A tilde: `~`.
Tilde,
- /// A backslash followed by whitespace: `\`.
+ /// A backslash followed by nothing or whitespace: `\`.
Backslash,
- /// A hashtag indicating a section heading: `#`.
- Hashtag,
- /// A raw block: `` `...` ``.
- Raw(TokenRaw<'s>),
- /// A unicode escape sequence: `\u{1F5FA}`.
- UnicodeEscape(TokenUnicodeEscape<'s>),
-
- /// A colon: `:`.
- Colon,
/// A comma: `,`.
Comma,
+ /// A colon: `:`.
+ Colon,
/// A pipe: `|`.
Pipe,
/// A plus: `+`.
Plus,
/// A hyphen: `-`.
- Hyphen,
+ Hyph,
/// A slash: `/`.
Slash,
-
+ /// A single equals sign: `=`.
+ Eq,
+ /// Two equals signs: `==`.
+ EqEq,
+ /// An exclamation mark followed by an equals sign: `!=`.
+ BangEq,
+ /// A less-than sign: `<`.
+ Lt,
+ /// A less-than sign followed by an equals sign: `<=`.
+ LtEq,
+ /// A greater-than sign: `>`.
+ Gt,
+ /// A greater-than sign followed by an equals sign: `>=`.
+ GtEq,
+ /// A plus followed by an equals sign: `+=`.
+ PlusEq,
+ /// A hyphen followed by an equals sign: `-=`.
+ HyphEq,
+ /// An asterisk followed by an equals sign: `*=`.
+ StarEq,
+ /// A slash followed by an equals sign: `/=`.
+ SlashEq,
+ /// A question mark: `?`.
+ Question,
+ /// Two dots: `..`.
+ Dots,
+ /// An equals sign followed by a greater-than sign: `=>`.
+ Arrow,
+ /// The `not` operator.
+ Not,
+ /// The `and` operator.
+ And,
+ /// The `or` operator.
+ Or,
+ /// The `let` / `#let` keyword.
+ Let,
+ /// The `if` / `#if` keyword.
+ If,
+ /// The `else` / `#else` keyword.
+ Else,
+ /// The `for` / `#for` keyword.
+ For,
+ /// The `in` / `#in` keyword.
+ In,
+ /// The `while` / `#while` keyword.
+ While,
+ /// The `break` / `#break` keyword.
+ Break,
+ /// The `continue` / `#continue` keyword.
+ Continue,
+ /// The `return` / `#return` keyword.
+ Return,
+ /// The none literal: `none`.
+ None,
+ /// One or more whitespace characters.
+ ///
+ /// The contained `usize` denotes the number of newlines that were contained
+ /// in the whitespace.
+ Space(usize),
+ /// A consecutive non-markup string.
+ Text(&'s str),
+ /// An arbitrary number of backticks followed by inner contents, terminated
+ /// with the same number of backticks: `` `...` ``.
+ Raw(TokenRaw<'s>),
+ /// One or two dollar signs followed by inner contents, terminated with the
+ /// same number of dollar signs.
+ Math(TokenMath<'s>),
+ /// A slash and the letter "u" followed by a hexadecimal unicode entity
+ /// enclosed in curly braces: `\u{1F5FA}`.
+ UnicodeEscape(TokenUnicodeEscape<'s>),
/// An identifier: `center`.
Ident(&'s str),
- /// A none: `none`.
- None,
/// A boolean: `true`, `false`.
Bool(bool),
/// An integer: `120`.
@@ -76,11 +122,20 @@ pub enum Token<'s> {
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding
/// [literal](super::Expr::Percent).
Percent(f64),
+ /// An angle: `90deg`.
+ Angle(f64, AngularUnit),
/// A hex value: `#20d82a`.
Hex(&'s str),
/// A quoted string: `"..."`.
Str(TokenStr<'s>),
-
+ /// Two slashes followed by inner contents, terminated with a newline:
+ /// `//<str>\n`.
+ LineComment(&'s str),
+ /// A slash and a star followed by inner contents, terminated with a star
+ /// and a slash: `/*<str>*/`.
+ ///
+ /// The comment can contain nested block comments.
+ BlockComment(&'s str),
/// Things that are not valid tokens.
Invalid(&'s str),
}
@@ -98,15 +153,6 @@ pub struct TokenStr<'s> {
pub terminated: bool,
}
-/// A unicode escape sequence: `\u{1F5FA}`.
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct TokenUnicodeEscape<'s> {
- /// The escape sequence between two braces.
- pub sequence: &'s str,
- /// Whether the closing brace was present.
- pub terminated: bool,
-}
-
/// A raw block: `` `...` ``.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct TokenRaw<'s> {
@@ -118,48 +164,91 @@ pub struct TokenRaw<'s> {
pub terminated: bool,
}
+/// A math formula: `$2pi + x$`, `$$f'(x) = x^2$$`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenMath<'s> {
+ /// The formula between the dollars.
+ pub formula: &'s str,
+ /// Whether the formula was surrounded by one dollar (true) or two dollars
+ /// (false).
+ pub inline: bool,
+ /// Whether the closing dollars were present.
+ pub terminated: bool,
+}
+
+/// A unicode escape sequence: `\u{1F5FA}`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenUnicodeEscape<'s> {
+ /// The escape sequence between the braces.
+ pub sequence: &'s str,
+ /// Whether the closing brace was present.
+ pub terminated: bool,
+}
+
impl<'s> Token<'s> {
- /// The natural-language name of this token for use in error messages.
+ /// The English name of this token for use in error messages.
pub fn name(self) -> &'static str {
match self {
- Self::Text(_) => "text",
- Self::Space(_) => "space",
-
- Self::LineComment(_) => "line comment",
- Self::BlockComment(_) => "block comment",
-
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
-
Self::Star => "star",
Self::Underscore => "underscore",
+ Self::Hash => "hashtag",
+ Self::Tilde => "tilde",
Self::Backslash => "backslash",
- Self::Hashtag => "hashtag",
- Self::Tilde => "tidle",
- Self::Raw { .. } => "raw block",
- Self::UnicodeEscape { .. } => "unicode escape sequence",
-
- Self::Colon => "colon",
Self::Comma => "comma",
+ Self::Colon => "colon",
Self::Pipe => "pipe",
- Self::Plus => "plus sign",
- Self::Hyphen => "minus sign",
+ Self::Plus => "plus",
+ Self::Hyph => "minus",
Self::Slash => "slash",
-
+ Self::Eq => "assignment operator",
+ Self::EqEq => "equality operator",
+ Self::BangEq => "inequality operator",
+ Self::Lt => "less than operator",
+ Self::LtEq => "less than or equal operator",
+ Self::Gt => "greater than operator",
+ Self::GtEq => "greater than or equal operator",
+ Self::PlusEq => "add-assign operator",
+ Self::HyphEq => "subtract-assign operator",
+ Self::StarEq => "multiply-assign operator",
+ Self::SlashEq => "divide-assign operator",
+ Self::Question => "question mark",
+ Self::Dots => "dots",
+ Self::Arrow => "arrow",
+ Self::Not => "not operator",
+ Self::And => "and operator",
+ Self::Or => "or operator",
+ Self::Let => "let keyword",
+ Self::If => "if keyword",
+ Self::Else => "else keyword",
+ Self::For => "for keyword",
+ Self::In => "in keyword",
+ Self::While => "while keyword",
+ Self::Break => "break keyword",
+ Self::Continue => "continue keyword",
+ Self::Return => "return keyword",
Self::None => "none",
+ Self::Space(_) => "space",
+ Self::Text(_) => "text",
+ Self::Raw(_) => "raw block",
+ Self::Math(_) => "math formula",
+ Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Ident(_) => "identifier",
- Self::Bool(_) => "bool",
+ Self::Bool(_) => "boolean",
Self::Int(_) => "integer",
Self::Float(_) => "float",
Self::Length(..) => "length",
+ Self::Angle(..) => "angle",
Self::Percent(_) => "percentage",
Self::Hex(_) => "hex value",
- Self::Str { .. } => "string",
-
+ Self::Str(_) => "string",
+ Self::LineComment(_) => "line comment",
+ Self::BlockComment(_) => "block comment",
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
}