summary refs log tree commit diff
path: root/src/syntax
diff options
context:
space:
mode:
Diffstat (limited to 'src/syntax')
-rw-r--r-- src/syntax/mod.rs 57
-rw-r--r-- src/syntax/parsing/escaping.rs 243
-rw-r--r-- src/syntax/parsing/mod.rs 9
-rw-r--r-- src/syntax/parsing/parser.rs 660
-rw-r--r-- src/syntax/parsing/tests.rs 509
-rw-r--r-- src/syntax/span.rs 12
-rw-r--r-- src/syntax/token.rs 152
-rw-r--r-- src/syntax/tokens.rs 786
-rw-r--r-- src/syntax/tree.rs 2
9 files changed, 168 insertions, 2262 deletions
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 70935e79..1b9f8ba8 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,50 +1,11 @@
-//! Syntax trees, parsing and tokenization.
+//! Syntax types.
-pub mod decoration;
-pub mod parsing;
-pub mod span;
-pub mod tokens;
-pub mod tree;
+mod decoration;
+mod span;
+mod token;
+mod tree;
-#[cfg(test)]
-mod tests {
- use super::span;
- use crate::prelude::*;
- use std::fmt::Debug;
-
- /// Assert that expected and found are equal, printing both and panicking
- /// and the source of their test case if they aren't.
- ///
- /// When `cmp_spans` is false, spans are ignored.
- pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
- where
- T: Debug + PartialEq,
- {
- span::set_cmp(cmp_spans);
- let equal = exp == found;
- span::set_cmp(true);
-
- if !equal {
- println!("source: {:?}", src);
- if cmp_spans {
- println!("expected: {:#?}", exp);
- println!("found: {:#?}", found);
- } else {
- println!("expected: {:?}", exp);
- println!("found: {:?}", found);
- }
- panic!("test failed");
- }
- }
-
- pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
- Spanned::new(v, Span::new(Pos::new(sl, sc), Pos::new(el, ec)))
- }
-
- // Enables tests to optionally specify spans.
- impl<T> From<T> for Spanned<T> {
- fn from(t: T) -> Self {
- Spanned::zero(t)
- }
- }
-}
+pub use decoration::*;
+pub use span::*;
+pub use token::*;
+pub use tree::*;
diff --git a/src/syntax/parsing/escaping.rs b/src/syntax/parsing/escaping.rs
deleted file mode 100644
index 5f06388e..00000000
--- a/src/syntax/parsing/escaping.rs
+++ /dev/null
@@ -1,243 +0,0 @@
-use crate::syntax::tokens::is_newline_char;
-
-/// Resolves all escape sequences in a string.
-pub fn unescape_string(string: &str) -> String {
- let mut iter = string.chars().peekable();
- let mut out = String::with_capacity(string.len());
-
- while let Some(c) = iter.next() {
- if c == '\\' {
- match iter.next() {
- Some('\\') => out.push('\\'),
- Some('"') => out.push('"'),
- Some('u') if iter.peek() == Some(&'{') => {
- iter.next();
-
- let mut sequence = String::new();
- let terminated = loop {
- match iter.peek() {
- // TODO: Feedback that closing brace is missing.
- Some('}') => {
- iter.next();
- break true;
- }
- Some(&c) if c.is_ascii_hexdigit() => {
- iter.next();
- sequence.push(c);
- }
- _ => break false,
- }
- };
-
- // TODO: Feedback that escape sequence is wrong.
- if let Some(c) = hex_to_char(&sequence) {
- out.push(c);
- } else {
- out.push_str("\\u{");
- out.push_str(&sequence);
- if terminated {
- out.push('}');
- }
- }
- }
- Some('n') => out.push('\n'),
- Some('t') => out.push('\t'),
- Some(c) => {
- out.push('\\');
- out.push(c);
- }
- None => out.push('\\'),
- }
- } else {
- out.push(c);
- }
- }
-
- out
-}
-
-/// Resolves all escape sequences in raw markup (between backticks) and splits it into
-/// into lines.
-pub fn unescape_raw(raw: &str) -> Vec<String> {
- let mut iter = raw.chars();
- let mut text = String::new();
-
- while let Some(c) = iter.next() {
- if c == '\\' {
- if let Some(c) = iter.next() {
- if c != '\\' && c != '`' {
- text.push('\\');
- }
-
- text.push(c);
- } else {
- text.push('\\');
- }
- } else {
- text.push(c);
- }
- }
-
- split_lines(&text)
-}
-
-/// Resolves all escape sequences in code markup (between triple backticks) and splits it
-/// into into lines.
-pub fn unescape_code(raw: &str) -> Vec<String> {
- let mut iter = raw.chars().peekable();
- let mut text = String::new();
- let mut backticks = 0u32;
- let mut update_backtick_count;
-
- while let Some(c) = iter.next() {
- update_backtick_count = true;
-
- if c == '\\' && backticks > 0 {
- let mut tail = String::new();
- let mut escape_success = false;
- let mut backticks_after_slash = 0u32;
-
- while let Some(&s) = iter.peek() {
- match s {
- '\\' => {
- if backticks_after_slash == 0 {
- tail.push('\\');
- } else {
- // Pattern like `\`\` should fail
- // escape and just be printed verbantim.
- break;
- }
- }
- '`' => {
- tail.push(s);
- backticks_after_slash += 1;
- if backticks_after_slash == 2 {
- escape_success = true;
- iter.next();
- break;
- }
- }
- _ => break,
- }
-
- iter.next();
- }
-
- if !escape_success {
- text.push(c);
- backticks = backticks_after_slash;
- update_backtick_count = false;
- } else {
- backticks = 0;
- }
-
- text.push_str(&tail);
- } else {
- text.push(c);
- }
-
- if update_backtick_count {
- if c == '`' {
- backticks += 1;
- } else {
- backticks = 0;
- }
- }
- }
-
- split_lines(&text)
-}
-
-/// Converts a hexademical sequence (without braces or "\u") into a character.
-pub fn hex_to_char(sequence: &str) -> Option<char> {
- u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
-}
-
-/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
-pub fn split_lines(text: &str) -> Vec<String> {
- let mut iter = text.chars().peekable();
- let mut line = String::new();
- let mut lines = Vec::new();
-
- while let Some(c) = iter.next() {
- if is_newline_char(c) {
- if c == '\r' && iter.peek() == Some(&'\n') {
- iter.next();
- }
-
- lines.push(std::mem::take(&mut line));
- } else {
- line.push(c);
- }
- }
-
- lines.push(line);
- lines
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- #[rustfmt::skip]
- fn test_unescape_strings() {
- fn test(string: &str, expected: &str) {
- assert_eq!(unescape_string(string), expected.to_string());
- }
-
- test(r#"hello world"#, "hello world");
- test(r#"hello\nworld"#, "hello\nworld");
- test(r#"a\"bc"#, "a\"bc");
- test(r#"a\u{2603}bc"#, "a☃bc");
- test(r#"a\u{26c3bg"#, "a𦰻g");
- test(r#"av\u{6797"#, "avζž—");
- test(r#"a\\"#, "a\\");
- test(r#"a\\\nbc"#, "a\\\nbc");
- test(r#"a\tbc"#, "a\tbc");
- test(r"🌎", "🌎");
- test(r"🌎\", r"🌎\");
- test(r"\🌎", r"\🌎");
- }
-
- #[test]
- #[rustfmt::skip]
- fn test_unescape_raws() {
- fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_raw(raw), expected);
- }
-
- test("raw\\`", vec!["raw`"]);
- test("raw\\\\`", vec!["raw\\`"]);
- test("raw\ntext", vec!["raw", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("raw\\a", vec!["raw\\a"]);
- test("raw\\", vec!["raw\\"]);
- }
-
- #[test]
- #[rustfmt::skip]
- fn test_unescape_code() {
- fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_code(raw), expected);
- }
-
- test("code\\`", vec!["code\\`"]);
- test("code`\\``", vec!["code```"]);
- test("code`\\`a", vec!["code`\\`a"]);
- test("code``hi`\\``", vec!["code``hi```"]);
- test("code`\\\\``", vec!["code`\\``"]);
- test("code`\\`\\`go", vec!["code`\\`\\`go"]);
- test("code`\\`\\``", vec!["code`\\```"]);
- test("code\ntext", vec!["code", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("code\\a", vec!["code\\a"]);
- test("code\\", vec!["code\\"]);
- }
-}
diff --git a/src/syntax/parsing/mod.rs b/src/syntax/parsing/mod.rs
deleted file mode 100644
index bf34340f..00000000
--- a/src/syntax/parsing/mod.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-//! Parsing of source code into syntax trees.
-
-mod escaping;
-mod parser;
-
-pub use parser::parse;
-
-#[cfg(test)]
-mod tests;
diff --git a/src/syntax/parsing/parser.rs b/src/syntax/parsing/parser.rs
deleted file mode 100644
index ca41bf13..00000000
--- a/src/syntax/parsing/parser.rs
+++ /dev/null
@@ -1,660 +0,0 @@
-use std::str::FromStr;
-
-use super::escaping::*;
-use crate::color::RgbaColor;
-use crate::compute::table::SpannedEntry;
-use crate::syntax::decoration::Decoration;
-use crate::syntax::span::{Pos, Span, Spanned};
-use crate::syntax::tokens::{Token, TokenMode, Tokens};
-use crate::syntax::tree::*;
-use crate::{Feedback, Pass};
-
-/// Parse a string of source code.
-pub fn parse(src: &str) -> Pass<SyntaxTree> {
- Parser::new(src).parse()
-}
-
-struct Parser<'s> {
- tokens: Tokens<'s>,
- peeked: Option<Option<Spanned<Token<'s>>>>,
- delimiters: Vec<(Pos, Token<'static>)>,
- at_block_or_line_start: bool,
- feedback: Feedback,
-}
-
-impl<'s> Parser<'s> {
- fn new(src: &'s str) -> Self {
- Self {
- tokens: Tokens::new(src, TokenMode::Body),
- peeked: None,
- delimiters: vec![],
- at_block_or_line_start: true,
- feedback: Feedback::new(),
- }
- }
-
- fn parse(mut self) -> Pass<SyntaxTree> {
- let tree = self.parse_body_contents();
- Pass::new(tree, self.feedback)
- }
-}
-
-// Typesetting content.
-impl Parser<'_> {
- fn parse_body_contents(&mut self) -> SyntaxTree {
- let mut tree = SyntaxTree::new();
-
- self.at_block_or_line_start = true;
- while !self.eof() {
- if let Some(node) = self.parse_node() {
- tree.push(node);
- }
- }
-
- tree
- }
-
- fn parse_node(&mut self) -> Option<Spanned<SyntaxNode>> {
- let token = self.peek()?;
- let end = Span::at(token.span.end);
-
- // Set block or line start to false because most nodes have that effect, but
- // remember the old value to actually check it for hashtags and because comments
- // and spaces want to retain it.
- let was_at_block_or_line_start = self.at_block_or_line_start;
- self.at_block_or_line_start = false;
-
- Some(match token.v {
- // Starting from two newlines counts as a paragraph break, a single
- // newline does not.
- Token::Space(n) => {
- if n == 0 {
- self.at_block_or_line_start = was_at_block_or_line_start;
- } else if n >= 1 {
- self.at_block_or_line_start = true;
- }
-
- self.with_span(if n >= 2 {
- SyntaxNode::Parbreak
- } else {
- SyntaxNode::Spacing
- })
- }
-
- Token::LineComment(_) | Token::BlockComment(_) => {
- self.at_block_or_line_start = was_at_block_or_line_start;
- self.eat();
- return None;
- }
-
- Token::LeftBracket => {
- let call = self.parse_bracket_call(false);
- self.at_block_or_line_start = false;
- call.map(SyntaxNode::Call)
- }
-
- Token::Star => self.with_span(SyntaxNode::ToggleBolder),
- Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
- Token::Backslash => self.with_span(SyntaxNode::Linebreak),
-
- Token::Hashtag if was_at_block_or_line_start => {
- self.parse_heading().map(SyntaxNode::Heading)
- }
-
- Token::Raw { raw, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected backtick");
- }
- self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
- }
-
- Token::Code { lang, raw, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected backticks");
- }
-
- let lang = lang.and_then(|lang| {
- if let Some(ident) = Ident::new(lang.v) {
- Some(Spanned::new(ident, lang.span))
- } else {
- error!(@self.feedback, lang.span, "invalid identifier");
- None
- }
- });
-
- let mut lines = unescape_code(raw);
- let block = lines.len() > 1;
-
- if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
- lines.pop();
- }
-
- self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
- }
-
- Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
- Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())),
-
- Token::UnicodeEscape { sequence, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected closing brace");
- }
-
- if let Some(c) = hex_to_char(sequence) {
- self.with_span(SyntaxNode::Text(c.to_string()))
- } else {
- error!(@self.feedback, token.span, "invalid unicode escape sequence");
- self.eat();
- return None;
- }
- }
-
- unexpected => {
- error!(@self.feedback, token.span, "unexpected {}", unexpected.name());
- self.eat();
- return None;
- }
- })
- }
-
- fn parse_heading(&mut self) -> Spanned<Heading> {
- let start = self.pos();
- self.assert(Token::Hashtag);
-
- let mut level = 0;
- while self.peekv() == Some(Token::Hashtag) {
- level += 1;
- self.eat();
- }
-
- let span = Span::new(start, self.pos());
- let level = Spanned::new(level, span);
-
- if level.v > 5 {
- warning!(
- @self.feedback, level.span,
- "section depth larger than 6 has no effect",
- );
- }
-
- self.skip_ws();
-
- let mut tree = SyntaxTree::new();
- while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) {
- if let Some(node) = self.parse_node() {
- tree.push(node);
- }
- }
-
- let span = Span::new(start, self.pos());
- Spanned::new(Heading { level, tree }, span)
- }
-}
-
-// Function calls.
-impl Parser<'_> {
- fn parse_bracket_call(&mut self, chained: bool) -> Spanned<CallExpr> {
- let before_bracket = self.pos();
- if !chained {
- self.start_group(Group::Bracket);
- self.tokens.push_mode(TokenMode::Header);
- }
-
- let before_name = self.pos();
- self.start_group(Group::Subheader);
- self.skip_ws();
- let name = self.parse_ident().unwrap_or_else(|| {
- self.expected_found_or_at("function name", before_name);
- Spanned::new(Ident(String::new()), Span::at(before_name))
- });
-
- self.skip_ws();
-
- let mut args = match self.eatv() {
- Some(Token::Colon) => self.parse_table_contents().0,
- Some(_) => {
- self.expected_at("colon", name.span.end);
- while self.eat().is_some() {}
- TableExpr::new()
- }
- None => TableExpr::new(),
- };
-
- self.end_group();
- self.skip_ws();
- let (has_chained_child, end) = if self.peek().is_some() {
- let item = self.parse_bracket_call(true);
- let span = item.span;
- let t = vec![item.map(SyntaxNode::Call)];
- args.push(SpannedEntry::val(Spanned::new(Expr::Tree(t), span)));
- (true, span.end)
- } else {
- self.tokens.pop_mode();
- (false, self.end_group().end)
- };
-
- let start = if chained { before_name } else { before_bracket };
- let mut span = Span::new(start, end);
-
- if self.check(Token::LeftBracket) && !has_chained_child {
- self.start_group(Group::Bracket);
- self.tokens.push_mode(TokenMode::Body);
-
- let body = self.parse_body_contents();
-
- self.tokens.pop_mode();
- let body_span = self.end_group();
-
- let expr = Expr::Tree(body);
- args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
- span.expand(body_span);
- }
-
- Spanned::new(CallExpr { name, args }, span)
- }
-
- fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
- self.start_group(Group::Paren);
- let args = self.parse_table_contents().0;
- let args_span = self.end_group();
- let span = Span::merge(name.span, args_span);
- Spanned::new(CallExpr { name, args }, span)
- }
-}
-
-// Tables.
-impl Parser<'_> {
- fn parse_table_contents(&mut self) -> (TableExpr, bool) {
- let mut table = TableExpr::new();
- let mut comma_and_keyless = true;
-
- while {
- self.skip_ws();
- !self.eof()
- } {
- let (key, val) = if let Some(ident) = self.parse_ident() {
- self.skip_ws();
-
- match self.peekv() {
- Some(Token::Equals) => {
- self.eat();
- self.skip_ws();
- if let Some(value) = self.parse_expr() {
- (Some(ident), value)
- } else {
- self.expected("value");
- continue;
- }
- }
-
- Some(Token::LeftParen) => {
- let call = self.parse_paren_call(ident);
- (None, call.map(Expr::Call))
- }
-
- _ => (None, ident.map(Expr::Ident)),
- }
- } else if let Some(value) = self.parse_expr() {
- (None, value)
- } else {
- self.expected("value");
- continue;
- };
-
- let behind = val.span.end;
- if let Some(key) = key {
- comma_and_keyless = false;
- table.insert(key.v.0, SpannedEntry::new(key.span, val));
- self.feedback
- .decorations
- .push(Spanned::new(Decoration::TableKey, key.span));
- } else {
- table.push(SpannedEntry::val(val));
- }
-
- if {
- self.skip_ws();
- self.eof()
- } {
- break;
- }
-
- self.expect_at(Token::Comma, behind);
- comma_and_keyless = false;
- }
-
- let coercable = comma_and_keyless && !table.is_empty();
- (table, coercable)
- }
-}
-
-type Binop = fn(Box<Spanned<Expr>>, Box<Spanned<Expr>>) -> Expr;
-
-// Expressions and values.
-impl Parser<'_> {
- fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
- self.parse_binops("summand", Self::parse_term, |token| match token {
- Token::Plus => Some(Expr::Add),
- Token::Hyphen => Some(Expr::Sub),
- _ => None,
- })
- }
-
- fn parse_term(&mut self) -> Option<Spanned<Expr>> {
- self.parse_binops("factor", Self::parse_factor, |token| match token {
- Token::Star => Some(Expr::Mul),
- Token::Slash => Some(Expr::Div),
- _ => None,
- })
- }
-
- /// Parse expression of the form `<operand> (<op> <operand>)*`.
- fn parse_binops(
- &mut self,
- operand_name: &str,
- mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>,
- mut parse_op: impl FnMut(Token) -> Option<Binop>,
- ) -> Option<Spanned<Expr>> {
- let mut left = parse_operand(self)?;
-
- self.skip_ws();
- while let Some(token) = self.peek() {
- if let Some(op) = parse_op(token.v) {
- self.eat();
- self.skip_ws();
-
- if let Some(right) = parse_operand(self) {
- let span = Span::merge(left.span, right.span);
- let v = op(Box::new(left), Box::new(right));
- left = Spanned::new(v, span);
- self.skip_ws();
- continue;
- }
-
- error!(
- @self.feedback, Span::merge(left.span, token.span),
- "missing right {}", operand_name,
- );
- }
- break;
- }
-
- Some(left)
- }
-
- fn parse_factor(&mut self) -> Option<Spanned<Expr>> {
- if let Some(hyph) = self.check_eat(Token::Hyphen) {
- self.skip_ws();
- if let Some(factor) = self.parse_factor() {
- let span = Span::merge(hyph.span, factor.span);
- Some(Spanned::new(Expr::Neg(Box::new(factor)), span))
- } else {
- error!(@self.feedback, hyph.span, "dangling minus");
- None
- }
- } else {
- self.parse_value()
- }
- }
-
- fn parse_value(&mut self) -> Option<Spanned<Expr>> {
- let Spanned { v: token, span } = self.peek()?;
- Some(match token {
- // This could be a function call or an identifier.
- Token::Ident(id) => {
- let name = Spanned::new(Ident(id.to_string()), span);
- self.eat();
- self.skip_ws();
- if self.check(Token::LeftParen) {
- self.parse_paren_call(name).map(Expr::Call)
- } else {
- name.map(Expr::Ident)
- }
- }
-
- Token::Str { string, terminated } => {
- if !terminated {
- self.expected_at("quote", span.end);
- }
- self.with_span(Expr::Str(unescape_string(string)))
- }
-
- Token::Bool(b) => self.with_span(Expr::Bool(b)),
- Token::Number(n) => self.with_span(Expr::Number(n)),
- Token::Length(s) => self.with_span(Expr::Length(s)),
- Token::Hex(s) => {
- if let Ok(color) = RgbaColor::from_str(s) {
- self.with_span(Expr::Color(color))
- } else {
- // Heal color by assuming black.
- error!(@self.feedback, span, "invalid color");
- let healed = RgbaColor::new_healed(0, 0, 0, 255);
- self.with_span(Expr::Color(healed))
- }
- }
-
- // This could be a table or a parenthesized expression. We parse as
- // a table in any case and coerce the table into a value if it is
- // coercable (length 1 and no trailing comma).
- Token::LeftParen => {
- self.start_group(Group::Paren);
- let (table, coercable) = self.parse_table_contents();
- let span = self.end_group();
-
- let expr = if coercable {
- table.into_values().next().expect("table is coercable").val.v
- } else {
- Expr::Table(table)
- };
-
- Spanned::new(expr, span)
- }
-
- // This is a content expression.
- Token::LeftBrace => {
- self.start_group(Group::Brace);
- self.tokens.push_mode(TokenMode::Body);
-
- let tree = self.parse_body_contents();
-
- self.tokens.pop_mode();
- let span = self.end_group();
- Spanned::new(Expr::Tree(tree), span)
- }
-
- // This is a bracketed function call.
- Token::LeftBracket => {
- let call = self.parse_bracket_call(false);
- let tree = vec![call.map(SyntaxNode::Call)];
- Spanned::new(Expr::Tree(tree), span)
- }
-
- _ => return None,
- })
- }
-
- fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
- self.peek().and_then(|token| match token.v {
- Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
- _ => None,
- })
- }
-}
-
-// Error handling.
-impl Parser<'_> {
- fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
- if self.check(token) {
- self.eat();
- true
- } else {
- self.expected_at(token.name(), pos);
- false
- }
- }
-
- fn expected(&mut self, thing: &str) {
- if let Some(found) = self.eat() {
- error!(
- @self.feedback, found.span,
- "expected {}, found {}", thing, found.v.name(),
- );
- } else {
- error!(@self.feedback, Span::at(self.pos()), "expected {}", thing);
- }
- }
-
- fn expected_at(&mut self, thing: &str, pos: Pos) {
- error!(@self.feedback, Span::at(pos), "expected {}", thing);
- }
-
- fn expected_found_or_at(&mut self, thing: &str, pos: Pos) {
- if self.eof() {
- self.expected_at(thing, pos)
- } else {
- self.expected(thing);
- }
- }
-}
-
-// Parsing primitives.
-impl<'s> Parser<'s> {
- fn start_group(&mut self, group: Group) {
- let start = self.pos();
- if let Some(start_token) = group.start() {
- self.assert(start_token);
- }
- self.delimiters.push((start, group.end()));
- }
-
- fn end_group(&mut self) -> Span {
- let peeked = self.peek();
-
- let (start, end_token) = self.delimiters.pop().expect("group was not started");
-
- if end_token != Token::Chain && peeked != None {
- self.delimiters.push((start, end_token));
- assert_eq!(peeked, None, "unfinished group");
- }
-
- match self.peeked.unwrap() {
- Some(token) if token.v == end_token => {
- self.peeked = None;
- Span::new(start, token.span.end)
- }
- _ => {
- let end = self.pos();
- if end_token != Token::Chain {
- error!(
- @self.feedback, Span::at(end),
- "expected {}", end_token.name(),
- );
- }
- Span::new(start, end)
- }
- }
- }
-
- fn skip_ws(&mut self) {
- while matches!(
- self.peekv(),
- Some(Token::Space(_)) |
- Some(Token::LineComment(_)) |
- Some(Token::BlockComment(_))
- ) {
- self.eat();
- }
- }
-
- fn eatv(&mut self) -> Option<Token<'s>> {
- self.eat().map(Spanned::value)
- }
-
- fn peekv(&mut self) -> Option<Token<'s>> {
- self.peek().map(Spanned::value)
- }
-
- fn assert(&mut self, token: Token<'_>) {
- assert!(self.check_eat(token).is_some());
- }
-
- fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
- if self.check(token) { self.eat() } else { None }
- }
-
- /// Checks if the next token is of some kind
- fn check(&mut self, token: Token<'_>) -> bool {
- self.peekv() == Some(token)
- }
-
- fn with_span<T>(&mut self, v: T) -> Spanned<T> {
- let span = self.eat().expect("expected token").span;
- Spanned::new(v, span)
- }
-
- fn eof(&mut self) -> bool {
- self.peek().is_none()
- }
-
- fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
- let token = self.peek()?;
- self.peeked = None;
- Some(token)
- }
-
- fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
- let tokens = &mut self.tokens;
- let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
-
- // Check for unclosed groups.
- if Group::is_delimiter(token.v) {
- if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
- return None;
- }
- }
-
- Some(token)
- }
-
- fn pos(&self) -> Pos {
- self.peeked
- .flatten()
- .map(|s| s.span.start)
- .unwrap_or_else(|| self.tokens.pos())
- }
-}
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum Group {
- Paren,
- Bracket,
- Brace,
- Subheader,
-}
-
-impl Group {
- fn is_delimiter(token: Token<'_>) -> bool {
- matches!(
- token,
- Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain
- )
- }
-
- fn start(self) -> Option<Token<'static>> {
- match self {
- Self::Paren => Some(Token::LeftParen),
- Self::Bracket => Some(Token::LeftBracket),
- Self::Brace => Some(Token::LeftBrace),
- Self::Subheader => None,
- }
- }
-
- fn end(self) -> Token<'static> {
- match self {
- Self::Paren => Token::RightParen,
- Self::Bracket => Token::RightBracket,
- Self::Brace => Token::RightBrace,
- Self::Subheader => Token::Chain,
- }
- }
-}
diff --git a/src/syntax/parsing/tests.rs b/src/syntax/parsing/tests.rs
deleted file mode 100644
index 7fdf02ca..00000000
--- a/src/syntax/parsing/tests.rs
+++ /dev/null
@@ -1,509 +0,0 @@
-#![allow(non_snake_case)]
-
-use super::parse;
-use crate::color::RgbaColor;
-use crate::compute::table::SpannedEntry;
-use crate::length::Length;
-use crate::syntax::decoration::Decoration::*;
-use crate::syntax::span::Spanned;
-use crate::syntax::tests::*;
-use crate::syntax::tree::*;
-
-// ------------------------------ Construct Syntax Nodes ------------------------------ //
-
-use SyntaxNode::{
- Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I,
-};
-
-fn T(text: &str) -> SyntaxNode {
- SyntaxNode::Text(text.to_string())
-}
-
-macro_rules! H {
- ($level:expr, $($tts:tt)*) => {
- SyntaxNode::Heading(Heading {
- level: Spanned::zero($level),
- tree: Tree![@$($tts)*],
- })
- };
-}
-
-macro_rules! R {
- ($($line:expr),* $(,)?) => {
- SyntaxNode::Raw(vec![$($line.to_string()),*])
- };
-}
-
-macro_rules! C {
- ($lang:expr, $($line:expr),* $(,)?) => {{
- let lines = vec![$($line.to_string()) ,*];
- SyntaxNode::Code(Code {
- lang: $lang,
- block: lines.len() > 1,
- lines,
- })
- }};
-}
-
-fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> {
- Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string())))
-}
-
-macro_rules! F {
- ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) }
-}
-
-// ------------------------------- Construct Expressions ------------------------------ //
-
-use Expr::{Bool, Color, Length as Len, Number as Num};
-
-fn Id(ident: &str) -> Expr {
- Expr::Ident(Ident(ident.to_string()))
-}
-fn Str(string: &str) -> Expr {
- Expr::Str(string.to_string())
-}
-
-macro_rules! Table {
- (@table=$table:expr,) => {};
- (@table=$table:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{
- let key = Into::<Spanned<&str>>::into($key);
- let val = Into::<Spanned<Expr>>::into($value);
- $table.insert(key.v, SpannedEntry::new(key.span, val));
- Table![@table=$table, $($($tts)*)?];
- }};
- (@table=$table:expr, $value:expr $(, $($tts:tt)*)?) => {
- let val = Into::<Spanned<Expr>>::into($value);
- $table.push(SpannedEntry::val(val));
- Table![@table=$table, $($($tts)*)?];
- };
- (@$($tts:tt)*) => {{
- #[allow(unused_mut)]
- let mut table = TableExpr::new();
- Table![@table=table, $($tts)*];
- table
- }};
- ($($tts:tt)*) => { Expr::Table(Table![@$($tts)*]) };
-}
-
-macro_rules! Tree {
- (@$($node:expr),* $(,)?) => {
- vec![$(Into::<Spanned<SyntaxNode>>::into($node)),*]
- };
- ($($tts:tt)*) => { Expr::Tree(Tree![@$($tts)*]) };
-}
-
-macro_rules! Call {
- (@$name:expr $(; $($tts:tt)*)?) => {{
- let name = Into::<Spanned<&str>>::into($name);
- CallExpr {
- name: name.map(|n| Ident(n.to_string())),
- args: Table![@$($($tts)*)?],
- }
- }};
- ($($tts:tt)*) => { Expr::Call(Call![@$($tts)*]) };
-}
-
-fn Neg<T: Into<Spanned<Expr>>>(e1: T) -> Expr {
- Expr::Neg(Box::new(e1.into()))
-}
-fn Add<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
- Expr::Add(Box::new(e1.into()), Box::new(e2.into()))
-}
-fn Sub<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
- Expr::Sub(Box::new(e1.into()), Box::new(e2.into()))
-}
-fn Mul<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
- Expr::Mul(Box::new(e1.into()), Box::new(e2.into()))
-}
-fn Div<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr {
- Expr::Div(Box::new(e1.into()), Box::new(e2.into()))
-}
-
-// ------------------------------------ Test Macros ----------------------------------- //
-
-// Test syntax trees with or without spans.
-macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
-macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
-macro_rules! test {
- (@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
- let exp = Tree![@$($tts)*];
- let pass = parse($src);
- check($src, exp, pass.output, $spans);
- };
-}
-
-// Test expressions.
-macro_rules! v {
- ($src:expr => $($tts:tt)*) => {
- t!(concat!("[val: ", $src, "]") => F!("val"; $($tts)*));
- }
-}
-
-// Test error messages.
-macro_rules! e {
- ($src:expr => $($tts:tt)*) => {
- let exp = vec![$($tts)*];
- let pass = parse($src);
- let found = pass.feedback.diagnostics.iter()
- .map(|s| s.as_ref().map(|e| e.message.as_str()))
- .collect::<Vec<_>>();
- check($src, exp, found, true);
- };
-}
-
-// Test decorations.
-macro_rules! d {
- ($src:expr => $($tts:tt)*) => {
- let exp = vec![$($tts)*];
- let pass = parse($src);
- check($src, exp, pass.feedback.decorations, true);
- };
-}
-
-// --------------------------------------- Tests -------------------------------------- //
-
-#[test]
-fn test_parse_groups() {
- e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
- s(0,2, 0,2, "expected closing bracket"));
-
- e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
- s(0,5, 0,6, "unexpected closing brace"));
-}
-
-#[test]
-fn test_parse_simple_nodes() {
- t!("" => );
- t!("hi" => T("hi"));
- t!("*hi" => B, T("hi"));
- t!("hi_" => T("hi"), I);
- t!("hi you" => T("hi"), S, T("you"));
- t!("special~name" => T("special"), T("\u{00A0}"), T("name"));
- t!("special\\~name" => T("special"), T("~"), T("name"));
- t!("\\u{1f303}" => T("🌃"));
- t!("\n\n\nhello" => P, T("hello"));
- t!(r"a\ b" => T("a"), L, S, T("b"));
- t!("`py`" => R!["py"]);
- t!("`hi\nyou" => R!["hi", "you"]);
- e!("`hi\nyou" => s(1,3, 1,3, "expected backtick"));
- t!("`hi\\`du`" => R!["hi`du"]);
-
- ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"]));
- t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]);
- t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
- Lang("typst"), " Typst uses ``` to indicate code blocks"
- ]);
-
- e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks"));
- e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier"));
- e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode escape sequence"));
- e!("\\u{abc" => s(0,6, 0,6, "expected closing brace"));
- t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
-
- ts!("hi" => s(0,0, 0,2, T("hi")));
- ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B));
- ts!("💜\n\n 🌍" => s(0,0, 0,1, T("💜")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍")));
-}
-
-#[test]
-fn test_parse_comments() {
- // In body.
- t!("hi// you\nw" => T("hi"), S, T("w"));
- t!("first//\n//\nsecond" => T("first"), S, S, T("second"));
- t!("first//\n \nsecond" => T("first"), P, T("second"));
- t!("first/*\n \n*/second" => T("first"), T("second"));
- e!("🌎\n*/n" => s(1,0, 1,2, "unexpected end of block comment"));
-
- // In header.
- t!("[val:/*12pt*/]" => F!("val"));
- t!("[val \n /* \n */:]" => F!("val"));
- e!("[val \n /* \n */:]" => );
- e!("[val : 12, /* \n */ 14]" => );
-}
-
-#[test]
-fn test_parse_headings() {
- t!("## Hello world!" => H![1, T("Hello"), S, T("world!")]);
-
- // Handle various whitespace usages.
- t!("####Simple" => H![3, T("Simple")]);
- t!(" # Whitespace!" => S, H![0, T("Whitespace!")]);
- t!(" /* TODO: Improve */ ## Analysis" => S, S, H!(1, T("Analysis")));
-
- // Complex heading contents.
- t!("Some text [box][### Valuable facts]" => T("Some"), S, T("text"), S,
- F!("box"; Tree![H!(2, T("Valuable"), S, T("facts"))])
- );
- t!("### Grandiose stuff [box][Get it \n\n straight]" => H![2,
- T("Grandiose"), S, T("stuff"), S,
- F!("box"; Tree![T("Get"), S, T("it"), P, T("straight")])
- ]);
- t!("###### Multiline \\ headings" => H![5, T("Multiline"), S, L, S, T("headings")]);
-
- // Things that should not become headings.
- t!("\\## Text" => T("#"), T("#"), S, T("Text"));
- t!(" ###### # Text" => S, H!(5, T("#"), S, T("Text")));
- t!("I am #1" => T("I"), S, T("am"), S, T("#"), T("1"));
- t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi"));
-
- // Depth warnings.
- e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect"));
-}
-
-#[test]
-fn test_parse_function_names() {
- // No closing bracket.
- t!("[" => F!(""));
- e!("[" => s(0,1, 0,1, "expected function name"),
- s(0,1, 0,1, "expected closing bracket"));
-
- // No name.
- e!("[]" => s(0,1, 0,1, "expected function name"));
- e!("[\"]" => s(0,1, 0,3, "expected function name, found string"),
- s(0,3, 0,3, "expected closing bracket"));
-
- // A valid name.
- t!("[hi]" => F!("hi"));
- t!("[ f]" => F!("f"));
-
- // An invalid name.
- e!("[12]" => s(0,1, 0,3, "expected function name, found number"));
- e!("[ 🌎]" => s(0,3, 0,4, "expected function name, found invalid token"));
-}
-
-#[test]
-fn test_parse_chaining() {
- // Things the parser has to make sense of
- t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Table![Num(5.0), Num(2.1)], Tree![F!("you")]));
- t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")]));
- t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![
- F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi")))
- ]));
- t!("[bold: 400, >> emph >> sub: 1cm]" => F!("bold"; Num(400.0), Tree![
- F!("emph"; Tree!(F!("sub"; Len(Length::cm(1.0)))))
- ]));
-
- // Errors for unclosed / empty predecessor groups
- e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren"));
- e!("[>> abc]" => s(0, 1, 0, 1, "expected function name"));
-}
-
-#[test]
-fn test_parse_colon_starting_func_args() {
- // Just colon without args.
- e!("[val:]" => );
-
- // Wrong token.
- t!("[val=]" => F!("val"));
- e!("[val=]" => s(0,4, 0,4, "expected colon"));
- e!("[val/🌎:$]" => s(0,4, 0,4, "expected colon"));
-
- // String in invalid header without colon still parsed as string
- // Note: No "expected quote" error because not even the string was
- // expected.
- e!("[val/\"]" => s(0,4, 0,4, "expected colon"),
- s(0,7, 0,7, "expected closing bracket"));
-}
-
-#[test]
-fn test_parse_function_bodies() {
- t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B]));
- e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
-
- // Raw in body.
- t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]]));
- e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
-
- // Crazy.
- t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")]));
-
- // Spanned.
- ts!(" [box][Oh my]" =>
- s(0,0, 0,1, S),
- s(0,1, 0,13, F!(s(0,2, 0,5, "box");
- s(0,6, 0,13, Tree![
- s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
- ])
- ))
- );
-}
-
-#[test]
-fn test_parse_values() {
- // Simple.
- v!("_" => Id("_"));
- v!("name" => Id("name"));
- v!("α" => Id("α"));
- v!("\"hi\"" => Str("hi"));
- v!("true" => Bool(true));
- v!("false" => Bool(false));
- v!("1.0e-4" => Num(1e-4));
- v!("3.14" => Num(3.14));
- v!("50%" => Num(0.5));
- v!("4.5cm" => Len(Length::cm(4.5)));
- v!("12e1pt" => Len(Length::pt(12e1)));
- v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
- v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
-
- // Content.
- v!("{_hi_}" => Tree![I, T("hi"), I]);
- e!("[val: {_hi_}]" => );
- v!("[hi]" => Tree![F!("hi")]);
- e!("[val: [hi]]" => );
-
- // Healed colors.
- v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
- e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
- e!("[val: #a5]" => s(0,6, 0,9, "invalid color"));
- e!("[val: #14b2ah]" => s(0,6, 0,13, "invalid color"));
- e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color"));
-
- // Unclosed string.
- v!("\"hello" => Str("hello]"));
- e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"),
- s(0,13, 0,13, "expected closing bracket"));
-
- // Spanned.
- ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4)))));
-}
-
-#[test]
-fn test_parse_expressions() {
- // Coerced table.
- v!("(hi)" => Id("hi"));
-
- // Operations.
- v!("-1" => Neg(Num(1.0)));
- v!("-- 1" => Neg(Neg(Num(1.0))));
- v!("3.2in + 6pt" => Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))));
- v!("5 - 0.01" => Sub(Num(5.0), Num(0.01)));
- v!("(3mm * 2)" => Mul(Len(Length::mm(3.0)), Num(2.0)));
- v!("12e-3cm/1pt" => Div(Len(Length::cm(12e-3)), Len(Length::pt(1.0))));
-
- // More complex.
- v!("(3.2in + 6pt)*(5/2-1)" => Mul(
- Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))),
- Sub(Div(Num(5.0), Num(2.0)), Num(1.0))
- ));
- v!("(6.3E+2+4* - 3.2pt)/2" => Div(
- Add(Num(6.3e2), Mul(Num(4.0), Neg(Len(Length::pt(3.2))))),
- Num(2.0)
- ));
-
- // Associativity of multiplication and division.
- v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0)));
-
- // Spanned.
- ts!("[val: 1 + 3]" => s(0,0, 0,12, F!(
- s(0,1, 0,4, "val"); s(0,6, 0,11, Add(
- s(0,6, 0,7, Num(1.0)),
- s(0,10, 0,11, Num(3.0)),
- ))
- )));
-
- // Span of parenthesized expression contains parens.
- ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0)))));
-
- // Invalid expressions.
- v!("4pt--" => Len(Length::pt(4.0)));
- e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"),
- s(0,6, 0,10, "missing right summand"));
-
- v!("3mm+4pt*" => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0))));
- e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor"));
-}
-
-#[test]
-fn test_parse_tables() {
- // Okay.
- v!("()" => Table![]);
- v!("(false)" => Bool(false));
- v!("(true,)" => Table![Bool(true)]);
- v!("(key=val)" => Table!["key" => Id("val")]);
- v!("(1, 2)" => Table![Num(1.0), Num(2.0)]);
- v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]);
-
- // Decorations.
- d!("[val: key=hi]" => s(0,6, 0,9, TableKey));
- d!("[val: (key=hi)]" => s(0,7, 0,10, TableKey));
- d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey));
-
- // Spanned with spacing around keyword arguments.
- ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!(
- s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n"))
- )));
- e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => );
-}
-
-#[test]
-fn test_parse_tables_compute_func_calls() {
- v!("empty()" => Call!("empty"));
- v!("add ( 1 , 2 )" => Call!("add"; Num(1.0), Num(2.0)));
- v!("items(\"fire\", #f93a6d)" => Call!("items";
- Str("fire"), Color(RgbaColor::new(0xf9, 0x3a, 0x6d, 0xff))
- ));
-
- // More complex.
- v!("css(1pt, rgb(90, 102, 254), \"solid\")" => Call!(
- "css";
- Len(Length::pt(1.0)),
- Call!("rgb"; Num(90.0), Num(102.0), Num(254.0)),
- Str("solid"),
- ));
-
- // Unclosed.
- v!("lang(中文]" => Call!("lang"; Id("中文")));
- e!("[val: lang(中文]" => s(0,13, 0,13, "expected closing paren"));
-
- // Invalid name.
- v!("👠(\"abc\", 13e-5)" => Table!(Str("abc"), Num(13.0e-5)));
- e!("[val: 👠(\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token"));
-}
-
-#[test]
-fn test_parse_tables_nested() {
- v!("(1, ( ab=(), d = (3, 14pt) )), false" =>
- Table![
- Num(1.0),
- Table!(
- "ab" => Table![],
- "d" => Table!(Num(3.0), Len(Length::pt(14.0))),
- ),
- ],
- Bool(false),
- );
-}
-
-#[test]
-fn test_parse_tables_errors() {
- // Expected value.
- e!("[val: (=)]" => s(0,7, 0,8, "expected value, found equals sign"));
- e!("[val: (,)]" => s(0,7, 0,8, "expected value, found comma"));
- v!("(\x07 abc,)" => Table![Id("abc")]);
- e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
- e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
- e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
-
- // Expected comma.
- v!("(true false)" => Table![Bool(true), Bool(false)]);
- e!("[val: (true false)]" => s(0,11, 0,11, "expected comma"));
-
- // Expected closing paren.
- e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren"));
- e!("[val: (key]" => s(0,10, 0,10, "expected closing paren"));
- e!("[val: (key=]" => s(0,11, 0,11, "expected value"),
- s(0,11, 0,11, "expected closing paren"));
-
- // Bad key.
- v!("true=you" => Bool(true), Id("you"));
- e!("[val: true=you]" =>
- s(0,10, 0,10, "expected comma"),
- s(0,10, 0,11, "expected value, found equals sign"));
-
- // Unexpected equals sign.
- v!("z=y=4" => Num(4.0), "z" => Id("y"));
- e!("[val: z=y=4]" =>
- s(0,9, 0,9, "expected comma"),
- s(0,9, 0,10, "expected value, found equals sign"));
-}
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 89f773c7..9357c345 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -14,12 +14,6 @@ thread_local! {
static CMP_SPANS: Cell<bool> = Cell::new(true);
}
-/// When set to `false` comparisons with `PartialEq` ignore spans.
-#[cfg(test)]
-pub(crate) fn set_cmp(cmp: bool) {
- CMP_SPANS.with(|cell| cell.set(cmp));
-}
-
/// Span offsetting.
pub trait Offset {
/// Offset all spans contained in `Self` by the given position.
@@ -132,6 +126,12 @@ impl Span {
pub fn expand(&mut self, other: Self) {
*self = Self::merge(*self, other)
}
+
+ /// When set to `false` comparisons with `PartialEq` ignore spans.
+ #[cfg(test)]
+ pub(crate) fn set_cmp(cmp: bool) {
+ CMP_SPANS.with(|cell| cell.set(cmp));
+ }
}
impl Offset for Span {
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
new file mode 100644
index 00000000..e91a780c
--- /dev/null
+++ b/src/syntax/token.rs
@@ -0,0 +1,152 @@
+//! Tokenization.
+
+use super::span::Spanned;
+use crate::length::Length;
+
+/// A minimal semantic entity of source code.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum Token<'s> {
+ /// One or more whitespace characters. The contained `usize` denotes the
+ /// number of newlines that were contained in the whitespace.
+ Space(usize),
+
+ /// A line comment with inner string contents `//<str>\n`.
+ LineComment(&'s str),
+ /// A block comment with inner string contents `/*<str>*/`. The comment
+ /// can contain nested block comments.
+ BlockComment(&'s str),
+
+ /// A left bracket starting a function invocation or body: `[`.
+ LeftBracket,
+ /// A right bracket ending a function invocation or body: `]`.
+ RightBracket,
+ /// A left parenthesis in a function header: `(`.
+ LeftParen,
+ /// A right parenthesis in a function header: `)`.
+ RightParen,
+ /// A left brace in a function header: `{`.
+ LeftBrace,
+ /// A right brace in a function header: `}`.
+ RightBrace,
+ /// A double forward chevron in a function header: `>>`.
+ Chain,
+
+ /// A colon in a function header: `:`.
+ Colon,
+ /// A comma in a function header: `,`.
+ Comma,
+ /// An equals sign in a function header: `=`.
+ Equals,
+
+ /// An identifier in a function header: `center`.
+ Ident(&'s str),
+ /// A quoted string in a function header: `"..."`.
+ Str {
+ /// The string inside the quotes.
+ ///
+ /// _Note_: If the string contains escape sequences these are not yet
+ /// applied to be able to just store a string slice here instead of
+ /// a String. The escaping is done later in the parser.
+ string: &'s str,
+ /// Whether the closing quote was present.
+ terminated: bool,
+ },
+ /// A boolean in a function header: `true | false`.
+ Bool(bool),
+ /// A number in a function header: `3.14`.
+ Number(f64),
+ /// A length in a function header: `12pt`.
+ Length(Length),
+ /// A hex value in a function header: `#20d82a`.
+ Hex(&'s str),
+ /// A plus in a function header, signifying the addition of expressions.
+ Plus,
+ /// A hyphen in a function header, signifying the subtraction of
+ /// expressions.
+ Hyphen,
+ /// A slash in a function header, signifying the division of expressions.
+ Slash,
+
+ /// A star. It can appear in a function header where it signifies the
+ /// multiplication of expressions or the body where it modifies the styling.
+ Star,
+ /// An underscore in body-text.
+ Underscore,
+ /// A backslash followed by whitespace in text.
+ Backslash,
+
+ /// A hashtag token in the body can indicate compute mode or headings.
+ Hashtag,
+
+ /// A unicode escape sequence.
+ UnicodeEscape {
+ /// The escape sequence between two braces.
+ sequence: &'s str,
+ /// Whether the closing brace was present.
+ terminated: bool,
+ },
+
+ /// Raw text.
+ Raw {
+ /// The raw text (not yet unescaped as for strings).
+ raw: &'s str,
+ /// Whether the closing backtick was present.
+ terminated: bool,
+ },
+
+ /// Multi-line code block.
+ Code {
+ /// The language of the code block, if specified.
+ lang: Option<Spanned<&'s str>>,
+ /// The raw text (not yet unescaped as for strings).
+ raw: &'s str,
+ /// Whether the closing backticks were present.
+ terminated: bool,
+ },
+
+ /// Any other consecutive string.
+ Text(&'s str),
+
+ /// Things that are not valid in the context they appeared in.
+ Invalid(&'s str),
+}
+
+impl<'s> Token<'s> {
+ /// The natural-language name for this token for use in error messages.
+ pub fn name(self) -> &'static str {
+ match self {
+ Self::Space(_) => "space",
+ Self::LineComment(_) => "line comment",
+ Self::BlockComment(_) => "block comment",
+ Self::LeftBracket => "opening bracket",
+ Self::RightBracket => "closing bracket",
+ Self::LeftParen => "opening paren",
+ Self::RightParen => "closing paren",
+ Self::LeftBrace => "opening brace",
+ Self::RightBrace => "closing brace",
+ Self::Chain => "function chain operator",
+ Self::Colon => "colon",
+ Self::Comma => "comma",
+ Self::Equals => "equals sign",
+ Self::Ident(_) => "identifier",
+ Self::Str { .. } => "string",
+ Self::Bool(_) => "bool",
+ Self::Number(_) => "number",
+ Self::Length(_) => "length",
+ Self::Hex(_) => "hex value",
+ Self::Plus => "plus",
+ Self::Hyphen => "minus",
+ Self::Slash => "slash",
+ Self::Star => "star",
+ Self::Underscore => "underscore",
+ Self::Backslash => "backslash",
+ Self::Hashtag => "hashtag",
+ Self::UnicodeEscape { .. } => "unicode escape sequence",
+ Self::Raw { .. } => "raw text",
+ Self::Code { .. } => "code block",
+ Self::Text(_) => "text",
+ Self::Invalid("*/") => "end of block comment",
+ Self::Invalid(_) => "invalid token",
+ }
+ }
+}
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
deleted file mode 100644
index 0c37e992..00000000
--- a/src/syntax/tokens.rs
+++ /dev/null
@@ -1,786 +0,0 @@
-//! Tokenization.
-
-use std::iter::Peekable;
-use std::str::Chars;
-use unicode_xid::UnicodeXID;
-
-use super::span::{Pos, Span, Spanned};
-use crate::length::Length;
-
-use Token::*;
-use TokenMode::*;
-/// A minimal semantic entity of source code.
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum Token<'s> {
- /// One or more whitespace characters. The contained `usize` denotes the
- /// number of newlines that were contained in the whitespace.
- Space(usize),
-
- /// A line comment with inner string contents `//<str>\n`.
- LineComment(&'s str),
- /// A block comment with inner string contents `/*<str>*/`. The comment
- /// can contain nested block comments.
- BlockComment(&'s str),
-
- /// A left bracket starting a function invocation or body: `[`.
- LeftBracket,
- /// A right bracket ending a function invocation or body: `]`.
- RightBracket,
- /// A left parenthesis in a function header: `(`.
- LeftParen,
- /// A right parenthesis in a function header: `)`.
- RightParen,
- /// A left brace in a function header: `{`.
- LeftBrace,
- /// A right brace in a function header: `}`.
- RightBrace,
- /// A double forward chevron in a function header: `>>`.
- Chain,
-
- /// A colon in a function header: `:`.
- Colon,
- /// A comma in a function header: `,`.
- Comma,
- /// An equals sign in a function header: `=`.
- Equals,
-
- /// An identifier in a function header: `center`.
- Ident(&'s str),
- /// A quoted string in a function header: `"..."`.
- Str {
- /// The string inside the quotes.
- ///
- /// _Note_: If the string contains escape sequences these are not yet
- /// applied to be able to just store a string slice here instead of
- /// a String. The escaping is done later in the parser.
- string: &'s str,
- /// Whether the closing quote was present.
- terminated: bool,
- },
- /// A boolean in a function header: `true | false`.
- Bool(bool),
- /// A number in a function header: `3.14`.
- Number(f64),
- /// A length in a function header: `12pt`.
- Length(Length),
- /// A hex value in a function header: `#20d82a`.
- Hex(&'s str),
- /// A plus in a function header, signifying the addition of expressions.
- Plus,
- /// A hyphen in a function header, signifying the subtraction of
- /// expressions.
- Hyphen,
- /// A slash in a function header, signifying the division of expressions.
- Slash,
-
- /// A star. It can appear in a function header where it signifies the
- /// multiplication of expressions or the body where it modifies the styling.
- Star,
- /// An underscore in body-text.
- Underscore,
- /// A backslash followed by whitespace in text.
- Backslash,
-
- /// A hashtag token in the body can indicate compute mode or headings.
- Hashtag,
-
- /// A unicode escape sequence.
- UnicodeEscape {
- /// The escape sequence between two braces.
- sequence: &'s str,
- /// Whether the closing brace was present.
- terminated: bool,
- },
-
- /// Raw text.
- Raw {
- /// The raw text (not yet unescaped as for strings).
- raw: &'s str,
- /// Whether the closing backtick was present.
- terminated: bool,
- },
-
- /// Multi-line code block.
- Code {
- /// The language of the code block, if specified.
- lang: Option<Spanned<&'s str>>,
- /// The raw text (not yet unescaped as for strings).
- raw: &'s str,
- /// Whether the closing backticks were present.
- terminated: bool,
- },
-
- /// Any other consecutive string.
- Text(&'s str),
-
- /// Things that are not valid in the context they appeared in.
- Invalid(&'s str),
-}
-
-impl<'s> Token<'s> {
- /// The natural-language name for this token for use in error messages.
- pub fn name(self) -> &'static str {
- match self {
- Space(_) => "space",
- LineComment(_) => "line comment",
- BlockComment(_) => "block comment",
- LeftBracket => "opening bracket",
- RightBracket => "closing bracket",
- LeftParen => "opening paren",
- RightParen => "closing paren",
- LeftBrace => "opening brace",
- RightBrace => "closing brace",
- Chain => "function chain operator",
- Colon => "colon",
- Comma => "comma",
- Equals => "equals sign",
- Ident(_) => "identifier",
- Str { .. } => "string",
- Bool(_) => "bool",
- Number(_) => "number",
- Length(_) => "length",
- Hex(_) => "hex value",
- Plus => "plus",
- Hyphen => "minus",
- Slash => "slash",
- Star => "star",
- Underscore => "underscore",
- Backslash => "backslash",
- Hashtag => "hashtag",
- UnicodeEscape { .. } => "unicode escape sequence",
- Raw { .. } => "raw text",
- Code { .. } => "code block",
- Text(_) => "text",
- Invalid("*/") => "end of block comment",
- Invalid(_) => "invalid token",
- }
- }
-}
-
-/// An iterator over the tokens of a string of source code.
-#[derive(Debug)]
-pub struct Tokens<'s> {
- src: &'s str,
- iter: Peekable<Chars<'s>>,
- mode: TokenMode,
- stack: Vec<TokenMode>,
- pos: Pos,
- index: usize,
-}
-
-/// Whether to tokenize in header mode which yields expression, comma and
-/// similar tokens or in body mode which yields text and star, underscore,
-/// backtick tokens.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum TokenMode {
- Header,
- Body,
-}
-
-impl<'s> Tokens<'s> {
- /// Create a new token iterator with the given mode.
- pub fn new(src: &'s str, mode: TokenMode) -> Self {
- Self {
- src,
- iter: src.chars().peekable(),
- mode,
- stack: vec![],
- pos: Pos::ZERO,
- index: 0,
- }
- }
-
- /// Change the token mode and push the old one on a stack.
- pub fn push_mode(&mut self, mode: TokenMode) {
- self.stack.push(self.mode);
- self.mode = mode;
- }
-
- /// Pop the old token mode from the stack. This panics if there is no mode
- /// on the stack.
- pub fn pop_mode(&mut self) {
- self.mode = self.stack.pop().expect("no pushed mode");
- }
-
- /// The index in the string at which the last token ends and next token will
- /// start.
- pub fn index(&self) -> usize {
- self.index
- }
-
- /// The line-colunn position in the source at which the last token ends and
- /// next token will start.
- pub fn pos(&self) -> Pos {
- self.pos
- }
-}
-
-impl<'s> Iterator for Tokens<'s> {
- type Item = Spanned<Token<'s>>;
-
- /// Parse the next token in the source code.
- fn next(&mut self) -> Option<Self::Item> {
- let start = self.pos();
- let first = self.eat()?;
-
- let token = match first {
- // Comments.
- '/' if self.peek() == Some('/') => self.read_line_comment(),
- '/' if self.peek() == Some('*') => self.read_block_comment(),
- '*' if self.peek() == Some('/') => {
- self.eat();
- Invalid("*/")
- }
-
- // Whitespace.
- c if c.is_whitespace() => self.read_whitespace(start),
-
- // Functions and blocks.
- '[' => LeftBracket,
- ']' => RightBracket,
- '{' => LeftBrace,
- '}' => RightBrace,
-
- // Syntactic elements in function headers.
- '(' if self.mode == Header => LeftParen,
- ')' if self.mode == Header => RightParen,
- ':' if self.mode == Header => Colon,
- ',' if self.mode == Header => Comma,
- '=' if self.mode == Header => Equals,
- '>' if self.mode == Header && self.peek() == Some('>') => self.read_chain(),
-
- // Expression operators.
- '+' if self.mode == Header => Plus,
- '-' if self.mode == Header => Hyphen,
- '/' if self.mode == Header => Slash,
-
- // Star serves a double purpose as a style modifier
- // and a expression operator in the header.
- '*' => Star,
-
- // A hex expression.
- '#' if self.mode == Header => self.read_hex(),
-
- // String values.
- '"' if self.mode == Header => self.read_string(),
-
- // Style toggles.
- '_' if self.mode == Body => Underscore,
- '`' if self.mode == Body => self.read_raw_or_code(),
-
- // Sections.
- '#' if self.mode == Body => Hashtag,
-
- // Non-breaking spaces.
- '~' if self.mode == Body => Text("\u{00A0}"),
-
- // An escaped thing.
- '\\' if self.mode == Body => self.read_escaped(),
-
- // Expressions or just strings.
- c => {
- let body = self.mode == Body;
-
- let start_offset = -(c.len_utf8() as isize);
- let mut last_was_e = false;
-
- let (text, _) = self.read_string_until(false, start_offset, 0, |n| {
- let val = match n {
- c if c.is_whitespace() => true,
- '[' | ']' | '{' | '}' | '/' | '*' => true,
- '\\' | '_' | '`' | '#' | '~' if body => true,
- ':' | '=' | ',' | '"' | '(' | ')' if !body => true,
- '+' | '-' if !body && !last_was_e => true,
- _ => false,
- };
-
- last_was_e = n == 'e' || n == 'E';
- val
- });
-
- if self.mode == Header {
- self.read_expr(text)
- } else {
- Text(text)
- }
- }
- };
-
- let end = self.pos();
- let span = Span { start, end };
-
- Some(Spanned { v: token, span })
- }
-}
-
-impl<'s> Tokens<'s> {
- fn read_line_comment(&mut self) -> Token<'s> {
- self.eat();
- LineComment(self.read_string_until(false, 0, 0, is_newline_char).0)
- }
-
- fn read_block_comment(&mut self) -> Token<'s> {
- enum Last {
- Slash,
- Star,
- Other,
- }
-
- let mut depth = 0;
- let mut last = Last::Other;
-
- // Find the first `*/` that does not correspond to a nested `/*`.
- // Remove the last two bytes to obtain the raw inner text without `*/`.
- self.eat();
- let (content, _) = self.read_string_until(true, 0, -2, |c| {
- match c {
- '/' => match last {
- Last::Star if depth == 0 => return true,
- Last::Star => depth -= 1,
- _ => last = Last::Slash,
- },
- '*' => match last {
- Last::Slash => depth += 1,
- _ => last = Last::Star,
- },
- _ => last = Last::Other,
- }
-
- false
- });
-
- BlockComment(content)
- }
-
- fn read_chain(&mut self) -> Token<'s> {
- assert!(self.eat() == Some('>'));
- Chain
- }
-
- fn read_whitespace(&mut self, start: Pos) -> Token<'s> {
- self.read_string_until(false, 0, 0, |n| !n.is_whitespace());
- let end = self.pos();
-
- Space(end.line - start.line)
- }
-
- fn read_string(&mut self) -> Token<'s> {
- let (string, terminated) = self.read_until_unescaped('"');
- Str { string, terminated }
- }
-
- fn read_raw_or_code(&mut self) -> Token<'s> {
- let (raw, terminated) = self.read_until_unescaped('`');
- if raw.is_empty() && terminated && self.peek() == Some('`') {
- // Third tick found; this is a code block.
- self.eat();
-
- // Reads the lang tag (until newline or whitespace).
- let start = self.pos();
- let (lang, _) = self.read_string_until(false, 0, 0, |c| {
- c == '`' || c.is_whitespace() || is_newline_char(c)
- });
- let end = self.pos();
-
- let lang = if !lang.is_empty() {
- Some(Spanned::new(lang, Span::new(start, end)))
- } else {
- None
- };
-
- // Skip to start of raw contents.
- while let Some(c) = self.peek() {
- if is_newline_char(c) {
- self.eat();
- if c == '\r' && self.peek() == Some('\n') {
- self.eat();
- }
-
- break;
- } else if c.is_whitespace() {
- self.eat();
- } else {
- break;
- }
- }
-
- let start = self.index();
- let mut backticks = 0u32;
-
- while backticks < 3 {
- match self.eat() {
- Some('`') => backticks += 1,
- // Escaping of triple backticks.
- Some('\\') if backticks == 1 && self.peek() == Some('`') => {
- backticks = 0;
- }
- Some(_) => {}
- None => break,
- }
- }
-
- let terminated = backticks == 3;
- let end = self.index() - if terminated { 3 } else { 0 };
-
- Code {
- lang,
- raw: &self.src[start .. end],
- terminated,
- }
- } else {
- Raw { raw, terminated }
- }
- }
-
- fn read_until_unescaped(&mut self, end: char) -> (&'s str, bool) {
- let mut escaped = false;
- self.read_string_until(true, 0, -1, |c| {
- match c {
- c if c == end && !escaped => return true,
- '\\' => escaped = !escaped,
- _ => escaped = false,
- }
-
- false
- })
- }
-
- fn read_escaped(&mut self) -> Token<'s> {
- fn is_escapable(c: char) -> bool {
- match c {
- '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => true,
- _ => false,
- }
- }
-
- match self.peek() {
- Some('u') => {
- self.eat();
- if self.peek() == Some('{') {
- self.eat();
- let (sequence, _) =
- self.read_string_until(false, 0, 0, |c| !c.is_ascii_hexdigit());
-
- let terminated = self.peek() == Some('}');
- if terminated {
- self.eat();
- }
-
- UnicodeEscape { sequence, terminated }
- } else {
- Text("\\u")
- }
- }
- Some(c) if is_escapable(c) => {
- let index = self.index();
- self.eat();
- Text(&self.src[index .. index + c.len_utf8()])
- }
- Some(c) if c.is_whitespace() => Backslash,
- Some(_) => Text("\\"),
- None => Backslash,
- }
- }
-
- fn read_hex(&mut self) -> Token<'s> {
- // This will parse more than the permissable 0-9, a-f, A-F character
- // ranges to provide nicer error messages later.
- Hex(self.read_string_until(false, 0, 0, |n| !n.is_ascii_alphanumeric()).0)
- }
-
- fn read_expr(&mut self, text: &'s str) -> Token<'s> {
- if let Ok(b) = text.parse::<bool>() {
- Bool(b)
- } else if let Ok(num) = text.parse::<f64>() {
- Number(num)
- } else if let Some(num) = parse_percentage(text) {
- Number(num / 100.0)
- } else if let Ok(length) = text.parse::<Length>() {
- Length(length)
- } else if is_identifier(text) {
- Ident(text)
- } else {
- Invalid(text)
- }
- }
-
- /// Will read the input stream until `f` evaluates to `true`. When
- /// `eat_match` is true, the token for which `f` was true is consumed.
- /// Returns the string from the index where this was called offset by
- /// `offset_start` to the end offset by `offset_end`. The end is before or
- /// after the match depending on `eat_match`.
- fn read_string_until(
- &mut self,
- eat_match: bool,
- offset_start: isize,
- offset_end: isize,
- mut f: impl FnMut(char) -> bool,
- ) -> (&'s str, bool) {
- let start = ((self.index() as isize) + offset_start) as usize;
- let mut matched = false;
-
- while let Some(c) = self.peek() {
- if f(c) {
- matched = true;
- if eat_match {
- self.eat();
- }
- break;
- }
-
- self.eat();
- }
-
- let mut end = self.index();
- if matched {
- end = ((end as isize) + offset_end) as usize;
- }
-
- (&self.src[start .. end], matched)
- }
-
- fn eat(&mut self) -> Option<char> {
- let c = self.iter.next()?;
- self.index += c.len_utf8();
-
- if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
- self.pos.line += 1;
- self.pos.column = 0;
- } else {
- self.pos.column += 1;
- }
-
- Some(c)
- }
-
- fn peek(&mut self) -> Option<char> {
- self.iter.peek().copied()
- }
-}
-
-fn parse_percentage(text: &str) -> Option<f64> {
- if text.ends_with('%') {
- text[.. text.len() - 1].parse::<f64>().ok()
- } else {
- None
- }
-}
-
-/// Whether this character denotes a newline.
-pub fn is_newline_char(character: char) -> bool {
- match character {
- // Line Feed, Vertical Tab, Form Feed, Carriage Return.
- '\x0A' ..= '\x0D' => true,
- // Next Line, Line Separator, Paragraph Separator.
- '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
- _ => false,
- }
-}
-
-/// Whether this word is a valid identifier.
-pub fn is_identifier(string: &str) -> bool {
- fn is_extra_allowed(c: char) -> bool {
- c == '.' || c == '-' || c == '_'
- }
-
- let mut chars = string.chars();
- match chars.next() {
- Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {}
- _ => return false,
- }
-
- for c in chars {
- match c {
- c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {}
- _ => return false,
- }
- }
-
- true
-}
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
- use super::super::span::Spanned;
- use super::*;
- use crate::length::Length;
- use crate::syntax::tests::*;
- use Token::{
- BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
- LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,
- LineComment as LC, Number as Num, Plus, RightBrace as RB, RightBracket as R,
- RightParen as RP, Slash, Space as S, Star, Text as T,
- };
-
- fn Str(string: &str, terminated: bool) -> Token {
- Token::Str { string, terminated }
- }
- fn Raw(raw: &str, terminated: bool) -> Token {
- Token::Raw { raw, terminated }
- }
- fn Code<'a>(
- lang: Option<Spanned<&'a str>>,
- raw: &'a str,
- terminated: bool,
- ) -> Token<'a> {
- Token::Code { lang, raw, terminated }
- }
- fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> {
- Some(Into::<Spanned<&str>>::into(lang))
- }
- fn UE(sequence: &str, terminated: bool) -> Token {
- Token::UnicodeEscape { sequence, terminated }
- }
-
- macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
- macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
- macro_rules! test {
- (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
- let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
- let found = Tokens::new($src, $mode).collect::<Vec<_>>();
- check($src, exp, found, $spans);
- }
- }
-
- #[test]
- fn tokenize_whitespace() {
- t!(Body, "" => );
- t!(Body, " " => S(0));
- t!(Body, " " => S(0));
- t!(Body, "\t" => S(0));
- t!(Body, " \t" => S(0));
- t!(Body, "\n" => S(1));
- t!(Body, "\n " => S(1));
- t!(Body, " \n" => S(1));
- t!(Body, " \n " => S(1));
- t!(Body, "\r\n" => S(1));
- t!(Body, " \n\t \n " => S(2));
- t!(Body, "\n\r" => S(2));
- t!(Body, " \r\r\n \x0D" => S(3));
- t!(Body, "a~b" => T("a"), T("\u{00A0}"), T("b"));
- }
-
- #[test]
- fn tokenize_comments() {
- t!(Body, "a // bc\n " => T("a"), S(0), LC(" bc"), S(1));
- t!(Body, "a //a//b\n " => T("a"), S(0), LC("a//b"), S(1));
- t!(Body, "a //a//b\r\n" => T("a"), S(0), LC("a//b"), S(1));
- t!(Body, "a //a//b\n\nhello" => T("a"), S(0), LC("a//b"), S(2), T("hello"));
- t!(Body, "/**/" => BC(""));
- t!(Body, "_/*_/*a*/*/" => Underscore, BC("_/*a*/"));
- t!(Body, "/*/*/" => BC("/*/"));
- t!(Body, "abc*/" => T("abc"), Invalid("*/"));
- t!(Body, "/***/" => BC("*"));
- t!(Body, "/**\\****/*/*/" => BC("*\\***"), Invalid("*/"), Invalid("*/"));
- t!(Body, "/*abc" => BC("abc"));
- }
-
- #[test]
- fn tokenize_body_only_tokens() {
- t!(Body, "_*" => Underscore, Star);
- t!(Body, "***" => Star, Star, Star);
- t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
- t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
- t!(Body, "`raw`" => Raw("raw", true));
- t!(Body, "# hi" => Hashtag, S(0), T("hi"));
- t!(Body, "#()" => Hashtag, T("()"));
- t!(Body, "`[func]`" => Raw("[func]", true));
- t!(Body, "`]" => Raw("]", false));
- t!(Body, "\\ " => Backslash, S(0));
- t!(Body, "`\\``" => Raw("\\`", true));
- t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
- t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true));
- t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
- t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false));
- t!(Header, "_`" => Invalid("_`"));
- }
-
- #[test]
- fn tokenize_header_only_tokens() {
- t!(Body, "a: b" => T("a:"), S(0), T("b"));
- t!(Body, "c=d, " => T("c=d,"), S(0));
- t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma);
- t!(Header, "a:b" => Id("a"), Colon, Id("b"));
- t!(Header, "#6ae6dd" => Hex("6ae6dd"));
- t!(Header, "#8A083c" => Hex("8A083c"));
- t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
- Id("x"), Equals, Num(1.0));
- t!(Header, "=3.14" => Equals, Num(3.14));
- t!(Header, "12.3e5" => Num(12.3e5));
- t!(Header, "120%" => Num(1.2));
- t!(Header, "12e4%" => Num(1200.0));
- t!(Header, "__main__" => Id("__main__"));
- t!(Header, ">main" => Invalid(">main"));
- t!(Header, ".func.box" => Id(".func.box"));
- t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
- t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
- t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
- t!(Header, "1e5in" => Len(Length::inches(100000.0)));
- t!(Header, "2.3cm" => Len(Length::cm(2.3)));
- t!(Header, "12e-3in" => Len(Length::inches(12e-3)));
- t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)),
- Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0));
- t!(Header, "(5 - 1) / 2.1" => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP,
- S(0), Slash, S(0), Num(2.1));
- t!(Header, "-1" => Min, Num(1.0));
- t!(Header, "--1" => Min, Min, Num(1.0));
- t!(Header, "- 1" => Min, S(0), Num(1.0));
- t!(Header, "02.4mm" => Len(Length::mm(2.4)));
- t!(Header, "2.4.cm" => Invalid("2.4.cm"));
- t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP);
- t!(Header, "{abc}" => LB, Id("abc"), RB);
- t!(Header, "🌓, 🌍," => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
- }
-
- #[test]
- fn tokenize_strings() {
- t!(Body, "a \"hi\" string" => T("a"), S(0), T("\"hi\""), S(0), T("string"));
- t!(Header, "\"hello" => Str("hello", false));
- t!(Header, "\"hello world\"" => Str("hello world", true));
- t!(Header, "\"hello\nworld\"" => Str("hello\nworld", true));
- t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false));
- t!(Header, r#""a\"bc""# => Str(r#"a\"bc"#, true));
- t!(Header, r#""a\\"bc""# => Str(r#"a\\"#, true), Id("bc"), Str("", false));
- t!(Header, r#""a\tbc"# => Str("a\\tbc", false));
- t!(Header, "\"🌎\"" => Str("🌎", true));
- }
-
- #[test]
- fn tokenize_escaped_symbols() {
- t!(Body, r"\\" => T(r"\"));
- t!(Body, r"\[" => T("["));
- t!(Body, r"\]" => T("]"));
- t!(Body, r"\*" => T("*"));
- t!(Body, r"\_" => T("_"));
- t!(Body, r"\`" => T("`"));
- t!(Body, r"\/" => T("/"));
- t!(Body, r"\u{2603}" => UE("2603", true));
- t!(Body, r"\u{26A4" => UE("26A4", false));
- t!(Body, r#"\""# => T("\""));
- }
-
- #[test]
- fn tokenize_unescapable_symbols() {
- t!(Body, r"\a" => T("\\"), T("a"));
- t!(Body, r"\:" => T(r"\"), T(":"));
- t!(Body, r"\=" => T(r"\"), T("="));
- t!(Body, r"\u{2GA4"=> UE("2", false), T("GA4"));
- t!(Body, r"\u{ " => UE("", false), Space(0));
- t!(Body, r"\u" => T(r"\u"));
- t!(Header, r"\\\\" => Invalid(r"\\\\"));
- t!(Header, r"\a" => Invalid(r"\a"));
- t!(Header, r"\:" => Invalid(r"\"), Colon);
- t!(Header, r"\=" => Invalid(r"\"), Equals);
- t!(Header, r"\," => Invalid(r"\"), Comma);
- }
-
- #[test]
- fn tokenize_with_spans() {
- ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
- ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
- ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
- ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
- ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));
- }
-}
diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs
index 715db109..f243e67a 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/tree.rs
@@ -4,12 +4,12 @@ use std::fmt::{self, Debug, Formatter};
use super::decoration::Decoration;
use super::span::{SpanVec, Spanned};
-use super::tokens::is_identifier;
use crate::color::RgbaColor;
use crate::compute::table::{SpannedEntry, Table};
use crate::compute::value::{TableValue, Value};
use crate::layout::LayoutContext;
use crate::length::Length;
+use crate::parse::is_identifier;
use crate::{DynFuture, Feedback};
/// A collection of nodes which form a tree together with the nodes' children.