diff options
Diffstat (limited to 'src/syntax')
| -rw-r--r-- | src/syntax/mod.rs | 57 | ||||
| -rw-r--r-- | src/syntax/parsing/escaping.rs | 243 | ||||
| -rw-r--r-- | src/syntax/parsing/mod.rs | 9 | ||||
| -rw-r--r-- | src/syntax/parsing/parser.rs | 660 | ||||
| -rw-r--r-- | src/syntax/parsing/tests.rs | 509 | ||||
| -rw-r--r-- | src/syntax/span.rs | 12 | ||||
| -rw-r--r-- | src/syntax/token.rs | 152 | ||||
| -rw-r--r-- | src/syntax/tokens.rs | 786 | ||||
| -rw-r--r-- | src/syntax/tree.rs | 2 |
9 files changed, 168 insertions, 2262 deletions
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 70935e79..1b9f8ba8 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,50 +1,11 @@ -//! Syntax trees, parsing and tokenization. +//! Syntax types. -pub mod decoration; -pub mod parsing; -pub mod span; -pub mod tokens; -pub mod tree; +mod decoration; +mod span; +mod token; +mod tree; -#[cfg(test)] -mod tests { - use super::span; - use crate::prelude::*; - use std::fmt::Debug; - - /// Assert that expected and found are equal, printing both and panicking - /// and the source of their test case if they aren't. - /// - /// When `cmp_spans` is false, spans are ignored. - pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool) - where - T: Debug + PartialEq, - { - span::set_cmp(cmp_spans); - let equal = exp == found; - span::set_cmp(true); - - if !equal { - println!("source: {:?}", src); - if cmp_spans { - println!("expected: {:#?}", exp); - println!("found: {:#?}", found); - } else { - println!("expected: {:?}", exp); - println!("found: {:?}", found); - } - panic!("test failed"); - } - } - - pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> { - Spanned::new(v, Span::new(Pos::new(sl, sc), Pos::new(el, ec))) - } - - // Enables tests to optionally specify spans. - impl<T> From<T> for Spanned<T> { - fn from(t: T) -> Self { - Spanned::zero(t) - } - } -} +pub use decoration::*; +pub use span::*; +pub use token::*; +pub use tree::*; diff --git a/src/syntax/parsing/escaping.rs b/src/syntax/parsing/escaping.rs deleted file mode 100644 index 5f06388e..00000000 --- a/src/syntax/parsing/escaping.rs +++ /dev/null @@ -1,243 +0,0 @@ -use crate::syntax::tokens::is_newline_char; - -/// Resolves all escape sequences in a string. -pub fn unescape_string(string: &str) -> String { - let mut iter = string.chars().peekable(); - let mut out = String::with_capacity(string.len()); - - while let Some(c) = iter.next() { - if c == '\\' { - match iter.next() { - Some('\\') => out.push('\\'), - Some('"') => out.push('"'), - Some('u') if iter.peek() == Some(&'{') => { - iter.next(); - - let mut sequence = String::new(); - let terminated = loop { - match iter.peek() { - // TODO: Feedback that closing brace is missing. - Some('}') => { - iter.next(); - break true; - } - Some(&c) if c.is_ascii_hexdigit() => { - iter.next(); - sequence.push(c); - } - _ => break false, - } - }; - - // TODO: Feedback that escape sequence is wrong. - if let Some(c) = hex_to_char(&sequence) { - out.push(c); - } else { - out.push_str("\\u{"); - out.push_str(&sequence); - if terminated { - out.push('}'); - } - } - } - Some('n') => out.push('\n'), - Some('t') => out.push('\t'), - Some(c) => { - out.push('\\'); - out.push(c); - } - None => out.push('\\'), - } - } else { - out.push(c); - } - } - - out -} - -/// Resolves all escape sequences in raw markup (between backticks) and splits it into -/// into lines. -pub fn unescape_raw(raw: &str) -> Vec<String> { - let mut iter = raw.chars(); - let mut text = String::new(); - - while let Some(c) = iter.next() { - if c == '\\' { - if let Some(c) = iter.next() { - if c != '\\' && c != '`' { - text.push('\\'); - } - - text.push(c); - } else { - text.push('\\'); - } - } else { - text.push(c); - } - } - - split_lines(&text) -} - -/// Resolves all escape sequences in code markup (between triple backticks) and splits it -/// into into lines. -pub fn unescape_code(raw: &str) -> Vec<String> { - let mut iter = raw.chars().peekable(); - let mut text = String::new(); - let mut backticks = 0u32; - let mut update_backtick_count; - - while let Some(c) = iter.next() { - update_backtick_count = true; - - if c == '\\' && backticks > 0 { - let mut tail = String::new(); - let mut escape_success = false; - let mut backticks_after_slash = 0u32; - - while let Some(&s) = iter.peek() { - match s { - '\\' => { - if backticks_after_slash == 0 { - tail.push('\\'); - } else { - // Pattern like `\`\` should fail - // escape and just be printed verbantim. - break; - } - } - '`' => { - tail.push(s); - backticks_after_slash += 1; - if backticks_after_slash == 2 { - escape_success = true; - iter.next(); - break; - } - } - _ => break, - } - - iter.next(); - } - - if !escape_success { - text.push(c); - backticks = backticks_after_slash; - update_backtick_count = false; - } else { - backticks = 0; - } - - text.push_str(&tail); - } else { - text.push(c); - } - - if update_backtick_count { - if c == '`' { - backticks += 1; - } else { - backticks = 0; - } - } - } - - split_lines(&text) -} - -/// Converts a hexademical sequence (without braces or "\u") into a character. -pub fn hex_to_char(sequence: &str) -> Option<char> { - u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) -} - -/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks). -pub fn split_lines(text: &str) -> Vec<String> { - let mut iter = text.chars().peekable(); - let mut line = String::new(); - let mut lines = Vec::new(); - - while let Some(c) = iter.next() { - if is_newline_char(c) { - if c == '\r' && iter.peek() == Some(&'\n') { - iter.next(); - } - - lines.push(std::mem::take(&mut line)); - } else { - line.push(c); - } - } - - lines.push(line); - lines -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - #[rustfmt::skip] - fn test_unescape_strings() { - fn test(string: &str, expected: &str) { - assert_eq!(unescape_string(string), expected.to_string()); - } - - test(r#"hello world"#, "hello world"); - test(r#"hello\nworld"#, "hello\nworld"); - test(r#"a\"bc"#, "a\"bc"); - test(r#"a\u{2603}bc"#, "aβbc"); - test(r#"a\u{26c3bg"#, "aπ¦°»g"); - test(r#"av\u{6797"#, "avζ"); - test(r#"a\\"#, "a\\"); - test(r#"a\\\nbc"#, "a\\\nbc"); - test(r#"a\tbc"#, "a\tbc"); - test(r"π", "π"); - test(r"π\", r"π\"); - test(r"\π", r"\π"); - } - - #[test] - #[rustfmt::skip] - fn test_unescape_raws() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_raw(raw), expected); - } - - test("raw\\`", vec!["raw`"]); - test("raw\\\\`", vec!["raw\\`"]); - test("raw\ntext", vec!["raw", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("raw\\a", vec!["raw\\a"]); - test("raw\\", vec!["raw\\"]); - } - - #[test] - #[rustfmt::skip] - fn test_unescape_code() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_code(raw), expected); - } - - test("code\\`", vec!["code\\`"]); - test("code`\\``", vec!["code```"]); - test("code`\\`a", vec!["code`\\`a"]); - test("code``hi`\\``", vec!["code``hi```"]); - test("code`\\\\``", vec!["code`\\``"]); - test("code`\\`\\`go", vec!["code`\\`\\`go"]); - test("code`\\`\\``", vec!["code`\\```"]); - test("code\ntext", vec!["code", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("code\\a", vec!["code\\a"]); - test("code\\", vec!["code\\"]); - } -} diff --git a/src/syntax/parsing/mod.rs b/src/syntax/parsing/mod.rs deleted file mode 100644 index bf34340f..00000000 --- a/src/syntax/parsing/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Parsing of source code into syntax trees. - -mod escaping; -mod parser; - -pub use parser::parse; - -#[cfg(test)] -mod tests; diff --git a/src/syntax/parsing/parser.rs b/src/syntax/parsing/parser.rs deleted file mode 100644 index ca41bf13..00000000 --- a/src/syntax/parsing/parser.rs +++ /dev/null @@ -1,660 +0,0 @@ -use std::str::FromStr; - -use super::escaping::*; -use crate::color::RgbaColor; -use crate::compute::table::SpannedEntry; -use crate::syntax::decoration::Decoration; -use crate::syntax::span::{Pos, Span, Spanned}; -use crate::syntax::tokens::{Token, TokenMode, Tokens}; -use crate::syntax::tree::*; -use crate::{Feedback, Pass}; - -/// Parse a string of source code. -pub fn parse(src: &str) -> Pass<SyntaxTree> { - Parser::new(src).parse() -} - -struct Parser<'s> { - tokens: Tokens<'s>, - peeked: Option<Option<Spanned<Token<'s>>>>, - delimiters: Vec<(Pos, Token<'static>)>, - at_block_or_line_start: bool, - feedback: Feedback, -} - -impl<'s> Parser<'s> { - fn new(src: &'s str) -> Self { - Self { - tokens: Tokens::new(src, TokenMode::Body), - peeked: None, - delimiters: vec![], - at_block_or_line_start: true, - feedback: Feedback::new(), - } - } - - fn parse(mut self) -> Pass<SyntaxTree> { - let tree = self.parse_body_contents(); - Pass::new(tree, self.feedback) - } -} - -// Typesetting content. -impl Parser<'_> { - fn parse_body_contents(&mut self) -> SyntaxTree { - let mut tree = SyntaxTree::new(); - - self.at_block_or_line_start = true; - while !self.eof() { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - tree - } - - fn parse_node(&mut self) -> Option<Spanned<SyntaxNode>> { - let token = self.peek()?; - let end = Span::at(token.span.end); - - // Set block or line start to false because most nodes have that effect, but - // remember the old value to actually check it for hashtags and because comments - // and spaces want to retain it. - let was_at_block_or_line_start = self.at_block_or_line_start; - self.at_block_or_line_start = false; - - Some(match token.v { - // Starting from two newlines counts as a paragraph break, a single - // newline does not. - Token::Space(n) => { - if n == 0 { - self.at_block_or_line_start = was_at_block_or_line_start; - } else if n >= 1 { - self.at_block_or_line_start = true; - } - - self.with_span(if n >= 2 { - SyntaxNode::Parbreak - } else { - SyntaxNode::Spacing - }) - } - - Token::LineComment(_) | Token::BlockComment(_) => { - self.at_block_or_line_start = was_at_block_or_line_start; - self.eat(); - return None; - } - - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - self.at_block_or_line_start = false; - call.map(SyntaxNode::Call) - } - - Token::Star => self.with_span(SyntaxNode::ToggleBolder), - Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), - Token::Backslash => self.with_span(SyntaxNode::Linebreak), - - Token::Hashtag if was_at_block_or_line_start => { - self.parse_heading().map(SyntaxNode::Heading) - } - - Token::Raw { raw, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backtick"); - } - self.with_span(SyntaxNode::Raw(unescape_raw(raw))) - } - - Token::Code { lang, raw, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backticks"); - } - - let lang = lang.and_then(|lang| { - if let Some(ident) = Ident::new(lang.v) { - Some(Spanned::new(ident, lang.span)) - } else { - error!(@self.feedback, lang.span, "invalid identifier"); - None - } - }); - - let mut lines = unescape_code(raw); - let block = lines.len() > 1; - - if lines.last().map(|s| s.is_empty()).unwrap_or(false) { - lines.pop(); - } - - self.with_span(SyntaxNode::Code(Code { lang, lines, block })) - } - - Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), - Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())), - - Token::UnicodeEscape { sequence, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected closing brace"); - } - - if let Some(c) = hex_to_char(sequence) { - self.with_span(SyntaxNode::Text(c.to_string())) - } else { - error!(@self.feedback, token.span, "invalid unicode escape sequence"); - self.eat(); - return None; - } - } - - unexpected => { - error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); - self.eat(); - return None; - } - }) - } - - fn parse_heading(&mut self) -> Spanned<Heading> { - let start = self.pos(); - self.assert(Token::Hashtag); - - let mut level = 0; - while self.peekv() == Some(Token::Hashtag) { - level += 1; - self.eat(); - } - - let span = Span::new(start, self.pos()); - let level = Spanned::new(level, span); - - if level.v > 5 { - warning!( - @self.feedback, level.span, - "section depth larger than 6 has no effect", - ); - } - - self.skip_ws(); - - let mut tree = SyntaxTree::new(); - while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - let span = Span::new(start, self.pos()); - Spanned::new(Heading { level, tree }, span) - } -} - -// Function calls. -impl Parser<'_> { - fn parse_bracket_call(&mut self, chained: bool) -> Spanned<CallExpr> { - let before_bracket = self.pos(); - if !chained { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Header); - } - - let before_name = self.pos(); - self.start_group(Group::Subheader); - self.skip_ws(); - let name = self.parse_ident().unwrap_or_else(|| { - self.expected_found_or_at("function name", before_name); - Spanned::new(Ident(String::new()), Span::at(before_name)) - }); - - self.skip_ws(); - - let mut args = match self.eatv() { - Some(Token::Colon) => self.parse_table_contents().0, - Some(_) => { - self.expected_at("colon", name.span.end); - while self.eat().is_some() {} - TableExpr::new() - } - None => TableExpr::new(), - }; - - self.end_group(); - self.skip_ws(); - let (has_chained_child, end) = if self.peek().is_some() { - let item = self.parse_bracket_call(true); - let span = item.span; - let t = vec![item.map(SyntaxNode::Call)]; - args.push(SpannedEntry::val(Spanned::new(Expr::Tree(t), span))); - (true, span.end) - } else { - self.tokens.pop_mode(); - (false, self.end_group().end) - }; - - let start = if chained { before_name } else { before_bracket }; - let mut span = Span::new(start, end); - - if self.check(Token::LeftBracket) && !has_chained_child { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Body); - - let body = self.parse_body_contents(); - - self.tokens.pop_mode(); - let body_span = self.end_group(); - - let expr = Expr::Tree(body); - args.push(SpannedEntry::val(Spanned::new(expr, body_span))); - span.expand(body_span); - } - - Spanned::new(CallExpr { name, args }, span) - } - - fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> { - self.start_group(Group::Paren); - let args = self.parse_table_contents().0; - let args_span = self.end_group(); - let span = Span::merge(name.span, args_span); - Spanned::new(CallExpr { name, args }, span) - } -} - -// Tables. -impl Parser<'_> { - fn parse_table_contents(&mut self) -> (TableExpr, bool) { - let mut table = TableExpr::new(); - let mut comma_and_keyless = true; - - while { - self.skip_ws(); - !self.eof() - } { - let (key, val) = if let Some(ident) = self.parse_ident() { - self.skip_ws(); - - match self.peekv() { - Some(Token::Equals) => { - self.eat(); - self.skip_ws(); - if let Some(value) = self.parse_expr() { - (Some(ident), value) - } else { - self.expected("value"); - continue; - } - } - - Some(Token::LeftParen) => { - let call = self.parse_paren_call(ident); - (None, call.map(Expr::Call)) - } - - _ => (None, ident.map(Expr::Ident)), - } - } else if let Some(value) = self.parse_expr() { - (None, value) - } else { - self.expected("value"); - continue; - }; - - let behind = val.span.end; - if let Some(key) = key { - comma_and_keyless = false; - table.insert(key.v.0, SpannedEntry::new(key.span, val)); - self.feedback - .decorations - .push(Spanned::new(Decoration::TableKey, key.span)); - } else { - table.push(SpannedEntry::val(val)); - } - - if { - self.skip_ws(); - self.eof() - } { - break; - } - - self.expect_at(Token::Comma, behind); - comma_and_keyless = false; - } - - let coercable = comma_and_keyless && !table.is_empty(); - (table, coercable) - } -} - -type Binop = fn(Box<Spanned<Expr>>, Box<Spanned<Expr>>) -> Expr; - -// Expressions and values. -impl Parser<'_> { - fn parse_expr(&mut self) -> Option<Spanned<Expr>> { - self.parse_binops("summand", Self::parse_term, |token| match token { - Token::Plus => Some(Expr::Add), - Token::Hyphen => Some(Expr::Sub), - _ => None, - }) - } - - fn parse_term(&mut self) -> Option<Spanned<Expr>> { - self.parse_binops("factor", Self::parse_factor, |token| match token { - Token::Star => Some(Expr::Mul), - Token::Slash => Some(Expr::Div), - _ => None, - }) - } - - /// Parse expression of the form `<operand> (<op> <operand>)*`. - fn parse_binops( - &mut self, - operand_name: &str, - mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>, - mut parse_op: impl FnMut(Token) -> Option<Binop>, - ) -> Option<Spanned<Expr>> { - let mut left = parse_operand(self)?; - - self.skip_ws(); - while let Some(token) = self.peek() { - if let Some(op) = parse_op(token.v) { - self.eat(); - self.skip_ws(); - - if let Some(right) = parse_operand(self) { - let span = Span::merge(left.span, right.span); - let v = op(Box::new(left), Box::new(right)); - left = Spanned::new(v, span); - self.skip_ws(); - continue; - } - - error!( - @self.feedback, Span::merge(left.span, token.span), - "missing right {}", operand_name, - ); - } - break; - } - - Some(left) - } - - fn parse_factor(&mut self) -> Option<Spanned<Expr>> { - if let Some(hyph) = self.check_eat(Token::Hyphen) { - self.skip_ws(); - if let Some(factor) = self.parse_factor() { - let span = Span::merge(hyph.span, factor.span); - Some(Spanned::new(Expr::Neg(Box::new(factor)), span)) - } else { - error!(@self.feedback, hyph.span, "dangling minus"); - None - } - } else { - self.parse_value() - } - } - - fn parse_value(&mut self) -> Option<Spanned<Expr>> { - let Spanned { v: token, span } = self.peek()?; - Some(match token { - // This could be a function call or an identifier. - Token::Ident(id) => { - let name = Spanned::new(Ident(id.to_string()), span); - self.eat(); - self.skip_ws(); - if self.check(Token::LeftParen) { - self.parse_paren_call(name).map(Expr::Call) - } else { - name.map(Expr::Ident) - } - } - - Token::Str { string, terminated } => { - if !terminated { - self.expected_at("quote", span.end); - } - self.with_span(Expr::Str(unescape_string(string))) - } - - Token::Bool(b) => self.with_span(Expr::Bool(b)), - Token::Number(n) => self.with_span(Expr::Number(n)), - Token::Length(s) => self.with_span(Expr::Length(s)), - Token::Hex(s) => { - if let Ok(color) = RgbaColor::from_str(s) { - self.with_span(Expr::Color(color)) - } else { - // Heal color by assuming black. - error!(@self.feedback, span, "invalid color"); - let healed = RgbaColor::new_healed(0, 0, 0, 255); - self.with_span(Expr::Color(healed)) - } - } - - // This could be a table or a parenthesized expression. We parse as - // a table in any case and coerce the table into a value if it is - // coercable (length 1 and no trailing comma). - Token::LeftParen => { - self.start_group(Group::Paren); - let (table, coercable) = self.parse_table_contents(); - let span = self.end_group(); - - let expr = if coercable { - table.into_values().next().expect("table is coercable").val.v - } else { - Expr::Table(table) - }; - - Spanned::new(expr, span) - } - - // This is a content expression. - Token::LeftBrace => { - self.start_group(Group::Brace); - self.tokens.push_mode(TokenMode::Body); - - let tree = self.parse_body_contents(); - - self.tokens.pop_mode(); - let span = self.end_group(); - Spanned::new(Expr::Tree(tree), span) - } - - // This is a bracketed function call. - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - let tree = vec![call.map(SyntaxNode::Call)]; - Spanned::new(Expr::Tree(tree), span) - } - - _ => return None, - }) - } - - fn parse_ident(&mut self) -> Option<Spanned<Ident>> { - self.peek().and_then(|token| match token.v { - Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), - _ => None, - }) - } -} - -// Error handling. -impl Parser<'_> { - fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { - if self.check(token) { - self.eat(); - true - } else { - self.expected_at(token.name(), pos); - false - } - } - - fn expected(&mut self, thing: &str) { - if let Some(found) = self.eat() { - error!( - @self.feedback, found.span, - "expected {}, found {}", thing, found.v.name(), - ); - } else { - error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); - } - } - - fn expected_at(&mut self, thing: &str, pos: Pos) { - error!(@self.feedback, Span::at(pos), "expected {}", thing); - } - - fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { - if self.eof() { - self.expected_at(thing, pos) - } else { - self.expected(thing); - } - } -} - -// Parsing primitives. -impl<'s> Parser<'s> { - fn start_group(&mut self, group: Group) { - let start = self.pos(); - if let Some(start_token) = group.start() { - self.assert(start_token); - } - self.delimiters.push((start, group.end())); - } - - fn end_group(&mut self) -> Span { - let peeked = self.peek(); - - let (start, end_token) = self.delimiters.pop().expect("group was not started"); - - if end_token != Token::Chain && peeked != None { - self.delimiters.push((start, end_token)); - assert_eq!(peeked, None, "unfinished group"); - } - - match self.peeked.unwrap() { - Some(token) if token.v == end_token => { - self.peeked = None; - Span::new(start, token.span.end) - } - _ => { - let end = self.pos(); - if end_token != Token::Chain { - error!( - @self.feedback, Span::at(end), - "expected {}", end_token.name(), - ); - } - Span::new(start, end) - } - } - } - - fn skip_ws(&mut self) { - while matches!( - self.peekv(), - Some(Token::Space(_)) | - Some(Token::LineComment(_)) | - Some(Token::BlockComment(_)) - ) { - self.eat(); - } - } - - fn eatv(&mut self) -> Option<Token<'s>> { - self.eat().map(Spanned::value) - } - - fn peekv(&mut self) -> Option<Token<'s>> { - self.peek().map(Spanned::value) - } - - fn assert(&mut self, token: Token<'_>) { - assert!(self.check_eat(token).is_some()); - } - - fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> { - if self.check(token) { self.eat() } else { None } - } - - /// Checks if the next token is of some kind - fn check(&mut self, token: Token<'_>) -> bool { - self.peekv() == Some(token) - } - - fn with_span<T>(&mut self, v: T) -> Spanned<T> { - let span = self.eat().expect("expected token").span; - Spanned::new(v, span) - } - - fn eof(&mut self) -> bool { - self.peek().is_none() - } - - fn eat(&mut self) -> Option<Spanned<Token<'s>>> { - let token = self.peek()?; - self.peeked = None; - Some(token) - } - - fn peek(&mut self) -> Option<Spanned<Token<'s>>> { - let tokens = &mut self.tokens; - let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; - - // Check for unclosed groups. - if Group::is_delimiter(token.v) { - if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { - return None; - } - } - - Some(token) - } - - fn pos(&self) -> Pos { - self.peeked - .flatten() - .map(|s| s.span.start) - .unwrap_or_else(|| self.tokens.pos()) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Group { - Paren, - Bracket, - Brace, - Subheader, -} - -impl Group { - fn is_delimiter(token: Token<'_>) -> bool { - matches!( - token, - Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain - ) - } - - fn start(self) -> Option<Token<'static>> { - match self { - Self::Paren => Some(Token::LeftParen), - Self::Bracket => Some(Token::LeftBracket), - Self::Brace => Some(Token::LeftBrace), - Self::Subheader => None, - } - } - - fn end(self) -> Token<'static> { - match self { - Self::Paren => Token::RightParen, - Self::Bracket => Token::RightBracket, - Self::Brace => Token::RightBrace, - Self::Subheader => Token::Chain, - } - } -} diff --git a/src/syntax/parsing/tests.rs b/src/syntax/parsing/tests.rs deleted file mode 100644 index 7fdf02ca..00000000 --- a/src/syntax/parsing/tests.rs +++ /dev/null @@ -1,509 +0,0 @@ -#![allow(non_snake_case)] - -use super::parse; -use crate::color::RgbaColor; -use crate::compute::table::SpannedEntry; -use crate::length::Length; -use crate::syntax::decoration::Decoration::*; -use crate::syntax::span::Spanned; -use crate::syntax::tests::*; -use crate::syntax::tree::*; - -// ------------------------------ Construct Syntax Nodes ------------------------------ // - -use SyntaxNode::{ - Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I, -}; - -fn T(text: &str) -> SyntaxNode { - SyntaxNode::Text(text.to_string()) -} - -macro_rules! H { - ($level:expr, $($tts:tt)*) => { - SyntaxNode::Heading(Heading { - level: Spanned::zero($level), - tree: Tree![@$($tts)*], - }) - }; -} - -macro_rules! R { - ($($line:expr),* $(,)?) => { - SyntaxNode::Raw(vec![$($line.to_string()),*]) - }; -} - -macro_rules! C { - ($lang:expr, $($line:expr),* $(,)?) => {{ - let lines = vec![$($line.to_string()) ,*]; - SyntaxNode::Code(Code { - lang: $lang, - block: lines.len() > 1, - lines, - }) - }}; -} - -fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> { - Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string()))) -} - -macro_rules! F { - ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) } -} - -// ------------------------------- Construct Expressions ------------------------------ // - -use Expr::{Bool, Color, Length as Len, Number as Num}; - -fn Id(ident: &str) -> Expr { - Expr::Ident(Ident(ident.to_string())) -} -fn Str(string: &str) -> Expr { - Expr::Str(string.to_string()) -} - -macro_rules! Table { - (@table=$table:expr,) => {}; - (@table=$table:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{ - let key = Into::<Spanned<&str>>::into($key); - let val = Into::<Spanned<Expr>>::into($value); - $table.insert(key.v, SpannedEntry::new(key.span, val)); - Table![@table=$table, $($($tts)*)?]; - }}; - (@table=$table:expr, $value:expr $(, $($tts:tt)*)?) => { - let val = Into::<Spanned<Expr>>::into($value); - $table.push(SpannedEntry::val(val)); - Table![@table=$table, $($($tts)*)?]; - }; - (@$($tts:tt)*) => {{ - #[allow(unused_mut)] - let mut table = TableExpr::new(); - Table![@table=table, $($tts)*]; - table - }}; - ($($tts:tt)*) => { Expr::Table(Table![@$($tts)*]) }; -} - -macro_rules! Tree { - (@$($node:expr),* $(,)?) => { - vec![$(Into::<Spanned<SyntaxNode>>::into($node)),*] - }; - ($($tts:tt)*) => { Expr::Tree(Tree![@$($tts)*]) }; -} - -macro_rules! Call { - (@$name:expr $(; $($tts:tt)*)?) => {{ - let name = Into::<Spanned<&str>>::into($name); - CallExpr { - name: name.map(|n| Ident(n.to_string())), - args: Table![@$($($tts)*)?], - } - }}; - ($($tts:tt)*) => { Expr::Call(Call![@$($tts)*]) }; -} - -fn Neg<T: Into<Spanned<Expr>>>(e1: T) -> Expr { - Expr::Neg(Box::new(e1.into())) -} -fn Add<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr { - Expr::Add(Box::new(e1.into()), Box::new(e2.into())) -} -fn Sub<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr { - Expr::Sub(Box::new(e1.into()), Box::new(e2.into())) -} -fn Mul<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr { - Expr::Mul(Box::new(e1.into()), Box::new(e2.into())) -} -fn Div<T: Into<Spanned<Expr>>>(e1: T, e2: T) -> Expr { - Expr::Div(Box::new(e1.into()), Box::new(e2.into())) -} - -// ------------------------------------ Test Macros ----------------------------------- // - -// Test syntax trees with or without spans. -macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } -macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } -macro_rules! test { - (@spans=$spans:expr, $src:expr => $($tts:tt)*) => { - let exp = Tree![@$($tts)*]; - let pass = parse($src); - check($src, exp, pass.output, $spans); - }; -} - -// Test expressions. -macro_rules! v { - ($src:expr => $($tts:tt)*) => { - t!(concat!("[val: ", $src, "]") => F!("val"; $($tts)*)); - } -} - -// Test error messages. -macro_rules! e { - ($src:expr => $($tts:tt)*) => { - let exp = vec![$($tts)*]; - let pass = parse($src); - let found = pass.feedback.diagnostics.iter() - .map(|s| s.as_ref().map(|e| e.message.as_str())) - .collect::<Vec<_>>(); - check($src, exp, found, true); - }; -} - -// Test decorations. -macro_rules! d { - ($src:expr => $($tts:tt)*) => { - let exp = vec![$($tts)*]; - let pass = parse($src); - check($src, exp, pass.feedback.decorations, true); - }; -} - -// --------------------------------------- Tests -------------------------------------- // - -#[test] -fn test_parse_groups() { - e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"), - s(0,2, 0,2, "expected closing bracket")); - - e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"), - s(0,5, 0,6, "unexpected closing brace")); -} - -#[test] -fn test_parse_simple_nodes() { - t!("" => ); - t!("hi" => T("hi")); - t!("*hi" => B, T("hi")); - t!("hi_" => T("hi"), I); - t!("hi you" => T("hi"), S, T("you")); - t!("special~name" => T("special"), T("\u{00A0}"), T("name")); - t!("special\\~name" => T("special"), T("~"), T("name")); - t!("\\u{1f303}" => T("π")); - t!("\n\n\nhello" => P, T("hello")); - t!(r"a\ b" => T("a"), L, S, T("b")); - t!("`py`" => R!["py"]); - t!("`hi\nyou" => R!["hi", "you"]); - e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); - t!("`hi\\`du`" => R!["hi`du"]); - - ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"])); - t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); - t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ - Lang("typst"), " Typst uses ``` to indicate code blocks" - ]); - - e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks")); - e!("```π hi\nyou```" => s(0,3, 0,4, "invalid identifier")); - e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode escape sequence")); - e!("\\u{abc" => s(0,6, 0,6, "expected closing brace")); - t!("π\n\n π" => T("π"), P, T("π")); - - ts!("hi" => s(0,0, 0,2, T("hi"))); - ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B)); - ts!("π\n\n π" => s(0,0, 0,1, T("π")), s(0,1, 2,1, P), s(2,1, 2,2, T("π"))); -} - -#[test] -fn test_parse_comments() { - // In body. - t!("hi// you\nw" => T("hi"), S, T("w")); - t!("first//\n//\nsecond" => T("first"), S, S, T("second")); - t!("first//\n \nsecond" => T("first"), P, T("second")); - t!("first/*\n \n*/second" => T("first"), T("second")); - e!("π\n*/n" => s(1,0, 1,2, "unexpected end of block comment")); - - // In header. - t!("[val:/*12pt*/]" => F!("val")); - t!("[val \n /* \n */:]" => F!("val")); - e!("[val \n /* \n */:]" => ); - e!("[val : 12, /* \n */ 14]" => ); -} - -#[test] -fn test_parse_headings() { - t!("## Hello world!" => H![1, T("Hello"), S, T("world!")]); - - // Handle various whitespace usages. - t!("####Simple" => H![3, T("Simple")]); - t!(" # Whitespace!" => S, H![0, T("Whitespace!")]); - t!(" /* TODO: Improve */ ## Analysis" => S, S, H!(1, T("Analysis"))); - - // Complex heading contents. - t!("Some text [box][### Valuable facts]" => T("Some"), S, T("text"), S, - F!("box"; Tree![H!(2, T("Valuable"), S, T("facts"))]) - ); - t!("### Grandiose stuff [box][Get it \n\n straight]" => H![2, - T("Grandiose"), S, T("stuff"), S, - F!("box"; Tree![T("Get"), S, T("it"), P, T("straight")]) - ]); - t!("###### Multiline \\ headings" => H![5, T("Multiline"), S, L, S, T("headings")]); - - // Things that should not become headings. - t!("\\## Text" => T("#"), T("#"), S, T("Text")); - t!(" ###### # Text" => S, H!(5, T("#"), S, T("Text"))); - t!("I am #1" => T("I"), S, T("am"), S, T("#"), T("1")); - t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi")); - - // Depth warnings. - e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect")); -} - -#[test] -fn test_parse_function_names() { - // No closing bracket. - t!("[" => F!("")); - e!("[" => s(0,1, 0,1, "expected function name"), - s(0,1, 0,1, "expected closing bracket")); - - // No name. - e!("[]" => s(0,1, 0,1, "expected function name")); - e!("[\"]" => s(0,1, 0,3, "expected function name, found string"), - s(0,3, 0,3, "expected closing bracket")); - - // A valid name. - t!("[hi]" => F!("hi")); - t!("[ f]" => F!("f")); - - // An invalid name. - e!("[12]" => s(0,1, 0,3, "expected function name, found number")); - e!("[ π]" => s(0,3, 0,4, "expected function name, found invalid token")); -} - -#[test] -fn test_parse_chaining() { - // Things the parser has to make sense of - t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Table![Num(5.0), Num(2.1)], Tree![F!("you")])); - t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")])); - t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![ - F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi"))) - ])); - t!("[bold: 400, >> emph >> sub: 1cm]" => F!("bold"; Num(400.0), Tree![ - F!("emph"; Tree!(F!("sub"; Len(Length::cm(1.0))))) - ])); - - // Errors for unclosed / empty predecessor groups - e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren")); - e!("[>> abc]" => s(0, 1, 0, 1, "expected function name")); -} - -#[test] -fn test_parse_colon_starting_func_args() { - // Just colon without args. - e!("[val:]" => ); - - // Wrong token. - t!("[val=]" => F!("val")); - e!("[val=]" => s(0,4, 0,4, "expected colon")); - e!("[val/π:$]" => s(0,4, 0,4, "expected colon")); - - // String in invalid header without colon still parsed as string - // Note: No "expected quote" error because not even the string was - // expected. - e!("[val/\"]" => s(0,4, 0,4, "expected colon"), - s(0,7, 0,7, "expected closing bracket")); -} - -#[test] -fn test_parse_function_bodies() { - t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B])); - e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); - - // Raw in body. - t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]])); - e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket")); - - // Crazy. - t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")])); - - // Spanned. - ts!(" [box][Oh my]" => - s(0,0, 0,1, S), - s(0,1, 0,13, F!(s(0,2, 0,5, "box"); - s(0,6, 0,13, Tree![ - s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my")) - ]) - )) - ); -} - -#[test] -fn test_parse_values() { - // Simple. - v!("_" => Id("_")); - v!("name" => Id("name")); - v!("Ξ±" => Id("Ξ±")); - v!("\"hi\"" => Str("hi")); - v!("true" => Bool(true)); - v!("false" => Bool(false)); - v!("1.0e-4" => Num(1e-4)); - v!("3.14" => Num(3.14)); - v!("50%" => Num(0.5)); - v!("4.5cm" => Len(Length::cm(4.5))); - v!("12e1pt" => Len(Length::pt(12e1))); - v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00))); - v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string")); - - // Content. - v!("{_hi_}" => Tree![I, T("hi"), I]); - e!("[val: {_hi_}]" => ); - v!("[hi]" => Tree![F!("hi")]); - e!("[val: [hi]]" => ); - - // Healed colors. - v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); - e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); - e!("[val: #a5]" => s(0,6, 0,9, "invalid color")); - e!("[val: #14b2ah]" => s(0,6, 0,13, "invalid color")); - e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color")); - - // Unclosed string. - v!("\"hello" => Str("hello]")); - e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"), - s(0,13, 0,13, "expected closing bracket")); - - // Spanned. - ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4))))); -} - -#[test] -fn test_parse_expressions() { - // Coerced table. - v!("(hi)" => Id("hi")); - - // Operations. - v!("-1" => Neg(Num(1.0))); - v!("-- 1" => Neg(Neg(Num(1.0)))); - v!("3.2in + 6pt" => Add(Len(Length::inches(3.2)), Len(Length::pt(6.0)))); - v!("5 - 0.01" => Sub(Num(5.0), Num(0.01))); - v!("(3mm * 2)" => Mul(Len(Length::mm(3.0)), Num(2.0))); - v!("12e-3cm/1pt" => Div(Len(Length::cm(12e-3)), Len(Length::pt(1.0)))); - - // More complex. - v!("(3.2in + 6pt)*(5/2-1)" => Mul( - Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))), - Sub(Div(Num(5.0), Num(2.0)), Num(1.0)) - )); - v!("(6.3E+2+4* - 3.2pt)/2" => Div( - Add(Num(6.3e2), Mul(Num(4.0), Neg(Len(Length::pt(3.2))))), - Num(2.0) - )); - - // Associativity of multiplication and division. - v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0))); - - // Spanned. - ts!("[val: 1 + 3]" => s(0,0, 0,12, F!( - s(0,1, 0,4, "val"); s(0,6, 0,11, Add( - s(0,6, 0,7, Num(1.0)), - s(0,10, 0,11, Num(3.0)), - )) - ))); - - // Span of parenthesized expression contains parens. - ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0))))); - - // Invalid expressions. - v!("4pt--" => Len(Length::pt(4.0))); - e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"), - s(0,6, 0,10, "missing right summand")); - - v!("3mm+4pt*" => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0)))); - e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor")); -} - -#[test] -fn test_parse_tables() { - // Okay. - v!("()" => Table![]); - v!("(false)" => Bool(false)); - v!("(true,)" => Table![Bool(true)]); - v!("(key=val)" => Table!["key" => Id("val")]); - v!("(1, 2)" => Table![Num(1.0), Num(2.0)]); - v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]); - - // Decorations. - d!("[val: key=hi]" => s(0,6, 0,9, TableKey)); - d!("[val: (key=hi)]" => s(0,7, 0,10, TableKey)); - d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey)); - - // Spanned with spacing around keyword arguments. - ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!( - s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n")) - ))); - e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => ); -} - -#[test] -fn test_parse_tables_compute_func_calls() { - v!("empty()" => Call!("empty")); - v!("add ( 1 , 2 )" => Call!("add"; Num(1.0), Num(2.0))); - v!("items(\"fire\", #f93a6d)" => Call!("items"; - Str("fire"), Color(RgbaColor::new(0xf9, 0x3a, 0x6d, 0xff)) - )); - - // More complex. - v!("css(1pt, rgb(90, 102, 254), \"solid\")" => Call!( - "css"; - Len(Length::pt(1.0)), - Call!("rgb"; Num(90.0), Num(102.0), Num(254.0)), - Str("solid"), - )); - - // Unclosed. - v!("lang(δΈζ]" => Call!("lang"; Id("δΈζ"))); - e!("[val: lang(δΈζ]" => s(0,13, 0,13, "expected closing paren")); - - // Invalid name. - v!("π (\"abc\", 13e-5)" => Table!(Str("abc"), Num(13.0e-5))); - e!("[val: π (\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token")); -} - -#[test] -fn test_parse_tables_nested() { - v!("(1, ( ab=(), d = (3, 14pt) )), false" => - Table![ - Num(1.0), - Table!( - "ab" => Table![], - "d" => Table!(Num(3.0), Len(Length::pt(14.0))), - ), - ], - Bool(false), - ); -} - -#[test] -fn test_parse_tables_errors() { - // Expected value. - e!("[val: (=)]" => s(0,7, 0,8, "expected value, found equals sign")); - e!("[val: (,)]" => s(0,7, 0,8, "expected value, found comma")); - v!("(\x07 abc,)" => Table![Id("abc")]); - e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); - e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); - e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren")); - - // Expected comma. - v!("(true false)" => Table![Bool(true), Bool(false)]); - e!("[val: (true false)]" => s(0,11, 0,11, "expected comma")); - - // Expected closing paren. - e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren")); - e!("[val: (key]" => s(0,10, 0,10, "expected closing paren")); - e!("[val: (key=]" => s(0,11, 0,11, "expected value"), - s(0,11, 0,11, "expected closing paren")); - - // Bad key. - v!("true=you" => Bool(true), Id("you")); - e!("[val: true=you]" => - s(0,10, 0,10, "expected comma"), - s(0,10, 0,11, "expected value, found equals sign")); - - // Unexpected equals sign. - v!("z=y=4" => Num(4.0), "z" => Id("y")); - e!("[val: z=y=4]" => - s(0,9, 0,9, "expected comma"), - s(0,9, 0,10, "expected value, found equals sign")); -} diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 89f773c7..9357c345 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -14,12 +14,6 @@ thread_local! { static CMP_SPANS: Cell<bool> = Cell::new(true); } -/// When set to `false` comparisons with `PartialEq` ignore spans. -#[cfg(test)] -pub(crate) fn set_cmp(cmp: bool) { - CMP_SPANS.with(|cell| cell.set(cmp)); -} - /// Span offsetting. pub trait Offset { /// Offset all spans contained in `Self` by the given position. @@ -132,6 +126,12 @@ impl Span { pub fn expand(&mut self, other: Self) { *self = Self::merge(*self, other) } + + /// When set to `false` comparisons with `PartialEq` ignore spans. + #[cfg(test)] + pub(crate) fn set_cmp(cmp: bool) { + CMP_SPANS.with(|cell| cell.set(cmp)); + } } impl Offset for Span { diff --git a/src/syntax/token.rs b/src/syntax/token.rs new file mode 100644 index 00000000..e91a780c --- /dev/null +++ b/src/syntax/token.rs @@ -0,0 +1,152 @@ +//! Tokenization. + +use super::span::Spanned; +use crate::length::Length; + +/// A minimal semantic entity of source code. +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum Token<'s> { + /// One or more whitespace characters. The contained `usize` denotes the + /// number of newlines that were contained in the whitespace. + Space(usize), + + /// A line comment with inner string contents `//<str>\n`. + LineComment(&'s str), + /// A block comment with inner string contents `/*<str>*/`. The comment + /// can contain nested block comments. + BlockComment(&'s str), + + /// A left bracket starting a function invocation or body: `[`. + LeftBracket, + /// A right bracket ending a function invocation or body: `]`. + RightBracket, + /// A left parenthesis in a function header: `(`. + LeftParen, + /// A right parenthesis in a function header: `)`. + RightParen, + /// A left brace in a function header: `{`. + LeftBrace, + /// A right brace in a function header: `}`. + RightBrace, + /// A double forward chevron in a function header: `>>`. + Chain, + + /// A colon in a function header: `:`. + Colon, + /// A comma in a function header: `,`. + Comma, + /// An equals sign in a function header: `=`. + Equals, + + /// An identifier in a function header: `center`. + Ident(&'s str), + /// A quoted string in a function header: `"..."`. + Str { + /// The string inside the quotes. + /// + /// _Note_: If the string contains escape sequences these are not yet + /// applied to be able to just store a string slice here instead of + /// a String. The escaping is done later in the parser. + string: &'s str, + /// Whether the closing quote was present. + terminated: bool, + }, + /// A boolean in a function header: `true | false`. + Bool(bool), + /// A number in a function header: `3.14`. + Number(f64), + /// A length in a function header: `12pt`. + Length(Length), + /// A hex value in a function header: `#20d82a`. + Hex(&'s str), + /// A plus in a function header, signifying the addition of expressions. + Plus, + /// A hyphen in a function header, signifying the subtraction of + /// expressions. + Hyphen, + /// A slash in a function header, signifying the division of expressions. + Slash, + + /// A star. It can appear in a function header where it signifies the + /// multiplication of expressions or the body where it modifies the styling. + Star, + /// An underscore in body-text. + Underscore, + /// A backslash followed by whitespace in text. + Backslash, + + /// A hashtag token in the body can indicate compute mode or headings. + Hashtag, + + /// A unicode escape sequence. + UnicodeEscape { + /// The escape sequence between two braces. + sequence: &'s str, + /// Whether the closing brace was present. + terminated: bool, + }, + + /// Raw text. + Raw { + /// The raw text (not yet unescaped as for strings). + raw: &'s str, + /// Whether the closing backtick was present. + terminated: bool, + }, + + /// Multi-line code block. + Code { + /// The language of the code block, if specified. + lang: Option<Spanned<&'s str>>, + /// The raw text (not yet unescaped as for strings). + raw: &'s str, + /// Whether the closing backticks were present. + terminated: bool, + }, + + /// Any other consecutive string. + Text(&'s str), + + /// Things that are not valid in the context they appeared in. + Invalid(&'s str), +} + +impl<'s> Token<'s> { + /// The natural-language name for this token for use in error messages. + pub fn name(self) -> &'static str { + match self { + Self::Space(_) => "space", + Self::LineComment(_) => "line comment", + Self::BlockComment(_) => "block comment", + Self::LeftBracket => "opening bracket", + Self::RightBracket => "closing bracket", + Self::LeftParen => "opening paren", + Self::RightParen => "closing paren", + Self::LeftBrace => "opening brace", + Self::RightBrace => "closing brace", + Self::Chain => "function chain operator", + Self::Colon => "colon", + Self::Comma => "comma", + Self::Equals => "equals sign", + Self::Ident(_) => "identifier", + Self::Str { .. } => "string", + Self::Bool(_) => "bool", + Self::Number(_) => "number", + Self::Length(_) => "length", + Self::Hex(_) => "hex value", + Self::Plus => "plus", + Self::Hyphen => "minus", + Self::Slash => "slash", + Self::Star => "star", + Self::Underscore => "underscore", + Self::Backslash => "backslash", + Self::Hashtag => "hashtag", + Self::UnicodeEscape { .. } => "unicode escape sequence", + Self::Raw { .. } => "raw text", + Self::Code { .. } => "code block", + Self::Text(_) => "text", + Self::Invalid("*/") => "end of block comment", + Self::Invalid(_) => "invalid token", + } + } +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs deleted file mode 100644 index 0c37e992..00000000 --- a/src/syntax/tokens.rs +++ /dev/null @@ -1,786 +0,0 @@ -//! Tokenization. - -use std::iter::Peekable; -use std::str::Chars; -use unicode_xid::UnicodeXID; - -use super::span::{Pos, Span, Spanned}; -use crate::length::Length; - -use Token::*; -use TokenMode::*; -/// A minimal semantic entity of source code. -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum Token<'s> { - /// One or more whitespace characters. The contained `usize` denotes the - /// number of newlines that were contained in the whitespace. - Space(usize), - - /// A line comment with inner string contents `//<str>\n`. - LineComment(&'s str), - /// A block comment with inner string contents `/*<str>*/`. The comment - /// can contain nested block comments. - BlockComment(&'s str), - - /// A left bracket starting a function invocation or body: `[`. - LeftBracket, - /// A right bracket ending a function invocation or body: `]`. - RightBracket, - /// A left parenthesis in a function header: `(`. - LeftParen, - /// A right parenthesis in a function header: `)`. - RightParen, - /// A left brace in a function header: `{`. - LeftBrace, - /// A right brace in a function header: `}`. - RightBrace, - /// A double forward chevron in a function header: `>>`. - Chain, - - /// A colon in a function header: `:`. - Colon, - /// A comma in a function header: `,`. - Comma, - /// An equals sign in a function header: `=`. - Equals, - - /// An identifier in a function header: `center`. - Ident(&'s str), - /// A quoted string in a function header: `"..."`. - Str { - /// The string inside the quotes. - /// - /// _Note_: If the string contains escape sequences these are not yet - /// applied to be able to just store a string slice here instead of - /// a String. The escaping is done later in the parser. - string: &'s str, - /// Whether the closing quote was present. - terminated: bool, - }, - /// A boolean in a function header: `true | false`. - Bool(bool), - /// A number in a function header: `3.14`. - Number(f64), - /// A length in a function header: `12pt`. - Length(Length), - /// A hex value in a function header: `#20d82a`. - Hex(&'s str), - /// A plus in a function header, signifying the addition of expressions. - Plus, - /// A hyphen in a function header, signifying the subtraction of - /// expressions. - Hyphen, - /// A slash in a function header, signifying the division of expressions. - Slash, - - /// A star. It can appear in a function header where it signifies the - /// multiplication of expressions or the body where it modifies the styling. - Star, - /// An underscore in body-text. - Underscore, - /// A backslash followed by whitespace in text. - Backslash, - - /// A hashtag token in the body can indicate compute mode or headings. - Hashtag, - - /// A unicode escape sequence. - UnicodeEscape { - /// The escape sequence between two braces. - sequence: &'s str, - /// Whether the closing brace was present. - terminated: bool, - }, - - /// Raw text. - Raw { - /// The raw text (not yet unescaped as for strings). - raw: &'s str, - /// Whether the closing backtick was present. - terminated: bool, - }, - - /// Multi-line code block. - Code { - /// The language of the code block, if specified. - lang: Option<Spanned<&'s str>>, - /// The raw text (not yet unescaped as for strings). - raw: &'s str, - /// Whether the closing backticks were present. - terminated: bool, - }, - - /// Any other consecutive string. - Text(&'s str), - - /// Things that are not valid in the context they appeared in. - Invalid(&'s str), -} - -impl<'s> Token<'s> { - /// The natural-language name for this token for use in error messages. - pub fn name(self) -> &'static str { - match self { - Space(_) => "space", - LineComment(_) => "line comment", - BlockComment(_) => "block comment", - LeftBracket => "opening bracket", - RightBracket => "closing bracket", - LeftParen => "opening paren", - RightParen => "closing paren", - LeftBrace => "opening brace", - RightBrace => "closing brace", - Chain => "function chain operator", - Colon => "colon", - Comma => "comma", - Equals => "equals sign", - Ident(_) => "identifier", - Str { .. } => "string", - Bool(_) => "bool", - Number(_) => "number", - Length(_) => "length", - Hex(_) => "hex value", - Plus => "plus", - Hyphen => "minus", - Slash => "slash", - Star => "star", - Underscore => "underscore", - Backslash => "backslash", - Hashtag => "hashtag", - UnicodeEscape { .. } => "unicode escape sequence", - Raw { .. } => "raw text", - Code { .. } => "code block", - Text(_) => "text", - Invalid("*/") => "end of block comment", - Invalid(_) => "invalid token", - } - } -} - -/// An iterator over the tokens of a string of source code. -#[derive(Debug)] -pub struct Tokens<'s> { - src: &'s str, - iter: Peekable<Chars<'s>>, - mode: TokenMode, - stack: Vec<TokenMode>, - pos: Pos, - index: usize, -} - -/// Whether to tokenize in header mode which yields expression, comma and -/// similar tokens or in body mode which yields text and star, underscore, -/// backtick tokens. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum TokenMode { - Header, - Body, -} - -impl<'s> Tokens<'s> { - /// Create a new token iterator with the given mode. - pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { - src, - iter: src.chars().peekable(), - mode, - stack: vec![], - pos: Pos::ZERO, - index: 0, - } - } - - /// Change the token mode and push the old one on a stack. - pub fn push_mode(&mut self, mode: TokenMode) { - self.stack.push(self.mode); - self.mode = mode; - } - - /// Pop the old token mode from the stack. This panics if there is no mode - /// on the stack. - pub fn pop_mode(&mut self) { - self.mode = self.stack.pop().expect("no pushed mode"); - } - - /// The index in the string at which the last token ends and next token will - /// start. - pub fn index(&self) -> usize { - self.index - } - - /// The line-colunn position in the source at which the last token ends and - /// next token will start. - pub fn pos(&self) -> Pos { - self.pos - } -} - -impl<'s> Iterator for Tokens<'s> { - type Item = Spanned<Token<'s>>; - - /// Parse the next token in the source code. - fn next(&mut self) -> Option<Self::Item> { - let start = self.pos(); - let first = self.eat()?; - - let token = match first { - // Comments. - '/' if self.peek() == Some('/') => self.read_line_comment(), - '/' if self.peek() == Some('*') => self.read_block_comment(), - '*' if self.peek() == Some('/') => { - self.eat(); - Invalid("*/") - } - - // Whitespace. - c if c.is_whitespace() => self.read_whitespace(start), - - // Functions and blocks. - '[' => LeftBracket, - ']' => RightBracket, - '{' => LeftBrace, - '}' => RightBrace, - - // Syntactic elements in function headers. - '(' if self.mode == Header => LeftParen, - ')' if self.mode == Header => RightParen, - ':' if self.mode == Header => Colon, - ',' if self.mode == Header => Comma, - '=' if self.mode == Header => Equals, - '>' if self.mode == Header && self.peek() == Some('>') => self.read_chain(), - - // Expression operators. - '+' if self.mode == Header => Plus, - '-' if self.mode == Header => Hyphen, - '/' if self.mode == Header => Slash, - - // Star serves a double purpose as a style modifier - // and a expression operator in the header. - '*' => Star, - - // A hex expression. - '#' if self.mode == Header => self.read_hex(), - - // String values. - '"' if self.mode == Header => self.read_string(), - - // Style toggles. - '_' if self.mode == Body => Underscore, - '`' if self.mode == Body => self.read_raw_or_code(), - - // Sections. - '#' if self.mode == Body => Hashtag, - - // Non-breaking spaces. - '~' if self.mode == Body => Text("\u{00A0}"), - - // An escaped thing. - '\\' if self.mode == Body => self.read_escaped(), - - // Expressions or just strings. - c => { - let body = self.mode == Body; - - let start_offset = -(c.len_utf8() as isize); - let mut last_was_e = false; - - let (text, _) = self.read_string_until(false, start_offset, 0, |n| { - let val = match n { - c if c.is_whitespace() => true, - '[' | ']' | '{' | '}' | '/' | '*' => true, - '\\' | '_' | '`' | '#' | '~' if body => true, - ':' | '=' | ',' | '"' | '(' | ')' if !body => true, - '+' | '-' if !body && !last_was_e => true, - _ => false, - }; - - last_was_e = n == 'e' || n == 'E'; - val - }); - - if self.mode == Header { - self.read_expr(text) - } else { - Text(text) - } - } - }; - - let end = self.pos(); - let span = Span { start, end }; - - Some(Spanned { v: token, span }) - } -} - -impl<'s> Tokens<'s> { - fn read_line_comment(&mut self) -> Token<'s> { - self.eat(); - LineComment(self.read_string_until(false, 0, 0, is_newline_char).0) - } - - fn read_block_comment(&mut self) -> Token<'s> { - enum Last { - Slash, - Star, - Other, - } - - let mut depth = 0; - let mut last = Last::Other; - - // Find the first `*/` that does not correspond to a nested `/*`. - // Remove the last two bytes to obtain the raw inner text without `*/`. - self.eat(); - let (content, _) = self.read_string_until(true, 0, -2, |c| { - match c { - '/' => match last { - Last::Star if depth == 0 => return true, - Last::Star => depth -= 1, - _ => last = Last::Slash, - }, - '*' => match last { - Last::Slash => depth += 1, - _ => last = Last::Star, - }, - _ => last = Last::Other, - } - - false - }); - - BlockComment(content) - } - - fn read_chain(&mut self) -> Token<'s> { - assert!(self.eat() == Some('>')); - Chain - } - - fn read_whitespace(&mut self, start: Pos) -> Token<'s> { - self.read_string_until(false, 0, 0, |n| !n.is_whitespace()); - let end = self.pos(); - - Space(end.line - start.line) - } - - fn read_string(&mut self) -> Token<'s> { - let (string, terminated) = self.read_until_unescaped('"'); - Str { string, terminated } - } - - fn read_raw_or_code(&mut self) -> Token<'s> { - let (raw, terminated) = self.read_until_unescaped('`'); - if raw.is_empty() && terminated && self.peek() == Some('`') { - // Third tick found; this is a code block. - self.eat(); - - // Reads the lang tag (until newline or whitespace). - let start = self.pos(); - let (lang, _) = self.read_string_until(false, 0, 0, |c| { - c == '`' || c.is_whitespace() || is_newline_char(c) - }); - let end = self.pos(); - - let lang = if !lang.is_empty() { - Some(Spanned::new(lang, Span::new(start, end))) - } else { - None - }; - - // Skip to start of raw contents. - while let Some(c) = self.peek() { - if is_newline_char(c) { - self.eat(); - if c == '\r' && self.peek() == Some('\n') { - self.eat(); - } - - break; - } else if c.is_whitespace() { - self.eat(); - } else { - break; - } - } - - let start = self.index(); - let mut backticks = 0u32; - - while backticks < 3 { - match self.eat() { - Some('`') => backticks += 1, - // Escaping of triple backticks. - Some('\\') if backticks == 1 && self.peek() == Some('`') => { - backticks = 0; - } - Some(_) => {} - None => break, - } - } - - let terminated = backticks == 3; - let end = self.index() - if terminated { 3 } else { 0 }; - - Code { - lang, - raw: &self.src[start .. end], - terminated, - } - } else { - Raw { raw, terminated } - } - } - - fn read_until_unescaped(&mut self, end: char) -> (&'s str, bool) { - let mut escaped = false; - self.read_string_until(true, 0, -1, |c| { - match c { - c if c == end && !escaped => return true, - '\\' => escaped = !escaped, - _ => escaped = false, - } - - false - }) - } - - fn read_escaped(&mut self) -> Token<'s> { - fn is_escapable(c: char) -> bool { - match c { - '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => true, - _ => false, - } - } - - match self.peek() { - Some('u') => { - self.eat(); - if self.peek() == Some('{') { - self.eat(); - let (sequence, _) = - self.read_string_until(false, 0, 0, |c| !c.is_ascii_hexdigit()); - - let terminated = self.peek() == Some('}'); - if terminated { - self.eat(); - } - - UnicodeEscape { sequence, terminated } - } else { - Text("\\u") - } - } - Some(c) if is_escapable(c) => { - let index = self.index(); - self.eat(); - Text(&self.src[index .. index + c.len_utf8()]) - } - Some(c) if c.is_whitespace() => Backslash, - Some(_) => Text("\\"), - None => Backslash, - } - } - - fn read_hex(&mut self) -> Token<'s> { - // This will parse more than the permissable 0-9, a-f, A-F character - // ranges to provide nicer error messages later. - Hex(self.read_string_until(false, 0, 0, |n| !n.is_ascii_alphanumeric()).0) - } - - fn read_expr(&mut self, text: &'s str) -> Token<'s> { - if let Ok(b) = text.parse::<bool>() { - Bool(b) - } else if let Ok(num) = text.parse::<f64>() { - Number(num) - } else if let Some(num) = parse_percentage(text) { - Number(num / 100.0) - } else if let Ok(length) = text.parse::<Length>() { - Length(length) - } else if is_identifier(text) { - Ident(text) - } else { - Invalid(text) - } - } - - /// Will read the input stream until `f` evaluates to `true`. When - /// `eat_match` is true, the token for which `f` was true is consumed. - /// Returns the string from the index where this was called offset by - /// `offset_start` to the end offset by `offset_end`. The end is before or - /// after the match depending on `eat_match`. - fn read_string_until( - &mut self, - eat_match: bool, - offset_start: isize, - offset_end: isize, - mut f: impl FnMut(char) -> bool, - ) -> (&'s str, bool) { - let start = ((self.index() as isize) + offset_start) as usize; - let mut matched = false; - - while let Some(c) = self.peek() { - if f(c) { - matched = true; - if eat_match { - self.eat(); - } - break; - } - - self.eat(); - } - - let mut end = self.index(); - if matched { - end = ((end as isize) + offset_end) as usize; - } - - (&self.src[start .. end], matched) - } - - fn eat(&mut self) -> Option<char> { - let c = self.iter.next()?; - self.index += c.len_utf8(); - - if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) { - self.pos.line += 1; - self.pos.column = 0; - } else { - self.pos.column += 1; - } - - Some(c) - } - - fn peek(&mut self) -> Option<char> { - self.iter.peek().copied() - } -} - -fn parse_percentage(text: &str) -> Option<f64> { - if text.ends_with('%') { - text[.. text.len() - 1].parse::<f64>().ok() - } else { - None - } -} - -/// Whether this character denotes a newline. -pub fn is_newline_char(character: char) -> bool { - match character { - // Line Feed, Vertical Tab, Form Feed, Carriage Return. - '\x0A' ..= '\x0D' => true, - // Next Line, Line Separator, Paragraph Separator. - '\u{0085}' | '\u{2028}' | '\u{2029}' => true, - _ => false, - } -} - -/// Whether this word is a valid identifier. -pub fn is_identifier(string: &str) -> bool { - fn is_extra_allowed(c: char) -> bool { - c == '.' || c == '-' || c == '_' - } - - let mut chars = string.chars(); - match chars.next() { - Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {} - _ => return false, - } - - for c in chars { - match c { - c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {} - _ => return false, - } - } - - true -} - -#[cfg(test)] -#[allow(non_snake_case)] -mod tests { - use super::super::span::Spanned; - use super::*; - use crate::length::Length; - use crate::syntax::tests::*; - use Token::{ - BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id, - LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len, - LineComment as LC, Number as Num, Plus, RightBrace as RB, RightBracket as R, - RightParen as RP, Slash, Space as S, Star, Text as T, - }; - - fn Str(string: &str, terminated: bool) -> Token { - Token::Str { string, terminated } - } - fn Raw(raw: &str, terminated: bool) -> Token { - Token::Raw { raw, terminated } - } - fn Code<'a>( - lang: Option<Spanned<&'a str>>, - raw: &'a str, - terminated: bool, - ) -> Token<'a> { - Token::Code { lang, raw, terminated } - } - fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> { - Some(Into::<Spanned<&str>>::into(lang)) - } - fn UE(sequence: &str, terminated: bool) -> Token { - Token::UnicodeEscape { sequence, terminated } - } - - macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } - macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } - macro_rules! test { - (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => { - let exp = vec![$(Into::<Spanned<Token>>::into($token)),*]; - let found = Tokens::new($src, $mode).collect::<Vec<_>>(); - check($src, exp, found, $spans); - } - } - - #[test] - fn tokenize_whitespace() { - t!(Body, "" => ); - t!(Body, " " => S(0)); - t!(Body, " " => S(0)); - t!(Body, "\t" => S(0)); - t!(Body, " \t" => S(0)); - t!(Body, "\n" => S(1)); - t!(Body, "\n " => S(1)); - t!(Body, " \n" => S(1)); - t!(Body, " \n " => S(1)); - t!(Body, "\r\n" => S(1)); - t!(Body, " \n\t \n " => S(2)); - t!(Body, "\n\r" => S(2)); - t!(Body, " \r\r\n \x0D" => S(3)); - t!(Body, "a~b" => T("a"), T("\u{00A0}"), T("b")); - } - - #[test] - fn tokenize_comments() { - t!(Body, "a // bc\n " => T("a"), S(0), LC(" bc"), S(1)); - t!(Body, "a //a//b\n " => T("a"), S(0), LC("a//b"), S(1)); - t!(Body, "a //a//b\r\n" => T("a"), S(0), LC("a//b"), S(1)); - t!(Body, "a //a//b\n\nhello" => T("a"), S(0), LC("a//b"), S(2), T("hello")); - t!(Body, "/**/" => BC("")); - t!(Body, "_/*_/*a*/*/" => Underscore, BC("_/*a*/")); - t!(Body, "/*/*/" => BC("/*/")); - t!(Body, "abc*/" => T("abc"), Invalid("*/")); - t!(Body, "/***/" => BC("*")); - t!(Body, "/**\\****/*/*/" => BC("*\\***"), Invalid("*/"), Invalid("*/")); - t!(Body, "/*abc" => BC("abc")); - } - - #[test] - fn tokenize_body_only_tokens() { - t!(Body, "_*" => Underscore, Star); - t!(Body, "***" => Star, Star, Star); - t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star); - t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there")); - t!(Body, "`raw`" => Raw("raw", true)); - t!(Body, "# hi" => Hashtag, S(0), T("hi")); - t!(Body, "#()" => Hashtag, T("()")); - t!(Body, "`[func]`" => Raw("[func]", true)); - t!(Body, "`]" => Raw("]", false)); - t!(Body, "\\ " => Backslash, S(0)); - t!(Body, "`\\``" => Raw("\\`", true)); - t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false)); - t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true)); - t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false)); - t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false)); - t!(Header, "_`" => Invalid("_`")); - } - - #[test] - fn tokenize_header_only_tokens() { - t!(Body, "a: b" => T("a:"), S(0), T("b")); - t!(Body, "c=d, " => T("c=d,"), S(0)); - t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma); - t!(Header, "a:b" => Id("a"), Colon, Id("b")); - t!(Header, "#6ae6dd" => Hex("6ae6dd")); - t!(Header, "#8A083c" => Hex("8A083c")); - t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0), - Id("x"), Equals, Num(1.0)); - t!(Header, "=3.14" => Equals, Num(3.14)); - t!(Header, "12.3e5" => Num(12.3e5)); - t!(Header, "120%" => Num(1.2)); - t!(Header, "12e4%" => Num(1200.0)); - t!(Header, "__main__" => Id("__main__")); - t!(Header, ">main" => Invalid(">main")); - t!(Header, ".func.box" => Id(".func.box")); - t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")); - t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g")); - t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0))); - t!(Header, "1e5in" => Len(Length::inches(100000.0))); - t!(Header, "2.3cm" => Len(Length::cm(2.3))); - t!(Header, "12e-3in" => Len(Length::inches(12e-3))); - t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)), - Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0)); - t!(Header, "(5 - 1) / 2.1" => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP, - S(0), Slash, S(0), Num(2.1)); - t!(Header, "-1" => Min, Num(1.0)); - t!(Header, "--1" => Min, Min, Num(1.0)); - t!(Header, "- 1" => Min, S(0), Num(1.0)); - t!(Header, "02.4mm" => Len(Length::mm(2.4))); - t!(Header, "2.4.cm" => Invalid("2.4.cm")); - t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP); - t!(Header, "{abc}" => LB, Id("abc"), RB); - t!(Header, "π, π," => Invalid("π"), Comma, S(0), Invalid("π"), Comma); - } - - #[test] - fn tokenize_strings() { - t!(Body, "a \"hi\" string" => T("a"), S(0), T("\"hi\""), S(0), T("string")); - t!(Header, "\"hello" => Str("hello", false)); - t!(Header, "\"hello world\"" => Str("hello world", true)); - t!(Header, "\"hello\nworld\"" => Str("hello\nworld", true)); - t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false)); - t!(Header, r#""a\"bc""# => Str(r#"a\"bc"#, true)); - t!(Header, r#""a\\"bc""# => Str(r#"a\\"#, true), Id("bc"), Str("", false)); - t!(Header, r#""a\tbc"# => Str("a\\tbc", false)); - t!(Header, "\"π\"" => Str("π", true)); - } - - #[test] - fn tokenize_escaped_symbols() { - t!(Body, r"\\" => T(r"\")); - t!(Body, r"\[" => T("[")); - t!(Body, r"\]" => T("]")); - t!(Body, r"\*" => T("*")); - t!(Body, r"\_" => T("_")); - t!(Body, r"\`" => T("`")); - t!(Body, r"\/" => T("/")); - t!(Body, r"\u{2603}" => UE("2603", true)); - t!(Body, r"\u{26A4" => UE("26A4", false)); - t!(Body, r#"\""# => T("\"")); - } - - #[test] - fn tokenize_unescapable_symbols() { - t!(Body, r"\a" => T("\\"), T("a")); - t!(Body, r"\:" => T(r"\"), T(":")); - t!(Body, r"\=" => T(r"\"), T("=")); - t!(Body, r"\u{2GA4"=> UE("2", false), T("GA4")); - t!(Body, r"\u{ " => UE("", false), Space(0)); - t!(Body, r"\u" => T(r"\u")); - t!(Header, r"\\\\" => Invalid(r"\\\\")); - t!(Header, r"\a" => Invalid(r"\a")); - t!(Header, r"\:" => Invalid(r"\"), Colon); - t!(Header, r"\=" => Invalid(r"\"), Equals); - t!(Header, r"\," => Invalid(r"\"), Comma); - } - - #[test] - fn tokenize_with_spans() { - ts!(Body, "hello" => s(0,0, 0,5, T("hello"))); - ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c"))); - ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f"))); - ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore)); - ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0))); - } -} diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 715db109..f243e67a 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -4,12 +4,12 @@ use std::fmt::{self, Debug, Formatter}; use super::decoration::Decoration; use super::span::{SpanVec, Spanned}; -use super::tokens::is_identifier; use crate::color::RgbaColor; use crate::compute::table::{SpannedEntry, Table}; use crate::compute::value::{TableValue, Value}; use crate::layout::LayoutContext; use crate::length::Length; +use crate::parse::is_identifier; use crate::{DynFuture, Feedback}; /// A collection of nodes which form a tree together with the nodes' children. |
