summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/func/mod.rs 2
-rw-r--r-- src/library/mod.rs 7
-rw-r--r-- src/size.rs 2
-rw-r--r-- src/style.rs 13
-rw-r--r-- src/syntax/mod.rs 60
-rw-r--r-- src/syntax/parsing.rs 835
-rw-r--r-- src/syntax/span.rs 2
-rw-r--r-- src/syntax/tokens.rs 719
-rw-r--r-- tests/parse.rs 21
-rw-r--r-- tests/parsing/base.rs 78
-rw-r--r-- tests/parsing/tokens.rs 62
11 files changed, 375 insertions, 1426 deletions
diff --git a/src/func/mod.rs b/src/func/mod.rs
index 69f28e00..01c77327 100644
--- a/src/func/mod.rs
+++ b/src/func/mod.rs
@@ -15,7 +15,7 @@ pub mod prelude {
pub use crate::func::{Scope, ParseFunc, LayoutFunc, Command, Commands};
pub use crate::layout::prelude::*;
pub use crate::syntax::{
- parse, ParseContext, ParseResult,
+ ParseContext, ParseResult,
SyntaxTree, FuncCall, FuncArgs, PosArg, KeyArg,
Expression, Ident, ExpressionKind,
Spanned, Span
diff --git a/src/library/mod.rs b/src/library/mod.rs
index 013e9962..92c3c948 100644
--- a/src/library/mod.rs
+++ b/src/library/mod.rs
@@ -297,9 +297,10 @@ function! {
parse!(forbidden: body);
if let Some(name) = args.get_pos_opt::<Ident>()? {
- let flip = args.get_key_opt::<bool>("flip")?
- .unwrap_or(false);
- PageSizeFunc::Paper(Paper::from_name(name.as_str())?, flip)
+ let flip = args.get_key_opt::<bool>("flip")?.unwrap_or(false);
+ let paper = Paper::from_name(name.as_str())
+ .ok_or_else(|| error!(@"invalid paper name: `{}`", name))?;
+ PageSizeFunc::Paper(paper, flip)
} else {
PageSizeFunc::Custom(ExtentMap::new(&mut args, true)?)
}
diff --git a/src/size.rs b/src/size.rs
index 5b84c2ad..a5bc5d7f 100644
--- a/src/size.rs
+++ b/src/size.rs
@@ -72,7 +72,7 @@ impl Size {
impl Display for Size {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{}cm", self.to_cm())
+ write!(f, "{}pt", self.points)
}
}
diff --git a/src/style.rs b/src/style.rs
index e552a63d..35de5da1 100644
--- a/src/style.rs
+++ b/src/style.rs
@@ -3,7 +3,6 @@
use toddle::query::{FontFallbackTree, FontVariant, FontStyle, FontWeight};
use crate::size::{Size, Size2D, SizeBox, ValueBox, PSize};
-use crate::syntax::ParseResult;
/// Defines properties of pages and text.
@@ -157,7 +156,7 @@ pub struct Paper {
impl Paper {
/// The paper with the given name.
- pub fn from_name(name: &str) -> ParseResult<Paper> {
+ pub fn from_name(name: &str) -> Option<Paper> {
parse_paper(name)
}
}
@@ -193,11 +192,11 @@ macro_rules! papers {
class: $class,
};)*
- fn parse_paper(paper: &str) -> ParseResult<Paper> {
- Ok(match paper.to_lowercase().as_str() {
- $($($patterns)* => $var,)*
- _ => error!("unknown paper size: `{}`", paper),
- })
+ fn parse_paper(paper: &str) -> Option<Paper> {
+ match paper.to_lowercase().as_str() {
+ $($($patterns)* => Some($var),)*
+ _ => None,
+ }
}
};
}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index b0cbcafa..10a509d2 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -11,48 +11,6 @@ pub_use_mod!(parsing);
pub_use_mod!(span);
-/// A logical unit of the incoming text stream.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Token<'s> {
- /// One or more whitespace (non-newline) codepoints.
- Space,
- /// A line feed (`\n`, `\r\n` and some more as defined by the Unicode standard).
- Newline,
- /// A left bracket: `[`.
- LeftBracket,
- /// A right bracket: `]`.
- RightBracket,
- /// A colon (`:`) indicating the beginning of function arguments (Function
- /// header only).
- ///
- /// If a colon occurs outside of a function header, it will be tokenized as
- /// [Text](Token::Text), just like the other tokens annotated with
- /// _Header only_.
- Colon,
- /// An equals (`=`) sign assigning a function argument a value (Header only).
- Equals,
- /// A comma (`,`) separating two function arguments (Header only).
- Comma,
- /// Quoted text as a string value (Header only).
- Quoted(&'s str),
- /// An underscore, indicating text in italics (Body only).
- Underscore,
- /// A star, indicating bold text (Body only).
- Star,
- /// A backtick, indicating monospace text (Body only).
- Backtick,
- /// A line comment.
- LineComment(&'s str),
- /// A block comment.
- BlockComment(&'s str),
- /// A star followed by a slash unexpectedly ending a block comment
- /// (the comment was not started before, otherwise a
- /// [BlockComment](Token::BlockComment) would be returned).
- StarSlash,
- /// Any consecutive string which does not contain markup.
- Text(&'s str),
-}
-
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
@@ -256,11 +214,11 @@ debug_display!(Expression);
pub struct Ident(pub String);
impl Ident {
- pub fn new(string: String) -> ParseResult<Ident> {
- if is_identifier(&string) {
- Ok(Ident(string))
+ pub fn new<S>(ident: S) -> Option<Ident> where S: AsRef<str> + Into<String> {
+ if is_identifier(ident.as_ref()) {
+ Some(Ident(ident.into()))
} else {
- error!("invalid identifier: `{}`", string);
+ None
}
}
@@ -277,20 +235,20 @@ impl Display for Ident {
debug_display!(Ident);
-/// Whether this word is a valid unicode identifier.
+/// Whether this word is a valid identifier.
fn is_identifier(string: &str) -> bool {
let mut chars = string.chars();
match chars.next() {
- Some('-') => (),
- Some(c) if UnicodeXID::is_xid_start(c) => (),
+ Some('-') => {}
+ Some(c) if UnicodeXID::is_xid_start(c) => {}
_ => return false,
}
while let Some(c) = chars.next() {
match c {
- '.' | '-' => (),
- c if UnicodeXID::is_xid_continue(c) => (),
+ '.' | '-' => {}
+ c if UnicodeXID::is_xid_continue(c) => {}
_ => return false,
}
}
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index dc39145a..4a50ef96 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -1,7 +1,4 @@
-//! Parsing of token streams into syntax trees.
-
use crate::func::Scope;
-use crate::size::Size;
use super::*;
@@ -10,7 +7,7 @@ pub type ParseResult<T> = crate::TypesetResult<T>;
/// Parses source code into a syntax tree given a context.
pub fn parse(src: &str, ctx: ParseContext) -> ParseResult<SyntaxTree> {
- Parser::new(src, ctx).parse()
+ unimplemented!()
}
/// The context for parsing.
@@ -19,833 +16,3 @@ pub struct ParseContext<'a> {
/// The scope containing function definitions.
pub scope: &'a Scope,
}
-
-/// Transforms token streams into syntax trees.
-#[derive(Debug)]
-struct Parser<'s> {
- src: &'s str,
- tokens: PeekableTokens<'s>,
- ctx: ParseContext<'s>,
- tree: SyntaxTree,
- color_tokens: Vec<Spanned<ColorToken>>,
-}
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum NewlineState {
- /// No newline yet.
- Zero,
- /// We saw one newline with the given span already and are
- /// looking for another.
- One(Span),
- /// We saw at least two newlines and wrote one, thus not
- /// writing another one for more newlines.
- TwoOrMore,
-}
-
-impl<'s> Parser<'s> {
- /// Create a new parser from the source code and the context.
- fn new(src: &'s str, ctx: ParseContext<'s>) -> Parser<'s> {
- Parser {
- src,
- tokens: PeekableTokens::new(tokenize(src)),
- ctx,
- tree: SyntaxTree::new(),
- color_tokens: vec![],
- }
- }
-
- /// Parse the source into a syntax tree.
- fn parse(mut self) -> ParseResult<SyntaxTree> {
- while self.tokens.peek().is_some() {
- self.parse_white()?;
- self.parse_body_part()?;
- }
-
- Ok(self.tree)
- }
-
- /// Parse the next part of the body.
- fn parse_body_part(&mut self) -> ParseResult<()> {
- use Token::*;
-
- if let Some(token) = self.tokens.peek() {
- match token.v {
- // Functions.
- LeftBracket => self.parse_func()?,
- RightBracket => error!("unexpected closing bracket"),
-
- // Modifiers.
- Underscore => self.add_consumed(Node::ToggleItalics, token.span),
- Star => self.add_consumed(Node::ToggleBolder, token.span),
- Backtick => self.add_consumed(Node::ToggleMonospace, token.span),
-
- // Normal text.
- Text(word) => self.add_consumed(Node::Text(word.to_owned()), token.span),
-
- // The rest is handled elsewhere or should not happen, because
- // the tokenizer does not yield these in a body.
- Space | Newline | LineComment(_) | BlockComment(_) |
- Colon | Equals | Comma | Quoted(_) | StarSlash
- => panic!("parse_body_part: unexpected token: {:?}", token),
- }
- }
-
- Ok(())
- }
-
- /// Parse a complete function from the current position.
- fn parse_func(&mut self) -> ParseResult<()> {
- // This should only be called if a left bracket was seen.
- let token = self.tokens.next().expect("parse_func: expected token");
- assert!(token.v == Token::LeftBracket);
-
- self.add_color_token(ColorToken::Bracket, token.span);
-
- let mut span = token.span;
- let name = self.parse_func_name()?;
-
- // Check for arguments
- let args = match self.tokens.next() {
- Some(Spanned { v: Token::RightBracket, span }) => {
- self.add_color_token(ColorToken::Bracket, span);
- FuncArgs::new()
- },
- Some(Spanned { v: Token::Colon, span }) => {
- self.add_color_token(ColorToken::Colon, span);
- self.parse_func_args()?
- }
- _ => error!("expected arguments or closing bracket"),
- };
-
- span.end = self.tokens.get_position();
- let (func, body_span) = self.parse_func_call(name, args)?;
-
- if let Some(body_span) = body_span {
- span.expand(body_span);
- }
-
- // Finally this function is parsed to the end.
- self.add(Node::Func(func), span);
-
- Ok(())
- }
-
- /// Parse a function header.
- fn parse_func_name(&mut self) -> ParseResult<Spanned<Ident>> {
- self.skip_white();
-
- let name = match self.tokens.next() {
- Some(Spanned { v: Token::Text(word), span }) => {
- let ident = Ident::new(word.to_string())?;
- Spanned::new(ident, span)
- }
- _ => error!("expected identifier"),
- };
-
- self.add_color_token(ColorToken::FuncName, name.span);
- self.skip_white();
-
- Ok(name)
- }
-
- /// Parse the arguments to a function.
- fn parse_func_args(&mut self) -> ParseResult<FuncArgs> {
- let mut args = FuncArgs::new();
-
- loop {
- self.skip_white();
-
- match self.parse_func_arg()? {
- Some(DynArg::Pos(arg)) => args.add_pos(arg),
- Some(DynArg::Key(arg)) => args.add_key(arg),
- None => {},
- }
-
- match self.tokens.next() {
- Some(Spanned { v: Token::Comma, span }) => {
- self.add_color_token(ColorToken::Comma, span);
- }
- Some(Spanned { v: Token::RightBracket, span }) => {
- self.add_color_token(ColorToken::Bracket, span);
- break;
- }
- _ => error!("expected comma or closing bracket"),
- }
- }
-
- Ok(args)
- }
-
- /// Parse one argument to a function.
- fn parse_func_arg(&mut self) -> ParseResult<Option<DynArg>> {
- let token = match self.tokens.peek() {
- Some(token) => token,
- None => return Ok(None),
- };
-
- Ok(match token.v {
- Token::Text(name) => {
- self.advance();
- self.skip_white();
-
- Some(match self.tokens.peek() {
- Some(Spanned { v: Token::Equals, span }) => {
- self.advance();
- self.skip_white();
-
- let name = Ident::new(name.to_string())?;
- let key = Spanned::new(name, token.span);
-
- self.add_color_token(ColorToken::KeyArg, key.span);
- self.add_color_token(ColorToken::Equals, span);
-
- let next = self.tokens.next()
- .ok_or_else(|| error!(@"expected expression"))?;
-
- let value = Self::parse_expression(next)?;
-
- self.add_expr_token(&value);
-
- let span = Span::merge(key.span, value.span);
- let arg = KeyArg { key, value };
-
- DynArg::Key(Spanned::new(arg, span))
- }
-
- _ => {
- let expr = Self::parse_expression(token)?;
- self.add_expr_token(&expr);
- DynArg::Pos(expr)
- }
- })
- }
-
- Token::Quoted(_) => {
- self.advance();
- self.skip_white();
-
- self.add_color_token(ColorToken::ExprStr, token.span);
-
- Some(DynArg::Pos(Self::parse_expression(token)?))
- }
-
- _ => None,
- })
- }
-
- /// Parse a function call.
- fn parse_func_call(&mut self, name: Spanned<Ident>, args: FuncArgs)
- -> ParseResult<(FuncCall, Option<Span>)> {
- // Now we want to parse this function dynamically.
- let parser = self
- .ctx
- .scope
- .get_parser(&name.v.0)
- .ok_or_else(|| error!(@"unknown function: `{}`", &name.v))?;
-
- let has_body = self.tokens.peek().map(Spanned::value) == Some(Token::LeftBracket);
-
- // Do the parsing dependent on whether the function has a body.
- Ok(if has_body {
- self.advance();
-
- // Find out the string which makes the body of this function.
- let start_index = self.tokens.string_index();
- let mut start_pos = self.tokens.get_position();
- start_pos.column -= 1;
-
- let (mut end_index, mut end_pos) =
- find_closing_bracket(&self.src[start_index..])
- .ok_or_else(|| error!(@"expected closing bracket"))?;
-
- end_index += start_index;
- end_pos.column += 1;
-
- let span = Span::new(start_pos, end_pos);
-
- // Parse the body.
- let body_string = &self.src[start_index..end_index];
- let body = parser(args, Some(body_string), self.ctx)?;
-
- // Skip to the end of the function in the token stream.
- self.tokens.set_string_index(end_index);
-
- // Now the body should be closed.
- let token = self.tokens.next().expect("parse_func_body: expected token");
- assert!(token.v == Token::RightBracket);
-
- (FuncCall(body), Some(span))
- } else {
- (FuncCall(parser(args, None, self.ctx)?), None)
- })
- }
-
- /// Parse an expression.
- fn parse_expression(token: Spanned<Token>) -> ParseResult<Spanned<Expression>> {
- Ok(Spanned::new(match token.v {
- Token::Quoted(text) => Expression::Str(text.to_owned()),
- Token::Text(text) => {
- if let Ok(b) = text.parse::<bool>() {
- Expression::Bool(b)
- } else if let Ok(num) = text.parse::<f64>() {
- Expression::Num(num)
- } else if let Ok(size) = text.parse::<Size>() {
- Expression::Size(size)
- } else {
- // This loop does not actually loop, but is used for breaking.
- loop {
- if text.ends_with('%') {
- if let Ok(percent) = text[.. text.len()-1].parse::<f64>() {
- break Expression::Num(percent / 100.0);
- }
- }
-
- break Expression::Ident(Ident::new(text.to_string())?);
- }
- }
- }
- _ => error!("expected expression"),
- }, token.span))
- }
-
- /// Parse whitespace (as long as there is any) and skip over comments.
- fn parse_white(&mut self) -> ParseResult<()> {
- let mut state = NewlineState::Zero;
-
- while let Some(token) = self.tokens.peek() {
- match token.v {
- Token::Space => {
- self.advance();
- match state {
- NewlineState::Zero | NewlineState::TwoOrMore => {
- self.add_space(token.span);
- }
- _ => {}
- }
- }
-
- Token::Newline => {
- self.advance();
- match state {
- NewlineState::Zero => state = NewlineState::One(token.span),
- NewlineState::One(span) => {
- self.add(Node::Newline, Span::merge(span, token.span));
- state = NewlineState::TwoOrMore;
- },
- NewlineState::TwoOrMore => self.add_space(token.span),
- }
- }
-
- _ => {
- if let NewlineState::One(span) = state {
- self.add_space(Span::new(span.start, token.span.start));
- }
-
- state = NewlineState::Zero;
- match token.v {
- Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
- Token::StarSlash => error!("unexpected end of block comment"),
- _ => break,
- }
- }
- }
- }
-
- Ok(())
- }
-
- /// Skip over whitespace and comments.
- fn skip_white(&mut self) {
- while let Some(token) = self.tokens.peek() {
- match token.v {
- Token::Space | Token::Newline |
- Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
- _ => break,
- }
- }
- }
-
- /// Advance the iterator by one step.
- fn advance(&mut self) {
- self.tokens.next();
- }
-
- /// Append a node to the tree.
- fn add(&mut self, node: Node, span: Span) {
- self.tree.nodes.push(Spanned::new(node, span));
- }
-
- /// Append a space, merging with a previous space if there is one.
- fn add_space(&mut self, span: Span) {
- match self.tree.nodes.last_mut() {
- Some(ref mut node) if node.v == Node::Space => node.span.expand(span),
- _ => self.add(Node::Space, span),
- }
- }
-
- /// Advance and return the given node.
- fn add_consumed(&mut self, node: Node, span: Span) {
- self.advance();
- self.add(node, span);
- }
-
- /// Add a color token to the list.
- fn add_color_token(&mut self, token: ColorToken, span: Span) {
- self.color_tokens.push(Spanned::new(token, span));
- }
-
- /// Add a color token for an expression.
- fn add_expr_token(&mut self, expr: &Spanned<Expression>) {
- let kind = match expr.v {
- Expression::Bool(_) => ColorToken::ExprBool,
- Expression::Ident(_) => ColorToken::ExprIdent,
- Expression::Num(_) => ColorToken::ExprNumber,
- Expression::Size(_) => ColorToken::ExprSize,
- Expression::Str(_) => ColorToken::ExprStr,
- };
-
- self.add_color_token(kind, expr.span);
- }
-}
-
-/// Find the index of the first unbalanced and unescaped closing bracket.
-fn find_closing_bracket(src: &str) -> Option<(usize, Position)> {
- let mut parens = 0;
- let mut escaped = false;
- let mut line = 1;
- let mut line_start_index = 0;
-
- for (index, c) in src.char_indices() {
- match c {
- '\\' => {
- escaped = !escaped;
- continue;
- }
- c if is_newline_char(c) => {
- line += 1;
- line_start_index = index + c.len_utf8();
- }
- ']' if !escaped && parens == 0 => {
- let position = Position {
- line,
- column: index - line_start_index,
- };
-
- return Some((index, position))
- }
- '[' if !escaped => parens += 1,
- ']' if !escaped => parens -= 1,
- _ => {}
- }
- escaped = false;
- }
- None
-}
-
-/// A peekable iterator for tokens which allows access to the original iterator
-/// inside this module (which is needed by the parser).
-#[derive(Debug, Clone)]
-struct PeekableTokens<'s> {
- tokens: Tokens<'s>,
- peeked: Option<Option<Spanned<Token<'s>>>>,
-}
-
-impl<'s> PeekableTokens<'s> {
- /// Create a new iterator from a string.
- fn new(tokens: Tokens<'s>) -> PeekableTokens<'s> {
- PeekableTokens {
- tokens,
- peeked: None,
- }
- }
-
- /// Peek at the next element.
- fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
- let iter = &mut self.tokens;
- *self.peeked.get_or_insert_with(|| iter.next())
- }
-
- fn get_position(&self) -> Position {
- match self.peeked {
- Some(Some(peeked)) => peeked.span.start,
- _ => self.tokens.get_position(),
- }
- }
-
- fn string_index(&self) -> usize {
- match self.peeked {
- Some(Some(peeked)) => peeked.span.start.line,
- _ => self.tokens.string_index(),
- }
- }
-
- fn set_string_index(&mut self, index: usize) {
- self.tokens.set_string_index(index);
- self.peeked = None;
- }
-}
-
-impl<'s> Iterator for PeekableTokens<'s> {
- type Item = Spanned<Token<'s>>;
-
- fn next(&mut self) -> Option<Self::Item> {
- match self.peeked.take() {
- Some(value) => value,
- None => self.tokens.next(),
- }
- }
-}
-
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
- use crate::func::{Commands, Scope};
- use crate::layout::{LayoutContext, LayoutResult};
- use crate::syntax::*;
- use Node::{Func as F, Newline as N, Space as S};
-
- function! {
- /// A testing function which just parses its body into a syntax
- /// tree.
- #[derive(Debug)]
- pub struct TreeFn { pub tree: SyntaxTree }
-
- parse(args, body, ctx) {
- args.clear();
- TreeFn {
- tree: parse!(expected: body, ctx)
- }
- }
-
- layout() { vec![] }
- }
-
- impl PartialEq for TreeFn {
- fn eq(&self, other: &TreeFn) -> bool {
- assert_tree_equal(&self.tree, &other.tree);
- true
- }
- }
-
- function! {
- /// A testing function without a body.
- #[derive(Debug, Default, PartialEq)]
- pub struct BodylessFn(Vec<Expression>, Vec<(Ident, Expression)>);
-
- parse(args, body) {
- parse!(forbidden: body);
- BodylessFn(
- args.pos().map(Spanned::value).collect(),
- args.keys().map(|arg| (arg.v.key.v, arg.v.value.v)).collect(),
- )
- }
-
- layout() { vec![] }
- }
-
- mod args {
- use super::*;
- use super::Expression;
- pub use Expression::{Num as N, Size as Z, Bool as B};
-
- pub fn S(string: &str) -> Expression { Expression::Str(string.to_owned()) }
- pub fn I(string: &str) -> Expression {
- Expression::Ident(Ident::new(string.to_owned()).unwrap())
- }
- }
-
- /// Asserts that two syntax trees are equal except for all spans inside them.
- fn assert_tree_equal(a: &SyntaxTree, b: &SyntaxTree) {
- for (x, y) in a.nodes.iter().zip(&b.nodes) {
- if x.v != y.v {
- panic!("trees are not equal: ({:#?}) != ({:#?})", x.v, y.v);
- }
- }
- }
-
- /// Test if the source code parses into the syntax tree.
- fn test(src: &str, tree: SyntaxTree) {
- let ctx = ParseContext {
- scope: &Scope::new(),
- };
- assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
- }
-
- /// Test with a scope containing function definitions.
- fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
- let ctx = ParseContext { scope };
- assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
- }
-
- /// Test if the source parses into the error.
- fn test_err(src: &str, err: &str) {
- let ctx = ParseContext {
- scope: &Scope::new(),
- };
- assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
- }
-
- /// Test with a scope if the source parses into the error.
- fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
- let ctx = ParseContext { scope };
- assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
- }
-
- fn test_color(scope: &Scope, src: &str, tokens: Vec<(usize, usize, ColorToken)>) {
- let ctx = ParseContext { scope };
- let tree = parse(src, ctx).unwrap();
- // assert_eq!(tree.tokens,
- // tokens.into_iter()
- // .map(|(s, e, t)| Spanned::new(t, Span::new(s, e)))
- // .collect::<Vec<_>>()
- // );
- }
-
- /// Create a text node.
- fn T(s: &str) -> Node {
- Node::Text(s.to_owned())
- }
-
- fn zerospan<T>(val: T) -> Spanned<T> {
- Spanned::new(val, Span::new(Position::new(0, 0), Position::new(0, 0)))
- }
-
- /// Shortcut macro to create a syntax tree. Is `vec`-like and the elements
- /// are the nodes without spans.
- macro_rules! tree {
- ($($x:expr),*) => ({
- #[allow(unused_mut)] let mut nodes = vec![];
- $(
- nodes.push(zerospan($x));
- )*
- SyntaxTree { nodes }
- });
- ($($x:expr,)*) => (tree![$($x),*])
- }
-
- /// Shortcut macro to create a function.
- macro_rules! func {
- () => (
- FuncCall(Box::new(BodylessFn(vec![], vec![])))
- );
- (body: $tree:expr $(,)*) => (
- FuncCall(Box::new(TreeFn { tree: $tree }))
- );
- (args: $pos:expr, $key:expr) => (
- FuncCall(Box::new(BodylessFn($pos, $key)))
- );
- }
-
- /// Parse the basic cases.
- #[test]
- #[rustfmt::skip]
- fn parse_base() {
- test("", tree! []);
- test("Hello World!", tree! [ T("Hello"), S, T("World!") ]);
- }
-
- /// Test whether newlines generate the correct whitespace.
- #[test]
- #[rustfmt::skip]
- fn parse_newlines_whitespace() {
- test("Hello\nWorld", tree! [ T("Hello"), S, T("World") ]);
- test("Hello \n World", tree! [ T("Hello"), S, T("World") ]);
- test("Hello\n\nWorld", tree! [ T("Hello"), N, T("World") ]);
- test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
- test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]);
- test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]);
- test("Hello\n \n\n World", tree! [ T("Hello"), N, S, T("World") ]);
- test("Hello\n \nWorld", tree! [ T("Hello"), N, T("World") ]);
- }
-
- /// Parse things dealing with functions.
- #[test]
- #[rustfmt::skip]
- fn parse_functions() {
- let mut scope = Scope::new();
- scope.add::<BodylessFn>("test");
- scope.add::<BodylessFn>("end");
- scope.add::<TreeFn>("modifier");
- scope.add::<TreeFn>("func");
-
- test_scoped(&scope,"[test]", tree! [ F(func! {}) ]);
- test_scoped(&scope,"[ test]", tree! [ F(func! {}) ]);
- test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [
- T("This"), S, T("is"), S, T("an"), S,
- F(func! { body: tree! [ T("example") ] }), S,
- T("of"), S, T("a"), S, T("function"), S, T("invocation.")
- ]);
- test_scoped(&scope, "[func][Hello][modifier][Here][end]", tree! [
- F(func! { body: tree! [ T("Hello") ] }),
- F(func! { body: tree! [ T("Here") ] }),
- F(func! {}),
- ]);
- test_scoped(&scope, "[func][]", tree! [ F(func! { body: tree! [] }) ]);
- test_scoped(&scope, "[modifier][[func][call]] outside", tree! [
- F(func! { body: tree! [ F(func! { body: tree! [ T("call") ] }) ] }), S, T("outside")
- ]);
-
- }
-
- /// Parse functions with arguments.
- #[test]
- #[rustfmt::skip]
- fn parse_function_args() {
- use args::*;
-
- fn func(
- pos: Vec<Expression>,
- key: Vec<(&str, Expression)>,
- ) -> SyntaxTree {
- let key = key.into_iter()
- .map(|s| (Ident::new(s.0.to_string()).unwrap(), s.1))
- .collect();
-
- tree! [ F(func!(args: pos, key)) ]
- }
-
- let mut scope = Scope::new();
- scope.add::<BodylessFn>("align");
-
- test_scoped(&scope, "[align: left]", func(vec![I("left")], vec![]));
- test_scoped(&scope, "[align: left,right]", func(vec![I("left"), I("right")], vec![]));
- test_scoped(&scope, "[align: left, right]", func(vec![I("left"), I("right")], vec![]));
- test_scoped(&scope, "[align: \"hello\"]", func(vec![S("hello")], vec![]));
- test_scoped(&scope, r#"[align: "hello\"world"]"#, func(vec![S(r#"hello\"world"#)], vec![]));
- test_scoped(&scope, "[align: 12]", func(vec![N(12.0)], vec![]));
- test_scoped(&scope, "[align: 17.53pt]", func(vec![Z(Size::pt(17.53))], vec![]));
- test_scoped(&scope, "[align: 2.4in]", func(vec![Z(Size::inches(2.4))], vec![]));
- test_scoped(&scope, "[align: true, 10mm, left, \"hi, there\"]",
- func(vec![B(true), Z(Size::mm(10.0)), I("left"), S("hi, there")], vec![]));
-
- test_scoped(&scope, "[align: right=true]", func(vec![], vec![("right", B(true))]));
- test_scoped(&scope, "[align: flow = horizontal]",
- func(vec![], vec![("flow", I("horizontal"))]));
- test_scoped(&scope, "[align: x=1cm, y=20mm]",
- func(vec![], vec![("x", Z(Size::cm(1.0))), ("y", Z(Size::mm(20.0)))]));
- test_scoped(&scope, "[align: x=5.14,a, \"b\", c=me,d=you]",
- func(vec![I("a"), S("b")], vec![("x", N(5.14)), ("c", I("me")), ("d", I("you"))]));
- }
-
- /// Parse comments (line and block).
- #[test]
- #[rustfmt::skip]
- fn parse_comments() {
- let mut scope = Scope::new();
- scope.add::<BodylessFn>("test");
- scope.add::<TreeFn>("func");
-
- test_scoped(&scope, "Text\n// Comment\n More text",
- tree! [ T("Text"), S, T("More"), S, T("text") ]);
- test_scoped(&scope, "[test/*world*/]",
- tree! [ F(func! {}) ]);
- test_scoped(&scope, "[test/*]*/]",
- tree! [ F(func! {}) ]);
- }
-
- /// Test if escaped, but unbalanced parens are correctly parsed.
- #[test]
- #[rustfmt::skip]
- fn parse_unbalanced_body_parens() {
- let mut scope = Scope::new();
- scope.add::<TreeFn>("code");
-
- test_scoped(&scope, r"My [code][Close \]] end", tree! [
- T("My"), S, F(func! { body: tree! [ T("Close"), S, T("]") ] }), S, T("end")
- ]);
- test_scoped(&scope, r"My [code][\[ Open] end", tree! [
- T("My"), S, F(func! { body: tree! [ T("["), S, T("Open") ] }), S, T("end")
- ]);
- test_scoped(&scope, r"My [code][Open \] and \[ close]end", tree! [
- T("My"), S, F(func! { body:
- tree! [ T("Open"), S, T("]"), S, T("and"), S, T("["), S, T("close") ]
- }), T("end")
- ]);
- }
-
- /// Tests if the parser handles non-ASCII stuff correctly.
- #[test]
- #[rustfmt::skip]
- fn parse_unicode() {
- let mut scope = Scope::new();
- scope.add::<BodylessFn>("func");
- scope.add::<TreeFn>("bold");
-
- test_scoped(&scope, "[func] ⺐.", tree! [ F(func! {}), S, T("⺐.") ]);
- test_scoped(&scope, "[bold][Hello 🌍!]", tree! [
- F(func! { body: tree! [ T("Hello"), S, T("🌍!") ] })
- ]);
- }
-
- /// Tests whether spans get calculated correctly.
- #[test]
- #[rustfmt::skip]
- fn parse_spans() {
- fn test_span(src: &str, correct: Vec<(usize, usize, usize, usize)>) {
- let mut scope = Scope::new();
- scope.add::<TreeFn>("hello");
- let tree = parse(src, ParseContext { scope: &scope }).unwrap();
- let spans = tree.nodes.into_iter()
- .map(|node| {
- let Span { start, end } = node.span;
- (start.line, start.column, end.line, end.column)
- })
- .collect::<Vec<_>>();
-
- assert_eq!(spans, correct);
- }
-
- test_span("hello world", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
- test_span("p1\n \np2", vec![(1, 0, 1, 2), (1, 2, 2, 2), (3, 0, 3, 2)]);
-
- let src = "func\n [hello: pos, other][body\r\n _🌍_\n]";
- test_span(src, vec![
- (1, 0, 1, 4),
- (1, 4, 2, 1),
- (2, 1, 4, 1)
- ]);
- }
-
- /// Tests whether errors get reported correctly.
- #[test]
- #[rustfmt::skip]
- fn parse_errors() {
- let mut scope = Scope::new();
- scope.add::<TreeFn>("hello");
-
- test_err("No functions here]", "unexpected closing bracket");
- test_err_scoped(&scope, "[hello][world", "expected closing bracket");
- test_err("[hello world", "expected arguments or closing bracket");
- test_err("[ no^name][Why?]", "invalid identifier: `no^name`");
- test_err("Hello */", "unexpected end of block comment");
- }
-
- /// Tests syntax highlighting.
- #[test]
- #[rustfmt::skip]
- fn test_highlighting() {
- use ColorToken::{Bracket as B, FuncName as F, *};
-
- let mut scope = Scope::new();
- scope.add::<BodylessFn>("func");
- scope.add::<TreeFn>("tree");
-
- test_color(&scope, "[func]", vec![(0, 1, B), (1, 5, F), (5, 6, B)]);
- test_color(&scope, "[func: 12pt]", vec![
- (0, 1, B), (1, 5, F), (5, 6, Colon), (7, 11, ExprSize), (11, 12, B)
- ]);
- test_color(&scope, "[func: x=25.3, y=\"hi\"]", vec![
- (0, 1, B), (1, 5, F), (5, 6, Colon),
- (7, 8, KeyArg), (8, 9, Equals), (9, 13, ExprNumber),
- (13, 14, Comma),
- (15, 16, KeyArg), (16, 17, Equals), (17, 21, ExprStr),
- (21, 22, B),
- ]);
-
- test_color(&scope, "Hello [tree][With [func: 3]]", vec![
- (6, 7, B), (7, 11, F), (11, 12, B),
- (12, 13, B), (18, 19, B)
- ]);
- }
-}
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index bc7001a9..bbb6a206 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -45,8 +45,6 @@ impl Span {
}
pub fn merge(a: Span, b: Span) -> Span {
- let start = a.start.min(b.start);
-
Span {
start: a.start.min(b.start),
end: a.end.max(b.end),
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index cf37fe48..efcd1fc0 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -1,88 +1,87 @@
-//! Tokenization of source code.
-
-use std::str::CharIndices;
-use smallvec::SmallVec;
+use std::iter::Peekable;
+use std::str::Chars;
use super::*;
+use Token::*;
+use State::*;
-/// Builds an iterator over the tokens of the source code.
pub fn tokenize(src: &str) -> Tokens {
Tokens::new(src)
}
-/// An iterator over the tokens of source code.
-#[derive(Debug, Clone)]
+/// A minimal semantic entity of source code.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token<'s> {
+ /// One or more whitespace characters. The contained `usize` denotes the
+ /// number of newlines that were contained in the whitespace.
+ Whitespace(usize),
+
+ /// A line comment with inner string contents `//<&'s str>\n`.
+ LineComment(&'s str),
+ /// A block comment with inner string contents `/*<&'s str>*/`. The comment
+ /// can contain nested block comments.
+ BlockComment(&'s str),
+ /// An erroneous `*/` without an opening block comment.
+ StarSlash,
+
+ /// A left bracket: `[`.
+ LeftBracket,
+ /// A right bracket: `]`.
+ RightBracket,
+
+ /// A left parenthesis in a function header: `(`.
+ LeftParen,
+ /// A right parenthesis in a function header: `)`.
+ RightParen,
+ /// A left brace in a function header: `{`.
+ LeftBrace,
+ /// A right brace in a function header: `}`.
+ RightBrace,
+
+ /// A colon in a function header: `:`.
+ Colon,
+ /// A comma in a function header: `,`.
+ Comma,
+ /// An equals sign in a function header: `=`.
+ Equals,
+
+ /// An expression in a function header.
+ Expr(Expression),
+
+ /// A star in body-text.
+ Star,
+ /// An underscore in body-text.
+ Underscore,
+ /// A backtick in body-text.
+ Backtick,
+
+ /// Any other consecutive string.
+ Text(&'s str),
+}
+
+/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
src: &'s str,
- chars: PeekableChars<'s>,
- state: TokensState,
- stack: SmallVec<[TokensState; 1]>,
- line: usize,
- line_start_index: usize,
+ chars: Characters<'s>,
+ state: State,
+ stack: Vec<State>,
}
-/// The state the tokenizer is in.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum TokensState {
- /// The base state if there is nothing special we are in.
+enum State {
+ Header,
+ StartBody,
Body,
- /// Inside a function header. Here colons and equal signs get parsed
- /// as distinct tokens rather than text.
- Function,
- /// We expect either the end of the function or the beginning of the body.
- MaybeBody,
}
impl<'s> Tokens<'s> {
- /// Create a new token stream from source code.
pub fn new(src: &'s str) -> Tokens<'s> {
Tokens {
src,
- chars: PeekableChars::new(src),
- state: TokensState::Body,
- stack: SmallVec::new(),
- line: 1,
- line_start_index: 0,
- }
- }
-
- /// The index of the first character of the next token in the source string.
- pub fn string_index(&self) -> usize {
- self.chars.string_index()
- }
-
- /// Go to a new position in the underlying string.
- pub fn set_string_index(&mut self, index: usize) {
- self.chars.set_string_index(index);
- }
-
- /// The current position in the source.
- pub fn get_position(&self) -> Position {
- self.line_position(self.string_index())
- }
-
- /// Advance the iterator by one step.
- fn advance(&mut self) {
- self.chars.next();
- }
-
- /// Switch to the given state.
- fn switch(&mut self, state: TokensState) {
- self.stack.push(self.state);
- self.state = state;
- }
-
- /// Go back to the top-of-stack state.
- fn unswitch(&mut self) {
- self.state = self.stack.pop().unwrap_or(TokensState::Body);
- }
-
- /// The `Position` with line and column for a string index.
- fn line_position(&self, index: usize) -> Position {
- Position {
- line: self.line,
- column: index - self.line_start_index,
+ chars: Characters::new(src),
+ state: State::Body,
+ stack: vec![],
}
}
}
@@ -90,455 +89,281 @@ impl<'s> Tokens<'s> {
impl<'s> Iterator for Tokens<'s> {
type Item = Spanned<Token<'s>>;
- /// Advance the iterator, return the next token or nothing.
- fn next(&mut self) -> Option<Self::Item> {
- use TokensState as TS;
-
- // Go to the body state if the function has a body or return to the top-of-stack
- // state.
- if self.state == TS::MaybeBody {
- if let Some((index, '[')) = self.chars.peek() {
- self.advance();
- self.state = TS::Body;
- let span = Span::at(self.line_position(index));
- return Some(Spanned::new(Token::LeftBracket, span));
- } else {
- self.unswitch();
- }
- }
+ /// Parse the next token in the source code.
+ fn next(&mut self) -> Option<Spanned<Token<'s>>> {
+ let start = self.chars.position();
+ let first = self.chars.next()?;
+ let second = self.chars.peek();
- // Take the next char and peek at the one behind.
- let (pos, next) = self.chars.next()?;
- let afterwards = self.chars.peekc();
+ let token = match first {
+ // Comments.
+ '/' if second == Some('/') => self.parse_line_comment(),
+ '/' if second == Some('*') => self.parse_block_comment(),
+ '*' if second == Some('/') => { self.eat(); StarSlash }
- /// The index at which the line ended, if it did.
- let mut eol = None;
+ // Whitespace.
+ c if c.is_whitespace() => self.parse_whitespace(c),
- let token = match next {
- // Functions
- '[' => {
- self.switch(TS::Function);
- Token::LeftBracket
- }
+ // Functions.
+ '[' => { self.set_state(Header); LeftBracket }
']' => {
- if self.state == TS::Function {
- self.state = TS::MaybeBody;
+ if self.state == Header && second == Some('[') {
+ self.state = StartBody;
} else {
- self.unswitch();
+ self.pop_state();
}
- Token::RightBracket
+ RightBracket
}
- // Line comment
- '/' if afterwards == Some('/') => {
- let start = self.string_index() + 1;
-
- while let Some(c) = self.chars.peekc() {
- if is_newline_char(c) {
- break;
+ // Syntactic elements in function headers.
+ '(' if self.state == Header => LeftParen,
+ ')' if self.state == Header => RightParen,
+ '{' if self.state == Header => LeftBrace,
+ '}' if self.state == Header => RightBrace,
+ ':' if self.state == Header => Colon,
+ ',' if self.state == Header => Comma,
+ '=' if self.state == Header => Equals,
+
+ // String values.
+ '"' if self.state == Header => self.parse_string(),
+
+ // Style toggles.
+ '*' if self.state == Body => Star,
+ '_' if self.state == Body => Underscore,
+ '`' if self.state == Body => Backtick,
+
+ // An escaped thing.
+ '\\' => self.parse_escaped(),
+
+ // Expressions or just strings.
+ c => {
+ let word = self.read_string_until(|n| {
+ match n {
+ c if c.is_whitespace() => true,
+ '\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' |
+ ',' | '"' | '/' => true,
+ _ => false,
}
- self.advance();
- }
+ }, false, -(c.len_utf8() as isize), 0);
- let end = self.string_index();
- Token::LineComment(&self.src[start..end])
- }
-
- // Block comment
- '/' if afterwards == Some('*') => {
- let start = self.string_index() + 1;
- let mut nested = 0;
-
- while let Some((_, c)) = self.chars.next() {
- let after = self.chars.peekc();
- match (c, after) {
- ('*', Some('/')) if nested == 0 => {
- self.advance();
- break;
- }
- ('/', Some('*')) => {
- self.advance();
- nested += 1
- }
- ('*', Some('/')) => {
- self.advance();
- nested -= 1
- }
- _ => {}
- }
+ if self.state == Header {
+ self.parse_expr(word)
+ } else {
+ Text(word)
}
-
- let end = self.string_index() - 2;
- Token::BlockComment(&self.src[start..end])
- }
-
- // Unexpected end of block comment
- '*' if afterwards == Some('/') => {
- self.advance();
- Token::StarSlash
}
+ };
- // Whitespace
- ' ' | '\t' => {
- while let Some(c) = self.chars.peekc() {
- match c {
- ' ' | '\t' => self.advance(),
- _ => break,
- }
- }
+ let end = self.chars.position();
+ let span = Span { start, end };
- Token::Space
- }
-
- // Newlines
- '\r' if afterwards == Some('\n') => {
- self.advance();
- eol = Some(pos + "\r\n".len());
- Token::Newline
- }
- c if is_newline_char(c) => {
- eol = Some(pos + c.len_utf8());
- Token::Newline
- }
+ Some(Spanned { v: token, span })
+ }
+}
- // Star/Underscore/Backtick in bodies
- '*' if self.state == TS::Body => Token::Star,
- '_' if self.state == TS::Body => Token::Underscore,
- '`' if self.state == TS::Body => Token::Backtick,
-
- // Context sensitive operators in headers
- ':' if self.state == TS::Function => Token::Colon,
- '=' if self.state == TS::Function => Token::Equals,
- ',' if self.state == TS::Function => Token::Comma,
-
- // A string value.
- '"' if self.state == TS::Function => {
- let start = self.string_index();
- let mut end = start;
- let mut escaped = false;
-
- while let Some((index, c)) = self.chars.next() {
- end = index;
- if c == '"' && !escaped {
- break;
- }
+impl<'s> Tokens<'s> {
+ fn parse_line_comment(&mut self) -> Token<'s> {
+ LineComment(self.read_string_until(is_newline_char, false, 1, 0))
+ }
- escaped = c == '\\';
- }
+ fn parse_block_comment(&mut self) -> Token<'s> {
+ enum Last { Slash, Star, Other }
+ use Last::*;
- Token::Quoted(&self.src[start..end])
- }
+ self.eat();
- // Escaping
- '\\' => {
- if let Some((index, c)) = self.chars.peek() {
- let escapable = match c {
- '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | ',' | '/' => true,
- _ => false,
- };
+ let mut depth = 0;
+ let mut last = Last::Other;
- if escapable {
- self.advance();
- Token::Text(&self.src[index..index + c.len_utf8()])
- } else {
- Token::Text("\\")
- }
- } else {
- Token::Text("\\")
+ // Find the first `*/` that does not correspond to a nested `/*`.
+ // Remove the last two bytes to obtain the raw inner text without `*/`.
+ BlockComment(self.read_string_until(|n| {
+ match n {
+ '/' => match last {
+ Star if depth == 0 => return true,
+ Star => depth -= 1,
+ _ => last = Slash
}
+ '*' => match last {
+ Slash => depth += 1,
+ _ => last = Star,
+ }
+ _ => last = Other,
}
- // Normal text
- _ => {
- // Find out when the word ends.
- while let Some((_, c)) = self.chars.peek() {
- let second = self.chars.peekn(1).map(|p| p.1);
-
- // Whether the next token is still from the text or not.
- let continues = match c {
- '[' | ']' | '\\' => false,
- '*' | '_' | '`' if self.state == TS::Body => false,
- ':' | '=' | ',' | '"' if self.state == TS::Function => false,
-
- '/' => second != Some('/') && second != Some('*'),
- '*' => second != Some('/'),
+ false
+ }, true, 0, -2))
+ }
- ' ' | '\t' => false,
- c if is_newline_char(c) => false,
+ fn parse_whitespace(&mut self, c: char) -> Token<'s> {
+ let mut newlines = if is_newline_char(c) { 1 } else { 0 };
+ let mut last = c;
- _ => true,
- };
+ self.read_string_until(|n| {
+ if is_newline_char(n) && !(last == '\r' && n == '\n') {
+ newlines += 1;
+ }
- if !continues {
- break;
- }
+ last = n;
+ !n.is_whitespace()
+ }, false, 0, 0);
- self.advance();
- }
+ Whitespace(newlines)
+ }
- let end = self.string_index();
- Token::Text(&self.src[pos..end])
+ fn parse_string(&mut self) -> Token<'s> {
+ let mut escaped = false;
+ Expr(Expression::Str(self.read_string_until(|n| {
+ if n == '"' && !escaped {
+ return true;
+ } else if n == '\\' {
+ escaped = !escaped;
+ } else {
+ escaped = false;
}
- };
- let start = self.line_position(pos);
- let end = self.get_position();
- let span = Span::new(start, end);
+ false
+ }, true, 0, -1).to_string()))
+ }
- if let Some(index) = eol {
- self.line += 1;
- self.line_start_index = index;
+ fn parse_escaped(&mut self) -> Token<'s> {
+ fn is_escapable(c: char) -> bool {
+ match c {
+ '\\' | '[' | ']' | '*' | '_' | '`' | '/' => true,
+ _ => false,
+ }
}
- Some(Spanned::new(token, span))
- }
-}
-
-/// Whether this character is a newline (or starts one).
-pub(crate) fn is_newline_char(character: char) -> bool {
- match character {
- '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
- _ => false,
+ let c = self.chars.peek().unwrap_or('n');
+ if self.state == Body && is_escapable(c) {
+ let index = self.chars.index();
+ self.eat();
+ Text(&self.src[index .. index + c.len_utf8()])
+ } else {
+ Text("\\")
+ }
}
-}
-/// A (index, char) iterator with double lookahead.
-#[derive(Debug, Clone)]
-struct PeekableChars<'s> {
- string: &'s str,
- chars: CharIndices<'s>,
- peeked: SmallVec<[Option<(usize, char)>; 2]>,
- base: usize,
- index: usize,
-}
-
-impl<'s> PeekableChars<'s> {
- /// Create a new iterator from a string.
- fn new(string: &'s str) -> PeekableChars<'s> {
- PeekableChars {
- string,
- chars: string.char_indices(),
- peeked: SmallVec::new(),
- base: 0,
- index: 0,
+ fn parse_expr(&mut self, word: &'s str) -> Token<'s> {
+ if let Ok(b) = word.parse::<bool>() {
+ Expr(Expression::Bool(b))
+ } else if let Ok(num) = word.parse::<f64>() {
+ Expr(Expression::Num(num))
+ } else if let Ok(num) = parse_percentage(word) {
+ Expr(Expression::Num(num / 100.0))
+ } else if let Ok(size) = word.parse::<Size>() {
+ Expr(Expression::Size(size))
+ } else if let Some(ident) = Ident::new(word) {
+ Expr(Expression::Ident(ident))
+ } else {
+ Text(word)
}
}
- /// Peek at the next element.
- fn peek(&mut self) -> Option<(usize, char)> {
- self.peekn(0)
- }
+ fn read_string_until<F>(
+ &mut self,
+ mut f: F,
+ eat_match: bool,
+ offset_start: isize,
+ offset_end: isize,
+ ) -> &'s str where F: FnMut(char) -> bool {
+ let start = ((self.chars.index() as isize) + offset_start) as usize;
+ let mut matched = false;
+
+ while let Some(c) = self.chars.peek() {
+ if f(c) {
+ matched = true;
+ if eat_match {
+ self.chars.next();
+ }
+ break;
+ }
- /// Peek at the char of the next element.
- fn peekc(&mut self) -> Option<char> {
- self.peekn(0).map(|p| p.1)
- }
+ self.chars.next();
+ }
- /// Peek at the element after the next element.
- fn peekn(&mut self, n: usize) -> Option<(usize, char)> {
- while self.peeked.len() <= n {
- let next = self.next_inner();
- self.peeked.push(next);
+ let mut end = self.chars.index();
+ if matched {
+ end = ((end as isize) + offset_end) as usize;
}
- self.peeked[n]
+ &self.src[start .. end]
}
- /// Return the next value of the inner iterator mapped with the offset.
- fn next_inner(&mut self) -> Option<(usize, char)> {
- self.chars.next().map(|(i, c)| (self.base + i, c))
+ fn set_state(&mut self, state: State) {
+ self.stack.push(self.state);
+ self.state = state;
}
- fn string_index(&self) -> usize {
- self.index
+ fn pop_state(&mut self) {
+ self.state = self.stack.pop().unwrap_or(Body);
}
- fn set_string_index(&mut self, index: usize) {
- self.chars = self.string[index..].char_indices();
- self.base = index;
- self.index = 0;
- self.peeked.clear();
+ fn eat(&mut self) {
+ self.chars.next();
}
}
-impl Iterator for PeekableChars<'_> {
- type Item = (usize, char);
-
- fn next(&mut self) -> Option<(usize, char)> {
- let next = if !self.peeked.is_empty() {
- self.peeked.remove(0)
- } else {
- self.next_inner()
- };
-
- if let Some((index, c)) = next {
- self.index = index + c.len_utf8();
- }
-
- next
+fn parse_percentage(word: &str) -> Result<f64, ()> {
+ if word.ends_with('%') {
+ word[.. word.len() - 1].parse::<f64>().map_err(|_| ())
+ } else {
+ Err(())
}
}
-#[cfg(test)]
-mod tests {
- use super::*;
- use Token::{
- Backtick as TB, BlockComment as BC, Colon as C, Equals as E, LeftBracket as L,
- LineComment as LC, Newline as N, Quoted as Q, RightBracket as R, Space as S, Star as TS,
- StarSlash as SS, Text as T, Underscore as TU,
- };
-
- /// Test if the source code tokenizes to the tokens.
- fn test(src: &str, tokens: Vec<Token>) {
- assert_eq!(Tokens::new(src)
- .map(|token| token.v)
- .collect::<Vec<_>>(), tokens);
- }
-
- /// Test if the tokens of the source code have the correct spans.
- fn test_span(src: &str, spans: Vec<(usize, usize, usize, usize)>) {
- assert_eq!(Tokens::new(src)
- .map(|token| {
- let Span { start, end } = token.span;
- (start.line, start.column, end.line, end.column)
- })
- .collect::<Vec<_>>(), spans);
- }
-
- /// Tokenizes the basic building blocks.
- #[test]
- #[rustfmt::skip]
- fn tokenize_base() {
- test("", vec![]);
- test("Hallo", vec![T("Hallo")]);
- test("[", vec![L]);
- test("]", vec![R]);
- test("*", vec![TS]);
- test("_", vec![TU]);
- test("`", vec![TB]);
- test("\n", vec![N]);
+/// Whether this character denotes a newline.
+fn is_newline_char(character: char) -> bool {
+ match character {
+ // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+ '\x0A' ..= '\x0D' => true,
+ // Next Line, Line Separator, Paragraph Separator.
+ '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+ _ => false,
}
+}
- /// This test looks if LF- and CRLF-style newlines get both identified correctly.
- #[test]
- #[rustfmt::skip]
- fn tokenize_whitespace_newlines() {
- test(" \t", vec![S]);
- test("First line\r\nSecond line\nThird line\n", vec![
- T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
- T("Third"), S, T("line"), N
- ]);
- test("Hello \n ", vec![T("Hello"), S, N, S]);
- test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
- }
+struct Characters<'s> {
+ iter: Peekable<Chars<'s>>,
+ position: Position,
+ index: usize,
+}
- /// Tests if escaping with backslash works as it should.
- #[test]
- #[rustfmt::skip]
- fn tokenize_escape() {
- test(r"\[", vec![T("[")]);
- test(r"\]", vec![T("]")]);
- test(r"\**", vec![T("*"), TS]);
- test(r"\*", vec![T("*")]);
- test(r"\__", vec![T("_"), TU]);
- test(r"\_", vec![T("_")]);
- test(r"\hello", vec![T("\\"), T("hello")]);
+impl<'s> Characters<'s> {
+ fn new(src: &'s str) -> Characters<'s> {
+ Characters {
+ iter: src.chars().peekable(),
+ position: Position::new(0, 0),
+ index: 0,
+ }
}
- /// Tests if escaped strings work.
- #[test]
- #[rustfmt::skip]
- fn tokenize_quoted() {
- test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
- }
+ fn next(&mut self) -> Option<char> {
+ let c = self.iter.next()?;
+ let len = c.len_utf8();
- /// Tokenizes some more realistic examples.
- #[test]
- #[rustfmt::skip]
- fn tokenize_examples() {
- test(r"
- [function][
- Test [italic][example]!
- ]
- ", vec![
- N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
- T("example"), R, T("!"), N, S, R, N, S
- ]);
-
- test(r"
- [page: size=A4]
- [font: size=12pt]
-
- Das ist ein Beispielsatz mit *fetter* Schrift.
- ", vec![
- N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
- L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
- T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
- TS, T("fetter"), TS, S, T("Schrift."), N, S
- ]);
- }
+ self.index += len;
- /// This test checks whether the colon and equals symbols get parsed correctly depending on the
- /// context: Either in a function header or in a body.
- #[test]
- #[rustfmt::skip]
- fn tokenize_symbols_context() {
- test("[func: key=value][Answer: 7]", vec![
- L, T("func"), C, S, T("key"), E, T("value"), R, L,
- T("Answer:"), S, T("7"), R
- ]);
- test("[[n: k=v]:x][:[=]]:=", vec![
- L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
- L, T(":"), L, E, R, R, T(":=")
- ]);
- test("[hi: k=[func][body] v=1][hello]", vec![
- L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
- T("v"), E, T("1"), R, L, T("hello"), R
- ]);
- test("[func: __key__=value]", vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
- test("The /*[*/ answer: 7.", vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
- }
+ if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
+ self.position.line += 1;
+ self.position.column = 0;
+ } else {
+ self.position.column += len;
+ }
- /// Test if block and line comments get tokenized as expected.
- #[test]
- #[rustfmt::skip]
- fn tokenize_comments() {
- test("These // Line comments.", vec![T("These"), S, LC(" Line comments.")]);
- test("This /* is */ a comment.", vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
- test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
- test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
- test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
- test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
+ Some(c)
}
- /// This test has a special look at the underscore syntax.
- #[test]
- #[rustfmt::skip]
- fn tokenize_underscores() {
- test("he_llo_world_ __ Now this_ is_ special!",
- vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
- T("this"), TU, S, T("is"), TU, S, T("special!")]);
+ fn peek(&mut self) -> Option<char> {
+ self.iter.peek().copied()
}
- /// This test is for checking if non-ASCII characters get parsed correctly.
- #[test]
- #[rustfmt::skip]
- fn tokenize_unicode() {
- test("[document][Hello 🌍!]", vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
- test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
+ fn index(&self) -> usize {
+ self.index
}
- /// This test checks if all tokens have the correct spans.
- #[test]
- #[rustfmt::skip]
- fn tokenize_spans() {
- test_span("Hello World", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
- test_span("🌍_🎈", vec![(1, 0, 1, 4), (1, 4, 1, 5), (1, 5, 1, 9)]);
- test_span("hello\nworld", vec![(1, 0, 1, 5), (1, 5, 1, 6), (2, 0, 2, 5)]);
- test_span("[hello: world]", vec![
- (1, 0, 1, 1), (1, 1, 1, 6), (1, 6, 1, 7),
- (1, 7, 1, 8), (1, 8, 1, 13), (1, 13, 1, 14)
- ]);
+ fn position(&self) -> Position {
+ self.position
}
}
diff --git a/tests/parse.rs b/tests/parse.rs
index 953cc959..e00b05d8 100644
--- a/tests/parse.rs
+++ b/tests/parse.rs
@@ -1,9 +1,26 @@
+#![allow(unused_imports)]
+#![allow(non_snake_case)]
+
+use typstc::size::Size;
use typstc::syntax::*;
use Token::{
- Space as S, Newline as N, LeftBracket as LB,
- RightBracket as RB, Text as T, *
+ Whitespace as W,
+ LineComment as LC, BlockComment as BC, StarSlash as SS,
+ LeftBracket as LB, RightBracket as RB,
+ LeftParen as LP, RightParen as RP,
+ LeftBrace as LBR, RightBrace as RBR,
+ Colon as CL, Comma as CM, Equals as EQ, Expr as E,
+ Star as ST, Underscore as U, Backtick as B, Text as T,
};
+use Expression as Expr;
+fn ID(ident: &str) -> Token { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) }
+fn STR(ident: &str) -> Token { E(Expr::Str(ident.to_string())) }
+fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) }
+fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) }
+fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
+
+
/// Parses the test syntax.
macro_rules! tokens {
($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
diff --git a/tests/parsing/base.rs b/tests/parsing/base.rs
deleted file mode 100644
index ad7d87c0..00000000
--- a/tests/parsing/base.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-// Spaces, Newlines, Brackets.
-"" => []
-" " => [S]
-" " => [S]
-"\t" => [S]
-" \t" => [S]
-"\n" => [N]
-"\n " => [N, S]
-" \n" => [S, N]
-" \n " => [S, N, S]
-"[" => [LB]
-"]" => [RB]
-
-// Header only tokens.
-"[:]" => [LB, Colon, RB]
-"[=]" => [LB, Equals, RB]
-"[,]" => [LB, Comma, RB]
-":" => [T(":")]
-"=" => [T("=")]
-"," => [T(",")]
-r#"["hi"]"# => [LB, Quoted("hi"), RB]
-r#""hi""# => [T(r#""hi""#)]
-
-// Body only tokens.
-"_" => [Underscore]
-"*" => [Star]
-"`" => [Backtick]
-"[_]" => [LB, T("_"), RB]
-"[*]" => [LB, T("*"), RB]
-"[`]" => [LB, T("`"), RB]
-
-// Comments.
-"//line" => [LineComment("line")]
-"/*block*/" => [BlockComment("block")]
-"*/" => [StarSlash]
-
-// Plain text.
-"A" => [T("A")]
-"Hello" => [T("Hello")]
-"Hello-World" => [T("Hello-World")]
-r#"A"B"# => [T(r#"A"B"#)]
-"🌍" => [T("🌍")]
-
-// Escapes.
-r"\[" => [T("[")]
-r"\]" => [T("]")]
-r"\\" => [T(r"\")]
-r"[\[]" => [LB, T("["), RB]
-r"[\]]" => [LB, T("]"), RB]
-r"[\\]" => [LB, T(r"\"), RB]
-r"\:" => [T(":")]
-r"\=" => [T("=")]
-r"\/" => [T("/")]
-r"[\:]" => [LB, T(":"), RB]
-r"[\=]" => [LB, T("="), RB]
-r"[\,]" => [LB, T(","), RB]
-r"\*" => [T("*")]
-r"\_" => [T("_")]
-r"\`" => [T("`")]
-r"[\*]" => [LB, T("*"), RB]
-r"[\_]" => [LB, T("_"), RB]
-r"[\`]" => [LB, T("`"), RB]
-
-// Whitespace.
-"Hello World" => [T("Hello"), S, T("World")]
-"Hello World" => [T("Hello"), S, T("World")]
-"Hello \t World" => [T("Hello"), S, T("World")]
-
-// Newline.
-"First\n" => [T("First"), N]
-"First \n" => [T("First"), S, N]
-"First\n " => [T("First"), N, S]
-"First \n " => [T("First"), S, N, S]
-"First\nSecond" => [T("First"), N, T("Second")]
-"First\r\nSecond" => [T("First"), N, T("Second")]
-"First \nSecond" => [T("First"), S, N, T("Second")]
-"First\n Second" => [T("First"), N, S, T("Second")]
-"First \n Second" => [T("First"), S, N, S, T("Second")]
diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs
new file mode 100644
index 00000000..4f5474bb
--- /dev/null
+++ b/tests/parsing/tokens.rs
@@ -0,0 +1,62 @@
+// Whitespace.
+"" => []
+" " => [W(0)]
+" " => [W(0)]
+"\t" => [W(0)]
+" \t" => [W(0)]
+"\n" => [W(1)]
+"\n " => [W(1)]
+" \n" => [W(1)]
+" \n " => [W(1)]
+" \n\t \n " => [W(2)]
+"\r\n" => [W(1)]
+" \r\r\n \x0D" => [W(3)]
+"\n\r" => [W(2)]
+
+// Comments.
+"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)]
+"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)]
+"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)]
+"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
+"/**/" => [BC("")]
+"_/*_/*a*/*/" => [U, BC("_/*a*/")]
+"/*/*/" => [BC("/*/")]
+"abc*/" => [T("abc"), SS]
+
+// Header only tokens.
+"[" => [LB]
+"]" => [RB]
+"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
+"[a:b]" => [LB, ID("a"), CL, ID("b"), RB]
+"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
+"[=]" => [LB, EQ, RB]
+"[,]" => [LB, CM, RB]
+"a: b" => [T("a"), T(":"), W(0), T("b")]
+"c=d, " => [T("c"), T("=d"), T(","), W(0)]
+r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
+r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
+"\"hi\"" => [T("\"hi"), T("\"")]
+"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
+ ID("x"), EQ, NUM(1.0), RB]
+"[120%]" => [LB, NUM(1.2), RB]
+
+// Body only tokens.
+"_*`" => [U, ST, B]
+"[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
+"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
+
+// Escapes.
+r"\[" => [T("[")]
+r"\]" => [T("]")]
+r"\\" => [T(r"\")]
+r"\/" => [T("/")]
+r"\*" => [T("*")]
+r"\_" => [T("_")]
+r"\`" => [T("`")]
+
+// Unescapable special symbols.
+r"\:" => [T(r"\"), T(":")]
+r"\=" => [T(r"\"), T("=")]
+r"[\:]" => [LB, T(r"\"), CL, RB]
+r"[\=]" => [LB, T(r"\"), EQ, RB]
+r"[\,]" => [LB, T(r"\"), CM, RB]