summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-08-17 16:25:09 +0200
committerLaurenz <laurmaedje@gmail.com>2020-08-17 16:25:09 +0200
commit3cbca56a7195bb2a7996530d584300d697c11dc8 (patch)
tree2a9242442bb79ae4b70895cabeb95b7aff2a4f5d
parent8a80503188804d576636265e71f72e9f55a7961a (diff)
Parse braced expressions and bracketed calls in headers 🗳
- Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push). - Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter) the eat and peek methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called to clear up the situation.
-rw-r--r--benches/bench_parsing.rs5
-rw-r--r--src/compute/table.rs2
-rw-r--r--src/layout/tree.rs2
-rw-r--r--src/lib.rs2
-rw-r--r--src/syntax/parsing.rs587
-rw-r--r--src/syntax/tokens.rs170
6 files changed, 388 insertions, 380 deletions
diff --git a/benches/bench_parsing.rs b/benches/bench_parsing.rs
index a3a17a84..4a8a7eb2 100644
--- a/benches/bench_parsing.rs
+++ b/benches/bench_parsing.rs
@@ -1,18 +1,17 @@
use criterion::{criterion_group, criterion_main, Criterion};
use typstc::syntax::parsing::parse;
-use typstc::syntax::span::Pos;
// 28 not too dense lines.
const COMA: &str = include_str!("../tests/coma.typ");
fn parsing_benchmark(c: &mut Criterion) {
c.bench_function("parse-coma-28-lines", |b| {
- b.iter(|| parse(COMA, Pos::ZERO))
+ b.iter(|| parse(COMA))
});
let long = COMA.repeat(100);
c.bench_function("parse-coma-2800-lines", |b| {
- b.iter(|| parse(&long, Pos::ZERO))
+ b.iter(|| parse(&long))
});
}
diff --git a/src/compute/table.rs b/src/compute/table.rs
index f11eacfc..75effd60 100644
--- a/src/compute/table.rs
+++ b/src/compute/table.rs
@@ -270,7 +270,7 @@ impl<V> SpannedEntry<V> {
/// Create an entry with the same span for key and value.
pub fn val(val: Spanned<V>) -> Self {
- Self { key: Span::ZERO, val }
+ Self { key: val.span, val }
}
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`
diff --git a/src/layout/tree.rs b/src/layout/tree.rs
index 39e111bd..092ba582 100644
--- a/src/layout/tree.rs
+++ b/src/layout/tree.rs
@@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> {
..self.ctx
}).await;
- self.feedback.extend_offset(pass.feedback, call.span.start);
+ self.feedback.extend(pass.feedback);
if let Value::Commands(commands) = pass.output {
for command in commands {
diff --git a/src/lib.rs b/src/lib.rs
index e30e41b2..30196034 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -87,7 +87,7 @@ impl Typesetter {
/// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
- parse(src, Pos::ZERO)
+ parse(src)
}
/// Layout a syntax tree and return the produced layout.
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index 8ed778e1..8dd567d3 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -12,94 +12,110 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
use super::Ident;
/// Parse a string of source code.
-///
-/// All spans in the resulting tree and feedback are offset by the given
-/// `offset` position. This is used to make spans of a function body relative to
-/// the start of the function as a whole as opposed to the start of the
-/// function's body.
-pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
- let mut tree = SyntaxTree::new();
- let mut par = SyntaxTree::new();
- let mut feedback = Feedback::new();
-
- for token in Tokens::new(src, offset, TokenMode::Body) {
- let span = token.span;
- let node = match token.v {
- // Starting from two newlines counts as a paragraph break, a single
- // newline does not.
- Token::Space(newlines) => if newlines < 2 {
- SyntaxNode::Spacing
- } else {
- // End the current paragraph if it is not empty.
- if let (Some(first), Some(last)) = (par.first(), par.last()) {
- let span = Span::merge(first.span, last.span);
- let node = SyntaxNode::Par(std::mem::take(&mut par));
- tree.push(Spanned::new(node, span));
- }
- continue;
- }
-
- Token::Function { header, body, terminated } => {
- let parsed = FuncParser::new(header, body).parse();
- feedback.extend_offset(parsed.feedback, span.start);
- if !terminated {
- error!(@feedback, Span::at(span.end), "expected closing bracket");
- }
- SyntaxNode::Call(parsed.output)
- }
-
- Token::Star => SyntaxNode::ToggleBolder,
- Token::Underscore => SyntaxNode::ToggleItalic,
- Token::Backslash => SyntaxNode::Linebreak,
- Token::Raw { raw, terminated } => {
- if !terminated {
- error!(@feedback, Span::at(span.end), "expected backtick");
- }
- SyntaxNode::Raw(unescape_raw(raw))
- }
- Token::Text(text) => SyntaxNode::Text(text.to_string()),
-
- Token::LineComment(_) | Token::BlockComment(_) => continue,
- unexpected => {
- error!(@feedback, span, "unexpected {}", unexpected.name());
- continue;
- }
- };
-
- par.push(Spanned::new(node, span));
- }
-
- if let (Some(first), Some(last)) = (par.first(), par.last()) {
- let span = Span::merge(first.span, last.span);
- let node = SyntaxNode::Par(par);
- tree.push(Spanned::new(node, span));
- }
-
- Pass::new(tree, feedback)
+pub fn parse(src: &str) -> Pass<SyntaxTree> {
+ Parser::new(src).parse()
}
-struct FuncParser<'s> {
+struct Parser<'s> {
tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
- body: Option<Spanned<&'s str>>,
+ delimiters: Vec<(Pos, Token<'static>)>,
feedback: Feedback,
}
-impl<'s> FuncParser<'s> {
- fn new(header: &'s str, body: Option<Spanned<&'s str>>) -> Self {
+impl<'s> Parser<'s> {
+ fn new(src: &'s str) -> Self {
Self {
- // Start at column 1 because the opening bracket is also part of
- // the function, but not part of the `header` string.
- tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header),
+ tokens: Tokens::new(src, TokenMode::Body),
peeked: None,
- body,
+ delimiters: vec![],
feedback: Feedback::new(),
}
}
- fn parse(mut self) -> Pass<CallExpr> {
- let after_bracket = self.pos();
+ fn parse(mut self) -> Pass<SyntaxTree> {
+ let tree = self.parse_body_contents();
+ Pass::new(tree, self.feedback)
+ }
+}
+
+// Typesetting content.
+impl Parser<'_> {
+ fn parse_body_contents(&mut self) -> SyntaxTree {
+ let mut tree = SyntaxTree::new();
+ let mut par = SyntaxTree::new();
+
+ while let Some(token) = self.peek() {
+ par.push(match token.v {
+ // Starting from two newlines counts as a paragraph break, a single
+ // newline does not.
+ Token::Space(newlines) => if newlines < 2 {
+ self.with_span(SyntaxNode::Spacing)
+ } else {
+ // End the current paragraph if it is not empty.
+ if let (Some(first), Some(last)) = (par.first(), par.last()) {
+ let span = Span::merge(first.span, last.span);
+ let node = SyntaxNode::Par(std::mem::take(&mut par));
+ tree.push(Spanned::new(node, span));
+ }
+ self.eat();
+ continue;
+ }
+ Token::LineComment(_) | Token::BlockComment(_) => {
+ self.eat();
+ continue
+ }
+
+ Token::LeftBracket => {
+ self.parse_bracket_call().map(|c| SyntaxNode::Call(c))
+ }
+
+ Token::Star => self.with_span(SyntaxNode::ToggleBolder),
+ Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
+ Token::Backslash => self.with_span(SyntaxNode::Linebreak),
+
+ Token::Raw { raw, terminated } => {
+ if !terminated {
+ error!(
+ @self.feedback, Span::at(token.span.end),
+ "expected backtick",
+ );
+ }
+ self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
+ }
+
+ Token::Text(text) => {
+ self.with_span(SyntaxNode::Text(text.to_string()))
+ }
+
+ unexpected => {
+ self.eat();
+ error!(
+ @self.feedback, token.span,
+ "unexpected {}", unexpected.name(),
+ );
+ continue;
+ }
+ });
+ }
+
+ if let (Some(first), Some(last)) = (par.first(), par.last()) {
+ let span = Span::merge(first.span, last.span);
+ let node = SyntaxNode::Par(par);
+ tree.push(Spanned::new(node, span));
+ }
+
+ tree
+ }
+}
+// Function calls.
+impl Parser<'_> {
+ fn parse_bracket_call(&mut self) -> Spanned<CallExpr> {
+ self.start_group(Delimiter::Bracket);
+ self.tokens.push_mode(TokenMode::Header);
+
+ let after_bracket = self.pos();
self.skip_white();
let name = self.parse_ident().unwrap_or_else(|| {
self.expected_found_or_at("function name", after_bracket);
@@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> {
});
self.skip_white();
- let mut args = match self.eat().map(Spanned::value) {
- Some(Token::Colon) => self.parse_table(false).0.v,
+ let mut args = match self.eatv() {
+ Some(Token::Colon) => self.parse_table_contents().0,
Some(_) => {
self.expected_at("colon", name.span.end);
+ while self.eat().is_some() {}
TableExpr::new()
}
None => TableExpr::new(),
};
- if let Some(body) = self.body {
- args.push(SpannedEntry::val(body.map(|src| {
- let parsed = parse(src, body.span.start);
- self.feedback.extend(parsed.feedback);
- Expr::Tree(parsed.output)
- })));
+ self.tokens.pop_mode();
+ let mut span = self.end_group();
+
+ if self.check(Token::LeftBracket) {
+ self.start_group(Delimiter::Bracket);
+ self.tokens.push_mode(TokenMode::Body);
+
+ let body = self.parse_body_contents();
+
+ self.tokens.pop_mode();
+ let body_span = self.end_group();
+
+ let expr = Expr::Tree(body);
+ args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
+ span.expand(body_span);
}
- Pass::new(CallExpr { name, args }, self.feedback)
+ Spanned::new(CallExpr { name, args }, span)
+ }
+
+ fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
+ self.start_group(Delimiter::Paren);
+ let args = self.parse_table_contents().0;
+ let args_span = self.end_group();
+ let span = Span::merge(name.span, args_span);
+ Spanned::new(CallExpr { name, args }, span)
}
}
-// Parsing expressions and values
-impl FuncParser<'_> {
- fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
- self.peek().and_then(|token| match token.v {
- Token::Ident(id) => self.eat_span(Ident(id.to_string())),
- _ => None,
- })
+// Tables.
+impl Parser<'_> {
+ fn parse_table_contents(&mut self) -> (TableExpr, bool) {
+ let mut table = TableExpr::new();
+ let mut comma_and_keyless = true;
+
+ while { self.skip_white(); !self.eof() } {
+ let (key, val) = if let Some(ident) = self.parse_ident() {
+ self.skip_white();
+
+ match self.peekv() {
+ Some(Token::Equals) => {
+ self.eat();
+ self.skip_white();
+
+ (Some(ident), try_opt_or!(self.parse_expr(), {
+ self.expected("value");
+ continue;
+ }))
+ }
+
+ Some(Token::LeftParen) => {
+ let call = self.parse_paren_call(ident);
+ (None, call.map(|c| Expr::Call(c)))
+ }
+
+ _ => (None, ident.map(|id| Expr::Ident(id)))
+ }
+ } else {
+ (None, try_opt_or!(self.parse_expr(), {
+ self.expected("value");
+ continue;
+ }))
+ };
+
+ let behind = val.span.end;
+ if let Some(key) = key {
+ comma_and_keyless = false;
+ table.insert(key.v.0, SpannedEntry::new(key.span, val));
+ self.feedback.decorations
+ .push(Spanned::new(Decoration::TableKey, key.span));
+ } else {
+ table.push(SpannedEntry::val(val));
+ }
+
+ if { self.skip_white(); self.eof() } {
+ break;
+ }
+
+ self.expect_at(Token::Comma, behind);
+ comma_and_keyless = false;
+ }
+
+ let coercable = comma_and_keyless && !table.is_empty();
+ (table, coercable)
}
+}
+// Expressions and values.
+impl Parser<'_> {
fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
self.parse_binops("summand", Self::parse_term, |token| match token {
Token::Plus => Some(Expr::Add),
@@ -206,37 +291,37 @@ impl FuncParser<'_> {
fn parse_value(&mut self) -> Option<Spanned<Expr>> {
let Spanned { v: token, span } = self.peek()?;
- match token {
+ Some(match token {
// This could be a function call or an identifier.
Token::Ident(id) => {
let name = Spanned::new(Ident(id.to_string()), span);
self.eat();
self.skip_white();
- Some(if self.check(Token::LeftParen) {
- self.parse_func_call(name).map(|call| Expr::Call(call))
+ if self.check(Token::LeftParen) {
+ self.parse_paren_call(name).map(|call| Expr::Call(call))
} else {
name.map(|id| Expr::Ident(id))
- })
+ }
}
Token::Str { string, terminated } => {
if !terminated {
self.expected_at("quote", span.end);
}
- self.eat_span(Expr::Str(unescape_string(string)))
+ self.with_span(Expr::Str(unescape_string(string)))
}
- Token::Bool(b) => self.eat_span(Expr::Bool(b)),
- Token::Number(n) => self.eat_span(Expr::Number(n)),
- Token::Length(s) => self.eat_span(Expr::Length(s)),
+ Token::Bool(b) => self.with_span(Expr::Bool(b)),
+ Token::Number(n) => self.with_span(Expr::Number(n)),
+ Token::Length(s) => self.with_span(Expr::Length(s)),
Token::Hex(s) => {
if let Ok(color) = RgbaColor::from_str(s) {
- self.eat_span(Expr::Color(color))
+ self.with_span(Expr::Color(color))
} else {
// Heal color by assuming black.
error!(@self.feedback, span, "invalid color");
let healed = RgbaColor::new_healed(0, 0, 0, 255);
- self.eat_span(Expr::Color(healed))
+ self.with_span(Expr::Color(healed))
}
}
@@ -244,128 +329,54 @@ impl FuncParser<'_> {
// a table in any case and coerce the table into a value if it is
// coercable (length 1 and no trailing comma).
Token::LeftParen => {
- let (table, coercable) = self.parse_table(true);
- Some(if coercable {
- table.map(|v| {
- v.into_values()
- .next()
- .expect("table is coercable").val.v
- })
+ self.start_group(Delimiter::Paren);
+ let (table, coercable) = self.parse_table_contents();
+ let span = self.end_group();
+
+ let expr = if coercable {
+ table.into_values()
+ .next()
+ .expect("table is coercable").val.v
} else {
- table.map(|tab| Expr::Table(tab))
- })
- }
-
- _ => None,
- }
- }
+ Expr::Table(table)
+ };
- fn parse_func_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
- let args = self.parse_table(true).0;
- let span = Span::merge(name.span, args.span);
- Spanned::new(CallExpr { name, args: args.v }, span)
- }
-
- /// Set `parens` to true, when this should expect an opening paren and stop
- /// at the balanced closing paren (this is the case for normal tables and
- /// round-paren function calls). Set it to false, when this is used to parse
- /// the top-level function arguments.
- ///
- /// The returned boolean tells you whether the table can be coerced into an
- /// expression (this is the case when it's length 1 and has no trailing
- /// comma).
- fn parse_table(&mut self, parens: bool) -> (Spanned<TableExpr>, bool) {
- let start = self.pos();
- if parens {
- self.assert(Token::LeftParen);
- }
-
- let mut table = TableExpr::new();
- let mut coercable = true;
-
- loop {
- self.skip_white();
- if self.eof() || (parens && self.check(Token::RightParen)) {
- break;
+ Spanned::new(expr, span)
}
- let behind_arg;
-
- if let Some(ident) = self.parse_ident() {
- // This could be a keyword argument, a function call or a simple
- // identifier.
- self.skip_white();
-
- if self.check_eat(Token::Equals).is_some() {
- self.skip_white();
-
- let key = ident;
- self.feedback.decorations
- .push(Spanned::new(Decoration::TableKey, key.span));
-
- let val = try_opt_or!(self.parse_expr(), {
- self.expected("value");
- continue;
- });
-
- coercable = false;
- behind_arg = val.span.end;
- table.insert(key.v.0, SpannedEntry::new(key.span, val));
-
- } else if self.check(Token::LeftParen) {
- let call = self.parse_func_call(ident);
- let expr = call.map(|call| Expr::Call(call));
+ // This is a content expression.
+ Token::LeftBrace => {
+ self.start_group(Delimiter::Brace);
+ self.tokens.push_mode(TokenMode::Body);
- behind_arg = expr.span.end;
- table.push(SpannedEntry::val(expr));
- } else {
- let expr = ident.map(|id| Expr::Ident(id));
+ let tree = self.parse_body_contents();
- behind_arg = expr.span.end;
- table.push(SpannedEntry::val(expr));
- }
- } else {
- // It's a positional argument.
- let expr = try_opt_or!(self.parse_expr(), {
- self.expected("value");
- continue;
- });
- behind_arg = expr.span.end;
- table.push(SpannedEntry::val(expr));
+ self.tokens.pop_mode();
+ let span = self.end_group();
+ Spanned::new(Expr::Tree(tree), span)
}
- self.skip_white();
- if self.eof() || (parens && self.check(Token::RightParen)) {
- break;
+ // This is a bracketed function call.
+ Token::LeftBracket => {
+ let call = self.parse_bracket_call();
+ let tree = vec![call.map(|c| SyntaxNode::Call(c))];
+ Spanned::new(Expr::Tree(tree), span)
}
- self.expect_at(Token::Comma, behind_arg);
- coercable = false;
- }
-
- if parens {
- self.expect(Token::RightParen);
- }
-
- coercable = coercable && !table.is_empty();
-
- let end = self.pos();
- (Spanned::new(table, Span::new(start, end)), coercable)
+ _ => return None,
+ })
}
-}
-// Error handling
-impl FuncParser<'_> {
- fn expect(&mut self, token: Token<'_>) -> bool {
- if self.check(token) {
- self.eat();
- true
- } else {
- self.expected(token.name());
- false
- }
+ fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
+ self.peek().and_then(|token| match token.v {
+ Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
+ _ => None,
+ })
}
+}
+// Error handling.
+impl Parser<'_> {
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
if self.check(token) {
self.eat();
@@ -400,40 +411,58 @@ impl FuncParser<'_> {
}
}
-// Parsing primitives
-impl<'s> FuncParser<'s> {
- fn skip_white(&mut self) {
- loop {
- match self.peek().map(Spanned::value) {
- Some(Token::Space(_))
- | Some(Token::LineComment(_))
- | Some(Token::BlockComment(_)) => { self.eat(); }
- _ => break,
+// Parsing primitives.
+impl<'s> Parser<'s> {
+ fn start_group(&mut self, delimiter: Delimiter) {
+ let start = self.pos();
+ self.assert(delimiter.start());
+ self.delimiters.push((start, delimiter.end()));
+ }
+
+ fn end_group(&mut self) -> Span {
+ assert_eq!(self.peek(), None, "unfinished group");
+ let (start, end_token) = self.delimiters.pop()
+ .expect("group was not started");
+
+ match self.peeked.unwrap() {
+ Some(token) if token.v == end_token => {
+ self.peeked = None;
+ Span::new(start, token.span.end)
+ }
+ _ => {
+ let end = self.pos();
+ error!(
+ @self.feedback, Span::at(end),
+ "expected {}", end_token.name(),
+ );
+ Span::new(start, end)
}
}
}
- fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
- self.peeked.take().unwrap_or_else(|| self.tokens.next())
+ fn skip_white(&mut self) {
+ while matches!(
+ self.peekv(),
+ Some(Token::Space(_)) |
+ Some(Token::LineComment(_)) |
+ Some(Token::BlockComment(_))
+ ) {
+ self.eat();
+ }
}
- fn eat_span<T>(&mut self, v: T) -> Option<Spanned<T>> {
- self.eat().map(|spanned| spanned.map(|_| v))
+ fn eatv(&mut self) -> Option<Token<'s>> {
+ self.eat().map(Spanned::value)
}
- fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
- let tokens = &mut self.tokens;
- *self.peeked.get_or_insert_with(|| tokens.next())
+ fn peekv(&mut self) -> Option<Token<'s>> {
+ self.peek().map(Spanned::value)
}
fn assert(&mut self, token: Token<'_>) {
assert!(self.check_eat(token).is_some());
}
- fn check(&mut self, token: Token<'_>) -> bool {
- self.peek().map(Spanned::value) == Some(token)
- }
-
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
if self.check(token) {
self.eat()
@@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> {
}
}
+ fn check(&mut self, token: Token<'_>) -> bool {
+ self.peekv() == Some(token)
+ }
+
+ fn with_span<T>(&mut self, v: T) -> Spanned<T> {
+ let span = self.eat().expect("expected token").span;
+ Spanned::new(v, span)
+ }
+
fn eof(&mut self) -> bool {
self.peek().is_none()
}
+ fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
+ let token = self.peek()?;
+ self.peeked = None;
+ Some(token)
+ }
+
+ fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
+ let tokens = &mut self.tokens;
+ let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
+
+ // Check for unclosed groups.
+ if Delimiter::is_delimiter(token.v) {
+ if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
+ return None;
+ }
+ }
+
+ Some(token)
+ }
+
fn pos(&self) -> Pos {
self.peeked
.flatten()
@@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> {
}
}
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum Delimiter {
+ Paren,
+ Bracket,
+ Brace,
+}
+
+impl Delimiter {
+ fn is_delimiter(token: Token<'_>) -> bool {
+ matches!(
+ token,
+ Token::RightParen | Token::RightBracket | Token::RightBrace
+ )
+ }
+
+ fn start(self) -> Token<'static> {
+ match self {
+ Self::Paren => Token::LeftParen,
+ Self::Bracket => Token::LeftBracket,
+ Self::Brace => Token::LeftBrace,
+ }
+ }
+
+ fn end(self) -> Token<'static> {
+ match self {
+ Self::Paren => Token::RightParen,
+ Self::Bracket => Token::RightBracket,
+ Self::Brace => Token::RightBrace,
+ }
+ }
+}
+
fn unescape_string(string: &str) -> String {
let mut iter = string.chars();
let mut out = String::with_capacity(string.len());
@@ -608,7 +698,7 @@ mod tests {
macro_rules! test {
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
let exp = Tree![@$($tts)*];
- let pass = parse($src, Pos::ZERO);
+ let pass = parse($src);
check($src, exp, pass.output, $spans);
};
}
@@ -624,7 +714,7 @@ mod tests {
macro_rules! e {
($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*];
- let pass = parse($src, Pos::ZERO);
+ let pass = parse($src);
let found = pass.feedback.diagnostics.iter()
.map(|s| s.as_ref().map(|e| e.message.as_str()))
.collect::<Vec<_>>();
@@ -636,7 +726,7 @@ mod tests {
macro_rules! d {
($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*];
- let pass = parse($src, Pos::ZERO);
+ let pass = parse($src);
check($src, exp, pass.feedback.decorations, true);
};
}
@@ -718,6 +808,15 @@ mod tests {
}
#[test]
+ fn test_parse_groups() {
+ e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
+ s(0,2, 0,2, "expected closing bracket"));
+
+ e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
+ s(0,5, 0,6, "unexpected closing brace"));
+ }
+
+ #[test]
fn test_parse_function_names() {
// No closing bracket.
t!("[" => P![F!("")]);
@@ -760,19 +859,29 @@ mod tests {
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
+ // Raw in body.
+ t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]);
+ e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
+
+ // Crazy.
+ t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![
+ F!("v"; Tree![P![T("v")]]), F!("v")
+ ]])]);
+
// Spanned.
ts!(" [box][Oh my]" => s(0,0, 0,13, P![
s(0,0, 0,1, S),
- s(0,1, 0,13, F!(s(0,1, 0,4, "box");
- s(0,6, 0,11, Tree![s(0,6, 0,11, P![
- s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my"))
+ s(0,1, 0,13, F!(s(0,2, 0,5, "box");
+ s(0,6, 0,13, Tree![s(0,7, 0,12, P![
+ s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
])])
))
]));
}
#[test]
- fn test_parse_simple_values() {
+ fn test_parse_values() {
+ // Simple.
v!("_" => Id("_"));
v!("name" => Id("name"));
v!("α" => Id("α"));
@@ -787,6 +896,12 @@ mod tests {
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
+ // Content.
+ v!("{_hi_}" => Tree![P![I, T("hi"), I]]);
+ e!("[val: {_hi_}]" => );
+ v!("[hi]" => Tree![F!["hi"]]);
+ e!("[val: [hi]]" => );
+
// Healed colors.
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
@@ -925,7 +1040,7 @@ mod tests {
v!("(\x07 abc,)" => Table![Id("abc")]);
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
- e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function"));
+ e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
// Expected comma.
v!("(true false)" => Table![Bool(true), Bool(false)]);
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index cafc7727..2d371bf8 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -22,27 +22,10 @@ pub enum Token<'s> {
/// can contain nested block comments.
BlockComment(&'s str),
- /// A function invocation.
- Function {
- /// The header string:
- /// ```typst
- /// [header: args][body]
- /// ^^^^^^^^^^^^
- /// ```
- header: &'s str,
- /// The spanned body string:
- /// ```typst
- /// [header][hello *world*]
- /// ^^^^^^^^^^^^^
- /// ^-- The span is relative to right before this bracket
- /// ```
- body: Option<Spanned<&'s str>>,
- /// Whether the last closing bracket was present.
- /// - `[func]` or `[func][body]` => terminated
- /// - `[func` or `[func][body` => not terminated
- terminated: bool,
- },
-
+ /// A left bracket starting a function invocation or body: `[`.
+ LeftBracket,
+ /// A right bracket ending a function invocation or body: `]`.
+ RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
@@ -119,7 +102,8 @@ impl<'s> Token<'s> {
Space(_) => "space",
LineComment(_) => "line comment",
BlockComment(_) => "block comment",
- Function { .. } => "function",
+ LeftBracket => "opening bracket",
+ RightBracket => "closing bracket",
LeftParen => "opening paren",
RightParen => "closing paren",
LeftBrace => "opening brace",
@@ -141,7 +125,6 @@ impl<'s> Token<'s> {
Backslash => "backslash",
Raw { .. } => "raw text",
Text(_) => "text",
- Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
}
@@ -152,8 +135,9 @@ impl<'s> Token<'s> {
#[derive(Debug)]
pub struct Tokens<'s> {
src: &'s str,
- mode: TokenMode,
iter: Peekable<Chars<'s>>,
+ mode: TokenMode,
+ stack: Vec<TokenMode>,
pos: Pos,
index: usize,
}
@@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
///
/// The first token's span starts an the given `offset` position instead of
/// the zero position.
- pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self {
+ pub fn new(src: &'s str, mode: TokenMode) -> Self {
Self {
src,
- mode,
iter: src.chars().peekable(),
- pos: offset,
+ mode,
+ stack: vec![],
+ pos: Pos::ZERO,
index: 0,
}
}
+ /// Change the token mode and push the old one on a stack.
+ pub fn push_mode(&mut self, mode: TokenMode) {
+ self.stack.push(self.mode);
+ self.mode = mode;
+ }
+
+ /// Pop the old token mode from the stack. This panics if there is no mode
+ /// on the stack.
+ pub fn pop_mode(&mut self) {
+ self.mode = self.stack.pop().expect("no pushed mode");
+ }
+
/// The index in the string at which the last token ends and next token will
/// start.
pub fn index(&self) -> usize {
@@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
// Whitespace.
c if c.is_whitespace() => self.read_whitespace(start),
- // Functions.
- '[' => self.read_function(start),
- ']' => Invalid("]"),
+ // Functions and blocks.
+ '[' => LeftBracket,
+ ']' => RightBracket,
+ '{' => LeftBrace,
+ '}' => RightBrace,
// Syntactic elements in function headers.
'(' if self.mode == Header => LeftParen,
')' if self.mode == Header => RightParen,
- '{' if self.mode == Header => LeftBrace,
- '}' if self.mode == Header => RightBrace,
':' if self.mode == Header => Colon,
',' if self.mode == Header => Comma,
'=' if self.mode == Header => Equals,
@@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
Space(end.line - start.line)
}
- fn read_function(&mut self, start: Pos) -> Token<'s> {
- let (header, terminated) = self.read_function_part(Header);
- self.eat();
-
- if self.peek() != Some('[') {
- return Function { header, body: None, terminated };
- }
-
- self.eat();
-
- let body_start = self.pos() - start;
- let (body, terminated) = self.read_function_part(Body);
- let body_end = self.pos() - start;
- let span = Span::new(body_start, body_end);
-
- self.eat();
-
- Function { header, body: Some(Spanned { v: body, span }), terminated }
- }
-
- fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
- let start = self.index();
- let mut terminated = false;
-
- while let Some(n) = self.peek() {
- if n == ']' {
- terminated = true;
- break;
- }
-
- self.eat();
- match n {
- '[' => { self.read_function(Pos::ZERO); }
- '/' if self.peek() == Some('/') => { self.read_line_comment(); }
- '/' if self.peek() == Some('*') => { self.read_block_comment(); }
- '"' if mode == Header => { self.read_string(); }
- '`' if mode == Body => { self.read_raw(); }
- '\\' => { self.eat(); }
- _ => {}
- }
- }
-
- let end = self.index();
- (&self.src[start..end], terminated)
- }
-
fn read_string(&mut self) -> Token<'s> {
let (string, terminated) = self.read_until_unescaped('"');
Str { string, terminated }
@@ -540,6 +491,7 @@ mod tests {
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
+ LeftBracket as L, RightBracket as R,
LeftParen as LP, RightParen as RP,
LeftBrace as LB, RightBrace as RB,
Ident as Id,
@@ -557,25 +509,12 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
- macro_rules! F {
- ($h:expr, None, $t:expr) => {
- Token::Function { header: $h, body: None, terminated: $t }
- };
- ($h:expr, $b:expr, $t:expr) => {
- Token::Function {
- header: $h,
- body: Some(Into::<Spanned<&str>>::into($b)),
- terminated: $t,
- }
- };
- }
-
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
macro_rules! test {
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
- let found = Tokens::new($src, Pos::ZERO, $mode).collect::<Vec<_>>();
+ let found = Tokens::new($src, $mode).collect::<Vec<_>>();
check($src, exp, found, $spans);
}
}
@@ -616,7 +555,7 @@ mod tests {
fn tokenize_body_only_tokens() {
t!(Body, "_*" => Underscore, Star);
t!(Body, "***" => Star, Star, Star);
- t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star);
+ t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
t!(Body, "`raw`" => Raw("raw", true));
t!(Body, "`[func]`" => Raw("[func]", true));
@@ -675,50 +614,6 @@ mod tests {
}
#[test]
- fn tokenize_functions() {
- t!(Body, "a[f]" => T("a"), F!("f", None, true));
- t!(Body, "[f]a" => F!("f", None, true), T("a"));
- t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
- t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a"));
- t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
- t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));
- t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
- t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));
- t!(Header, "[" => F!("", None, false));
- t!(Header, "]" => Invalid("]"));
- }
-
- #[test]
- fn tokenize_correct_end_of_function() {
- // End of function with strings and carets in headers
- t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
- t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
- t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
- t!(Body, "[f: `]" => F!("f: `", None, true));
-
- // End of function with strings and carets in bodies
- t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
- t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
- t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
- t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
- t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
- t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
- t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
- t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
- t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));
-
- // End of function with comments
- t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
- t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
- t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
- t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));
-
- // End of function with escaped brackets
- t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
- t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
- }
-
- #[test]
fn tokenize_escaped_symbols() {
t!(Body, r"\\" => T(r"\"));
t!(Body, r"\[" => T("["));
@@ -746,7 +641,6 @@ mod tests {
fn tokenize_with_spans() {
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
- ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));