summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-10-02 15:43:29 +0200
committerLaurenz <laurmaedje@gmail.com>2020-10-02 15:43:29 +0200
commit3533268b1f7a31581e7b8f44dff6d4f553ef348f (patch)
tree3fee21d2df7ce173131f75f46a1ef040f272ed29 /src
parentf8770d2b2a8ac389704897f92f2753398352835b (diff)
Refactor parser 🏞
Diffstat (limited to 'src')
-rw-r--r--src/eval/value.rs82
-rw-r--r--src/layout/tree.rs27
-rw-r--r--src/library/boxed.rs2
-rw-r--r--src/library/mod.rs6
-rw-r--r--src/parse/mod.rs919
-rw-r--r--src/parse/parser.rs292
-rw-r--r--src/parse/resolve.rs26
-rw-r--r--src/parse/scanner.rs38
-rw-r--r--src/parse/tests.rs22
-rw-r--r--src/parse/tokens.rs239
-rw-r--r--src/syntax/ast/expr.rs (renamed from src/syntax/expr.rs)7
-rw-r--r--src/syntax/ast/lit.rs (renamed from src/syntax/lit.rs)10
-rw-r--r--src/syntax/ast/mod.rs9
-rw-r--r--src/syntax/ast/tree.rs (renamed from src/syntax/tree.rs)34
-rw-r--r--src/syntax/mod.rs15
-rw-r--r--src/syntax/span.rs44
-rw-r--r--src/syntax/token.rs147
17 files changed, 1055 insertions, 864 deletions
diff --git a/src/eval/value.rs b/src/eval/value.rs
index 56af4322..6a63a66f 100644
--- a/src/eval/value.rs
+++ b/src/eval/value.rs
@@ -15,12 +15,10 @@ use crate::syntax::{Ident, Span, SpanWith, Spanned, SynNode, SynTree};
use crate::{DynFuture, Feedback, Pass};
/// A computational value.
-#[derive(Clone)]
+#[derive(Clone, PartialEq)]
pub enum Value {
/// An identifier: `ident`.
Ident(Ident),
- /// A string: `"string"`.
- Str(String),
/// A boolean: `true, false`.
Bool(bool),
/// A number: `1.2, 200%`.
@@ -29,6 +27,8 @@ pub enum Value {
Length(Length),
/// A color value with alpha channel: `#f79143ff`.
Color(RgbaColor),
+ /// A string: `"string"`.
+ Str(String),
/// A dictionary value: `(false, 12cm, greeting="hi")`.
Dict(DictValue),
/// A syntax tree containing typesetting content.
@@ -45,11 +45,11 @@ impl Value {
pub fn name(&self) -> &'static str {
match self {
Self::Ident(_) => "identifier",
- Self::Str(_) => "string",
Self::Bool(_) => "bool",
Self::Number(_) => "number",
Self::Length(_) => "length",
Self::Color(_) => "color",
+ Self::Str(_) => "string",
Self::Dict(_) => "dict",
Self::Tree(_) => "syntax tree",
Self::Func(_) => "function",
@@ -65,9 +65,6 @@ impl Spanned<Value> {
/// the value is represented as layoutable content in a reasonable way.
pub fn into_commands(self) -> Commands {
match self.v {
- Value::Commands(commands) => commands,
- Value::Tree(tree) => vec![Command::LayoutSyntaxTree(tree)],
-
// Forward to each entry, separated with spaces.
Value::Dict(dict) => {
let mut commands = vec![];
@@ -75,7 +72,7 @@ impl Spanned<Value> {
for entry in dict.into_values() {
if let Some(last_end) = end {
let span = Span::new(last_end, entry.key.start);
- let tree = vec![SynNode::Spacing.span_with(span)];
+ let tree = vec![SynNode::Space.span_with(span)];
commands.push(Command::LayoutSyntaxTree(tree));
}
@@ -85,6 +82,9 @@ impl Spanned<Value> {
commands
}
+ Value::Tree(tree) => vec![Command::LayoutSyntaxTree(tree)],
+ Value::Commands(commands) => commands,
+
// Format with debug.
val => {
let fmt = format!("{:?}", val);
@@ -99,37 +99,19 @@ impl Debug for Value {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Self::Ident(i) => i.fmt(f),
- Self::Str(s) => s.fmt(f),
Self::Bool(b) => b.fmt(f),
Self::Number(n) => n.fmt(f),
Self::Length(s) => s.fmt(f),
Self::Color(c) => c.fmt(f),
+ Self::Str(s) => s.fmt(f),
Self::Dict(t) => t.fmt(f),
Self::Tree(t) => t.fmt(f),
- Self::Func(_) => f.pad("<function>"),
+ Self::Func(c) => c.fmt(f),
Self::Commands(c) => c.fmt(f),
}
}
}
-impl PartialEq for Value {
- fn eq(&self, other: &Self) -> bool {
- match (self, other) {
- (Self::Ident(a), Self::Ident(b)) => a == b,
- (Self::Str(a), Self::Str(b)) => a == b,
- (Self::Bool(a), Self::Bool(b)) => a == b,
- (Self::Number(a), Self::Number(b)) => a == b,
- (Self::Length(a), Self::Length(b)) => a == b,
- (Self::Color(a), Self::Color(b)) => a == b,
- (Self::Dict(a), Self::Dict(b)) => a == b,
- (Self::Tree(a), Self::Tree(b)) => a == b,
- (Self::Func(a), Self::Func(b)) => Rc::ptr_eq(a, b),
- (Self::Commands(a), Self::Commands(b)) => a == b,
- _ => false,
- }
- }
-}
-
/// An executable function value.
///
/// The first argument is a dictionary containing the arguments passed to the
@@ -140,8 +122,45 @@ impl PartialEq for Value {
/// layouting engine to do what the function pleases.
///
/// The dynamic function object is wrapped in an `Rc` to keep `Value` clonable.
-pub type FuncValue =
- Rc<dyn Fn(Span, DictValue, LayoutContext<'_>) -> DynFuture<Pass<Value>>>;
+#[derive(Clone)]
+pub struct FuncValue(pub Rc<FuncType>);
+
+/// The dynamic function type backing [`FuncValue`].
+///
+/// [`FuncValue`]: struct.FuncValue.html
+pub type FuncType = dyn Fn(Span, DictValue, LayoutContext<'_>) -> DynFuture<Pass<Value>>;
+
+impl FuncValue {
+ /// Create a new function value from a rust function or closure.
+ pub fn new<F: 'static>(f: F) -> Self
+ where
+ F: Fn(Span, DictValue, LayoutContext<'_>) -> DynFuture<Pass<Value>>,
+ {
+ Self(Rc::new(f))
+ }
+}
+
+impl Eq for FuncValue {}
+
+impl PartialEq for FuncValue {
+ fn eq(&self, other: &Self) -> bool {
+ Rc::ptr_eq(&self.0, &other.0)
+ }
+}
+
+impl Deref for FuncValue {
+ type Target = FuncType;
+
+ fn deref(&self) -> &Self::Target {
+ self.0.as_ref()
+ }
+}
+
+impl Debug for FuncValue {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ f.pad("<function>")
+ }
+}
/// A dictionary of values.
///
@@ -262,8 +281,7 @@ impl DictValue {
/// Generated `"unexpected argument"` errors for all remaining entries.
pub fn unexpected(&self, f: &mut Feedback) {
for entry in self.values() {
- let span = Span::merge(entry.key, entry.val.span);
- error!(@f, span, "unexpected argument");
+ error!(@f, entry.key.join(entry.val.span), "unexpected argument");
}
}
}
diff --git a/src/layout/tree.rs b/src/layout/tree.rs
index f8e4160c..56313383 100644
--- a/src/layout/tree.rs
+++ b/src/layout/tree.rs
@@ -58,10 +58,19 @@ impl<'a> TreeLayouter<'a> {
};
match &node.v {
- SynNode::Spacing => self.layout_space(),
+ SynNode::Space => self.layout_space(),
+ SynNode::Text(text) => {
+ if self.style.text.italic {
+ decorate(self, Decoration::Italic);
+ }
+ if self.style.text.bolder {
+ decorate(self, Decoration::Bold);
+ }
+ self.layout_text(text).await;
+ }
+
SynNode::Linebreak => self.layouter.finish_line(),
SynNode::Parbreak => self.layout_parbreak(),
-
SynNode::ToggleItalic => {
self.style.text.italic = !self.style.text.italic;
decorate(self, Decoration::Italic);
@@ -71,18 +80,8 @@ impl<'a> TreeLayouter<'a> {
decorate(self, Decoration::Bold);
}
- SynNode::Text(text) => {
- if self.style.text.italic {
- decorate(self, Decoration::Italic);
- }
- if self.style.text.bolder {
- decorate(self, Decoration::Bold);
- }
- self.layout_text(text).await;
- }
-
- SynNode::Raw(raw) => self.layout_raw(raw).await,
SynNode::Heading(heading) => self.layout_heading(heading).await,
+ SynNode::Raw(raw) => self.layout_raw(raw).await,
SynNode::Expr(expr) => {
self.layout_expr(expr.span_with(node.span)).await;
@@ -116,7 +115,7 @@ impl<'a> TreeLayouter<'a> {
async fn layout_heading(&mut self, heading: &NodeHeading) {
let style = self.style.text.clone();
- self.style.text.font_scale *= 1.5 - 0.1 * heading.level.v.min(5) as f64;
+ self.style.text.font_scale *= 1.5 - 0.1 * heading.level.v as f64;
self.style.text.bolder = true;
self.layout_parbreak();
diff --git a/src/library/boxed.rs b/src/library/boxed.rs
index e02b8c0c..85025264 100644
--- a/src/library/boxed.rs
+++ b/src/library/boxed.rs
@@ -13,7 +13,7 @@ pub async fn boxed(
) -> Pass<Value> {
let mut f = Feedback::new();
- let content = args.take::<SynTree>().unwrap_or(SynTree::new());
+ let content = args.take::<SynTree>().unwrap_or_default();
ctx.base = ctx.spaces[0].size;
ctx.spaces.truncate(1);
diff --git a/src/library/mod.rs b/src/library/mod.rs
index a5fdfc4c..43f74318 100644
--- a/src/library/mod.rs
+++ b/src/library/mod.rs
@@ -14,9 +14,7 @@ pub use font::*;
pub use page::*;
pub use spacing::*;
-use std::rc::Rc;
-
-use crate::eval::Scope;
+use crate::eval::{FuncValue, Scope};
use crate::prelude::*;
macro_rules! std {
@@ -32,7 +30,7 @@ macro_rules! std {
macro_rules! wrap {
($func:expr) => {
- Rc::new(|name, args, ctx| Box::pin($func(name, args, ctx)))
+ FuncValue::new(|name, args, ctx| Box::pin($func(name, args, ctx)))
};
}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index cc0b6378..2f34357c 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,11 +1,13 @@
//! Parsing and tokenization.
mod lines;
+mod parser;
mod resolve;
mod scanner;
mod tokens;
pub use lines::*;
+pub use parser::*;
pub use resolve::*;
pub use scanner::*;
pub use tokens::*;
@@ -15,634 +17,469 @@ use std::str::FromStr;
use crate::color::RgbaColor;
use crate::eval::DictKey;
use crate::syntax::*;
-use crate::{Feedback, Pass};
+use crate::Pass;
/// Parse a string of source code.
pub fn parse(src: &str) -> Pass<SynTree> {
- Parser::new(src).parse()
+ let mut p = Parser::new(src);
+ Pass::new(tree(&mut p), p.finish())
}
-struct Parser<'s> {
- tokens: Tokens<'s>,
- peeked: Option<Option<Spanned<Token<'s>>>>,
- delimiters: Vec<(Pos, Token<'static>)>,
- at_block_or_line_start: bool,
- feedback: Feedback,
-}
-
-impl<'s> Parser<'s> {
- fn new(src: &'s str) -> Self {
- Self {
- tokens: Tokens::new(src, TokenMode::Body),
- peeked: None,
- delimiters: vec![],
- at_block_or_line_start: true,
- feedback: Feedback::new(),
+/// Parse a syntax tree.
+fn tree(p: &mut Parser) -> SynTree {
+ // We keep track of whether we are at the start of a block or paragraph
+ // to know whether headings are allowed.
+ let mut at_start = true;
+ let mut tree = vec![];
+ while !p.eof() {
+ if let Some(node) = node(p, at_start) {
+ if node.v == SynNode::Parbreak {
+ at_start = true;
+ } else if node.v != SynNode::Space {
+ at_start = false;
+ }
+ tree.push(node);
}
}
-
- fn parse(mut self) -> Pass<SynTree> {
- let tree = self.parse_body_contents();
- Pass::new(tree, self.feedback)
- }
+ tree
}
-// Typesetting content.
-impl Parser<'_> {
- fn parse_body_contents(&mut self) -> SynTree {
- let mut tree = SynTree::new();
-
- self.at_block_or_line_start = true;
- while !self.eof() {
- if let Some(node) = self.parse_node() {
- tree.push(node);
+/// Parse a syntax node.
+fn node(p: &mut Parser, at_start: bool) -> Option<Spanned<SynNode>> {
+ let token = p.eat()?;
+ let span = token.span;
+ Some(match token.v {
+ // Spaces.
+ Token::Space(newlines) => {
+ if newlines < 2 {
+ SynNode::Space.span_with(span)
+ } else {
+ SynNode::Parbreak.span_with(span)
}
}
-
- tree
- }
-
- fn parse_node(&mut self) -> Option<Spanned<SynNode>> {
- let token = self.peek()?;
- let end = Span::at(token.span.end);
-
- // Set block or line start to false because most nodes have that effect, but
- // remember the old value to actually check it for hashtags and because comments
- // and spaces want to retain it.
- let was_at_block_or_line_start = self.at_block_or_line_start;
- self.at_block_or_line_start = false;
-
- Some(match token.v {
- // Starting from two newlines counts as a paragraph break, a single
- // newline does not.
- Token::Space(n) => {
- if n == 0 {
- self.at_block_or_line_start = was_at_block_or_line_start;
- } else if n >= 1 {
- self.at_block_or_line_start = true;
- }
-
- self.with_span(if n >= 2 { SynNode::Parbreak } else { SynNode::Spacing })
- }
-
- Token::LineComment(_) | Token::BlockComment(_) => {
- self.at_block_or_line_start = was_at_block_or_line_start;
- self.eat();
- return None;
- }
-
- Token::LeftBracket => {
- let call = self.parse_bracket_call(false);
- self.at_block_or_line_start = false;
- call.map(|c| SynNode::Expr(Expr::Call(c)))
- }
-
- Token::Star => self.with_span(SynNode::ToggleBolder),
- Token::Underscore => self.with_span(SynNode::ToggleItalic),
- Token::Backslash => self.with_span(SynNode::Linebreak),
-
- Token::Hashtag if was_at_block_or_line_start => {
- self.parse_heading().map(SynNode::Heading)
- }
-
- Token::Raw { raw, backticks, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected backtick(s)");
- }
-
- let raw = resolve::resolve_raw(raw, backticks);
- self.with_span(SynNode::Raw(raw))
- }
-
- Token::Text(text) => self.with_span(SynNode::Text(text.to_string())),
- Token::Hashtag => self.with_span(SynNode::Text("#".to_string())),
-
- Token::UnicodeEscape { sequence, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected closing brace");
- }
-
- if let Some(c) = resolve::resolve_hex(sequence) {
- self.with_span(SynNode::Text(c.to_string()))
- } else {
- error!(@self.feedback, token.span, "invalid unicode escape sequence");
- // TODO: Decide whether to render the escape sequence.
- self.eat();
- return None;
- }
- }
-
- unexpected => {
- error!(@self.feedback, token.span, "unexpected {}", unexpected.name());
- self.eat();
- return None;
+ Token::Text(text) => SynNode::Text(text.into()).span_with(span),
+
+ // Comments.
+ Token::LineComment(_) | Token::BlockComment(_) => return None,
+
+ // Markup.
+ Token::Star => SynNode::ToggleBolder.span_with(span),
+ Token::Underscore => SynNode::ToggleItalic.span_with(span),
+ Token::Backslash => SynNode::Linebreak.span_with(span),
+ Token::Hashtag => {
+ if at_start {
+ heading(p, span.start).map(SynNode::Heading)
+ } else {
+ SynNode::Text(p.get(span).into()).span_with(span)
}
- })
- }
-
- fn parse_heading(&mut self) -> Spanned<NodeHeading> {
- let start = self.pos();
- self.assert(Token::Hashtag);
-
- let mut level = 0;
- while self.peekv() == Some(Token::Hashtag) {
- level += 1;
- self.eat();
}
+ Token::Raw(token) => raw(p, token, span).map(SynNode::Raw),
+ Token::UnicodeEscape(token) => unicode_escape(p, token, span).map(SynNode::Text),
- let span = Span::new(start, self.pos());
- let level = level.span_with(span);
-
- if level.v > 5 {
- warning!(
- @self.feedback, level.span,
- "section depth larger than 6 has no effect",
- );
+ // Functions.
+ Token::LeftBracket => {
+ p.jump(span.start);
+ bracket_call(p).map(Expr::Call).map(SynNode::Expr)
}
- self.skip_ws();
-
- let mut tree = SynTree::new();
- while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) {
- if let Some(node) = self.parse_node() {
- tree.push(node);
- }
+ // Bad tokens.
+ _ => {
+ p.diag_unexpected(token);
+ return None;
}
-
- let span = Span::new(start, self.pos());
- NodeHeading { level, contents: tree }.span_with(span)
- }
+ })
}
-// Function calls.
-impl Parser<'_> {
- fn parse_bracket_call(&mut self, chained: bool) -> Spanned<ExprCall> {
- let before_bracket = self.pos();
- if !chained {
- self.start_group(Group::Bracket);
- self.tokens.push_mode(TokenMode::Header);
+/// Parse a heading.
+fn heading(p: &mut Parser, start: Pos) -> Spanned<NodeHeading> {
+ // Parse the section depth.
+ let count = p.eat_while(|c| c == Token::Hashtag);
+ let span = (start, p.pos());
+ let level = (count.min(5) as u8).span_with(span);
+ if count > 5 {
+ p.diag(warning!(span, "section depth larger than 6 has no effect"));
+ }
+
+ // Parse the heading contents.
+ p.skip_white();
+ let mut contents = vec![];
+ while p.check(|t| !matches!(t, Token::Space(n) if n >= 1)) {
+ if let Some(node) = node(p, false) {
+ contents.push(node);
}
+ }
- let before_name = self.pos();
- self.start_group(Group::Subheader);
- self.skip_ws();
- let name = self.parse_ident().unwrap_or_else(|| {
- self.expected_found_or_at("function name", before_name);
- Ident(String::new()).span_with(Span::at(before_name))
- });
-
- self.skip_ws();
-
- let mut args = match self.eatv() {
- Some(Token::Colon) => self.parse_dict_contents().0,
- Some(_) => {
- self.expected_at("colon", name.span.end);
- while self.eat().is_some() {}
- LitDict::default()
- }
- None => LitDict::default(),
- };
-
- self.end_group();
- self.skip_ws();
- let (has_chained_child, end) = if self.peek().is_some() {
- let item = self.parse_bracket_call(true);
- let span = item.span;
- let tree = vec![item.map(|c| SynNode::Expr(Expr::Call(c)))];
- let expr = Expr::Lit(Lit::Content(tree));
- args.0.push(LitDictEntry { key: None, value: expr.span_with(span) });
- (true, span.end)
- } else {
- self.tokens.pop_mode();
- (false, self.end_group().end)
- };
-
- let start = if chained { before_name } else { before_bracket };
- let mut span = Span::new(start, end);
-
- if self.check(Token::LeftBracket) && !has_chained_child {
- self.start_group(Group::Bracket);
- self.tokens.push_mode(TokenMode::Body);
- let body = self.parse_body_contents();
- self.tokens.pop_mode();
- let body_span = self.end_group();
+ NodeHeading { level, contents }.span_with((start, p.pos()))
+}
- let expr = Expr::Lit(Lit::Content(body));
- args.0.push(LitDictEntry {
- key: None,
- value: expr.span_with(body_span),
- });
- span.expand(body_span);
- }
+/// Parse a raw block.
+fn raw(p: &mut Parser, token: TokenRaw, span: Span) -> Spanned<NodeRaw> {
+ let raw = resolve::resolve_raw(token.text, token.backticks);
- ExprCall { name, args }.span_with(span)
+ if !token.terminated {
+ p.diag(error!(span.end, "expected backtick(s)"));
}
- fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<ExprCall> {
- self.start_group(Group::Paren);
- let args = self.parse_dict_contents().0;
- let args_span = self.end_group();
- let span = Span::merge(name.span, args_span);
- ExprCall { name, args }.span_with(span)
- }
+ raw.span_with(span)
}
-// Dicts.
-impl Parser<'_> {
- fn parse_dict_contents(&mut self) -> (LitDict, bool) {
- let mut dict = LitDict::default();
- let mut comma_and_keyless = true;
-
- while {
- self.skip_ws();
- !self.eof()
- } {
- let (key, value) = if let Some(ident) = self.parse_ident() {
- self.skip_ws();
-
- match self.peekv() {
- Some(Token::Equals) => {
- self.eat();
- self.skip_ws();
- if let Some(value) = self.parse_expr() {
- (Some(ident.map(|id| DictKey::Str(id.0))), value)
- } else {
- self.expected("value");
- continue;
- }
- }
-
- Some(Token::LeftParen) => {
- let call = self.parse_paren_call(ident);
- (None, call.map(Expr::Call))
- }
-
- _ => (None, ident.map(|id| Expr::Lit(Lit::Ident(id)))),
- }
- } else if let Some(value) = self.parse_expr() {
- (None, value)
- } else {
- self.expected("value");
- continue;
- };
-
- if let Some(key) = &key {
- comma_and_keyless = false;
- self.feedback
- .decorations
- .push(Decoration::DictKey.span_with(key.span));
- }
-
- let behind = value.span.end;
- dict.0.push(LitDictEntry { key, value });
+/// Parse a unicode escape sequence.
+fn unicode_escape(
+ p: &mut Parser,
+ token: TokenUnicodeEscape,
+ span: Span,
+) -> Spanned<String> {
+ let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
+ c.to_string()
+ } else {
+ // Print out the escape sequence verbatim if it is
+ // invalid.
+ p.diag(error!(span, "invalid unicode escape sequence"));
+ p.get(span).into()
+ };
+
+ if !token.terminated {
+ p.diag(error!(span.end, "expected closing brace"));
+ }
+
+ text.span_with(span)
+}
- if {
- self.skip_ws();
- self.eof()
- } {
- break;
- }
+/// Parse a bracketed function call.
+fn bracket_call(p: &mut Parser) -> Spanned<ExprCall> {
+ let before_bracket = p.pos();
+ p.start_group(Group::Bracket);
+ p.push_mode(TokenMode::Header);
- self.expect_at(Token::Comma, behind);
- comma_and_keyless = false;
- }
+ // One header is guaranteed, but there may be more (through chaining).
+ let mut outer = vec![];
+ let mut inner = bracket_subheader(p);
- let coercable = comma_and_keyless && !dict.0.is_empty();
- (dict, coercable)
+ while p.eat_if(Token::Chain).is_some() {
+ outer.push(inner);
+ inner = bracket_subheader(p);
}
-}
-// Expressions and values.
-impl Parser<'_> {
- fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
- self.parse_binops("summand", Self::parse_term, |token| match token {
- Token::Plus => Some(BinOp::Add),
- Token::Hyphen => Some(BinOp::Sub),
- _ => None,
- })
- }
+ p.pop_mode();
+ p.end_group();
- fn parse_term(&mut self) -> Option<Spanned<Expr>> {
- self.parse_binops("factor", Self::parse_factor, |token| match token {
- Token::Star => Some(BinOp::Mul),
- Token::Slash => Some(BinOp::Div),
- _ => None,
- })
+ if p.peek() == Some(Token::LeftBracket) {
+ let expr = bracket_body(p).map(Lit::Content).map(Expr::Lit);
+ inner.span.expand(expr.span);
+ inner.v.args.0.push(LitDictEntry { key: None, expr });
}
- /// Parse expression of the form `<operand> (<op> <operand>)*`.
- fn parse_binops(
- &mut self,
- operand_name: &str,
- mut parse_operand: impl FnMut(&mut Self) -> Option<Spanned<Expr>>,
- mut parse_op: impl FnMut(Token) -> Option<BinOp>,
- ) -> Option<Spanned<Expr>> {
- let mut left = parse_operand(self)?;
-
- self.skip_ws();
- while let Some(token) = self.peek() {
- if let Some(op) = parse_op(token.v) {
- self.eat();
- self.skip_ws();
-
- if let Some(right) = parse_operand(self) {
- let span = Span::merge(left.span, right.span);
- let expr = Expr::Binary(ExprBinary {
- lhs: left.map(Box::new),
- op: op.span_with(token.span),
- rhs: right.map(Box::new),
- });
- left = expr.span_with(span);
- self.skip_ws();
- continue;
- }
+ while let Some(mut top) = outer.pop() {
+ let span = inner.span;
+ let node = inner.map(Expr::Call).map(SynNode::Expr);
+ let expr = Expr::Lit(Lit::Content(vec![node])).span_with(span);
+ top.v.args.0.push(LitDictEntry { key: None, expr });
+ inner = top;
+ }
- error!(
- @self.feedback, Span::merge(left.span, token.span),
- "missing right {}", operand_name,
- );
- }
- break;
- }
+ inner.v.span_with((before_bracket, p.pos()))
+}
- Some(left)
- }
+/// Parse one subheader of a bracketed function call.
+fn bracket_subheader(p: &mut Parser) -> Spanned<ExprCall> {
+ p.start_group(Group::Subheader);
+ let before_name = p.pos();
- fn parse_factor(&mut self) -> Option<Spanned<Expr>> {
- if let Some(hyph) = self.check_eat(Token::Hyphen) {
- self.skip_ws();
- if let Some(factor) = self.parse_factor() {
- let span = Span::merge(hyph.span, factor.span);
- let expr = Expr::Unary(ExprUnary {
- op: UnOp::Neg.span_with(hyph.span),
- expr: factor.map(Box::new),
- });
- Some(expr.span_with(span))
- } else {
- error!(@self.feedback, hyph.span, "dangling minus");
- None
- }
+ p.skip_white();
+ let name = ident(p).unwrap_or_else(|| {
+ if p.eof() {
+ p.diag_expected_at("function name", before_name);
} else {
- self.parse_value()
+ p.diag_expected("function name");
}
- }
-
- fn parse_value(&mut self) -> Option<Spanned<Expr>> {
- let Spanned { v: token, span } = self.peek()?;
- Some(match token {
- // This could be a function call or an identifier.
- Token::Ident(id) => {
- let name = Ident(id.to_string()).span_with(span);
- self.eat();
- self.skip_ws();
- if self.check(Token::LeftParen) {
- self.parse_paren_call(name).map(Expr::Call)
- } else {
- name.map(|n| Expr::Lit(Lit::Ident(n)))
- }
- }
-
- Token::Str { string, terminated } => {
- if !terminated {
- self.expected_at("quote", span.end);
- }
- self.with_span(Expr::Lit(Lit::Str(resolve::resolve_string(string))))
- }
-
- Token::Bool(b) => self.with_span(Expr::Lit(Lit::Bool(b))),
- Token::Number(n) => self.with_span(Expr::Lit(Lit::Float(n))),
- Token::Length(s) => self.with_span(Expr::Lit(Lit::Length(s))),
- Token::Hex(s) => {
- let color = RgbaColor::from_str(s).unwrap_or_else(|_| {
- // Heal color by assuming black.
- error!(@self.feedback, span, "invalid color");
- RgbaColor::new_healed(0, 0, 0, 255)
- });
- self.with_span(Expr::Lit(Lit::Color(color)))
- }
-
- // This could be a dictionary or a parenthesized expression. We
- // parse as a dictionary in any case and coerce into a value if
- // that's coercable (length 1 and no trailing comma).
- Token::LeftParen => {
- self.start_group(Group::Paren);
- let (dict, coercable) = self.parse_dict_contents();
- let span = self.end_group();
-
- let expr = if coercable {
- dict.0.into_iter().next().expect("dict is coercable").value.v
- } else {
- Expr::Lit(Lit::Dict(dict))
- };
-
- expr.span_with(span)
- }
-
- // This is a content expression.
- Token::LeftBrace => {
- self.start_group(Group::Brace);
- self.tokens.push_mode(TokenMode::Body);
- let tree = self.parse_body_contents();
- self.tokens.pop_mode();
- let span = self.end_group();
- Expr::Lit(Lit::Content(tree)).span_with(span)
- }
+ Ident(String::new()).span_with(before_name)
+ });
+
+ p.skip_white();
+ let args = if p.eat_if(Token::Colon).is_some() {
+ dict_contents(p).0
+ } else {
+ // Ignore the rest if there's no colon.
+ if !p.eof() {
+ p.diag_expected_at("colon", p.pos());
+ }
+ p.eat_while(|_| true);
+ LitDict::new()
+ };
- // This is a bracketed function call.
- Token::LeftBracket => {
- let call = self.parse_bracket_call(false);
- let tree = vec![call.map(|c| SynNode::Expr(Expr::Call(c)))];
- Expr::Lit(Lit::Content(tree)).span_with(span)
- }
+ ExprCall { name, args }.span_with(p.end_group())
+}
- _ => return None,
- })
- }
+/// Parse the body of a bracketed function call.
+fn bracket_body(p: &mut Parser) -> Spanned<SynTree> {
+ p.start_group(Group::Bracket);
+ p.push_mode(TokenMode::Body);
+ let tree = tree(p);
+ p.pop_mode();
+ tree.span_with(p.end_group())
+}
- fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
- self.peek().and_then(|token| match token.v {
- Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
- _ => None,
- })
- }
+/// Parse an expression: `term (+ term)*`.
+fn expr(p: &mut Parser) -> Option<Spanned<Expr>> {
+ binops(p, "summand", term, |token| match token {
+ Token::Plus => Some(BinOp::Add),
+ Token::Hyphen => Some(BinOp::Sub),
+ _ => None,
+ })
}
-// Error handling.
-impl Parser<'_> {
- fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
- if self.check(token) {
- self.eat();
- true
- } else {
- self.expected_at(token.name(), pos);
- false
- }
- }
+/// Parse a term: `factor (* factor)*`.
+fn term(p: &mut Parser) -> Option<Spanned<Expr>> {
+ binops(p, "factor", factor, |token| match token {
+ Token::Star => Some(BinOp::Mul),
+ Token::Slash => Some(BinOp::Div),
+ _ => None,
+ })
+}
- fn expected(&mut self, thing: &str) {
- if let Some(found) = self.eat() {
- error!(
- @self.feedback, found.span,
- "expected {}, found {}", thing, found.v.name(),
- );
+/// Parse binary operations of the from `a (<op> b)*`.
+fn binops(
+ p: &mut Parser,
+ operand_name: &str,
+ operand: fn(&mut Parser) -> Option<Spanned<Expr>>,
+ op: fn(Token) -> Option<BinOp>,
+) -> Option<Spanned<Expr>> {
+ let mut lhs = operand(p)?;
+
+ loop {
+ p.skip_white();
+ if let Some(op) = p.eat_map(op) {
+ p.skip_white();
+
+ if let Some(rhs) = operand(p) {
+ let span = lhs.span.join(rhs.span);
+ let expr = Expr::Binary(ExprBinary {
+ lhs: lhs.map(Box::new),
+ op,
+ rhs: rhs.map(Box::new),
+ });
+ lhs = expr.span_with(span);
+ p.skip_white();
+ } else {
+ let span = lhs.span.join(op.span);
+ p.diag(error!(span, "missing right {}", operand_name));
+ break;
+ }
} else {
- error!(@self.feedback, Span::at(self.pos()), "expected {}", thing);
+ break;
}
}
- fn expected_at(&mut self, thing: &str, pos: Pos) {
- error!(@self.feedback, Span::at(pos), "expected {}", thing);
- }
+ Some(lhs)
+}
- fn expected_found_or_at(&mut self, thing: &str, pos: Pos) {
- if self.eof() {
- self.expected_at(thing, pos)
+/// Parse a factor of the form `-?value`.
+fn factor(p: &mut Parser) -> Option<Spanned<Expr>> {
+ if let Some(op) = p.eat_map(|token| match token {
+ Token::Hyphen => Some(UnOp::Neg),
+ _ => None,
+ }) {
+ p.skip_white();
+ if let Some(expr) = factor(p) {
+ let span = op.span.join(expr.span);
+ let expr = Expr::Unary(ExprUnary { op, expr: expr.map(Box::new) });
+ Some(expr.span_with(span))
} else {
- self.expected(thing);
+ p.diag(error!(op.span, "missing factor"));
+ None
}
+ } else {
+ value(p)
}
}
-// Parsing primitives.
-impl<'s> Parser<'s> {
- fn start_group(&mut self, group: Group) {
- let start = self.pos();
- if let Some(start_token) = group.start() {
- self.assert(start_token);
+/// Parse a value.
+fn value(p: &mut Parser) -> Option<Spanned<Expr>> {
+ let Spanned { v: token, span } = p.eat()?;
+ Some(match token {
+ // Bracketed function call.
+ Token::LeftBracket => {
+ p.jump(span.start);
+ let call = bracket_call(p);
+ let span = call.span;
+ let node = call.map(Expr::Call).map(SynNode::Expr);
+ Expr::Lit(Lit::Content(vec![node])).span_with(span)
}
- self.delimiters.push((start, group.end()));
- }
- fn end_group(&mut self) -> Span {
- let peeked = self.peek();
-
- let (start, end_token) = self.delimiters.pop().expect("group was not started");
+ // Content expression.
+ Token::LeftBrace => {
+ p.jump(span.start);
+ content(p).map(Lit::Content).map(Expr::Lit)
+ }
- if end_token != Token::Chain && peeked != None {
- self.delimiters.push((start, end_token));
- assert_eq!(peeked, None, "unfinished group");
+ // Dictionary or just a parenthesized expression.
+ Token::LeftParen => {
+ p.jump(span.start);
+ parenthesized(p)
}
- match self.peeked.unwrap() {
- Some(token) if token.v == end_token => {
- self.peeked = None;
- Span::new(start, token.span.end)
- }
- _ => {
- let end = self.pos();
- if end_token != Token::Chain {
- error!(
- @self.feedback, Span::at(end),
- "expected {}", end_token.name(),
- );
- }
- Span::new(start, end)
+ // Function or just ident.
+ Token::Ident(id) => {
+ let ident = Ident(id.into()).span_with(span);
+
+ p.skip_white();
+ if p.peek() == Some(Token::LeftParen) {
+ paren_call(p, ident).map(Expr::Call)
+ } else {
+ ident.map(Lit::Ident).map(Expr::Lit)
}
}
- }
- fn skip_ws(&mut self) {
- while matches!(
- self.peekv(),
- Some(Token::Space(_)) |
- Some(Token::LineComment(_)) |
- Some(Token::BlockComment(_))
- ) {
- self.eat();
+ // Atomic values.
+ Token::Bool(b) => Expr::Lit(Lit::Bool(b)).span_with(span),
+ Token::Number(f) => Expr::Lit(Lit::Float(f)).span_with(span),
+ Token::Length(l) => Expr::Lit(Lit::Length(l)).span_with(span),
+ Token::Hex(hex) => color(p, hex, span).map(Lit::Color).map(Expr::Lit),
+ Token::Str(token) => string(p, token, span).map(Lit::Str).map(Expr::Lit),
+
+ // No value.
+ _ => {
+ p.jump(span.start);
+ return None;
}
- }
+ })
+}
- fn eatv(&mut self) -> Option<Token<'s>> {
- self.eat().map(Spanned::value)
- }
+// Parse a content expression: `{...}`.
+fn content(p: &mut Parser) -> Spanned<SynTree> {
+ p.start_group(Group::Brace);
+ p.push_mode(TokenMode::Body);
+ let tree = tree(p);
+ p.pop_mode();
+ tree.span_with(p.end_group())
+}
- fn peekv(&mut self) -> Option<Token<'s>> {
- self.peek().map(Spanned::value)
- }
+/// Parse a parenthesized expression: `(a + b)`, `(1, key="value")`.
+fn parenthesized(p: &mut Parser) -> Spanned<Expr> {
+ p.start_group(Group::Paren);
+ let (dict, coercable) = dict_contents(p);
+ let expr = if coercable {
+ dict.0.into_iter().next().expect("dict is coercable").expr.v
+ } else {
+ Expr::Lit(Lit::Dict(dict))
+ };
+ expr.span_with(p.end_group())
+}
- fn assert(&mut self, token: Token<'_>) {
- assert!(self.check_eat(token).is_some());
- }
+/// Parse a parenthesized function call.
+fn paren_call(p: &mut Parser, name: Spanned<Ident>) -> Spanned<ExprCall> {
+ p.start_group(Group::Paren);
+ let args = dict_contents(p).0;
+ let span = name.span.join(p.end_group());
+ ExprCall { name, args }.span_with(span)
+}
- fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
- if self.check(token) { self.eat() } else { None }
- }
+/// Parse the contents of a dictionary.
+fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
+ let mut dict = LitDict::new();
+ let mut comma_and_keyless = true;
- /// Checks if the next token is of some kind
- fn check(&mut self, token: Token<'_>) -> bool {
- self.peekv() == Some(token)
- }
+ loop {
+ p.skip_white();
+ if p.eof() {
+ break;
+ }
- fn with_span<T>(&mut self, v: T) -> Spanned<T> {
- let span = self.eat().expect("expected token").span;
- v.span_with(span)
- }
+ let entry = if let Some(entry) = dict_entry(p) {
+ entry
+ } else {
+ p.diag_expected("value");
+ continue;
+ };
- fn eof(&mut self) -> bool {
- self.peek().is_none()
- }
+ if let Some(key) = &entry.key {
+ comma_and_keyless = false;
+ p.deco(Decoration::DictKey.span_with(key.span));
+ }
- fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
- let token = self.peek()?;
- self.peeked = None;
- Some(token)
- }
+ let behind = entry.expr.span.end;
+ dict.0.push(entry);
- fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
- let tokens = &mut self.tokens;
- let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
+ p.skip_white();
+ if p.eof() {
+ break;
+ }
- // Check for unclosed groups.
- if Group::is_delimiter(token.v) {
- if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
- return None;
- }
+ if p.eat_if(Token::Comma).is_none() {
+ p.diag_expected_at("comma", behind);
}
- Some(token)
+ comma_and_keyless = false;
}
- fn pos(&self) -> Pos {
- self.peeked
- .flatten()
- .map(|s| s.span.start)
- .unwrap_or_else(|| self.tokens.pos())
- }
+ let coercable = comma_and_keyless && !dict.0.is_empty();
+ (dict, coercable)
}
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum Group {
- Paren,
- Bracket,
- Brace,
- Subheader,
-}
+/// Parse a single entry in a dictionary.
+fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
+ if let Some(ident) = ident(p) {
+ p.skip_white();
+ match p.peek() {
+ // Key-value pair.
+ Some(Token::Equals) => {
+ p.eat_assert(Token::Equals);
+ p.skip_white();
+ if let Some(expr) = expr(p) {
+ Some(LitDictEntry {
+ key: Some(ident.map(|id| DictKey::Str(id.0))),
+ expr,
+ })
+ } else {
+ None
+ }
+ }
-impl Group {
- fn is_delimiter(token: Token<'_>) -> bool {
- matches!(
- token,
- Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain
- )
- }
+ // Function call.
+ Some(Token::LeftParen) => Some(LitDictEntry {
+ key: None,
+ expr: paren_call(p, ident).map(Expr::Call),
+ }),
- fn start(self) -> Option<Token<'static>> {
- match self {
- Self::Paren => Some(Token::LeftParen),
- Self::Bracket => Some(Token::LeftBracket),
- Self::Brace => Some(Token::LeftBrace),
- Self::Subheader => None,
+ // Just an identifier.
+ _ => Some(LitDictEntry {
+ key: None,
+ expr: ident.map(|id| Expr::Lit(Lit::Ident(id))),
+ }),
}
+ } else if let Some(expr) = expr(p) {
+ Some(LitDictEntry { key: None, expr })
+ } else {
+ None
}
+}
- fn end(self) -> Token<'static> {
- match self {
- Self::Paren => Token::RightParen,
- Self::Bracket => Token::RightBracket,
- Self::Brace => Token::RightBrace,
- Self::Subheader => Token::Chain,
- }
+/// Parse an identifier.
+fn ident(p: &mut Parser) -> Option<Spanned<Ident>> {
+ p.eat_map(|token| match token {
+ Token::Ident(id) => Some(Ident(id.into())),
+ _ => None,
+ })
+}
+
+/// Parse a color.
+fn color(p: &mut Parser, hex: &str, span: Span) -> Spanned<RgbaColor> {
+ RgbaColor::from_str(hex)
+ .unwrap_or_else(|_| {
+ // Heal color by assuming black.
+ p.diag(error!(span, "invalid color"));
+ RgbaColor::new_healed(0, 0, 0, 255)
+ })
+ .span_with(span)
+}
+
+/// Parse a string.
+fn string(p: &mut Parser, token: TokenStr, span: Span) -> Spanned<String> {
+ if !token.terminated {
+ p.diag_expected_at("quote", span.end);
}
+
+ resolve::resolve_string(token.string).span_with(span)
}
#[cfg(test)]
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
new file mode 100644
index 00000000..d0735931
--- /dev/null
+++ b/src/parse/parser.rs
@@ -0,0 +1,292 @@
+use std::fmt::{self, Debug, Formatter};
+
+use super::{Scanner, TokenMode, Tokens};
+use crate::diagnostic::Diagnostic;
+use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token};
+use crate::Feedback;
+
+/// A convenient token-based parser.
+pub struct Parser<'s> {
+ tokens: Tokens<'s>,
+ modes: Vec<TokenMode>,
+ groups: Vec<(Pos, Group)>,
+ f: Feedback,
+}
+
+impl<'s> Parser<'s> {
+ /// Create a new parser for the source string.
+ pub fn new(src: &'s str) -> Self {
+ Self {
+ tokens: Tokens::new(src, TokenMode::Body),
+ modes: vec![],
+ groups: vec![],
+ f: Feedback::new(),
+ }
+ }
+
+ /// Finish parsing and return the accumulated feedback.
+ pub fn finish(self) -> Feedback {
+ self.f
+ }
+
+ /// Add a diagnostic to the feedback.
+ pub fn diag(&mut self, diag: Spanned<Diagnostic>) {
+ self.f.diagnostics.push(diag);
+ }
+
+ /// Eat the next token and add a diagnostic that it was not the expected thing.
+ pub fn diag_expected(&mut self, thing: &str) {
+ if let Some(found) = self.eat() {
+ self.diag(error!(
+ found.span,
+ "expected {}, found {}",
+ thing,
+ found.v.name(),
+ ));
+ } else {
+ self.diag_expected_at(thing, self.pos());
+ }
+ }
+
+ /// Add a diagnostic that the thing was expected at the given position.
+ pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) {
+ self.diag(error!(pos, "expected {}", thing));
+ }
+
+ /// Add a diagnostic that the given token was unexpected.
+ pub fn diag_unexpected(&mut self, token: Spanned<Token>) {
+ self.diag(error!(token.span, "unexpected {}", token.v.name()));
+ }
+
+ /// Add a decoration to the feedback.
+ pub fn deco(&mut self, deco: Spanned<Decoration>) {
+ self.f.decorations.push(deco);
+ }
+
+ /// Update the token mode and push the previous mode onto a stack.
+ pub fn push_mode(&mut self, mode: TokenMode) {
+ self.modes.push(self.tokens.mode());
+ self.tokens.set_mode(mode);
+ }
+
+ /// Pop the topmost token mode from the stack.
+ ///
+ /// # Panics
+ /// This panics if there is no mode on the stack.
+ pub fn pop_mode(&mut self) {
+ self.tokens.set_mode(self.modes.pop().expect("no pushed mode"));
+ }
+
+ /// Continues parsing in a group.
+ ///
+ /// When the end delimiter of the group is reached, all subsequent calls to
+ /// `eat()` and `peek()` return `None`. Parsing can only continue with
+ /// a matching call to `end_group`.
+ ///
+ /// # Panics
+ /// This panics if the next token does not start the given group.
+ pub fn start_group(&mut self, group: Group) {
+ let start = self.pos();
+ match group {
+ Group::Paren => self.eat_assert(Token::LeftParen),
+ Group::Bracket => self.eat_assert(Token::LeftBracket),
+ Group::Brace => self.eat_assert(Token::LeftBrace),
+ Group::Subheader => {}
+ }
+ self.groups.push((start, group));
+ }
+
+ /// Ends the parsing of a group and returns the span of the whole group.
+ ///
+ /// # Panics
+ /// This panics if no group was started.
+ pub fn end_group(&mut self) -> Span {
+ debug_assert_eq!(self.peek(), None, "unfinished group");
+
+ let (start, group) = self.groups.pop().expect("unstarted group");
+ let end = match group {
+ Group::Paren => Some(Token::RightParen),
+ Group::Bracket => Some(Token::RightBracket),
+ Group::Brace => Some(Token::RightBrace),
+ Group::Subheader => None,
+ };
+
+ if let Some(token) = end {
+ let next = self.tokens.clone().next().map(|s| s.v);
+ if next == Some(token) {
+ self.tokens.next();
+ } else {
+ self.diag(error!(self.pos(), "expected {}", token.name()));
+ }
+ }
+
+ Span::new(start, self.pos())
+ }
+
+ /// Consume the next token.
+ pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
+ next_group_aware(&mut self.tokens, &self.groups)
+ }
+
+ /// Consume the next token if it is the given one.
+ pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> {
+ // Don't call eat() twice if it succeeds.
+ //
+ // TODO: Benchmark this vs. the naive version.
+ let before = self.pos();
+ let token = self.eat()?;
+ if token.v == t {
+ Some(token)
+ } else {
+ self.jump(before);
+ None
+ }
+ }
+
+ /// Consume the next token if the closure maps to `Some`.
+ pub fn eat_map<T>(
+ &mut self,
+ mut f: impl FnMut(Token<'s>) -> Option<T>,
+ ) -> Option<Spanned<T>> {
+ let before = self.pos();
+ let token = self.eat()?;
+ if let Some(t) = f(token.v) {
+ Some(t.span_with(token.span))
+ } else {
+ self.jump(before);
+ None
+ }
+ }
+
+ /// Consume the next token, debug-asserting that it is the given one.
+ pub fn eat_assert(&mut self, t: Token) {
+ let next = self.eat();
+ debug_assert_eq!(next.map(|s| s.v), Some(t));
+ }
+
+ /// Consume tokens while the condition is true.
+ ///
+ /// Returns how many tokens were eaten.
+ pub fn eat_while(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
+ self.eat_until(|t| !f(t))
+ }
+
+ /// Consume tokens until the condition is true.
+ ///
+ /// Returns how many tokens were eaten.
+ pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
+ let mut count = 0;
+ let mut before = self.pos();
+ while let Some(t) = self.eat() {
+ if f(t.v) {
+ // Undo the last eat by jumping. This prevents
+ // double-tokenization by not peeking all the time.
+ //
+ // TODO: Benchmark this vs. the naive peeking version.
+ self.jump(before);
+ break;
+ }
+ before = self.pos();
+ count += 1;
+ }
+ count
+ }
+
+ /// Peek at the next token without consuming it.
+ pub fn peek(&self) -> Option<Token<'s>> {
+ next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v)
+ }
+
+ /// Checks whether the next token fulfills a condition.
+ ///
+ /// Returns `false` if there is no next token.
+ pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool {
+ self.peek().map(f).unwrap_or(false)
+ }
+
+ /// Whether there is no next token.
+ pub fn eof(&self) -> bool {
+ self.peek().is_none()
+ }
+
+ /// Skip whitespace tokens.
+ pub fn skip_white(&mut self) {
+ self.eat_while(|t| {
+ matches!(t,
+ Token::Space(_) |
+ Token::LineComment(_) |
+ Token::BlockComment(_))
+ });
+ }
+
+ /// The position in the string at which the last token ends and next token
+ /// will start.
+ pub fn pos(&self) -> Pos {
+ self.tokens.pos()
+ }
+
+ /// Jump to a position in the source string.
+ pub fn jump(&mut self, pos: Pos) {
+ self.tokens.jump(pos);
+ }
+
+ /// The full source string.
+ pub fn src(&self) -> &'s str {
+ self.scanner().src()
+ }
+
+ /// The part of the source string that is spanned by the given span.
+ pub fn get(&self, span: Span) -> &'s str {
+ self.scanner().get(span.start.to_usize() .. span.end.to_usize())
+ }
+
+ /// The underlying scanner.
+ pub fn scanner(&self) -> &Scanner<'s> {
+ self.tokens.scanner()
+ }
+}
+
+/// Wraps `tokens.next()`, but is group-aware.
+fn next_group_aware<'s>(
+ tokens: &mut Tokens<'s>,
+ groups: &[(Pos, Group)],
+) -> Option<Spanned<Token<'s>>> {
+ let pos = tokens.pos();
+ let token = tokens.next();
+
+ let group = match token?.v {
+ Token::RightParen => Group::Paren,
+ Token::RightBracket => Group::Bracket,
+ Token::RightBrace => Group::Brace,
+ Token::Chain => Group::Subheader,
+ _ => return token,
+ };
+
+ if groups.iter().rev().any(|&(_, g)| g == group) {
+ tokens.jump(pos);
+ None
+ } else {
+ token
+ }
+}
+
+impl Debug for Parser<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ let s = self.scanner();
+ write!(f, "Parser({}|{})", s.eaten(), s.rest())
+ }
+}
+
+/// A group, confined by optional start and end delimiters.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Group {
+ /// A parenthesized group: `(...)`.
+ Paren,
+ /// A bracketed group: `[...]`.
+ Bracket,
+ /// A curly-braced group: `{...}`.
+ Brace,
+ /// A group ended by a chained subheader or a closing bracket:
+ /// `... >>`, `...]`.
+ Subheader,
+}
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index f9919373..6036a74e 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -3,7 +3,7 @@
use super::{is_newline, Scanner};
use crate::syntax::{Ident, NodeRaw};
-/// Resolves all escape sequences in a string.
+/// Resolve all escape sequences in a string.
pub fn resolve_string(string: &str) -> String {
let mut out = String::with_capacity(string.len());
let mut s = Scanner::new(string);
@@ -48,10 +48,10 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
}
-/// Resolves the language tag and trims the raw text.
-pub fn resolve_raw(raw: &str, backticks: usize) -> NodeRaw {
+/// Resolve the language tag and trim the raw text.
+pub fn resolve_raw(text: &str, backticks: usize) -> NodeRaw {
if backticks > 1 {
- let (tag, inner) = split_at_lang_tag(raw);
+ let (tag, inner) = split_at_lang_tag(text);
let (lines, had_newline) = trim_and_split_raw(inner);
NodeRaw {
lang: Ident::new(tag),
@@ -61,7 +61,7 @@ pub fn resolve_raw(raw: &str, backticks: usize) -> NodeRaw {
} else {
NodeRaw {
lang: None,
- lines: split_lines(raw),
+ lines: split_lines(text),
inline: true,
}
}
@@ -76,7 +76,7 @@ fn split_at_lang_tag(raw: &str) -> (&str, &str) {
)
}
-/// Trims raw text and splits it into lines.
+/// Trim raw text and split it into lines.
///
/// Returns whether at least one newline was contained in `raw`.
fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
@@ -101,7 +101,7 @@ fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
(lines, had_newline)
}
-/// Splits a string into a vector of lines
+/// Split a string into a vector of lines
/// (respecting Unicode, Unix, Mac and Windows line breaks).
pub fn split_lines(text: &str) -> Vec<String> {
let mut s = Scanner::new(text);
@@ -147,8 +147,8 @@ mod tests {
#[test]
fn test_split_at_lang_tag() {
- fn test(raw: &str, lang: &str, inner: &str) {
- assert_eq!(split_at_lang_tag(raw), (lang, inner));
+ fn test(text: &str, lang: &str, inner: &str) {
+ assert_eq!(split_at_lang_tag(text), (lang, inner));
}
test("typst it!", "typst", " it!");
@@ -161,8 +161,8 @@ mod tests {
#[test]
fn test_trim_raw() {
- fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(trim_and_split_raw(raw).0, expected);
+ fn test(text: &str, expected: Vec<&str>) {
+ assert_eq!(trim_and_split_raw(text).0, expected);
}
test(" hi", vec!["hi"]);
@@ -178,8 +178,8 @@ mod tests {
#[test]
fn test_split_lines() {
- fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(split_lines(raw), expected);
+ fn test(text: &str, expected: Vec<&str>) {
+ assert_eq!(split_lines(text), expected);
}
test("raw\ntext", vec!["raw", "text"]);
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 9447222d..6ff8c801 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -4,7 +4,8 @@ use std::fmt::{self, Debug, Formatter};
use std::slice::SliceIndex;
use std::str::Chars;
-/// A low-level featureful char scanner.
+/// A low-level featureful char-based scanner.
+#[derive(Clone)]
pub struct Scanner<'s> {
src: &'s str,
iter: Chars<'s>,
@@ -98,24 +99,22 @@ impl<'s> Scanner<'s> {
/// Checks whether the next character fulfills a condition.
///
- /// Returns `false` is there is no next character.
+ /// Returns `false` if there is no next character.
pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
self.peek().map(f).unwrap_or(false)
}
- /// Go back to the where the index says.
- fn reset(&mut self) {
- self.iter = self.src[self.index ..].chars();
+ /// Whether the end of the source string is reached.
+ pub fn eof(&self) -> bool {
+ self.iter.as_str().is_empty()
}
-}
-impl<'s> Scanner<'s> {
- /// The current index in the string.
+ /// The current index in the source string.
pub fn index(&self) -> usize {
self.index
}
- /// The previous index in the string.
+ /// The previous index in the source string.
pub fn prev_index(&self) -> usize {
self.src[.. self.index]
.chars()
@@ -124,6 +123,17 @@ impl<'s> Scanner<'s> {
.unwrap_or(0)
}
+ /// Jump to an index in the source string.
+ pub fn jump(&mut self, index: usize) {
+ self.index = index;
+ self.reset();
+ }
+
+ /// The full source string.
+ pub fn src(&self) -> &'s str {
+ self.src
+ }
+
/// Slice a part out of the source string.
pub fn get<I>(&self, index: I) -> &'s str
where
@@ -132,11 +142,6 @@ impl<'s> Scanner<'s> {
&self.src[index]
}
- /// The full source string.
- pub fn src(&self) -> &'s str {
- self.src
- }
-
/// The full source string up to the current index.
pub fn eaten(&self) -> &'s str {
&self.src[.. self.index]
@@ -151,6 +156,11 @@ impl<'s> Scanner<'s> {
pub fn rest(&self) -> &'s str {
&self.src[self.index ..]
}
+
+ /// Go back to where the index says.
+ fn reset(&mut self) {
+ self.iter = self.src[self.index ..].chars();
+ }
}
impl Debug for Scanner<'_> {
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
index 9d6b673f..a1b1fb13 100644
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@@ -14,7 +14,7 @@ use crate::syntax::*;
use Decoration::*;
use SynNode::{
- Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I,
+ Linebreak as L, Parbreak as P, Space as S, ToggleBolder as B, ToggleItalic as I,
};
fn T(text: &str) -> SynNode {
@@ -80,21 +80,21 @@ fn Str(string: &str) -> Expr {
macro_rules! Dict {
(@dict=$dict:expr,) => {};
- (@dict=$dict:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{
+ (@dict=$dict:expr, $key:expr => $expr:expr $(, $($tts:tt)*)?) => {{
let key = Into::<Spanned<&str>>::into($key);
let key = key.map(Into::<DictKey>::into);
- let value = Into::<Spanned<Expr>>::into($value);
- $dict.0.push(LitDictEntry { key: Some(key), value });
+ let expr = Into::<Spanned<Expr>>::into($expr);
+ $dict.0.push(LitDictEntry { key: Some(key), expr });
Dict![@dict=$dict, $($($tts)*)?];
}};
- (@dict=$dict:expr, $value:expr $(, $($tts:tt)*)?) => {
- let value = Into::<Spanned<Expr>>::into($value);
- $dict.0.push(LitDictEntry { key: None, value });
+ (@dict=$dict:expr, $expr:expr $(, $($tts:tt)*)?) => {
+ let expr = Into::<Spanned<Expr>>::into($expr);
+ $dict.0.push(LitDictEntry { key: None, expr });
Dict![@dict=$dict, $($($tts)*)?];
};
(@$($tts:tt)*) => {{
#[allow(unused_mut)]
- let mut dict = LitDict::default();
+ let mut dict = LitDict::new();
Dict![@dict=dict, $($tts)*];
dict
}};
@@ -344,7 +344,6 @@ fn test_parse_function_names() {
fn test_parse_chaining() {
// Things the parser has to make sense of
t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Dict![Float(5.0), Float(2.1)], Tree![F!("you")]));
- t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")]));
t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![
F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi")))
]));
@@ -354,7 +353,8 @@ fn test_parse_chaining() {
// Errors for unclosed / empty predecessor groups
e!("[hi: (5.0, 2.1 >> you]" => s(15, 15, "expected closing paren"));
- e!("[>> abc]" => s(1, 1, "expected function name"));
+ e!("[>> abc]" => s(1, 1, "expected function name"));
+ e!("[box >>][Hi]" => s(7, 7, "expected function name"));
}
#[test]
@@ -482,7 +482,7 @@ fn test_parse_expressions() {
// Invalid expressions.
v!("4pt--" => Len(Length::pt(4.0)));
- e!("[val: 4pt--]" => s(10, 11, "dangling minus"),
+ e!("[val: 4pt--]" => s(10, 11, "missing factor"),
s(6, 10, "missing right summand"));
v!("3mm+4pt*" => Binary(Add, Len(Length::mm(3.0)), Len(Length::pt(4.0))));
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 9f30f587..72d7b2d9 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,17 +1,19 @@
//! Tokenization.
+use std::fmt::{self, Debug, Formatter};
+
use super::{is_newline, Scanner};
use crate::length::Length;
-use crate::syntax::{is_ident, Pos, Span, SpanWith, Spanned, Token};
+use crate::syntax::token::*;
+use crate::syntax::{is_ident, Pos, Span, SpanWith, Spanned};
use TokenMode::*;
/// An iterator over the tokens of a string of source code.
-#[derive(Debug)]
+#[derive(Clone)]
pub struct Tokens<'s> {
s: Scanner<'s>,
mode: TokenMode,
- stack: Vec<TokenMode>,
}
/// Whether to tokenize in header mode which yields expression, comma and
@@ -26,23 +28,17 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
pub fn new(src: &'s str, mode: TokenMode) -> Self {
- Self {
- s: Scanner::new(src),
- mode,
- stack: vec![],
- }
+ Self { s: Scanner::new(src), mode }
}
- /// Change the token mode and push the old one on a stack.
- pub fn push_mode(&mut self, mode: TokenMode) {
- self.stack.push(self.mode);
- self.mode = mode;
+ /// Get the current token mode.
+ pub fn mode(&self) -> TokenMode {
+ self.mode
}
- /// Pop the old token mode from the stack. This panics if there is no mode
- /// on the stack.
- pub fn pop_mode(&mut self) {
- self.mode = self.stack.pop().expect("no pushed mode");
+ /// Change the token mode.
+ pub fn set_mode(&mut self, mode: TokenMode) {
+ self.mode = mode;
}
/// The position in the string at which the last token ends and next token
@@ -50,6 +46,16 @@ impl<'s> Tokens<'s> {
pub fn pos(&self) -> Pos {
self.s.index().into()
}
+
+ /// Jump to a position in the source string.
+ pub fn jump(&mut self, pos: Pos) {
+ self.s.jump(pos.to_usize());
+ }
+
+ /// The underlying scanner.
+ pub fn scanner(&self) -> &Scanner<'s> {
+ &self.s
+ }
}
impl<'s> Iterator for Tokens<'s> {
@@ -59,8 +65,12 @@ impl<'s> Iterator for Tokens<'s> {
fn next(&mut self) -> Option<Self::Item> {
let start = self.s.index();
let token = match self.s.eat()? {
- // Whitespace.
- c if c.is_whitespace() => self.read_whitespace(c),
+ // Whitespace with fast path for just a single space.
+ ' ' if !self.s.check(|c| c.is_whitespace()) => Token::Space(0),
+ c if c.is_whitespace() => {
+ self.s.jump(start);
+ self.read_whitespace()
+ }
// Comments.
'/' if self.s.eat_if('/') => self.read_line_comment(),
@@ -76,8 +86,8 @@ impl<'s> Iterator for Tokens<'s> {
// Syntactic elements in body text.
'*' if self.mode == Body => Token::Star,
'_' if self.mode == Body => Token::Underscore,
- '`' if self.mode == Body => self.read_raw(),
'#' if self.mode == Body => Token::Hashtag,
+ '`' if self.mode == Body => self.read_raw(),
'~' if self.mode == Body => Token::Text("\u{00A0}"),
'\\' if self.mode == Body => self.read_escaped(),
@@ -88,12 +98,12 @@ impl<'s> Iterator for Tokens<'s> {
',' if self.mode == Header => Token::Comma,
'=' if self.mode == Header => Token::Equals,
'>' if self.mode == Header && self.s.eat_if('>') => Token::Chain,
-
- // Expressions in headers.
'+' if self.mode == Header => Token::Plus,
'-' if self.mode == Header => Token::Hyphen,
'*' if self.mode == Header => Token::Star,
'/' if self.mode == Header => Token::Slash,
+
+ // Expressions in headers.
'#' if self.mode == Header => self.read_hex(),
'"' if self.mode == Header => self.read_string(),
@@ -107,18 +117,7 @@ impl<'s> Iterator for Tokens<'s> {
}
impl<'s> Tokens<'s> {
- fn read_whitespace(&mut self, first: char) -> Token<'s> {
- // Shortcut for common case of exactly one space.
- if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
- return Token::Space(0);
- }
-
- // Uneat the first char if it's a newline, so that it's counted in the
- // loop.
- if is_newline(first) {
- self.s.uneat();
- }
-
+ fn read_whitespace(&mut self) -> Token<'s> {
// Count the number of newlines.
let mut newlines = 0;
while let Some(c) = self.s.eat_merging_crlf() {
@@ -169,27 +168,6 @@ impl<'s> Tokens<'s> {
Token::BlockComment(self.s.get(start .. end))
}
- fn read_hex(&mut self) -> Token<'s> {
- // This parses more than the permissable 0-9, a-f, A-F character ranges
- // to provide nicer error messages later.
- Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric()))
- }
-
- fn read_string(&mut self) -> Token<'s> {
- let mut escaped = false;
- Token::Str {
- string: self.s.eat_until(|c| {
- if c == '"' && !escaped {
- true
- } else {
- escaped = c == '\\' && !escaped;
- false
- }
- }),
- terminated: self.s.eat_if('"'),
- }
- }
-
fn read_raw(&mut self) -> Token<'s> {
let mut backticks = 1;
while self.s.eat_if('`') {
@@ -210,11 +188,11 @@ impl<'s> Tokens<'s> {
let terminated = found == backticks;
let end = self.s.index() - if terminated { found } else { 0 };
- Token::Raw {
- raw: self.s.get(start .. end),
+ Token::Raw(TokenRaw {
+ text: self.s.get(start .. end),
backticks,
terminated,
- }
+ })
}
fn read_escaped(&mut self) -> Token<'s> {
@@ -228,10 +206,10 @@ impl<'s> Tokens<'s> {
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
- Token::UnicodeEscape {
+ Token::UnicodeEscape(TokenUnicodeEscape {
sequence: self.s.eat_while(|c| c.is_ascii_hexdigit()),
terminated: self.s.eat_if('}'),
- }
+ })
}
c if c.is_whitespace() => Token::Backslash,
_ => Token::Text("\\"),
@@ -241,6 +219,27 @@ impl<'s> Tokens<'s> {
}
}
+ fn read_hex(&mut self) -> Token<'s> {
+ // This parses more than the permissible 0-9, a-f, A-F character ranges
+ // to provide nicer error messages later.
+ Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric()))
+ }
+
+ fn read_string(&mut self) -> Token<'s> {
+ let mut escaped = false;
+ Token::Str(TokenStr {
+ string: self.s.eat_until(|c| {
+ if c == '"' && !escaped {
+ true
+ } else {
+ escaped = c == '\\' && !escaped;
+ false
+ }
+ }),
+ terminated: self.s.eat_if('"'),
+ })
+ }
+
fn read_text_or_expr(&mut self, start: usize) -> Token<'s> {
let body = self.mode == Body;
let header = self.mode == Header;
@@ -268,6 +267,12 @@ impl<'s> Tokens<'s> {
}
}
+impl Debug for Tokens<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "Tokens({}|{})", self.s.eaten(), self.s.rest())
+ }
+}
+
fn parse_expr(text: &str) -> Token<'_> {
if let Ok(b) = text.parse::<bool>() {
Token::Bool(b)
@@ -303,13 +308,13 @@ mod tests {
};
fn Str(string: &str, terminated: bool) -> Token {
- Token::Str { string, terminated }
+ Token::Str(TokenStr { string, terminated })
}
- fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
- Token::Raw { raw, backticks, terminated }
+ fn Raw(text: &str, backticks: usize, terminated: bool) -> Token {
+ Token::Raw(TokenRaw { text, backticks, terminated })
}
fn UE(sequence: &str, terminated: bool) -> Token {
- Token::UnicodeEscape { sequence, terminated }
+ Token::UnicodeEscape(TokenUnicodeEscape { sequence, terminated })
}
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
@@ -389,36 +394,65 @@ mod tests {
}
#[test]
+ fn tokenize_escaped_symbols() {
+ t!(Body, r"\\" => T(r"\"));
+ t!(Body, r"\[" => T("["));
+ t!(Body, r"\]" => T("]"));
+ t!(Body, r"\*" => T("*"));
+ t!(Body, r"\_" => T("_"));
+ t!(Body, r"\`" => T("`"));
+ t!(Body, r"\/" => T("/"));
+ t!(Body, r"\u{2603}" => UE("2603", true));
+ t!(Body, r"\u{26A4" => UE("26A4", false));
+ t!(Body, r#"\""# => T("\""));
+ }
+
+ #[test]
+ fn tokenize_unescapable_symbols() {
+ t!(Body, r"\a" => T("\\"), T("a"));
+ t!(Body, r"\:" => T(r"\"), T(":"));
+ t!(Body, r"\=" => T(r"\"), T("="));
+ t!(Body, r"\u{2GA4" => UE("2", false), T("GA4"));
+ t!(Body, r"\u{ " => UE("", false), Space(0));
+ t!(Body, r"\u" => T("\\"), T("u"));
+ t!(Header, r"\\\\" => Invalid(r"\\\\"));
+ t!(Header, r"\a" => Invalid(r"\a"));
+ t!(Header, r"\:" => Invalid(r"\"), Colon);
+ t!(Header, r"\=" => Invalid(r"\"), Equals);
+ t!(Header, r"\," => Invalid(r"\"), Comma);
+ }
+
+ #[test]
fn tokenize_header_tokens() {
- t!(Header, "__main__" => Id("__main__"));
- t!(Header, "_func_box" => Id("_func_box"));
- t!(Header, ">main" => Invalid(">main"));
- t!(Header, "🌓, 🌍," => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
- t!(Header, "{abc}" => LB, Id("abc"), RB);
- t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP);
- t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
- t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
- t!(Header, "=3.14" => Equals, Num(3.14));
- t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
- t!(Header, "a:b" => Id("a"), Colon, Id("b"));
- t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma);
- t!(Body, "c=d, " => T("c=d,"), S(0));
- t!(Body, "a: b" => T("a:"), S(0), T("b"));
- t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
- Id("x"), Equals, Num(1.0));
+ t!(Header, "__main__" => Id("__main__"));
+ t!(Header, "_func_box" => Id("_func_box"));
+ t!(Header, ">main" => Invalid(">main"));
+ t!(Header, "🌓, 🌍," => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma);
+ t!(Header, "{abc}" => LB, Id("abc"), RB);
+ t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP);
+ t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0)));
+ t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g"));
+ t!(Header, "=3.14" => Equals, Num(3.14));
+ t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1"));
+ t!(Header, "a:b" => Id("a"), Colon, Id("b"));
+ t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma);
+ t!(Body, "c=d, " => T("c=d,"), S(0));
+ t!(Body, "a: b" => T("a:"), S(0), T("b"));
+ t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0),
+ Id("x"), Equals, Num(1.0));
}
#[test]
fn tokenize_numeric_values() {
- t!(Header, "12.3e5" => Num(12.3e5));
- t!(Header, "120%" => Num(1.2));
- t!(Header, "12e4%" => Num(1200.0));
- t!(Header, "1e5in" => Len(Length::inches(100000.0)));
- t!(Header, "2.3cm" => Len(Length::cm(2.3)));
- t!(Header, "02.4mm" => Len(Length::mm(2.4)));
- t!(Header, "2.4.cm" => Invalid("2.4.cm"));
- t!(Header, "#6ae6dd" => Hex("6ae6dd"));
- t!(Header, "#8A083c" => Hex("8A083c"));
+ t!(Header, "12.3e5" => Num(12.3e5));
+ t!(Header, "120%" => Num(1.2));
+ t!(Header, "12e4%" => Num(1200.0));
+ t!(Header, "1e5in" => Len(Length::inches(100000.0)));
+ t!(Header, "2.3cm" => Len(Length::cm(2.3)));
+ t!(Header, "02.4mm" => Len(Length::mm(2.4)));
+ t!(Header, "2.4.cm" => Invalid("2.4.cm"));
+ t!(Header, "#6ae6dd" => Hex("6ae6dd"));
+ t!(Header, "#8A083c" => Hex("8A083c"));
}
#[test]
@@ -447,35 +481,6 @@ mod tests {
}
#[test]
- fn tokenize_escaped_symbols() {
- t!(Body, r"\\" => T(r"\"));
- t!(Body, r"\[" => T("["));
- t!(Body, r"\]" => T("]"));
- t!(Body, r"\*" => T("*"));
- t!(Body, r"\_" => T("_"));
- t!(Body, r"\`" => T("`"));
- t!(Body, r"\/" => T("/"));
- t!(Body, r"\u{2603}" => UE("2603", true));
- t!(Body, r"\u{26A4" => UE("26A4", false));
- t!(Body, r#"\""# => T("\""));
- }
-
- #[test]
- fn tokenize_unescapable_symbols() {
- t!(Body, r"\a" => T("\\"), T("a"));
- t!(Body, r"\:" => T(r"\"), T(":"));
- t!(Body, r"\=" => T(r"\"), T("="));
- t!(Body, r"\u{2GA4" => UE("2", false), T("GA4"));
- t!(Body, r"\u{ " => UE("", false), Space(0));
- t!(Body, r"\u" => T("\\"), T("u"));
- t!(Header, r"\\\\" => Invalid(r"\\\\"));
- t!(Header, r"\a" => Invalid(r"\a"));
- t!(Header, r"\:" => Invalid(r"\"), Colon);
- t!(Header, r"\=" => Invalid(r"\"), Equals);
- t!(Header, r"\," => Invalid(r"\"), Comma);
- }
-
- #[test]
fn tokenize_with_spans() {
ts!(Body, "hello" => s(0, 5, T("hello")));
ts!(Body, "ab\r\nc" => s(0, 2, T("ab")), s(2, 4, S(1)), s(4, 5, T("c")));
diff --git a/src/syntax/expr.rs b/src/syntax/ast/expr.rs
index 7f4d03d5..c07c6216 100644
--- a/src/syntax/expr.rs
+++ b/src/syntax/ast/expr.rs
@@ -1,9 +1,8 @@
//! Expressions.
-use super::span::{SpanWith, Spanned};
-use super::{Decoration, Ident, Lit, LitDict};
use crate::eval::Value;
use crate::layout::LayoutContext;
+use crate::syntax::{Decoration, Ident, Lit, LitDict, SpanWith, Spanned};
use crate::Feedback;
/// An expression.
@@ -50,7 +49,7 @@ impl ExprUnary {
}
/// A unary operator.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum UnOp {
/// The negation operator: `-`.
Neg,
@@ -80,7 +79,7 @@ impl ExprBinary {
}
/// A binary operator.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum BinOp {
/// The addition operator: `+`.
Add,
diff --git a/src/syntax/lit.rs b/src/syntax/ast/lit.rs
index 3cd94583..bbdd0c81 100644
--- a/src/syntax/lit.rs
+++ b/src/syntax/ast/lit.rs
@@ -1,10 +1,10 @@
//! Literals.
-use super::{Expr, Ident, SpanWith, Spanned, SynTree};
use crate::color::RgbaColor;
use crate::eval::{DictKey, DictValue, SpannedEntry, Value};
use crate::layout::LayoutContext;
use crate::length::Length;
+use crate::syntax::{Expr, Ident, SpanWith, Spanned, SynTree};
use crate::{DynFuture, Feedback};
/// A literal.
@@ -55,7 +55,7 @@ impl Lit {
}
/// A dictionary literal: `(false, 12cm, greeting = "hi")`.
-#[derive(Debug, Default, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
pub struct LitDict(pub Vec<LitDictEntry>);
impl LitDict {
@@ -74,8 +74,8 @@ impl LitDict {
let mut dict = DictValue::new();
for entry in &self.0 {
- let val = entry.value.v.eval(ctx, f).await;
- let spanned = val.span_with(entry.value.span);
+ let val = entry.expr.v.eval(ctx, f).await;
+ let spanned = val.span_with(entry.expr.span);
if let Some(key) = &entry.key {
dict.insert(&key.v, SpannedEntry::new(key.span, spanned));
} else {
@@ -94,5 +94,5 @@ pub struct LitDictEntry {
/// The key of the entry if there was one: `greeting`.
pub key: Option<Spanned<DictKey>>,
/// The value of the entry: `"hi"`.
- pub value: Spanned<Expr>,
+ pub expr: Spanned<Expr>,
}
diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs
new file mode 100644
index 00000000..56ae4134
--- /dev/null
+++ b/src/syntax/ast/mod.rs
@@ -0,0 +1,9 @@
+//! Abstract syntax tree definition.
+
+mod expr;
+mod lit;
+mod tree;
+
+pub use expr::*;
+pub use lit::*;
+pub use tree::*;
diff --git a/src/syntax/tree.rs b/src/syntax/ast/tree.rs
index 80bca399..03aa3439 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/ast/tree.rs
@@ -1,7 +1,6 @@
//! The syntax tree.
-use super::span::{SpanVec, Spanned};
-use super::{Expr, Ident};
+use crate::syntax::{Expr, Ident, SpanVec, Spanned};
/// A collection of nodes which form a tree together with the nodes' children.
pub type SynTree = SpanVec<SynNode>;
@@ -11,7 +10,10 @@ pub type SynTree = SpanVec<SynNode>;
#[derive(Debug, Clone, PartialEq)]
pub enum SynNode {
/// Whitespace containing less than two newlines.
- Spacing,
+ Space,
+ /// Plain text.
+ Text(String),
+
/// A forced line break.
Linebreak,
/// A paragraph break.
@@ -20,16 +22,25 @@ pub enum SynNode {
ToggleItalic,
/// Bolder was enabled / disabled.
ToggleBolder,
- /// Plain text.
- Text(String),
- /// An optionally syntax-highlighted raw block.
- Raw(NodeRaw),
+
/// A section heading.
Heading(NodeHeading),
+ /// An optionally syntax-highlighted raw block.
+ Raw(NodeRaw),
+
/// An expression.
Expr(Expr),
}
+/// A section heading.
+#[derive(Debug, Clone, PartialEq)]
+pub struct NodeHeading {
+ /// The section depth (how many hashtags minus 1).
+ pub level: Spanned<u8>,
+ /// The contents of the heading.
+ pub contents: SynTree,
+}
+
/// A raw block, rendered in monospace with optional syntax highlighting.
///
/// Raw blocks start with an arbitrary number of backticks and end with the same
@@ -108,12 +119,3 @@ pub struct NodeRaw {
/// are inline-level when they contain no newlines.
pub inline: bool,
}
-
-/// A section heading.
-#[derive(Debug, Clone, PartialEq)]
-pub struct NodeHeading {
- /// The section depth (how many hashtags minus 1).
- pub level: Spanned<u8>,
- /// The contents of the heading.
- pub contents: SynTree,
-}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index f4472df5..98e1b4d7 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,19 +1,10 @@
//! Syntax types.
-mod expr;
+pub mod ast;
+pub mod token;
+
mod ident;
-mod lit;
mod span;
-mod token;
-mod tree;
-
-/// Abstract syntax tree definition.
-pub mod ast {
- use super::*;
- pub use expr::*;
- pub use lit::*;
- pub use tree::*;
-}
pub use ast::*;
pub use ident::*;
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 62929706..179c46de 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -13,7 +13,7 @@ thread_local! {
/// Annotate a value with a span.
pub trait SpanWith: Sized {
/// Wraps `self` in a `Spanned` with the given span.
- fn span_with(self, span: Span) -> Spanned<Self> {
+ fn span_with(self, span: impl Into<Span>) -> Spanned<Self> {
Spanned::new(self, span)
}
}
@@ -50,8 +50,8 @@ pub struct Spanned<T> {
impl<T> Spanned<T> {
/// Create a new instance from a value and its span.
- pub fn new(v: T, span: Span) -> Self {
- Self { v, span }
+ pub fn new(v: T, span: impl Into<Span>) -> Self {
+ Self { v, span: span.into() }
}
/// Create a new instance from a value with the zero span.
@@ -123,16 +123,16 @@ impl Span {
}
/// Create a new span with the earlier start and later end position.
- pub fn merge(a: Self, b: Self) -> Self {
+ pub fn join(self, other: Self) -> Self {
Self {
- start: a.start.min(b.start),
- end: a.end.max(b.end),
+ start: self.start.min(other.start),
+ end: self.end.max(other.end),
}
}
/// Expand a span by merging it with another span.
pub fn expand(&mut self, other: Self) {
- *self = Self::merge(*self, other)
+ *self = self.join(other)
}
/// When set to `false` comparisons with `PartialEq` ignore spans.
@@ -164,6 +164,24 @@ impl PartialEq for Span {
}
}
+impl<T> From<T> for Span
+where
+ T: Into<Pos> + Copy,
+{
+ fn from(pos: T) -> Self {
+ Self::at(pos)
+ }
+}
+
+impl<T> From<(T, T)> for Span
+where
+ T: Into<Pos>,
+{
+ fn from((start, end): (T, T)) -> Self {
+ Self::new(start, end)
+ }
+}
+
impl Debug for Span {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "<{:?}-{:?}>", self.start, self.end)
@@ -185,6 +203,12 @@ impl Pos {
}
}
+impl Offset for Pos {
+ fn offset(self, by: Self) -> Self {
+ Pos(self.0 + by.0)
+ }
+}
+
impl From<u32> for Pos {
fn from(index: u32) -> Self {
Self(index)
@@ -197,12 +221,6 @@ impl From<usize> for Pos {
}
}
-impl Offset for Pos {
- fn offset(self, by: Self) -> Self {
- Pos(self.0 + by.0)
- }
-}
-
impl Debug for Pos {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
Debug::fmt(&self.0, f)
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index 4cb8501f..5c159bbd 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -1,4 +1,4 @@
-//! Tokenization.
+//! Token definition.
use crate::length::Length;
@@ -8,6 +8,8 @@ pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Space(usize),
+ /// A consecutive non-markup string.
+ Text(&'s str),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
@@ -15,6 +17,20 @@ pub enum Token<'s> {
/// can contain nested block comments.
BlockComment(&'s str),
+ /// A star. It can appear in a function header where it signifies the
+ /// multiplication of expressions or the body where it modifies the styling.
+ Star,
+ /// An underscore in body-text.
+ Underscore,
+ /// A backslash followed by whitespace in text.
+ Backslash,
+ /// A hashtag indicating a section heading.
+ Hashtag,
+ /// A raw block.
+ Raw(TokenRaw<'s>),
+ /// A unicode escape sequence.
+ UnicodeEscape(TokenUnicodeEscape<'s>),
+
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
@@ -28,29 +44,24 @@ pub enum Token<'s> {
/// A right parenthesis in a function header: `)`.
RightParen,
- /// A double forward chevron in a function header: `>>`.
- Chain,
-
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
+ /// A double forward chevron in a function header: `>>`.
+ Chain,
+ /// A plus in a function header, signifying the addition of expressions.
+ Plus,
+ /// A hyphen in a function header, signifying the subtraction of
+ /// expressions.
+ Hyphen,
+ /// A slash in a function header, signifying the division of expressions.
+ Slash,
/// An identifier in a function header: `center`.
Ident(&'s str),
- /// A quoted string in a function header: `"..."`.
- Str {
- /// The string inside the quotes.
- ///
- /// _Note_: If the string contains escape sequences these are not yet
- /// applied to be able to just store a string slice here instead of
- /// a String. The escaping is done later in the parser.
- string: &'s str,
- /// Whether the closing quote was present.
- terminated: bool,
- },
/// A boolean in a function header: `true | false`.
Bool(bool),
/// A number in a function header: `3.14`.
@@ -59,48 +70,44 @@ pub enum Token<'s> {
Length(Length),
/// A hex value in a function header: `#20d82a`.
Hex(&'s str),
- /// A plus in a function header, signifying the addition of expressions.
- Plus,
- /// A hyphen in a function header, signifying the subtraction of
- /// expressions.
- Hyphen,
- /// A slash in a function header, signifying the division of expressions.
- Slash,
+ /// A quoted string in a function header: `"..."`.
+ Str(TokenStr<'s>),
- /// A star. It can appear in a function header where it signifies the
- /// multiplication of expressions or the body where it modifies the styling.
- Star,
- /// An underscore in body-text.
- Underscore,
- /// A backslash followed by whitespace in text.
- Backslash,
+ /// Things that are not valid in the context they appeared in.
+ Invalid(&'s str),
+}
- /// A hashtag token in the body can indicate compute mode or headings.
- Hashtag,
+/// A quoted string in a function header: `"..."`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenStr<'s> {
+ /// The string inside the quotes.
+ ///
+ /// _Note_: If the string contains escape sequences these are not yet
+ /// applied to be able to just store a string slice here instead of
+ /// a `String`. The resolving is done later in the parser.
+ pub string: &'s str,
+ /// Whether the closing quote was present.
+ pub terminated: bool,
+}
- /// A unicode escape sequence.
- UnicodeEscape {
- /// The escape sequence between two braces.
- sequence: &'s str,
- /// Whether the closing brace was present.
- terminated: bool,
- },
-
- /// Raw block.
- Raw {
- /// The raw text between the backticks.
- raw: &'s str,
- /// The number of opening backticks.
- backticks: usize,
- /// Whether all closing backticks were present.
- terminated: bool,
- },
-
- /// Any other consecutive string.
- Text(&'s str),
+/// A unicode escape sequence.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenUnicodeEscape<'s> {
+ /// The escape sequence between two braces.
+ pub sequence: &'s str,
+ /// Whether the closing brace was present.
+ pub terminated: bool,
+}
- /// Things that are not valid in the context they appeared in.
- Invalid(&'s str),
+/// A raw block.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenRaw<'s> {
+ /// The raw text between the backticks.
+ pub text: &'s str,
+ /// The number of opening backticks.
+ pub backticks: usize,
+ /// Whether all closing backticks were present.
+ pub terminated: bool,
}
impl<'s> Token<'s> {
@@ -108,34 +115,40 @@ impl<'s> Token<'s> {
pub fn name(self) -> &'static str {
match self {
Self::Space(_) => "space",
+ Self::Text(_) => "text",
+
Self::LineComment(_) => "line comment",
Self::BlockComment(_) => "block comment",
+
+ Self::Star => "star",
+ Self::Underscore => "underscore",
+ Self::Backslash => "backslash",
+ Self::Hashtag => "hashtag",
+ Self::Raw { .. } => "raw block",
+ Self::UnicodeEscape { .. } => "unicode escape sequence",
+
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
- Self::LeftParen => "opening paren",
- Self::RightParen => "closing paren",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
- Self::Chain => "function chain operator",
+ Self::LeftParen => "opening paren",
+ Self::RightParen => "closing paren",
+
Self::Colon => "colon",
Self::Comma => "comma",
Self::Equals => "equals sign",
+ Self::Chain => "function chaining operator",
+ Self::Plus => "plus sign",
+ Self::Hyphen => "minus sign",
+ Self::Slash => "slash",
+
Self::Ident(_) => "identifier",
- Self::Str { .. } => "string",
Self::Bool(_) => "bool",
Self::Number(_) => "number",
Self::Length(_) => "length",
Self::Hex(_) => "hex value",
- Self::Plus => "plus",
- Self::Hyphen => "minus",
- Self::Slash => "slash",
- Self::Star => "star",
- Self::Underscore => "underscore",
- Self::Backslash => "backslash",
- Self::Hashtag => "hashtag",
- Self::UnicodeEscape { .. } => "unicode escape sequence",
- Self::Raw { .. } => "raw block",
- Self::Text(_) => "text",
+ Self::Str { .. } => "string",
+
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
}