summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2019-05-01 17:15:59 +0200
committer Laurenz <laurmaedje@gmail.com> 2019-05-01 17:19:27 +0200
commit 27947e212cf217005059c4d31fa4301e92cfd3cc (patch)
tree 2580c0626918a145da88fa65cb6dcfa46087536a
parent 632bf2f2efae9e5820a256bef9e30a5cdf67a148 (diff)
Require scope for parser ♻
-rw-r--r-- src/func.rs 4
-rw-r--r-- src/lib.rs 8
-rw-r--r-- src/parsing.rs 242
3 files changed, 112 insertions, 142 deletions
diff --git a/src/func.rs b/src/func.rs
index e9212278..7558a124 100644
--- a/src/func.rs
+++ b/src/func.rs
@@ -5,7 +5,7 @@ use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use crate::syntax::{FuncHeader, Expression};
-use crate::parsing::{ParseTokens, ParseResult};
+use crate::parsing::{BodyTokens, ParseResult};
/// A function which transforms a parsing context into a boxed function.
@@ -66,7 +66,7 @@ pub struct ParseContext<'s, 't> {
/// The header of the function to be parsed.
pub header: &'s FuncHeader,
/// Tokens if the function has a body, otherwise nothing.
- pub tokens: Option<&'s mut ParseTokens<'t>>,
+ pub tokens: Option<&'s mut BodyTokens<'t>>,
/// The current scope containing function definitions.
pub scope: &'s Scope,
}
diff --git a/src/lib.rs b/src/lib.rs
index 0a17fca2..7fd10f5f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -47,8 +47,9 @@ use std::fmt::{self, Debug, Formatter};
use crate::doc::Document;
use crate::engine::{Engine, Style, TypesetError};
+use crate::func::Scope;
use crate::font::FontProvider;
-use crate::parsing::{Parser, ParseTokens, ParseResult, ParseError};
+use crate::parsing::{Parser, BodyTokens, ParseResult, ParseError};
use crate::syntax::SyntaxTree;
#[macro_use]
@@ -97,8 +98,9 @@ impl<'p> Compiler<'p> {
/// Parse source code into a syntax tree.
#[inline]
pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
- let mut tokens = ParseTokens::new(src);
- Parser::new(&mut tokens).parse()
+ let scope = Scope::new();
+ let mut tokens = BodyTokens::new(src);
+ Parser::new(&mut tokens, &scope).parse()
}
/// Compile a portable typesetted document from source code.
diff --git a/src/parsing.rs b/src/parsing.rs
index d79a942d..8a3f075a 100644
--- a/src/parsing.rs
+++ b/src/parsing.rs
@@ -3,7 +3,6 @@
use std::collections::HashMap;
use std::iter::Peekable;
use std::mem::swap;
-use std::ops::Deref;
use std::str::CharIndices;
use unicode_xid::UnicodeXID;
@@ -90,8 +89,7 @@ impl<'s> Iterator for Tokens<'s> {
}
}
- // Now all special cases are handled and we can finally look at the
- // next words.
+ // Take the next char and peek at the one after it.
let (next_pos, next) = self.chars.next()?;
let afterwards = self.chars.peek().map(|p| p.1);
@@ -250,8 +248,8 @@ impl Iterator for PeekableChars<'_> {
/// Transforms token streams to syntax trees.
pub struct Parser<'s, 't> {
- tokens: &'s mut ParseTokens<'t>,
- scope: ParserScope<'s>,
+ tokens: &'s mut BodyTokens<'t>,
+ scope: &'s Scope,
state: ParserState,
tree: SyntaxTree,
}
@@ -268,20 +266,9 @@ enum ParserState {
}
impl<'s, 't> Parser<'s, 't> {
- /// Create a new parser from a stream of tokens.
+ /// Create a new parser from a stream of tokens and a scope of functions.
#[inline]
- pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
- Parser::new_internal(tokens, ParserScope::Owned(Scope::new()))
- }
-
- /// Create a new parser with a scope containing function definitions.
- #[inline]
- pub fn with_scope(tokens: &'s mut ParseTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
- Parser::new_internal(tokens, ParserScope::Shared(scope))
- }
-
- /// Internal helper for construction.
- fn new_internal(tokens: &'s mut ParseTokens<'t>, scope: ParserScope<'s>) -> Parser<'s, 't> {
+ pub fn new(tokens: &'s mut BodyTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
Parser {
tokens,
scope,
@@ -323,7 +310,10 @@ impl<'s, 't> Parser<'s, 't> {
PS::Body => match token {
// Whitespace
Token::Space => self.append_space_consumed(),
- Token::Newline => self.switch_consumed(PS::FirstNewline),
+ Token::Newline => {
+ self.advance();
+ self.switch(PS::FirstNewline);
+ },
// Text
Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
@@ -356,7 +346,7 @@ impl<'s, 't> Parser<'s, 't> {
// The next token should be the name of the function.
let name = match self.tokens.next() {
Some(Token::Text(word)) => {
- if word.is_identifier() {
+ if is_identifier(word) {
Ok(word.to_owned())
} else {
Err(ParseError::new("invalid identifier"))
@@ -427,6 +417,11 @@ impl<'s, 't> Parser<'s, 't> {
self.tokens.next();
}
+ /// Switch the state.
+ fn switch(&mut self, state: ParserState) {
+ self.state = state;
+ }
+
/// Append a node to the tree.
fn append(&mut self, node: Node) {
self.tree.nodes.push(node);
@@ -439,11 +434,6 @@ impl<'s, 't> Parser<'s, 't> {
}
}
- /// Switch the state.
- fn switch(&mut self, state: ParserState) {
- self.state = state;
- }
-
/// Advance and return the given node.
fn append_consumed(&mut self, node: Node) {
self.advance();
@@ -456,12 +446,6 @@ impl<'s, 't> Parser<'s, 't> {
self.append_space();
}
- /// Advance and switch the state.
- fn switch_consumed(&mut self, state: ParserState) {
- self.advance();
- self.switch(state);
- }
-
/// Skip tokens until the condition is met.
fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
while let Some(token) = self.tokens.peek() {
@@ -473,46 +457,47 @@ impl<'s, 't> Parser<'s, 't> {
}
}
-/// An owned or shared scope.
-#[derive(Debug)]
-enum ParserScope<'s> {
- Owned(Scope),
- Shared(&'s Scope)
-}
+/// Whether this word is a valid unicode identifier.
+fn is_identifier(string: &str) -> bool {
+ let mut chars = string.chars();
-impl Deref for ParserScope<'_> {
- type Target = Scope;
+ match chars.next() {
+ Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+ None => return false,
+ _ => (),
+ }
- fn deref(&self) -> &Scope {
- match self {
- ParserScope::Owned(scope) => &scope,
- ParserScope::Shared(scope) => scope,
+ while let Some(c) = chars.next() {
+ if !UnicodeXID::is_xid_continue(c) {
+ return false;
}
}
+
+ true
}
/// A token iterator that iterates over exactly one body.
///
/// This iterator wraps [`Tokens`] and yields exactly the tokens of one
-/// function body or the complete top-level body and stops there.
+/// function body or the complete top-level body and stops there.
#[derive(Debug, Clone)]
-pub struct ParseTokens<'s> {
+pub struct BodyTokens<'s> {
tokens: Peekable<Tokens<'s>>,
parens: Vec<u32>,
blocked: bool,
}
-impl<'s> ParseTokens<'s> {
+impl<'s> BodyTokens<'s> {
/// Create a new iterator over text.
#[inline]
- pub fn new(source: &'s str) -> ParseTokens<'s> {
- ParseTokens::from_tokens(Tokens::new(source))
+ pub fn new(source: &'s str) -> BodyTokens<'s> {
+ BodyTokens::from_tokens(Tokens::new(source))
}
/// Create a new iterator operating over an existing one.
#[inline]
- pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> {
- ParseTokens {
+ pub fn from_tokens(tokens: Tokens<'s>) -> BodyTokens<'s> {
+ BodyTokens {
tokens: tokens.peekable(),
parens: vec![],
blocked: false,
@@ -546,7 +531,7 @@ impl<'s> ParseTokens<'s> {
}
}
-impl<'s> Iterator for ParseTokens<'s> {
+impl<'s> Iterator for BodyTokens<'s> {
type Item = Token<'s>;
fn next(&mut self) -> Option<Token<'s>> {
@@ -577,44 +562,18 @@ impl<'s> Iterator for ParseTokens<'s> {
}
}
-/// More useful functions on `str`'s.
-trait StrExt {
- /// Whether this word is a valid unicode identifier.
- fn is_identifier(&self) -> bool;
-}
-
-impl StrExt for str {
- fn is_identifier(&self) -> bool {
- let mut chars = self.chars();
-
- match chars.next() {
- Some(c) if !UnicodeXID::is_xid_start(c) => return false,
- None => return false,
- _ => (),
- }
-
- while let Some(c) = chars.next() {
- if !UnicodeXID::is_xid_continue(c) {
- return false;
- }
- }
-
- true
- }
-}
-
/// The error type for parsing.
pub struct ParseError(String);
+/// The result type for parsing.
+pub type ParseResult<T> = Result<T, ParseError>;
+
impl ParseError {
fn new<S: Into<String>>(message: S) -> ParseError {
ParseError(message.into())
}
}
-/// The result type for parsing.
-pub type ParseResult<T> = Result<T, ParseError>;
-
error_type! {
err: ParseError,
show: f => f.write_str(&err.0),
@@ -734,76 +693,46 @@ mod token_tests {
#[cfg(test)]
mod parse_tests {
use super::*;
+ use funcs::*;
use crate::func::{Function, Scope};
use Node::{Space as S, Newline as N, Func as F};
- #[allow(non_snake_case)]
- fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
+ /// Two test functions, one which parses its body as another syntax tree
+ /// and another one which does not expect a body.
+ mod funcs {
+ use super::*;
- /// A testing function which just parses it's body into a syntax tree.
- #[derive(Debug, PartialEq)]
- struct TreeFn(SyntaxTree);
+ /// A testing function which just parses its body into a syntax tree.
+ #[derive(Debug, PartialEq)]
+ pub struct TreeFn(pub SyntaxTree);
- impl Function for TreeFn {
- fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
- if let Some(tokens) = context.tokens {
- Parser::with_scope(tokens, context.scope).parse().map(|tree| TreeFn(tree))
- } else {
- Err(ParseError::new("expected body for tree fn"))
+ impl Function for TreeFn {
+ fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
+ if let Some(tokens) = context.tokens {
+ Parser::new(tokens, context.scope).parse().map(|tree| TreeFn(tree))
+ } else {
+ Err(ParseError::new("expected body for tree fn"))
+ }
}
+ fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
}
- fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
- }
- /// A testing function without a body.
- #[derive(Debug, PartialEq)]
- struct BodylessFn;
-
- impl Function for BodylessFn {
- fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
- if context.tokens.is_none() {
- Ok(BodylessFn)
- } else {
- Err(ParseError::new("unexpected body for bodyless fn"))
- }
- }
- fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
- }
+ /// A testing function without a body.
+ #[derive(Debug, PartialEq)]
+ pub struct BodylessFn;
- /// Shortcut macro to create a function.
- macro_rules! func {
- (name => $name:expr, body => None $(,)*) => {
- func!(@$name, Box::new(BodylessFn))
- };
- (name => $name:expr, body => $tree:expr $(,)*) => {
- func!(@$name, Box::new(TreeFn($tree)))
- };
- (@$name:expr, $body:expr) => {
- FuncCall {
- header: FuncHeader {
- name: $name.to_string(),
- args: vec![],
- kwargs: HashMap::new(),
- },
- body: $body,
+ impl Function for BodylessFn {
+ fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
+ if context.tokens.is_none() {
+ Ok(BodylessFn)
+ } else {
+ Err(ParseError::new("unexpected body for bodyless fn"))
+ }
}
+ fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
}
}
- /// Shortcut macro to create a syntax tree.
- /// Is `vec`-like and the elements are the nodes.
- macro_rules! tree {
- ($($x:expr),*) => (
- SyntaxTree { nodes: vec![$($x),*] }
- );
- ($($x:expr,)*) => (tree![$($x),*])
- }
-
- fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
- let mut tokens = ParseTokens::new(src);
- Parser::with_scope(&mut tokens, scope).parse()
- }
-
/// Test if the source code parses into the syntax tree.
fn test(src: &str, tree: SyntaxTree) {
assert_eq!(parse(src, &Scope::new()).unwrap(), tree);
@@ -824,6 +753,45 @@ mod parse_tests {
assert_eq!(parse(src, &scope).unwrap_err().to_string(), err);
}
+ /// Parse the source code with the given scope.
+ fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
+ let mut tokens = BodyTokens::new(src);
+ Parser::new(&mut tokens, scope).parse()
+ }
+
+ /// Create a text node.
+ #[allow(non_snake_case)]
+ fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
+
+ /// Shortcut macro to create a syntax tree.
+ /// Is `vec`-like and the elements are the nodes.
+ macro_rules! tree {
+ ($($x:expr),*) => (
+ SyntaxTree { nodes: vec![$($x),*] }
+ );
+ ($($x:expr,)*) => (tree![$($x),*])
+ }
+
+ /// Shortcut macro to create a function.
+ macro_rules! func {
+ (name => $name:expr, body => None $(,)*) => {
+ func!(@$name, Box::new(BodylessFn))
+ };
+ (name => $name:expr, body => $tree:expr $(,)*) => {
+ func!(@$name, Box::new(TreeFn($tree)))
+ };
+ (@$name:expr, $body:expr) => {
+ FuncCall {
+ header: FuncHeader {
+ name: $name.to_string(),
+ args: vec![],
+ kwargs: HashMap::new(),
+ },
+ body: $body,
+ }
+ }
+ }
+
/// Parse the basic cases.
#[test]
fn parse_base() {