diff options
| author | Laurenz <laurmaedje@gmail.com> | 2019-04-29 00:12:36 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2019-04-29 00:12:36 +0200 |
| commit | f279c52b503e9dacb558d8a46a75b42bc5222ee4 (patch) | |
| tree | b70dd229877d18b5db45e88966d6114993281dad /src/parsing.rs | |
| parent | a51e7a0758758d92ce3655896db6c56d5c1013cf (diff) | |
Simple dynamic, scoped function parsing π₯
Diffstat (limited to 'src/parsing.rs')
| -rw-r--r-- | src/parsing.rs | 364 |
1 files changed, 271 insertions, 93 deletions
diff --git a/src/parsing.rs b/src/parsing.rs index 9d921fe1..7c4d6041 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -4,8 +4,10 @@ use std::collections::HashMap; use std::fmt; use std::iter::Peekable; use std::mem::swap; +use std::ops::Deref; use unicode_segmentation::{UnicodeSegmentation, UWordBounds}; use crate::syntax::*; +use crate::func::{Scope, BodyTokens}; use crate::utility::{Splinor, Spline, Splined, StrExt}; @@ -205,12 +207,12 @@ impl<'s> Tokens<'s> { } /// Transforms token streams to syntax trees. -#[derive(Debug)] pub struct Parser<'s, T> where T: Iterator<Item=Token<'s>> { tokens: Peekable<T>, + scope: ParserScope<'s>, state: ParserState, stack: Vec<FuncInvocation>, - tree: SyntaxTree<'s>, + tree: SyntaxTree, } /// The state the parser is in. @@ -226,11 +228,39 @@ enum ParserState { Function, } +/// An owned or shared scope. +enum ParserScope<'s> { + Owned(Scope), + Shared(&'s Scope) +} + +impl Deref for ParserScope<'_> { + type Target = Scope; + + fn deref(&self) -> &Scope { + match self { + ParserScope::Owned(scope) => &scope, + ParserScope::Shared(scope) => scope, + } + } +} + impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { /// Create a new parser from a type that emits results of tokens. - pub(crate) fn new(tokens: T) -> Parser<'s, T> { + pub fn new(tokens: T) -> Parser<'s, T> { + Parser::new_internal(ParserScope::Owned(Scope::new()), tokens) + } + + /// Create a new parser with a scope containing function definitions. + pub fn with_scope(scope: &'s Scope, tokens: T) -> Parser<'s, T> { + Parser::new_internal(ParserScope::Shared(scope), tokens) + } + + /// Internal helper for construction. + fn new_internal(scope: ParserScope<'s>, tokens: T) -> Parser<'s, T> { Parser { tokens: tokens.peekable(), + scope, state: ParserState::Body, stack: vec![], tree: SyntaxTree::new(), @@ -238,7 +268,7 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { } /// Parse into an abstract syntax tree. - pub(crate) fn parse(mut self) -> ParseResult<SyntaxTree<'s>> { + pub(crate) fn parse(mut self) -> ParseResult<SyntaxTree> { use ParserState as PS; while let Some(token) = self.tokens.peek() { @@ -275,7 +305,7 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { Token::Newline => self.switch_consumed(PS::FirstNewline), // Words - Token::Word(word) => self.append_consumed(Node::Word(word)), + Token::Word(word) => self.append_consumed(Node::Word(word.to_owned())), // Functions Token::LeftBracket => self.switch_consumed(PS::Function), @@ -283,7 +313,7 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { self.advance(); match self.stack.pop() { Some(func) => self.append(Node::Func(func)), - None => return self.err("unexpected closing bracket"), + None => return Err(ParseError::new("unexpected closing bracket")), } }, @@ -300,46 +330,69 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { let name = if let Token::Word(word) = token { match Ident::new(word) { Some(ident) => ident, - None => return self.err("invalid identifier"), + None => return Err(ParseError::new("invalid identifier")), } } else { - return self.err("expected identifier"); + return Err(ParseError::new("expected identifier")); }; - self.advance(); + + // Expect the header closing bracket. if self.tokens.next() != Some(Token::RightBracket) { - return self.err("expected closing bracket"); + return Err(ParseError::new("expected closing bracket")); } + // Store the header information of the function invocation. let header = FuncHeader { - name, + name: name.clone(), args: vec![], kwargs: HashMap::new(), }; - let func = FuncInvocation { - header, - body: unimplemented!(), - }; - // This function has a body. - if let Some(Token::LeftBracket) = self.tokens.peek() { + let mut tokens = if let Some(Token::LeftBracket) = self.tokens.peek() { self.advance(); - func.body = Some(SyntaxTree::new()); - self.stack.push(func); + Some(FuncTokens::new(&mut self.tokens)) + } else { + None + }; + + // A mutably borrowed view over the tokens. + let borrow_tokens: BodyTokens<'_> = tokens.as_mut().map(|toks| { + Box::new(toks) as Box<dyn Iterator<Item=Token<'_>>> + }); + + // Run the parser over the tokens. + let body = if let Some(parser) = self.scope.get_parser(&name) { + parser(&header, borrow_tokens, &self.scope)? } else { - self.append(Node::Func(func)); + let message = format!("unknown function: '{}'", &name); + return Err(ParseError::new(message)); + }; + + // Expect the closing bracket if it had a body. + if let Some(tokens) = tokens { + println!("lulz"); + if tokens.unexpected_end { + return Err(ParseError::new("expected closing bracket")); + } } + // Finally this function is parsed to the end. + self.append(Node::Func(FuncInvocation { + header, + body, + })); + self.switch(PS::Body); }, } } - if !self.stack.is_empty() { - return self.err("expected closing bracket"); - } + // if !self.stack.is_empty() { + // return Err(ParseError::new("expected closing bracket")); + // } Ok(self.tree) } @@ -350,15 +403,16 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { } /// Append a node to the top-of-stack function or the main tree itself. - fn append(&mut self, node: Node<'s>) { - match self.stack.last_mut() { - Some(func) => func.body.as_mut().unwrap(), - None => &mut self.tree, - }.nodes.push(node); + fn append(&mut self, node: Node) { + self.tree.nodes.push(node); + // match self.stack.last_mut() { + // Some(func) => func.body.as_mut().unwrap(), + // None => &mut self.tree, + // }.nodes.push(node); } /// Advance and return the given node. - fn append_consumed(&mut self, node: Node<'s>) { self.advance(); self.append(node); } + fn append_consumed(&mut self, node: Node) { self.advance(); self.append(node); } /// Append a space if there is not one already. fn append_space(&mut self) { @@ -379,11 +433,12 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { fn switch_consumed(&mut self, state: ParserState) { self.advance(); self.state = state; } /// The last appended node of the top-of-stack function or of the main tree. - fn last(&self) -> Option<&Node<'s>> { - match self.stack.last() { - Some(func) => func.body.as_ref().unwrap(), - None => &self.tree, - }.nodes.last() + fn last(&self) -> Option<&Node> { + self.tree.nodes.last() + // match self.stack.last() { + // Some(func) => func.body.as_ref().unwrap(), + // None => &self.tree, + // }.nodes.last() } /// Skip tokens until the condition is met. @@ -395,10 +450,47 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { self.advance(); } } +} + +/// A token iterator that that stops after the first unbalanced right paren. +pub struct FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { + tokens: T, + parens: u32, + unexpected_end: bool, +} - /// Gives a parsing error with a message. - fn err<R, S: Into<String>>(&self, message: S) -> ParseResult<R> { - Err(ParseError { message: message.into() }) +impl<'s, T> FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { + /// Create a new iterator operating over an existing one. + pub fn new(tokens: T) -> FuncTokens<'s, T> { + FuncTokens { + tokens, + parens: 0, + unexpected_end: false, + } + } +} + +impl<'s, T> Iterator for FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { + type Item = Token<'s>; + + fn next(&mut self) -> Option<Token<'s>> { + let token = self.tokens.next(); + match token { + Some(Token::RightBracket) if self.parens == 0 => None, + Some(Token::RightBracket) => { + self.parens -= 1; + token + }, + Some(Token::LeftBracket) => { + self.parens += 1; + token + } + None => { + self.unexpected_end = true; + None + } + token => token, + } } } @@ -407,6 +499,12 @@ pub struct ParseError { message: String, } +impl ParseError { + fn new<S: Into<String>>(message: S) -> ParseError { + ParseError { message: message.into() } + } +} + error_type! { err: ParseError, res: ParseResult, @@ -528,110 +626,190 @@ mod token_tests { #[cfg(test)] mod parse_tests { use super::*; - use Node::{Space as S, Word as W, Newline as N, Func as F}; + use crate::func::{Function, Scope, BodyTokens}; + use Node::{Space as S, Newline as N, Func as F}; + + #[allow(non_snake_case)] + fn W(s: &str) -> Node { Node::Word(s.to_owned()) } + + /// A testing function which just parses it's body into a syntax tree. + #[derive(Debug, PartialEq)] + struct TreeFn(SyntaxTree); + + impl Function for TreeFn { + fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope) + -> ParseResult<Self> where Self: Sized { + if let Some(tokens) = tokens { + Parser::with_scope(scope, tokens).parse().map(|tree| TreeFn(tree)) + } else { + Err(ParseError::new("expected body for tree fn")) + } + } + fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None } + } + + /// A testing function without a body. + #[derive(Debug, PartialEq)] + struct BodylessFn; + + impl Function for BodylessFn { + fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, _: &Scope) -> ParseResult<Self> where Self: Sized { + if tokens.is_none() { + Ok(BodylessFn) + } else { + Err(ParseError::new("unexpected body for bodyless fn")) + } + } + fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None } + } + + /// Shortcut macro to create a function. + macro_rules! func { + (name => $name:expr, body => None $(,)*) => { + func!(@$name, Box::new(BodylessFn)) + }; + (name => $name:expr, body => $tree:expr $(,)*) => { + func!(@$name, Box::new(TreeFn($tree))) + }; + (@$name:expr, $body:expr) => { + FuncInvocation { + header: FuncHeader { + name: Ident::new($name).unwrap(), + args: vec![], + kwargs: HashMap::new(), + }, + body: $body, + } + } + } + + /// Shortcut macro to create a syntax tree. + /// Is `vec`-like and the elements are the nodes. + macro_rules! tree { + ($($x:expr),*) => ( + SyntaxTree { nodes: vec![$($x),*] } + ); + ($($x:expr,)*) => (tree![$($x),*]) + } /// Test if the source code parses into the syntax tree. fn test(src: &str, tree: SyntaxTree) { assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap(), tree); } + /// Test with a scope containing function definitions. + fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) { + assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap(), tree); + } + /// Test if the source parses into the error. fn test_err(src: &str, err: &str) { assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap_err().message, err); } - /// Short cut macro to create a syntax tree. - /// Is `vec`-like and the elements are the nodes. - macro_rules! tree { - ($($x:expr),*) => ( - SyntaxTree { nodes: vec![$($x),*] } - ); - ($($x:expr,)*) => (tree![$($x),*]) + /// Test if the source parses into the error. + fn test_err_scoped(scope: &Scope, src: &str, err: &str) { + assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap_err().message, err); } /// Parse the basic cases. #[test] fn parse_base() { - test("", tree! {}); - test("Hello World!", tree! { W("Hello"), S, W("World"), W("!")}); + test("", tree! []); + test("Hello World!", tree! [ W("Hello"), S, W("World"), W("!") ]); } /// Test whether newlines generate the correct whitespace. #[test] fn parse_newlines_whitespace() { - test("Hello\nWorld", tree! { W("Hello"), S, W("World") }); - test("Hello \n World", tree! { W("Hello"), S, W("World") }); - test("Hello\n\nWorld", tree! { W("Hello"), N, W("World") }); - test("Hello \n\nWorld", tree! { W("Hello"), S, N, W("World") }); - test("Hello\n\n World", tree! { W("Hello"), N, S, W("World") }); - test("Hello \n \n \n World", tree! { W("Hello"), S, N, S, W("World") }); - test("Hello\n \n\n World", tree! { W("Hello"), S, N, S, W("World") }); + test("Hello\nWorld", tree! [ W("Hello"), S, W("World") ]); + test("Hello \n World", tree! [ W("Hello"), S, W("World") ]); + test("Hello\n\nWorld", tree! [ W("Hello"), N, W("World") ]); + test("Hello \n\nWorld", tree! [ W("Hello"), S, N, W("World") ]); + test("Hello\n\n World", tree! [ W("Hello"), N, S, W("World") ]); + test("Hello \n \n \n World", tree! [ W("Hello"), S, N, S, W("World") ]); + test("Hello\n \n\n World", tree! [ W("Hello"), S, N, S, W("World") ]); } /// Parse things dealing with functions. #[test] fn parse_functions() { - test("[test]", tree! { F(Function { name: "test", body: None }) }); - test("This is an [modifier][example] of a function invocation.", tree! { + let mut scope = Scope::new(); + let tree_fns = ["modifier", "func", "links", "bodyempty", "nested"]; + let bodyless_fns = ["test", "end"]; + for func in &bodyless_fns { scope.add::<BodylessFn>(func); } + for func in &tree_fns { scope.add::<TreeFn>(func); } + + test_scoped(&scope,"[test]", tree! [ F(func! { name => "test", body => None }) ]); + test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [ W("This"), S, W("is"), S, W("an"), S, - F(Function { name: "modifier", body: Some(tree! { W("example") }) }), S, + F(func! { name => "modifier", body => tree! [ W("example") ] }), S, W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".") - }); - test("[func][Hello][links][Here][end]", tree! { - F(Function { - name: "func", - body: Some(tree! { W("Hello") }), + ]); + test_scoped(&scope, "[func][Hello][links][Here][end]", tree! [ + F(func! { + name => "func", + body => tree! [ W("Hello") ], }), - F(Function { - name: "links", - body: Some(tree! { W("Here") }), + F(func! { + name => "links", + body => tree! [ W("Here") ], }), - F(Function { - name: "end", - body: None, + F(func! { + name => "end", + body => None, }), - }); - test("[bodyempty][]", tree! { - F(Function { - name: "bodyempty", - body: Some(tree! {}) + ]); + test_scoped(&scope, "[bodyempty][]", tree! [ + F(func! { + name => "bodyempty", + body => tree! [], }) - }); - test("[nested][[func][call]] outside", tree! { - F(Function { - name: "nested", - body: Some(tree! { F(Function { - name: "func", - body: Some(tree! { W("call") }), - }), }), + ]); + test_scoped(&scope, "[nested][[func][call]] outside", tree! [ + F(func! { + name => "nested", + body => tree! [ + F(func! { + name => "func", + body => tree! [ W("call") ], + }), + ], }), S, W("outside") - }); + ]); } /// Tests if the parser handles non-ASCII stuff correctly. #[test] fn parse_unicode() { - test("[lib_parse] βΊ.", tree! { - F(Function { - name: "lib_parse", - body: None + let mut scope = Scope::new(); + scope.add::<BodylessFn>("lib_parse"); + scope.add::<TreeFn>("func123"); + + test_scoped(&scope, "[lib_parse] βΊ.", tree! [ + F(func! { + name => "lib_parse", + body => None, }), S, W("βΊ"), W(".") - }); - test("[func123][Hello π!]", tree! { - F(Function { - name: "func123", - body: Some(tree! { W("Hello"), S, W("π"), W("!") }), + ]); + test_scoped(&scope, "[func123][Hello π!]", tree! [ + F(func! { + name => "func123", + body => tree! [ W("Hello"), S, W("π"), W("!") ], }) - }); + ]); } /// Tests whether errors get reported correctly. #[test] fn parse_errors() { + let mut scope = Scope::new(); + scope.add::<TreeFn>("hello"); + test_err("No functions here]", "unexpected closing bracket"); - test_err("[hello][world", "expected closing bracket"); + test_err_scoped(&scope, "[hello][world", "expected closing bracket"); test_err("[hello world", "expected closing bracket"); test_err("[ no-name][Why?]", "expected identifier"); } |
