From 1e4cab393e55df8875c6303ebb7bde8f09f911c9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 2 Nov 2021 12:06:22 +0100 Subject: Introduce incremental parsing --- src/parse/mod.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 10aaad23..1ab2fb15 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -25,6 +25,20 @@ pub fn parse(src: &str) -> Rc { } } +/// Parse a block. Returns `Some` if there was only one block. +pub fn parse_block(source: &str) -> Option> { + let mut p = Parser::new(source); + block(&mut p); + if p.eof() { + match p.finish().into_iter().next() { + Some(Green::Node(node)) => Some(node), + _ => unreachable!(), + } + } else { + None + } +} + /// Parse markup. fn markup(p: &mut Parser) { markup_while(p, true, &mut |_| true) -- cgit v1.2.3 From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 6 Nov 2021 16:07:21 +0100 Subject: Incremental-safety based approach --- src/parse/mod.rs | 71 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 24 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1ab2fb15..5d845a55 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -25,18 +25,36 @@ pub fn parse(src: &str) -> Rc { } } -/// Parse a block. Returns `Some` if there was only one block. -pub fn parse_block(source: &str) -> Option> { +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic(source: &str, _: bool) -> Option> { let mut p = Parser::new(source); - block(&mut p); - if p.eof() { - match p.finish().into_iter().next() { - Some(Green::Node(node)) => Some(node), - _ => unreachable!(), - } - } else { - None + primary(&mut p, true).ok()?; + p.eject() +} + +/// Parse some markup. Returns `Some` if all of the input was consumed. +pub fn parse_markup(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source); + markup(&mut p); + p.eject() +} + +/// Parse some markup without the topmost node. Returns `Some` if all of the +/// input was consumed. +pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option> { + let mut p = Parser::new(source); + while !p.eof() { + markup_node(&mut p, &mut at_start); } + p.eject() +} + +/// Parse some code. Returns `Some` if all of the input was consumed. +pub fn parse_code(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source); + p.set_mode(TokenMode::Code); + expr_list(&mut p); + p.eject() } /// Parse markup. @@ -118,7 +136,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())); + p.convert(NodeKind::TextInLine(p.peek_src().into())) } // Hashtag + keyword / identifier. @@ -196,7 +214,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match p.peek().and_then(UnOp::from_token) { + match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() { Some(op) => { p.eat(); let prec = op.precedence(); @@ -268,7 +286,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { } // Structures. - Some(NodeKind::LeftParen) => parenthesized(p), + Some(NodeKind::LeftParen) => parenthesized(p, atomic), Some(NodeKind::LeftBracket) => { template(p); Ok(()) @@ -329,7 +347,7 @@ fn literal(p: &mut Parser) -> bool { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) -> ParseResult { +fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { let marker = p.marker(); p.start_group(Group::Paren); @@ -344,7 +362,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.at(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); @@ -507,18 +525,23 @@ fn template(p: &mut Parser) { fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace); - while !p.eof() { - p.start_group(Group::Stmt); - if expr(p).is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); + expr_list(p); + p.end_group(); + }); +} - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, NodeKind::Space(_))); +/// Parse a number of code expressions. +fn expr_list(p: &mut Parser) { + while !p.eof() { + p.start_group(Group::Stmt); + if expr(p).is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); } p.end_group(); - }); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); + } } /// Parse a function call. -- cgit v1.2.3 From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 7 Nov 2021 19:43:01 +0100 Subject: Tests - length updates - dealing with keywords and comments --- src/parse/mod.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 5d845a55..02777350 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(src: &str) -> Rc { - let mut p = Parser::new(src); + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_atomic(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); p.eject() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. -pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option Option> { - let mut p = Parser::new(source); - p.set_mode(TokenMode::Code); +pub fn parse_code(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); expr_list(&mut p); p.eject() } -- cgit v1.2.3 From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 8 Nov 2021 12:01:35 +0100 Subject: Deal with the effects of keywords --- src/parse/mod.rs | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 02777350..afeb34f1 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -29,7 +29,7 @@ pub fn parse(src: &str) -> Rc { pub fn parse_atomic(src: &str, _: bool) -> Option> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; - p.eject() + p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. @@ -49,10 +49,32 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option p.eject() } -/// Parse some code. Returns `Some` if all of the input was consumed. -pub fn parse_code(src: &str, _: bool) -> Option> { - let mut p = Parser::new(src, TokenMode::Code); - expr_list(&mut p); +/// Parse a template literal. Returns `Some` if all of the input was consumed. +pub fn parse_template(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { + return None; + } + + template(&mut p); + p.eject() +} + +/// Parse a code block. Returns `Some` if all of the input was consumed. +pub fn parse_block(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { + return None; + } + + block(&mut p); + p.eject() +} + +/// Parse a comment. Returns `Some` if all of the input was consumed. +pub fn parse_comment(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + comment(&mut p).ok()?; p.eject() } @@ -742,3 +764,14 @@ fn body(p: &mut Parser) -> ParseResult { } Ok(()) } + +/// Parse a comment. +fn comment(p: &mut Parser) -> ParseResult { + match p.peek() { + Some(NodeKind::LineComment | NodeKind::BlockComment) => { + p.eat(); + Ok(()) + } + _ => Err(()), + } +} -- cgit v1.2.3 From 7a631d8b09bbffa8c7d90a1038d986876370ea7a Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 9 Nov 2021 13:07:55 +0100 Subject: Simplify node mode management --- src/parse/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index afeb34f1..1f1ac266 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -772,6 +772,6 @@ fn comment(p: &mut Parser) -> ParseResult { p.eat(); Ok(()) } - _ => Err(()), + _ => Err(ParseError), } } -- cgit v1.2.3 From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 10 Nov 2021 20:41:10 +0100 Subject: Comments and neighbors --- src/parse/mod.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1f1ac266..f2fae5f2 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -26,14 +26,14 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool) -> Option> { +pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(src: &str, _: bool) -> Option> { +pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); p.eject() @@ -41,7 +41,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option> { /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. -pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option> { +pub fn parse_markup_elements( + src: &str, + mut at_start: bool, +) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); @@ -50,7 +53,7 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool) -> Option> { +pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { return None; @@ -61,7 +64,7 @@ pub fn parse_template(source: &str, _: bool) -> Option> { } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool) -> Option> { +pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { return None; @@ -72,7 +75,7 @@ pub fn parse_block(source: &str, _: bool) -> Option> { } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool) -> Option> { +pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); comment(&mut p).ok()?; p.eject() -- cgit v1.2.3 From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 18 Nov 2021 16:21:45 +0100 Subject: Refactoring and bugfixes --- src/parse/mod.rs | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f2fae5f2..f1f1e8b6 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -32,6 +32,13 @@ pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { p.eject_partial() } +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec, bool)> { + let mut p = Parser::new(src, TokenMode::Markup); + markup_expr(&mut p); + p.eject_partial() +} + /// Parse some markup. Returns `Some` if all of the input was consumed. pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); @@ -171,17 +178,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::While | NodeKind::For | NodeKind::Import - | NodeKind::Include => { - let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group); - let res = expr_prec(p, true, 0); - if stmt && res.is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - } + | NodeKind::Include => markup_expr(p), // Block and template. NodeKind::LeftBrace => block(p), @@ -222,6 +219,21 @@ fn enum_node(p: &mut Parser) { }); } +/// Parse an expression within markup mode. +fn markup_expr(p: &mut Parser) { + if let Some(token) = p.peek() { + let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; + + p.start_group(group); + let res = expr_prec(p, true, 0); + if stmt && res.is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + } +} + /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { expr_prec(p, false, 0) -- cgit v1.2.3 From edc686d7384470068858e16f2926cf50f31b2c90 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 27 Nov 2021 16:10:22 +0100 Subject: Make incremental parsing simpler and move it somewhere else --- src/parse/mod.rs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f1f1e8b6..2c421374 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,10 +1,12 @@ //! Parsing and tokenization. +mod incremental; mod parser; mod resolve; mod scanner; mod tokens; +pub use incremental::*; pub use parser::*; pub use resolve::*; pub use scanner::*; -- cgit v1.2.3 From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: Code Review: Parser, I can't let you do this --- src/parse/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c421374..2c5afb6b 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -64,7 +64,7 @@ pub fn parse_markup_elements( /// Parse a template literal. Returns `Some` if all of the input was consumed. pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { + if !p.at(&NodeKind::LeftBracket) { return None; } @@ -75,7 +75,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse a code block. Returns `Some` if all of the input was consumed. pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { + if !p.at(&NodeKind::LeftBrace) { return None; } @@ -252,14 +252,14 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() { - Some(op) => { + match p.peek().and_then(UnOp::from_token) { + Some(op) if !atomic => { p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => primary(p, atomic)?, + _ => primary(p, atomic)?, }; loop { -- cgit v1.2.3 From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/parse/mod.rs | 55 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 18 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c5afb6b..f4826730 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,6 +16,7 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; +use crate::util::EcoString; /// Parse a source file. pub fn parse(src: &str) -> Rc { @@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject_partial() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); markup_expr(&mut p); p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); - markup(&mut p); + if column == 0 { + markup(&mut p); + } else { + markup_indented(&mut p, column); + } p.eject() } @@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { pub fn parse_markup_elements( src: &str, mut at_start: bool, + column: usize, ) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); + p.offset(column); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -62,7 +69,7 @@ pub fn parse_markup_elements( } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; @@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; @@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); comment(&mut p).ok()?; p.eject() @@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse markup. fn markup(p: &mut Parser) { - markup_while(p, true, &mut |_| true) + markup_while(p, true, 0, &mut |_| true) } /// Parse markup that stays right of the given column. @@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) { _ => false, }); - markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, + markup_while(p, false, column, &mut |p| match p.peek() { + Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, _ => true, }) } @@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) { /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. -fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) +fn markup_while(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.perform(NodeKind::Markup, |p| { + p.perform(NodeKind::Markup(column), |p| { while !p.eof() && f(p) { markup_node(p, &mut at_start); } @@ -205,20 +212,32 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { - p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::Minus); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat_assert(&NodeKind::Minus); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::List); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse a single enum item. fn enum_node(p: &mut Parser) { - p.perform(NodeKind::Enum, |p| { - p.eat(); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat(); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Enum); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse an expression within markup mode. -- cgit v1.2.3 From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 3 Jan 2022 23:18:21 +0100 Subject: Code Review: Your parsers were so preoccupied with whether they could --- src/parse/mod.rs | 147 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 87 insertions(+), 60 deletions(-) (limited to 'src/parse/mod.rs') diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f4826730..a9752645 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -29,72 +29,102 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Code); +pub fn parse_atomic( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); primary(&mut p, true).ok()?; - p.eject_partial() + p.consume_unterminated() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); +pub fn parse_atomic_markup( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); markup_expr(&mut p); - p.eject_partial() + p.consume_unterminated() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - if column == 0 { +pub fn parse_markup( + prefix: &str, + src: &str, + _: bool, + min_column: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + if min_column == 0 { markup(&mut p); } else { - markup_indented(&mut p, column); + markup_indented(&mut p, min_column); } - p.eject() + p.consume() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. pub fn parse_markup_elements( + prefix: &str, src: &str, mut at_start: bool, - column: usize, + _: usize, ) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - p.offset(column); + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } - p.eject() + p.consume() } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_template( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; } template(&mut p); - p.eject() + p.consume() } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_block( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; } block(&mut p); - p.eject() + p.consume() } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_comment( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); comment(&mut p).ok()?; - p.eject() + p.consume() } /// Parse markup. @@ -111,7 +141,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, column, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -170,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Eq if *at_start => heading(p), - NodeKind::Minus if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::TextInLine(p.peek_src().into())) - } + NodeKind::Eq => heading(p, *at_start), + NodeKind::Minus => list_node(p, *at_start), + NodeKind::EnumNumbering(_) => enum_node(p, *at_start), // Hashtag + keyword / identifier. NodeKind::Ident(_) @@ -201,42 +226,49 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } /// Parse a heading. -fn heading(p: &mut Parser) { - p.perform(NodeKind::Heading, |p| { - p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} +fn heading(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let current_start = p.current_start(); + p.eat_assert(&NodeKind::Eq); + while p.eat_if(&NodeKind::Eq) {} + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Heading); + } else { + let text = p.get(current_start .. p.prev_end()).into(); + marker.convert(p, NodeKind::TextInLine(text)); + } } /// Parse a single list item. -fn list_node(p: &mut Parser) { +fn list_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat_assert(&NodeKind::Minus); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::List); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { +fn enum_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat(); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::Enum); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } @@ -582,23 +614,18 @@ fn template(p: &mut Parser) { fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace); - expr_list(p); - p.end_group(); - }); -} + while !p.eof() { + p.start_group(Group::Stmt); + if expr(p).is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); -/// Parse a number of code expressions. -fn expr_list(p: &mut Parser) { - while !p.eof() { - p.start_group(Group::Stmt); - if expr(p).is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } + }); } /// Parse a function call. -- cgit v1.2.3