diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-11-08 12:13:32 +0100 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2021-11-08 13:06:11 +0100 |
| commit | 38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 (patch) | |
| tree | 51faa3f6bbc56f75636823adeea135ed76e1b33b /src/parse | |
| parent | 75fffc1f9b6ef8bf258b2b1845a4ba74a0f5f2c1 (diff) | |
Final touches
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 28 | ||||
| -rw-r--r-- | src/parse/parser.rs | 30 | ||||
| -rw-r--r-- | src/parse/resolve.rs | 12 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 52 |
4 files changed, 59 insertions, 63 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 78e4f896..f9c0049f 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,8 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. -pub fn parse(source: &str) -> Rc<GreenNode> { - let mut p = Parser::new(source); +pub fn parse(src: &str) -> Rc<GreenNode> { + let mut p = Parser::new(src); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -93,16 +93,17 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) + | NodeKind::Math(_) | NodeKind::UnicodeEscape(_) => { p.eat(); } NodeKind::Eq if *at_start => heading(p), - NodeKind::ListBullet if *at_start => list_node(p), + NodeKind::Minus if *at_start => list_node(p), NodeKind::EnumNumbering(_) if *at_start => enum_node(p), // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { + NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); } @@ -149,7 +150,7 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::ListBullet); + p.eat_assert(&NodeKind::Minus); let column = p.column(p.prev_end()); markup_indented(p, column); }); @@ -193,10 +194,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { loop { // Exclamation mark, parenthesis or bracket means this is a function // call. - if matches!( - p.peek_direct(), - Some(NodeKind::LeftParen | NodeKind::LeftBracket) - ) { + if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() { call(p, marker)?; continue; } @@ -241,7 +239,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { - // Start closure params. let marker = p.marker(); p.eat(); @@ -364,9 +361,10 @@ enum CollectionKind { /// Returns the length of the collection and whether the literal contained any /// commas. fn collection(p: &mut Parser) -> (CollectionKind, usize) { - let mut items = 0; let mut kind = CollectionKind::Positional; + let mut items = 0; let mut can_group = true; + let mut error = false; let mut missing_coma: Option<Marker> = None; while !p.eof() { @@ -393,12 +391,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { if p.eat_if(&NodeKind::Comma) { can_group = false; } else { - missing_coma = Some(p.marker()); + missing_coma = Some(p.trivia_start()); } + } else { + error = true; } } - if can_group && items == 1 { + if error || (can_group && items == 1) { kind = CollectionKind::Group; } @@ -467,7 +467,7 @@ fn params(p: &mut Parser, marker: Marker) { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( - x.children().last().map(|x| x.kind()), + x.children().last().map(|child| child.kind()), Some(&NodeKind::Ident(_)) ) => { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 5ebc2c17..1c4c2a5c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -52,6 +52,17 @@ impl<'s> Parser<'s> { Marker(self.children.len()) } + /// Create a markup right before the trailing trivia. + pub fn trivia_start(&self) -> Marker { + let count = self + .children + .iter() + .rev() + .take_while(|node| self.is_trivia(node.kind())) + .count(); + Marker(self.children.len() - count) + } + /// Perform a subparse that wraps its result in a node with the given kind. pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T where @@ -66,7 +77,7 @@ impl<'s> Parser<'s> { // Trailing trivia should not be wrapped into the new node. let idx = self.children.len(); self.children.push(Green::default()); - self.children.extend(children.drain(until ..)); + self.children.extend(children.drain(until.0 ..)); self.children[idx] = GreenNode::with_children(kind, children).into(); } else { self.children.push(GreenNode::with_children(kind, children).into()); @@ -238,7 +249,7 @@ impl<'s> Parser<'s> { // Rescan the peeked token if the mode changed. if rescan { if group_mode == TokenMode::Code { - self.children.truncate(self.trivia_start()); + self.children.truncate(self.trivia_start().0); } self.tokens.jump(self.prev_end()); @@ -290,17 +301,6 @@ impl<'s> Parser<'s> { } } - /// Find the index in the children list where trailing trivia starts. - fn trivia_start(&self) -> usize { - self.children.len() - - self - .children - .iter() - .rev() - .take_while(|node| self.is_trivia(node.kind())) - .count() - } - /// Whether the active group must end at a newline. fn stop_at_newline(&self) -> bool { matches!( @@ -350,7 +350,7 @@ impl Parser<'_> { /// Add an error that the `thing` was expected at the end of the last /// non-trivia token. pub fn expected_at(&mut self, thing: &str) { - Marker(self.trivia_start()).expected(self, thing); + self.trivia_start().expected(self, thing); } } @@ -374,7 +374,7 @@ impl Marker { /// with the given `kind`. pub fn end(self, p: &mut Parser, kind: NodeKind) { let until = p.trivia_start(); - let children = p.children.drain(self.0 .. until).collect(); + let children = p.children.drain(self.0 .. until.0).collect(); p.children .insert(self.0, GreenNode::with_children(kind, children).into()); } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 6719f41d..e15ae339 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_ident, is_newline, Scanner}; -use crate::syntax::RawData; +use crate::syntax::ast::RawNode; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,21 +46,19 @@ pub fn resolve_hex(sequence: &str) -> Option<char> { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { +pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawData { + RawNode { lang: is_ident(tag).then(|| tag.into()), text: text.into(), - backticks, block, } } else { - RawData { + RawNode { lang: None, text: split_lines(text).join("\n").into(), - backticks, block: false, } } @@ -181,7 +179,7 @@ mod tests { #[track_caller] fn test( column: usize, - backticks: u8, + backticks: usize, raw: &str, lang: Option<&str>, text: &str, diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1523cd64..96dfd9d1 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,7 +5,8 @@ use super::{ Scanner, }; use crate::geom::{AngularUnit, LengthUnit}; -use crate::syntax::*; +use crate::syntax::ast::{MathNode, RawNode}; +use crate::syntax::{ErrorPos, NodeKind}; use crate::util::EcoString; /// An iterator over the tokens of a string of source code. @@ -26,8 +27,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(source), mode } + pub fn new(src: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(src), mode } } /// Get the current token mode. @@ -254,7 +255,7 @@ impl<'s> Tokens<'s> { } } c if c.is_whitespace() => NodeKind::Linebreak, - _ => NodeKind::Text("\\".into()), + _ => NodeKind::Text('\\'.into()), }, None => NodeKind::Linebreak, } @@ -281,7 +282,7 @@ impl<'s> Tokens<'s> { NodeKind::EnDash } } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::ListBullet + NodeKind::Minus } else { NodeKind::Text("-".into()) } @@ -310,16 +311,15 @@ impl<'s> Tokens<'s> { let column = self.s.column(self.s.index() - 1); let mut backticks = 1; - while self.s.eat_if('`') && backticks < u8::MAX { + while self.s.eat_if('`') { backticks += 1; } // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawData { + return NodeKind::Raw(Rc::new(RawNode { text: EcoString::new(), lang: None, - backticks: 1, block: false, })); } @@ -389,7 +389,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathData { + NodeKind::Math(Rc::new(MathNode { formula: self.s.get(start .. end).into(), display, })) @@ -429,9 +429,7 @@ impl<'s> Tokens<'s> { // Read the exponent. if self.s.eat_if('e') || self.s.eat_if('E') { - if !self.s.eat_if('+') { - self.s.eat_if('-'); - } + let _ = self.s.eat_if('+') || self.s.eat_if('-'); self.s.eat_while(|c| c.is_ascii_digit()); } @@ -483,6 +481,7 @@ impl<'s> Tokens<'s> { false } })); + if self.s.eat_if('"') { NodeKind::Str(string) } else { @@ -567,17 +566,16 @@ mod tests { NodeKind::Error(pos, message.into()) } - fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawData { + fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind { + NodeKind::Raw(Rc::new(RawNode { text: text.into(), lang: lang.map(Into::into), - backticks: backticks_left, block, })) } fn Math(formula: &str, display: bool) -> NodeKind { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathNode { formula: formula.into(), display })) } fn Str(string: &str) -> NodeKind { @@ -655,13 +653,13 @@ mod tests { ]; // Test with each applicable suffix. - for (block, mode, suffix, token) in suffixes { + for &(block, mode, suffix, ref token) in suffixes { let src = $src; #[allow(unused_variables)] let blocks = BLOCKS; $(let blocks = $blocks;)? assert!(!blocks.contains(|c| !BLOCKS.contains(c))); - if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) { + if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) { t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token); } } @@ -790,7 +788,7 @@ mod tests { t!(Markup: "~" => NonBreakingSpace); t!(Markup[" "]: r"\" => Linebreak); t!(Markup["a "]: r"a--" => Text("a"), EnDash); - t!(Markup["a1/"]: "- " => ListBullet, Space(0)); + t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => Text("1."), Text("a")); @@ -867,22 +865,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup: "``" => Raw("", None, false)); + t!(Markup: "`raw`" => Raw("raw", None, false)); t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); - t!(Markup: "````🚀````" => Raw("", None, 4, false)); + t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false)); + t!(Markup: "````🚀````" => Raw("", None, false)); t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks")); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false)); } #[test] |
