summary refs log tree commit diff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 28
-rw-r--r-- src/parse/parser.rs 30
-rw-r--r-- src/parse/resolve.rs 12
-rw-r--r-- src/parse/tokens.rs 52
4 files changed, 59 insertions, 63 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 78e4f896..f9c0049f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -16,8 +16,8 @@ use crate::syntax::ast::{Associativity, BinOp, UnOp};
use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
/// Parse a source file.
-pub fn parse(source: &str) -> Rc<GreenNode> {
- let mut p = Parser::new(source);
+pub fn parse(src: &str) -> Rc<GreenNode> {
+ let mut p = Parser::new(src);
markup(&mut p);
match p.finish().into_iter().next() {
Some(Green::Node(node)) => node,
@@ -93,16 +93,17 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Strong
| NodeKind::Linebreak
| NodeKind::Raw(_)
+ | NodeKind::Math(_)
| NodeKind::UnicodeEscape(_) => {
p.eat();
}
NodeKind::Eq if *at_start => heading(p),
- NodeKind::ListBullet if *at_start => list_node(p),
+ NodeKind::Minus if *at_start => list_node(p),
NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
// Line-based markup that is not currently at the start of the line.
- NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => {
+ NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
p.convert(NodeKind::Text(p.peek_src().into()));
}
@@ -149,7 +150,7 @@ fn heading(p: &mut Parser) {
/// Parse a single list item.
fn list_node(p: &mut Parser) {
p.perform(NodeKind::List, |p| {
- p.eat_assert(&NodeKind::ListBullet);
+ p.eat_assert(&NodeKind::Minus);
let column = p.column(p.prev_end());
markup_indented(p, column);
});
@@ -193,10 +194,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
loop {
// Exclamation mark, parenthesis or bracket means this is a function
// call.
- if matches!(
- p.peek_direct(),
- Some(NodeKind::LeftParen | NodeKind::LeftBracket)
- ) {
+ if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
call(p, marker)?;
continue;
}
@@ -241,7 +239,6 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
match p.peek() {
// Things that start with an identifier.
Some(NodeKind::Ident(_)) => {
- // Start closure params.
let marker = p.marker();
p.eat();
@@ -364,9 +361,10 @@ enum CollectionKind {
/// Returns the length of the collection and whether the literal contained any
/// commas.
fn collection(p: &mut Parser) -> (CollectionKind, usize) {
- let mut items = 0;
let mut kind = CollectionKind::Positional;
+ let mut items = 0;
let mut can_group = true;
+ let mut error = false;
let mut missing_coma: Option<Marker> = None;
while !p.eof() {
@@ -393,12 +391,14 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
if p.eat_if(&NodeKind::Comma) {
can_group = false;
} else {
- missing_coma = Some(p.marker());
+ missing_coma = Some(p.trivia_start());
}
+ } else {
+ error = true;
}
}
- if can_group && items == 1 {
+ if error || (can_group && items == 1) {
kind = CollectionKind::Group;
}
@@ -467,7 +467,7 @@ fn params(p: &mut Parser, marker: Marker) {
NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()),
NodeKind::Spread
if matches!(
- x.children().last().map(|x| x.kind()),
+ x.children().last().map(|child| child.kind()),
Some(&NodeKind::Ident(_))
) =>
{
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 5ebc2c17..1c4c2a5c 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -52,6 +52,17 @@ impl<'s> Parser<'s> {
Marker(self.children.len())
}
+ /// Create a marker right before the trailing trivia.
+ pub fn trivia_start(&self) -> Marker {
+ let count = self
+ .children
+ .iter()
+ .rev()
+ .take_while(|node| self.is_trivia(node.kind()))
+ .count();
+ Marker(self.children.len() - count)
+ }
+
/// Perform a subparse that wraps its result in a node with the given kind.
pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
where
@@ -66,7 +77,7 @@ impl<'s> Parser<'s> {
// Trailing trivia should not be wrapped into the new node.
let idx = self.children.len();
self.children.push(Green::default());
- self.children.extend(children.drain(until ..));
+ self.children.extend(children.drain(until.0 ..));
self.children[idx] = GreenNode::with_children(kind, children).into();
} else {
self.children.push(GreenNode::with_children(kind, children).into());
@@ -238,7 +249,7 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed.
if rescan {
if group_mode == TokenMode::Code {
- self.children.truncate(self.trivia_start());
+ self.children.truncate(self.trivia_start().0);
}
self.tokens.jump(self.prev_end());
@@ -290,17 +301,6 @@ impl<'s> Parser<'s> {
}
}
- /// Find the index in the children list where trailing trivia starts.
- fn trivia_start(&self) -> usize {
- self.children.len()
- - self
- .children
- .iter()
- .rev()
- .take_while(|node| self.is_trivia(node.kind()))
- .count()
- }
-
/// Whether the active group must end at a newline.
fn stop_at_newline(&self) -> bool {
matches!(
@@ -350,7 +350,7 @@ impl Parser<'_> {
/// Add an error that the `thing` was expected at the end of the last
/// non-trivia token.
pub fn expected_at(&mut self, thing: &str) {
- Marker(self.trivia_start()).expected(self, thing);
+ self.trivia_start().expected(self, thing);
}
}
@@ -374,7 +374,7 @@ impl Marker {
/// with the given `kind`.
pub fn end(self, p: &mut Parser, kind: NodeKind) {
let until = p.trivia_start();
- let children = p.children.drain(self.0 .. until).collect();
+ let children = p.children.drain(self.0 .. until.0).collect();
p.children
.insert(self.0, GreenNode::with_children(kind, children).into());
}
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 6719f41d..e15ae339 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,5 +1,5 @@
use super::{is_ident, is_newline, Scanner};
-use crate::syntax::RawData;
+use crate::syntax::ast::RawNode;
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
@@ -46,21 +46,19 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trims the raw text.
-pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
+pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
- RawData {
+ RawNode {
lang: is_ident(tag).then(|| tag.into()),
text: text.into(),
- backticks,
block,
}
} else {
- RawData {
+ RawNode {
lang: None,
text: split_lines(text).join("\n").into(),
- backticks,
block: false,
}
}
@@ -181,7 +179,7 @@ mod tests {
#[track_caller]
fn test(
column: usize,
- backticks: u8,
+ backticks: usize,
raw: &str,
lang: Option<&str>,
text: &str,
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 1523cd64..96dfd9d1 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -5,7 +5,8 @@ use super::{
Scanner,
};
use crate::geom::{AngularUnit, LengthUnit};
-use crate::syntax::*;
+use crate::syntax::ast::{MathNode, RawNode};
+use crate::syntax::{ErrorPos, NodeKind};
use crate::util::EcoString;
/// An iterator over the tokens of a string of source code.
@@ -26,8 +27,8 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(source: &'s str, mode: TokenMode) -> Self {
- Self { s: Scanner::new(source), mode }
+ pub fn new(src: &'s str, mode: TokenMode) -> Self {
+ Self { s: Scanner::new(src), mode }
}
/// Get the current token mode.
@@ -254,7 +255,7 @@ impl<'s> Tokens<'s> {
}
}
c if c.is_whitespace() => NodeKind::Linebreak,
- _ => NodeKind::Text("\\".into()),
+ _ => NodeKind::Text('\\'.into()),
},
None => NodeKind::Linebreak,
}
@@ -281,7 +282,7 @@ impl<'s> Tokens<'s> {
NodeKind::EnDash
}
} else if self.s.check_or(true, char::is_whitespace) {
- NodeKind::ListBullet
+ NodeKind::Minus
} else {
NodeKind::Text("-".into())
}
@@ -310,16 +311,15 @@ impl<'s> Tokens<'s> {
let column = self.s.column(self.s.index() - 1);
let mut backticks = 1;
- while self.s.eat_if('`') && backticks < u8::MAX {
+ while self.s.eat_if('`') {
backticks += 1;
}
// Special case for empty inline block.
if backticks == 2 {
- return NodeKind::Raw(Rc::new(RawData {
+ return NodeKind::Raw(Rc::new(RawNode {
text: EcoString::new(),
lang: None,
- backticks: 1,
block: false,
}));
}
@@ -389,7 +389,7 @@ impl<'s> Tokens<'s> {
};
if terminated {
- NodeKind::Math(Rc::new(MathData {
+ NodeKind::Math(Rc::new(MathNode {
formula: self.s.get(start .. end).into(),
display,
}))
@@ -429,9 +429,7 @@ impl<'s> Tokens<'s> {
// Read the exponent.
if self.s.eat_if('e') || self.s.eat_if('E') {
- if !self.s.eat_if('+') {
- self.s.eat_if('-');
- }
+ let _ = self.s.eat_if('+') || self.s.eat_if('-');
self.s.eat_while(|c| c.is_ascii_digit());
}
@@ -483,6 +481,7 @@ impl<'s> Tokens<'s> {
false
}
}));
+
if self.s.eat_if('"') {
NodeKind::Str(string)
} else {
@@ -567,17 +566,16 @@ mod tests {
NodeKind::Error(pos, message.into())
}
- fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
- NodeKind::Raw(Rc::new(RawData {
+ fn Raw(text: &str, lang: Option<&str>, block: bool) -> NodeKind {
+ NodeKind::Raw(Rc::new(RawNode {
text: text.into(),
lang: lang.map(Into::into),
- backticks: backticks_left,
block,
}))
}
fn Math(formula: &str, display: bool) -> NodeKind {
- NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
+ NodeKind::Math(Rc::new(MathNode { formula: formula.into(), display }))
}
fn Str(string: &str) -> NodeKind {
@@ -655,13 +653,13 @@ mod tests {
];
// Test with each applicable suffix.
- for (block, mode, suffix, token) in suffixes {
+ for &(block, mode, suffix, ref token) in suffixes {
let src = $src;
#[allow(unused_variables)]
let blocks = BLOCKS;
$(let blocks = $blocks;)?
assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
- if (mode.is_none() || mode == &Some($mode)) && blocks.contains(*block) {
+ if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
t!(@$mode: format!("{}{}", src, suffix) => $($token,)* token);
}
}
@@ -790,7 +788,7 @@ mod tests {
t!(Markup: "~" => NonBreakingSpace);
t!(Markup[" "]: r"\" => Linebreak);
t!(Markup["a "]: r"a--" => Text("a"), EnDash);
- t!(Markup["a1/"]: "- " => ListBullet, Space(0));
+ t!(Markup["a1/"]: "- " => Minus, Space(0));
t!(Markup[" "]: "." => EnumNumbering(None));
t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
t!(Markup[" "]: "1.a" => Text("1."), Text("a"));
@@ -867,22 +865,22 @@ mod tests {
#[test]
fn test_tokenize_raw_blocks() {
// Test basic raw block.
- t!(Markup: "``" => Raw("", None, 1, false));
- t!(Markup: "`raw`" => Raw("raw", None, 1, false));
+ t!(Markup: "``" => Raw("", None, false));
+ t!(Markup: "`raw`" => Raw("raw", None, false));
t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
// Test special symbols in raw block.
- t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false));
- t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick"));
+ t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
+ t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
// Test separated closing backticks.
- t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false));
+ t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
// Test more backticks.
- t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false));
- t!(Markup: "````🚀````" => Raw("", None, 4, false));
+ t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
+ t!(Markup: "````🚀````" => Raw("", None, false));
t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
- t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false));
+ t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
}
#[test]