summary refs log tree commit diff
path: root/src/parse
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2021-06-11 14:00:06 +0200
committer Laurenz <laurmaedje@gmail.com> 2021-06-11 14:00:06 +0200
commit 4dbd9285c91d59d527f4324df4aaf239ecb007ca (patch)
tree 561a9a18a1eea6a2e598157f305667c4ea8e3e08 /src/parse
parent 3330767c20e14a05176902a93dcefb08cb509173 (diff)
Basic enums
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 71
-rw-r--r-- src/parse/parser.rs 22
-rw-r--r-- src/parse/tokens.rs 38
3 files changed, 79 insertions, 52 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 5ab5b2d8..41257668 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -25,25 +25,33 @@ pub fn parse(src: &str) -> Pass<Tree> {
/// Parse a syntax tree.
fn tree(p: &mut Parser) -> Tree {
- tree_while(p, |_| true)
+ tree_while(p, true, |_| true)
}
/// Parse a syntax tree that stays right of the column at the start of the next
/// non-whitespace token.
fn tree_indented(p: &mut Parser) -> Tree {
- p.skip_white();
+ p.eat_while(|t| match t {
+ Token::Space(n) => n == 0,
+ Token::LineComment(_) | Token::BlockComment(_) => true,
+ _ => false,
+ });
+
let column = p.column(p.next_start());
- tree_while(p, |p| match p.peek() {
+ tree_while(p, false, |p| match p.peek() {
Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column,
_ => true,
})
}
/// Parse a syntax tree.
-fn tree_while(p: &mut Parser, mut f: impl FnMut(&mut Parser) -> bool) -> Tree {
- // We keep track of whether we are at the start of a block or paragraph
- // to know whether things like headings are allowed.
- let mut at_start = true;
+fn tree_while(
+ p: &mut Parser,
+ mut at_start: bool,
+ mut f: impl FnMut(&mut Parser) -> bool,
+) -> Tree {
+ // We use `at_start` to keep track of whether we are at the start of a line
+ // or template to know whether things like headings are allowed.
let mut tree = vec![];
while !p.eof() && f(p) {
if let Some(node) = node(p, &mut at_start) {
@@ -85,19 +93,13 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
Token::Star => Node::Strong(span),
Token::Underscore => Node::Emph(span),
Token::Raw(t) => raw(p, t),
- Token::Hashtag => {
- if *at_start {
- return Some(heading(p));
- } else {
- Node::Text(p.peek_src().into())
- }
- }
- Token::Hyph => {
- if *at_start {
- return Some(list(p));
- } else {
- Node::Text(p.peek_src().into())
- }
+ Token::Hashtag if *at_start => return Some(heading(p)),
+ Token::Hyph if *at_start => return Some(list_item(p)),
+ Token::Numbering(number) if *at_start => return Some(enum_item(p, number)),
+
+ // Line-based markup that is not currently at the start of the line.
+ Token::Hashtag | Token::Hyph | Token::Numbering(_) => {
+ Node::Text(p.peek_src().into())
}
// Hashtag + keyword / identifier.
@@ -118,19 +120,12 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
}
p.end_group();
- // Uneat spaces we might have eaten eagerly.
return expr.map(Node::Expr);
}
- // Block.
- Token::LeftBrace => {
- return Some(Node::Expr(block(p, false)));
- }
-
- // Template.
- Token::LeftBracket => {
- return Some(Node::Expr(template(p)));
- }
+ // Block and template.
+ Token::LeftBrace => return Some(Node::Expr(block(p, false))),
+ Token::LeftBracket => return Some(Node::Expr(template(p))),
// Comments.
Token::LineComment(_) | Token::BlockComment(_) => {
@@ -202,11 +197,19 @@ fn heading(p: &mut Parser) -> Node {
}
/// Parse a single list item.
-fn list(p: &mut Parser) -> Node {
+fn list_item(p: &mut Parser) -> Node {
let start = p.next_start();
p.assert(Token::Hyph);
let body = tree_indented(p);
- Node::List(ListNode { span: p.span(start), body })
+ Node::List(ListItem { span: p.span(start), body })
+}
+
+/// Parse a single enum item.
+fn enum_item(p: &mut Parser, number: Option<usize>) -> Node {
+ let start = p.next_start();
+ p.assert(Token::Numbering(number));
+ let body = tree_indented(p);
+ Node::Enum(EnumItem { span: p.span(start), number, body })
}
/// Parse an expression.
@@ -500,7 +503,9 @@ fn block(p: &mut Parser, scoping: bool) -> Expr {
}
}
p.end_group();
- p.skip_white();
+
+ // Forcefully skip over newlines since the group's contents can't.
+ p.eat_while(|t| matches!(t, Token::Space(_)));
}
let span = p.end_group();
Expr::Block(BlockExpr { span, exprs, scoping })
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 27346587..8ea80d68 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -242,6 +242,16 @@ impl<'s> Parser<'s> {
}
}
+ /// Consume tokens while the condition is true.
+ pub fn eat_while<F>(&mut self, mut f: F)
+ where
+ F: FnMut(Token<'s>) -> bool,
+ {
+ while self.peek().map_or(false, |t| f(t)) {
+ self.eat();
+ }
+ }
+
/// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
where
@@ -278,18 +288,6 @@ impl<'s> Parser<'s> {
debug_assert_eq!(next, Some(t));
}
- /// Skip whitespace and comment tokens.
- pub fn skip_white(&mut self) {
- while matches!(
- self.peek(),
- Some(Token::Space(_)) |
- Some(Token::LineComment(_)) |
- Some(Token::BlockComment(_))
- ) {
- self.eat();
- }
- }
-
/// The index at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index f3ca25d9..a496010e 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -102,6 +102,7 @@ impl<'s> Tokens<'s> {
'`' => self.raw(),
'$' => self.math(),
'-' => self.hyph(start),
+ c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
// Plain text.
_ => self.text(start),
@@ -185,11 +186,11 @@ impl<'s> Tokens<'s> {
// Whitespace.
c if c.is_whitespace() => true,
// Comments.
- '/' if self.s.check(|c| c == '/' || c == '*') => true,
+ '/' => true,
// Parentheses.
'[' | ']' | '{' | '}' => true,
// Markup.
- '#' | '~' | '*' | '_' | '-' | '`' | '$' => true,
+ '#' | '~' | '*' | '_' | '`' | '$' | '-' => true,
// Escaping.
'\\' => true,
// Just text.
@@ -274,6 +275,25 @@ impl<'s> Tokens<'s> {
}
}
+ fn numbering(&mut self, start: usize, c: char) -> Token<'s> {
+ let number = if c != '.' {
+ self.s.eat_while(|c| c.is_ascii_digit());
+ let read = self.s.eaten_from(start);
+ if !self.s.eat_if('.') {
+ return Token::Text(read);
+ }
+ read.parse().ok()
+ } else {
+ None
+ };
+
+ if self.s.check(|c| !c.is_whitespace()) {
+ return Token::Text(self.s.eaten_from(start));
+ }
+
+ Token::Numbering(number)
+ }
+
fn raw(&mut self) -> Token<'s> {
let mut backticks = 1;
while self.s.eat_if('`') {
@@ -357,12 +377,12 @@ impl<'s> Tokens<'s> {
}
}
- fn number(&mut self, start: usize, first: char) -> Token<'s> {
+ fn number(&mut self, start: usize, c: char) -> Token<'s> {
// Read the first part (integer or fractional depending on `first`).
self.s.eat_while(|c| c.is_ascii_digit());
- // Read the fractional part if not already done and present.
- if first != '.' && self.s.eat_if('.') {
+ // Read the fractional part if not already done.
+ if c != '.' && self.s.eat_if('.') {
self.s.eat_while(|c| c.is_ascii_digit());
}
@@ -654,7 +674,7 @@ mod tests {
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
- t!(Markup[" /"]: ";:,|/+" => Text(";:,|/+"));
+ t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+"));
t!(Markup[" /"]: "#-a" => Text("#"), Text("-"), Text("a"));
t!(Markup[" "]: "#123" => Text("#"), Text("123"));
@@ -707,10 +727,14 @@ mod tests {
t!(Markup: "_" => Underscore);
t!(Markup[""]: "###" => Hashtag, Hashtag, Hashtag);
t!(Markup["a1/"]: "# " => Hashtag, Space(0));
- t!(Markup["a1/"]: "- " => Hyph, Space(0));
t!(Markup: "~" => Tilde);
t!(Markup[" "]: r"\" => Backslash);
t!(Markup["a "]: r"a--" => Text("a"), HyphHyph);
+ t!(Markup["a1/"]: "- " => Hyph, Space(0));
+ t!(Markup[" "]: "." => Numbering(None));
+ t!(Markup[" "]: "1." => Numbering(Some(1)));
+ t!(Markup[" "]: "1.a" => Text("1."), Text("a"));
+ t!(Markup[" /"]: "a1." => Text("a1."));
}
#[test]