diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-06-11 14:00:06 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2021-06-11 14:00:06 +0200 |
| commit | 4dbd9285c91d59d527f4324df4aaf239ecb007ca (patch) | |
| tree | 561a9a18a1eea6a2e598157f305667c4ea8e3e08 /src/parse | |
| parent | 3330767c20e14a05176902a93dcefb08cb509173 (diff) | |
Basic enums
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 71 |
| -rw-r--r-- | src/parse/parser.rs | 22 |
| -rw-r--r-- | src/parse/tokens.rs | 38 |
3 files changed, 79 insertions, 52 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 5ab5b2d8..41257668 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -25,25 +25,33 @@ pub fn parse(src: &str) -> Pass<Tree> { /// Parse a syntax tree. fn tree(p: &mut Parser) -> Tree { - tree_while(p, |_| true) + tree_while(p, true, |_| true) } /// Parse a syntax tree that stays right of the column at the start of the next /// non-whitespace token. fn tree_indented(p: &mut Parser) -> Tree { - p.skip_white(); + p.eat_while(|t| match t { + Token::Space(n) => n == 0, + Token::LineComment(_) | Token::BlockComment(_) => true, + _ => false, + }); + let column = p.column(p.next_start()); - tree_while(p, |p| match p.peek() { + tree_while(p, false, |p| match p.peek() { Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } /// Parse a syntax tree. -fn tree_while(p: &mut Parser, mut f: impl FnMut(&mut Parser) -> bool) -> Tree { - // We keep track of whether we are at the start of a block or paragraph - // to know whether things like headings are allowed. - let mut at_start = true; +fn tree_while( + p: &mut Parser, + mut at_start: bool, + mut f: impl FnMut(&mut Parser) -> bool, +) -> Tree { + // We use `at_start` to keep track of whether we are at the start of a line + // or template to know whether things like headings are allowed. 
let mut tree = vec![]; while !p.eof() && f(p) { if let Some(node) = node(p, &mut at_start) { @@ -85,19 +93,13 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> { Token::Star => Node::Strong(span), Token::Underscore => Node::Emph(span), Token::Raw(t) => raw(p, t), - Token::Hashtag => { - if *at_start { - return Some(heading(p)); - } else { - Node::Text(p.peek_src().into()) - } - } - Token::Hyph => { - if *at_start { - return Some(list(p)); - } else { - Node::Text(p.peek_src().into()) - } + Token::Hashtag if *at_start => return Some(heading(p)), + Token::Hyph if *at_start => return Some(list_item(p)), + Token::Numbering(number) if *at_start => return Some(enum_item(p, number)), + + // Line-based markup that is not currently at the start of the line. + Token::Hashtag | Token::Hyph | Token::Numbering(_) => { + Node::Text(p.peek_src().into()) } // Hashtag + keyword / identifier. @@ -118,19 +120,12 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> { } p.end_group(); - // Uneat spaces we might have eaten eagerly. return expr.map(Node::Expr); } - // Block. - Token::LeftBrace => { - return Some(Node::Expr(block(p, false))); - } - - // Template. - Token::LeftBracket => { - return Some(Node::Expr(template(p))); - } + // Block and template. + Token::LeftBrace => return Some(Node::Expr(block(p, false))), + Token::LeftBracket => return Some(Node::Expr(template(p))), // Comments. Token::LineComment(_) | Token::BlockComment(_) => { @@ -202,11 +197,19 @@ fn heading(p: &mut Parser) -> Node { } /// Parse a single list item. -fn list(p: &mut Parser) -> Node { +fn list_item(p: &mut Parser) -> Node { let start = p.next_start(); p.assert(Token::Hyph); let body = tree_indented(p); - Node::List(ListNode { span: p.span(start), body }) + Node::List(ListItem { span: p.span(start), body }) +} + +/// Parse a single enum item. 
+fn enum_item(p: &mut Parser, number: Option<usize>) -> Node { + let start = p.next_start(); + p.assert(Token::Numbering(number)); + let body = tree_indented(p); + Node::Enum(EnumItem { span: p.span(start), number, body }) } /// Parse an expression. @@ -500,7 +503,9 @@ fn block(p: &mut Parser, scoping: bool) -> Expr { } } p.end_group(); - p.skip_white(); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, Token::Space(_))); } let span = p.end_group(); Expr::Block(BlockExpr { span, exprs, scoping }) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 27346587..8ea80d68 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -242,6 +242,16 @@ impl<'s> Parser<'s> { } } + /// Consume tokens while the condition is true. + pub fn eat_while<F>(&mut self, mut f: F) + where + F: FnMut(Token<'s>) -> bool, + { + while self.peek().map_or(false, |t| f(t)) { + self.eat(); + } + } + /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map<T, F>(&mut self, f: F) -> Option<T> where @@ -278,18 +288,6 @@ impl<'s> Parser<'s> { debug_assert_eq!(next, Some(t)); } - /// Skip whitespace and comment tokens. - pub fn skip_white(&mut self) { - while matches!( - self.peek(), - Some(Token::Space(_)) | - Some(Token::LineComment(_)) | - Some(Token::BlockComment(_)) - ) { - self.eat(); - } - } - /// The index at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in code mode. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index f3ca25d9..a496010e 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -102,6 +102,7 @@ impl<'s> Tokens<'s> { '`' => self.raw(), '$' => self.math(), '-' => self.hyph(start), + c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. _ => self.text(start), @@ -185,11 +186,11 @@ impl<'s> Tokens<'s> { // Whitespace. c if c.is_whitespace() => true, // Comments. 
- '/' if self.s.check(|c| c == '/' || c == '*') => true, + '/' => true, // Parentheses. '[' | ']' | '{' | '}' => true, // Markup. - '#' | '~' | '*' | '_' | '-' | '`' | '$' => true, + '#' | '~' | '*' | '_' | '`' | '$' | '-' => true, // Escaping. '\\' => true, // Just text. @@ -274,6 +275,25 @@ impl<'s> Tokens<'s> { } } + fn numbering(&mut self, start: usize, c: char) -> Token<'s> { + let number = if c != '.' { + self.s.eat_while(|c| c.is_ascii_digit()); + let read = self.s.eaten_from(start); + if !self.s.eat_if('.') { + return Token::Text(read); + } + read.parse().ok() + } else { + None + }; + + if self.s.check(|c| !c.is_whitespace()) { + return Token::Text(self.s.eaten_from(start)); + } + + Token::Numbering(number) + } + fn raw(&mut self) -> Token<'s> { let mut backticks = 1; while self.s.eat_if('`') { @@ -357,12 +377,12 @@ impl<'s> Tokens<'s> { } } - fn number(&mut self, start: usize, first: char) -> Token<'s> { + fn number(&mut self, start: usize, c: char) -> Token<'s> { // Read the first part (integer or fractional depending on `first`). self.s.eat_while(|c| c.is_ascii_digit()); - // Read the fractional part if not already done and present. - if first != '.' && self.s.eat_if('.') { + // Read the fractional part if not already done. + if c != '.' && self.s.eat_if('.') { self.s.eat_while(|c| c.is_ascii_digit()); } @@ -654,7 +674,7 @@ mod tests { // Test code symbols in text. 
t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); - t!(Markup[" /"]: ";:,|/+" => Text(";:,|/+")); + t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); t!(Markup[" /"]: "#-a" => Text("#"), Text("-"), Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); @@ -707,10 +727,14 @@ mod tests { t!(Markup: "_" => Underscore); t!(Markup[""]: "###" => Hashtag, Hashtag, Hashtag); t!(Markup["a1/"]: "# " => Hashtag, Space(0)); - t!(Markup["a1/"]: "- " => Hyph, Space(0)); t!(Markup: "~" => Tilde); t!(Markup[" "]: r"\" => Backslash); t!(Markup["a "]: r"a--" => Text("a"), HyphHyph); + t!(Markup["a1/"]: "- " => Hyph, Space(0)); + t!(Markup[" "]: "." => Numbering(None)); + t!(Markup[" "]: "1." => Numbering(Some(1))); + t!(Markup[" "]: "1.a" => Text("1."), Text("a")); + t!(Markup[" /"]: "a1." => Text("a1.")); } #[test] |
