summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
author: Laurenz <laurmaedje@gmail.com> 2022-10-04 13:42:49 +0200
committer: Laurenz <laurmaedje@gmail.com> 2022-10-04 13:45:16 +0200
commit: 5a8534a395b500a25cbc46ee15ec031c8231de59 (patch)
tree: a525c447c3243fe315c7ed91923e158df131809b /src/parse
parent: 7ef6cb31df0fe1ebec99b1077053a586a349f530 (diff)
Parse basic math syntax
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 152
-rw-r--r-- src/parse/parser.rs 34
-rw-r--r-- src/parse/tokens.rs 142
3 files changed, 234 insertions, 94 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index ed8bc5ce..7eb7343b 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -11,7 +11,7 @@ pub use tokens::*;
use std::collections::HashSet;
-use crate::syntax::ast::{Associativity, BinOp, UnOp};
+use crate::syntax::ast::{Assoc, BinOp, UnOp};
use crate::syntax::{NodeKind, SpanPos, SyntaxNode};
use crate::util::EcoString;
@@ -22,11 +22,22 @@ pub fn parse(text: &str) -> SyntaxNode {
p.finish().into_iter().next().unwrap()
}
+/// Parse math directly, only used for syntax highlighting.
+///
+/// The parser starts in `TokenMode::Math` and, unlike `math`, opens no
+/// `Group::Math`, so no surrounding dollar signs are expected in `text`.
+pub fn parse_math(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, TokenMode::Math);
+ // Collect all math nodes until the end of input into one `Math` node.
+ p.perform(NodeKind::Math, |p| {
+ while !p.eof() {
+ math_node(p);
+ }
+ });
+ // Return the first node the parser produced, i.e. the `Math` node.
+ p.finish().into_iter().next().unwrap()
+}
+
/// Parse code directly, only used for syntax highlighting.
-pub fn parse_code(text: &str) -> Vec<SyntaxNode> {
+pub fn parse_code(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, TokenMode::Code);
- code(&mut p);
- p.finish()
+ p.perform(NodeKind::CodeBlock, code);
+ p.finish().into_iter().next().unwrap()
}
/// Reparse a code block.
@@ -240,20 +251,20 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
// Text and markup.
NodeKind::Text(_)
| NodeKind::Linebreak { .. }
- | NodeKind::NonBreakingSpace
- | NodeKind::Shy
- | NodeKind::EnDash
- | NodeKind::EmDash
- | NodeKind::Ellipsis
+ | NodeKind::Tilde
+ | NodeKind::HyphQuest
+ | NodeKind::Hyph2
+ | NodeKind::Hyph3
+ | NodeKind::Dot3
| NodeKind::Quote { .. }
| NodeKind::Escape(_)
| NodeKind::Link(_)
| NodeKind::Raw(_)
- | NodeKind::Math(_)
| NodeKind::Label(_)
- | NodeKind::Ref(_) => {
- p.eat();
- }
+ | NodeKind::Ref(_) => p.eat(),
+
+ // Math.
+ NodeKind::Dollar => math(p),
// Strong, emph, heading.
NodeKind::Star => strong(p),
@@ -405,6 +416,111 @@ fn markup_expr(p: &mut Parser) {
p.end_group();
}
+/// Parse a math formula delimited by dollar signs: `$...$`.
+///
+/// Called from markup when a `Dollar` token is encountered. The delimiters and
+/// everything between them end up in a single `Math` node.
+fn math(p: &mut Parser) {
+ p.perform(NodeKind::Math, |p| {
+ // Asserts the opening dollar sign and switches to math tokenization.
+ p.start_group(Group::Math);
+ while !p.eof() {
+ math_node(p);
+ }
+ // Eats the closing dollar sign (reported as required if missing).
+ p.end_group();
+ });
+}
+
+/// Parse a math node.
+///
+/// Shorthand for `math_node_prec` with a minimum precedence of zero and no
+/// stop token.
+fn math_node(p: &mut Parser) {
+ math_node_prec(p, 0, None)
+}
+
+/// Parse a math node with operators having at least the minimum precedence.
+///
+/// First parses a primary node, then repeatedly folds following operators
+/// (`_`, `^`, `/`) and their right-hand sides into `Script` or `Frac` nodes.
+///
+/// - `min_prec`: operators with a lower precedence are left for the caller.
+/// - `stop`: if the next token equals this kind, no further operator is
+///   consumed at this level.
+fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<NodeKind>) {
+ let marker = p.marker();
+ math_primary(p);
+
+ loop {
+ // Look up the resulting node kind, precedence and associativity of the
+ // operator at the cursor, plus the token at which parsing of its
+ // right-hand side must stop. Note that the `stop` bound here shadows
+ // the parameter only for the rest of this iteration.
+ let (kind, mut prec, assoc, stop) = match p.peek() {
+ // Stop at the caller-requested token. This arm also ends the loop
+ // when there is neither a next token nor a stop token (both `None`).
+ v if v == stop.as_ref() => break,
+ // A subscript stops at `^` and a superscript at `_`, so that the
+ // other script can be attached to the same base below.
+ Some(NodeKind::Underscore) => {
+ (NodeKind::Script, 2, Assoc::Right, Some(NodeKind::Hat))
+ }
+ Some(NodeKind::Hat) => (
+ NodeKind::Script,
+ 2,
+ Assoc::Right,
+ Some(NodeKind::Underscore),
+ ),
+ Some(NodeKind::Slash) => (NodeKind::Frac, 1, Assoc::Left, None),
+ _ => break,
+ };
+
+ // The operator binds less tightly than required; leave it to the caller.
+ if prec < min_prec {
+ break;
+ }
+
+ // For a left-associative operator, the right-hand side may only
+ // contain operators that bind strictly tighter.
+ match assoc {
+ Assoc::Left => prec += 1,
+ Assoc::Right => {}
+ }
+
+ // Eat the operator, then parse its right-hand side.
+ p.eat();
+ math_node_prec(p, prec, stop);
+
+ // Allow up to two different scripts. We do not risk encountering the
+ // previous script kind again here due to right-associativity.
+ if p.eat_if(NodeKind::Underscore) || p.eat_if(NodeKind::Hat) {
+ math_node_prec(p, prec, None);
+ }
+
+ // Finish the node of `kind` that began at `marker`.
+ marker.end(p, kind);
+ }
+}
+
+/// Parse a primary math node.
+///
+/// A primary node is a single eaten token (space, linebreak, escape, atom or
+/// identifier), a delimited group or an alignment indicator. Does nothing if
+/// there is no next token; any other token is reported as unexpected.
+fn math_primary(p: &mut Parser) {
+ let token = match p.peek() {
+ Some(t) => t,
+ None => return,
+ };
+
+ match token {
+ // Spaces, atoms and expressions.
+ NodeKind::Space { .. }
+ | NodeKind::Linebreak
+ | NodeKind::Escape(_)
+ | NodeKind::Atom(_)
+ | NodeKind::Ident(_) => p.eat(),
+
+ // Groups.
+ NodeKind::LeftParen => group(p, Group::Paren),
+ NodeKind::LeftBracket => group(p, Group::Bracket),
+ NodeKind::LeftBrace => group(p, Group::Brace),
+
+ // Alignment indicator.
+ NodeKind::Amp => align(p),
+
+ _ => p.unexpected(),
+ }
+}
+
+/// Parse grouped math.
+///
+/// `group` selects the delimiter pair; `math_primary` calls this with
+/// `Group::Paren`, `Group::Bracket` or `Group::Brace`. The delimiters and the
+/// nodes between them are wrapped in a nested `Math` node.
+fn group(p: &mut Parser, group: Group) {
+ p.perform(NodeKind::Math, |p| {
+ // When already tokenizing math, `start_group` keeps math mode for
+ // these three group kinds.
+ p.start_group(group);
+ while !p.eof() {
+ math_node(p);
+ }
+ p.end_group();
+ })
+}
+
+/// Parse an alignment indicator.
+///
+/// One or more consecutive `&` tokens are collected into a single `Align`
+/// node.
+fn align(p: &mut Parser) {
+ p.perform(NodeKind::Align, |p| {
+ // `math_primary` only dispatches here on `Amp`, so the first one exists.
+ p.assert(NodeKind::Amp);
+ // Swallow any further ampersands that follow.
+ while p.eat_if(NodeKind::Amp) {}
+ })
+}
+
/// Parse an expression.
fn expr(p: &mut Parser) -> ParseResult {
expr_prec(p, false, 0)
@@ -434,7 +550,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
loop {
// Parenthesis or bracket means this is a function call.
if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
- marker.perform(p, NodeKind::FuncCall, |p| args(p))?;
+ marker.perform(p, NodeKind::FuncCall, args)?;
continue;
}
@@ -446,7 +562,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
if p.eat_if(NodeKind::Dot) {
ident(p)?;
if let Some(NodeKind::LeftParen | NodeKind::LeftBracket) = p.peek_direct() {
- marker.perform(p, NodeKind::MethodCall, |p| args(p))?;
+ marker.perform(p, NodeKind::MethodCall, args)?;
} else {
marker.end(p, NodeKind::FieldAccess);
}
@@ -474,9 +590,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
p.eat();
- match op.associativity() {
- Associativity::Left => prec += 1,
- Associativity::Right => {}
+ match op.assoc() {
+ Assoc::Left => prec += 1,
+ Assoc::Right => {}
}
marker.perform(p, NodeKind::BinaryExpr, |p| expr_prec(p, atomic, prec))?;
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f8ea9614..12dd324b 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -92,14 +92,14 @@ impl<'s> Parser<'s> {
let until = self.trivia_start();
let mut children = mem::replace(&mut self.children, prev);
- if self.tokens.mode() == TokenMode::Code {
+ if self.tokens.mode() == TokenMode::Markup {
+ self.children.push(InnerNode::with_children(kind, children).into());
+ } else {
// Trailing trivia should not be wrapped into the new node.
let idx = self.children.len();
self.children.push(SyntaxNode::default());
self.children.extend(children.drain(until.0 ..));
self.children[idx] = InnerNode::with_children(kind, children).into();
- } else {
- self.children.push(InnerNode::with_children(kind, children).into());
}
output
@@ -122,7 +122,7 @@ impl<'s> Parser<'s> {
self.prev_end = self.tokens.cursor();
self.bump();
- if self.tokens.mode() == TokenMode::Code {
+ if self.tokens.mode() != TokenMode::Markup {
// Skip whitespace and comments.
while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
self.bump();
@@ -232,8 +232,17 @@ impl<'s> Parser<'s> {
pub fn start_group(&mut self, kind: Group) {
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
self.tokens.set_mode(match kind {
- Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup,
- Group::Brace | Group::Paren | Group::Expr | Group::Imports => TokenMode::Code,
+ Group::Strong | Group::Emph => TokenMode::Markup,
+ Group::Bracket => match self.tokens.mode() {
+ TokenMode::Math => TokenMode::Math,
+ _ => TokenMode::Markup,
+ },
+ Group::Brace | Group::Paren => match self.tokens.mode() {
+ TokenMode::Math => TokenMode::Math,
+ _ => TokenMode::Code,
+ },
+ Group::Math => TokenMode::Math,
+ Group::Expr | Group::Imports => TokenMode::Code,
});
match kind {
@@ -242,6 +251,7 @@ impl<'s> Parser<'s> {
Group::Paren => self.assert(NodeKind::LeftParen),
Group::Strong => self.assert(NodeKind::Star),
Group::Emph => self.assert(NodeKind::Underscore),
+ Group::Math => self.assert(NodeKind::Dollar),
Group::Expr => self.repeek(),
Group::Imports => self.repeek(),
}
@@ -260,11 +270,12 @@ impl<'s> Parser<'s> {
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
- Group::Paren => Some((NodeKind::RightParen, true)),
- Group::Bracket => Some((NodeKind::RightBracket, true)),
Group::Brace => Some((NodeKind::RightBrace, true)),
+ Group::Bracket => Some((NodeKind::RightBracket, true)),
+ Group::Paren => Some((NodeKind::RightParen, true)),
Group::Strong => Some((NodeKind::Star, true)),
Group::Emph => Some((NodeKind::Underscore, true)),
+ Group::Math => Some((NodeKind::Dollar, true)),
Group::Expr => Some((NodeKind::Semicolon, false)),
Group::Imports => None,
} {
@@ -290,7 +301,7 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed.
if rescan {
let mut target = self.prev_end();
- if group_mode == TokenMode::Code {
+ if group_mode != TokenMode::Markup {
let start = self.trivia_start().0;
target = self.current_start
- self.children[start ..].iter().map(SyntaxNode::len).sum::<usize>();
@@ -330,6 +341,7 @@ impl<'s> Parser<'s> {
Some(NodeKind::RightParen) => self.inside(Group::Paren),
Some(NodeKind::Star) => self.inside(Group::Strong),
Some(NodeKind::Underscore) => self.inside(Group::Emph),
+ Some(NodeKind::Dollar) => self.inside(Group::Math),
Some(NodeKind::Semicolon) => self.inside(Group::Expr),
Some(NodeKind::From) => self.inside(Group::Imports),
Some(NodeKind::Space { newlines }) => self.space_ends_group(*newlines),
@@ -472,7 +484,7 @@ impl Marker {
}
// Don't expose trivia in code.
- if p.tokens.mode() == TokenMode::Code && child.kind().is_trivia() {
+ if p.tokens.mode() != TokenMode::Markup && child.kind().is_trivia() {
continue;
}
@@ -515,6 +527,8 @@ pub enum Group {
Strong,
/// A group surrounded with underscore: `_..._`.
Emph,
+ /// A group surrounded by dollar signs: `$...$`.
+ Math,
/// A group ended by a semicolon or a line break: `;`, `\n`.
Expr,
/// A group for import items, ended by a semicolon, line break or `from`.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index f6d4b0e8..d495afa0 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -5,7 +5,7 @@ use unscanny::Scanner;
use super::resolve::{resolve_hex, resolve_raw, resolve_string};
use crate::geom::{AngleUnit, LengthUnit};
-use crate::syntax::ast::{MathNode, RawNode, Unit};
+use crate::syntax::ast::{RawNode, Unit};
use crate::syntax::{NodeKind, SpanPos};
use crate::util::EcoString;
@@ -27,6 +27,8 @@ pub struct Tokens<'s> {
pub enum TokenMode {
/// Text and markup.
Markup,
+ /// Math atoms, operators, etc.
+ Math,
/// Keywords, literals and operators.
Code,
}
@@ -103,23 +105,16 @@ impl<'s> Iterator for Tokens<'s> {
let start = self.s.cursor();
let c = self.s.eat()?;
Some(match c {
- // Comments.
+ // Trivia.
'/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()),
-
- // Blocks.
- '{' => NodeKind::LeftBrace,
- '}' => NodeKind::RightBrace,
- '[' => NodeKind::LeftBracket,
- ']' => NodeKind::RightBracket,
-
- // Whitespace.
c if c.is_whitespace() => self.whitespace(c),
// Other things.
_ => match self.mode {
TokenMode::Markup => self.markup(start, c),
+ TokenMode::Math => self.math(start, c),
TokenMode::Code => self.code(start, c),
},
})
@@ -195,16 +190,23 @@ impl<'s> Tokens<'s> {
#[inline]
fn markup(&mut self, start: usize, c: char) -> NodeKind {
match c {
+ // Blocks.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+
// Escape sequences.
'\\' => self.backslash(),
// Single-char things.
- '~' => NodeKind::NonBreakingSpace,
- '.' if self.s.eat_if("..") => NodeKind::Ellipsis,
+ '~' => NodeKind::Tilde,
+ '.' if self.s.eat_if("..") => NodeKind::Dot3,
'\'' => NodeKind::Quote { double: false },
'"' => NodeKind::Quote { double: true },
'*' if !self.in_word() => NodeKind::Star,
'_' if !self.in_word() => NodeKind::Underscore,
+ '$' => NodeKind::Dollar,
'=' => NodeKind::Eq,
'+' => NodeKind::Plus,
'/' => NodeKind::Slash,
@@ -217,7 +219,6 @@ impl<'s> Tokens<'s> {
self.link(start)
}
'`' => self.raw(),
- '$' => self.math(),
c if c.is_ascii_digit() => self.numbering(start),
'<' => self.label(),
'@' => self.reference(start),
@@ -313,12 +314,12 @@ impl<'s> Tokens<'s> {
fn hyph(&mut self) -> NodeKind {
if self.s.eat_if('-') {
if self.s.eat_if('-') {
- NodeKind::EmDash
+ NodeKind::Hyph3
} else {
- NodeKind::EnDash
+ NodeKind::Hyph2
}
} else if self.s.eat_if('?') {
- NodeKind::Shy
+ NodeKind::HyphQuest
} else {
NodeKind::Minus
}
@@ -395,29 +396,6 @@ impl<'s> Tokens<'s> {
}
}
- fn math(&mut self) -> NodeKind {
- let mut escaped = false;
- let formula = self.s.eat_until(|c| {
- if c == '$' && !escaped {
- true
- } else {
- escaped = c == '\\' && !escaped;
- false
- }
- });
-
- let display = formula.len() >= 2
- && formula.starts_with(char::is_whitespace)
- && formula.ends_with(char::is_whitespace);
-
- if self.s.eat_if('$') {
- NodeKind::Math(Arc::new(MathNode { formula: formula.into(), display }))
- } else {
- self.terminated = false;
- NodeKind::Error(SpanPos::End, "expected dollar sign".into())
- }
- }
-
fn numbering(&mut self, start: usize) -> NodeKind {
self.s.eat_while(char::is_ascii_digit);
let read = self.s.from(start);
@@ -453,8 +431,51 @@ impl<'s> Tokens<'s> {
}
}
+ /// Produce the next token in math mode.
+ ///
+ /// `c` is the character that was already eaten from the scanner and
+ /// `start` is the cursor position in front of it.
+ fn math(&mut self, start: usize, c: char) -> NodeKind {
+ match c {
+ // Escape sequences.
+ '\\' => self.backslash(),
+
+ // Single-char things.
+ '_' => NodeKind::Underscore,
+ '^' => NodeKind::Hat,
+ '/' => NodeKind::Slash,
+ '&' => NodeKind::Amp,
+ '$' => NodeKind::Dollar,
+
+ // Brackets.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+ '(' => NodeKind::LeftParen,
+ ')' => NodeKind::RightParen,
+
+ // Identifiers.
+ // An identifier needs at least two characters: `c` plus a following
+ // continue character. A lone start character falls through to the
+ // arms below instead.
+ c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
+ self.s.eat_while(is_math_id_continue);
+ NodeKind::Ident(self.s.from(start).into())
+ }
+
+ // Numbers.
+ // A run of numeric characters becomes one atom.
+ c if c.is_numeric() => {
+ self.s.eat_while(char::is_numeric);
+ NodeKind::Atom(self.s.from(start).into())
+ }
+
+ // Other math atoms.
+ // Everything else is an atom consisting of just this character.
+ c => NodeKind::Atom(c.into()),
+ }
+ }
+
fn code(&mut self, start: usize, c: char) -> NodeKind {
match c {
+ // Blocks.
+ '{' => NodeKind::LeftBrace,
+ '}' => NodeKind::RightBrace,
+ '[' => NodeKind::LeftBracket,
+ ']' => NodeKind::RightBracket,
+
// Parentheses.
'(' => NodeKind::LeftParen,
')' => NodeKind::RightParen,
@@ -673,6 +694,18 @@ fn is_id_continue(c: char) -> bool {
c.is_xid_continue() || c == '_' || c == '-'
}
+/// Whether a character can start an identifier in math.
+///
+/// This is exactly the `is_xid_start` check, with no additional characters
+/// allowed.
+#[inline]
+fn is_math_id_start(c: char) -> bool {
+ c.is_xid_start()
+}
+
+/// Whether a character can continue an identifier in math.
+///
+/// The underscore is excluded on top of the `is_xid_continue` check, so it
+/// always ends a math identifier and is tokenized separately as
+/// `NodeKind::Underscore` by the math tokenizer.
+#[inline]
+fn is_math_id_continue(c: char) -> bool {
+ c.is_xid_continue() && c != '_'
+}
+
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
@@ -696,10 +729,6 @@ mod tests {
}))
}
- fn Math(formula: &str, display: bool) -> NodeKind {
- NodeKind::Math(Arc::new(MathNode { formula: formula.into(), display }))
- }
-
fn Str(string: &str) -> NodeKind {
NodeKind::Str(string.into())
}
@@ -770,7 +799,6 @@ mod tests {
('/', None, "//", LineComment),
('/', None, "/**/", BlockComment),
('/', Some(Markup), "*", Star),
- ('/', Some(Markup), "$ $", Math(" ", false)),
('/', Some(Markup), r"\\", Escape('\\')),
('/', Some(Markup), "#let", Let),
('/', Some(Code), "(", LeftParen),
@@ -853,7 +881,7 @@ mod tests {
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
- t!(Markup[""]: "hello~" => Text("hello"), NonBreakingSpace);
+ t!(Markup[""]: "hello~" => Text("hello"), Tilde);
}
#[test]
@@ -899,9 +927,9 @@ mod tests {
t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0));
t!(Markup[" "]: r"\" => Linebreak);
- t!(Markup: "~" => NonBreakingSpace);
- t!(Markup["a1/"]: "-?" => Shy);
- t!(Markup["a "]: r"a--" => Text("a"), EnDash);
+ t!(Markup: "~" => Tilde);
+ t!(Markup["a1/"]: "-?" => HyphQuest);
+ t!(Markup["a "]: r"a--" => Text("a"), Hyph2);
t!(Markup["a1/"]: "- " => Minus, Space(0));
t!(Markup[" "]: "+" => Plus);
t!(Markup[" "]: "1." => EnumNumbering(1));
@@ -999,24 +1027,6 @@ mod tests {
}
#[test]
- fn test_tokenize_math_formulas() {
- // Test basic formula.
- t!(Markup: "$$" => Math("", false));
- t!(Markup: "$x$" => Math("x", false));
- t!(Markup: r"$\\$" => Math(r"\\", false));
- t!(Markup: r"$[\\]$" => Math(r"[\\]", false));
- t!(Markup: "$ x + y $" => Math(" x + y ", true));
-
- // Test unterminated.
- t!(Markup[""]: "$x" => Error(End, "expected dollar sign"));
- t!(Markup[""]: "$[x]\n" => Error(End, "expected dollar sign"));
-
- // Test escape sequences.
- t!(Markup: r"$\$x$" => Math(r"\$x", false));
- t!(Markup: r"$\ \$ $" => Math(r"\ \$ ", false));
- }
-
- #[test]
fn test_tokenize_idents() {
// Test valid identifiers.
t!(Code[" /"]: "x" => Ident("x"));