summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/incremental.rs32
-rw-r--r--src/parse/mod.rs69
-rw-r--r--src/parse/resolve.rs2
-rw-r--r--src/parse/tokens.rs491
4 files changed, 304 insertions, 290 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 9272dcfc..06096a75 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -407,18 +407,18 @@ mod tests {
test("", 0..0, "do it", 0..5);
test("a d e", 1 .. 3, " b c d", 0 .. 9);
test("*~ *", 2..2, "*", 0..5);
- test("_1_\n2a\n3", 5..5, "4", 0..7);
- test("_1_\n2a\n3~", 8..8, "4", 5..10);
+ test("_1_\n2a\n3", 5..5, "4", 4..7);
+ test("_1_\n2a\n3~", 8..8, "4", 4..10);
test("_1_ 2 3a\n4", 7..7, "5", 0..9);
test("* {1+2} *", 5..6, "3", 2..7);
test("a #f() e", 1 .. 6, " b c d", 0 .. 9);
test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 2 .. 7);
test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 3 .. 10);
- test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 6..20);
+ test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 4..20);
test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6);
test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14);
test("\n= A heading", 4 .. 4, "n evocative", 0 .. 23);
- test("for~your~thing", 9 .. 9, "a", 4 .. 15);
+ test("for~your~thing", 9 .. 9, "a", 0 .. 15);
test("a your thing a", 6 .. 7, "a", 0 .. 14);
test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
test("#call() abc", 7 .. 7, "[]", 0 .. 10);
@@ -429,17 +429,17 @@ mod tests {
test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33);
- test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11);
- test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25);
- test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
+ test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
+ test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 0 .. 25);
+ test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 22);
test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
- test(r#"a ```typst hello``` b"#, 16 .. 17, "", 2 .. 18);
- test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
+ test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 18);
+ test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
test("#for", 4 .. 4, "//", 0 .. 6);
test("#show a: f as b..", 16..16, "c", 0..18);
test("a\n#let \nb", 7 .. 7, "i", 2 .. 9);
test("a\n#for i \nb", 9 .. 9, "in", 2 .. 12);
- test("a~https://fun/html", 13..14, "n", 2..18);
+ test("a~https://fun/html", 13..14, "n", 0..18);
}
#[test]
@@ -452,7 +452,7 @@ mod tests {
test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19);
test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 0 .. 23);
test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
- test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
+ test("hey #myfriend", 4 .. 4, "\\", 0 .. 6);
test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11);
test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 9);
test(" // hi", 1 .. 1, " ", 0 .. 7);
@@ -461,12 +461,12 @@ mod tests {
#[test]
fn test_parse_incremental_type_invariants() {
- test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
- test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11);
+ test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
+ test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 11);
test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18);
- test("a // b c #f()", 3 .. 4, "", 2 .. 12);
+ test("a // b c #f()", 3 .. 4, "", 0 .. 12);
test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12);
test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
@@ -477,8 +477,8 @@ mod tests {
#[test]
fn test_parse_incremental_wrongly_or_unclosed_things() {
test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
- test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
- test(r"this \u{abcd} that", 12 .. 13, "", 5 .. 17);
+ test(r"this \u{abcd}", 8 .. 9, "", 0 .. 12);
+ test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24);
test("a b c", 1 .. 1, " /* letters */", 0 .. 19);
test("a b c", 1 .. 1, " /* letters", 0 .. 16);
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index a8dee7f3..ed8bc5ce 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -162,11 +162,6 @@ fn markup(p: &mut Parser, mut at_start: bool) {
});
}
-/// Parse a single line of markup.
-fn markup_line(p: &mut Parser) {
- markup_indented(p, usize::MAX);
-}
-
/// Parse markup that stays right of the given `column`.
fn markup_indented(p: &mut Parser, min_indent: usize) {
p.eat_while(|t| match t {
@@ -185,7 +180,6 @@ fn markup_indented(p: &mut Parser, min_indent: usize) {
{
break;
}
- Some(NodeKind::Label(_)) => break,
_ => {}
}
@@ -195,6 +189,33 @@ fn markup_indented(p: &mut Parser, min_indent: usize) {
marker.end(p, NodeKind::Markup { min_indent });
}
+/// Parse a line of markup that can prematurely end if `f` returns true.
+fn markup_line<F>(p: &mut Parser, mut f: F)
+where
+ F: FnMut(&NodeKind) -> bool,
+{
+ p.eat_while(|t| match t {
+ NodeKind::Space { newlines } => *newlines == 0,
+ NodeKind::LineComment | NodeKind::BlockComment => true,
+ _ => false,
+ });
+
+ p.perform(NodeKind::Markup { min_indent: usize::MAX }, |p| {
+ let mut at_start = false;
+ while let Some(kind) = p.peek() {
+ if let NodeKind::Space { newlines: (1 ..) } = kind {
+ break;
+ }
+
+ if f(kind) {
+ break;
+ }
+
+ markup_node(p, &mut at_start);
+ }
+ });
+}
+
/// Parse a markup node.
fn markup_node(p: &mut Parser, at_start: &mut bool) {
let token = match p.peek() {
@@ -226,6 +247,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Ellipsis
| NodeKind::Quote { .. }
| NodeKind::Escape(_)
+ | NodeKind::Link(_)
| NodeKind::Raw(_)
| NodeKind::Math(_)
| NodeKind::Label(_)
@@ -233,12 +255,22 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
p.eat();
}
- // Grouping markup.
+ // Strong, emph, heading.
NodeKind::Star => strong(p),
NodeKind::Underscore => emph(p),
NodeKind::Eq => heading(p, *at_start),
+
+ // Lists.
NodeKind::Minus => list_node(p, *at_start),
- NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
+ NodeKind::Plus | NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
+ NodeKind::Slash => {
+ desc_node(p, *at_start).ok();
+ }
+ NodeKind::Colon => {
+ let marker = p.marker();
+ p.eat();
+ marker.convert(p, NodeKind::Text(':'.into()));
+ }
// Hashtag + keyword / identifier.
NodeKind::Ident(_)
@@ -293,7 +325,7 @@ fn heading(p: &mut Parser, at_start: bool) {
if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
p.eat_while(|kind| *kind == NodeKind::Space { newlines: 0 });
- markup_line(p);
+ markup_line(p, |kind| matches!(kind, NodeKind::Label(_)));
marker.end(p, NodeKind::Heading);
} else {
let text = p.get(current_start .. p.prev_end()).into();
@@ -331,6 +363,25 @@ fn enum_node(p: &mut Parser, at_start: bool) {
}
}
+/// Parse a single description list item.
+fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult {
+ let marker = p.marker();
+ let text: EcoString = p.peek_src().into();
+ p.eat();
+
+ let min_indent = p.column(p.prev_end());
+ if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
+ markup_line(p, |node| matches!(node, NodeKind::Colon));
+ p.expect(NodeKind::Colon)?;
+ markup_indented(p, min_indent);
+ marker.end(p, NodeKind::Desc);
+ } else {
+ marker.convert(p, NodeKind::Text(text));
+ }
+
+ Ok(())
+}
+
/// Parse an expression within a markup mode.
fn markup_expr(p: &mut Parser) {
// Does the expression need termination or can content follow directly?
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index bce2da3c..d68282c0 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -26,14 +26,12 @@ pub fn resolve_string(string: &str) -> EcoString {
// TODO: Error if closing brace is missing.
let sequence = s.eat_while(char::is_ascii_hexdigit);
let _terminated = s.eat_if('}');
-
match resolve_hex(sequence) {
Some(c) => out.push(c),
None => out.push_str(s.from(start)),
}
}
- // TODO: Error for invalid escape sequence.
_ => out.push_str(s.from(start)),
}
}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 84a63ba1..f6d4b0e8 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -103,6 +103,11 @@ impl<'s> Iterator for Tokens<'s> {
let start = self.s.cursor();
let c = self.s.eat()?;
Some(match c {
+ // Comments.
+ '/' if self.s.eat_if('/') => self.line_comment(),
+ '/' if self.s.eat_if('*') => self.block_comment(),
+ '*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()),
+
// Blocks.
'{' => NodeKind::LeftBrace,
'}' => NodeKind::RightBrace,
@@ -110,15 +115,7 @@ impl<'s> Iterator for Tokens<'s> {
']' => NodeKind::RightBracket,
// Whitespace.
- ' ' if self.s.done() || !self.s.at(char::is_whitespace) => {
- NodeKind::Space { newlines: 0 }
- }
- c if c.is_whitespace() => self.whitespace(),
-
- // Comments with special case for URLs.
- '/' if self.s.eat_if('*') => self.block_comment(),
- '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(),
- '*' if self.s.eat_if('/') => NodeKind::Unknown(self.s.from(start).into()),
+ c if c.is_whitespace() => self.whitespace(c),
// Other things.
_ => match self.mode {
@@ -130,85 +127,110 @@ impl<'s> Iterator for Tokens<'s> {
}
impl<'s> Tokens<'s> {
+ fn line_comment(&mut self) -> NodeKind {
+ self.s.eat_until(is_newline);
+ if self.s.peek().is_none() {
+ self.terminated = false;
+ }
+ NodeKind::LineComment
+ }
+
+ fn block_comment(&mut self) -> NodeKind {
+ let mut state = '_';
+ let mut depth = 1;
+ self.terminated = false;
+
+ // Find the first `*/` that does not correspond to a nested `/*`.
+ while let Some(c) = self.s.eat() {
+ state = match (state, c) {
+ ('*', '/') => {
+ depth -= 1;
+ if depth == 0 {
+ self.terminated = true;
+ break;
+ }
+ '_'
+ }
+ ('/', '*') => {
+ depth += 1;
+ '_'
+ }
+ ('/', '/') => {
+ self.line_comment();
+ '_'
+ }
+ _ => c,
+ }
+ }
+
+ NodeKind::BlockComment
+ }
+
+ fn whitespace(&mut self, c: char) -> NodeKind {
+ if c == ' ' && !self.s.at(char::is_whitespace) {
+ return NodeKind::Space { newlines: 0 };
+ }
+
+ self.s.uneat();
+
+ // Count the number of newlines.
+ let mut newlines = 0;
+ while let Some(c) = self.s.eat() {
+ if !c.is_whitespace() {
+ self.s.uneat();
+ break;
+ }
+
+ if is_newline(c) {
+ if c == '\r' {
+ self.s.eat_if('\n');
+ }
+ newlines += 1;
+ }
+ }
+
+ NodeKind::Space { newlines }
+ }
+
#[inline]
fn markup(&mut self, start: usize, c: char) -> NodeKind {
match c {
// Escape sequences.
'\\' => self.backslash(),
- // Keywords and identifiers.
- '#' => self.hash(),
-
- // Markup.
+ // Single-char things.
'~' => NodeKind::NonBreakingSpace,
- '-' => self.hyph(),
'.' if self.s.eat_if("..") => NodeKind::Ellipsis,
'\'' => NodeKind::Quote { double: false },
'"' => NodeKind::Quote { double: true },
'*' if !self.in_word() => NodeKind::Star,
'_' if !self.in_word() => NodeKind::Underscore,
- '`' => self.raw(),
'=' => NodeKind::Eq,
- '$' => self.math(),
- '<' => self.label(),
- '@' => self.reference(),
- c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
-
- // Plain text.
- _ => self.text(start),
- }
- }
-
- fn code(&mut self, start: usize, c: char) -> NodeKind {
- match c {
- // Parens.
- '(' => NodeKind::LeftParen,
- ')' => NodeKind::RightParen,
-
- // Length two.
- '=' if self.s.eat_if('=') => NodeKind::EqEq,
- '!' if self.s.eat_if('=') => NodeKind::ExclEq,
- '<' if self.s.eat_if('=') => NodeKind::LtEq,
- '>' if self.s.eat_if('=') => NodeKind::GtEq,
- '+' if self.s.eat_if('=') => NodeKind::PlusEq,
- '-' if self.s.eat_if('=') => NodeKind::HyphEq,
- '*' if self.s.eat_if('=') => NodeKind::StarEq,
- '/' if self.s.eat_if('=') => NodeKind::SlashEq,
- '.' if self.s.eat_if('.') => NodeKind::Dots,
- '=' if self.s.eat_if('>') => NodeKind::Arrow,
-
- // Length one.
- ',' => NodeKind::Comma,
- ';' => NodeKind::Semicolon,
- ':' => NodeKind::Colon,
'+' => NodeKind::Plus,
- '-' => NodeKind::Minus,
- '*' => NodeKind::Star,
'/' => NodeKind::Slash,
- '=' => NodeKind::Eq,
- '<' => NodeKind::Lt,
- '>' => NodeKind::Gt,
- '.' if self.s.done() || !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
-
- // Identifiers.
- c if is_id_start(c) => self.ident(start),
+ ':' => NodeKind::Colon,
- // Numbers.
- c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
- self.number(start, c)
+ // Multi-char things.
+ '#' => self.hash(start),
+ '-' => self.hyph(),
+ 'h' if self.s.eat_if("ttp://") || self.s.eat_if("ttps://") => {
+ self.link(start)
}
+ '`' => self.raw(),
+ '$' => self.math(),
+ c if c.is_ascii_digit() => self.numbering(start),
+ '<' => self.label(),
+ '@' => self.reference(start),
- // Strings.
- '"' => self.string(),
-
- _ => NodeKind::Unknown(self.s.from(start).into()),
+ // Plain text.
+ _ => self.text(start),
}
}
#[inline]
fn text(&mut self, start: usize) -> NodeKind {
macro_rules! table {
- ($($c:literal)|*) => {{
+ ($(|$c:literal)*) => {{
let mut t = [false; 128];
$(t[$c as usize] = true;)*
t
@@ -216,12 +238,9 @@ impl<'s> Tokens<'s> {
}
const TABLE: [bool; 128] = table! {
- // Ascii whitespace.
- ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' |
- // Comments, parentheses, code.
- '/' | '[' | ']' | '{' | '}' | '#' |
- // Markup
- '~' | '-' | '.' | '\'' | '"' | '*' | '_' | '`' | '$' | '\\'
+ | ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
+ | '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
+ | '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
};
loop {
@@ -229,14 +248,17 @@ impl<'s> Tokens<'s> {
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
});
- // Allow a single space, optionally preceded by . or - if something
- // alphanumeric follows directly. This leads to less text nodes,
- // which is good for performance.
+ // Continue with the same text node if the thing would become text
+ // anyway.
let mut s = self.s;
- s.eat_if(['.', '-']);
- s.eat_if(' ');
- if !s.at(char::is_alphanumeric) {
- break;
+ match s.eat() {
+ Some('/') if !s.at(['/', '*']) => {}
+ Some(' ') if s.at(char::is_alphanumeric) => {}
+ Some('-') if !s.at(['-', '?']) => {}
+ Some('.') if !s.at("..") => {}
+ Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
+ Some('@' | '#') if !s.at(is_id_start) => {}
+ _ => break,
}
self.s = s;
@@ -245,47 +267,9 @@ impl<'s> Tokens<'s> {
NodeKind::Text(self.s.from(start).into())
}
- fn whitespace(&mut self) -> NodeKind {
- self.s.uneat();
-
- // Count the number of newlines.
- let mut newlines = 0;
- while let Some(c) = self.s.eat() {
- if !c.is_whitespace() {
- self.s.uneat();
- break;
- }
-
- if is_newline(c) {
- if c == '\r' {
- self.s.eat_if('\n');
- }
- newlines += 1;
- }
- }
-
- NodeKind::Space { newlines }
- }
-
fn backslash(&mut self) -> NodeKind {
- let c = match self.s.peek() {
- Some(c) => c,
- None => return NodeKind::Linebreak { justified: false },
- };
-
- match c {
- // Backslash and comments.
- '\\' | '/' |
- // Parenthesis and hashtag.
- '[' | ']' | '{' | '}' | '#' |
- // Markup.
- '~' | '-' | '.' | ':' |
- '\'' | '"' | '*' | '_' | '`' | '$' | '=' |
- '<' | '>' | '@' => {
- self.s.expect(c);
- NodeKind::Escape(c)
- }
- 'u' if self.s.eat_if("u{") => {
+ match self.s.peek() {
+ Some('u') if self.s.eat_if("u{") => {
let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) {
@@ -298,26 +282,23 @@ impl<'s> Tokens<'s> {
}
} else {
self.terminated = false;
- NodeKind::Error(
- SpanPos::End,
- "expected closing brace".into(),
- )
+ NodeKind::Error(SpanPos::End, "expected closing brace".into())
}
}
// Linebreaks.
- c if c.is_whitespace() => NodeKind::Linebreak { justified: false },
- '+' => {
+ Some(c) if c.is_whitespace() => NodeKind::Linebreak,
+ None => NodeKind::Linebreak,
+
+ // Escapes.
+ Some(c) => {
self.s.expect(c);
- NodeKind::Linebreak { justified: true }
+ NodeKind::Escape(c)
}
-
- // Just the backslash.
- _ => NodeKind::Text('\\'.into()),
}
}
- fn hash(&mut self) -> NodeKind {
+ fn hash(&mut self, start: usize) -> NodeKind {
if self.s.at(is_id_start) {
let read = self.s.eat_while(is_id_continue);
match keyword(read) {
@@ -325,7 +306,7 @@ impl<'s> Tokens<'s> {
None => NodeKind::Ident(read.into()),
}
} else {
- NodeKind::Text('#'.into())
+ self.text(start)
}
}
@@ -343,19 +324,26 @@ impl<'s> Tokens<'s> {
}
}
- fn numbering(&mut self, start: usize, c: char) -> NodeKind {
- let number = if c != '.' {
- self.s.eat_while(char::is_ascii_digit);
- let read = self.s.from(start);
- if !self.s.eat_if('.') {
- return NodeKind::Text(self.s.from(start).into());
- }
- read.parse().ok()
- } else {
- None
- };
+ fn in_word(&self) -> bool {
+ let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.scout(-2);
+ let next = self.s.peek();
+ alphanumeric(prev) && alphanumeric(next)
+ }
- NodeKind::EnumNumbering(number)
+ fn link(&mut self, start: usize) -> NodeKind {
+ #[rustfmt::skip]
+ self.s.eat_while(|c: char| matches!(c,
+ | '0' ..= '9'
+ | 'a' ..= 'z'
+ | 'A' ..= 'Z'
+ | '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
+ | '\'' | '.' | ',' | ';'
+ ));
+ if self.s.scout(-1) == Some('.') {
+ self.s.uneat();
+ }
+ NodeKind::Link(self.s.from(start).into())
}
fn raw(&mut self) -> NodeKind {
@@ -376,7 +364,6 @@ impl<'s> Tokens<'s> {
}
let start = self.s.cursor();
-
let mut found = 0;
while found < backticks {
match self.s.eat() {
@@ -394,10 +381,9 @@ impl<'s> Tokens<'s> {
self.s.get(start .. end),
)))
} else {
+ self.terminated = false;
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
-
- self.terminated = false;
NodeKind::Error(
SpanPos::End,
if found == 0 {
@@ -410,51 +396,38 @@ impl<'s> Tokens<'s> {
}
fn math(&mut self) -> NodeKind {
- let mut display = false;
- if self.s.eat_if('[') {
- display = true;
- }
-
- let start = self.s.cursor();
-
let mut escaped = false;
- let mut dollar = !display;
-
- let terminated = loop {
- match self.s.eat() {
- Some('$') if !escaped && dollar => break true,
- Some(']') if !escaped => dollar = true,
- Some(c) => {
- dollar = !display;
- escaped = c == '\\' && !escaped;
- }
- None => break false,
+ let formula = self.s.eat_until(|c| {
+ if c == '$' && !escaped {
+ true
+ } else {
+ escaped = c == '\\' && !escaped;
+ false
}
- };
+ });
+
+ let display = formula.len() >= 2
+ && formula.starts_with(char::is_whitespace)
+ && formula.ends_with(char::is_whitespace);
- let end = self.s.cursor()
- - match (terminated, display) {
- (false, _) => 0,
- (true, false) => 1,
- (true, true) => 2,
- };
-
- if terminated {
- NodeKind::Math(Arc::new(MathNode {
- formula: self.s.get(start .. end).into(),
- display,
- }))
+ if self.s.eat_if('$') {
+ NodeKind::Math(Arc::new(MathNode { formula: formula.into(), display }))
} else {
self.terminated = false;
- NodeKind::Error(
- SpanPos::End,
- if !display || (!escaped && dollar) {
- "expected closing dollar sign".into()
- } else {
- "expected closing bracket and dollar sign".into()
- },
- )
+ NodeKind::Error(SpanPos::End, "expected dollar sign".into())
+ }
+ }
+
+ fn numbering(&mut self, start: usize) -> NodeKind {
+ self.s.eat_while(char::is_ascii_digit);
+ let read = self.s.from(start);
+ if self.s.eat_if('.') {
+ if let Ok(number) = read.parse() {
+ return NodeKind::EnumNumbering(number);
+ }
}
+
+ self.text(start)
}
fn label(&mut self) -> NodeKind {
@@ -471,12 +444,59 @@ impl<'s> Tokens<'s> {
}
}
- fn reference(&mut self) -> NodeKind {
+ fn reference(&mut self, start: usize) -> NodeKind {
let label = self.s.eat_while(is_id_continue);
if !label.is_empty() {
NodeKind::Ref(label.into())
} else {
- NodeKind::Error(SpanPos::Full, "label cannot be empty".into())
+ self.text(start)
+ }
+ }
+
+ fn code(&mut self, start: usize, c: char) -> NodeKind {
+ match c {
+ // Parentheses.
+ '(' => NodeKind::LeftParen,
+ ')' => NodeKind::RightParen,
+
+ // Two-char operators.
+ '=' if self.s.eat_if('=') => NodeKind::EqEq,
+ '!' if self.s.eat_if('=') => NodeKind::ExclEq,
+ '<' if self.s.eat_if('=') => NodeKind::LtEq,
+ '>' if self.s.eat_if('=') => NodeKind::GtEq,
+ '+' if self.s.eat_if('=') => NodeKind::PlusEq,
+ '-' if self.s.eat_if('=') => NodeKind::HyphEq,
+ '*' if self.s.eat_if('=') => NodeKind::StarEq,
+ '/' if self.s.eat_if('=') => NodeKind::SlashEq,
+ '.' if self.s.eat_if('.') => NodeKind::Dots,
+ '=' if self.s.eat_if('>') => NodeKind::Arrow,
+
+ // Single-char operators.
+ ',' => NodeKind::Comma,
+ ';' => NodeKind::Semicolon,
+ ':' => NodeKind::Colon,
+ '+' => NodeKind::Plus,
+ '-' => NodeKind::Minus,
+ '*' => NodeKind::Star,
+ '/' => NodeKind::Slash,
+ '=' => NodeKind::Eq,
+ '<' => NodeKind::Lt,
+ '>' => NodeKind::Gt,
+ '.' if !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
+
+ // Identifiers.
+ c if is_id_start(c) => self.ident(start),
+
+ // Numbers.
+ c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
+ self.number(start, c)
+ }
+
+ // Strings.
+ '"' => self.string(),
+
+ // Invalid token.
+ _ => NodeKind::Unknown(self.s.from(start).into()),
}
}
@@ -543,18 +563,18 @@ impl<'s> Tokens<'s> {
}
}
-
fn string(&mut self) -> NodeKind {
let mut escaped = false;
- let string = resolve_string(self.s.eat_until(|c| {
+ let verbatim = self.s.eat_until(|c| {
if c == '"' && !escaped {
true
} else {
escaped = c == '\\' && !escaped;
false
}
- }));
+ });
+ let string = resolve_string(verbatim);
if self.s.eat_if('"') {
NodeKind::Str(string)
} else {
@@ -562,56 +582,6 @@ impl<'s> Tokens<'s> {
NodeKind::Error(SpanPos::End, "expected quote".into())
}
}
-
- fn line_comment(&mut self) -> NodeKind {
- self.s.eat_until(is_newline);
- if self.s.peek().is_none() {
- self.terminated = false;
- }
- NodeKind::LineComment
- }
-
- fn block_comment(&mut self) -> NodeKind {
- let mut state = '_';
- let mut depth = 1;
- self.terminated = false;
-
- // Find the first `*/` that does not correspond to a nested `/*`.
- while let Some(c) = self.s.eat() {
- state = match (state, c) {
- ('*', '/') => {
- depth -= 1;
- if depth == 0 {
- self.terminated = true;
- break;
- }
- '_'
- }
- ('/', '*') => {
- depth += 1;
- '_'
- }
- ('/', '/') => {
- self.line_comment();
- '_'
- }
- _ => c,
- }
- }
-
- NodeKind::BlockComment
- }
-
- fn in_word(&self) -> bool {
- let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
- let prev = self.s.scout(-2);
- let next = self.s.peek();
- alphanumeric(prev) && alphanumeric(next)
- }
-
- fn maybe_in_url(&self) -> bool {
- self.mode == TokenMode::Markup && self.s.before().ends_with(":/")
- }
}
fn keyword(ident: &str) -> Option<NodeKind> {
@@ -872,14 +842,14 @@ mod tests {
#[test]
fn test_tokenize_text() {
// Test basic text.
- t!(Markup[" /"]: "hello" => Text("hello"));
- t!(Markup[" /"]: "hello-world" => Text("hello-world"));
+ t!(Markup[" /"]: "hello" => Text("hello"));
+ t!(Markup[" /"]: "reha-world" => Text("reha-world"));
// Test code symbols in text.
- t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote { double: true }, Text("b"));
- t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+"));
+ t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, Quote { double: true }, Text("b"));
+ t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
- t!(Markup[" "]: "#123" => Text("#"), Text("123"));
+ t!(Markup[" "]: "#123" => Text("#123"));
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
@@ -904,11 +874,9 @@ mod tests {
t!(Markup: r"\`" => Escape('`'));
t!(Markup: r"\$" => Escape('$'));
t!(Markup: r"\#" => Escape('#'));
-
- // Test unescapable symbols.
- t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
- t!(Markup[" /"]: r"\u" => Text(r"\"), Text("u"));
- t!(Markup[" /"]: r"\1" => Text(r"\"), Text("1"));
+ t!(Markup: r"\a" => Escape('a'));
+ t!(Markup: r"\u" => Escape('u'));
+ t!(Markup: r"\1" => Escape('1'));
// Test basic unicode escapes.
t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
@@ -930,16 +898,15 @@ mod tests {
t!(Markup: "_" => Underscore);
t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0));
- t!(Markup[" "]: r"\" => Linebreak { justified: false });
- t!(Markup[" "]: r"\+" => Linebreak { justified: true });
+ t!(Markup[" "]: r"\" => Linebreak);
t!(Markup: "~" => NonBreakingSpace);
t!(Markup["a1/"]: "-?" => Shy);
t!(Markup["a "]: r"a--" => Text("a"), EnDash);
t!(Markup["a1/"]: "- " => Minus, Space(0));
- t!(Markup[" "]: "." => EnumNumbering(None));
- t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
- t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a"));
- t!(Markup[" /"]: "a1." => Text("a1"), EnumNumbering(None));
+ t!(Markup[" "]: "+" => Plus);
+ t!(Markup[" "]: "1." => EnumNumbering(1));
+ t!(Markup[" "]: "1.a" => EnumNumbering(1), Text("a"));
+ t!(Markup[" /"]: "a1." => Text("a1."));
}
#[test]
@@ -995,7 +962,7 @@ mod tests {
for (s, t) in list.clone() {
t!(Markup[" "]: format!("#{}", s) => t);
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
- t!(Markup[" /"]: format!("# {}", s) => Text("#"), Space(0), Text(s));
+ t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
}
for (s, t) in list {
@@ -1037,18 +1004,16 @@ mod tests {
t!(Markup: "$$" => Math("", false));
t!(Markup: "$x$" => Math("x", false));
t!(Markup: r"$\\$" => Math(r"\\", false));
- t!(Markup: "$[x + y]$" => Math("x + y", true));
- t!(Markup: r"$[\\]$" => Math(r"\\", true));
+ t!(Markup: r"$[\\]$" => Math(r"[\\]", false));
+ t!(Markup: "$ x + y $" => Math(" x + y ", true));
// Test unterminated.
- t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign"));
- t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign"));
- t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign"));
+ t!(Markup[""]: "$x" => Error(End, "expected dollar sign"));
+ t!(Markup[""]: "$[x]\n" => Error(End, "expected dollar sign"));
// Test escape sequences.
- t!(Markup: r"$\$x$" => Math(r"\$x", false));
- t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true));
- t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign"));
+ t!(Markup: r"$\$x$" => Math(r"\$x", false));
+ t!(Markup: r"$\ \$ $" => Math(r"\ \$ ", false));
}
#[test]