summaryrefslogtreecommitdiff
path: root/src/syntax/tokens.rs
diff options
context:
space:
mode:
author: Laurenz <laurmaedje@gmail.com> 2020-02-13 21:58:49 +0100
committer: Laurenz <laurmaedje@gmail.com> 2020-02-13 21:58:49 +0100
commit: 1658b00282b631fb5218f477ea7f45f925644cea (patch)
tree: 13a0f51f335e687c363a5afb9cc73993a69489cc /src/syntax/tokens.rs
parent: 60099aed50b89daef29543c4700470e566c48798 (diff)
New syntax features 👔
- Forced line breaks with backslash followed by whitespace
- (Multiline) raw text in backticks
- Set font class fallbacks with [font.family] (e.g. [font.family: monospace=("CMU Typewriter Text")])
- More sophisticated procedure to find end of function, which accounts for comments, strings, raw text and nested functions (this is a mix of a feature and a bug fix)
Diffstat (limited to 'src/syntax/tokens.rs')
-rw-r--r-- src/syntax/tokens.rs 149
1 files changed, 107 insertions, 42 deletions
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index d78938e3..cc65d993 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -83,8 +83,17 @@ pub enum Token<'s> {
Star,
/// An underscore in body-text.
Underscore,
- /// A backtick in body-text.
- Backtick,
+
+ /// A backslash followed by whitespace in text.
+ Backslash,
+
+ /// Raw text.
+ Raw {
+ /// The raw text (not yet unescaped as for strings).
+ raw: &'s str,
+ /// Whether the closing backtick was present.
+ terminated: bool,
+ },
/// Any other consecutive string.
Text(&'s str),
@@ -115,8 +124,9 @@ impl<'s> Token<'s> {
ExprBool(_) => "bool",
Star => "star",
Underscore => "underscore",
- Backtick => "backtick",
- Text(_) => "invalid identifier",
+ Backslash => "backslash",
+ Raw { .. } => "raw text",
+ Text(_) => "text",
Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
@@ -206,7 +216,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'*' if self.mode == Body => Star,
'_' if self.mode == Body => Underscore,
- '`' if self.mode == Body => Backtick,
+ '`' if self.mode == Body => self.parse_raw(),
// An escaped thing.
'\\' if self.mode == Body => self.parse_escaped(),
@@ -281,7 +291,7 @@ impl<'s> Tokens<'s> {
}
fn parse_function(&mut self, start: Position) -> Token<'s> {
- let (header, terminated) = self.read_function_part();
+ let (header, terminated) = self.read_function_part(Header);
self.eat();
if self.peek() != Some('[') {
@@ -291,7 +301,7 @@ impl<'s> Tokens<'s> {
self.eat();
let body_start = self.pos() - start;
- let (body, terminated) = self.read_function_part();
+ let (body, terminated) = self.read_function_part(Body);
let body_end = self.pos() - start;
let span = Span::new(body_start, body_end);
@@ -300,60 +310,73 @@ impl<'s> Tokens<'s> {
Function { header, body: Some(Spanned { v: body, span }), terminated }
}
- fn read_function_part(&mut self) -> (&'s str, bool) {
- let mut escaped = false;
- let mut in_string = false;
- let mut depth = 0;
+ fn read_function_part(&mut self, mode: TokenizationMode) -> (&'s str, bool) {
+ let start = self.index();
+ let mut terminated = false;
- self.read_string_until(|n| {
+ while let Some(n) = self.peek() {
+ if n == ']' {
+ terminated = true;
+ break;
+ }
+
+ self.eat();
match n {
- '"' if !escaped => in_string = !in_string,
- '[' if !escaped && !in_string => depth += 1,
- ']' if !escaped && !in_string => {
- if depth == 0 {
- return true;
- } else {
- depth -= 1;
- }
- }
- '\\' => escaped = !escaped,
- _ => escaped = false,
+ '[' => { self.parse_function(Position::ZERO); }
+ '/' if self.peek() == Some('/') => { self.parse_line_comment(); }
+ '/' if self.peek() == Some('*') => { self.parse_block_comment(); }
+ '"' if mode == Header => { self.parse_string(); }
+ '`' if mode == Body => { self.parse_raw(); }
+ '\\' => { self.eat(); }
+ _ => {}
}
+ }
- false
- }, false, 0, 0)
+ let end = self.index();
+ (&self.src[start .. end], terminated)
}
fn parse_string(&mut self) -> Token<'s> {
+ let (string, terminated) = self.read_until_unescaped('"');
+ ExprStr { string, terminated }
+ }
+
+ fn parse_raw(&mut self) -> Token<'s> {
+ let (raw, terminated) = self.read_until_unescaped('`');
+ Raw { raw, terminated }
+ }
+
+ fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) {
let mut escaped = false;
- let (string, terminated) = self.read_string_until(|n| {
+ self.read_string_until(|n| {
match n {
- '"' if !escaped => return true,
+ n if n == c && !escaped => return true,
'\\' => escaped = !escaped,
_ => escaped = false,
}
false
- }, true, 0, -1);
- ExprStr { string, terminated }
+ }, true, 0, -1)
}
fn parse_escaped(&mut self) -> Token<'s> {
fn is_escapable(c: char) -> bool {
match c {
- '[' | ']' | '\\' | '/' | '*' | '_' | '`' => true,
+ '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' => true,
_ => false,
}
}
- Text(match self.peek() {
+ match self.peek() {
Some(c) if is_escapable(c) => {
let index = self.index();
self.eat();
- &self.src[index .. index + c.len_utf8()]
+ Text(&self.src[index .. index + c.len_utf8()])
}
- _ => "\\"
- })
+ Some(c) if c.is_whitespace() => Backslash,
+ Some(_) => Text("\\"),
+ None => Backslash,
+ }
}
fn parse_expr(&mut self, text: &'s str) -> Token<'s> {
@@ -462,6 +485,7 @@ pub fn is_identifier(string: &str) -> bool {
true
}
+
#[cfg(test)]
mod tests {
use super::super::test::check;
@@ -483,6 +507,11 @@ mod tests {
Token::ExprStr { string, terminated }
}
+ #[allow(non_snake_case)]
+ fn Raw(raw: &'static str, terminated: bool) -> Token<'static> {
+ Token::Raw { raw, terminated }
+ }
+
/// Test whether the given string tokenizes into the given list of tokens.
macro_rules! t {
($mode:expr, $source:expr => [$($tokens:tt)*]) => {
@@ -540,10 +569,15 @@ mod tests {
#[test]
fn tokenize_body_only_tokens() {
- t!(Body, "_*`" => [Underscore, Star, Backtick]);
+ t!(Body, "_*" => [Underscore, Star]);
t!(Body, "***" => [Star, Star, Star]);
t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]);
t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
+ t!(Body, "`raw`" => [Raw("raw", true)]);
+ t!(Body, "`[func]`" => [Raw("[func]", true)]);
+ t!(Body, "`]" => [Raw("]", false)]);
+ t!(Body, "`\\``" => [Raw("\\`", true)]);
+ t!(Body, "\\ " => [Backslash, S(0)]);
t!(Header, "_*`" => [Invalid("_*`")]);
}
@@ -599,14 +633,45 @@ mod tests {
}
#[test]
+ fn tokenize_correct_end_of_function() {
+ // End of function with strings and backticks in headers
+ t!(Body, r#"[f: "]"# => [func!(r#"f: "]"#, None, false)]);
+ t!(Body, "[f: \"s\"]" => [func!("f: \"s\"", None, true)]);
+ t!(Body, r#"[f: \"\"\"]"# => [func!(r#"f: \"\"\""#, None, true)]);
+ t!(Body, "[f: `]" => [func!("f: `", None, true)]);
+
+ // End of function with strings and backticks in bodies
+ t!(Body, "[f][\"]" => [func!("f", Some((0:4, 0:5, "\"")), true)]);
+ t!(Body, r#"[f][\"]"# => [func!("f", Some((0:4, 0:6, r#"\""#)), true)]);
+ t!(Body, "[f][`]" => [func!("f", Some((0:4, 0:6, "`]")), false)]);
+ t!(Body, "[f][\\`]" => [func!("f", Some((0:4, 0:6, "\\`")), true)]);
+ t!(Body, "[f][`raw`]" => [func!("f", Some((0:4, 0:9, "`raw`")), true)]);
+ t!(Body, "[f][`raw]" => [func!("f", Some((0:4, 0:9, "`raw]")), false)]);
+ t!(Body, "[f][`raw]`]" => [func!("f", Some((0:4, 0:10, "`raw]`")), true)]);
+ t!(Body, "[f][`\\`]" => [func!("f", Some((0:4, 0:8, "`\\`]")), false)]);
+ t!(Body, "[f][`\\\\`]" => [func!("f", Some((0:4, 0:8, "`\\\\`")), true)]);
+
+ // End of function with comments
+ t!(Body, "[f][/*]" => [func!("f", Some((0:4, 0:7, "/*]")), false)]);
+ t!(Body, "[f][/*`*/]" => [func!("f", Some((0:4, 0:9, "/*`*/")), true)]);
+ t!(Body, "[f: //]\n]" => [func!("f: //]\n", None, true)]);
+ t!(Body, "[f: \"//]\n]" => [func!("f: \"//]\n]", None, false)]);
+
+ // End of function with escaped brackets
+ t!(Body, "[f][\\]]" => [func!("f", Some((0:4, 0:6, "\\]")), true)]);
+ t!(Body, "[f][\\[]" => [func!("f", Some((0:4, 0:6, "\\[")), true)]);
+ }
+
+ #[test]
fn tokenize_escaped_symbols() {
- t!(Body, r"\\" => [T(r"\")]);
- t!(Body, r"\[" => [T("[")]);
- t!(Body, r"\]" => [T("]")]);
- t!(Body, r"\*" => [T("*")]);
- t!(Body, r"\_" => [T("_")]);
- t!(Body, r"\`" => [T("`")]);
- t!(Body, r"\/" => [T("/")]);
+ t!(Body, r"\\" => [T(r"\")]);
+ t!(Body, r"\[" => [T("[")]);
+ t!(Body, r"\]" => [T("]")]);
+ t!(Body, r"\*" => [T("*")]);
+ t!(Body, r"\_" => [T("_")]);
+ t!(Body, r"\`" => [T("`")]);
+ t!(Body, r"\/" => [T("/")]);
+ t!(Body, r#"\""# => [T("\"")]);
}
#[test]