summary refs log tree commit diff
path: root/src/syntax/tokens.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/syntax/tokens.rs')
-rw-r--r-- src/syntax/tokens.rs 170
1 files changed, 32 insertions, 138 deletions
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index cafc7727..2d371bf8 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -22,27 +22,10 @@ pub enum Token<'s> {
/// can contain nested block comments.
BlockComment(&'s str),
- /// A function invocation.
- Function {
- /// The header string:
- /// ```typst
- /// [header: args][body]
- /// ^^^^^^^^^^^^
- /// ```
- header: &'s str,
- /// The spanned body string:
- /// ```typst
- /// [header][hello *world*]
- /// ^^^^^^^^^^^^^
- /// ^-- The span is relative to right before this bracket
- /// ```
- body: Option<Spanned<&'s str>>,
- /// Whether the last closing bracket was present.
- /// - `[func]` or `[func][body]` => terminated
- /// - `[func` or `[func][body` => not terminated
- terminated: bool,
- },
-
+ /// A left bracket starting a function invocation or body: `[`.
+ LeftBracket,
+ /// A right bracket ending a function invocation or body: `]`.
+ RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
@@ -119,7 +102,8 @@ impl<'s> Token<'s> {
Space(_) => "space",
LineComment(_) => "line comment",
BlockComment(_) => "block comment",
- Function { .. } => "function",
+ LeftBracket => "opening bracket",
+ RightBracket => "closing bracket",
LeftParen => "opening paren",
RightParen => "closing paren",
LeftBrace => "opening brace",
@@ -141,7 +125,6 @@ impl<'s> Token<'s> {
Backslash => "backslash",
Raw { .. } => "raw text",
Text(_) => "text",
- Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
}
@@ -152,8 +135,9 @@ impl<'s> Token<'s> {
#[derive(Debug)]
pub struct Tokens<'s> {
src: &'s str,
- mode: TokenMode,
iter: Peekable<Chars<'s>>,
+ mode: TokenMode,
+ stack: Vec<TokenMode>,
pos: Pos,
index: usize,
}
@@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
///
/// The first token's span starts at the given `offset` position instead of
/// the zero position.
- pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self {
+ pub fn new(src: &'s str, mode: TokenMode) -> Self {
Self {
src,
- mode,
iter: src.chars().peekable(),
- pos: offset,
+ mode,
+ stack: vec![],
+ pos: Pos::ZERO,
index: 0,
}
}
+ /// Change the token mode and push the old one on a stack.
+ pub fn push_mode(&mut self, mode: TokenMode) {
+ self.stack.push(self.mode);
+ self.mode = mode;
+ }
+
+ /// Pop the old token mode from the stack. This panics if there is no mode
+ /// on the stack.
+ pub fn pop_mode(&mut self) {
+ self.mode = self.stack.pop().expect("no pushed mode");
+ }
+
/// The index in the string at which the last token ends and next token will
/// start.
pub fn index(&self) -> usize {
@@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
// Whitespace.
c if c.is_whitespace() => self.read_whitespace(start),
- // Functions.
- '[' => self.read_function(start),
- ']' => Invalid("]"),
+ // Functions and blocks.
+ '[' => LeftBracket,
+ ']' => RightBracket,
+ '{' => LeftBrace,
+ '}' => RightBrace,
// Syntactic elements in function headers.
'(' if self.mode == Header => LeftParen,
')' if self.mode == Header => RightParen,
- '{' if self.mode == Header => LeftBrace,
- '}' if self.mode == Header => RightBrace,
':' if self.mode == Header => Colon,
',' if self.mode == Header => Comma,
'=' if self.mode == Header => Equals,
@@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
Space(end.line - start.line)
}
- fn read_function(&mut self, start: Pos) -> Token<'s> {
- let (header, terminated) = self.read_function_part(Header);
- self.eat();
-
- if self.peek() != Some('[') {
- return Function { header, body: None, terminated };
- }
-
- self.eat();
-
- let body_start = self.pos() - start;
- let (body, terminated) = self.read_function_part(Body);
- let body_end = self.pos() - start;
- let span = Span::new(body_start, body_end);
-
- self.eat();
-
- Function { header, body: Some(Spanned { v: body, span }), terminated }
- }
-
- fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
- let start = self.index();
- let mut terminated = false;
-
- while let Some(n) = self.peek() {
- if n == ']' {
- terminated = true;
- break;
- }
-
- self.eat();
- match n {
- '[' => { self.read_function(Pos::ZERO); }
- '/' if self.peek() == Some('/') => { self.read_line_comment(); }
- '/' if self.peek() == Some('*') => { self.read_block_comment(); }
- '"' if mode == Header => { self.read_string(); }
- '`' if mode == Body => { self.read_raw(); }
- '\\' => { self.eat(); }
- _ => {}
- }
- }
-
- let end = self.index();
- (&self.src[start..end], terminated)
- }
-
fn read_string(&mut self) -> Token<'s> {
let (string, terminated) = self.read_until_unescaped('"');
Str { string, terminated }
@@ -540,6 +491,7 @@ mod tests {
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
+ LeftBracket as L, RightBracket as R,
LeftParen as LP, RightParen as RP,
LeftBrace as LB, RightBrace as RB,
Ident as Id,
@@ -557,25 +509,12 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
- macro_rules! F {
- ($h:expr, None, $t:expr) => {
- Token::Function { header: $h, body: None, terminated: $t }
- };
- ($h:expr, $b:expr, $t:expr) => {
- Token::Function {
- header: $h,
- body: Some(Into::<Spanned<&str>>::into($b)),
- terminated: $t,
- }
- };
- }
-
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
macro_rules! test {
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
- let found = Tokens::new($src, Pos::ZERO, $mode).collect::<Vec<_>>();
+ let found = Tokens::new($src, $mode).collect::<Vec<_>>();
check($src, exp, found, $spans);
}
}
@@ -616,7 +555,7 @@ mod tests {
fn tokenize_body_only_tokens() {
t!(Body, "_*" => Underscore, Star);
t!(Body, "***" => Star, Star, Star);
- t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star);
+ t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
t!(Body, "`raw`" => Raw("raw", true));
t!(Body, "`[func]`" => Raw("[func]", true));
@@ -675,50 +614,6 @@ mod tests {
}
#[test]
- fn tokenize_functions() {
- t!(Body, "a[f]" => T("a"), F!("f", None, true));
- t!(Body, "[f]a" => F!("f", None, true), T("a"));
- t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
- t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a"));
- t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
- t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));
- t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
- t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));
- t!(Header, "[" => F!("", None, false));
- t!(Header, "]" => Invalid("]"));
- }
-
- #[test]
- fn tokenize_correct_end_of_function() {
- // End of function with strings and carets in headers
- t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
- t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
- t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
- t!(Body, "[f: `]" => F!("f: `", None, true));
-
- // End of function with strings and carets in bodies
- t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
- t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
- t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
- t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
- t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
- t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
- t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
- t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
- t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));
-
- // End of function with comments
- t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
- t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
- t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
- t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));
-
- // End of function with escaped brackets
- t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
- t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
- }
-
- #[test]
fn tokenize_escaped_symbols() {
t!(Body, r"\\" => T(r"\"));
t!(Body, r"\[" => T("["));
@@ -746,7 +641,6 @@ mod tests {
fn tokenize_with_spans() {
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
- ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));