summaryrefslogtreecommitdiff
path: root/src/parse/tokens.rs
diff options
context:
space:
mode:
authorMartin Haug <mhaug@live.de>2022-02-23 20:06:48 +0100
committerMartin Haug <mhaug@live.de>2022-02-23 20:49:47 +0100
commit9fda623b02b2a0a9e9cdf806d9945d0759c8bf01 (patch)
tree93a2517cdbbd59e998d2004fb7cf804dadf0f4d6 /src/parse/tokens.rs
parent4c8634c600ad0bba03ccdf884b32f234ecbff30c (diff)
Code Review: That's just like your struct, man.
Diffstat (limited to 'src/parse/tokens.rs')
-rw-r--r--src/parse/tokens.rs51
1 files changed, 47 insertions, 4 deletions
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 4a13694a..91bbf9e8 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -11,9 +11,14 @@ use crate::util::EcoString;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
+ /// The underlying scanner.
s: Scanner<'s>,
+ /// The mode the scanner is in. This determines what tokens it recognizes.
mode: TokenMode,
+ /// Whether the last token has been terminated.
terminated: bool,
+ /// Column offset added to positions on the first line of the source (the column at the end of the prefix).
+ column_offset: usize,
}
/// What kind of tokens to emit.
@@ -28,11 +33,19 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self {
+ pub fn new(src: &'s str, mode: TokenMode) -> Self {
+ Self::with_prefix("", src, mode)
+ }
+
+ /// Create a new token iterator with the given mode and a prefix to offset
+ /// column calculations.
+ #[inline]
+ pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
Self {
- s: Scanner::with_indent_offset(src, offset),
+ s: Scanner::new(src),
mode,
terminated: true,
+ column_offset: column(prefix, prefix.len(), 0),
}
}
@@ -74,6 +87,12 @@ impl<'s> Tokens<'s> {
pub fn terminated(&self) -> bool {
self.terminated
}
+
+ /// The column of the given byte index in the source string, including the prefix offset on the first line.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ column(self.s.src(), index, self.column_offset)
+ }
}
impl<'s> Iterator for Tokens<'s> {
@@ -321,7 +340,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
- let column = self.s.column(self.s.index() - 1);
+ let column = self.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') {
@@ -574,6 +593,30 @@ fn keyword(ident: &str) -> Option<NodeKind> {
})
}
+/// The column (counted in chars) of byte `index` in `string`; `offset` is added only when `index` is on the first line.
+#[inline]
+fn column(string: &str, index: usize, offset: usize) -> usize {
+ let mut apply_offset = false;
+ let res = string[.. index]
+ .char_indices()
+ .rev() // walk backwards from `index` towards the start of the string
+ .take_while(|&(_, c)| !is_newline(c)) // stop at the nearest preceding newline
+ .inspect(|&(i, _)| {
+ if i == 0 { // reached the start of the string without meeting a newline
+ apply_offset = true
+ }
+ })
+ .count();
+
+ // The `inspect` closure never runs if the slice is empty, but we are of
+ // course still at the start of the first line.
+ if index == 0 {
+ apply_offset = true;
+ }
+
+ if apply_offset { res + offset } else { res }
+}
+
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
@@ -689,7 +732,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
- let found = Tokens::new(&src, $mode, 0).collect::<Vec<_>>();
+ let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};