summary | refs | log | tree | commit | diff
path: root/src/syntax
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2020-01-13 13:02:33 +0100
committer Laurenz <laurmaedje@gmail.com> 2020-01-13 13:02:33 +0100
commit 6527d31dfba78330a39e52d7772f6c8561fb23ef (patch)
tree 0c0640ce40d14662d21bc0cac61af4caca03f237 /src/syntax
parent a8f711d49ad65ee08c96fae2a8b52873667bdf5c (diff)
Merge Characters struct into tokenizer 🔀
Diffstat (limited to 'src/syntax')
-rw-r--r-- src/syntax/expr.rs 21
-rw-r--r-- src/syntax/tokens.rs 127
2 files changed, 63 insertions, 85 deletions
diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs
index a1680861..ed5e50df 100644
--- a/src/syntax/expr.rs
+++ b/src/syntax/expr.rs
@@ -166,27 +166,6 @@ impl Display for Ident {
debug_display!(Ident);
-/// Whether this word is a valid identifier.
-pub fn is_identifier(string: &str) -> bool {
- let mut chars = string.chars();
-
- match chars.next() {
- Some('-') => {}
- Some(c) if UnicodeXID::is_xid_start(c) => {}
- _ => return false,
- }
-
- while let Some(c) = chars.next() {
- match c {
- '.' | '-' => {}
- c if UnicodeXID::is_xid_continue(c) => {}
- _ => return false,
- }
- }
-
- true
-}
-
/// Kinds of expressions.
pub trait ExpressionKind: Sized {
const NAME: &'static str;
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index d355b3cc..ae5cfe48 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -72,9 +72,11 @@ pub fn tokenize(src: &str) -> Tokens {
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
src: &'s str,
- chars: Characters<'s>,
state: State,
stack: Vec<State>,
+ iter: Peekable<Chars<'s>>,
+ position: Position,
+ index: usize,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@@ -88,9 +90,11 @@ impl<'s> Tokens<'s> {
pub fn new(src: &'s str) -> Tokens<'s> {
Tokens {
src,
- chars: Characters::new(src),
state: State::Body,
stack: vec![],
+ iter: src.chars().peekable(),
+ position: Position::ZERO,
+ index: 0,
}
}
}
@@ -100,26 +104,29 @@ impl<'s> Iterator for Tokens<'s> {
/// Parse the next token in the source code.
fn next(&mut self) -> Option<Spanned<Token<'s>>> {
- let start = self.chars.position();
- let first = self.chars.next()?;
- let second = self.chars.peek();
+ let start = self.pos();
+ let first = self.eat()?;
let token = match first {
// Comments.
- '/' if second == Some('/') => self.parse_line_comment(),
- '/' if second == Some('*') => self.parse_block_comment(),
- '*' if second == Some('/') => { self.eat(); StarSlash }
+ '/' if self.peek() == Some('/') => self.parse_line_comment(),
+ '/' if self.peek() == Some('*') => self.parse_block_comment(),
+ '*' if self.peek() == Some('/') => { self.eat(); StarSlash }
// Whitespace.
c if c.is_whitespace() => self.parse_whitespace(start),
// Functions.
- '[' => { self.set_state(Header); LeftBracket }
+ '[' => {
+ self.stack.push(self.state);
+ self.state = Header;
+ LeftBracket
+ }
']' => {
- if self.state == Header && second == Some('[') {
+ if self.state == Header && self.peek() == Some('[') {
self.state = StartBody;
} else {
- self.pop_state();
+ self.state = self.stack.pop().unwrap_or(Body);
}
RightBracket
@@ -164,7 +171,7 @@ impl<'s> Iterator for Tokens<'s> {
}
};
- let end = self.chars.position();
+ let end = self.pos();
let span = Span { start, end };
Some(Spanned { v: token, span })
@@ -206,7 +213,7 @@ impl<'s> Tokens<'s> {
fn parse_whitespace(&mut self, start: Position) -> Token<'s> {
self.read_string_until(|n| !n.is_whitespace(), false, 0, 0);
- let end = self.chars.position();
+ let end = self.pos();
Whitespace(end.line - start.line)
}
@@ -234,9 +241,9 @@ impl<'s> Tokens<'s> {
}
}
- let c = self.chars.peek().unwrap_or('n');
+ let c = self.peek().unwrap_or('n');
if self.state == Body && is_escapable(c) {
- let index = self.chars.index();
+ let index = self.index();
self.eat();
Text(&self.src[index .. index + c.len_utf8()])
} else {
@@ -267,22 +274,22 @@ impl<'s> Tokens<'s> {
offset_start: isize,
offset_end: isize,
) -> &'s str where F: FnMut(char) -> bool {
- let start = ((self.chars.index() as isize) + offset_start) as usize;
+ let start = ((self.index() as isize) + offset_start) as usize;
let mut matched = false;
- while let Some(c) = self.chars.peek() {
+ while let Some(c) = self.peek() {
if f(c) {
matched = true;
if eat_match {
- self.chars.next();
+ self.eat();
}
break;
}
- self.chars.next();
+ self.eat();
}
- let mut end = self.chars.index();
+ let mut end = self.index();
if matched {
end = ((end as isize) + offset_end) as usize;
}
@@ -290,17 +297,32 @@ impl<'s> Tokens<'s> {
&self.src[start .. end]
}
- fn set_state(&mut self, state: State) {
- self.stack.push(self.state);
- self.state = state;
+ fn eat(&mut self) -> Option<char> {
+ let c = self.iter.next()?;
+ let len = c.len_utf8();
+
+ self.index += len;
+
+ if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
+ self.position.line += 1;
+ self.position.column = 0;
+ } else {
+ self.position.column += len;
+ }
+
+ Some(c)
+ }
+
+ fn peek(&mut self) -> Option<char> {
+ self.iter.peek().copied()
}
- fn pop_state(&mut self) {
- self.state = self.stack.pop().unwrap_or(Body);
+ fn index(&self) -> usize {
+ self.index
}
- fn eat(&mut self) {
- self.chars.next();
+ fn pos(&self) -> Position {
+ self.position
}
}
@@ -313,7 +335,7 @@ fn parse_percentage(text: &str) -> Option<f64> {
}
/// Whether this character denotes a newline.
-fn is_newline_char(character: char) -> bool {
+pub fn is_newline_char(character: char) -> bool {
match character {
// Line Feed, Vertical Tab, Form Feed, Carriage Return.
'\x0A' ..= '\x0D' => true,
@@ -323,46 +345,23 @@ fn is_newline_char(character: char) -> bool {
}
}
-struct Characters<'s> {
- iter: Peekable<Chars<'s>>,
- position: Position,
- index: usize,
-}
+/// Whether this word is a valid identifier.
+pub fn is_identifier(string: &str) -> bool {
+ let mut chars = string.chars();
-impl<'s> Characters<'s> {
- fn new(src: &'s str) -> Characters<'s> {
- Characters {
- iter: src.chars().peekable(),
- position: Position::ZERO,
- index: 0,
- }
+ match chars.next() {
+ Some('-') => {}
+ Some(c) if UnicodeXID::is_xid_start(c) => {}
+ _ => return false,
}
- fn next(&mut self) -> Option<char> {
- let c = self.iter.next()?;
- let len = c.len_utf8();
-
- self.index += len;
-
- if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
- self.position.line += 1;
- self.position.column = 0;
- } else {
- self.position.column += len;
+ while let Some(c) = chars.next() {
+ match c {
+ '.' | '-' => {}
+ c if UnicodeXID::is_xid_continue(c) => {}
+ _ => return false,
}
-
- Some(c)
- }
-
- fn peek(&mut self) -> Option<char> {
- self.iter.peek().copied()
}
- fn index(&self) -> usize {
- self.index
- }
-
- fn position(&self) -> Position {
- self.position
- }
+ true
}