summaryrefslogtreecommitdiff
path: root/src/parse/scanner.rs
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2020-10-01 11:05:16 +0200
committer Laurenz <laurmaedje@gmail.com> 2020-10-01 11:08:53 +0200
commit 16f0bd430e0864a3bbd0139803e476be413cb3cb (patch)
tree c7c5f01cb10b0503cbc1c43494bf3b2c6c6ff173 /src/parse/scanner.rs
parent c0998b48022f2dc010106044fdcb4d5f6f2b9d77 (diff)
Rename CharParser to Scanner ✏
Diffstat (limited to 'src/parse/scanner.rs')
-rw-r--r-- src/parse/scanner.rs 171
1 files changed, 171 insertions, 0 deletions
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
new file mode 100644
index 00000000..1bffc204
--- /dev/null
+++ b/src/parse/scanner.rs
@@ -0,0 +1,171 @@
+//! Low-level char-based scanner.
+
+use std::fmt::{self, Debug, Formatter};
+use std::slice::SliceIndex;
+use std::str::Chars;
+
+/// A low-level char scanner that walks a source string while tracking its byte position.
+pub struct Scanner<'s> {
+ src: &'s str, // The complete source string; every slice the scanner returns borrows from it.
+ iter: Chars<'s>, // Char iterator over the part of `src` that has not been eaten yet.
+ index: usize, // Byte offset into `src` of the next char to be eaten.
+}
+
+impl<'s> Scanner<'s> {
+ /// Create a new scanner positioned at the start of `src`.
+ pub fn new(src: &'s str) -> Self {
+ Self { src, iter: src.chars(), index: 0 }
+ }
+
+ /// Consume and return the next char, or `None` once the source is exhausted.
+ pub fn eat(&mut self) -> Option<char> {
+ let next = self.iter.next();
+ if let Some(c) = next {
+ self.index += c.len_utf8();
+ }
+ next
+ }
+
+ /// Consume the next char if it is the given one.
+ ///
+ /// Returns `true` if the char was consumed; on `false`, nothing was consumed.
+ pub fn eat_if(&mut self, c: char) -> bool {
+ // Advance the iterator directly so the char is decoded only once (instead
+ // of once in peek() and again in eat()); on a mismatch, reset() rewinds it.
+ // TODO: Benchmark this vs. the naive version.
+ if self.iter.next() == Some(c) {
+ self.index += c.len_utf8();
+ true
+ } else {
+ self.reset();
+ false
+ }
+ }
+
+ /// Consume the next char unconditionally, debug-asserting that it is the given one.
+ pub fn eat_assert(&mut self, c: char) {
+ let next = self.eat();
+ debug_assert_eq!(next, Some(c));
+ }
+
+ /// Consume the next char, coalescing `\r\n` to just `\n`; a lone `\r` is returned unchanged.
+ pub fn eat_merging_crlf(&mut self) -> Option<char> {
+ let c = self.eat();
+ if c == Some('\r') && self.eat_if('\n') {
+ Some('\n')
+ } else {
+ c
+ }
+ }
+
+ /// Eat chars for as long as `f` returns `true` and return the eaten slice of the source.
+ pub fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
+ self.eat_until(|c| !f(c))
+ }
+
+ /// Eat chars until `f` returns `true` (that char stays uneaten) or the source ends; return the eaten slice.
+ pub fn eat_until(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
+ let start = self.index;
+ while let Some(c) = self.iter.next() {
+ if f(c) {
+ // `c` has to stay uneaten: `index` was not advanced for it, so
+ // reset() rewinds the iterator to just before `c`. This avoids
+ // peeking on every iteration.
+ // TODO: Benchmark this vs. the naive peeking version.
+ self.reset();
+ break;
+ }
+ self.index += c.len_utf8();
+ }
+ &self.src[start .. self.index]
+ }
+
+ /// Uneat the last eaten character so that it will be eaten again; does nothing at the start of the source.
+ pub fn uneat(&mut self) {
+ self.index = self.prev_index();
+ self.reset();
+ }
+
+ /// Return the next char without consuming it, or `None` at the end of the source.
+ pub fn peek(&self) -> Option<char> {
+ self.iter.clone().next()
+ }
+
+ /// Peek at the char `n` positions ahead without consuming anything; `peek_nth(0)` equals `peek()`.
+ pub fn peek_nth(&self, n: usize) -> Option<char> {
+ self.iter.clone().nth(n)
+ }
+
+ /// Checks whether the next character fulfills the condition `f`, without consuming it.
+ ///
+ /// Returns `false` if there is no next character.
+ pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
+ self.peek().map(f).unwrap_or(false)
+ }
+}
+
+impl<'s> Scanner<'s> {
+ /// Slice a part out of the source string; `index` is any `str` slice index and is applied with plain `str` indexing.
+ pub fn get<I>(&self, index: I) -> &'s str
+ where
+ I: SliceIndex<str, Output = str>,
+ {
+ &self.src[index]
+ }
+
+ /// The full source string, regardless of how much of it has been eaten.
+ pub fn src(&self) -> &'s str {
+ self.src
+ }
+
+ /// Everything eaten so far: the source from its start up to the current index.
+ pub fn eaten(&self) -> &'s str {
+ &self.src[.. self.index]
+ }
+
+ /// The source string from the byte offset `start` up to the current index.
+ pub fn eaten_from(&self, start: usize) -> &'s str {
+ &self.src[start .. self.index]
+ }
+
+ /// The part not eaten yet: the source from the current index to its end.
+ pub fn rest(&self) -> &'s str {
+ &self.src[self.index ..]
+ }
+
+ /// The current byte index in the source string.
+ pub fn index(&self) -> usize {
+ self.index
+ }
+
+ /// The byte index at which the char just before the current index starts, or 0 if there is no such char.
+ pub fn prev_index(&self) -> usize {
+ self.src[.. self.index]
+ .chars()
+ .next_back()
+ .map(|c| self.index - c.len_utf8())
+ .unwrap_or(0)
+ }
+
+ /// Re-create the char iterator so that it continues at the current index.
+ fn reset(&mut self) {
+ self.iter = self.src[self.index ..].chars();
+ }
+}
+
+impl Debug for Scanner<'_> { // Debug output shows the eaten text and the remaining text.
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "Scanner({}|{})", self.eaten(), self.rest()) // The `|` marks the current index.
+ }
+}
+
+/// Whether this character denotes a newline, i.e. is one of the seven characters matched below.
+pub fn is_newline_char(character: char) -> bool {
+ match character {
+ // Line Feed (U+000A), Vertical Tab (U+000B), Form Feed (U+000C), Carriage Return (U+000D).
+ '\n' | '\x0B' | '\x0C' | '\r' |
+ // Next Line (U+0085), Line Separator (U+2028), Paragraph Separator (U+2029).
+ '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+ _ => false,
+ }
+}