From 16f0bd430e0864a3bbd0139803e476be413cb3cb Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Thu, 1 Oct 2020 11:05:16 +0200
Subject: =?UTF-8?q?Rename=20CharParser=20to=20Scanner=20=E2=9C=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/parse/chars.rs   | 171 ---------------------------------------------------
 src/parse/mod.rs     |   4 +-
 src/parse/resolve.rs |  30 ++++-----
 src/parse/scanner.rs | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parse/tokens.rs  |  78 +++++++++++------------
 src/syntax/lines.rs  |   8 +--
 6 files changed, 231 insertions(+), 231 deletions(-)
 delete mode 100644 src/parse/chars.rs
 create mode 100644 src/parse/scanner.rs

(limited to 'src')
diff --git a/src/parse/chars.rs b/src/parse/chars.rs
deleted file mode 100644
index 62d40771..00000000
--- a/src/parse/chars.rs
+++ /dev/null
@@ -1,171 +0,0 @@
-//! Low-level char parser.
-
-use std::fmt::{self, Debug, Formatter};
-use std::slice::SliceIndex;
-use std::str::Chars;
-
-/// A low-level featureful char parser.
-pub struct CharParser<'s> {
-    src: &'s str,
-    iter: Chars<'s>,
-    index: usize,
-}
-
-impl<'s> CharParser<'s> {
-    /// Create a new char parser.
-    pub fn new(src: &'s str) -> Self {
-        Self { src, iter: src.chars(), index: 0 }
-    }
-
-    /// Consume the next char.
-    pub fn eat(&mut self) -> Option<char> {
-        let next = self.iter.next();
-        if let Some(c) = next {
-            self.index += c.len_utf8();
-        }
-        next
-    }
-
-    /// Consume the next char if it is the given one.
-    ///
-    /// Returns whether the char was consumed.
-    pub fn eat_if(&mut self, c: char) -> bool {
-        // Don't decode the char twice through peek() and eat().
-        //
-        // TODO: Benchmark this vs. the naive version.
-        if self.iter.next() == Some(c) {
-            self.index += c.len_utf8();
-            true
-        } else {
-            self.reset();
-            false
-        }
-    }
-
-    /// Consume the next char, debug-asserting that it is the given one.
-    pub fn eat_assert(&mut self, c: char) {
-        let next = self.eat();
-        debug_assert_eq!(next, Some(c));
-    }
-
-    /// Consume the next char, coalescing `\r\n` to just `\n`.
-    pub fn eat_merging_crlf(&mut self) -> Option<char> {
-        let c = self.eat();
-        if c == Some('\r') && self.eat_if('\n') {
-            Some('\n')
-        } else {
-            c
-        }
-    }
-
-    /// Eat chars while the condition is true.
-    pub fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
-        self.eat_until(|c| !f(c))
-    }
-
-    /// Eat chars until the condition is true.
-    pub fn eat_until(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
-        let start = self.index;
-        while let Some(c) = self.iter.next() {
-            if f(c) {
-                // Undo the previous `next()` without peeking all the time
-                // during iteration.
-                //
-                // TODO: Benchmark this vs. the naive peeking version.
-                self.reset();
-                break;
-            }
-            self.index += c.len_utf8();
-        }
-        &self.src[start .. self.index]
-    }
-
-    /// Uneat the last eaten character.
-    pub fn uneat(&mut self) {
-        self.index = self.prev_index();
-        self.reset();
-    }
-
-    /// Peek at the next char without consuming it.
-    pub fn peek(&self) -> Option<char> {
-        self.iter.clone().next()
-    }
-
-    /// Peek at the nth-next char without consuming anything.
-    pub fn peek_nth(&self, n: usize) -> Option<char> {
-        self.iter.clone().nth(n)
-    }
-
-    /// Checks whether the next character fulfills a condition.
-    ///
-    /// Returns `false` is there is no next character.
-    pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
-        self.peek().map(f).unwrap_or(false)
-    }
-}
-
-impl<'s> CharParser<'s> {
-    /// Slice a part out of the source string.
-    pub fn get<I>(&self, index: I) -> &'s str
-    where
-        I: SliceIndex<str, Output = str>,
-    {
-        &self.src[index]
-    }
-
-    /// The full source string.
-    pub fn src(&self) -> &'s str {
-        self.src
-    }
-
-    /// The full string up to the current index.
-    pub fn eaten(&self) -> &'s str {
-        &self.src[.. self.index]
-    }
-
-    /// The string from `start` to the current index.
-    pub fn eaten_from(&self, start: usize) -> &'s str {
-        &self.src[start .. self.index]
-    }
-
-    /// The remaining string after the current index.
-    pub fn rest(&self) -> &'s str {
-        &self.src[self.index ..]
-    }
-
-    /// The current index in the string.
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
-    /// The previous index in the string.
-    pub fn prev_index(&self) -> usize {
-        self.src[.. self.index]
-            .chars()
-            .next_back()
-            .map(|c| self.index - c.len_utf8())
-            .unwrap_or(0)
-    }
-
-    /// Go back to the where the index says.
-    fn reset(&mut self) {
-        self.iter = self.src[self.index ..].chars();
-    }
-}
-
-impl Debug for CharParser<'_> {
-    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(f, "CharParser({}|{})", self.eaten(), self.rest())
-    }
-}
-
-/// Whether this character denotes a newline.
-pub fn is_newline_char(character: char) -> bool {
-    match character {
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\n' | '\x0B' | '\x0C' | '\r' |
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
-        _ => false,
-    }
-}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 4d79c11b..8c879d12 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,11 +1,11 @@
 //! Parsing and tokenization.
 
-mod chars;
 mod resolve;
+mod scanner;
 mod tokens;
 
-pub use chars::*;
 pub use resolve::*;
+pub use scanner::*;
 pub use tokens::*;
 
 use std::str::FromStr;
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 422f9385..0e2ebd4b 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,41 +1,41 @@
 //! Resolve strings and raw blocks.
 
-use super::{is_newline_char, CharParser};
+use super::{is_newline_char, Scanner};
 use crate::syntax::{Ident, Raw};
 
 /// Resolves all escape sequences in a string.
 pub fn resolve_string(string: &str) -> String {
     let mut out = String::with_capacity(string.len());
-    let mut p = CharParser::new(string);
+    let mut s = Scanner::new(string);
 
-    while let Some(c) = p.eat() {
+    while let Some(c) = s.eat() {
         if c != '\\' {
             out.push(c);
             continue;
         }
 
-        let start = p.prev_index();
-        match p.eat() {
+        let start = s.prev_index();
+        match s.eat() {
             Some('\\') => out.push('\\'),
             Some('"') => out.push('"'),
 
             Some('n') => out.push('\n'),
             Some('t') => out.push('\t'),
-            Some('u') if p.eat_if('{') => {
+            Some('u') if s.eat_if('{') => {
                 // TODO: Feedback if closing brace is missing.
-                let sequence = p.eat_while(|c| c.is_ascii_hexdigit());
-                let _terminated = p.eat_if('}');
+                let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
+                let _terminated = s.eat_if('}');
 
                 if let Some(c) = resolve_hex(sequence) {
                     out.push(c);
                 } else {
                     // TODO: Feedback that escape sequence is wrong.
-                    out += p.eaten_from(start);
+                    out += s.eaten_from(start);
                 }
             }
 
             // TODO: Feedback about invalid escape sequence.
-            _ => out += p.eaten_from(start),
+            _ => out += s.eaten_from(start),
         }
     }
 
@@ -69,10 +69,10 @@ pub fn resolve_raw(raw: &str, backticks: usize) -> Raw {
 
 /// Parse the lang tag and return it alongside the remaining inner raw text.
 fn split_at_lang_tag(raw: &str) -> (&str, &str) {
-    let mut p = CharParser::new(raw);
+    let mut s = Scanner::new(raw);
     (
-        p.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
-        p.rest(),
+        s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
+        s.rest(),
     )
 }
 
@@ -104,11 +104,11 @@ fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
 /// Splits a string into a vector of lines (respecting Unicode & Windows line
 /// breaks).
 pub fn split_lines(text: &str) -> Vec<String> {
-    let mut p = CharParser::new(text);
+    let mut s = Scanner::new(text);
     let mut line = String::new();
     let mut lines = Vec::new();
 
-    while let Some(c) = p.eat_merging_crlf() {
+    while let Some(c) = s.eat_merging_crlf() {
         if is_newline_char(c) {
             lines.push(std::mem::take(&mut line));
         } else {
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
new file mode 100644
index 00000000..1bffc204
--- /dev/null
+++ b/src/parse/scanner.rs
@@ -0,0 +1,171 @@
+//! Low-level char-based scanner.
+
+use std::fmt::{self, Debug, Formatter};
+use std::slice::SliceIndex;
+use std::str::Chars;
+
+/// A low-level featureful char scanner.
+pub struct Scanner<'s> {
+    src: &'s str,
+    iter: Chars<'s>,
+    index: usize,
+}
+
+impl<'s> Scanner<'s> {
+    /// Create a new char scanner.
+    pub fn new(src: &'s str) -> Self {
+        Self { src, iter: src.chars(), index: 0 }
+    }
+
+    /// Consume the next char.
+    pub fn eat(&mut self) -> Option<char> {
+        let next = self.iter.next();
+        if let Some(c) = next {
+            self.index += c.len_utf8();
+        }
+        next
+    }
+
+    /// Consume the next char if it is the given one.
+    ///
+    /// Returns whether the char was consumed.
+    pub fn eat_if(&mut self, c: char) -> bool {
+        // Don't decode the char twice through peek() and eat().
+        //
+        // TODO: Benchmark this vs. the naive version.
+        if self.iter.next() == Some(c) {
+            self.index += c.len_utf8();
+            true
+        } else {
+            self.reset();
+            false
+        }
+    }
+
+    /// Consume the next char, debug-asserting that it is the given one.
+    pub fn eat_assert(&mut self, c: char) {
+        let next = self.eat();
+        debug_assert_eq!(next, Some(c));
+    }
+
+    /// Consume the next char, coalescing `\r\n` to just `\n`.
+    pub fn eat_merging_crlf(&mut self) -> Option<char> {
+        let c = self.eat();
+        if c == Some('\r') && self.eat_if('\n') {
+            Some('\n')
+        } else {
+            c
+        }
+    }
+
+    /// Eat chars while the condition is true.
+    pub fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
+        self.eat_until(|c| !f(c))
+    }
+
+    /// Eat chars until the condition is true.
+    pub fn eat_until(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
+        let start = self.index;
+        while let Some(c) = self.iter.next() {
+            if f(c) {
+                // Undo the previous `next()` without peeking all the time
+                // during iteration.
+                //
+                // TODO: Benchmark this vs. the naive peeking version.
+                self.reset();
+                break;
+            }
+            self.index += c.len_utf8();
+        }
+        &self.src[start .. self.index]
+    }
+
+    /// Uneat the last eaten character.
+    pub fn uneat(&mut self) {
+        self.index = self.prev_index();
+        self.reset();
+    }
+
+    /// Peek at the next char without consuming it.
+    pub fn peek(&self) -> Option<char> {
+        self.iter.clone().next()
+    }
+
+    /// Peek at the nth-next char without consuming anything.
+    pub fn peek_nth(&self, n: usize) -> Option<char> {
+        self.iter.clone().nth(n)
+    }
+
+    /// Checks whether the next character fulfills a condition.
+    ///
+    /// Returns `false` if there is no next character.
+    pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
+        self.peek().map(f).unwrap_or(false)
+    }
+}
+
+impl<'s> Scanner<'s> {
+    /// Slice a part out of the source string.
+    pub fn get<I>(&self, index: I) -> &'s str
+    where
+        I: SliceIndex<str, Output = str>,
+    {
+        &self.src[index]
+    }
+
+    /// The full source string.
+    pub fn src(&self) -> &'s str {
+        self.src
+    }
+
+    /// The full string up to the current index.
+    pub fn eaten(&self) -> &'s str {
+        &self.src[.. self.index]
+    }
+
+    /// The string from `start` to the current index.
+    pub fn eaten_from(&self, start: usize) -> &'s str {
+        &self.src[start .. self.index]
+    }
+
+    /// The remaining string after the current index.
+    pub fn rest(&self) -> &'s str {
+        &self.src[self.index ..]
+    }
+
+    /// The current index in the string.
+    pub fn index(&self) -> usize {
+        self.index
+    }
+
+    /// The previous index in the string.
+    pub fn prev_index(&self) -> usize {
+        self.src[.. self.index]
+            .chars()
+            .next_back()
+            .map(|c| self.index - c.len_utf8())
+            .unwrap_or(0)
+    }
+
+    /// Go back to where the index says.
+    fn reset(&mut self) {
+        self.iter = self.src[self.index ..].chars();
+    }
+}
+
+impl Debug for Scanner<'_> {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "Scanner({}|{})", self.eaten(), self.rest())
+    }
+}
+
+/// Whether this character denotes a newline.
+pub fn is_newline_char(character: char) -> bool {
+    match character {
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\n' | '\x0B' | '\x0C' | '\r' |
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+        _ => false,
+    }
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 2700b6c8..cdb92c59 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,6 +1,6 @@
 //! Tokenization.
 
-use super::{is_newline_char, CharParser};
+use super::{is_newline_char, Scanner};
 use crate::length::Length;
 use crate::syntax::{Ident, Pos, Span, SpanWith, Spanned, Token};
 
@@ -9,7 +9,7 @@ use TokenMode::*;
 /// An iterator over the tokens of a string of source code.
 #[derive(Debug)]
 pub struct Tokens<'s> {
-    p: CharParser<'s>,
+    s: Scanner<'s>,
     mode: TokenMode,
     stack: Vec<TokenMode>,
 }
@@ -27,7 +27,7 @@ impl<'s> Tokens<'s> {
     /// Create a new token iterator with the given mode.
     pub fn new(src: &'s str, mode: TokenMode) -> Self {
         Self {
-            p: CharParser::new(src),
+            s: Scanner::new(src),
             mode,
             stack: vec![],
         }
@@ -48,7 +48,7 @@ impl<'s> Tokens<'s> {
     /// The position in the string at which the last token ends and next token
     /// will start.
     pub fn pos(&self) -> Pos {
-        self.p.index().into()
+        self.s.index().into()
     }
 }
 
@@ -57,15 +57,15 @@ impl<'s> Iterator for Tokens<'s> {
 
     /// Parse the next token in the source code.
     fn next(&mut self) -> Option<Self::Item> {
-        let start = self.p.index();
-        let token = match self.p.eat()? {
+        let start = self.s.index();
+        let token = match self.s.eat()? {
             // Whitespace.
             c if c.is_whitespace() => self.read_whitespace(c),
 
             // Comments.
-            '/' if self.p.eat_if('/') => self.read_line_comment(),
-            '/' if self.p.eat_if('*') => self.read_block_comment(),
-            '*' if self.p.eat_if('/') => Token::Invalid("*/"),
+            '/' if self.s.eat_if('/') => self.read_line_comment(),
+            '/' if self.s.eat_if('*') => self.read_block_comment(),
+            '*' if self.s.eat_if('/') => Token::Invalid("*/"),
 
             // Functions.
             '[' => Token::LeftBracket,
@@ -87,7 +87,7 @@ impl<'s> Iterator for Tokens<'s> {
             ':' if self.mode == Header => Token::Colon,
             ',' if self.mode == Header => Token::Comma,
             '=' if self.mode == Header => Token::Equals,
-            '>' if self.mode == Header && self.p.eat_if('>') => Token::Chain,
+            '>' if self.mode == Header && self.s.eat_if('>') => Token::Chain,
 
             // Expressions in headers.
             '+' if self.mode == Header => Token::Plus,
@@ -101,7 +101,7 @@ impl<'s> Iterator for Tokens<'s> {
             _ => self.read_text_or_expr(start),
         };
 
-        let end = self.p.index();
+        let end = self.s.index();
         Some(token.span_with(Span::new(start, end)))
     }
 }
@@ -109,21 +109,21 @@ impl<'s> Iterator for Tokens<'s> {
 impl<'s> Tokens<'s> {
     fn read_whitespace(&mut self, first: char) -> Token<'s> {
         // Shortcut for common case of exactly one space.
-        if first == ' ' && !self.p.check(|c| c.is_whitespace()) {
+        if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
             return Token::Space(0);
         }
 
         // Uneat the first char if it's a newline, so that it's counted in the
         // loop.
         if is_newline_char(first) {
-            self.p.uneat();
+            self.s.uneat();
         }
 
         // Count the number of newlines.
         let mut newlines = 0;
-        while let Some(c) = self.p.eat_merging_crlf() {
+        while let Some(c) = self.s.eat_merging_crlf() {
             if !c.is_whitespace() {
-                self.p.uneat();
+                self.s.uneat();
                 break;
             }
 
@@ -136,17 +136,17 @@ impl<'s> Tokens<'s> {
     }
 
     fn read_line_comment(&mut self) -> Token<'s> {
-        Token::LineComment(self.p.eat_until(is_newline_char))
+        Token::LineComment(self.s.eat_until(is_newline_char))
     }
 
     fn read_block_comment(&mut self) -> Token<'s> {
-        let start = self.p.index();
+        let start = self.s.index();
 
         let mut state = '_';
         let mut depth = 1;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
-        while let Some(c) = self.p.eat() {
+        while let Some(c) = self.s.eat() {
             state = match (state, c) {
                 ('*', '/') => {
                     depth -= 1;
@@ -164,21 +164,21 @@ impl<'s> Tokens<'s> {
         }
 
         let terminated = depth == 0;
-        let end = self.p.index() - if terminated { 2 } else { 0 };
+        let end = self.s.index() - if terminated { 2 } else { 0 };
 
-        Token::BlockComment(self.p.get(start .. end))
+        Token::BlockComment(self.s.get(start .. end))
     }
 
     fn read_hex(&mut self) -> Token<'s> {
         // This parses more than the permissable 0-9, a-f, A-F character ranges
         // to provide nicer error messages later.
-        Token::Hex(self.p.eat_while(|c| c.is_ascii_alphanumeric()))
+        Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric()))
     }
 
     fn read_string(&mut self) -> Token<'s> {
         let mut escaped = false;
         Token::Str {
-            string: self.p.eat_until(|c| {
+            string: self.s.eat_until(|c| {
                 if c == '"' && !escaped {
                     true
                 } else {
@@ -186,21 +186,21 @@ impl<'s> Tokens<'s> {
                     false
                 }
             }),
-            terminated: self.p.eat_if('"'),
+            terminated: self.s.eat_if('"'),
         }
     }
 
     fn read_raw(&mut self) -> Token<'s> {
         let mut backticks = 1;
-        while self.p.eat_if('`') {
+        while self.s.eat_if('`') {
             backticks += 1;
         }
 
-        let start = self.p.index();
+        let start = self.s.index();
 
         let mut found = 0;
         while found < backticks {
-            match self.p.eat() {
+            match self.s.eat() {
                 Some('`') => found += 1,
                 Some(_) => found = 0,
                 None => break,
@@ -208,29 +208,29 @@ impl<'s> Tokens<'s> {
         }
 
         let terminated = found == backticks;
-        let end = self.p.index() - if terminated { found } else { 0 };
+        let end = self.s.index() - if terminated { found } else { 0 };
 
         Token::Raw {
-            raw: self.p.get(start .. end),
+            raw: self.s.get(start .. end),
             backticks,
             terminated,
         }
     }
 
     fn read_escaped(&mut self) -> Token<'s> {
-        if let Some(c) = self.p.peek() {
+        if let Some(c) = self.s.peek() {
             match c {
                 '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => {
-                    let start = self.p.index();
-                    self.p.eat_assert(c);
-                    Token::Text(&self.p.eaten_from(start))
+                    let start = self.s.index();
+                    self.s.eat_assert(c);
+                    Token::Text(&self.s.eaten_from(start))
                 }
-                'u' if self.p.peek_nth(1) == Some('{') => {
-                    self.p.eat_assert('u');
-                    self.p.eat_assert('{');
+                'u' if self.s.peek_nth(1) == Some('{') => {
+                    self.s.eat_assert('u');
+                    self.s.eat_assert('{');
                     Token::UnicodeEscape {
-                        sequence: self.p.eat_while(|c| c.is_ascii_hexdigit()),
-                        terminated: self.p.eat_if('}'),
+                        sequence: self.s.eat_while(|c| c.is_ascii_hexdigit()),
+                        terminated: self.s.eat_if('}'),
                     }
                 }
                 c if c.is_whitespace() => Token::Backslash,
@@ -246,7 +246,7 @@ impl<'s> Tokens<'s> {
         let header = self.mode == Header;
 
         let mut last_was_e = false;
-        self.p.eat_until(|c| {
+        self.s.eat_until(|c| {
             let end = match c {
                 c if c.is_whitespace() => true,
                 '[' | ']' | '*' | '/' => true,
@@ -259,7 +259,7 @@ impl<'s> Tokens<'s> {
             end
         });
 
-        let read = self.p.eaten_from(start);
+        let read = self.s.eaten_from(start);
         if self.mode == Header {
             parse_expr(read)
         } else {
diff --git a/src/syntax/lines.rs b/src/syntax/lines.rs
index 7f7ee049..6ea223c4 100644
--- a/src/syntax/lines.rs
+++ b/src/syntax/lines.rs
@@ -3,7 +3,7 @@
 use std::fmt::{self, Debug, Display, Formatter};
 
 use super::Pos;
-use crate::parse::{is_newline_char, CharParser};
+use crate::parse::{is_newline_char, Scanner};
 
 /// Enables conversion of byte position to locations.
 pub struct LineMap<'s> {
@@ -15,11 +15,11 @@ impl<'s> LineMap<'s> {
     /// Create a new line map for a source string.
     pub fn new(src: &'s str) -> Self {
         let mut line_starts = vec![Pos::ZERO];
-        let mut p = CharParser::new(src);
+        let mut s = Scanner::new(src);
 
-        while let Some(c) = p.eat_merging_crlf() {
+        while let Some(c) = s.eat_merging_crlf() {
             if is_newline_char(c) {
-                line_starts.push(p.index().into());
+                line_starts.push(s.index().into());
             }
         }
 
-- 
cgit v1.2.3