diff options
Diffstat (limited to 'src/parse/scanner.rs')
| -rw-r--r-- | src/parse/scanner.rs | 211 |
1 files changed, 0 insertions, 211 deletions
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs deleted file mode 100644 index e4cf56e9..00000000 --- a/src/parse/scanner.rs +++ /dev/null @@ -1,211 +0,0 @@ -use std::slice::SliceIndex; - -use unicode_xid::UnicodeXID; - -/// A featureful char-based scanner. -#[derive(Copy, Clone)] -pub struct Scanner<'s> { - /// The string to scan. - src: &'s str, - /// The index at which the peekable character starts. Must be in bounds and - /// at a codepoint boundary to guarantee safety. - index: usize, -} - -impl<'s> Scanner<'s> { - /// Create a new char scanner. - #[inline] - pub fn new(src: &'s str) -> Self { - Self { src, index: 0 } - } - - /// Whether the end of the string is reached. - pub fn eof(&self) -> bool { - self.index == self.src.len() - } - - /// Consume the next char. - #[inline] - pub fn eat(&mut self) -> Option<char> { - let next = self.peek(); - if let Some(c) = next { - self.index += c.len_utf8(); - } - next - } - - /// Consume the next char if it is the given one. - /// - /// Returns whether the char was consumed. - #[inline] - pub fn eat_if(&mut self, c: char) -> bool { - let matches = self.peek() == Some(c); - if matches { - self.index += c.len_utf8(); - } - matches - } - - /// Consume the next char, debug-asserting that it is the given one. - #[inline] - pub fn eat_assert(&mut self, c: char) { - let next = self.eat(); - debug_assert_eq!(next, Some(c)); - } - - /// Eat chars while the condition is true. - #[inline] - pub fn eat_while<F>(&mut self, mut f: F) -> &'s str - where - F: FnMut(char) -> bool, - { - self.eat_until(|c| !f(c)) - } - - /// Eat chars until the condition is true. - #[inline] - pub fn eat_until<F>(&mut self, mut f: F) -> &'s str - where - F: FnMut(char) -> bool, - { - let start = self.index; - while let Some(c) = self.peek() { - if f(c) { - break; - } - self.index += c.len_utf8(); - } - self.eaten_from(start) - } - - /// Uneat the last eaten char. - #[inline] - pub fn uneat(&mut self) { - self.index = self.last_index(); - } - - /// Peek at the next char without consuming it. - #[inline] - pub fn peek(&self) -> Option<char> { - self.rest().chars().next() - } - - /// Get the nth-previous eaten char. - #[inline] - pub fn prev(&self, n: usize) -> Option<char> { - self.eaten().chars().nth_back(n) - } - - /// Checks whether the next char fulfills a condition. - /// - /// Returns `default` if there is no next char. - #[inline] - pub fn check_or<F>(&self, default: bool, f: F) -> bool - where - F: FnOnce(char) -> bool, - { - self.peek().map_or(default, f) - } - - /// The previous index in the source string. - #[inline] - pub fn last_index(&self) -> usize { - self.eaten().chars().last().map_or(0, |c| self.index - c.len_utf8()) - } - - /// The current index in the source string. - #[inline] - pub fn index(&self) -> usize { - self.index - } - - /// Jump to an index in the source string. - #[inline] - pub fn jump(&mut self, index: usize) { - // Make sure that the index is in bounds and on a codepoint boundary. - self.src.get(index ..).expect("jumped to invalid index"); - self.index = index; - } - - /// The full source string. - #[inline] - pub fn src(&self) -> &'s str { - self.src - } - - /// Slice out part of the source string. - #[inline] - pub fn get<I>(&self, index: I) -> &'s str - where - I: SliceIndex<str, Output = str>, - { - // See `eaten_from` for details about `unwrap_or_default`. - self.src.get(index).unwrap_or_default() - } - - /// The remaining source string after the current index. - #[inline] - pub fn rest(&self) -> &'s str { - // Safety: The index is always in bounds and on a codepoint boundary - // since it starts at zero and is is: - // - either increased by the length of a scanned character, advacing - // from one codepoint boundary to the next, - // - or checked upon jumping. - unsafe { self.src.get_unchecked(self.index ..) } - } - - /// The full source string up to the current index. - #[inline] - pub fn eaten(&self) -> &'s str { - // Safety: The index is always okay, for details see `rest()`. - unsafe { self.src.get_unchecked(.. self.index) } - } - - /// The source string from `start` to the current index. - #[inline] - pub fn eaten_from(&self, start: usize) -> &'s str { - // Using `unwrap_or_default` is much faster than unwrap, probably - // because then the whole call to `eaten_from` is pure and can be - // optimized away in some cases. - self.src.get(start .. self.index).unwrap_or_default() - } -} - -/// Whether this character denotes a newline. -#[inline] -pub fn is_newline(character: char) -> bool { - matches!( - character, - // Line Feed, Vertical Tab, Form Feed, Carriage Return. - '\n' | '\x0B' | '\x0C' | '\r' | - // Next Line, Line Separator, Paragraph Separator. - '\u{0085}' | '\u{2028}' | '\u{2029}' - ) -} - -/// Whether a string is a valid unicode identifier. -/// -/// In addition to what is specified in the [Unicode Standard][uax31], we allow: -/// - `_` as a starting character, -/// - `_` and `-` as continuing characters. -/// -/// [uax31]: http://www.unicode.org/reports/tr31/ -#[inline] -pub fn is_ident(string: &str) -> bool { - let mut chars = string.chars(); - chars - .next() - .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) -} - -/// Whether a character can start an identifier. -#[inline] -pub fn is_id_start(c: char) -> bool { - c.is_xid_start() || c == '_' -} - -/// Whether a character can continue an identifier. -#[inline] -pub fn is_id_continue(c: char) -> bool { - c.is_xid_continue() || c == '_' || c == '-' -} |
