1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
use std::slice::SliceIndex;
use unicode_xid::UnicodeXID;
/// A char-based scanner over a string slice with a rich set of helpers.
#[derive(Copy, Clone)]
pub struct Scanner<'s> {
    /// The source text being scanned.
    src: &'s str,
    /// Byte offset at which the next (peekable) char begins. It has to stay
    /// in bounds and on a codepoint boundary for the unchecked slicing in
    /// `rest` and `eaten` to be sound.
    index: usize,
    /// Number of columns added to positions on the first line of the source.
    column_offset: usize,
}

impl<'s> Scanner<'s> {
    /// Build a scanner positioned at the start of `src` without any column
    /// offset.
    #[inline]
    pub fn new(src: &'s str) -> Self {
        Self::with_indent_offset(src, 0)
    }

    /// Build a scanner positioned at the start of `src` whose first line is
    /// shifted by `column_offset` columns.
    #[inline]
    pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self {
        Self { src, index: 0, column_offset }
    }

    /// Whether the scanner has consumed the whole source string.
    pub fn eof(&self) -> bool {
        self.src.len() == self.index
    }

    /// Consume and return the next char, or `None` at the end of the source.
    #[inline]
    pub fn eat(&mut self) -> Option<char> {
        let c = self.peek()?;
        self.index += c.len_utf8();
        Some(c)
    }

    /// Consume the next char only if it equals `c`.
    ///
    /// Returns `true` when the char was consumed.
    #[inline]
    pub fn eat_if(&mut self, c: char) -> bool {
        if self.peek() != Some(c) {
            return false;
        }
        self.index += c.len_utf8();
        true
    }

    /// Consume the next char and debug-assert that it equals `c`.
    #[inline]
    pub fn eat_assert(&mut self, c: char) {
        // The char is eaten outside of the assertion so that it is also
        // consumed when debug assertions are compiled out.
        let eaten = self.eat();
        debug_assert_eq!(eaten, Some(c));
    }

    /// Consume chars for as long as `f` returns `true` and return the
    /// consumed slice.
    #[inline]
    pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
    where
        F: FnMut(char) -> bool,
    {
        self.eat_until(move |c| !f(c))
    }

    /// Consume chars until `f` returns `true` (that char is not consumed) and
    /// return the consumed slice.
    #[inline]
    pub fn eat_until<F>(&mut self, mut f: F) -> &'s str
    where
        F: FnMut(char) -> bool,
    {
        let start = self.index;
        // `rest()` yields a slice tied to `'s`, not to `self`, so the index
        // can be advanced while walking it.
        for c in self.rest().chars() {
            if f(c) {
                break;
            }
            self.index += c.len_utf8();
        }
        self.eaten_from(start)
    }

    /// Step back over the most recently eaten char.
    #[inline]
    pub fn uneat(&mut self) {
        self.index = self.last_index();
    }

    /// Look at the next char without consuming it.
    #[inline]
    pub fn peek(&self) -> Option<char> {
        let mut upcoming = self.rest().chars();
        upcoming.next()
    }

    /// The already-eaten char `n` positions before the current one, where
    /// `n = 0` is the char eaten last.
    #[inline]
    pub fn prev(&self, n: usize) -> Option<char> {
        self.eaten().chars().rev().nth(n)
    }

    /// Test the next char against `f`.
    ///
    /// Returns `default` when there is no next char.
    #[inline]
    pub fn check_or<F>(&self, default: bool, f: F) -> bool
    where
        F: FnOnce(char) -> bool,
    {
        match self.peek() {
            Some(c) => f(c),
            None => default,
        }
    }

    /// The index at which the most recently eaten char starts, or zero if
    /// nothing has been eaten.
    #[inline]
    pub fn last_index(&self) -> usize {
        match self.eaten().chars().next_back() {
            Some(c) => self.index - c.len_utf8(),
            None => 0,
        }
    }

    /// The current byte index into the source string.
    #[inline]
    pub fn index(&self) -> usize {
        self.index
    }

    /// Move the scanner to `index`.
    ///
    /// # Panics
    /// Panics if `index` is out of bounds or not on a codepoint boundary.
    #[inline]
    pub fn jump(&mut self, index: usize) {
        // Validating here is what keeps the unchecked slicing in `rest` and
        // `eaten` sound.
        assert!(self.src.is_char_boundary(index), "jumped to invalid index");
        self.index = index;
    }

    /// The complete source string.
    #[inline]
    pub fn src(&self) -> &'s str {
        self.src
    }

    /// Slice out a part of the source string.
    ///
    /// Yields the empty string if `index` is out of bounds or does not fall
    /// on codepoint boundaries.
    #[inline]
    pub fn get<I>(&self, index: I) -> &'s str
    where
        I: SliceIndex<str, Output = str>,
    {
        // Falling back to the empty string instead of unwrapping was found to
        // be much faster, probably because the call is then pure and can be
        // optimized away in some cases.
        self.src.get(index).unwrap_or("")
    }

    /// The part of the source string that has not been consumed yet.
    #[inline]
    pub fn rest(&self) -> &'s str {
        // SAFETY: The index is always in bounds and on a codepoint boundary
        // since it starts at zero and is:
        // - either increased by the length of a scanned character, advancing
        //   from one codepoint boundary to the next,
        // - or set to the start of the previously scanned character,
        // - or checked upon jumping.
        unsafe { self.src.get_unchecked(self.index..) }
    }

    /// The part of the source string in front of the current index.
    #[inline]
    pub fn eaten(&self) -> &'s str {
        // SAFETY: The index is always in bounds and on a codepoint boundary;
        // the reasoning is spelled out in `rest()`.
        unsafe { self.src.get_unchecked(..self.index) }
    }

    /// The source string between `start` and the current index.
    ///
    /// Yields the empty string if `start` lies behind the current index or
    /// is not on a codepoint boundary.
    #[inline]
    pub fn eaten_from(&self, start: usize) -> &'s str {
        self.get(start..self.index)
    }

    /// The column (counted in chars) of the byte `index` in the source.
    ///
    /// Positions on the first line are shifted by the configured column
    /// offset.
    ///
    /// # Panics
    /// Panics if `index` is out of bounds or not on a codepoint boundary.
    #[inline]
    pub fn column(&self, index: usize) -> usize {
        let before = &self.src[..index];

        // Walk backwards to the closest newline, counting the chars in
        // between. If no newline turns up (which includes the case of an
        // empty prefix), the position is on the first line.
        let mut on_first_line = true;
        let mut chars = 0;
        for c in before.chars().rev() {
            if is_newline(c) {
                on_first_line = false;
                break;
            }
            chars += 1;
        }

        if on_first_line {
            chars + self.column_offset
        } else {
            chars
        }
    }
}
/// Whether `character` is one of the characters treated as a line break.
#[inline]
pub fn is_newline(character: char) -> bool {
    match character {
        // Line Feed, Vertical Tab, Form Feed and Carriage Return are the
        // contiguous codepoints U+000A through U+000D.
        '\n'..='\r' => true,
        // Next Line.
        '\u{0085}' => true,
        // Line Separator and Paragraph Separator.
        '\u{2028}'..='\u{2029}' => true,
        _ => false,
    }
}
/// Whether `string` forms a valid unicode identifier.
///
/// Beyond what the [Unicode Standard][uax31] specifies, this accepts:
/// - `_` as the first character,
/// - `_` and `-` as any following character.
///
/// The empty string is not an identifier.
///
/// [uax31]: http://www.unicode.org/reports/tr31/
#[inline]
pub fn is_ident(string: &str) -> bool {
    let mut chars = string.chars();
    match chars.next() {
        // The first char has its own rule, all remaining ones share one.
        Some(first) => is_id_start(first) && chars.all(is_id_continue),
        None => false,
    }
}
/// Whether `c` may appear as the first character of an identifier: either an
/// underscore or any character with the `XID_Start` property.
#[inline]
pub fn is_id_start(c: char) -> bool {
    c == '_' || c.is_xid_start()
}
/// Whether `c` may appear after the first character of an identifier: an
/// underscore, a hyphen or any character with the `XID_Continue` property.
#[inline]
pub fn is_id_continue(c: char) -> bool {
    matches!(c, '_' | '-') || c.is_xid_continue()
}
|