summaryrefslogtreecommitdiff
path: root/src/parse/scanner.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2021-11-08 13:08:15 +0100
committerGitHub <noreply@github.com>2021-11-08 13:08:15 +0100
commitc6f8ad35f45248f1fd36ee00195966f1629c6ca7 (patch)
tree51faa3f6bbc56f75636823adeea135ed76e1b33b /src/parse/scanner.rs
parentea6ee3f667e922ed2f21b08719a45d2395787932 (diff)
parent38c5c362419c5eee7a4fdc0b43d3a9dfb339a6d2 (diff)
Merge pull request #46 from typst/parser-ng
Next Generation Parser
Diffstat (limited to 'src/parse/scanner.rs')
-rw-r--r--src/parse/scanner.rs45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 8e3e4278..ea06a2e0 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -1,5 +1,7 @@
use std::slice::SliceIndex;
+use unicode_xid::UnicodeXID;
+
/// A featureful char-based scanner.
#[derive(Copy, Clone)]
pub struct Scanner<'s> {
@@ -114,6 +116,12 @@ impl<'s> Scanner<'s> {
self.index = index;
}
+ /// The full source string.
+ #[inline]
+ pub fn src(&self) -> &'s str {
+ &self.src
+ }
+
/// Slice out part of the source string.
#[inline]
pub fn get<I>(&self, index: I) -> &'s str
@@ -150,6 +158,16 @@ impl<'s> Scanner<'s> {
// optimized away in some cases.
self.src.get(start .. self.index).unwrap_or_default()
}
+
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
+ }
}
/// Whether this character denotes a newline.
@@ -163,3 +181,30 @@ pub fn is_newline(character: char) -> bool {
'\u{0085}' | '\u{2028}' | '\u{2029}'
)
}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+ let mut chars = string.chars();
+ chars
+ .next()
+ .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+ c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+ c.is_xid_continue() || c == '_' || c == '-'
+}