From 6bbedeaa2c6e0068e2fb6602cbf0002fb6a6ce03 Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Wed, 16 Dec 2020 15:42:02 +0100
Subject: =?UTF-8?q?Better=20tokenization=20testing=20=F0=9F=8C=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Better tokenization test coverage.
- Suffix testing: Each test case is tested with many different suffixes to ensure correct token ends.
- Improves expression parsing (fixes #3).
---
 src/syntax/ident.rs | 18 +++++++++++-------
 src/syntax/token.rs | 14 +++++++-------
 2 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'src/syntax')

diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs
index f8c38cfb..55f97f95 100644
--- a/src/syntax/ident.rs
+++ b/src/syntax/ident.rs
@@ -46,13 +46,17 @@ impl Deref for Ident {
 /// Whether the string is a valid identifier.
 pub fn is_ident(string: &str) -> bool {
     let mut chars = string.chars();
-    if matches!(chars.next(), Some(c) if c.is_xid_start() || is_also_ok(c)) {
-        chars.all(|c| c.is_xid_continue() || is_also_ok(c))
-    } else {
-        false
-    }
+    chars
+        .next()
+        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether the character can start an identifier.
+pub fn is_id_start(c: char) -> bool {
+    c.is_xid_start() || c == '_'
 }
 
-fn is_also_ok(c: char) -> bool {
-    c == '-' || c == '_'
+/// Whether the character can continue an identifier.
+pub fn is_id_continue(c: char) -> bool {
+    c.is_xid_continue() || c == '_' || c == '-'
 }
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index e630c50c..21a56004 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -24,16 +24,16 @@ pub enum Token<'s> {
     Star,
     /// An underscore: `_`.
     Underscore,
-    /// A backslash followed by whitespace: `\`.
-    Backslash,
     /// A hashtag indicating a section heading: `#`.
     Hashtag,
-    /// A non-breaking space: `~`.
-    NonBreakingSpace,
-    /// A raw block: `` `...` ``.
-    Raw(TokenRaw<'s>),
+    /// A tilde: `~`.
+    Tilde,
+    /// A backslash followed by whitespace: `\`.
+    Backslash,
     /// A unicode escape sequence: `\u{1F5FA}`.
     UnicodeEscape(TokenUnicodeEscape<'s>),
+    /// A raw block: `` `...` ``.
+    Raw(TokenRaw<'s>),
 
     /// A left bracket: `[`.
     LeftBracket,
@@ -134,7 +134,7 @@ impl<'s> Token<'s> {
             Self::Underscore => "underscore",
             Self::Backslash => "backslash",
             Self::Hashtag => "hashtag",
-            Self::NonBreakingSpace => "non-breaking space",
+            Self::Tilde => "tilde",
             Self::Raw { .. } => "raw block",
             Self::UnicodeEscape { .. } => "unicode escape sequence",
 
-- 
cgit v1.2.3