From 6bbedeaa2c6e0068e2fb6602cbf0002fb6a6ce03 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 16 Dec 2020 15:42:02 +0100 Subject: =?UTF-8?q?Better=20tokenization=20testing=20=F0=9F=8C=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Better tokenization test coverage. - Suffix testing: Each test case is tested with many different suffixes to ensure correct token ends. - Improves expression parsing (fixes #3). --- src/syntax/ident.rs | 18 +++++++++++------- src/syntax/token.rs | 14 +++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs index f8c38cfb..55f97f95 100644 --- a/src/syntax/ident.rs +++ b/src/syntax/ident.rs @@ -46,13 +46,17 @@ impl Deref for Ident { /// Whether the string is a valid identifier. pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); - if matches!(chars.next(), Some(c) if c.is_xid_start() || is_also_ok(c)) { - chars.all(|c| c.is_xid_continue() || is_also_ok(c)) - } else { - false - } + chars + .next() + .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) +} + +/// Whether the character can start an identifier. +pub fn is_id_start(c: char) -> bool { + c.is_xid_start() || c == '_' } -fn is_also_ok(c: char) -> bool { - c == '-' || c == '_' +/// Whether the character can continue an identifier. +pub fn is_id_continue(c: char) -> bool { + c.is_xid_continue() || c == '_' || c == '-' } diff --git a/src/syntax/token.rs b/src/syntax/token.rs index e630c50c..21a56004 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -24,16 +24,16 @@ pub enum Token<'s> { Star, /// An underscore: `_`. Underscore, - /// A backslash followed by whitespace: `\`. - Backslash, /// A hashtag indicating a section heading: `#`. Hashtag, - /// A non-breaking space: `~`. - NonBreakingSpace, - /// A raw block: `` `...` ``. - Raw(TokenRaw<'s>), + /// A tilde: `~`. 
+ Tilde, + /// A backslash followed by whitespace: `\`. + Backslash, /// A unicode escape sequence: `\u{1F5FA}`. UnicodeEscape(TokenUnicodeEscape<'s>), + /// A raw block: `` `...` ``. + Raw(TokenRaw<'s>), /// A left bracket: `[`. LeftBracket, @@ -134,7 +134,7 @@ impl<'s> Token<'s> { Self::Underscore => "underscore", Self::Backslash => "backslash", Self::Hashtag => "hashtag", - Self::NonBreakingSpace => "non-breaking space", + Self::Tilde => "tilde", Self::Raw { .. } => "raw block", Self::UnicodeEscape { .. } => "unicode escape sequence", -- cgit v1.2.3