summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2019-04-30 09:15:31 +0200
committerLaurenz <laurmaedje@gmail.com>2019-04-30 09:20:22 +0200
commit9d605c3128bcb20c51988ecea72c956a79f4306b (patch)
treeb764aaa51e6f5286ece5796dea568eb33c522339
parent90848df5de3cf2e7d99ee8c2a7a07b768f207f7a (diff)
Remove the concept of words from tokenization 🎈
-rw-r--r--Cargo.toml1
-rw-r--r--src/engine/mod.rs2
-rw-r--r--src/lib.rs1
-rw-r--r--src/parsing.rs294
-rw-r--r--src/syntax.rs8
-rw-r--r--src/utility.rs135
6 files changed, 152 insertions, 289 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 068ea471..dab6a6a4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,6 @@ edition = "2018"
[dependencies]
pdf = { path = "../pdf" }
opentype = { path = "../opentype" }
-unicode-segmentation = "1.2"
unicode-xid = "0.1.0"
byteorder = "1"
smallvec = "0.6.9"
diff --git a/src/engine/mod.rs b/src/engine/mod.rs
index 622dfc5e..65fcd9a1 100644
--- a/src/engine/mod.rs
+++ b/src/engine/mod.rs
@@ -62,7 +62,7 @@ impl<'a> Engine<'a> {
// Iterate through the documents nodes.
for node in &self.tree.nodes {
match node {
- Node::Word(word) => self.write_word(word)?,
+ Node::Text(text) => self.write_word(text)?,
Node::Space => self.write_space()?,
Node::Newline => {
self.write_buffered_text();
diff --git a/src/lib.rs b/src/lib.rs
index 5a11345a..0a17fca2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -53,7 +53,6 @@ use crate::syntax::SyntaxTree;
#[macro_use]
mod error;
-mod utility;
pub mod doc;
pub mod engine;
pub mod export;
diff --git a/src/parsing.rs b/src/parsing.rs
index fed3a3ad..a161f096 100644
--- a/src/parsing.rs
+++ b/src/parsing.rs
@@ -1,30 +1,29 @@
//! Tokenization and parsing of source code into syntax trees.
use std::collections::HashMap;
-use std::fmt;
use std::iter::Peekable;
use std::mem::swap;
use std::ops::Deref;
+use std::str::CharIndices;
-use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
+use unicode_xid::UnicodeXID;
use crate::syntax::*;
use crate::func::{ParseContext, Scope};
-use crate::utility::{Splinor, Spline, Splined, StrExt};
/// An iterator over the tokens of source code.
-#[derive(Clone)]
+#[derive(Debug, Clone)]
pub struct Tokens<'s> {
source: &'s str,
- words: Peekable<UWordBounds<'s>>,
- state: TokensState<'s>,
- stack: Vec<TokensState<'s>>,
+ chars: Peekable<CharIndices<'s>>,
+ state: TokensState,
+ stack: Vec<TokensState>,
}
/// The state the tokenizer is in.
-#[derive(Debug, Clone)]
-enum TokensState<'s> {
+#[derive(Debug, Clone, PartialEq)]
+enum TokensState {
/// The base state if there is nothing special we are in.
Body,
/// Inside a function header. Here colons and equal signs get parsed
@@ -32,9 +31,6 @@ enum TokensState<'s> {
Function,
/// We expect either the end of the function or the beginning of the body.
MaybeBody,
- /// We are inside one unicode word that consists of multiple tokens,
- /// because it contains double underscores.
- DoubleUnderscore(Spline<'s, Token<'s>>),
}
impl<'s> Tokens<'s> {
@@ -43,7 +39,7 @@ impl<'s> Tokens<'s> {
pub fn new(source: &'s str) -> Tokens<'s> {
Tokens {
source,
- words: source.split_word_bounds().peekable(),
+ chars: source.char_indices().peekable(),
state: TokensState::Body,
stack: vec![],
}
@@ -51,11 +47,11 @@ impl<'s> Tokens<'s> {
/// Advance the iterator by one step.
fn advance(&mut self) {
- self.words.next();
+ self.chars.next();
}
/// Switch to the given state.
- fn switch(&mut self, mut state: TokensState<'s>) {
+ fn switch(&mut self, mut state: TokensState) {
swap(&mut state, &mut self.state);
self.stack.push(state);
}
@@ -70,6 +66,11 @@ impl<'s> Tokens<'s> {
self.advance();
token
}
+
+ /// Returns a word containing the string bounded by the given indices.
+ fn text(&self, start: usize, end: usize) -> Token<'s> {
+ Token::Text(&self.source[start .. end])
+ }
}
impl<'s> Iterator for Tokens<'s> {
@@ -79,27 +80,11 @@ impl<'s> Iterator for Tokens<'s> {
fn next(&mut self) -> Option<Token<'s>> {
use TokensState as TS;
- // Return the remaining words and double underscores.
- if let TS::DoubleUnderscore(splinor) = &mut self.state {
- loop {
- if let Some(splined) = splinor.next() {
- return Some(match splined {
- Splined::Value(word) if word != "" => Token::Word(word),
- Splined::Splinor(s) => s,
- _ => continue,
- });
- } else {
- self.unswitch();
- break;
- }
- }
- }
-
- // Skip whitespace, but if at least one whitespace word existed,
- // remember that, because we return a space token.
+ // Skip whitespace, but if at least one whitespace character existed,
+ // remember that, because then we return a space token.
let mut whitespace = false;
- while let Some(word) = self.words.peek() {
- if !word.is_whitespace() {
+ while let Some(&(_, c)) = self.chars.peek() {
+ if !c.is_whitespace() || c == '\n' || c == '\r' {
break;
}
whitespace = true;
@@ -111,100 +96,82 @@ impl<'s> Iterator for Tokens<'s> {
// Function maybe has a body
if self.state == TS::MaybeBody {
- match *self.words.peek()? {
- "[" => {
- self.state = TS::Body;
- return Some(self.consumed(Token::LeftBracket));
- },
- _ => self.unswitch(),
+ if self.chars.peek()?.1 == '[' {
+ self.state = TS::Body;
+ return Some(self.consumed(Token::LeftBracket));
+ } else {
+ self.unswitch();
}
}
// Now all special cases are handled and we can finally look at the
// next words.
- let next = self.words.next()?;
- let afterwards = self.words.peek();
+ let (next_pos, next) = self.chars.next()?;
+ let afterwards = self.chars.peek().map(|&(_, c)| c);
Some(match next {
// Special characters
- "[" => {
+ '[' => {
self.switch(TS::Function);
Token::LeftBracket
},
- "]" => {
+ ']' => {
if self.state == TS::Function {
self.state = TS::MaybeBody;
}
Token::RightBracket
},
- "$" => Token::Dollar,
- "#" => Token::Hashtag,
+ '$' => Token::Dollar,
+ '#' => Token::Hashtag,
// Context sensitive operators
- ":" if self.state == TS::Function => Token::Colon,
- "=" if self.state == TS::Function => Token::Equals,
+ ':' if self.state == TS::Function => Token::Colon,
+ '=' if self.state == TS::Function => Token::Equals,
// Double star/underscore
- "*" if afterwards == Some(&"*") => self.consumed(Token::DoubleStar),
- "__" => Token::DoubleUnderscore,
+ '*' if afterwards == Some('*') => self.consumed(Token::DoubleStar),
+ '_' if afterwards == Some('_') => self.consumed(Token::DoubleUnderscore),
// Newlines
- "\n" | "\r\n" => Token::Newline,
+ '\n' => Token::Newline,
+ '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
// Escaping
- r"\" => {
- if let Some(next) = afterwards {
- let escapable = match *next {
- "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
- w if w.starts_with("__") => true,
- _ => false,
- };
-
- if escapable {
- let next = *next;
+ '\\' => {
+ if let Some(&(index, c)) = self.chars.peek() {
+ if is_special_character(c) {
self.advance();
- return Some(Token::Word(next));
+ return Some(self.text(index, index + c.len_utf8()));
}
}
- Token::Word(r"\")
- },
-
- // Double underscores hidden in words.
- word if word.contains("__") => {
- let spline = word.spline("__", Token::DoubleUnderscore);
- self.switch(TS::DoubleUnderscore(spline));
- return self.next();
+ Token::Text("\\")
},
// Now it seems like it's just a normal word.
- word => Token::Word(word),
- })
- }
-}
+ _ => {
+ // Find out when the word ends.
+ let mut end = (next_pos, next);
+ while let Some(&(index, c)) = self.chars.peek() {
+ if is_special_character(c) || c.is_whitespace() {
+ break;
+ }
+ end = (index, c);
+ self.advance();
+ }
-impl fmt::Debug for Tokens<'_> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- f.debug_struct("Tokens")
- .field("source", &self.source)
- .field("words", &"Peekable<UWordBounds>")
- .field("state", &self.state)
- .field("stack", &self.stack)
- .finish()
+ let end_pos = end.0 + end.1.len_utf8();
+ self.text(next_pos, end_pos)
+ },
+ })
}
}
-impl PartialEq for TokensState<'_> {
- fn eq(&self, other: &TokensState) -> bool {
- use TokensState as TS;
-
- match (self, other) {
- (TS::Body, TS::Body) => true,
- (TS::Function, TS::Function) => true,
- (TS::MaybeBody, TS::MaybeBody) => true,
- // They are not necessarily different, but we don't care
- _ => false,
- }
+/// Whether this character has a special meaning in the language.
+fn is_special_character(character: char) -> bool {
+ match character {
+ '[' | ']' | '$' | '#' | '\\' | ':' | '=' | '*' | '_' => true,
+ _ => false,
}
}
@@ -285,8 +252,8 @@ impl<'s, 't> Parser<'s, 't> {
Token::Space => self.append_space_consumed(),
Token::Newline => self.switch_consumed(PS::FirstNewline),
- // Words
- Token::Word(word) => self.append_consumed(Node::Word(word.to_owned())),
+ // Text
+ Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
// Functions
Token::LeftBracket => self.parse_function()?,
@@ -315,7 +282,7 @@ impl<'s, 't> Parser<'s, 't> {
// The next token should be the name of the function.
let name = match self.tokens.next() {
- Some(Token::Word(word)) => {
+ Some(Token::Text(word)) => {
if word.is_identifier() {
Ok(word.to_owned())
} else {
@@ -537,6 +504,39 @@ impl<'s> Iterator for ParseTokens<'s> {
}
}
+/// More useful functions on `str`'s.
+trait StrExt {
+ /// Whether self consists only of whitespace.
+ fn is_whitespace(&self) -> bool;
+
+ /// Whether this word is a valid unicode identifier.
+ fn is_identifier(&self) -> bool;
+}
+
+impl StrExt for str {
+ fn is_whitespace(&self) -> bool {
+ self.chars().all(|c| c.is_whitespace() && c != '\n')
+ }
+
+ fn is_identifier(&self) -> bool {
+ let mut chars = self.chars();
+
+ match chars.next() {
+ Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+ None => return false,
+ _ => (),
+ }
+
+ while let Some(c) = chars.next() {
+ if !UnicodeXID::is_xid_continue(c) {
+ return false;
+ }
+ }
+
+ true
+ }
+}
+
/// The error type for parsing.
pub struct ParseError(String);
@@ -560,7 +560,7 @@ mod token_tests {
use super::*;
use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
- Dollar as D, Hashtag as H, Word as W};
+ Dollar as D, Hashtag as H, Text as T};
/// Test if the source code tokenizes to the tokens.
fn test(src: &str, tokens: Vec<Token>) {
@@ -571,7 +571,7 @@ mod token_tests {
#[test]
fn tokenize_base() {
test("", vec![]);
- test("Hallo", vec![W("Hallo")]);
+ test("Hallo", vec![T("Hallo")]);
test("[", vec![L]);
test("]", vec![R]);
test("$", vec![D]);
@@ -586,26 +586,26 @@ mod token_tests {
fn tokenize_whitespace_newlines() {
test(" \t", vec![S]);
test("First line\r\nSecond line\nThird line\n",
- vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
- W("Third"), S, W("line"), N]);
- test("Hello \n ", vec![W("Hello"), S, N, S]);
- test("Dense\nTimes", vec![W("Dense"), N, W("Times")]);
+ vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
+ T("Third"), S, T("line"), N]);
+ test("Hello \n ", vec![T("Hello"), S, N, S]);
+ test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
}
/// Tests if escaping with backslash works as it should.
#[test]
fn tokenize_escape() {
- test(r"\[", vec![W("[")]);
- test(r"\]", vec![W("]")]);
- test(r"\#", vec![W("#")]);
- test(r"\$", vec![W("$")]);
- test(r"\:", vec![W(":")]);
- test(r"\=", vec![W("=")]);
- test(r"\**", vec![W("*"), W("*")]);
- test(r"\*", vec![W("*")]);
- test(r"\__", vec![W("__")]);
- test(r"\_", vec![W("_")]);
- test(r"\hello", vec![W(r"\"), W("hello")]);
+ test(r"\[", vec![T("[")]);
+ test(r"\]", vec![T("]")]);
+ test(r"\#", vec![T("#")]);
+ test(r"\$", vec![T("$")]);
+ test(r"\:", vec![T(":")]);
+ test(r"\=", vec![T("=")]);
+ test(r"\**", vec![T("*"), T("*")]);
+ test(r"\*", vec![T("*")]);
+ test(r"\__", vec![T("_"), T("_")]);
+ test(r"\_", vec![T("_")]);
+ test(r"\hello", vec![T("\\"), T("hello")]);
}
/// Tokenizes some more realistic examples.
@@ -616,8 +616,8 @@ mod token_tests {
Test [italic][example]!
]
", vec![
- N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
- W("example"), R, W("!"), N, S, R, N, S
+ N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
+ T("example"), R, T("!"), N, S, R, N, S
]);
test(r"
@@ -626,10 +626,10 @@ mod token_tests {
Das ist ein Beispielsatz mit **fetter** Schrift.
", vec![
- N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
- L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
- W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
- DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
+ N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
+ L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
+ T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
+ DS, T("fetter"), DS, S, T("Schrift."), N, S
]);
}
@@ -638,13 +638,13 @@ mod token_tests {
#[test]
fn tokenize_symbols_context() {
test("[func: key=value][Answer: 7]",
- vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
- W("Answer"), W(":"), S, W("7"), R]);
+ vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
+ T("Answer"), T(":"), S, T("7"), R]);
test("[[n: k=v]:x][:[=]]:=",
- vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
- L, W(":"), L, E, R, R, W(":"), W("=")]);
+ vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
+ L, T(":"), L, E, R, R, T(":"), T("=")]);
test("[func: __key__=value]",
- vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
+ vec![L, T("func"), C, S, DU, T("key"), DU, E, T("value"), R]);
}
/// This test has a special look at the double underscore syntax, because
@@ -653,16 +653,16 @@ mod token_tests {
#[test]
fn tokenize_double_underscore() {
test("he__llo__world_ _ __ Now this_ is__ special!",
- vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
- W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
+ vec![T("he"), DU, T("llo"), DU, T("world"), T("_"), S, T("_"), S, DU, S, T("Now"), S,
+ T("this"), T("_"), S, T("is"), DU, S, T("special!")]);
}
/// This test is for checking if non-ASCII characters get parsed correctly.
#[test]
fn tokenize_unicode() {
test("[document][Hello 🌍!]",
- vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
- test("[f]⺐.", vec![L, W("f"), R, W("⺐"), W(".")]);
+ vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
+ test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
}
}
@@ -674,7 +674,7 @@ mod parse_tests {
use Node::{Space as S, Newline as N, Func as F};
#[allow(non_snake_case)]
- fn W(s: &str) -> Node { Node::Word(s.to_owned()) }
+ fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
/// A testing function which just parses it's body into a syntax tree.
#[derive(Debug, PartialEq)]
@@ -764,19 +764,19 @@ mod parse_tests {
#[test]
fn parse_base() {
test("", tree! []);
- test("Hello World!", tree! [ W("Hello"), S, W("World"), W("!") ]);
+ test("Hello World!", tree! [ T("Hello"), S, T("World!") ]);
}
/// Test whether newlines generate the correct whitespace.
#[test]
fn parse_newlines_whitespace() {
- test("Hello\nWorld", tree! [ W("Hello"), S, W("World") ]);
- test("Hello \n World", tree! [ W("Hello"), S, W("World") ]);
- test("Hello\n\nWorld", tree! [ W("Hello"), N, W("World") ]);
- test("Hello \n\nWorld", tree! [ W("Hello"), S, N, W("World") ]);
- test("Hello\n\n World", tree! [ W("Hello"), N, S, W("World") ]);
- test("Hello \n \n \n World", tree! [ W("Hello"), S, N, S, W("World") ]);
- test("Hello\n \n\n World", tree! [ W("Hello"), S, N, S, W("World") ]);
+ test("Hello\nWorld", tree! [ T("Hello"), S, T("World") ]);
+ test("Hello \n World", tree! [ T("Hello"), S, T("World") ]);
+ test("Hello\n\nWorld", tree! [ T("Hello"), N, T("World") ]);
+ test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
+ test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]);
+ test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]);
+ test("Hello\n \n\n World", tree! [ T("Hello"), S, N, S, T("World") ]);
}
/// Parse things dealing with functions.
@@ -790,18 +790,18 @@ mod parse_tests {
test_scoped(&scope,"[test]", tree! [ F(func! { name => "test", body => None }) ]);
test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [
- W("This"), S, W("is"), S, W("an"), S,
- F(func! { name => "modifier", body => tree! [ W("example") ] }), S,
- W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
+ T("This"), S, T("is"), S, T("an"), S,
+ F(func! { name => "modifier", body => tree! [ T("example") ] }), S,
+ T("of"), S, T("a"), S, T("function"), S, T("invocation.")
]);
test_scoped(&scope, "[func][Hello][modifier][Here][end]", tree! [
F(func! {
name => "func",
- body => tree! [ W("Hello") ],
+ body => tree! [ T("Hello") ],
}),
F(func! {
name => "modifier",
- body => tree! [ W("Here") ],
+ body => tree! [ T("Here") ],
}),
F(func! {
name => "end",
@@ -820,11 +820,11 @@ mod parse_tests {
body => tree! [
F(func! {
name => "func",
- body => tree! [ W("call") ],
+ body => tree! [ T("call") ],
}),
],
}),
- S, W("outside")
+ S, T("outside")
]);
}
@@ -839,12 +839,12 @@ mod parse_tests {
name => "func",
body => None,
}),
- S, W("⺐"), W(".")
+ S, T("⺐.")
]);
test_scoped(&scope, "[bold][Hello 🌍!]", tree! [
F(func! {
name => "bold",
- body => tree! [ W("Hello"), S, W("🌍"), W("!") ],
+ body => tree! [ T("Hello"), S, T("🌍!") ],
})
]);
}
diff --git a/src/syntax.rs b/src/syntax.rs
index bad7ab26..a8ae930d 100644
--- a/src/syntax.rs
+++ b/src/syntax.rs
@@ -30,8 +30,8 @@ pub enum Token<'s> {
Dollar,
/// A hashtag starting a _comment_.
Hashtag,
- /// Everything else just is a literal word.
- Word(&'s str),
+ /// Everything else is just text.
+ Text(&'s str),
}
/// A tree representation of the source.
@@ -62,8 +62,8 @@ pub enum Node {
ToggleBold,
/// Indicates that math mode was enabled/disabled.
ToggleMath,
- /// A literal word.
- Word(String),
+ /// Literal text.
+ Text(String),
/// A function invocation.
Func(FuncCall),
}
diff --git a/src/utility.rs b/src/utility.rs
deleted file mode 100644
index 675016ee..00000000
--- a/src/utility.rs
+++ /dev/null
@@ -1,135 +0,0 @@
-//! Utility functionality.
-
-use std::iter::Peekable;
-use std::str::Split;
-use unicode_xid::UnicodeXID;
-
-
-/// Types that can be splined.
-pub trait Splinor {
- /// Returns an iterator over the substrings splitted by the pattern,
- /// intertwined with the splinor.
- ///
- /// # Example
- ///
- /// ```ignore
- /// #[derive(Debug, Copy, Clone, PartialEq)]
- /// struct Space;
- ///
- /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
- /// assert_eq!(v, [
- /// Splined::Value("My"),
- /// Splined::Splinor(Space),
- /// Splined::Value("airplane"),
- /// Splined::Splinor(Space),
- /// Splined::Value("flies!"),
- /// ]);
- /// ```
- fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
-}
-
-impl Splinor for str {
- fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
- Spline {
- splinor: Splined::Splinor(splinor),
- split: self.split(pat).peekable(),
- next_splinor: false,
- }
- }
-}
-
-/// Iterator over splitted values and splinors.
-///
-/// Created by the [`spline`](Splinor::spline) function.
-#[derive(Debug, Clone)]
-pub struct Spline<'s, T> {
- splinor: Splined<'s, T>,
- split: Peekable<Split<'s, &'s str>>,
- next_splinor: bool,
-}
-
-/// Represents either a splitted substring or a splinor.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Splined<'s, T> {
- /// A substring.
- Value(&'s str),
- /// An intertwined splinor.
- Splinor(T),
-}
-
-impl<'s, T: Clone> Iterator for Spline<'s, T> {
- type Item = Splined<'s, T>;
-
- fn next(&mut self) -> Option<Splined<'s, T>> {
- if self.next_splinor && self.split.peek().is_some() {
- self.next_splinor = false;
- return Some(self.splinor.clone());
- } else {
- self.next_splinor = true;
- return Some(Splined::Value(self.split.next()?))
- }
- }
-}
-
-/// More useful functions on `str`'s.
-pub trait StrExt {
- /// Whether self consists only of whitespace.
- fn is_whitespace(&self) -> bool;
-
- /// Whether this word is a valid unicode identifier.
- fn is_identifier(&self) -> bool;
-}
-
-impl StrExt for str {
- fn is_whitespace(&self) -> bool {
- self.chars().all(|c| c.is_whitespace() && c != '\n')
- }
-
- fn is_identifier(&self) -> bool {
- let mut chars = self.chars();
-
- match chars.next() {
- Some(c) if !UnicodeXID::is_xid_start(c) => return false,
- None => return false,
- _ => (),
- }
-
- while let Some(c) = chars.next() {
- if !UnicodeXID::is_xid_continue(c) {
- return false;
- }
- }
-
- true
- }
-}
-
-
-#[cfg(test)]
-mod splinor_tests {
- use super::*;
- use Splined::{Value as V, Splinor as S};
-
- #[derive(Debug, Copy, Clone, PartialEq)]
- enum Token { DoubleUnderscore }
-
- fn test<T>(string: &str, pat: &str, splinor: T, vec: Vec<Splined<T>>)
- where T: std::fmt::Debug + Clone + PartialEq {
- assert_eq!(string.spline(pat, splinor).collect::<Vec<_>>(), vec);
- }
-
- #[test]
- fn splinor() {
- let s = S(Token::DoubleUnderscore);
- test("__he__llo__world__", "__", Token::DoubleUnderscore,
- vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
- test("__Italic__", "__", Token::DoubleUnderscore,
- vec![V(""), s, V("Italic"), s, V("")]);
- test("Key__Value", "__", Token::DoubleUnderscore,
- vec![V("Key"), s, V("Value")]);
- test("__Start__NoEnd", "__", Token::DoubleUnderscore,
- vec![V(""), s, V("Start"), s, V("NoEnd")]);
- test("NoStart__End__", "__", Token::DoubleUnderscore,
- vec![V("NoStart"), s, V("End"), s, V("")]);
- }
-}