summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Martin Haug <mhaug@live.de> 2020-08-31 14:47:52 +0200
committer: Martin Haug <mhaug@live.de> 2020-08-31 14:47:52 +0200
commit: 1942a25793ce11c2854deed8d1dcd56ae851e1d6 (patch)
tree: 30d90f46dfcdf855335f6feb07305cd5b9c2baf7 /src
parent: 08433ab79fa8e775c6574b75e1e6222ecdca7ef1 (diff)
Move Nbsp logic to tokenizer 🚛
Diffstat (limited to 'src')
-rw-r--r-- src/syntax/parsing.rs 23
-rw-r--r-- src/syntax/tokens.rs 6
2 files changed, 7 insertions, 22 deletions
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index d48b9ff6..6a8b8103 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -104,26 +104,7 @@ impl Parser<'_> {
self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
}
- Token::Text(text) => {
- let mut text_s = String::with_capacity(text.len());
- let mut iter = text.chars();
- while let Some(c) = iter.next() {
- match c {
- '~' => {
- // The escape sequence will separate
- // the ~ into its own text node, therefore
- // check the length here.
- if text.len() == 1 {
- text_s.push('~');
- } else {
- text_s.push('\u{00A0}');
- }
- },
- _ => text_s.push(c),
- }
- }
- self.with_span(SyntaxNode::Text(text_s.to_string()))
- },
+ Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
Token::UnicodeEscape { sequence, terminated } => {
if !terminated {
@@ -1025,7 +1006,7 @@ mod tests {
t!("*hi" => B, T("hi"));
t!("hi_" => T("hi"), I);
t!("hi you" => T("hi"), S, T("you"));
- t!("special~name" => T("special\u{00A0}name"));
+ t!("special~name" => T("special"), T("\u{00A0}"), T("name"));
t!("special\\~name" => T("special"), T("~"), T("name"));
t!("\\u{1f303}" => T("🌃"));
t!("\n\n\nhello" => P, T("hello"));
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index f41babbc..e333968b 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -265,6 +265,9 @@ impl<'s> Iterator for Tokens<'s> {
'_' if self.mode == Body => Underscore,
'`' if self.mode == Body => self.read_raw_or_code(),
+ // Non-breaking spaces.
+ '~' if self.mode == Body => Text("\u{00A0}"),
+
// An escaped thing.
'\\' if self.mode == Body => self.read_escaped(),
@@ -279,7 +282,7 @@ impl<'s> Iterator for Tokens<'s> {
let val = match n {
c if c.is_whitespace() => true,
'[' | ']' | '{' | '}' | '/' | '*' => true,
- '\\' | '_' | '`' if body => true,
+ '\\' | '_' | '`' | '~' if body => true,
':' | '=' | ',' | '"' | '(' | ')' if !body => true,
'+' | '-' if !body && !last_was_e => true,
_ => false,
@@ -646,6 +649,7 @@ mod tests {
t!(Body, " \n\t \n " => S(2));
t!(Body, "\n\r" => S(2));
t!(Body, " \r\r\n \x0D" => S(3));
+ t!(Body, "a~b" => T("a"), T("\u{00A0}"), T("b"));
}
#[test]