summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Martin Haug <mhaug@live.de> 2020-08-30 13:39:21 +0200
committer Martin Haug <mhaug@live.de> 2020-08-30 13:39:21 +0200
commit c043692c96360e919eebc07fb8fcf46178a4d664 (patch)
tree 2244deca6da43d9b490ac5dc345b5360a23b5095 /src
parent 7041e0938dd17e9a1777857459df4e8ad29b6c4a (diff)
Also provide escapes for strings 🗒
Diffstat (limited to 'src')
-rw-r--r-- src/syntax/parsing.rs 45
-rw-r--r-- src/syntax/tokens.rs 2
2 files changed, 46 insertions, 1 deletions
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index 95c88c6e..06ea8167 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -616,6 +616,48 @@ fn unescape_string(string: &str) -> String {
match iter.next() {
Some('\\') => out.push('\\'),
Some('"') => out.push('"'),
+ Some('u') => {
+ // Index which points to start of escape sequence
+ let mut seen = "\\u".to_string();
+
+ let next = iter.next();
+ if next == Some('{') {
+ seen.push('{');
+
+ let mut valid = true;
+ let mut closed = false;
+ while let Some(c) = iter.next() {
+ seen.push(c);
+ if c == '}' {
+ closed = true;
+ break;
+ }
+
+ if !c.is_ascii_hexdigit() {
+ valid = false;
+ break;
+ }
+ }
+ if valid != false && seen.len() >= 3 {
+ if let Some(c) = std::char::from_u32(
+ u32::from_str_radix(&seen[3..seen.len() - if closed { 1 } else { 0 }], 16)
+ .expect("Unicode escape string not convertible to int")
+ ) {
+ out.push(c);
+ } else {
+ // Somehow provide feedback here that conversion failed?
+ out.push_str(&seen);
+ }
+ } else {
+ out.push_str(&seen);
+ }
+ } else {
+ out.push_str("\\u");
+ if let Some(c) = next {
+ out.push(c);
+ }
+ }
+ }
Some('n') => out.push('\n'),
Some('t') => out.push('\t'),
Some(c) => { out.push('\\'); out.push(c); }
@@ -904,6 +946,9 @@ mod tests {
test(r#"hello world"#, "hello world");
test(r#"hello\nworld"#, "hello\nworld");
test(r#"a\"bc"#, "a\"bc");
+ test(r#"a\u{2603}bc"#, "a☃bc");
+ test(r#"a\u{26c3bg"#, "a\\u{26c3bg");
+ test(r#"av\u{6797"#, "avæž—");
test(r#"a\\"#, "a\\");
test(r#"a\\\nbc"#, "a\\\nbc");
test(r#"a\tbc"#, "a\tbc");
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index f00c1b66..92576d89 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -430,7 +430,7 @@ impl<'s> Tokens<'s> {
}
match self.peek() {
- Some(c) if c == 'u' => {
+ Some('u') => {
// Index which points to start of escape sequence
let index = self.index() - 1;
self.eat();