diff options
| author | Martin Haug <mhaug@live.de> | 2021-10-31 18:52:48 +0100 |
|---|---|---|
| committer | Martin Haug <mhaug@live.de> | 2021-11-05 13:44:49 +0100 |
| commit | 2e7d359e59a45849f53eea6e022ca83295f5a6e7 (patch) | |
| tree | 6fc2b3e3f1c12f1326061cd09a374fc6dca3026a /src/parse/tokens.rs | |
| parent | c569e14c07902b23b7b3e29df4076cea1f4496cf (diff) | |
Unicode escape error moved to tokenizer
Diffstat (limited to 'src/parse/tokens.rs')
| -rw-r--r-- | src/parse/tokens.rs | 92 |
1 file changed, 42 insertions, 50 deletions
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7c500ce7..1d2e32ec 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -200,7 +200,7 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); - NodeKind::Text(resolve_string(self.s.eaten_from(start))) + NodeKind::Text(self.s.eaten_from(start).into()) } fn whitespace(&mut self) -> NodeKind { @@ -243,10 +243,16 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - })) + if let Some(character) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(UnicodeEscapeToken { + character, + }) + } else { + NodeKind::Error( + ErrorPosition::Full, + "invalid unicode escape sequence".into(), + ) + } } else { NodeKind::Error( ErrorPosition::End, @@ -560,35 +566,21 @@ mod tests { use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - })) - } else { - NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) - } + fn UnicodeEscape(character: char) -> NodeKind { + NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) } - fn Raw( - text: &str, - lang: Option<&str>, - backticks_left: u8, - err_msg: Option<&str>, - block: bool, - ) -> NodeKind { - match err_msg { - None => NodeKind::Raw(Rc::new(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks: backticks_left, - block, - })), - Some(msg) => { - NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into()) - } - } + fn Error(pos: ErrorPosition, message: &str) -> NodeKind { + NodeKind::Error(pos, message.into()) + } + + fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { + 
NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) } fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { @@ -795,16 +787,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => UnicodeEscape("", true)); - t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true)); - t!(Markup: r"\u{P}" => UnicodeEscape("P", true)); + t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); + t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false)); - t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false)); - t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false)); - t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false)); - t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -894,22 +886,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. 
- t!(Markup: "``" => Raw("", None, 1, None, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, None, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false)); + t!(Markup: "``" => Raw("", None, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, false)); + t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false)); - t!(Markup[""]: "`````👩🚀````noend" => Raw("````noend", Some("👩🚀"), 5, Some("5 backticks"), false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false)); + t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); + t!(Markup[""]: "`````👩🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] |
