diff options
| author | Laurenz <laurmaedje@gmail.com> | 2020-09-30 18:59:33 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2020-09-30 18:59:33 +0200 |
| commit | 4077a7c11ea19b1b6b6b6fe3014b9018846cf21b (patch) | |
| tree | 70e4c891c2c660b4136890cebbae7c375fe36c05 /src/parse/escaping.rs | |
| parent | 7cc279f7ae122f4c40592004dde89792c636b3c8 (diff) | |
Refactor raw blocks 💱
Diffstat (limited to 'src/parse/escaping.rs')
| -rw-r--r-- | src/parse/escaping.rs | 198 |
1 file changed, 84 insertions, 114 deletions
diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs index 55b1fe67..a2ff963b 100644 --- a/src/parse/escaping.rs +++ b/src/parse/escaping.rs @@ -1,4 +1,5 @@ use super::is_newline_char; +use crate::syntax::{Ident, Raw}; /// Resolves all escape sequences in a string. pub fn unescape_string(string: &str) -> String { @@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String { out } -/// Resolves all escape sequences in raw markup (between backticks) and splits it into -/// into lines. -pub fn unescape_raw(raw: &str) -> Vec<String> { +/// Resolves the language tag and trims the raw text. +/// +/// Returns: +/// - The language tag +/// - The raw lines +/// - Whether at least one newline was present in the untrimmed text. +pub fn process_raw(raw: &str) -> Raw { + let (lang, inner) = split_after_lang_tag(raw); + let (lines, had_newline) = trim_and_split_raw(inner); + Raw { lang, lines, inline: !had_newline } +} + +/// Parse the lang tag and return it alongside the remaining inner raw text. +fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) { + let mut lang = String::new(); + + let mut inner = raw; let mut iter = raw.chars(); - let mut text = String::new(); while let Some(c) = iter.next() { - if c == '\\' { - if let Some(c) = iter.next() { - if c != '\\' && c != '`' { - text.push('\\'); - } - - text.push(c); - } else { - text.push('\\'); - } - } else { - text.push(c); + if c == '`' || c.is_whitespace() || is_newline_char(c) { + break; } + + inner = iter.as_str(); + lang.push(c); } - split_lines(&text) + (Ident::new(lang), inner) } -/// Resolves all escape sequences in code markup (between triple backticks) and splits it -/// into into lines. 
-pub fn unescape_code(raw: &str) -> Vec<String> { - let mut iter = raw.chars().peekable(); - let mut text = String::new(); - let mut backticks = 0u32; - let mut update_backtick_count; - - while let Some(c) = iter.next() { - update_backtick_count = true; - - if c == '\\' && backticks > 0 { - let mut tail = String::new(); - let mut escape_success = false; - let mut backticks_after_slash = 0u32; - - while let Some(&s) = iter.peek() { - match s { - '\\' => { - if backticks_after_slash == 0 { - tail.push('\\'); - } else { - // Pattern like `\`\` should fail - // escape and just be printed verbantim. - break; - } - } - '`' => { - tail.push(s); - backticks_after_slash += 1; - if backticks_after_slash == 2 { - escape_success = true; - iter.next(); - break; - } - } - _ => break, - } - - iter.next(); - } - - if !escape_success { - text.push(c); - backticks = backticks_after_slash; - update_backtick_count = false; - } else { - backticks = 0; - } - - text.push_str(&tail); - } else { - text.push(c); - } - - if update_backtick_count { - if c == '`' { - backticks += 1; - } else { - backticks = 0; - } - } +/// Trims raw text and splits it into lines. +/// +/// Returns whether at least one newline was contained in `raw`. +fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) { + // Trims one whitespace at end and start. + let raw = raw.strip_prefix(' ').unwrap_or(raw); + let raw = raw.strip_suffix(' ').unwrap_or(raw); + + let mut lines = split_lines(raw); + let had_newline = lines.len() > 1; + let is_whitespace = |line: &String| line.chars().all(char::is_whitespace); + + // Trims a sequence of whitespace followed by a newline at the start. + if lines.first().map(is_whitespace).unwrap_or(false) { + lines.remove(0); } - split_lines(&text) -} + // Trims a newline followed by a sequence of whitespace at the end. + if lines.last().map(is_whitespace).unwrap_or(false) { + lines.pop(); + } -/// Converts a hexademical sequence (without braces or "\u") into a character. 
-pub fn hex_to_char(sequence: &str) -> Option<char> { - u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) + (lines, had_newline) } /// Splits a string into a vector of lines (respecting Unicode & Windows line breaks). @@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec<String> { lines } +/// Converts a hexademical sequence (without braces or "\u") into a character. +pub fn hex_to_char(sequence: &str) -> Option<char> { + u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) +} + #[cfg(test)] +#[rustfmt::skip] mod tests { use super::*; #[test] - #[rustfmt::skip] fn test_unescape_strings() { fn test(string: &str, expected: &str) { assert_eq!(unescape_string(string), expected.to_string()); @@ -201,43 +166,48 @@ mod tests { } #[test] - #[rustfmt::skip] - fn test_unescape_raws() { + fn test_split_after_lang_tag() { + fn test(raw: &str, lang: Option<&str>, inner: &str) { + let (found_lang, found_inner) = split_after_lang_tag(raw); + assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang); + assert_eq!(found_inner, inner); + } + + test("typst it!", Some("typst"), " it!"); + test("typst\n it!", Some("typst"), "\n it!"); + test("typst\n it!", Some("typst"), "\n it!"); + test("abc`", Some("abc"), "`"); + test(" hi", None, " hi"); + test("`", None, "`"); + } + + #[test] + fn test_trim_raw() { fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_raw(raw), expected); + assert_eq!(trim_and_split_raw(raw).0, expected); } - test("raw\\`", vec!["raw`"]); - test("raw\\\\`", vec!["raw\\`"]); - test("raw\ntext", vec!["raw", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("raw\\a", vec!["raw\\a"]); - test("raw\\", vec!["raw\\"]); + test(" hi", vec!["hi"]); + test(" hi", vec![" hi"]); + test("\nhi", vec!["hi"]); + test(" \n hi", vec![" hi"]); + test("hi ", vec!["hi"]); + test("hi ", vec!["hi "]); + test("hi\n", 
vec!["hi"]); + test("hi \n ", vec!["hi "]); + test(" \n hi \n ", vec![" hi "]); } #[test] - #[rustfmt::skip] - fn test_unescape_code() { + fn test_split_lines() { fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_code(raw), expected); + assert_eq!(split_lines(raw), expected); } - test("code\\`", vec!["code\\`"]); - test("code`\\``", vec!["code```"]); - test("code`\\`a", vec!["code`\\`a"]); - test("code``hi`\\``", vec!["code``hi```"]); - test("code`\\\\``", vec!["code`\\``"]); - test("code`\\`\\`go", vec!["code`\\`\\`go"]); - test("code`\\`\\``", vec!["code`\\```"]); - test("code\ntext", vec!["code", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("code\\a", vec!["code\\a"]); - test("code\\", vec!["code\\"]); + test("raw\ntext", vec!["raw", "text"]); + test("a\r\nb", vec!["a", "b"]); + test("a\n\nb", vec!["a", "", "b"]); + test("a\r\x0Bb", vec!["a", "", "b"]); + test("a\r\n\r\nb", vec!["a", "", "b"]); } } |
