summaryrefslogtreecommitdiff
path: root/src/parse/escaping.rs
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2020-09-30 18:59:33 +0200
committer Laurenz <laurmaedje@gmail.com> 2020-09-30 18:59:33 +0200
commit 4077a7c11ea19b1b6b6b6fe3014b9018846cf21b (patch)
tree 70e4c891c2c660b4136890cebbae7c375fe36c05 /src/parse/escaping.rs
parent 7cc279f7ae122f4c40592004dde89792c636b3c8 (diff)
Refactor raw blocks 💱
Diffstat (limited to 'src/parse/escaping.rs')
-rw-r--r-- src/parse/escaping.rs 198
1 files changed, 84 insertions, 114 deletions
diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs
index 55b1fe67..a2ff963b 100644
--- a/src/parse/escaping.rs
+++ b/src/parse/escaping.rs
@@ -1,4 +1,5 @@
use super::is_newline_char;
+use crate::syntax::{Ident, Raw};
/// Resolves all escape sequences in a string.
pub fn unescape_string(string: &str) -> String {
@@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String {
out
}
-/// Resolves all escape sequences in raw markup (between backticks) and splits it into
-/// into lines.
-pub fn unescape_raw(raw: &str) -> Vec<String> {
+/// Resolves the language tag and trims the raw text.
+///
+/// Returns a `Raw` made up of:
+/// - `lang`: the language tag, if one was found
+/// - `lines`: the trimmed raw lines
+/// - `inline`: true if the text after the tag contained no newline.
+pub fn process_raw(raw: &str) -> Raw {
+ let (lang, inner) = split_after_lang_tag(raw);
+ let (lines, had_newline) = trim_and_split_raw(inner);
+ Raw { lang, lines, inline: !had_newline }
+}
+
+/// Parses the lang tag (up to the first backtick or whitespace) and returns it with the rest.
+fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
+ let mut lang = String::new();
+
+ let mut inner = raw;
let mut iter = raw.chars();
- let mut text = String::new();
while let Some(c) = iter.next() {
- if c == '\\' {
- if let Some(c) = iter.next() {
- if c != '\\' && c != '`' {
- text.push('\\');
- }
-
- text.push(c);
- } else {
- text.push('\\');
- }
- } else {
- text.push(c);
+ if c == '`' || c.is_whitespace() || is_newline_char(c) {
+ break;
}
+
+ inner = iter.as_str();
+ lang.push(c);
}
- split_lines(&text)
+ (Ident::new(lang), inner)
}
-/// Resolves all escape sequences in code markup (between triple backticks) and splits it
-/// into into lines.
-pub fn unescape_code(raw: &str) -> Vec<String> {
- let mut iter = raw.chars().peekable();
- let mut text = String::new();
- let mut backticks = 0u32;
- let mut update_backtick_count;
-
- while let Some(c) = iter.next() {
- update_backtick_count = true;
-
- if c == '\\' && backticks > 0 {
- let mut tail = String::new();
- let mut escape_success = false;
- let mut backticks_after_slash = 0u32;
-
- while let Some(&s) = iter.peek() {
- match s {
- '\\' => {
- if backticks_after_slash == 0 {
- tail.push('\\');
- } else {
- // Pattern like `\`\` should fail
- // escape and just be printed verbantim.
- break;
- }
- }
- '`' => {
- tail.push(s);
- backticks_after_slash += 1;
- if backticks_after_slash == 2 {
- escape_success = true;
- iter.next();
- break;
- }
- }
- _ => break,
- }
-
- iter.next();
- }
-
- if !escape_success {
- text.push(c);
- backticks = backticks_after_slash;
- update_backtick_count = false;
- } else {
- backticks = 0;
- }
-
- text.push_str(&tail);
- } else {
- text.push(c);
- }
-
- if update_backtick_count {
- if c == '`' {
- backticks += 1;
- } else {
- backticks = 0;
- }
- }
+/// Trims raw text and splits it into lines.
+///
+/// Returns the lines and whether at least one newline was contained in `raw`.
+fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
+ // Trims one space (`' '`) at start and end.
+ let raw = raw.strip_prefix(' ').unwrap_or(raw);
+ let raw = raw.strip_suffix(' ').unwrap_or(raw);
+
+ let mut lines = split_lines(raw);
+ let had_newline = lines.len() > 1;
+ let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
+
+ // Trims a sequence of whitespace followed by a newline at the start.
+ if lines.first().map(is_whitespace).unwrap_or(false) {
+ lines.remove(0);
}
- split_lines(&text)
-}
+ // Trims a newline followed by a sequence of whitespace at the end.
+ if lines.last().map(is_whitespace).unwrap_or(false) {
+ lines.pop();
+ }
-/// Converts a hexademical sequence (without braces or "\u") into a character.
-pub fn hex_to_char(sequence: &str) -> Option<char> {
- u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+ (lines, had_newline)
}
/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
@@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec<String> {
lines
}
+/// Converts a hexadecimal sequence (without braces or "\u") into a character.
+pub fn hex_to_char(sequence: &str) -> Option<char> {
+ u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+}
+
#[cfg(test)]
+#[rustfmt::skip]
mod tests {
use super::*;
#[test]
- #[rustfmt::skip]
fn test_unescape_strings() {
fn test(string: &str, expected: &str) {
assert_eq!(unescape_string(string), expected.to_string());
@@ -201,43 +166,48 @@ mod tests {
}
#[test]
- #[rustfmt::skip]
- fn test_unescape_raws() {
+ fn test_split_after_lang_tag() {
+ fn test(raw: &str, lang: Option<&str>, inner: &str) {
+ let (found_lang, found_inner) = split_after_lang_tag(raw);
+ assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
+ assert_eq!(found_inner, inner);
+ }
+
+ test("typst it!", Some("typst"), " it!");
+ test("typst\n it!", Some("typst"), "\n it!");
+ test("typst\n it!", Some("typst"), "\n it!");
+ test("abc`", Some("abc"), "`");
+ test(" hi", None, " hi");
+ test("`", None, "`");
+ }
+
+ #[test]
+ fn test_trim_raw() {
fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_raw(raw), expected);
+ assert_eq!(trim_and_split_raw(raw).0, expected);
}
- test("raw\\`", vec!["raw`"]);
- test("raw\\\\`", vec!["raw\\`"]);
- test("raw\ntext", vec!["raw", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("raw\\a", vec!["raw\\a"]);
- test("raw\\", vec!["raw\\"]);
+ test(" hi", vec!["hi"]);
+ test("  hi", vec![" hi"]);
+ test("\nhi", vec!["hi"]);
+ test(" \n hi", vec![" hi"]);
+ test("hi ", vec!["hi"]);
+ test("hi  ", vec!["hi "]);
+ test("hi\n", vec!["hi"]);
+ test("hi \n ", vec!["hi "]);
+ test(" \n hi \n ", vec![" hi "]);
}
#[test]
- #[rustfmt::skip]
- fn test_unescape_code() {
+ fn test_split_lines() {
fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_code(raw), expected);
+ assert_eq!(split_lines(raw), expected);
}
- test("code\\`", vec!["code\\`"]);
- test("code`\\``", vec!["code```"]);
- test("code`\\`a", vec!["code`\\`a"]);
- test("code``hi`\\``", vec!["code``hi```"]);
- test("code`\\\\``", vec!["code`\\``"]);
- test("code`\\`\\`go", vec!["code`\\`\\`go"]);
- test("code`\\`\\``", vec!["code`\\```"]);
- test("code\ntext", vec!["code", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("code\\a", vec!["code\\a"]);
- test("code\\", vec!["code\\"]);
+ test("raw\ntext", vec!["raw", "text"]);
+ test("a\r\nb", vec!["a", "b"]);
+ test("a\n\nb", vec!["a", "", "b"]);
+ test("a\r\x0Bb", vec!["a", "", "b"]);
+ test("a\r\n\r\nb", vec!["a", "", "b"]);
}
}