diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-10-17 19:26:24 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-10-17 20:04:22 +0200 |
| commit | e21822665591dc19766275da1e185215a6b945ef (patch) | |
| tree | 7788e211c3c33c8b5a8ad7d5eb7574e33631eb16 /src/syntax/resolve.rs | |
| parent | 4fd031a256b2ecfe524859d5599fafb386395572 (diff) | |
Merge some modules
Diffstat (limited to 'src/syntax/resolve.rs')
| -rw-r--r-- | src/syntax/resolve.rs | 237 |
1 files changed, 237 insertions, 0 deletions
diff --git a/src/syntax/resolve.rs b/src/syntax/resolve.rs new file mode 100644 index 00000000..2ad35cec --- /dev/null +++ b/src/syntax/resolve.rs @@ -0,0 +1,237 @@ +use unscanny::Scanner; + +use super::{is_ident, is_newline, RawKind}; +use crate::util::EcoString; + +/// Resolve all escape sequences in a string. +pub fn resolve_string(string: &str) -> EcoString { + let mut out = EcoString::with_capacity(string.len()); + let mut s = Scanner::new(string); + + while let Some(c) = s.eat() { + if c != '\\' { + out.push(c); + continue; + } + + let start = s.locate(-1); + match s.eat() { + Some('\\') => out.push('\\'), + Some('"') => out.push('"'), + Some('n') => out.push('\n'), + Some('r') => out.push('\r'), + Some('t') => out.push('\t'), + Some('u') if s.eat_if('{') => { + // TODO: Error if closing brace is missing. + let sequence = s.eat_while(char::is_ascii_hexdigit); + let _terminated = s.eat_if('}'); + match resolve_hex(sequence) { + Some(c) => out.push(c), + None => out.push_str(s.from(start)), + } + } + + _ => out.push_str(s.from(start)), + } + } + + out +} + +/// Resolve a hexadecimal escape sequence into a character +/// (only the inner hex letters without braces or `\u`). +pub fn resolve_hex(sequence: &str) -> Option<char> { + u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) +} + +/// Resolve the language tag and trim the raw text. +pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawKind { + if backticks > 1 { + let (tag, inner) = split_at_lang_tag(text); + let (text, block) = trim_and_split_raw(column, inner); + RawKind { + lang: is_ident(tag).then(|| tag.into()), + text: text.into(), + block, + } + } else { + RawKind { + lang: None, + text: split_lines(text).join("\n").into(), + block: false, + } + } +} + +/// Parse the lang tag and return it alongside the remaining inner raw text. +fn split_at_lang_tag(raw: &str) -> (&str, &str) { + let mut s = Scanner::new(raw); + ( + s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)), + s.after(), + ) +} + +/// Trim raw text and splits it into lines. +/// +/// Also returns whether at least one newline was contained in `raw`. +fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) { + // Trims one space at the start. + raw = raw.strip_prefix(' ').unwrap_or(raw); + + // Trim one space at the end if the last non-whitespace char is a backtick. + if raw.trim_end().ends_with('`') { + raw = raw.strip_suffix(' ').unwrap_or(raw); + } + + let mut lines = split_lines(raw); + + // Dedent based on column, but not for the first line. + for line in lines.iter_mut().skip(1) { + let offset = line + .chars() + .take(column) + .take_while(|c| c.is_whitespace()) + .map(char::len_utf8) + .sum(); + *line = &line[offset ..]; + } + + let had_newline = lines.len() > 1; + let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); + + // Trims a sequence of whitespace followed by a newline at the start. + if lines.first().map_or(false, is_whitespace) { + lines.remove(0); + } + + // Trims a newline followed by a sequence of whitespace at the end. + if lines.last().map_or(false, is_whitespace) { + lines.pop(); + } + + (lines.join("\n"), had_newline) +} + +/// Split a string into a vector of lines +/// (respecting Unicode, Unix, Mac and Windows line breaks). +fn split_lines(text: &str) -> Vec<&str> { + let mut s = Scanner::new(text); + let mut lines = Vec::new(); + let mut start = 0; + let mut end = 0; + + while let Some(c) = s.eat() { + if is_newline(c) { + if c == '\r' { + s.eat_if('\n'); + } + + lines.push(&text[start .. end]); + start = s.cursor(); + } + end = s.cursor(); + } + + lines.push(&text[start ..]); + lines +} + +#[cfg(test)] +#[rustfmt::skip] +mod tests { + use super::*; + + #[test] + fn test_resolve_strings() { + #[track_caller] + fn test(string: &str, expected: &str) { + assert_eq!(resolve_string(string), expected); + } + + test(r#"hello world"#, "hello world"); + test(r#"hello\nworld"#, "hello\nworld"); + test(r#"a\"bc"#, "a\"bc"); + test(r#"a\u{2603}bc"#, "a☃bc"); + test(r#"a\u{26c3bg"#, "a𦰻g"); + test(r#"av\u{6797"#, "av林"); + test(r#"a\\"#, "a\\"); + test(r#"a\\\nbc"#, "a\\\nbc"); + test(r#"a\t\r\nbc"#, "a\t\r\nbc"); + test(r"🌎", "🌎"); + test(r"🌎\", r"🌎\"); + test(r"\🌎", r"\🌎"); + } + + #[test] + fn test_split_at_lang_tag() { + #[track_caller] + fn test(text: &str, lang: &str, inner: &str) { + assert_eq!(split_at_lang_tag(text), (lang, inner)); + } + + test("typst it!", "typst", " it!"); + test("typst\n it!", "typst", "\n it!"); + test("typst\n it!", "typst", "\n it!"); + test("abc`", "abc", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); + } + + #[test] + fn test_resolve_raw() { + #[track_caller] + fn test( + column: usize, + backticks: usize, + raw: &str, + lang: Option<&str>, + text: &str, + block: bool, + ) { + let node = resolve_raw(column, backticks, raw); + assert_eq!(node.lang.as_deref(), lang); + assert_eq!(node.text, text); + assert_eq!(node.block, block); + } + + // Just one backtick. + test(0, 1, "py", None, "py", false); + test(0, 1, "1\n2", None, "1\n2", false); + test(0, 1, "1\r\n2", None, "1\n2", false); + + // More than one backtick with lang tag. + test(0, 2, "js alert()", Some("js"), "alert()", false); + test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); + test(0, 2, "♥", None, "", false); + + // Trimming of whitespace (tested more thoroughly in separate test). + test(0, 2, " a", None, "a", false); + test(0, 2, " a", None, " a", false); + test(0, 2, " \na", None, "a", true); + + // Dedenting + test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); + } + + #[test] + fn test_trim_raw() { + #[track_caller] + fn test(text: &str, expected: &str) { + assert_eq!(trim_and_split_raw(0, text).0, expected); + } + + test(" hi", "hi"); + test(" hi", " hi"); + test("\nhi", "hi"); + test(" \n hi", " hi"); + test("hi` ", "hi`"); + test("hi` ", "hi` "); + test("hi` ", "hi` "); + test("hi ", "hi "); + test("hi ", "hi "); + test("hi\n", "hi"); + test("hi \n ", "hi "); + test(" \n hi \n ", " hi "); + } +} |
