summaryrefslogtreecommitdiff
path: root/src/syntax/resolve.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-10-17 19:26:24 +0200
committerLaurenz <laurmaedje@gmail.com>2022-10-17 20:04:22 +0200
commite21822665591dc19766275da1e185215a6b945ef (patch)
tree7788e211c3c33c8b5a8ad7d5eb7574e33631eb16 /src/syntax/resolve.rs
parent4fd031a256b2ecfe524859d5599fafb386395572 (diff)
Merge some modules
Diffstat (limited to 'src/syntax/resolve.rs')
-rw-r--r--src/syntax/resolve.rs237
1 files changed, 237 insertions, 0 deletions
diff --git a/src/syntax/resolve.rs b/src/syntax/resolve.rs
new file mode 100644
index 00000000..2ad35cec
--- /dev/null
+++ b/src/syntax/resolve.rs
@@ -0,0 +1,237 @@
+use unscanny::Scanner;
+
+use super::{is_ident, is_newline, RawKind};
+use crate::util::EcoString;
+
+/// Resolve all escape sequences in a string.
+///
+/// Recognized escapes are `\\`, `\"`, `\n`, `\r`, `\t` and `\u{...}` with
+/// hexadecimal digits between the braces. A `\u{` escape whose closing brace
+/// is missing is still resolved from the digits that were found. Any escape
+/// that is unknown or cannot be resolved is copied into the output verbatim,
+/// backslash included.
+pub fn resolve_string(string: &str) -> EcoString {
+    let mut scanner = Scanner::new(string);
+    let mut resolved = EcoString::with_capacity(string.len());
+
+    while let Some(c) = scanner.eat() {
+        if c == '\\' {
+            // Remember where the backslash sits so that the raw escape can be
+            // emitted unchanged if it turns out not to be resolvable.
+            let backslash = scanner.locate(-1);
+
+            let replacement = match scanner.eat() {
+                Some('\\') => Some('\\'),
+                Some('"') => Some('"'),
+                Some('n') => Some('\n'),
+                Some('r') => Some('\r'),
+                Some('t') => Some('\t'),
+                Some('u') if scanner.eat_if('{') => {
+                    // TODO: Error if closing brace is missing.
+                    let digits = scanner.eat_while(char::is_ascii_hexdigit);
+                    let _closed = scanner.eat_if('}');
+                    resolve_hex(digits)
+                }
+                _ => None,
+            };
+
+            match replacement {
+                Some(ch) => resolved.push(ch),
+                // Everything consumed since the backslash is kept as-is.
+                None => resolved.push_str(scanner.from(backslash)),
+            }
+        } else {
+            resolved.push(c);
+        }
+    }
+
+    resolved
+}
+
+/// Resolve a hexadecimal escape sequence into a character
+/// (only the inner hex digits without braces or `\u`).
+///
+/// Returns `None` if `sequence` is empty, contains anything other than ASCII
+/// hex digits, overflows a `u32`, or does not denote a valid Unicode scalar
+/// value (for example a surrogate or a value above `10FFFF`).
+pub fn resolve_hex(sequence: &str) -> Option<char> {
+    // `from_str_radix` tolerates a leading `+` sign, which is not part of a
+    // valid escape sequence, so the digits are validated up front.
+    if !sequence.bytes().all(|byte| byte.is_ascii_hexdigit()) {
+        return None;
+    }
+
+    let code = u32::from_str_radix(sequence, 16).ok()?;
+    char::from_u32(code)
+}
+
+/// Resolve the language tag and trim the raw text.
+///
+/// With a single backtick there is no language tag and no trimming: only the
+/// line breaks are normalized to `\n` and the result is never a block. With
+/// more backticks, the leading tag is split off (and kept as `lang` only if
+/// it is a valid identifier), and the remaining text is trimmed and dedented
+/// relative to `column`. `block` then reports whether that remaining text
+/// contained a line break.
+pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawKind {
+    if backticks <= 1 {
+        let normalized = split_lines(text).join("\n");
+        return RawKind {
+            lang: None,
+            text: normalized.into(),
+            block: false,
+        };
+    }
+
+    let (tag, inner) = split_at_lang_tag(text);
+    let (trimmed, block) = trim_and_split_raw(column, inner);
+    let lang = if is_ident(tag) { Some(tag.into()) } else { None };
+
+    RawKind { lang, text: trimmed.into(), block }
+}
+
+/// Parse the lang tag and return it alongside the remaining inner raw text.
+///
+/// The tag is everything up to (but excluding) the first backtick,
+/// whitespace or newline character; it is empty if `raw` starts with one of
+/// those. The second element is the rest of `raw`, starting at that
+/// delimiter.
+fn split_at_lang_tag(raw: &str) -> (&str, &str) {
+    let mut scanner = Scanner::new(raw);
+    let ends_tag = |c: char| c == '`' || c.is_whitespace() || is_newline(c);
+
+    let tag = scanner.eat_until(ends_tag);
+    let rest = scanner.after();
+    (tag, rest)
+}
+
+/// Trim and dedent raw text, returning it re-joined with `\n` line breaks.
+///
+/// Also returns whether at least one newline was contained in `raw`.
+///
+/// The steps are:
+/// - remove one leading space,
+/// - remove one trailing space if the last non-whitespace char is a backtick,
+/// - strip up to `column` leading whitespace chars from every line but the
+///   first,
+/// - drop the first and the last line if they consist only of whitespace.
+fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) {
+    /// Whether a line is empty or made up of whitespace only.
+    fn is_blank(line: &str) -> bool {
+        line.chars().all(char::is_whitespace)
+    }
+
+    // At most one space is removed from the start.
+    if let Some(rest) = raw.strip_prefix(' ') {
+        raw = rest;
+    }
+
+    // At most one space is removed from the end, and only when the text
+    // otherwise ends in a backtick.
+    if raw.trim_end().ends_with('`') {
+        if let Some(rest) = raw.strip_suffix(' ') {
+            raw = rest;
+        }
+    }
+
+    let mut lines = split_lines(raw);
+    let had_newline = lines.len() > 1;
+
+    // Dedent based on column, but not for the first line.
+    for line in lines.iter_mut().skip(1) {
+        let mut offset = 0;
+        for c in line.chars().take(column) {
+            if !c.is_whitespace() {
+                break;
+            }
+            offset += c.len_utf8();
+        }
+        *line = &line[offset..];
+    }
+
+    // A whitespace-only first line (whitespace followed by a newline) goes.
+    if lines.first().map_or(false, |first| is_blank(first)) {
+        lines.remove(0);
+    }
+
+    // A whitespace-only last line (a newline followed by whitespace) goes.
+    if lines.last().map_or(false, |last| is_blank(last)) {
+        lines.pop();
+    }
+
+    (lines.join("\n"), had_newline)
+}
+
+/// Split a string into a vector of lines
+/// (respecting Unicode, Unix, Mac and Windows line breaks).
+///
+/// The line break characters themselves are not part of the returned
+/// slices, and a `\r\n` pair counts as a single break. The result always
+/// contains at least one (possibly empty) line.
+fn split_lines(text: &str) -> Vec<&str> {
+    let mut scanner = Scanner::new(text);
+    let mut lines = Vec::new();
+    let mut line_start = 0;
+
+    loop {
+        // Position just before the next char, i.e. where the current line
+        // ends if that char turns out to be a line break.
+        let break_at = scanner.cursor();
+
+        match scanner.eat() {
+            None => break,
+            Some(c) if is_newline(c) => {
+                // Treat `\r\n` as one line break.
+                if c == '\r' {
+                    scanner.eat_if('\n');
+                }
+
+                lines.push(&text[line_start..break_at]);
+                line_start = scanner.cursor();
+            }
+            Some(_) => {}
+        }
+    }
+
+    // Whatever follows the last line break forms the final line.
+    lines.push(&text[line_start..]);
+    lines
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_resolve_strings() {
+ #[track_caller]
+ fn test(string: &str, expected: &str) {
+ assert_eq!(resolve_string(string), expected);
+ }
+
+ test(r#"hello world"#, "hello world");
+ test(r#"hello\nworld"#, "hello\nworld");
+ test(r#"a\"bc"#, "a\"bc");
+ test(r#"a\u{2603}bc"#, "a☃bc");
+ test(r#"a\u{26c3bg"#, "a𦰻g");
+ test(r#"av\u{6797"#, "avæž—");
+ test(r#"a\\"#, "a\\");
+ test(r#"a\\\nbc"#, "a\\\nbc");
+ test(r#"a\t\r\nbc"#, "a\t\r\nbc");
+ test(r"🌎", "🌎");
+ test(r"🌎\", r"🌎\");
+ test(r"\🌎", r"\🌎");
+ }
+
+ #[test]
+ fn test_split_at_lang_tag() {
+ #[track_caller]
+ fn test(text: &str, lang: &str, inner: &str) {
+ assert_eq!(split_at_lang_tag(text), (lang, inner));
+ }
+
+ test("typst it!", "typst", " it!");
+ test("typst\n it!", "typst", "\n it!");
+ test("typst\n it!", "typst", "\n it!");
+ test("abc`", "abc", "`");
+ test(" hi", "", " hi");
+ test("`", "", "`");
+ }
+
+ #[test]
+ fn test_resolve_raw() {
+ #[track_caller]
+ fn test(
+ column: usize,
+ backticks: usize,
+ raw: &str,
+ lang: Option<&str>,
+ text: &str,
+ block: bool,
+ ) {
+ let node = resolve_raw(column, backticks, raw);
+ assert_eq!(node.lang.as_deref(), lang);
+ assert_eq!(node.text, text);
+ assert_eq!(node.block, block);
+ }
+
+ // Just one backtick.
+ test(0, 1, "py", None, "py", false);
+ test(0, 1, "1\n2", None, "1\n2", false);
+ test(0, 1, "1\r\n2", None, "1\n2", false);
+
+ // More than one backtick with lang tag.
+ test(0, 2, "js alert()", Some("js"), "alert()", false);
+ test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true);
+ test(0, 2, "♥", None, "", false);
+
+ // Trimming of whitespace (tested more thoroughly in separate test).
+ test(0, 2, " a", None, "a", false);
+ test(0, 2, " a", None, " a", false);
+ test(0, 2, " \na", None, "a", true);
+
+ // Dedenting
+ test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true);
+ }
+
+ #[test]
+ fn test_trim_raw() {
+ #[track_caller]
+ fn test(text: &str, expected: &str) {
+ assert_eq!(trim_and_split_raw(0, text).0, expected);
+ }
+
+ test(" hi", "hi");
+ test(" hi", " hi");
+ test("\nhi", "hi");
+ test(" \n hi", " hi");
+ test("hi` ", "hi`");
+ test("hi` ", "hi` ");
+ test("hi` ", "hi` ");
+ test("hi ", "hi ");
+ test("hi ", "hi ");
+ test("hi\n", "hi");
+ test("hi \n ", "hi ");
+ test(" \n hi \n ", " hi ");
+ }
+}