summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-09-30 18:59:33 +0200
committerLaurenz <laurmaedje@gmail.com>2020-09-30 18:59:33 +0200
commit4077a7c11ea19b1b6b6b6fe3014b9018846cf21b (patch)
tree70e4c891c2c660b4136890cebbae7c375fe36c05
parent7cc279f7ae122f4c40592004dde89792c636b3c8 (diff)
Refactor raw blocks 💱
-rw-r--r--src/layout/tree.rs23
-rw-r--r--src/parse/escaping.rs198
-rw-r--r--src/parse/parser.rs35
-rw-r--r--src/parse/tests.rs60
-rw-r--r--src/parse/tokenizer.rs121
-rw-r--r--src/syntax/span.rs6
-rw-r--r--src/syntax/token.rs22
-rw-r--r--src/syntax/tree.rs93
8 files changed, 275 insertions, 283 deletions
diff --git a/src/layout/tree.rs b/src/layout/tree.rs
index 82a91131..24a00367 100644
--- a/src/layout/tree.rs
+++ b/src/layout/tree.rs
@@ -5,7 +5,7 @@ use super::text::{layout_text, TextContext};
use super::*;
use crate::style::LayoutStyle;
use crate::syntax::{
- CallExpr, Code, Decoration, Heading, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
+ CallExpr, Decoration, Heading, Raw, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
};
use crate::{DynFuture, Feedback, Pass};
@@ -83,8 +83,7 @@ impl<'a> TreeLayouter<'a> {
SyntaxNode::Heading(heading) => self.layout_heading(heading).await,
- SyntaxNode::Raw(lines) => self.layout_raw(lines).await,
- SyntaxNode::Code(block) => self.layout_code(block).await,
+ SyntaxNode::Raw(raw) => self.layout_raw(raw).await,
SyntaxNode::Call(call) => {
self.layout_call(call.span_with(node.span)).await;
@@ -128,14 +127,18 @@ impl<'a> TreeLayouter<'a> {
self.style.text = style;
}
- async fn layout_raw(&mut self, lines: &[String]) {
+ async fn layout_raw(&mut self, raw: &Raw) {
+ if !raw.inline {
+ self.layout_parbreak();
+ }
+
// TODO: Make this more efficient.
let fallback = self.style.text.fallback.clone();
self.style.text.fallback.list.insert(0, "monospace".to_string());
self.style.text.fallback.flatten();
let mut first = true;
- for line in lines {
+ for line in &raw.lines {
if !first {
self.layouter.finish_line();
}
@@ -144,18 +147,10 @@ impl<'a> TreeLayouter<'a> {
}
self.style.text.fallback = fallback;
- }
- async fn layout_code(&mut self, code: &Code) {
- if code.block {
+ if !raw.inline {
self.layout_parbreak();
}
-
- self.layout_raw(&code.lines).await;
-
- if code.block {
- self.layout_parbreak()
- }
}
async fn layout_call(&mut self, call: Spanned<&CallExpr>) {
diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs
index 55b1fe67..a2ff963b 100644
--- a/src/parse/escaping.rs
+++ b/src/parse/escaping.rs
@@ -1,4 +1,5 @@
use super::is_newline_char;
+use crate::syntax::{Ident, Raw};
/// Resolves all escape sequences in a string.
pub fn unescape_string(string: &str) -> String {
@@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String {
out
}
-/// Resolves all escape sequences in raw markup (between backticks) and splits it into
-/// into lines.
-pub fn unescape_raw(raw: &str) -> Vec<String> {
+/// Resolves the language tag and trims the raw text.
+///
+/// Returns a `Raw` consisting of:
+/// - The language tag
+/// - The raw lines
+/// - Whether the block is inline, i.e. the untrimmed text contained no newline.
+pub fn process_raw(raw: &str) -> Raw {
+ let (lang, inner) = split_after_lang_tag(raw);
+ let (lines, had_newline) = trim_and_split_raw(inner);
+ Raw { lang, lines, inline: !had_newline }
+}
+
+/// Parse the lang tag and return it alongside the remaining inner raw text.
+fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
+ let mut lang = String::new();
+
+ let mut inner = raw;
let mut iter = raw.chars();
- let mut text = String::new();
while let Some(c) = iter.next() {
- if c == '\\' {
- if let Some(c) = iter.next() {
- if c != '\\' && c != '`' {
- text.push('\\');
- }
-
- text.push(c);
- } else {
- text.push('\\');
- }
- } else {
- text.push(c);
+ if c == '`' || c.is_whitespace() || is_newline_char(c) {
+ break;
}
+
+ inner = iter.as_str();
+ lang.push(c);
}
- split_lines(&text)
+ (Ident::new(lang), inner)
}
-/// Resolves all escape sequences in code markup (between triple backticks) and splits it
-/// into into lines.
-pub fn unescape_code(raw: &str) -> Vec<String> {
- let mut iter = raw.chars().peekable();
- let mut text = String::new();
- let mut backticks = 0u32;
- let mut update_backtick_count;
-
- while let Some(c) = iter.next() {
- update_backtick_count = true;
-
- if c == '\\' && backticks > 0 {
- let mut tail = String::new();
- let mut escape_success = false;
- let mut backticks_after_slash = 0u32;
-
- while let Some(&s) = iter.peek() {
- match s {
- '\\' => {
- if backticks_after_slash == 0 {
- tail.push('\\');
- } else {
- // Pattern like `\`\` should fail
- // escape and just be printed verbantim.
- break;
- }
- }
- '`' => {
- tail.push(s);
- backticks_after_slash += 1;
- if backticks_after_slash == 2 {
- escape_success = true;
- iter.next();
- break;
- }
- }
- _ => break,
- }
-
- iter.next();
- }
-
- if !escape_success {
- text.push(c);
- backticks = backticks_after_slash;
- update_backtick_count = false;
- } else {
- backticks = 0;
- }
-
- text.push_str(&tail);
- } else {
- text.push(c);
- }
-
- if update_backtick_count {
- if c == '`' {
- backticks += 1;
- } else {
- backticks = 0;
- }
- }
+/// Trims raw text and splits it into lines.
+///
+/// Returns whether at least one newline was contained in `raw`.
+fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
+ // Trims one whitespace at end and start.
+ let raw = raw.strip_prefix(' ').unwrap_or(raw);
+ let raw = raw.strip_suffix(' ').unwrap_or(raw);
+
+ let mut lines = split_lines(raw);
+ let had_newline = lines.len() > 1;
+ let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
+
+ // Trims a sequence of whitespace followed by a newline at the start.
+ if lines.first().map(is_whitespace).unwrap_or(false) {
+ lines.remove(0);
}
- split_lines(&text)
-}
+ // Trims a newline followed by a sequence of whitespace at the end.
+ if lines.last().map(is_whitespace).unwrap_or(false) {
+ lines.pop();
+ }
-/// Converts a hexademical sequence (without braces or "\u") into a character.
-pub fn hex_to_char(sequence: &str) -> Option<char> {
- u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+ (lines, had_newline)
}
/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
@@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec<String> {
lines
}
+/// Converts a hexadecimal sequence (without braces or "\u") into a character.
+pub fn hex_to_char(sequence: &str) -> Option<char> {
+ u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
+}
+
#[cfg(test)]
+#[rustfmt::skip]
mod tests {
use super::*;
#[test]
- #[rustfmt::skip]
fn test_unescape_strings() {
fn test(string: &str, expected: &str) {
assert_eq!(unescape_string(string), expected.to_string());
@@ -201,43 +166,48 @@ mod tests {
}
#[test]
- #[rustfmt::skip]
- fn test_unescape_raws() {
+ fn test_split_after_lang_tag() {
+ fn test(raw: &str, lang: Option<&str>, inner: &str) {
+ let (found_lang, found_inner) = split_after_lang_tag(raw);
+ assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
+ assert_eq!(found_inner, inner);
+ }
+
+ test("typst it!", Some("typst"), " it!");
+ test("typst\n it!", Some("typst"), "\n it!");
+ test("typst\n it!", Some("typst"), "\n it!");
+ test("abc`", Some("abc"), "`");
+ test(" hi", None, " hi");
+ test("`", None, "`");
+ }
+
+ #[test]
+ fn test_trim_raw() {
fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_raw(raw), expected);
+ assert_eq!(trim_and_split_raw(raw).0, expected);
}
- test("raw\\`", vec!["raw`"]);
- test("raw\\\\`", vec!["raw\\`"]);
- test("raw\ntext", vec!["raw", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("raw\\a", vec!["raw\\a"]);
- test("raw\\", vec!["raw\\"]);
+ test(" hi", vec!["hi"]);
+ test("  hi", vec![" hi"]);
+ test("\nhi", vec!["hi"]);
+ test(" \n hi", vec![" hi"]);
+ test("hi ", vec!["hi"]);
+ test("hi  ", vec!["hi "]);
+ test("hi\n", vec!["hi"]);
+ test("hi \n ", vec!["hi "]);
+ test(" \n hi \n ", vec![" hi "]);
}
#[test]
- #[rustfmt::skip]
- fn test_unescape_code() {
+ fn test_split_lines() {
fn test(raw: &str, expected: Vec<&str>) {
- assert_eq!(unescape_code(raw), expected);
+ assert_eq!(split_lines(raw), expected);
}
- test("code\\`", vec!["code\\`"]);
- test("code`\\``", vec!["code```"]);
- test("code`\\`a", vec!["code`\\`a"]);
- test("code``hi`\\``", vec!["code``hi```"]);
- test("code`\\\\``", vec!["code`\\``"]);
- test("code`\\`\\`go", vec!["code`\\`\\`go"]);
- test("code`\\`\\``", vec!["code`\\```"]);
- test("code\ntext", vec!["code", "text"]);
- test("a\r\nb", vec!["a", "b"]);
- test("a\n\nb", vec!["a", "", "b"]);
- test("a\r\x0Bb", vec!["a", "", "b"]);
- test("a\r\n\r\nb", vec!["a", "", "b"]);
- test("code\\a", vec!["code\\a"]);
- test("code\\", vec!["code\\"]);
+ test("raw\ntext", vec!["raw", "text"]);
+ test("a\r\nb", vec!["a", "b"]);
+ test("a\n\nb", vec!["a", "", "b"]);
+ test("a\r\x0Bb", vec!["a", "", "b"]);
+ test("a\r\n\r\nb", vec!["a", "", "b"]);
}
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index bbd7ee1d..3446af83 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -99,35 +99,22 @@ impl Parser<'_> {
self.parse_heading().map(SyntaxNode::Heading)
}
- Token::Raw { raw, terminated } => {
+ Token::Raw { raw, backticks, terminated } => {
if !terminated {
- error!(@self.feedback, end, "expected backtick");
+ error!(@self.feedback, end, "expected backtick(s)");
}
- self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
- }
- Token::Code { lang, raw, terminated } => {
- if !terminated {
- error!(@self.feedback, end, "expected backticks");
- }
-
- let lang = lang.and_then(|lang| {
- if let Some(ident) = Ident::new(lang.v) {
- Some(ident.span_with(lang.span))
- } else {
- error!(@self.feedback, lang.span, "invalid identifier");
- None
+ let raw = if backticks > 1 {
+ process_raw(raw)
+ } else {
+ Raw {
+ lang: None,
+ lines: split_lines(raw),
+ inline: true,
}
- });
-
- let mut lines = unescape_code(raw);
- let block = lines.len() > 1;
-
- if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
- lines.pop();
- }
+ };
- self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
+ self.with_span(SyntaxNode::Raw(raw))
}
Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
diff --git a/src/parse/tests.rs b/src/parse/tests.rs
index d663aa2a..8ddf013d 100644
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@@ -29,24 +29,17 @@ macro_rules! H {
}
macro_rules! R {
- ($($line:expr),* $(,)?) => {
- SyntaxNode::Raw(vec![$($line.to_string()),*])
- };
-}
-
-macro_rules! C {
- ($lang:expr, $($line:expr),* $(,)?) => {{
- let lines = vec![$($line.to_string()) ,*];
- SyntaxNode::Code(Code {
+ ($lang:expr, $inline:expr, $($line:expr),* $(,)?) => {{
+ SyntaxNode::Raw(Raw {
lang: $lang,
- block: lines.len() > 1,
- lines,
+ lines: vec![$($line.to_string()) ,*],
+ inline: $inline,
})
}};
}
-fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> {
- Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string())))
+fn Lang(lang: &str) -> Option<Ident> {
+ Some(Ident(lang.to_string()))
}
macro_rules! F {
@@ -220,19 +213,7 @@ fn test_parse_simple_nodes() {
t!("\\u{1f303}" => T("🌃"));
t!("\n\n\nhello" => P, T("hello"));
t!(r"a\ b" => T("a"), L, S, T("b"));
- t!("`py`" => R!["py"]);
- t!("`hi\nyou" => R!["hi", "you"]);
- e!("`hi\nyou" => s(7, 7, "expected backtick"));
- t!("`hi\\`du`" => R!["hi`du"]);
-
- ts!("```java out```" => s(0, 14, C![Lang(s(3, 7, "java")), "out"]));
- t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]);
- t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
- Lang("typst"), " Typst uses ``` to indicate code blocks"
- ]);
- e!("``` hi\nyou" => s(10, 10, "expected backticks"));
- e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence"));
e!("\\u{abc" => s(6, 6, "expected closing brace"));
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
@@ -243,6 +224,33 @@ fn test_parse_simple_nodes() {
}
#[test]
+fn test_parse_raw() {
+ t!("`py`" => R![None, true, "py"]);
+ t!("`hi\nyou" => R![None, true, "hi", "you"]);
+ t!(r"`` hi\`du``" => R![None, true, r"hi\`du"]);
+
+ // More than one backtick with optional language tag.
+ t!("``` console.log(\n\"alert\"\n)" => R![None, false, "console.log(", "\"alert\"", ")"]);
+ t!("````typst \r\n Typst uses ``` to indicate code blocks````!"
+ => R![Lang("typst"), false, " Typst uses ``` to indicate code blocks"], T("!"));
+
+ // Trimming of whitespace.
+ t!("`` a ``" => R![None, true, "a"]);
+ t!("`` a  ``" => R![None, true, "a "]);
+ t!("`` ` ``" => R![None, true, "`"]);
+ t!("```  `  ```" => R![None, true, " ` "]);
+ t!("``` ` \n ```" => R![None, false, " ` "]);
+
+ // Errors.
+ e!("`hi\nyou" => s(7, 7, "expected backtick(s)"));
+ e!("``` hi\nyou" => s(10, 10, "expected backtick(s)"));
+
+ // TODO: Bring back when spans/errors are in place.
+ // ts!("``java out``" => s(0, 12, R![Lang(s(2, 6, "java")), true, "out"]));
+ // e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
+}
+
+#[test]
fn test_parse_comments() {
// In body.
t!("hi// you\nw" => T("hi"), S, T("w"));
@@ -348,7 +356,7 @@ fn test_parse_function_bodies() {
e!(" [val][ */]" => s(8, 10, "unexpected end of block comment"));
// Raw in body.
- t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]]));
+ t!("[val][`Hi]`" => F!("val"; Tree![R![None, true, "Hi]"]]));
e!("[val][`Hi]`" => s(11, 11, "expected closing bracket"));
// Crazy.
diff --git a/src/parse/tokenizer.rs b/src/parse/tokenizer.rs
index 92d15edc..720bec43 100644
--- a/src/parse/tokenizer.rs
+++ b/src/parse/tokenizer.rs
@@ -56,7 +56,7 @@ impl<'s> Tokens<'s> {
/// The position in the string at which the last token ends and next token
/// will start.
pub fn pos(&self) -> Pos {
- Pos(self.index as u32)
+ self.index.into()
}
}
@@ -111,7 +111,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'_' if self.mode == Body => Underscore,
- '`' if self.mode == Body => self.read_raw_or_code(),
+ '`' if self.mode == Body => self.read_raw(),
// Sections.
'#' if self.mode == Body => Hashtag,
@@ -230,66 +230,31 @@ impl<'s> Tokens<'s> {
Str { string, terminated }
}
- fn read_raw_or_code(&mut self) -> Token<'s> {
- let (raw, terminated) = self.read_until_unescaped('`');
- if raw.is_empty() && terminated && self.peek() == Some('`') {
- // Third tick found; this is a code block.
+ fn read_raw(&mut self) -> Token<'s> {
+ let mut backticks = 1;
+ while self.peek() == Some('`') {
self.eat();
+ backticks += 1;
+ }
- // Reads the lang tag (until newline or whitespace).
- let start = self.pos();
- let (lang, _) = self.read_string_until(false, 0, 0, |c| {
- c == '`' || c.is_whitespace() || is_newline_char(c)
- });
- let end = self.pos();
-
- let lang = if !lang.is_empty() {
- Some(lang.span_with(Span::new(start, end)))
- } else {
- None
- };
-
- // Skip to start of raw contents.
- while let Some(c) = self.peek() {
- if is_newline_char(c) {
- self.eat();
- if c == '\r' && self.peek() == Some('\n') {
- self.eat();
- }
-
- break;
- } else if c.is_whitespace() {
- self.eat();
- } else {
- break;
- }
- }
-
- let start = self.index;
- let mut backticks = 0u32;
+ let start = self.index;
- while backticks < 3 {
- match self.eat() {
- Some('`') => backticks += 1,
- // Escaping of triple backticks.
- Some('\\') if backticks == 1 && self.peek() == Some('`') => {
- backticks = 0;
- }
- Some(_) => {}
- None => break,
- }
+ let mut found = 0;
+ while found < backticks {
+ match self.eat() {
+ Some('`') => found += 1,
+ Some(_) => found = 0,
+ None => break,
}
+ }
- let terminated = backticks == 3;
- let end = self.index - if terminated { 3 } else { 0 };
+ let terminated = found == backticks;
+ let end = self.index - if terminated { found } else { 0 };
- Code {
- lang,
- raw: &self.src[start .. end],
- terminated,
- }
- } else {
- Raw { raw, terminated }
+ Raw {
+ raw: &self.src[start .. end],
+ backticks,
+ terminated,
}
}
@@ -469,18 +434,8 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token {
Token::Str { string, terminated }
}
- fn Raw(raw: &str, terminated: bool) -> Token {
- Token::Raw { raw, terminated }
- }
- fn Code<'a>(
- lang: Option<Spanned<&'a str>>,
- raw: &'a str,
- terminated: bool,
- ) -> Token<'a> {
- Token::Code { lang, raw, terminated }
- }
- fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> {
- Some(Into::<Spanned<&str>>::into(lang))
+ fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
+ Token::Raw { raw, backticks, terminated }
}
fn UE(sequence: &str, terminated: bool) -> Token {
Token::UnicodeEscape { sequence, terminated }
@@ -535,21 +490,33 @@ mod tests {
t!(Body, "***" => Star, Star, Star);
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
- t!(Body, "`raw`" => Raw("raw", true));
t!(Body, "# hi" => Hashtag, S(0), T("hi"));
t!(Body, "#()" => Hashtag, T("()"));
- t!(Body, "`[func]`" => Raw("[func]", true));
- t!(Body, "`]" => Raw("]", false));
- t!(Body, "\\ " => Backslash, S(0));
- t!(Body, "`\\``" => Raw("\\`", true));
- t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
- t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true));
- t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
- t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false));
t!(Header, "_`" => Invalid("_`"));
}
#[test]
+ fn test_tokenize_raw() {
+ // Basics.
+ t!(Body, "`raw`" => Raw("raw", 1, true));
+ t!(Body, "`[func]`" => Raw("[func]", 1, true));
+ t!(Body, "`]" => Raw("]", 1, false));
+ t!(Body, r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false));
+
+ // Language tag.
+ t!(Body, "``` hi```" => Raw(" hi", 3, true));
+ t!(Body, "```rust hi```" => Raw("rust hi", 3, true));
+ t!(Body, r"``` hi\````" => Raw(r" hi\", 3, true), Raw("", 1, false));
+ t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true));
+ t!(Body, "```js \r\n document.write(\"go\")`"
+ => Raw("js \r\n document.write(\"go\")`", 3, false));
+
+ // More backticks.
+ t!(Body, "`````` ``````hi" => Raw(" ", 6, true), T("hi"));
+ t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true));
+ }
+
+ #[test]
fn tokenize_header_only_tokens() {
t!(Body, "a: b" => T("a:"), S(0), T("b"));
t!(Body, "c=d, " => T("c=d,"), S(0));
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 1bd14c65..d803eeeb 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -189,6 +189,12 @@ impl From<u32> for Pos {
}
}
+impl From<usize> for Pos {
+ fn from(index: usize) -> Self {
+ Self(index as u32)
+ }
+}
+
impl Offset for Pos {
fn offset(self, by: Self) -> Self {
Pos(self.0 + by.0)
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index e91a780c..b7d4c4e2 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -1,6 +1,5 @@
//! Tokenization.
-use super::span::Spanned;
use crate::length::Length;
/// A minimal semantic entity of source code.
@@ -86,21 +85,13 @@ pub enum Token<'s> {
terminated: bool,
},
- /// Raw text.
+ /// Raw block.
Raw {
- /// The raw text (not yet unescaped as for strings).
+ /// The raw text between the backticks.
raw: &'s str,
- /// Whether the closing backtick was present.
- terminated: bool,
- },
-
- /// Multi-line code block.
- Code {
- /// The language of the code block, if specified.
- lang: Option<Spanned<&'s str>>,
- /// The raw text (not yet unescaped as for strings).
- raw: &'s str,
- /// Whether the closing backticks were present.
+ /// The number of opening backticks.
+ backticks: usize,
+ /// Whether all closing backticks were present.
terminated: bool,
},
@@ -142,8 +133,7 @@ impl<'s> Token<'s> {
Self::Backslash => "backslash",
Self::Hashtag => "hashtag",
Self::UnicodeEscape { .. } => "unicode escape sequence",
- Self::Raw { .. } => "raw text",
- Self::Code { .. } => "code block",
+ Self::Raw { .. } => "raw block",
Self::Text(_) => "text",
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs
index 5327bfa4..51a7937a 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/tree.rs
@@ -31,16 +31,93 @@ pub enum SyntaxNode {
ToggleBolder,
/// Plain text.
Text(String),
+ /// An optionally syntax-highlighted raw block.
+ Raw(Raw),
/// Section headings.
Heading(Heading),
- /// Lines of raw text.
- Raw(Vec<String>),
- /// An optionally highlighted (multi-line) code block.
- Code(Code),
/// A function call.
Call(CallExpr),
}
+/// A raw block, rendered in monospace with optional syntax highlighting.
+///
+/// Raw blocks start with an arbitrary number of backticks and end with the same
+/// number of backticks. If you want to include a sequence of backticks in a raw
+/// block, simply surround the block with more backticks.
+///
+/// When using at least two backticks, an optional language tag may follow
+/// directly after the backticks. This tag defines which language to
+/// syntax-highlight the text in. Apart from the language tag and some
+/// whitespace trimming discussed below, everything inside a raw block is
+/// rendered verbatim; in particular, there are no escape sequences.
+///
+/// # Examples
+/// - Raw text is surrounded by backticks.
+/// ```typst
+/// `raw`
+/// ```
+/// - An optional language tag may follow directly at the start when the block
+/// is surrounded by at least two backticks.
+/// ```typst
+/// ``rust println!("hello!")``;
+/// ```
+/// - Blocks can span multiple lines. Two backticks suffice to be able to
+/// specify the language tag, but three are fine, too.
+/// ```typst
+/// ``rust
+/// loop {
+/// find_yak().shave();
+/// }
+/// ``
+/// ```
+/// - Start with a space to omit the language tag (the space will be trimmed
+/// from the output) and use more backticks to allow backticks in the raw
+/// text.
+/// `````typst
+/// ```` This contains ```backticks``` and has no leading & trailing spaces. ````
+/// `````
+///
+/// # Trimming
+/// If we always rendered the raw text between the backticks exactly as
+/// given, a few things would become problematic or even impossible:
+/// - Typical multiline code blocks (like in the example above) would have an
+/// additional newline before and after the code.
+/// - Raw text wrapped in more than one backtick could not exist without
+/// leading whitespace since the first word would be interpreted as a
+/// language tag.
+/// - A single backtick without surrounding spaces could not exist as raw text
+/// since it would be interpreted as belonging to the opening or closing
+/// backticks.
+///
+/// To fix these problems, we trim text in multi-backtick blocks as follows:
+/// - We trim a single space or a sequence of whitespace followed by a newline
+/// at the start.
+/// - We trim a single space or a newline followed by a sequence of whitespace
+/// at the end.
+///
+/// With these rules, a single raw backtick can be produced by the sequence
+/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding
+/// spaces and multiline code blocks don't have extra empty lines. Note that
+/// you can always force leading or trailing whitespace simply by adding more
+/// spaces.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Raw {
+ /// An optional identifier specifying the language to syntax-highlight in.
+ pub lang: Option<Ident>,
+ /// The lines of raw text, determined as the raw string between the
+ /// backticks trimmed according to the above rules and split at newlines.
+ pub lines: Vec<String>,
+ /// Whether the element can be layouted inline.
+ ///
+ /// - When true, it will be layouted integrated within the surrounding
+ /// paragraph.
+ /// - When false, it will be separated into its own paragraph.
+ ///
+ /// Single-backtick blocks are always inline-level. Multi-backtick blocks
+ /// are inline-level when they contain no newlines.
+ pub inline: bool,
+}
+
/// A section heading.
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
@@ -49,14 +126,6 @@ pub struct Heading {
pub tree: SyntaxTree,
}
-/// A code block.
-#[derive(Debug, Clone, PartialEq)]
-pub struct Code {
- pub lang: Option<Spanned<Ident>>,
- pub lines: Vec<String>,
- pub block: bool,
-}
-
/// An expression.
#[derive(Clone, PartialEq)]
pub enum Expr {