summary | refs | log | tree | commit | diff
path: root/src/syntax
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2020-08-29 17:26:49 +0200
committer GitHub <noreply@github.com> 2020-08-29 17:26:49 +0200
commit 2a6cde72726c057e2166fb4277b8fe53c398b3f9 (patch)
tree 0899e1cc799fff1aedec8a19e63170a671cf969f /src/syntax
parent 236750c35fbad916b63774df917cbc436f1d1a8c (diff)
parent d68367f32a9e698923b554984c59f0671e27ba5f (diff)
Merge pull request #11 from typst/code-blocks
Added code blocks 🚟
Diffstat (limited to 'src/syntax')
-rw-r--r-- src/syntax/parsing.rs 183
-rw-r--r-- src/syntax/tokens.rs 83
-rw-r--r-- src/syntax/tree.rs 9
3 files changed, 264 insertions, 11 deletions
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index ea72c838..0d12f6e1 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -8,7 +8,9 @@ use crate::compute::table::SpannedEntry;
use super::decoration::Decoration;
use super::span::{Pos, Span, Spanned};
use super::tokens::{is_newline_char, Token, TokenMode, Tokens};
-use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
+use super::tree::{
+ CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr, Code,
+};
use super::Ident;
/// Parse a string of source code.
@@ -77,6 +79,33 @@ impl Parser<'_> {
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
}
+ Token::Code { lang, raw, terminated } => {
+ if !terminated {
+ error!(
+ @self.feedback, Span::at(token.span.end),
+ "expected backticks",
+ );
+ }
+
+ let lang = lang.and_then(|lang| {
+ if let Some(ident) = Ident::new(lang.v) {
+ Some(Spanned::new(ident, lang.span))
+ } else {
+ error!(@self.feedback, lang.span, "invalid identifier");
+ None
+ }
+ });
+
+ let mut lines = unescape_code(raw);
+ let block = lines.len() > 1;
+
+ if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
+ lines.pop();
+ }
+
+ self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
+ }
+
Token::Text(text) => {
self.with_span(SyntaxNode::Text(text.to_string()))
}
@@ -589,17 +618,100 @@ fn unescape_string(string: &str) -> String {
/// Unescape raw markup (a backslash in front of `\` or a backtick is dropped) and split it into lines.
fn unescape_raw(raw: &str) -> Vec<String> {
let mut iter = raw.chars().peekable();
- let mut line = String::new();
- let mut lines = Vec::new();
+ let mut text = String::new(); // Unescaped text, split into lines only at the very end.
while let Some(c) = iter.next() {
if c == '\\' {
- match iter.next() {
- Some('`') => line.push('`'),
- Some(c) => { line.push('\\'); line.push(c); }
- None => line.push('\\'),
+ if let Some(c) = iter.next() {
+ if c != '\\' && c != '`' { // Not an escape sequence: keep the backslash.
+ text.push('\\');
+ }
+
+ text.push(c);
+ } else {
+ text.push('\\'); // A backslash at the very end is kept as is.
}
- } else if is_newline_char(c) {
+ } else {
+ text.push(c);
+ }
+ }
+
+ split_lines(&text)
+}
+
+/// Unescape the raw contents of a code block and split it into lines; a backslash that follows a backtick and precedes two backticks is dropped.
+fn unescape_code(raw: &str) -> Vec<String> {
+ let mut iter = raw.chars().peekable();
+ let mut text = String::new();
+ let mut backticks = 0u32; // Length of the backtick run directly before the current character.
+ let mut update_backtick_count; // Whether the counter update at the end of the loop body should run.
+
+ while let Some(c) = iter.next() {
+ update_backtick_count = true;
+
+ if c == '\\' && backticks > 0 { // A backslash can only escape when it directly follows a backtick.
+ let mut tail = String::new(); // Characters consumed while looking ahead; always emitted afterwards.
+ let mut escape_success = false;
+ let mut backticks_after_slash = 0u32;
+
+ while let Some(&s) = iter.peek() {
+ match s {
+ '\\' => {
+ if backticks_after_slash == 0 {
+ tail.push('\\');
+ } else {
+ // A pattern like `\`\` is not an escape: stop looking ahead
+ // so that it is printed verbatim.
+ break;
+ }
+ }
+ '`' => {
+ tail.push(s);
+ backticks_after_slash += 1;
+ if backticks_after_slash == 2 { // Two backticks after the backslash complete the escape.
+ escape_success = true;
+ iter.next();
+ break;
+ }
+ }
+ _ => break,
+ }
+
+ iter.next();
+ }
+
+ if !escape_success {
+ text.push(c); // Failed escape: the backslash itself stays in the output.
+ backticks = backticks_after_slash; // The tail ends in this many backticks.
+ update_backtick_count = false;
+ } else {
+ backticks = 0; // Successful escape: the backslash is dropped and the run starts over.
+ }
+
+ text.push_str(&tail);
+ } else {
+ text.push(c);
+ }
+
+ if update_backtick_count {
+ if c == '`' {
+ backticks += 1;
+ } else {
+ backticks = 0;
+ }
+ }
+ }
+
+ split_lines(&text)
+}
+
+fn split_lines(text: &str) -> Vec<String> {
+ let mut iter = text.chars().peekable();
+ let mut line = String::new();
+ let mut lines = Vec::new();
+
+ while let Some(c) = iter.next() {
+ if is_newline_char(c) {
if c == '\r' && iter.peek() == Some(&'\n') {
iter.next();
}
@@ -640,6 +752,25 @@ mod tests {
};
}
+ macro_rules! C { // Test helper: builds a `SyntaxNode::Code` from an optional language and a list of lines.
+ (None, $($line:expr),* $(,)?) => {{ // Form without a language tag.
+ let lines = vec![$($line.to_string()) ,*];
+ SyntaxNode::Code(Code {
+ lang: None,
+ block: lines.len() > 1, // Counts as a block as soon as more than one line is given.
+ lines,
+ })
+ }};
+ (Some($lang:expr), $($line:expr),* $(,)?) => {{ // Form with a language tag.
+ let lines = vec![$($line.to_string()) ,*];
+ SyntaxNode::Code(Code {
+ lang: Some(Into::<Spanned<&str>>::into($lang).map(|s| Ident(s.to_string()))),
+ block: lines.len() > 1, // Counts as a block as soon as more than one line is given.
+ lines,
+ })
+ }};
+ }
+
macro_rules! F {
($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) }
}
@@ -774,6 +905,7 @@ mod tests {
}
test("raw\\`", vec!["raw`"]);
+ test("raw\\\\`", vec!["raw\\`"]);
test("raw\ntext", vec!["raw", "text"]);
test("a\r\nb", vec!["a", "b"]);
test("a\n\nb", vec!["a", "", "b"]);
@@ -784,6 +916,28 @@ mod tests {
}
#[test]
+ fn test_unescape_code() {
+ fn test(raw: &str, expected: Vec<&str>) {
+ assert_eq!(unescape_code(raw), expected);
+ }
+
+ test("code\\`", vec!["code\\`"]); // No backtick before the backslash: nothing is unescaped.
+ test("code`\\``", vec!["code```"]); // Backtick, backslash, two backticks: the backslash is dropped.
+ test("code`\\`a", vec!["code`\\`a"]); // Only one backtick after the backslash: kept verbatim.
+ test("code``hi`\\``", vec!["code``hi```"]);
+ test("code`\\\\``", vec!["code`\\``"]); // Only the first of two backslashes is dropped.
+ test("code`\\`\\`go", vec!["code`\\`\\`go"]); // Two failed escapes in a row stay verbatim.
+ test("code`\\`\\``", vec!["code`\\```"]); // The first escape fails, the second one succeeds.
+ test("code\ntext", vec!["code", "text"]);
+ test("a\r\nb", vec!["a", "b"]);
+ test("a\n\nb", vec!["a", "", "b"]);
+ test("a\r\x0Bb", vec!["a", "", "b"]);
+ test("a\r\n\r\nb", vec!["a", "", "b"]);
+ test("code\\a", vec!["code\\a"]);
+ test("code\\", vec!["code\\"]);
+ }
+
+ #[test]
fn test_parse_simple_nodes() {
t!("" => );
t!("hi" => T("hi"));
@@ -797,6 +951,19 @@ mod tests {
e!("`hi\nyou" => s(1,3, 1,3, "expected backtick"));
t!("`hi\\`du`" => R!["hi`du"]);
+ t!("```java System.out.print```" => C![
+ Some("java"), "System.out.print"
+ ]);
+ t!("``` console.log(\n\"alert\"\n)" => C![
+ None, "console.log(", "\"alert\"", ")"
+ ]);
+ t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
+ Some("typst"), " Typst uses ``` to indicate code blocks"
+ ]);
+ e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks"));
+ e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier"));
+ t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
+
ts!("hi" => s(0,0, 0,2, T("hi")));
ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B));
ts!("💜\n\n 🌍" => s(0,0, 0,1, T("💜")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍")));
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index 1dcf9022..7ecb05fe 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -90,6 +90,16 @@ pub enum Token<'s> {
terminated: bool,
},
+ /// Multi-line code block.
+ Code {
+ /// The language of the code block, if specified.
+ lang: Option<Spanned<&'s str>>,
+ /// The raw text (not yet unescaped as for strings).
+ raw: &'s str,
+ /// Whether the closing backticks were present.
+ terminated: bool,
+ },
+
/// Any other consecutive string.
Text(&'s str),
@@ -127,6 +137,7 @@ impl<'s> Token<'s> {
Underscore => "underscore",
Backslash => "backslash",
Raw { .. } => "raw text",
+ Code { .. } => "code block",
Text(_) => "text",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
@@ -241,7 +252,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'_' if self.mode == Body => Underscore,
- '`' if self.mode == Body => self.read_raw(),
+ '`' if self.mode == Body => self.read_raw_or_code(),
// An escaped thing.
'\\' if self.mode == Body => self.read_escaped(),
@@ -330,9 +341,67 @@ impl<'s> Tokens<'s> {
Str { string, terminated }
}
- fn read_raw(&mut self) -> Token<'s> {
+ fn read_raw_or_code(&mut self) -> Token<'s> { // Reads a `Raw` token, or a `Code` token when the text opens with three backticks.
let (raw, terminated) = self.read_until_unescaped('`');
- Raw { raw, terminated }
+ if raw.is_empty() && terminated && self.peek() == Some('`') {
+ // Third tick found; this is a code block.
+ self.eat();
+
+ // Reads the lang tag (until a backtick or any whitespace, including newlines).
+ let start = self.pos();
+ let lang = self.read_string_until(
+ |c| c == '`' || c.is_whitespace() || is_newline_char(c),
+ false, 0, 0,
+ ).0;
+ let end = self.pos();
+ let lang = if !lang.is_empty() {
+ Some(Spanned::new(lang, Span::new(start, end)))
+ } else {
+ None
+ };
+
+ // Skip whitespace up to and including the first line break, or up to the first other character.
+ while let Some(c) = self.peek() {
+ if is_newline_char(c) {
+ self.eat();
+ if c == '\r' && self.peek() == Some('\n') { // Treat "\r\n" as a single line break.
+ self.eat();
+ }
+
+ break;
+ } else if c.is_whitespace() {
+ self.eat();
+ } else {
+ break;
+ }
+ }
+
+ let start = self.index();
+ let mut backticks = 0u32; // Length of the current run of consecutive backticks.
+
+ while backticks < 3 {
+ match self.eat() {
+ Some('`') => backticks += 1,
+ // Escaping of triple backticks.
+ Some('\\') if backticks == 1 && self.peek() == Some('`') => {
+ backticks = 0;
+ }
+ Some(_) => backticks = 0, // Any other character ends the run; only three adjacent backticks close the block.
+ None => break,
+ }
+ }
+
+ let terminated = backticks == 3;
+ let end = self.index() - if terminated { 3 } else { 0 }; // Leave the closing backticks out of the raw contents.
+
+ Code {
+ lang,
+ raw: &self.src[start..end],
+ terminated
+ }
+ } else {
+ Raw { raw, terminated }
+ }
}
fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) {
@@ -494,6 +563,7 @@ mod tests {
use crate::length::Length;
use crate::syntax::tests::*;
use super::*;
+ use super::super::span::Spanned;
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
@@ -515,6 +585,9 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
+ fn Code<'a>(lang: Option<&'a str>, raw: &'a str, terminated: bool) -> Token<'a> { // Test shorthand for `Token::Code`.
+ Token::Code { lang: lang.map(Spanned::zero), raw, terminated } // The language tag, if any, is wrapped with `Spanned::zero`.
+ }
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
@@ -568,6 +641,10 @@ mod tests {
t!(Body, "`[func]`" => Raw("[func]", true));
t!(Body, "`]" => Raw("]", false));
t!(Body, "`\\``" => Raw("\\`", true));
+ t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
+ t!(Body, "```rust hi```" => Code(Some("rust"), "hi", true));
+ t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
+ t!(Body, "```js \r\n document.write(\"go\")" => Code(Some("js"), " document.write(\"go\")", false));
t!(Body, "\\ " => Backslash, S(0));
t!(Header, "_`" => Invalid("_`"));
}
diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs
index 31f334d2..44acd023 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/tree.rs
@@ -33,6 +33,8 @@ pub enum SyntaxNode {
Text(String),
/// Lines of raw text.
Raw(Vec<String>),
+ /// An optionally highlighted (multi-line) code block.
+ Code(Code),
/// A function call.
Call(CallExpr),
}
@@ -199,3 +201,10 @@ impl CallExpr {
}
}
}
+/// A code block: the node the parser builds from a triple-backtick `Token::Code`.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Code {
+ pub lang: Option<Spanned<Ident>>, // Language tag; `None` if absent or not a valid identifier.
+ pub lines: Vec<String>, // Unescaped contents, one entry per line (a trailing empty line is dropped by the parser).
+ pub block: bool, // Whether the contents had more than one line before that trailing empty line was dropped.
+}