summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Martin Haug <mhaug@live.de> 2020-08-29 17:20:04 +0200
committer Martin Haug <mhaug@live.de> 2020-08-29 17:20:04 +0200
commit d68367f32a9e698923b554984c59f0671e27ba5f (patch)
tree 0899e1cc799fff1aedec8a19e63170a671cf969f /src
parent 1eb584e256a3ce780029c7ab55c9e5891d05df3a (diff)
Newlines are complicated, y'all 😱
Co-authored-by: laurmaedje@outlook.de <laurmaedje@outlook.de>
Diffstat (limited to 'src')
-rw-r--r-- src/layout/tree.rs 35
-rw-r--r-- src/syntax/parsing.rs 222
-rw-r--r-- src/syntax/tokens.rs 87
-rw-r--r-- src/syntax/tree.rs 11
4 files changed, 184 insertions, 171 deletions
diff --git a/src/layout/tree.rs b/src/layout/tree.rs
index 714cfe27..16a2930a 100644
--- a/src/layout/tree.rs
+++ b/src/layout/tree.rs
@@ -3,7 +3,7 @@
use crate::style::LayoutStyle;
use crate::syntax::decoration::Decoration;
use crate::syntax::span::{Span, Spanned};
-use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree, CodeBlockExpr};
+use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree, Code};
use crate::{DynFuture, Feedback, Pass};
use super::line::{LineContext, LineLayouter};
use super::text::{layout_text, TextContext};
@@ -63,10 +63,7 @@ impl<'a> TreeLayouter<'a> {
match &node.v {
SyntaxNode::Spacing => self.layout_space(),
SyntaxNode::Linebreak => self.layouter.finish_line(),
- SyntaxNode::Parbreak => self.layouter.add_secondary_spacing(
- self.style.text.paragraph_spacing(),
- SpacingKind::PARAGRAPH,
- ),
+ SyntaxNode::Parbreak => self.layout_parbreak(),
SyntaxNode::ToggleItalic => {
self.style.text.italic = !self.style.text.italic;
@@ -84,7 +81,7 @@ impl<'a> TreeLayouter<'a> {
}
SyntaxNode::Raw(lines) => self.layout_raw(lines).await,
- SyntaxNode::CodeBlock(block) => self.layout_code(block).await,
+ SyntaxNode::Code(block) => self.layout_code(block).await,
SyntaxNode::Call(call) => {
self.layout_call(Spanned::new(call, node.span)).await;
@@ -99,6 +96,13 @@ impl<'a> TreeLayouter<'a> {
);
}
+ fn layout_parbreak(&mut self) {
+ self.layouter.add_secondary_spacing(
+ self.style.text.paragraph_spacing(),
+ SpacingKind::PARAGRAPH,
+ );
+ }
+
async fn layout_text(&mut self, text: &str) {
self.layouter.add(
layout_text(
@@ -133,19 +137,16 @@ impl<'a> TreeLayouter<'a> {
self.style.text.fallback = fallback;
}
- async fn layout_code(&mut self, block: &CodeBlockExpr) {
- let fallback = self.style.text.fallback.clone();
- self.style.text.fallback
- .list_mut()
- .insert(0, "monospace".to_string());
- self.style.text.fallback.flatten();
-
- for line in &block.raw {
- self.layout_text(line).await;
- self.layouter.finish_line();
+ async fn layout_code(&mut self, code: &Code) {
+ if code.block {
+ self.layout_parbreak();
}
- self.style.text.fallback = fallback;
+ self.layout_raw(&code.lines).await;
+
+ if code.block {
+ self.layout_parbreak()
+ }
}
async fn layout_call(&mut self, call: Spanned<&CallExpr>) {
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index 76509fae..0d12f6e1 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -7,14 +7,9 @@ use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry;
use super::decoration::Decoration;
use super::span::{Pos, Span, Spanned};
-use super::tokens::{is_newline_char, Token, TokenMode, Tokens, is_identifier};
+use super::tokens::{is_newline_char, Token, TokenMode, Tokens};
use super::tree::{
- CallExpr,
- Expr,
- SyntaxNode,
- SyntaxTree,
- TableExpr,
- CodeBlockExpr,
+ CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr, Code,
};
use super::Ident;
@@ -88,28 +83,27 @@ impl Parser<'_> {
if !terminated {
error!(
@self.feedback, Span::at(token.span.end),
- "expected code block to close",
+ "expected backticks",
);
}
- let mut valid_ident = false;
- let mut lang = lang.map(|s| s.map(|v| {
- if is_identifier(v) {
- valid_ident = true;
- }
- Ident(v.to_string())
- }));
-
- if !valid_ident {
- if let Some(l) = lang {
- error!(
- @self.feedback, l.span,
- "expected language to be a valid identifier",
- );
+
+ let lang = lang.and_then(|lang| {
+ if let Some(ident) = Ident::new(lang.v) {
+ Some(Spanned::new(ident, lang.span))
+ } else {
+ error!(@self.feedback, lang.span, "invalid identifier");
+ None
}
- lang = None;
+ });
+
+ let mut lines = unescape_code(raw);
+ let block = lines.len() > 1;
+
+ if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
+ lines.pop();
}
- self.with_span(SyntaxNode::CodeBlock(CodeBlockExpr { raw: unescape_code(raw), lang }))
+ self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
}
Token::Text(text) => {
@@ -624,102 +618,107 @@ fn unescape_string(string: &str) -> String {
/// Unescape raw markup and split it into into lines.
fn unescape_raw(raw: &str) -> Vec<String> {
let mut iter = raw.chars().peekable();
- let mut line = String::new();
- let mut lines = Vec::new();
+ let mut text = String::new();
while let Some(c) = iter.next() {
if c == '\\' {
- match iter.next() {
- Some('`') => line.push('`'),
- Some(c) => { line.push('\\'); line.push(c); }
- None => line.push('\\'),
- }
- } else if is_newline_char(c) {
- if c == '\r' && iter.peek() == Some(&'\n') {
- iter.next();
- }
+ if let Some(c) = iter.next() {
+ if c != '\\' && c != '`' {
+ text.push('\\');
+ }
- lines.push(std::mem::take(&mut line));
+ text.push(c);
+ } else {
+ text.push('\\');
+ }
} else {
- line.push(c);
+ text.push(c);
}
}
- lines.push(line);
- lines
+ split_lines(&text)
}
/// Unescape raw markup and split it into into lines.
fn unescape_code(raw: &str) -> Vec<String> {
let mut iter = raw.chars().peekable();
- let mut line = String::new();
- let mut lines = Vec::new();
- let mut backticks: usize = 0;
-
- // This assignment is used in line 731, 733;
- // the compiler does not want to acknowledge that, however.
- #[allow(unused_assignments)]
- let mut update_backtick_count = true;
+ let mut text = String::new();
+ let mut backticks = 0u32;
+ let mut update_backtick_count;
while let Some(c) = iter.next() {
update_backtick_count = true;
- if is_newline_char(c) {
- if c == '\r' && iter.peek() == Some(&'\n') {
- iter.next();
- }
- lines.push(std::mem::take(&mut line));
- } else {
- if c == '\\' && backticks > 0 {
- let mut tail = String::new();
- let mut escape_success = false;
-
- let mut backticks_after_slash: u8 = 0;
-
- while let Some(&s) = iter.peek() {
- match s {
- '\\' => {
- if backticks_after_slash == 0 {
- tail.push(s);
- } else {
- // Pattern like `\`\` should fail
- // escape and just be printed verbantim.
- break;
- }
+ if c == '\\' && backticks > 0 {
+ let mut tail = String::new();
+ let mut escape_success = false;
+ let mut backticks_after_slash = 0u32;
+
+ while let Some(&s) = iter.peek() {
+ match s {
+ '\\' => {
+ if backticks_after_slash == 0 {
+ tail.push('\\');
+ } else {
+ // Pattern like `\`\` should fail
+ // escape and just be printed verbatim.
+ break;
}
- '`' => {
- tail.push(s);
- backticks_after_slash += 1;
- if backticks_after_slash == 2 {
- escape_success = true;
- iter.next();
- break;
- }
+ }
+ '`' => {
+ tail.push(s);
+ backticks_after_slash += 1;
+ if backticks_after_slash == 2 {
+ escape_success = true;
+ iter.next();
+ break;
}
- _ => { break }
}
-
- iter.next();
+ _ => break,
}
- if !escape_success {
- line.push(c);
- backticks = backticks_after_slash as usize;
- update_backtick_count = false;
- } else {
- backticks = 0;
- }
+ iter.next();
+ }
- line.push_str(&tail);
+ if !escape_success {
+ text.push(c);
+ backticks = backticks_after_slash;
+ update_backtick_count = false;
} else {
- line.push(c);
+ backticks = 0;
}
+
+ text.push_str(&tail);
+ } else {
+ text.push(c);
}
- if update_backtick_count && c == '`' {
- backticks += 1;
- } else if update_backtick_count {
- backticks = 0;
+ if update_backtick_count {
+ if c == '`' {
+ backticks += 1;
+ } else {
+ backticks = 0;
+ }
+ }
+ }
+
+ split_lines(&text)
+}
+
+fn split_lines(text: &str) -> Vec<String> {
+ let mut iter = text.chars().peekable();
+ let mut line = String::new();
+ let mut lines = Vec::new();
+
+ while let Some(c) = iter.next() {
+ if is_newline_char(c) {
+ if c == '\r' && iter.peek() == Some(&'\n') {
+ iter.next();
+ }
+
+ lines.push(std::mem::take(&mut line));
+ } else {
+ line.push(c);
}
}
@@ -753,13 +752,23 @@ mod tests {
};
}
-
- fn Lang(text: &str) -> Option<Spanned<Ident>> { Some(Spanned::zero(Ident(text.to_string()))) }
-
macro_rules! C {
- ($lang:expr, $($line:expr),* $(,)?) => {
- SyntaxNode::CodeBlock(CodeBlockExpr { raw: vec![$($line.to_string()) ,*], lang: $lang })
- };
+ (None, $($line:expr),* $(,)?) => {{
+ let lines = vec![$($line.to_string()) ,*];
+ SyntaxNode::Code(Code {
+ lang: None,
+ block: lines.len() > 1,
+ lines,
+ })
+ }};
+ (Some($lang:expr), $($line:expr),* $(,)?) => {{
+ let lines = vec![$($line.to_string()) ,*];
+ SyntaxNode::Code(Code {
+ lang: Some(Into::<Spanned<&str>>::into($lang).map(|s| Ident(s.to_string()))),
+ block: lines.len() > 1,
+ lines,
+ })
+ }};
}
macro_rules! F {
@@ -896,6 +905,7 @@ mod tests {
}
test("raw\\`", vec!["raw`"]);
+ test("raw\\\\`", vec!["raw\\`"]);
test("raw\ntext", vec!["raw", "text"]);
test("a\r\nb", vec!["a", "b"]);
test("a\n\nb", vec!["a", "", "b"]);
@@ -942,16 +952,16 @@ mod tests {
t!("`hi\\`du`" => R!["hi`du"]);
t!("```java System.out.print```" => C![
- Lang("java"), "System.out.print"
- ]);
+ Some("java"), "System.out.print"
+ ]);
t!("``` console.log(\n\"alert\"\n)" => C![
None, "console.log(", "\"alert\"", ")"
- ]);
+ ]);
t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
- Lang("typst"), " Typst uses ``` to indicate code blocks"
- ]);
- e!("``` hi\nyou" => s(1,3, 1,3, "expected code block to close"));
- e!("```🌍 hi\nyou```" => s(0,3, 0,4, "expected language to be a valid identifier"));
+ Some("typst"), " Typst uses ``` to indicate code blocks"
+ ]);
+ e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks"));
+ e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier"));
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
ts!("hi" => s(0,0, 0,2, T("hi")));
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index dbba175d..7ecb05fe 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -252,7 +252,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'_' if self.mode == Body => Underscore,
- '`' if self.mode == Body => self.read_raw_and_code(),
+ '`' if self.mode == Body => self.read_raw_or_code(),
// An escaped thing.
'\\' if self.mode == Body => self.read_escaped(),
@@ -341,66 +341,67 @@ impl<'s> Tokens<'s> {
Str { string, terminated }
}
- fn read_raw_and_code(&mut self) -> Token<'s> {
+ fn read_raw_or_code(&mut self) -> Token<'s> {
let (raw, terminated) = self.read_until_unescaped('`');
- if raw.len() == 0 && terminated && self.peek() == Some('`') {
- // Third tick found; this is a code block
+ if raw.is_empty() && terminated && self.peek() == Some('`') {
+ // Third tick found; this is a code block.
self.eat();
- let mut backticks = 0;
- let mut terminated = true;
- // Reads the lang tag (until newline or whitespace)
- let lang_start = self.pos();
- let (lang_opt, _) = self.read_string_until(
+
+ // Reads the lang tag (until newline or whitespace).
+ let start = self.pos();
+ let lang = self.read_string_until(
|c| c == '`' || c.is_whitespace() || is_newline_char(c),
- false, 0, 0);
- let lang_end = self.pos();
+ false, 0, 0,
+ ).0;
+ let end = self.pos();
+ let lang = if !lang.is_empty() {
+ Some(Spanned::new(lang, Span::new(start, end)))
+ } else {
+ None
+ };
+
+ // Skip to start of raw contents.
+ while let Some(c) = self.peek() {
+ if is_newline_char(c) {
+ self.eat();
+ if c == '\r' && self.peek() == Some('\n') {
+ self.eat();
+ }
+
+ break;
+ } else if c.is_whitespace() {
+ self.eat();
+ } else {
+ break;
+ }
+ }
- #[derive(Debug, PartialEq)]
- enum WhitespaceIngestion { All, ExceptNewline, Never }
- let mut ingest_whitespace = WhitespaceIngestion::Never;
- let mut start = self.index();
+ let start = self.index();
+ let mut backticks = 0u32;
while backticks < 3 {
match self.eat() {
Some('`') => backticks += 1,
+ // Escaping of triple backticks.
Some('\\') if backticks == 1 && self.peek() == Some('`') => {
backticks = 0;
}
- Some(c) => {
- // Remove whitespace between language and content or
- // first line break, deal with CRLF and CR line endings.
- if ingest_whitespace != WhitespaceIngestion::All
- && c == '\n' {
- start += 1;
- ingest_whitespace = WhitespaceIngestion::All;
- } else if ingest_whitespace != WhitespaceIngestion::All
- && c == '\r' {
- start += 1;
- ingest_whitespace = WhitespaceIngestion::ExceptNewline;
- } else if ingest_whitespace == WhitespaceIngestion::Never
- && c.is_whitespace() {
- start += 1;
- } else {
- ingest_whitespace = WhitespaceIngestion::All;
- }
- }
- None => {
- terminated = false;
- break;
- }
+ Some(_) => {}
+ None => break,
}
}
- let end = self.index() - (if terminated { 3 } else { 0 });
- return Code {
- lang: if lang_opt.len() == 0 { None } else {
- Some(Spanned::new(lang_opt, Span::new(lang_start, lang_end)))
- },
+ let terminated = backticks == 3;
+ let end = self.index() - if terminated { 3 } else { 0 };
+
+ Code {
+ lang,
raw: &self.src[start..end],
terminated
}
+ } else {
+ Raw { raw, terminated }
}
- Raw { raw, terminated }
}
fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) {
diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs
index 313e76a4..44acd023 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/tree.rs
@@ -33,8 +33,8 @@ pub enum SyntaxNode {
Text(String),
/// Lines of raw text.
Raw(Vec<String>),
- /// An optionally highlighted multi-line code block.
- CodeBlock(CodeBlockExpr),
+ /// An optionally highlighted (multi-line) code block.
+ Code(Code),
/// A function call.
Call(CallExpr),
}
@@ -201,9 +201,10 @@ impl CallExpr {
}
}
}
-/// An code block.
+/// A code block.
#[derive(Debug, Clone, PartialEq)]
-pub struct CodeBlockExpr {
+pub struct Code {
pub lang: Option<Spanned<Ident>>,
- pub raw: Vec<String>,
+ pub lines: Vec<String>,
+ pub block: bool,
}