summaryrefslogtreecommitdiff
path: root/src/syntax
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2020-09-30 18:59:33 +0200
committerLaurenz <laurmaedje@gmail.com>2020-09-30 18:59:33 +0200
commit4077a7c11ea19b1b6b6b6fe3014b9018846cf21b (patch)
tree70e4c891c2c660b4136890cebbae7c375fe36c05 /src/syntax
parent7cc279f7ae122f4c40592004dde89792c636b3c8 (diff)
Refactor raw blocks 💱
Diffstat (limited to 'src/syntax')
-rw-r--r--src/syntax/span.rs6
-rw-r--r--src/syntax/token.rs22
-rw-r--r--src/syntax/tree.rs93
3 files changed, 93 insertions, 28 deletions
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 1bd14c65..d803eeeb 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -189,6 +189,12 @@ impl From<u32> for Pos {
}
}
+impl From<usize> for Pos {
+ fn from(index: usize) -> Self {
+ Self(index as u32) // NOTE(review): `as` silently truncates when `index` exceeds `u32::MAX`.
+ }
+}
+
impl Offset for Pos {
fn offset(self, by: Self) -> Self {
Pos(self.0 + by.0)
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index e91a780c..b7d4c4e2 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -1,6 +1,5 @@
//! Tokenization.
-use super::span::Spanned;
use crate::length::Length;
/// A minimal semantic entity of source code.
@@ -86,21 +85,13 @@ pub enum Token<'s> {
terminated: bool,
},
- /// Raw text.
+ /// Raw block.
Raw {
- /// The raw text (not yet unescaped as for strings).
+ /// The raw text between the backticks.
raw: &'s str,
- /// Whether the closing backtick was present.
- terminated: bool,
- },
-
- /// Multi-line code block.
- Code {
- /// The language of the code block, if specified.
- lang: Option<Spanned<&'s str>>,
- /// The raw text (not yet unescaped as for strings).
- raw: &'s str,
- /// Whether the closing backticks were present.
+ /// The number of opening backticks.
+ backticks: usize,
+ /// Whether all closing backticks were present.
terminated: bool,
},
@@ -142,8 +133,7 @@ impl<'s> Token<'s> {
Self::Backslash => "backslash",
Self::Hashtag => "hashtag",
Self::UnicodeEscape { .. } => "unicode escape sequence",
- Self::Raw { .. } => "raw text",
- Self::Code { .. } => "code block",
+ Self::Raw { .. } => "raw block",
Self::Text(_) => "text",
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs
index 5327bfa4..51a7937a 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/tree.rs
@@ -31,16 +31,93 @@ pub enum SyntaxNode {
ToggleBolder,
/// Plain text.
Text(String),
+ /// An optionally syntax-highlighted raw block.
+ Raw(Raw),
/// Section headings.
Heading(Heading),
- /// Lines of raw text.
- Raw(Vec<String>),
- /// An optionally highlighted (multi-line) code block.
- Code(Code),
/// A function call.
Call(CallExpr),
}
+/// A raw block, rendered in monospace with optional syntax highlighting.
+///
+/// Raw blocks start with an arbitrary number of backticks and end with the same
+/// number of backticks. If you want to include a sequence of backticks in a raw
+/// block, simply surround the block with more backticks.
+///
+/// When using at least two backticks, an optional language tag may follow
+/// directly after the backticks. This tag defines which language to
+/// syntax-highlight the text in. Apart from the language tag and some
+/// whitespace trimming discussed below, everything inside a raw block is
+/// rendered verbatim; in particular, there are no escape sequences.
+///
+/// # Examples
+/// - Raw text is surrounded by backticks.
+/// ```typst
+/// `raw`
+/// ```
+/// - An optional language tag may follow directly at the start when the block
+/// is surrounded by at least two backticks.
+/// ```typst
+/// ``rust println!("hello!")``;
+/// ```
+/// - Blocks can span multiple lines. Two backticks suffice to be able to
+/// specify the language tag, but three are fine, too.
+/// ```typst
+/// ``rust
+/// loop {
+/// find_yak().shave();
+/// }
+/// ``
+/// ```
+/// - Start with a space to omit the language tag (the space will be trimmed
+/// from the output) and use more backticks to allow backticks in the raw
+/// text.
+/// `````typst
+/// ```` This contains ```backticks``` and has no leading & trailing spaces. ````
+/// `````
+///
+/// # Trimming
+/// If we always rendered the raw text between the backticks exactly as
+/// given, a few things would become problematic or even impossible:
+/// - Typical multiline code blocks (like in the example above) would have an
+/// additional newline before and after the code.
+/// - Raw text wrapped in more than one backtick could not exist without
+/// leading whitespace since the first word would be interpreted as a
+/// language tag.
+/// - A single backtick without surrounding spaces could not exist as raw text
+/// since it would be interpreted as belonging to the opening or closing
+/// backticks.
+///
+/// To fix these problems, we trim text in multi-backtick blocks as follows:
+/// - We trim a single space or a sequence of whitespace followed by a newline
+/// at the start.
+/// - We trim a single space or a newline followed by a sequence of whitespace
+/// at the end.
+///
+/// With these rules, a single raw backtick can be produced by the sequence
+/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding
+/// spaces and multiline code blocks don't have extra empty lines. Note that
+/// you can always force leading or trailing whitespace simply by adding more
+/// spaces.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Raw {
+ /// An optional identifier specifying the language to syntax-highlight in.
+ pub lang: Option<Ident>,
+ /// The lines of raw text, determined as the raw string between the
+ /// backticks trimmed according to the above rules and split at newlines.
+ pub lines: Vec<String>,
+ /// Whether the element can be laid out inline.
+ ///
+ /// - When true, it will be laid out as part of the surrounding
+ /// paragraph.
+ /// - When false, it will be separated into its own paragraph.
+ ///
+ /// Single-backtick blocks are always inline-level. Multi-backtick blocks
+ /// are inline-level when they contain no newlines.
+ pub inline: bool,
+}
+
/// A section heading.
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
@@ -49,14 +126,6 @@ pub struct Heading {
pub tree: SyntaxTree,
}
-/// A code block.
-#[derive(Debug, Clone, PartialEq)]
-pub struct Code {
- pub lang: Option<Spanned<Ident>>,
- pub lines: Vec<String>,
- pub block: bool,
-}
-
/// An expression.
#[derive(Clone, PartialEq)]
pub enum Expr {