diff options
| author | Laurenz <laurmaedje@gmail.com> | 2020-09-30 18:59:33 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2020-09-30 18:59:33 +0200 |
| commit | 4077a7c11ea19b1b6b6b6fe3014b9018846cf21b (patch) | |
| tree | 70e4c891c2c660b4136890cebbae7c375fe36c05 /src/syntax | |
| parent | 7cc279f7ae122f4c40592004dde89792c636b3c8 (diff) | |
Refactor raw blocks 💱
Diffstat (limited to 'src/syntax')
| -rw-r--r-- | src/syntax/span.rs | 6 | ||||
| -rw-r--r-- | src/syntax/token.rs | 22 | ||||
| -rw-r--r-- | src/syntax/tree.rs | 93 |
3 files changed, 93 insertions, 28 deletions
diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 1bd14c65..d803eeeb 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -189,6 +189,12 @@ impl From<u32> for Pos { } } +impl From<usize> for Pos { + fn from(index: usize) -> Self { + Self(index as u32) + } +} + impl Offset for Pos { fn offset(self, by: Self) -> Self { Pos(self.0 + by.0) diff --git a/src/syntax/token.rs b/src/syntax/token.rs index e91a780c..b7d4c4e2 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,6 +1,5 @@ //! Tokenization. -use super::span::Spanned; use crate::length::Length; /// A minimal semantic entity of source code. @@ -86,21 +85,13 @@ pub enum Token<'s> { terminated: bool, }, - /// Raw text. + /// Raw block. Raw { - /// The raw text (not yet unescaped as for strings). + /// The raw text between the backticks. raw: &'s str, - /// Whether the closing backtick was present. - terminated: bool, - }, - - /// Multi-line code block. - Code { - /// The language of the code block, if specified. - lang: Option<Spanned<&'s str>>, - /// The raw text (not yet unescaped as for strings). - raw: &'s str, - /// Whether the closing backticks were present. + /// The number of opening backticks. + backticks: usize, + /// Whether all closing backticks were present. terminated: bool, }, @@ -142,8 +133,7 @@ impl<'s> Token<'s> { Self::Backslash => "backslash", Self::Hashtag => "hashtag", Self::UnicodeEscape { .. } => "unicode escape sequence", - Self::Raw { .. } => "raw text", - Self::Code { .. } => "code block", + Self::Raw { .. } => "raw block", Self::Text(_) => "text", Self::Invalid("*/") => "end of block comment", Self::Invalid(_) => "invalid token", diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 5327bfa4..51a7937a 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -31,16 +31,93 @@ pub enum SyntaxNode { ToggleBolder, /// Plain text. Text(String), + /// An optionally syntax-highlighted raw block. + Raw(Raw), /// Section headings. Heading(Heading), - /// Lines of raw text. - Raw(Vec<String>), - /// An optionally highlighted (multi-line) code block. - Code(Code), /// A function call. Call(CallExpr), } +/// A raw block, rendered in monospace with optional syntax highlighting. +/// +/// Raw blocks start with an arbitrary number of backticks and end with the same +/// number of backticks. If you want to include a sequence of backticks in a raw +/// block, simply surround the block with more backticks. +/// +/// When using at least two backticks, an optional language tag may follow +/// directly after the backticks. This tag defines which language to +/// syntax-highlight the text in. Apart from the language tag and some +/// whitespace trimming discussed below, everything inside a raw block is +/// rendered verbatim, in particular, there are no escape sequences. +/// +/// # Examples +/// - Raw text is surrounded by backticks. +/// ```typst +/// `raw` +/// ``` +/// - An optional language tag may follow directly at the start when the block +/// is surrounded by at least two backticks. +/// ```typst +/// ``rust println!("hello!")``; +/// ``` +/// - Blocks can span multiple lines. Two backticks suffice to be able to +/// specify the language tag, but three are fine, too. +/// ```typst +/// ``rust +/// loop { +/// find_yak().shave(); +/// } +/// `` +/// ``` +/// - Start with a space to omit the language tag (the space will be trimmed +/// from the output) and use more backticks to allow backticks in the raw +/// text. +/// `````typst +/// ```` This contains ```backticks``` and has no leading & trailing spaces. ```` +/// ````` +/// +/// # Trimming +/// If we would always render the raw text between the backticks exactly as +/// given, a few things would become problematic or even impossible: +/// - Typical multiline code blocks (like in the example above) would have an +/// additional newline before and after the code. +/// - Raw text wrapped in more than one backtick could not exist without +/// leading whitespace since the first word would be interpreted as a +/// language tag. +/// - A single backtick without surrounding spaces could not exist as raw text +/// since it would be interpreted as belonging to the opening or closing +/// backticks. +/// +/// To fix these problems, we trim text in multi-backtick blocks as follows: +/// - We trim a single space or a sequence of whitespace followed by a newline +/// at the start. +/// - We trim a single space or a newline followed by a sequence of whitespace +/// at the end. +/// +/// With these rules, a single raw backtick can be produced by the sequence +/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding +/// spaces and multiline code blocks don't have extra empty lines. Note that +/// you can always force leading or trailing whitespace simply by adding more +/// spaces. +#[derive(Debug, Clone, PartialEq)] +pub struct Raw { + /// An optional identifier specifying the language to syntax-highlight in. + pub lang: Option<Ident>, + /// The lines of raw text, determined as the raw string between the + /// backticks trimmed according to the above rules and split at newlines. + pub lines: Vec<String>, + /// Whether the element can be layouted inline. + /// + /// - When true, it will be layouted integrated within the surrounding + /// paragraph. + /// - When false, it will be separated into its own paragraph. + /// + /// Single-backtick blocks are always inline-level. Multi-backtick blocks + /// are inline-level when they contain no newlines. + pub inline: bool, +} + /// A section heading. #[derive(Debug, Clone, PartialEq)] pub struct Heading { @@ -49,14 +126,6 @@ pub struct Heading { pub tree: SyntaxTree, } -/// A code block. -#[derive(Debug, Clone, PartialEq)] -pub struct Code { - pub lang: Option<Spanned<Ident>>, - pub lines: Vec<String>, - pub block: bool, -} - /// An expression. #[derive(Clone, PartialEq)] pub enum Expr { |
