summaryrefslogtreecommitdiff
path: root/src/syntax/node.rs
blob: c94ee5b0111daf0210fe1f7f5d7650116c21acb1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
use super::*;

/// A syntax node, encompassing a single logical entity of parsed source code.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
    /// Strong text was enabled / disabled.
    Strong,
    /// Emphasized text was enabled / disabled.
    Emph,
    /// Whitespace containing less than two newlines.
    Space,
    /// A forced line break.
    Linebreak,
    /// A paragraph break.
    Parbreak,
    /// Plain text.
    Text(String),
    /// A section heading.
    Heading(HeadingNode),
    /// An optionally syntax-highlighted raw block.
    Raw(RawNode),
    /// An expression.
    Expr(Expr),
}

/// A section heading: `= Introduction`.
#[derive(Debug, Clone, PartialEq)]
pub struct HeadingNode {
    /// The section depth (numer of equals signs minus 1).
    pub level: usize,
    /// The contents of the heading.
    pub contents: Tree,
}

/// A raw block with optional syntax highlighting: `` `...` ``.
///
/// Raw blocks start with 1 or 3+ backticks and end with the same number of
/// backticks.
///
/// When using at least three backticks, an optional language tag may follow
/// directly after the backticks. This tag defines which language to
/// syntax-highlight the text in. Apart from the language tag and some
/// whitespace trimming discussed below, everything inside a raw block is
/// rendered verbatim, in particular, there are no escape sequences.
///
/// # Examples
/// - Raw text is surrounded by backticks.
///   ```typst
///   `raw`
///   ```
/// - An optional language tag may follow directly at the start when the block
///   is surrounded by at least three backticks.
///   ````typst
///   ```rust println!("hello!")```;
///   ````
/// - Blocks can span multiple lines.
///   ````typst
///   ```rust
///   loop {
///      find_yak().shave();
///   }
///   ```
///   ````
/// - Start with a space to omit the language tag (the space will be trimmed
///   from the output).
///   `````typst
///   ```` This has no leading space.````
///   `````
/// - Use more backticks to allow backticks in the raw text.
///   `````typst
///   ```` This contains ```backticks```.````
///   `````
///
/// # Trimming
/// If we would always render the raw text between the backticks exactly as
/// given, some things would become cumbersome/impossible to write:
/// - Typical multiline code blocks (like in the example above) would have an
///   additional newline before and after the code.
/// - Multi-line blocks would need to start with a space since a word would be
///   interpreted as a language tag.
/// - Text ending with a backtick would be impossible since the backtick would
///   be interpreted as belonging to the closing backticks.
///
/// To fix these problems, we sometimes trim a bit of space from blocks with 3+
/// backticks:
/// - At the start, we trim a single space or a sequence of whitespace followed
///   by a newline.
/// - At the end, we trim
///   - a single space if the raw text ends with a backtick followed only by
///     whitespace,
///   - a newline followed by a sequence of whitespace.
///
/// You can thus produce a single backtick without surrounding spaces with the
/// sequence ```` ``` ` ``` ````.
///
/// Note that with these rules you can always force leading or trailing
/// whitespace simply by adding more spaces.
#[derive(Debug, Clone, PartialEq)]
pub struct RawNode {
    /// An optional identifier specifying the language to syntax-highlight in.
    pub lang: Option<Ident>,
    /// The lines of raw text, determined as the raw string between the
    /// backticks trimmed according to the above rules and split at newlines.
    pub lines: Vec<String>,
    /// Whether the element is block-level, that is, it has 3+ backticks
    /// and contains at least one newline.
    pub block: bool,
}