summaryrefslogtreecommitdiff
path: root/src/syntax/token.rs
blob: 5b055e39f3fadb9104a68e8bfd9d9760a79a507c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//! Token definition.

use crate::geom::Unit;

/// A minimal semantic entity of source code.
///
/// Variants that carry text hold `&'s str` slices instead of owned strings,
/// which is what allows the whole token to be `Copy`. A human-readable
/// description of each variant for error messages is available through
/// `Token::name`.
// Only `PartialEq` is derived (not `Eq`) because several variants carry an
// `f64` payload, and `f64` does not implement `Eq`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
    /// One or more whitespace characters.
    ///
    /// The contained `usize` denotes the number of newlines that were contained
    /// in the whitespace.
    Space(usize),
    /// A consecutive non-markup string.
    Text(&'s str),

    /// A line comment with inner string contents `//<str>\n`.
    LineComment(&'s str),
    /// A block comment with inner string contents `/*<str>*/`.
    ///
    /// The comment can contain nested block comments.
    BlockComment(&'s str),

    /// A star: `*`.
    Star,
    /// An underscore: `_`.
    Underscore,
    /// A backslash followed by whitespace: `\`.
    Backslash,
    /// A hashtag indicating a section heading: `#`.
    Hashtag,
    /// A non-breaking space: `~`.
    NonBreakingSpace,
    /// A raw block: `` `...` ``.
    ///
    /// See `TokenRaw` for the stored details.
    Raw(TokenRaw<'s>),
    /// A unicode escape sequence: `\u{1F5FA}`.
    ///
    /// See `TokenUnicodeEscape` for the stored details.
    UnicodeEscape(TokenUnicodeEscape<'s>),

    /// A left bracket: `[`.
    LeftBracket,
    /// A right bracket: `]`.
    RightBracket,
    /// A left brace: `{`.
    LeftBrace,
    /// A right brace: `}`.
    RightBrace,
    /// A left parenthesis: `(`.
    LeftParen,
    /// A right parenthesis: `)`.
    RightParen,

    /// A colon: `:`.
    Colon,
    /// A comma: `,`.
    Comma,
    /// An equals sign: `=`.
    Equals,
    /// A double forward chevron: `>>`.
    ///
    /// Described as "function chaining operator" by `Token::name`.
    Chain,
    /// A plus: `+`.
    Plus,
    /// A hyphen: `-`.
    ///
    /// Described as "minus sign" by `Token::name`.
    Hyphen,
    /// A slash: `/`.
    Slash,

    /// An identifier: `center`.
    Ident(&'s str),
    /// A boolean: `true`, `false`.
    Bool(bool),
    /// An integer: `120`.
    Int(i64),
    /// A floating-point number: `1.2`, `10e-4`.
    Float(f64),
    /// A length: `12pt`, `3cm`.
    ///
    /// Stored as the numeric value together with its `Unit`.
    Length(f64, Unit),
    /// A percentage: `50%`.
    ///
    /// _Note_: `50%` is stored as `50.0` here, as in the corresponding
    /// [literal].
    ///
    /// [literal]: ../ast/enum.Lit.html#variant.Percent
    Percent(f64),
    /// A hex value: `#20d82a`.
    // NOTE(review): not visible from this file whether the stored slice
    // includes the leading `#` — confirm against the tokenizer.
    Hex(&'s str),
    /// A quoted string: `"..."`.
    ///
    /// See `TokenStr` for the stored details.
    Str(TokenStr<'s>),

    /// Things that are not valid in the context they appeared in.
    ///
    /// The payload is the offending text. `Token::name` describes an invalid
    /// token whose text is exactly `*/` as "end of block comment" and every
    /// other one as "invalid token".
    Invalid(&'s str),
}

/// A quoted string: `"..."`.
///
/// The contents are borrowed as a string slice, so the token is cheap to copy
/// and needs no allocation.
///
/// Both fields have a total equality, so `Eq` is derived in addition to
/// `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct TokenStr<'s> {
    /// The string inside the quotes.
    ///
    /// _Note_: Escape sequences contained in the string are not yet applied
    /// at this point, so that a plain string slice can be stored here instead
    /// of a `String`. They are resolved later in the parser.
    pub string: &'s str,
    /// Whether the closing quote was present.
    pub terminated: bool,
}

/// A unicode escape sequence: `\u{1F5FA}`.
///
/// Only the text between the braces is stored, as a borrowed slice; it is not
/// interpreted here.
///
/// Both fields have a total equality, so `Eq` is derived in addition to
/// `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct TokenUnicodeEscape<'s> {
    /// The escape sequence between two braces.
    pub sequence: &'s str,
    /// Whether the closing brace was present.
    pub terminated: bool,
}

/// A raw block: `` `...` ``.
///
/// The raw text is borrowed as a string slice, alongside the information
/// needed to tell how the block was delimited.
///
/// All fields have a total equality, so `Eq` is derived in addition to
/// `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct TokenRaw<'s> {
    /// The raw text between the backticks.
    pub text: &'s str,
    /// The number of opening backticks.
    pub backticks: usize,
    /// Whether all closing backticks were present.
    pub terminated: bool,
}

impl<'s> Token<'s> {
    /// The natural-language name of this token for use in error messages.
    pub fn name(self) -> &'static str {
        match self {
            Self::Space(_) => "space",
            Self::Text(_) => "text",

            Self::LineComment(_) => "line comment",
            Self::BlockComment(_) => "block comment",

            Self::Star => "star",
            Self::Underscore => "underscore",
            Self::Backslash => "backslash",
            Self::Hashtag => "hashtag",
            Self::NonBreakingSpace => "non-breaking space",
            Self::Raw { .. } => "raw block",
            Self::UnicodeEscape { .. } => "unicode escape sequence",

            Self::LeftBracket => "opening bracket",
            Self::RightBracket => "closing bracket",
            Self::LeftBrace => "opening brace",
            Self::RightBrace => "closing brace",
            Self::LeftParen => "opening paren",
            Self::RightParen => "closing paren",

            Self::Colon => "colon",
            Self::Comma => "comma",
            Self::Equals => "equals sign",
            Self::Chain => "function chaining operator",
            Self::Plus => "plus sign",
            Self::Hyphen => "minus sign",
            Self::Slash => "slash",

            Self::Ident(_) => "identifier",
            Self::Bool(_) => "bool",
            Self::Int(_) => "integer",
            Self::Float(_) => "float",
            Self::Length(..) => "length",
            Self::Percent(_) => "percentage",
            Self::Hex(_) => "hex value",
            Self::Str { .. } => "string",

            Self::Invalid("*/") => "end of block comment",
            Self::Invalid(_) => "invalid token",
        }
    }
}