summaryrefslogtreecommitdiff
path: root/crates/typst-syntax
diff options
context:
space:
mode:
authorPgBiel <9021226+PgBiel@users.noreply.github.com>2024-06-20 22:20:39 -0300
committerPgBiel <9021226+PgBiel@users.noreply.github.com>2024-06-26 12:17:53 -0300
commit9f9e8770b8ea9c637ce4dc14e2e28c50c2d2d1ba (patch)
tree9438dafe14fdc892cfbab5a43637929fafd3a6f0 /crates/typst-syntax
parent3e69560267f3dbe57e3bacd27e62caeee2bd68b6 (diff)
use lexer subtrees to parse decorators
Diffstat (limited to 'crates/typst-syntax')
-rw-r--r--crates/typst-syntax/src/highlight.rs1
-rw-r--r--crates/typst-syntax/src/kind.rs16
-rw-r--r--crates/typst-syntax/src/lexer.rs53
-rw-r--r--crates/typst-syntax/src/parser.rs31
-rw-r--r--crates/typst-syntax/src/set.rs2
5 files changed, 44 insertions, 59 deletions
diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs
index 59cad25e..83c15b14 100644
--- a/crates/typst-syntax/src/highlight.rs
+++ b/crates/typst-syntax/src/highlight.rs
@@ -286,7 +286,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
SyntaxKind::Destructuring => None,
SyntaxKind::DestructAssignment => None,
- SyntaxKind::DecoratorMarker => Some(Tag::Comment),
SyntaxKind::Decorator => None,
SyntaxKind::LineComment => Some(Tag::Comment),
diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs
index b9ccf45c..50d40960 100644
--- a/crates/typst-syntax/src/kind.rs
+++ b/crates/typst-syntax/src/kind.rs
@@ -13,8 +13,8 @@ pub enum SyntaxKind {
LineComment,
/// A block comment: `/* ... */`.
BlockComment,
- /// A decorator's marker: `/!`
- DecoratorMarker,
+ /// A decorator: `/! allow("warning")`
+ Decorator,
/// The contents of a file or content block.
Markup,
@@ -280,9 +280,6 @@ pub enum SyntaxKind {
Destructuring,
/// A destructuring assignment expression: `(x, y) = (1, 2)`.
DestructAssignment,
-
- /// A decorator: `/! allow("amogus")`
- Decorator,
}
impl SyntaxKind {
@@ -360,7 +357,11 @@ impl SyntaxKind {
pub fn is_trivia(self) -> bool {
matches!(
self,
- Self::LineComment | Self::BlockComment | Self::Space | Self::Parbreak
+ Self::LineComment
+ | Self::BlockComment
+ | Self::Space
+ | Self::Parbreak
+ | Self::Decorator
)
}
@@ -376,7 +377,7 @@ impl SyntaxKind {
Self::Error => "syntax error",
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
- Self::DecoratorMarker => "decorator marker",
+ Self::Decorator => "decorator",
Self::Markup => "markup",
Self::Text => "text",
Self::Space => "space",
@@ -504,7 +505,6 @@ impl SyntaxKind {
Self::FuncReturn => "`return` expression",
Self::Destructuring => "destructuring pattern",
Self::DestructAssignment => "destructuring assignment expression",
- Self::Decorator => "decorator",
}
}
}
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index 05b12570..341484c7 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -18,8 +18,10 @@ pub(super) struct Lexer<'s> {
newline: bool,
/// The state held by raw line lexing.
raw: Vec<(SyntaxKind, usize)>,
- /// The state held by decorator lexing.
- decorator: Vec<(SyntaxKind, usize)>,
+ /// The subtree of tokens associated with this token.
+ /// The parser is responsible for converting this subtree into syntax nodes
+ /// matching this structure.
+ subtree: Vec<(SyntaxKind, usize)>,
/// An error for the last token.
error: Option<SyntaxError>,
}
@@ -35,8 +37,6 @@ pub(super) enum LexMode {
Code,
/// The contents of a raw block.
Raw,
- /// The contents of a decorator.
- Decorator,
}
impl<'s> Lexer<'s> {
@@ -49,7 +49,7 @@ impl<'s> Lexer<'s> {
newline: false,
error: None,
raw: Vec::new(),
- decorator: Vec::new(),
+ subtree: Vec::new(),
}
}
@@ -113,16 +113,9 @@ impl Lexer<'_> {
return kind;
}
- if self.mode == LexMode::Decorator {
- let Some((kind, end)) = self.decorator.pop() else {
- return SyntaxKind::End;
- };
- self.s.jump(end);
- return kind;
- }
-
self.newline = false;
self.error = None;
+ self.subtree.clear();
let start = self.s.cursor();
match self.s.eat() {
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
@@ -142,13 +135,17 @@ impl Lexer<'_> {
LexMode::Math => self.math(start, c),
LexMode::Code => self.code(start, c),
LexMode::Raw => unreachable!(),
- LexMode::Decorator => unreachable!(),
},
None => SyntaxKind::End,
}
}
+ /// Takes the subtree associated with the latest token.
+ pub fn take_subtree(&mut self) -> Vec<(SyntaxKind, usize)> {
+ std::mem::take(&mut self.subtree)
+ }
+
/// Eat whitespace characters greedily.
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
let more = self.s.eat_while(|c| is_space(c, self.mode));
@@ -194,15 +191,8 @@ impl Lexer<'_> {
SyntaxKind::BlockComment
}
-}
-/// Decorators.
-impl Lexer<'_> {
fn decorator(&mut self) -> SyntaxKind {
- let start = self.s.cursor() - 2;
-
- self.decorator.clear();
-
while !self.s.eat_newline() {
let start = self.s.cursor();
let token = match self.s.eat() {
@@ -215,25 +205,20 @@ impl Lexer<'_> {
Some(c @ '0'..='9') => self.number(start, c),
Some(',') => SyntaxKind::Comma,
Some(c) if is_id_start(c) => self.ident(start),
- Some(c) => {
- return self.error(eco_format!(
- "the character {c} is not valid in a decorator"
- ))
- }
+ Some(c) => self
+ .error(eco_format!("the character {c} is not valid in a decorator")),
None => break,
};
+ if token.is_error() {
+ return token;
+ }
+
let end = self.s.cursor();
- self.decorator.push((token, end));
+ self.subtree.push((token, end));
}
- // The saved tokens will be removed in reverse.
- self.decorator.reverse();
-
- // Already collected all we need from the decorator.
- self.s.jump(start + 2);
-
- SyntaxKind::DecoratorMarker
+ SyntaxKind::Decorator
}
}
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index 54417e01..e1d4bb95 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -108,7 +108,8 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
SyntaxKind::Space
| SyntaxKind::Parbreak
| SyntaxKind::LineComment
- | SyntaxKind::BlockComment => {
+ | SyntaxKind::BlockComment
+ | SyntaxKind::Decorator => {
p.eat();
return;
}
@@ -121,7 +122,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Link
| SyntaxKind::Label => p.eat(),
- SyntaxKind::DecoratorMarker => decorator(p),
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
@@ -147,19 +147,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
*at_start = false;
}
-fn decorator(p: &mut Parser) {
- let m = p.marker();
- p.enter(LexMode::Decorator);
- p.assert(SyntaxKind::DecoratorMarker);
-
- while !p.end() {
- p.eat();
- }
-
- p.exit();
- p.wrap(m, SyntaxKind::Decorator);
-}
-
/// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
@@ -1775,9 +1762,23 @@ impl<'s> Parser<'s> {
fn save(&mut self) {
let text = self.current_text();
+ let subtree = self.lexer.take_subtree();
if self.at(SyntaxKind::Error) {
let error = self.lexer.take_error().unwrap();
self.nodes.push(SyntaxNode::error(error, text));
+ } else if !subtree.is_empty() {
+ let mut text_cursor = self.current_start;
+ let mut children = Vec::with_capacity(subtree.len());
+
+ for (kind, end) in subtree {
+ // Ensure no errors in the subtree
+ assert!(!kind.is_error());
+
+ children.push(SyntaxNode::leaf(kind, &self.text[text_cursor..end]));
+ text_cursor = end;
+ }
+
+ self.nodes.push(SyntaxNode::inner(self.current, children));
} else {
self.nodes.push(SyntaxNode::leaf(self.current, text));
}
diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs
index b5b19b46..3f5a2606 100644
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@@ -54,7 +54,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::LineComment)
.add(SyntaxKind::BlockComment)
- .add(SyntaxKind::DecoratorMarker)
+ .add(SyntaxKind::Decorator)
.add(SyntaxKind::Text)
.add(SyntaxKind::Linebreak)
.add(SyntaxKind::Escape)