summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2023-01-15 12:00:13 +0100
committer Laurenz <laurmaedje@gmail.com> 2023-01-15 12:40:27 +0100
commit 40561e57fbbc68becac07acd54a34f94f591f277 (patch)
tree 9e3401f987f1b19ef30162ac00395b7bbba871c6 /src
parent 15f0434d1fdd03bc84cacaf6a39ac294a0c75789 (diff)
Remove most fields from `SyntaxKind` enum
Diffstat (limited to 'src')
-rw-r--r-- src/diag.rs 2
-rw-r--r-- src/ide/complete.rs 30
-rw-r--r-- src/ide/highlight.rs 47
-rw-r--r-- src/ide/tooltip.rs 16
-rw-r--r-- src/model/eval.rs 32
-rw-r--r-- src/model/library.rs 4
-rw-r--r-- src/syntax/ast.rs 441
-rw-r--r-- src/syntax/kind.rs 281
-rw-r--r-- src/syntax/lexer.rs 494
-rw-r--r-- src/syntax/linked.rs 0
-rw-r--r-- src/syntax/mod.rs 9
-rw-r--r-- src/syntax/node.rs 316
-rw-r--r-- src/syntax/parser.rs 1311
-rw-r--r-- src/syntax/parsing.rs 1118
-rw-r--r-- src/syntax/reparse.rs (renamed from src/syntax/incremental.rs) 16
-rw-r--r-- src/syntax/resolve.rs 233
-rw-r--r-- src/syntax/source.rs 13
17 files changed, 2045 insertions, 2318 deletions
diff --git a/src/diag.rs b/src/diag.rs
index e0015fcc..5c5d9de9 100644
--- a/src/diag.rs
+++ b/src/diag.rs
@@ -50,7 +50,7 @@ pub type SourceResult<T> = Result<T, Box<Vec<SourceError>>>;
/// An error in a source file.
///
-/// This contained spans will only be detached if any of the input source files
+/// The contained spans will only be detached if any of the input source files
/// were detached.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct SourceError {
diff --git a/src/ide/complete.rs b/src/ide/complete.rs
index bbc4115a..9e13fc8d 100644
--- a/src/ide/complete.rs
+++ b/src/ide/complete.rs
@@ -138,7 +138,7 @@ fn complete_params(ctx: &mut CompletionContext) -> bool {
(SyntaxKind::Colon, _) => prev.prev_leaf(),
_ => None,
};
- if let SyntaxKind::Ident(param) = before_colon.kind();
+ if let Some(param) = before_colon.cast::<ast::Ident>();
then {
ctx.from = match ctx.leaf.kind() {
SyntaxKind::Colon | SyntaxKind::Space { .. } => ctx.cursor,
@@ -160,11 +160,11 @@ fn complete_params(ctx: &mut CompletionContext) -> bool {
deciding.kind(),
SyntaxKind::LeftParen
| SyntaxKind::Comma
- | SyntaxKind::Ident(_)
+ | SyntaxKind::Ident
);
then {
ctx.from = match deciding.kind() {
- SyntaxKind::Ident(_) => deciding.offset(),
+ SyntaxKind::Ident => deciding.offset(),
_ => ctx.cursor,
};
@@ -192,9 +192,9 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
// Behind half-completed symbol: "$arrow:|$".
if_chain! {
- if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == ":");
+ if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == ":");
if let Some(prev) = ctx.leaf.prev_leaf();
- if matches!(prev.kind(), SyntaxKind::Ident(_));
+ if matches!(prev.kind(), SyntaxKind::Ident);
then {
ctx.from = prev.offset();
ctx.symbol_completions(false);
@@ -205,7 +205,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
// Start of a symbol: ":|".
// Checking for a text node ensures that "\:" isn't completed.
if ctx.before.ends_with(':')
- && matches!(ctx.leaf.kind(), SyntaxKind::Text(_) | SyntaxKind::Atom(_))
+ && matches!(ctx.leaf.kind(), SyntaxKind::Text | SyntaxKind::Atom)
{
ctx.from = ctx.cursor;
ctx.symbol_completions(needs_colon);
@@ -213,7 +213,7 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
}
// An existing symbol: ":arrow:".
- if matches!(ctx.leaf.kind(), SyntaxKind::Symbol(_)) {
+ if matches!(ctx.leaf.kind(), SyntaxKind::Symbol) {
// We want to complete behind the colon, therefore plus 1.
let has_colon = ctx.after.starts_with(':');
ctx.from = ctx.leaf.offset() + (has_colon as usize);
@@ -225,12 +225,12 @@ fn complete_symbols(ctx: &mut CompletionContext) -> bool {
if_chain! {
if matches!(
ctx.leaf.kind(),
- SyntaxKind::Text(_) | SyntaxKind::Atom(_) | SyntaxKind::Ident(_)
+ SyntaxKind::Text | SyntaxKind::Atom | SyntaxKind::Ident
);
if let Some(prev) = ctx.leaf.prev_leaf();
- if matches!(prev.kind(), SyntaxKind::Symbol(_)) || matches!(
+ if matches!(prev.kind(), SyntaxKind::Symbol) || matches!(
prev.kind(),
- SyntaxKind::Text(s) | SyntaxKind::Atom(s) if s == ":"
+ SyntaxKind::Text | SyntaxKind::Atom if prev.text() == ":"
);
then {
// We want to complete behind the colon, therefore plus 1.
@@ -252,14 +252,14 @@ fn complete_markup(ctx: &mut CompletionContext) -> bool {
// Start of an interpolated identifier: "#|".
// Checking for a text node ensures that "\#" isn't completed.
- if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text(_)) {
+ if ctx.before.ends_with('#') && matches!(ctx.leaf.kind(), SyntaxKind::Text) {
ctx.from = ctx.cursor;
ctx.expr_completions(true);
return true;
}
// An existing identifier: "#pa|".
- if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) {
+ if matches!(ctx.leaf.kind(), SyntaxKind::Ident) {
// We want to complete behind the hashtag, therefore plus 1.
ctx.from = ctx.leaf.offset() + 1;
ctx.expr_completions(true);
@@ -298,14 +298,14 @@ fn complete_math(ctx: &mut CompletionContext) -> bool {
}
// Start of an interpolated identifier: "#|".
- if matches!(ctx.leaf.kind(), SyntaxKind::Atom(s) if s == "#") {
+ if matches!(ctx.leaf.kind(), SyntaxKind::Atom if ctx.leaf.text() == "#") {
ctx.from = ctx.cursor;
ctx.expr_completions(true);
return true;
}
// Behind existing atom or identifier: "$a|$" or "$abc|$".
- if matches!(ctx.leaf.kind(), SyntaxKind::Atom(_) | SyntaxKind::Ident(_)) {
+ if matches!(ctx.leaf.kind(), SyntaxKind::Atom | SyntaxKind::Ident) {
ctx.from = ctx.leaf.offset();
ctx.math_completions();
return true;
@@ -331,7 +331,7 @@ fn complete_code(ctx: &mut CompletionContext) -> bool {
}
// An existing identifier: "{ pa| }".
- if matches!(ctx.leaf.kind(), SyntaxKind::Ident(_)) {
+ if matches!(ctx.leaf.kind(), SyntaxKind::Ident) {
ctx.from = ctx.leaf.offset();
ctx.expr_completions(false);
return true;
diff --git a/src/ide/highlight.rs b/src/ide/highlight.rs
index 321bf9a6..cc502537 100644
--- a/src/ide/highlight.rs
+++ b/src/ide/highlight.rs
@@ -119,7 +119,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
_ => Category::Operator,
}),
SyntaxKind::Hat => Some(Category::MathOperator),
- SyntaxKind::Amp => Some(Category::MathOperator),
SyntaxKind::Dot => Some(Category::Punctuation),
SyntaxKind::Eq => match node.parent_kind() {
Some(SyntaxKind::Heading) => None,
@@ -159,38 +158,38 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::As => Some(Category::Keyword),
SyntaxKind::Markup { .. }
- if node.parent_kind() == Some(&SyntaxKind::TermItem)
+ if node.parent_kind() == Some(SyntaxKind::TermItem)
&& node.next_sibling().as_ref().map(|v| v.kind())
- == Some(&SyntaxKind::Colon) =>
+ == Some(SyntaxKind::Colon) =>
{
Some(Category::ListTerm)
}
SyntaxKind::Markup { .. } => None,
- SyntaxKind::Text(_) => None,
+ SyntaxKind::Text => None,
SyntaxKind::Linebreak => Some(Category::Escape),
- SyntaxKind::Escape(_) => Some(Category::Escape),
- SyntaxKind::Shorthand(_) => Some(Category::Escape),
- SyntaxKind::Symbol(_) => Some(Category::Escape),
+ SyntaxKind::Escape => Some(Category::Escape),
+ SyntaxKind::Shorthand => Some(Category::Escape),
+ SyntaxKind::Symbol => Some(Category::Escape),
SyntaxKind::SmartQuote { .. } => None,
SyntaxKind::Strong => Some(Category::Strong),
SyntaxKind::Emph => Some(Category::Emph),
- SyntaxKind::Raw(_) => Some(Category::Raw),
- SyntaxKind::Link(_) => Some(Category::Link),
- SyntaxKind::Label(_) => Some(Category::Label),
- SyntaxKind::Ref(_) => Some(Category::Ref),
+ SyntaxKind::Raw { .. } => Some(Category::Raw),
+ SyntaxKind::Link => Some(Category::Link),
+ SyntaxKind::Label => Some(Category::Label),
+ SyntaxKind::Ref => Some(Category::Ref),
SyntaxKind::Heading => Some(Category::Heading),
SyntaxKind::ListItem => None,
SyntaxKind::EnumItem => None,
- SyntaxKind::EnumNumbering(_) => Some(Category::ListMarker),
+ SyntaxKind::EnumNumbering => Some(Category::ListMarker),
SyntaxKind::TermItem => None,
SyntaxKind::Math => None,
- SyntaxKind::Atom(_) => None,
+ SyntaxKind::Atom => None,
SyntaxKind::Script => None,
SyntaxKind::Frac => None,
- SyntaxKind::AlignPoint => None,
+ SyntaxKind::AlignPoint => Some(Category::MathOperator),
- SyntaxKind::Ident(_) => match node.parent_kind() {
+ SyntaxKind::Ident => match node.parent_kind() {
Some(
SyntaxKind::Markup { .. }
| SyntaxKind::Math
@@ -202,9 +201,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
if node
.parent()
.and_then(|p| p.parent())
- .filter(|gp| gp.kind() == &SyntaxKind::Parenthesized)
+ .filter(|gp| gp.kind() == SyntaxKind::Parenthesized)
.and_then(|gp| gp.parent())
- .map_or(false, |ggp| ggp.kind() == &SyntaxKind::FuncCall)
+ .map_or(false, |ggp| ggp.kind() == SyntaxKind::FuncCall)
&& node.next_sibling().is_none() =>
{
Some(Category::Function)
@@ -218,17 +217,17 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
Some(SyntaxKind::SetRule) => Some(Category::Function),
Some(SyntaxKind::ShowRule)
if node.prev_sibling().as_ref().map(|v| v.kind())
- == Some(&SyntaxKind::Show) =>
+ == Some(SyntaxKind::Show) =>
{
Some(Category::Function)
}
_ => None,
},
- SyntaxKind::Bool(_) => Some(Category::Keyword),
- SyntaxKind::Int(_) => Some(Category::Number),
- SyntaxKind::Float(_) => Some(Category::Number),
- SyntaxKind::Numeric(_, _) => Some(Category::Number),
- SyntaxKind::Str(_) => Some(Category::String),
+ SyntaxKind::Bool => Some(Category::Keyword),
+ SyntaxKind::Int => Some(Category::Number),
+ SyntaxKind::Float => Some(Category::Number),
+ SyntaxKind::Numeric => Some(Category::Number),
+ SyntaxKind::Str => Some(Category::String),
SyntaxKind::CodeBlock => None,
SyntaxKind::ContentBlock => None,
SyntaxKind::Parenthesized => None,
@@ -259,7 +258,7 @@ pub fn highlight(node: &LinkedNode) -> Option<Category> {
SyntaxKind::LoopContinue => None,
SyntaxKind::FuncReturn => None,
- SyntaxKind::Error(_, _) => Some(Category::Error),
+ SyntaxKind::Error => Some(Category::Error),
}
}
diff --git a/src/ide/tooltip.rs b/src/ide/tooltip.rs
index 62cb11c1..8c734bbb 100644
--- a/src/ide/tooltip.rs
+++ b/src/ide/tooltip.rs
@@ -18,12 +18,12 @@ pub fn tooltip(world: &dyn World, source: &Source, cursor: usize) -> Option<Stri
/// Tooltip for a function or set rule name.
fn function_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
if_chain! {
- if let SyntaxKind::Ident(ident) = leaf.kind();
+ if let Some(ident) = leaf.cast::<ast::Ident>();
if matches!(
leaf.parent_kind(),
Some(SyntaxKind::FuncCall | SyntaxKind::SetRule),
);
- if let Some(Value::Func(func)) = world.library().scope.get(ident);
+ if let Some(Value::Func(func)) = world.library().scope.get(&ident);
if let Some(info) = func.info();
then {
return Some(plain_docs_sentence(&info.docs));
@@ -60,8 +60,8 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
// Hovering over the parameter name.
if_chain! {
if leaf.index() == 0;
- if let SyntaxKind::Ident(ident) = leaf.kind();
- if let Some(param) = info.param(ident);
+ if let Some(ident) = leaf.cast::<ast::Ident>();
+ if let Some(param) = info.param(&ident);
then {
return Some(plain_docs_sentence(param.docs));
}
@@ -69,9 +69,9 @@ fn named_param_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
// Hovering over a string parameter value.
if_chain! {
- if let SyntaxKind::Str(string) = leaf.kind();
+ if let Some(string) = leaf.cast::<ast::Str>();
if let Some(param) = info.param(&named.name());
- if let Some(docs) = find_string_doc(&param.cast, string);
+ if let Some(docs) = find_string_doc(&param.cast, &string.get());
then {
return Some(docs.into());
}
@@ -95,8 +95,8 @@ fn find_string_doc(info: &CastInfo, string: &str) -> Option<&'static str> {
fn font_family_tooltip(world: &dyn World, leaf: &LinkedNode) -> Option<String> {
if_chain! {
// Ensure that we are on top of a string.
- if let SyntaxKind::Str(string) = leaf.kind();
- let lower = string.to_lowercase();
+ if let Some(string) = leaf.cast::<ast::Str>();
+ let lower = string.get().to_lowercase();
// Ensure that we are in the arguments to the text function.
if let Some(parent) = leaf.parent();
diff --git a/src/model/eval.rs b/src/model/eval.rs
index 789df0c7..8e8c93c5 100644
--- a/src/model/eval.rs
+++ b/src/model/eval.rs
@@ -16,8 +16,8 @@ use crate::diag::{
};
use crate::geom::{Abs, Angle, Em, Fr, Ratio};
use crate::syntax::ast::AstNode;
-use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode, Unit};
-use crate::util::PathExt;
+use crate::syntax::{ast, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode};
+use crate::util::{EcoString, PathExt};
use crate::World;
const MAX_ITERATIONS: usize = 10_000;
@@ -389,13 +389,13 @@ impl Eval for ast::Symbol {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok((vm.items.symbol)(self.get().clone()))
+ Ok((vm.items.symbol)(self.get().into()))
}
}
impl ast::Symbol {
fn eval_in_math(&self, vm: &mut Vm) -> SourceResult<Content> {
- Ok((vm.items.symbol)(self.get().clone() + ":op".into()))
+ Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into()))
}
}
@@ -427,8 +427,8 @@ impl Eval for ast::Raw {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- let text = self.text().clone();
- let lang = self.lang().cloned();
+ let text = self.text();
+ let lang = self.lang().map(Into::into);
let block = self.block();
Ok((vm.items.raw)(text, lang, block))
}
@@ -446,7 +446,7 @@ impl Eval for ast::Label {
type Output = Value;
fn eval(&self, _: &mut Vm) -> SourceResult<Self::Output> {
- Ok(Value::Label(Label(self.get().clone())))
+ Ok(Value::Label(Label(self.get().into())))
}
}
@@ -454,7 +454,7 @@ impl Eval for ast::Ref {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok((vm.items.ref_)(self.get().clone()))
+ Ok((vm.items.ref_)(self.get().into()))
}
}
@@ -542,7 +542,7 @@ impl Eval for ast::AlignPoint {
type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
- Ok((vm.items.math_align_point)(self.count()))
+ Ok((vm.items.math_align_point)())
}
}
@@ -563,7 +563,7 @@ impl ast::Ident {
if self.as_untyped().len() == self.len()
&& matches!(vm.scopes.get(&self), Ok(Value::Func(_)) | Err(_))
{
- Ok((vm.items.symbol)(self.get().clone() + ":op".into()))
+ Ok((vm.items.symbol)(EcoString::from(self.get()) + ":op".into()))
} else {
Ok(self.eval(vm)?.display_in_math())
}
@@ -616,11 +616,11 @@ impl Eval for ast::Numeric {
fn eval(&self, _: &mut Vm) -> SourceResult<Self::Output> {
let (v, unit) = self.get();
Ok(match unit {
- Unit::Length(unit) => Abs::with_unit(v, unit).into(),
- Unit::Angle(unit) => Angle::with_unit(v, unit).into(),
- Unit::Em => Em::new(v).into(),
- Unit::Fr => Fr::new(v).into(),
- Unit::Percent => Ratio::new(v / 100.0).into(),
+ ast::Unit::Length(unit) => Abs::with_unit(v, unit).into(),
+ ast::Unit::Angle(unit) => Angle::with_unit(v, unit).into(),
+ ast::Unit::Em => Em::new(v).into(),
+ ast::Unit::Fr => Fr::new(v).into(),
+ ast::Unit::Percent => Ratio::new(v / 100.0).into(),
})
}
}
@@ -743,7 +743,7 @@ impl Eval for ast::Dict {
map.insert(named.name().take().into(), named.expr().eval(vm)?);
}
ast::DictItem::Keyed(keyed) => {
- map.insert(keyed.key().into(), keyed.expr().eval(vm)?);
+ map.insert(keyed.key().get().into(), keyed.expr().eval(vm)?);
}
ast::DictItem::Spread(expr) => match expr.eval(vm)? {
Value::None => {}
diff --git a/src/model/library.rs b/src/model/library.rs
index 5360b00a..96218bb1 100644
--- a/src/model/library.rs
+++ b/src/model/library.rs
@@ -74,8 +74,8 @@ pub struct LangItems {
fn(base: Content, sub: Option<Content>, sup: Option<Content>) -> Content,
/// A fraction in a formula: `x/2`.
pub math_frac: fn(num: Content, denom: Content) -> Content,
- /// An alignment point in a formula: `&`, `&&`.
- pub math_align_point: fn(count: NonZeroUsize) -> Content,
+ /// An alignment point in a formula: `&`.
+ pub math_align_point: fn() -> Content,
}
impl Debug for LangItems {
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index 3b3186e4..bf4b37bc 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -5,7 +5,12 @@
use std::num::NonZeroUsize;
use std::ops::Deref;
-use super::{RawFields, Span, SyntaxKind, SyntaxNode, Unit};
+use unscanny::Scanner;
+
+use super::{
+ is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
+};
+use crate::geom::{AbsUnit, AngleUnit};
use crate::util::EcoString;
/// A typed AST node.
@@ -117,7 +122,7 @@ pub enum Expr {
Script(Script),
/// A fraction in a math formula: `x/2`.
Frac(Frac),
- /// An alignment point in a math formula: `&`, `&&`.
+ /// An alignment point in a math formula: `&`.
AlignPoint(AlignPoint),
/// An identifier: `left`.
Ident(Ident),
@@ -194,34 +199,34 @@ impl AstNode for Expr {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
- SyntaxKind::Text(_) => node.cast().map(Self::Text),
- SyntaxKind::Escape(_) => node.cast().map(Self::Escape),
- SyntaxKind::Shorthand(_) => node.cast().map(Self::Shorthand),
- SyntaxKind::Symbol(_) => node.cast().map(Self::Symbol),
+ SyntaxKind::Text => node.cast().map(Self::Text),
+ SyntaxKind::Escape => node.cast().map(Self::Escape),
+ SyntaxKind::Shorthand => node.cast().map(Self::Shorthand),
+ SyntaxKind::Symbol => node.cast().map(Self::Symbol),
SyntaxKind::SmartQuote { .. } => node.cast().map(Self::SmartQuote),
SyntaxKind::Strong => node.cast().map(Self::Strong),
SyntaxKind::Emph => node.cast().map(Self::Emph),
- SyntaxKind::Raw(_) => node.cast().map(Self::Raw),
- SyntaxKind::Link(_) => node.cast().map(Self::Link),
- SyntaxKind::Label(_) => node.cast().map(Self::Label),
- SyntaxKind::Ref(_) => node.cast().map(Self::Ref),
+ SyntaxKind::Raw { .. } => node.cast().map(Self::Raw),
+ SyntaxKind::Link => node.cast().map(Self::Link),
+ SyntaxKind::Label => node.cast().map(Self::Label),
+ SyntaxKind::Ref => node.cast().map(Self::Ref),
SyntaxKind::Heading => node.cast().map(Self::Heading),
SyntaxKind::ListItem => node.cast().map(Self::List),
SyntaxKind::EnumItem => node.cast().map(Self::Enum),
SyntaxKind::TermItem => node.cast().map(Self::Term),
SyntaxKind::Math => node.cast().map(Self::Math),
- SyntaxKind::Atom(_) => node.cast().map(Self::Atom),
+ SyntaxKind::Atom => node.cast().map(Self::Atom),
SyntaxKind::Script => node.cast().map(Self::Script),
SyntaxKind::Frac => node.cast().map(Self::Frac),
SyntaxKind::AlignPoint => node.cast().map(Self::AlignPoint),
- SyntaxKind::Ident(_) => node.cast().map(Self::Ident),
+ SyntaxKind::Ident => node.cast().map(Self::Ident),
SyntaxKind::None => node.cast().map(Self::None),
SyntaxKind::Auto => node.cast().map(Self::Auto),
- SyntaxKind::Bool(_) => node.cast().map(Self::Bool),
- SyntaxKind::Int(_) => node.cast().map(Self::Int),
- SyntaxKind::Float(_) => node.cast().map(Self::Float),
- SyntaxKind::Numeric(_, _) => node.cast().map(Self::Numeric),
- SyntaxKind::Str(_) => node.cast().map(Self::Str),
+ SyntaxKind::Bool => node.cast().map(Self::Bool),
+ SyntaxKind::Int => node.cast().map(Self::Int),
+ SyntaxKind::Float => node.cast().map(Self::Float),
+ SyntaxKind::Numeric => node.cast().map(Self::Numeric),
+ SyntaxKind::Str => node.cast().map(Self::Str),
SyntaxKind::CodeBlock => node.cast().map(Self::Code),
SyntaxKind::ContentBlock => node.cast().map(Self::Content),
SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized),
@@ -315,7 +320,7 @@ impl Space {
/// Get the number of newlines.
pub fn newlines(&self) -> usize {
match self.0.kind() {
- &SyntaxKind::Space { newlines } => newlines,
+ SyntaxKind::Space { newlines } => newlines,
_ => panic!("space is of wrong kind"),
}
}
@@ -334,10 +339,7 @@ node! {
impl Text {
/// Get the text.
pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Text(v) => v,
- _ => panic!("text is of wrong kind"),
- }
+ self.0.text()
}
}
@@ -349,15 +351,22 @@ node! {
impl Escape {
/// Get the escaped character.
pub fn get(&self) -> char {
- match self.0.kind() {
- &SyntaxKind::Escape(v) => v,
- _ => panic!("escape is of wrong kind"),
+ let mut s = Scanner::new(self.0.text());
+ s.expect('\\');
+ if s.eat_if("u{") {
+ let hex = s.eat_while(char::is_ascii_hexdigit);
+ u32::from_str_radix(hex, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ .expect("unicode escape is invalid")
+ } else {
+ s.eat().expect("escape is missing escaped character")
}
}
}
node! {
- /// A shorthand for a unicode codepoint. For example, `~` for non-breaking
+ /// A shorthand for a unicode codepoint. For example, `~` for a non-breaking
/// space or `-?` for a soft hyphen.
Shorthand
}
@@ -365,9 +374,26 @@ node! {
impl Shorthand {
/// Get the shorthanded character.
pub fn get(&self) -> char {
- match self.0.kind() {
- &SyntaxKind::Shorthand(v) => v,
- _ => panic!("shorthand is of wrong kind"),
+ match self.0.text().as_str() {
+ "~" => '\u{00A0}',
+ "..." => '\u{2026}',
+ "--" => '\u{2013}',
+ "---" => '\u{2014}',
+ "-?" => '\u{00AD}',
+ "!=" => '≠',
+ "<=" => '≤',
+ ">=" => '≥',
+ "<-" => '←',
+ "->" => '→',
+ "=>" => '⇒',
+ ":=" => '≔',
+ "[|" => '⟦',
+ "|]" => '⟧',
+ "||" => '‖',
+ "|->" => '↦',
+ "<->" => '↔',
+ "<=>" => '⇔',
+ _ => panic!("shorthand is invalid"),
}
}
}
@@ -379,11 +405,8 @@ node! {
impl Symbol {
/// Get the symbol's notation.
- pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Symbol(v) => v,
- _ => panic!("symbol is of wrong kind"),
- }
+ pub fn get(&self) -> &str {
+ self.0.text().trim_matches(':')
}
}
@@ -395,10 +418,7 @@ node! {
impl SmartQuote {
/// Whether this is a double quote.
pub fn double(&self) -> bool {
- match self.0.kind() {
- &SyntaxKind::SmartQuote { double } => double,
- _ => panic!("smart quote is of wrong kind"),
- }
+ self.0.text() == "\""
}
}
@@ -410,7 +430,7 @@ node! {
impl Strong {
/// The contents of the strong node.
pub fn body(&self) -> Markup {
- self.0.cast_first_child().expect("strong node is missing markup body")
+ self.0.cast_first_match().expect("strong emphasis is missing body")
}
}
@@ -422,9 +442,7 @@ node! {
impl Emph {
/// The contents of the emphasis node.
pub fn body(&self) -> Markup {
- self.0
- .cast_first_child()
- .expect("emphasis node is missing markup body")
+ self.0.cast_first_match().expect("emphasis is missing body")
}
}
@@ -434,27 +452,75 @@ node! {
}
impl Raw {
- /// The raw text.
- pub fn text(&self) -> &EcoString {
- &self.get().text
+ /// The trimmed raw text.
+ pub fn text(&self) -> EcoString {
+ let SyntaxKind::Raw { column } = self.0.kind() else {
+ panic!("raw node is of wrong kind");
+ };
+
+ let mut text = self.0.text().as_str();
+ let blocky = text.starts_with("```");
+ text = text.trim_matches('`');
+
+ // Trim tag, one space at the start, and one space at the end if the
+ // last non-whitespace char is a backtick.
+ if blocky {
+ let mut s = Scanner::new(text);
+ if s.eat_if(is_id_start) {
+ s.eat_while(is_id_continue);
+ }
+ text = s.after();
+ text = text.strip_prefix(' ').unwrap_or(text);
+ if text.trim_end().ends_with('`') {
+ text = text.strip_suffix(' ').unwrap_or(text);
+ }
+ }
+
+ // Split into lines.
+ let mut lines = split_newlines(text);
+
+ if blocky {
+ // Dedent based on column, but not for the first line.
+ for line in lines.iter_mut().skip(1) {
+ let offset = line
+ .chars()
+ .take(column)
+ .take_while(|c| c.is_whitespace())
+ .map(char::len_utf8)
+ .sum();
+ *line = &line[offset..];
+ }
+
+ let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
+
+ // Trims a sequence of whitespace followed by a newline at the start.
+ if lines.first().map_or(false, is_whitespace) {
+ lines.remove(0);
+ }
+
+ // Trims a newline followed by a sequence of whitespace at the end.
+ if lines.last().map_or(false, is_whitespace) {
+ lines.pop();
+ }
+ }
+
+ lines.join("\n").into()
}
/// An optional identifier specifying the language to syntax-highlight in.
- pub fn lang(&self) -> Option<&EcoString> {
- self.get().lang.as_ref()
+ pub fn lang(&self) -> Option<&str> {
+ let inner = self.0.text().trim_start_matches('`');
+ let mut s = Scanner::new(inner);
+ s.eat_if(is_id_start).then(|| {
+ s.eat_while(is_id_continue);
+ s.before()
+ })
}
/// Whether the raw text should be displayed in a separate block.
pub fn block(&self) -> bool {
- self.get().block
- }
-
- /// The raw fields.
- fn get(&self) -> &RawFields {
- match self.0.kind() {
- SyntaxKind::Raw(v) => v.as_ref(),
- _ => panic!("raw is of wrong kind"),
- }
+ let text = self.0.text();
+ text.starts_with("```") && text.chars().any(is_newline)
}
}
@@ -466,10 +532,7 @@ node! {
impl Link {
/// Get the URL.
pub fn url(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Link(url) => url,
- _ => panic!("link is of wrong kind"),
- }
+ self.0.text()
}
}
@@ -480,11 +543,8 @@ node! {
impl Label {
/// Get the label's text.
- pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Label(v) => v,
- _ => panic!("label is of wrong kind"),
- }
+ pub fn get(&self) -> &str {
+ self.0.text().trim_start_matches('<').trim_end_matches('>')
}
}
@@ -495,11 +555,8 @@ node! {
impl Ref {
/// Get the target.
- pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Ref(v) => v,
- _ => panic!("reference is of wrong kind"),
- }
+ pub fn get(&self) -> &str {
+ self.0.text().trim_start_matches('@')
}
}
@@ -511,14 +568,14 @@ node! {
impl Heading {
/// The contents of the heading.
pub fn body(&self) -> Markup {
- self.0.cast_first_child().expect("heading is missing markup body")
+ self.0.cast_first_match().expect("heading is missing markup body")
}
/// The section depth (numer of equals signs).
pub fn level(&self) -> NonZeroUsize {
self.0
.children()
- .filter(|n| n.kind() == &SyntaxKind::Eq)
+ .filter(|n| n.kind() == SyntaxKind::Eq)
.count()
.try_into()
.expect("heading is missing equals sign")
@@ -533,7 +590,7 @@ node! {
impl ListItem {
/// The contents of the list item.
pub fn body(&self) -> Markup {
- self.0.cast_first_child().expect("list item is missing body")
+ self.0.cast_first_match().expect("list item is missing body")
}
}
@@ -546,14 +603,14 @@ impl EnumItem {
/// The explicit numbering, if any: `23.`.
pub fn number(&self) -> Option<NonZeroUsize> {
self.0.children().find_map(|node| match node.kind() {
- SyntaxKind::EnumNumbering(num) => Some(*num),
+ SyntaxKind::EnumNumbering => node.text().trim_end_matches('.').parse().ok(),
_ => Option::None,
})
}
/// The contents of the list item.
pub fn body(&self) -> Markup {
- self.0.cast_first_child().expect("enum item is missing body")
+ self.0.cast_first_match().expect("enum item is missing body")
}
}
@@ -565,13 +622,13 @@ node! {
impl TermItem {
/// The term described by the item.
pub fn term(&self) -> Markup {
- self.0.cast_first_child().expect("term list item is missing term")
+ self.0.cast_first_match().expect("term list item is missing term")
}
/// The description of the term.
pub fn description(&self) -> Markup {
self.0
- .cast_last_child()
+ .cast_last_match()
.expect("term list item is missing description")
}
}
@@ -602,10 +659,7 @@ node! {
impl Atom {
/// Get the atom's text.
pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Atom(v) => v,
- _ => panic!("atom is of wrong kind"),
- }
+ self.0.text()
}
}
@@ -617,7 +671,7 @@ node! {
impl Script {
/// The base of the script.
pub fn base(&self) -> Expr {
- self.0.cast_first_child().expect("script node is missing base")
+ self.0.cast_first_match().expect("script node is missing base")
}
/// The subscript.
@@ -647,32 +701,20 @@ node! {
impl Frac {
/// The numerator.
pub fn num(&self) -> Expr {
- self.0.cast_first_child().expect("fraction is missing numerator")
+ self.0.cast_first_match().expect("fraction is missing numerator")
}
/// The denominator.
pub fn denom(&self) -> Expr {
- self.0.cast_last_child().expect("fraction is missing denominator")
+ self.0.cast_last_match().expect("fraction is missing denominator")
}
}
node! {
- /// An alignment point in a formula: `&`, `&&`.
+ /// An alignment point in a formula: `&`.
AlignPoint
}
-impl AlignPoint {
- /// The number of ampersands.
- pub fn count(&self) -> NonZeroUsize {
- self.0
- .children()
- .filter(|n| n.kind() == &SyntaxKind::Amp)
- .count()
- .try_into()
- .expect("alignment point is missing ampersand sign")
- }
-}
-
node! {
/// An identifier: `it`.
Ident
@@ -680,18 +722,16 @@ node! {
impl Ident {
/// Get the identifier.
- pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Ident(id) => id,
- _ => panic!("identifier is of wrong kind"),
- }
+ pub fn get(&self) -> &str {
+ self.0.text().trim_start_matches('#')
}
/// Take out the container identifier.
pub fn take(self) -> EcoString {
- match self.0.take() {
- SyntaxKind::Ident(id) => id,
- _ => panic!("identifier is of wrong kind"),
+ let text = self.0.into_text();
+ match text.strip_prefix('#') {
+ Some(text) => text.into(),
+ Option::None => text,
}
}
@@ -727,10 +767,7 @@ node! {
impl Bool {
/// Get the value.
pub fn get(&self) -> bool {
- match self.0.kind() {
- SyntaxKind::Bool(v) => *v,
- _ => panic!("boolean is of wrong kind"),
- }
+ self.0.text() == "true"
}
}
@@ -742,10 +779,7 @@ node! {
impl Int {
/// Get the value.
pub fn get(&self) -> i64 {
- match self.0.kind() {
- SyntaxKind::Int(v) => *v,
- _ => panic!("integer is of wrong kind"),
- }
+ self.0.text().parse().expect("integer is invalid")
}
}
@@ -757,10 +791,7 @@ node! {
impl Float {
/// Get the value.
pub fn get(&self) -> f64 {
- match self.0.kind() {
- SyntaxKind::Float(v) => *v,
- _ => panic!("float is of wrong kind"),
- }
+ self.0.text().parse().expect("float is invalid")
}
}
@@ -772,13 +803,47 @@ node! {
impl Numeric {
/// Get the value and unit.
pub fn get(&self) -> (f64, Unit) {
- match self.0.kind() {
- SyntaxKind::Numeric(v, unit) => (*v, *unit),
- _ => panic!("numeric is of wrong kind"),
- }
+ let text = self.0.text();
+ let count = text
+ .chars()
+ .rev()
+ .take_while(|c| matches!(c, 'a'..='z' | '%'))
+ .count();
+
+ let split = text.len() - count;
+ let value = text[..split].parse().expect("number is invalid");
+ let unit = match &text[split..] {
+ "pt" => Unit::Length(AbsUnit::Pt),
+ "mm" => Unit::Length(AbsUnit::Mm),
+ "cm" => Unit::Length(AbsUnit::Cm),
+ "in" => Unit::Length(AbsUnit::In),
+ "deg" => Unit::Angle(AngleUnit::Deg),
+ "rad" => Unit::Angle(AngleUnit::Rad),
+ "em" => Unit::Em,
+ "fr" => Unit::Fr,
+ "%" => Unit::Percent,
+ _ => panic!("number has invalid suffix"),
+ };
+
+ (value, unit)
}
}
+/// Unit of a numeric value.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum Unit {
+ /// An absolute length unit.
+ Length(AbsUnit),
+ /// An angular unit.
+ Angle(AngleUnit),
+ /// Font-relative: `1em` is the same as the font size.
+ Em,
+ /// Fractions: `fr`.
+ Fr,
+ /// Percentage: `%`.
+ Percent,
+}
+
node! {
/// A quoted string: `"..."`.
Str
@@ -786,11 +851,46 @@ node! {
impl Str {
/// Get the value.
- pub fn get(&self) -> &EcoString {
- match self.0.kind() {
- SyntaxKind::Str(v) => v,
- _ => panic!("string is of wrong kind"),
+ pub fn get(&self) -> EcoString {
+ let text = self.0.text();
+ let unquoted = &text[1..text.len() - 1];
+ if !unquoted.contains('\\') {
+ return unquoted.into();
}
+
+ let mut out = EcoString::with_capacity(unquoted.len());
+ let mut s = Scanner::new(unquoted);
+
+ while let Some(c) = s.eat() {
+ if c != '\\' {
+ out.push(c);
+ continue;
+ }
+
+ let start = s.locate(-1);
+ match s.eat() {
+ Some('\\') => out.push('\\'),
+ Some('"') => out.push('"'),
+ Some('n') => out.push('\n'),
+ Some('r') => out.push('\r'),
+ Some('t') => out.push('\t'),
+ Some('u') if s.eat_if('{') => {
+ let sequence = s.eat_while(char::is_ascii_hexdigit);
+ s.eat_if('}');
+
+ match u32::from_str_radix(sequence, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ {
+ Some(c) => out.push(c),
+ Option::None => out.push_str(s.from(start)),
+ }
+ }
+ _ => out.push_str(s.from(start)),
+ }
+ }
+
+ out
}
}
@@ -814,7 +914,7 @@ node! {
impl ContentBlock {
/// The contained markup.
pub fn body(&self) -> Markup {
- self.0.cast_first_child().expect("content block is missing body")
+ self.0.cast_first_match().expect("content block is missing body")
}
}
@@ -827,7 +927,7 @@ impl Parenthesized {
/// The wrapped expression.
pub fn expr(&self) -> Expr {
self.0
- .cast_first_child()
+ .cast_first_match()
.expect("parenthesized expression is missing expression")
}
}
@@ -856,7 +956,7 @@ pub enum ArrayItem {
impl AstNode for ArrayItem {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
- SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
_ => node.cast().map(Self::Pos),
}
}
@@ -897,7 +997,7 @@ impl AstNode for DictItem {
match node.kind() {
SyntaxKind::Named => node.cast().map(Self::Named),
SyntaxKind::Keyed => node.cast().map(Self::Keyed),
- SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
_ => Option::None,
}
}
@@ -919,12 +1019,12 @@ node! {
impl Named {
/// The name: `thickness`.
pub fn name(&self) -> Ident {
- self.0.cast_first_child().expect("named pair is missing name")
+ self.0.cast_first_match().expect("named pair is missing name")
}
/// The right-hand side of the pair: `3pt`.
pub fn expr(&self) -> Expr {
- self.0.cast_last_child().expect("named pair is missing expression")
+ self.0.cast_last_match().expect("named pair is missing expression")
}
}
@@ -935,19 +1035,16 @@ node! {
impl Keyed {
/// The key: `"spacy key"`.
- pub fn key(&self) -> EcoString {
+ pub fn key(&self) -> Str {
self.0
.children()
- .find_map(|node| match node.kind() {
- SyntaxKind::Str(key) => Some(key.clone()),
- _ => Option::None,
- })
+ .find_map(|node| node.cast::<Str>())
.expect("keyed pair is missing key")
}
/// The right-hand side of the pair: `true`.
pub fn expr(&self) -> Expr {
- self.0.cast_last_child().expect("keyed pair is missing expression")
+ self.0.cast_last_match().expect("keyed pair is missing expression")
}
}
@@ -967,7 +1064,7 @@ impl Unary {
/// The expression to operate on: `x`.
pub fn expr(&self) -> Expr {
- self.0.cast_last_child().expect("unary operation is missing child")
+ self.0.cast_last_match().expect("unary operation is missing child")
}
}
@@ -984,7 +1081,7 @@ pub enum UnOp {
impl UnOp {
/// Try to convert the token into a unary operation.
- pub fn from_token(token: &SyntaxKind) -> Option<Self> {
+ pub fn from_token(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Pos,
SyntaxKind::Minus => Self::Neg,
@@ -1036,14 +1133,14 @@ impl Binary {
/// The left-hand side of the operation: `a`.
pub fn lhs(&self) -> Expr {
self.0
- .cast_first_child()
+ .cast_first_match()
.expect("binary operation is missing left-hand side")
}
/// The right-hand side of the operation: `b`.
pub fn rhs(&self) -> Expr {
self.0
- .cast_last_child()
+ .cast_last_match()
.expect("binary operation is missing right-hand side")
}
}
@@ -1093,7 +1190,7 @@ pub enum BinOp {
impl BinOp {
/// Try to convert the token into a binary operation.
- pub fn from_token(token: &SyntaxKind) -> Option<Self> {
+ pub fn from_token(token: SyntaxKind) -> Option<Self> {
Some(match token {
SyntaxKind::Plus => Self::Add,
SyntaxKind::Minus => Self::Sub,
@@ -1210,12 +1307,12 @@ node! {
impl FieldAccess {
/// The expression to access the field on.
pub fn target(&self) -> Expr {
- self.0.cast_first_child().expect("field access is missing object")
+ self.0.cast_first_match().expect("field access is missing object")
}
/// The name of the field.
pub fn field(&self) -> Ident {
- self.0.cast_last_child().expect("field access is missing name")
+ self.0.cast_last_match().expect("field access is missing name")
}
}
@@ -1227,13 +1324,13 @@ node! {
impl FuncCall {
/// The function to call.
pub fn callee(&self) -> Expr {
- self.0.cast_first_child().expect("function call is missing callee")
+ self.0.cast_first_match().expect("function call is missing callee")
}
/// The arguments to the function.
pub fn args(&self) -> Args {
self.0
- .cast_last_child()
+ .cast_last_match()
.expect("function call is missing argument list")
}
}
@@ -1246,18 +1343,18 @@ node! {
impl MethodCall {
/// The expression to call the method on.
pub fn target(&self) -> Expr {
- self.0.cast_first_child().expect("method call is missing target")
+ self.0.cast_first_match().expect("method call is missing target")
}
/// The name of the method.
pub fn method(&self) -> Ident {
- self.0.cast_last_child().expect("method call is missing name")
+ self.0.cast_last_match().expect("method call is missing name")
}
/// The arguments to the method.
pub fn args(&self) -> Args {
self.0
- .cast_last_child()
+ .cast_last_match()
.expect("method call is missing argument list")
}
}
@@ -1289,7 +1386,7 @@ impl AstNode for Arg {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
SyntaxKind::Named => node.cast().map(Self::Named),
- SyntaxKind::Spread => node.cast_first_child().map(Self::Spread),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Spread),
_ => node.cast().map(Self::Pos),
}
}
@@ -1320,7 +1417,7 @@ impl Closure {
pub fn params(&self) -> impl DoubleEndedIterator<Item = Param> + '_ {
self.0
.children()
- .find(|x| x.kind() == &SyntaxKind::Params)
+ .find(|x| x.kind() == SyntaxKind::Params)
.expect("closure is missing parameter list")
.children()
.filter_map(SyntaxNode::cast)
@@ -1328,7 +1425,7 @@ impl Closure {
/// The body of the closure.
pub fn body(&self) -> Expr {
- self.0.cast_last_child().expect("closure is missing body")
+ self.0.cast_last_match().expect("closure is missing body")
}
}
@@ -1346,9 +1443,9 @@ pub enum Param {
impl AstNode for Param {
fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() {
- SyntaxKind::Ident(_) => node.cast().map(Self::Pos),
+ SyntaxKind::Ident => node.cast().map(Self::Pos),
SyntaxKind::Named => node.cast().map(Self::Named),
- SyntaxKind::Spread => node.cast_first_child().map(Self::Sink),
+ SyntaxKind::Spread => node.cast_first_match().map(Self::Sink),
_ => Option::None,
}
}
@@ -1370,7 +1467,7 @@ node! {
impl LetBinding {
/// The binding to assign to.
pub fn binding(&self) -> Ident {
- match self.0.cast_first_child() {
+ match self.0.cast_first_match() {
Some(Expr::Ident(binding)) => binding,
Some(Expr::Closure(closure)) => {
closure.name().expect("let-bound closure is missing name")
@@ -1381,12 +1478,12 @@ impl LetBinding {
/// The expression the binding is initialized with.
pub fn init(&self) -> Option<Expr> {
- if self.0.cast_first_child::<Ident>().is_some() {
+ if self.0.cast_first_match::<Ident>().is_some() {
// This is a normal binding like `let x = 1`.
self.0.children().filter_map(SyntaxNode::cast).nth(1)
} else {
// This is a closure binding like `let f(x) = 1`.
- self.0.cast_first_child()
+ self.0.cast_first_match()
}
}
}
@@ -1399,19 +1496,19 @@ node! {
impl SetRule {
/// The function to set style properties for.
pub fn target(&self) -> Ident {
- self.0.cast_first_child().expect("set rule is missing target")
+ self.0.cast_first_match().expect("set rule is missing target")
}
/// The style properties to set.
pub fn args(&self) -> Args {
- self.0.cast_last_child().expect("set rule is missing argument list")
+ self.0.cast_last_match().expect("set rule is missing argument list")
}
/// A condition under which the set rule applies.
pub fn condition(&self) -> Option<Expr> {
self.0
.children()
- .skip_while(|child| child.kind() != &SyntaxKind::If)
+ .skip_while(|child| child.kind() != SyntaxKind::If)
.find_map(SyntaxNode::cast)
}
}
@@ -1427,13 +1524,13 @@ impl ShowRule {
self.0
.children()
.rev()
- .skip_while(|child| child.kind() != &SyntaxKind::Colon)
+ .skip_while(|child| child.kind() != SyntaxKind::Colon)
.find_map(SyntaxNode::cast)
}
/// The transformation recipe.
pub fn transform(&self) -> Expr {
- self.0.cast_last_child().expect("show rule is missing transform")
+ self.0.cast_last_match().expect("show rule is missing transform")
}
}
@@ -1445,7 +1542,7 @@ node! {
impl Conditional {
/// The condition which selects the body to evaluate.
pub fn condition(&self) -> Expr {
- self.0.cast_first_child().expect("conditional is missing condition")
+ self.0.cast_first_match().expect("conditional is missing condition")
}
/// The expression to evaluate if the condition is true.
@@ -1471,12 +1568,12 @@ node! {
impl WhileLoop {
/// The condition which selects whether to evaluate the body.
pub fn condition(&self) -> Expr {
- self.0.cast_first_child().expect("while loop is missing condition")
+ self.0.cast_first_match().expect("while loop is missing condition")
}
/// The expression to evaluate while the condition is true.
pub fn body(&self) -> Expr {
- self.0.cast_last_child().expect("while loop is missing body")
+ self.0.cast_last_match().expect("while loop is missing body")
}
}
@@ -1488,17 +1585,17 @@ node! {
impl ForLoop {
/// The pattern to assign to.
pub fn pattern(&self) -> ForPattern {
- self.0.cast_first_child().expect("for loop is missing pattern")
+ self.0.cast_first_match().expect("for loop is missing pattern")
}
/// The expression to iterate over.
pub fn iter(&self) -> Expr {
- self.0.cast_first_child().expect("for loop is missing iterable")
+ self.0.cast_first_match().expect("for loop is missing iterable")
}
/// The expression to evaluate for each iteration.
pub fn body(&self) -> Expr {
- self.0.cast_last_child().expect("for loop is missing body")
+ self.0.cast_last_match().expect("for loop is missing body")
}
}
@@ -1521,7 +1618,7 @@ impl ForPattern {
/// The value part of the pattern.
pub fn value(&self) -> Ident {
- self.0.cast_last_child().expect("for loop pattern is missing value")
+ self.0.cast_last_match().expect("for loop pattern is missing value")
}
}
@@ -1533,7 +1630,7 @@ node! {
impl ModuleImport {
/// The module or path from which the items should be imported.
pub fn source(&self) -> Expr {
- self.0.cast_last_child().expect("module import is missing source")
+ self.0.cast_last_match().expect("module import is missing source")
}
/// The items to be imported.
@@ -1566,7 +1663,7 @@ node! {
impl ModuleInclude {
/// The module or path from which the content should be included.
pub fn source(&self) -> Expr {
- self.0.cast_last_child().expect("module include is missing path")
+ self.0.cast_last_match().expect("module include is missing path")
}
}
@@ -1588,6 +1685,6 @@ node! {
impl FuncReturn {
/// The expression to return.
pub fn body(&self) -> Option<Expr> {
- self.0.cast_last_child()
+ self.0.cast_last_match()
}
}
diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs
index 55f4b3ad..26e92b93 100644
--- a/src/syntax/kind.rs
+++ b/src/syntax/kind.rs
@@ -1,14 +1,7 @@
-use std::hash::{Hash, Hasher};
-use std::num::NonZeroUsize;
-use std::sync::Arc;
-
-use crate::geom::{AbsUnit, AngleUnit};
-use crate::util::EcoString;
-
/// All syntactical building blocks that can be part of a Typst document.
///
/// Can be created by the lexer or by the parser.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum SyntaxKind {
/// A line comment: `// ...`.
LineComment,
@@ -58,8 +51,6 @@ pub enum SyntaxKind {
Slash,
/// The superscript operator in a formula: `^`.
Hat,
- /// The alignment operator in a formula: `&`.
- Amp,
/// The field access and method call operator: `.`.
Dot,
/// The assignment operator: `=`.
@@ -135,31 +126,31 @@ pub enum SyntaxKind {
/// so it is zero except inside indent-aware constructs like lists.
Markup { min_indent: usize },
/// Plain text without markup.
- Text(EcoString),
+ Text,
/// A forced line break: `\`.
Linebreak,
/// An escape sequence: `\#`, `\u{1F5FA}`.
- Escape(char),
+ Escape,
/// A shorthand for a unicode codepoint. For example, `~` for non-breaking
/// space or `-?` for a soft hyphen.
- Shorthand(char),
+ Shorthand,
/// Symbol notation: `:arrow:l:`. The string only contains the inner part
 /// without leading and trailing colon.
- Symbol(EcoString),
+ Symbol,
/// A smart quote: `'` or `"`.
- SmartQuote { double: bool },
+ SmartQuote,
/// Strong content: `*Strong*`.
Strong,
/// Emphasized content: `_Emphasized_`.
Emph,
/// Raw text with optional syntax highlighting: `` `...` ``.
- Raw(Arc<RawFields>),
+ Raw { column: usize },
/// A hyperlink: `https://typst.org`.
- Link(EcoString),
+ Link,
/// A label: `<intro>`.
- Label(EcoString),
+ Label,
/// A reference: `@target`.
- Ref(EcoString),
+ Ref,
/// A section heading: `= Introduction`.
Heading,
/// An item in a bullet list: `- ...`.
@@ -167,32 +158,32 @@ pub enum SyntaxKind {
/// An item in an enumeration (numbered list): `+ ...` or `1. ...`.
EnumItem,
/// An explicit enumeration numbering: `23.`.
- EnumNumbering(NonZeroUsize),
+ EnumNumbering,
/// An item in a term list: `/ Term: Details`.
TermItem,
/// A mathematical formula: `$x$`, `$ x^2 $`.
Math,
/// An atom in a formula: `x`, `+`, `12`.
- Atom(EcoString),
+ Atom,
/// A base with optional sub- and superscripts in a formula: `a_1^2`.
Script,
/// A fraction in a formula: `x/2`.
Frac,
- /// An alignment point in a formula: `&`, `&&`.
+ /// An alignment point in a formula: `&`.
AlignPoint,
/// An identifier: `it`.
- Ident(EcoString),
+ Ident,
/// A boolean: `true`, `false`.
- Bool(bool),
+ Bool,
/// An integer: `120`.
- Int(i64),
+ Int,
/// A floating-point number: `1.2`, `10e-4`.
- Float(f64),
+ Float,
/// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
- Numeric(f64, Unit),
+ Numeric,
/// A quoted string: `"..."`.
- Str(EcoString),
+ Str,
/// A code block: `{ let x = 1; x + 2 }`.
CodeBlock,
/// A content block: `[*Hi* there!]`.
@@ -253,73 +244,37 @@ pub enum SyntaxKind {
FuncReturn,
/// An invalid sequence of characters.
- Error(ErrorPos, EcoString),
-}
-
-/// Fields of the raw syntax kind.
-#[derive(Debug, Clone, PartialEq, Hash)]
-pub struct RawFields {
- /// An optional identifier specifying the language to syntax-highlight in.
- pub lang: Option<EcoString>,
- /// The raw text, determined as the raw string between the backticks trimmed
- /// according to the above rules.
- pub text: EcoString,
- /// Whether the element is block-level, that is, it has 3+ backticks
- /// and contains at least one newline.
- pub block: bool,
-}
-
-/// Unit of a numeric value.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum Unit {
- /// An absolute length unit.
- Length(AbsUnit),
- /// An angular unit.
- Angle(AngleUnit),
- /// Font-relative: `1em` is the same as the font size.
- Em,
- /// Fractions: `fr`.
- Fr,
- /// Percentage: `%`.
- Percent,
-}
-
-/// Where in a node an error should be annotated,
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub enum ErrorPos {
- /// Over the full width of the node.
- Full,
- /// At the start of the node.
- Start,
- /// At the end of the node.
- End,
+ Error,
}
impl SyntaxKind {
/// Whether this is trivia.
- pub fn is_trivia(&self) -> bool {
- self.is_space()
- || self.is_error()
- || matches!(self, Self::LineComment | Self::BlockComment)
+ pub fn is_trivia(self) -> bool {
+ self.is_space() || self.is_comment() || self.is_error()
}
/// Whether this is a space.
- pub fn is_space(&self) -> bool {
+ pub fn is_space(self) -> bool {
matches!(self, Self::Space { .. })
}
- /// Whether this is a left or right parenthesis.
- pub fn is_paren(&self) -> bool {
- matches!(self, Self::LeftParen | Self::RightParen)
+ /// Whether this is a comment.
+ pub fn is_comment(self) -> bool {
+ matches!(self, Self::LineComment | Self::BlockComment)
}
/// Whether this is an error.
- pub fn is_error(&self) -> bool {
- matches!(self, SyntaxKind::Error(_, _))
+ pub fn is_error(self) -> bool {
+ matches!(self, SyntaxKind::Error)
+ }
+
+ /// Whether this is a left or right parenthesis.
+ pub fn is_paren(self) -> bool {
+ matches!(self, Self::LeftParen | Self::RightParen)
}
/// Does this node need termination through a semicolon or linebreak?
- pub fn is_stmt(&self) -> bool {
+ pub fn is_stmt(self) -> bool {
matches!(
self,
SyntaxKind::LetBinding
@@ -331,7 +286,7 @@ impl SyntaxKind {
}
/// A human-readable name for the kind.
- pub fn name(&self) -> &'static str {
+ pub fn name(self) -> &'static str {
match self {
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
@@ -348,13 +303,11 @@ impl SyntaxKind {
Self::Star => "star",
Self::Underscore => "underscore",
Self::Dollar => "dollar sign",
- Self::SmartQuote { double: false } => "single quote",
- Self::SmartQuote { double: true } => "double quote",
+ Self::SmartQuote => "smart quote",
Self::Plus => "plus",
Self::Minus => "minus",
Self::Slash => "slash",
Self::Hat => "hat",
- Self::Amp => "ampersand",
Self::Dot => "dot",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
@@ -389,41 +342,33 @@ impl SyntaxKind {
Self::Include => "keyword `include`",
Self::As => "keyword `as`",
Self::Markup { .. } => "markup",
- Self::Text(_) => "text",
+ Self::Text => "text",
Self::Linebreak => "linebreak",
- Self::Escape(_) => "escape sequence",
- Self::Shorthand(_) => "shorthand",
- Self::Symbol(_) => "symbol notation",
+ Self::Escape => "escape sequence",
+ Self::Shorthand => "shorthand",
+ Self::Symbol => "symbol notation",
Self::Strong => "strong content",
Self::Emph => "emphasized content",
- Self::Raw(_) => "raw block",
- Self::Link(_) => "link",
- Self::Label(_) => "label",
- Self::Ref(_) => "reference",
+ Self::Raw { .. } => "raw block",
+ Self::Link => "link",
+ Self::Label => "label",
+ Self::Ref => "reference",
Self::Heading => "heading",
Self::ListItem => "list item",
Self::EnumItem => "enumeration item",
- Self::EnumNumbering(_) => "enumeration item numbering",
+ Self::EnumNumbering => "enumeration item numbering",
Self::TermItem => "term list item",
Self::Math => "math formula",
- Self::Atom(s) => match s.as_str() {
- "(" => "opening paren",
- ")" => "closing paren",
- "{" => "opening brace",
- "}" => "closing brace",
- "[" => "opening bracket",
- "]" => "closing bracket",
- _ => "math atom",
- },
+ Self::Atom => "math atom",
Self::Script => "script",
Self::Frac => "fraction",
Self::AlignPoint => "alignment point",
- Self::Ident(_) => "identifier",
- Self::Bool(_) => "boolean",
- Self::Int(_) => "integer",
- Self::Float(_) => "float",
- Self::Numeric(_, _) => "numeric value",
- Self::Str(_) => "string",
+ Self::Ident => "identifier",
+ Self::Bool => "boolean",
+ Self::Int => "integer",
+ Self::Float => "float",
+ Self::Numeric => "numeric value",
+ Self::Str => "string",
Self::CodeBlock => "code block",
Self::ContentBlock => "content block",
Self::Parenthesized => "group",
@@ -453,127 +398,7 @@ impl SyntaxKind {
Self::LoopBreak => "`break` expression",
Self::LoopContinue => "`continue` expression",
Self::FuncReturn => "`return` expression",
- Self::Error(_, _) => "syntax error",
- }
- }
-}
-
-impl Hash for SyntaxKind {
- fn hash<H: Hasher>(&self, state: &mut H) {
- std::mem::discriminant(self).hash(state);
- match self {
- Self::LineComment => {}
- Self::BlockComment => {}
- Self::Space { newlines } => newlines.hash(state),
- Self::LeftBrace => {}
- Self::RightBrace => {}
- Self::LeftBracket => {}
- Self::RightBracket => {}
- Self::LeftParen => {}
- Self::RightParen => {}
- Self::Comma => {}
- Self::Semicolon => {}
- Self::Colon => {}
- Self::Star => {}
- Self::Underscore => {}
- Self::Dollar => {}
- Self::Plus => {}
- Self::Minus => {}
- Self::Slash => {}
- Self::Hat => {}
- Self::Amp => {}
- Self::Dot => {}
- Self::Eq => {}
- Self::EqEq => {}
- Self::ExclEq => {}
- Self::Lt => {}
- Self::LtEq => {}
- Self::Gt => {}
- Self::GtEq => {}
- Self::PlusEq => {}
- Self::HyphEq => {}
- Self::StarEq => {}
- Self::SlashEq => {}
- Self::Dots => {}
- Self::Arrow => {}
- Self::Not => {}
- Self::And => {}
- Self::Or => {}
- Self::None => {}
- Self::Auto => {}
- Self::Let => {}
- Self::Set => {}
- Self::Show => {}
- Self::If => {}
- Self::Else => {}
- Self::For => {}
- Self::In => {}
- Self::While => {}
- Self::Break => {}
- Self::Continue => {}
- Self::Return => {}
- Self::Import => {}
- Self::Include => {}
- Self::As => {}
- Self::Markup { min_indent } => min_indent.hash(state),
- Self::Text(s) => s.hash(state),
- Self::Linebreak => {}
- Self::Escape(c) => c.hash(state),
- Self::Shorthand(c) => c.hash(state),
- Self::Symbol(s) => s.hash(state),
- Self::SmartQuote { double } => double.hash(state),
- Self::Strong => {}
- Self::Emph => {}
- Self::Raw(raw) => raw.hash(state),
- Self::Link(link) => link.hash(state),
- Self::Label(c) => c.hash(state),
- Self::Ref(c) => c.hash(state),
- Self::Heading => {}
- Self::ListItem => {}
- Self::EnumItem => {}
- Self::EnumNumbering(num) => num.hash(state),
- Self::TermItem => {}
- Self::Math => {}
- Self::Atom(c) => c.hash(state),
- Self::Script => {}
- Self::Frac => {}
- Self::AlignPoint => {}
- Self::Ident(v) => v.hash(state),
- Self::Bool(v) => v.hash(state),
- Self::Int(v) => v.hash(state),
- Self::Float(v) => v.to_bits().hash(state),
- Self::Numeric(v, u) => (v.to_bits(), u).hash(state),
- Self::Str(v) => v.hash(state),
- Self::CodeBlock => {}
- Self::ContentBlock => {}
- Self::Parenthesized => {}
- Self::Array => {}
- Self::Dict => {}
- Self::Named => {}
- Self::Keyed => {}
- Self::Unary => {}
- Self::Binary => {}
- Self::FieldAccess => {}
- Self::FuncCall => {}
- Self::MethodCall => {}
- Self::Args => {}
- Self::Spread => {}
- Self::Closure => {}
- Self::Params => {}
- Self::LetBinding => {}
- Self::SetRule => {}
- Self::ShowRule => {}
- Self::Conditional => {}
- Self::WhileLoop => {}
- Self::ForLoop => {}
- Self::ForPattern => {}
- Self::ModuleImport => {}
- Self::ImportItems => {}
- Self::ModuleInclude => {}
- Self::LoopBreak => {}
- Self::LoopContinue => {}
- Self::FuncReturn => {}
- Self::Error(pos, msg) => (pos, msg).hash(state),
+ Self::Error => "syntax error",
}
}
}
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs
index d5476774..f082bd28 100644
--- a/src/syntax/lexer.rs
+++ b/src/syntax/lexer.rs
@@ -1,17 +1,12 @@
-use std::num::NonZeroUsize;
-use std::sync::Arc;
-
use unicode_xid::UnicodeXID;
use unscanny::Scanner;
-use super::resolve::{resolve_hex, resolve_raw, resolve_string};
-use super::{ErrorPos, RawFields, SyntaxKind, Unit};
-use crate::geom::{AbsUnit, AngleUnit};
+use super::{ErrorPos, SyntaxKind};
use crate::util::{format_eco, EcoString};
/// Splits up a string of source code into tokens.
#[derive(Clone)]
-pub struct Lexer<'s> {
+pub(super) struct Lexer<'s> {
/// The underlying scanner.
s: Scanner<'s>,
/// The mode the lexer is in. This determines what tokens it recognizes.
@@ -20,11 +15,13 @@ pub struct Lexer<'s> {
terminated: bool,
/// Offsets the indentation on the first line of the source.
column_offset: usize,
+ /// An error for the last token.
+ error: Option<(EcoString, ErrorPos)>,
}
/// What kind of tokens to emit.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum LexMode {
+pub(super) enum LexMode {
/// Text and markup.
Markup,
/// Math atoms, operators, etc.
@@ -34,11 +31,6 @@ pub enum LexMode {
}
impl<'s> Lexer<'s> {
- /// Create a new lexer with the given mode.
- pub fn new(text: &'s str, mode: LexMode) -> Self {
- Self::with_prefix("", text, mode)
- }
-
/// Create a new lexer with the given mode and a prefix to offset column
/// calculations.
pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
@@ -47,6 +39,7 @@ impl<'s> Lexer<'s> {
mode,
terminated: true,
column_offset: column(prefix, prefix.len(), 0),
+ error: None,
}
}
@@ -85,6 +78,23 @@ impl<'s> Lexer<'s> {
pub fn column(&self, index: usize) -> usize {
column(self.s.string(), index, self.column_offset)
}
+
+ /// Take out the last error.
+ pub fn last_error(&mut self) -> Option<(EcoString, ErrorPos)> {
+ self.error.take()
+ }
+
+ /// Construct a full-positioned syntax error.
+ fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
+ self.error = Some((message.into(), ErrorPos::Full));
+ SyntaxKind::Error
+ }
+
+ /// Construct an end-positioned syntax error.
+ fn error_at_end(&mut self, message: impl Into<EcoString>) -> SyntaxKind {
+ self.error = Some((message.into(), ErrorPos::End));
+ SyntaxKind::Error
+ }
}
impl Iterator for Lexer<'_> {
@@ -92,22 +102,20 @@ impl Iterator for Lexer<'_> {
/// Produce the next token.
fn next(&mut self) -> Option<Self::Item> {
+ self.error = None;
let start = self.s.cursor();
let c = self.s.eat()?;
Some(match c {
// Trivia.
+ c if c.is_whitespace() => self.whitespace(c),
'/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(),
- '*' if self.s.eat_if('/') => SyntaxKind::Error(
- ErrorPos::Full,
- "unexpected end of block comment".into(),
- ),
- c if c.is_whitespace() => self.whitespace(c),
+ '*' if self.s.eat_if('/') => self.error("unexpected end of block comment"),
// Other things.
_ => match self.mode {
LexMode::Markup => self.markup(start, c),
- LexMode::Math => self.math(start, c),
+ LexMode::Math => self.math(c),
LexMode::Code => self.code(start, c),
},
})
@@ -118,7 +126,7 @@ impl Iterator for Lexer<'_> {
impl Lexer<'_> {
fn line_comment(&mut self) -> SyntaxKind {
self.s.eat_until(is_newline);
- if self.s.peek().is_none() {
+ if self.s.done() {
self.terminated = false;
}
SyntaxKind::LineComment
@@ -182,57 +190,64 @@ impl Lexer<'_> {
}
}
+/// Markup.
impl Lexer<'_> {
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
- // Blocks.
- '{' => SyntaxKind::LeftBrace,
- '}' => SyntaxKind::RightBrace,
- '[' => SyntaxKind::LeftBracket,
- ']' => SyntaxKind::RightBracket,
-
- // Multi-char things.
- '#' => self.hash(start),
- '.' if self.s.eat_if("..") => SyntaxKind::Shorthand('\u{2026}'),
- '-' => self.hyph(),
- ':' => self.colon(),
- 'h' if self.s.eat_if("ttp://") || self.s.eat_if("ttps://") => {
- self.link(start)
- }
+ '\\' => self.backslash(),
+ ':' if self.s.at(is_id_start) => self.maybe_symbol(),
'`' => self.raw(),
- c if c.is_ascii_digit() => self.numbering(start),
+ 'h' if self.s.eat_if("ttp://") => self.link(),
+ 'h' if self.s.eat_if("ttps://") => self.link(),
'<' if self.s.at(is_id_continue) => self.label(),
'@' if self.s.at(is_id_continue) => self.reference(),
+ '0'..='9' => self.numbering(start),
+ '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace,
+ '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket,
+ '#' if self.s.at(is_id_start) => {
+ match keyword(self.s.eat_while(is_id_continue)) {
+ Some(keyword) => keyword,
+ None => SyntaxKind::Ident,
+ }
+ }
- // Escape sequences.
- '\\' => self.backslash(),
-
- // Single-char things.
- '~' => SyntaxKind::Shorthand('\u{00A0}'),
- '\'' => SyntaxKind::SmartQuote { double: false },
- '"' => SyntaxKind::SmartQuote { double: true },
+ '.' if self.s.eat_if("..") => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
'*' if !self.in_word() => SyntaxKind::Star,
'_' if !self.in_word() => SyntaxKind::Underscore,
+
+ '{' => SyntaxKind::LeftBrace,
+ '}' => SyntaxKind::RightBrace,
+ '[' => SyntaxKind::LeftBracket,
+ ']' => SyntaxKind::RightBracket,
+ '\'' => SyntaxKind::SmartQuote,
+ '"' => SyntaxKind::SmartQuote,
'$' => SyntaxKind::Dollar,
'=' => SyntaxKind::Eq,
'+' => SyntaxKind::Plus,
'/' => SyntaxKind::Slash,
+ '~' => SyntaxKind::Shorthand,
+ ':' => SyntaxKind::Colon,
+ '-' => SyntaxKind::Minus,
- // Plain text.
- _ => self.text(start),
+ _ => self.text(),
}
}
- fn text(&mut self, start: usize) -> SyntaxKind {
+ fn text(&mut self) -> SyntaxKind {
macro_rules! table {
- ($(|$c:literal)*) => {{
- let mut t = [false; 128];
- $(t[$c as usize] = true;)*
- t
- }}
+ ($(|$c:literal)*) => {
+ static TABLE: [bool; 128] = {
+ let mut t = [false; 128];
+ $(t[$c as usize] = true;)*
+ t
+ };
+ };
}
- const TABLE: [bool; 128] = table! {
+ table! {
| ' ' | '\t' | '\n' | '\x0b' | '\x0c' | '\r' | '\\' | '/'
| '[' | ']' | '{' | '}' | '~' | '-' | '.' | '\'' | '"'
| '*' | '_' | ':' | 'h' | '`' | '$' | '<' | '>' | '@' | '#'
@@ -247,8 +262,8 @@ impl Lexer<'_> {
// anyway.
let mut s = self.s;
match s.eat() {
- Some('/') if !s.at(['/', '*']) => {}
Some(' ') if s.at(char::is_alphanumeric) => {}
+ Some('/') if !s.at(['/', '*']) => {}
Some('-') if !s.at(['-', '?']) => {}
Some('.') if !s.at("..") => {}
Some('h') if !s.at("ttp://") && !s.at("ttps://") => {}
@@ -259,77 +274,40 @@ impl Lexer<'_> {
self.s = s;
}
- SyntaxKind::Text(self.s.from(start).into())
+ SyntaxKind::Text
}
fn backslash(&mut self) -> SyntaxKind {
- match self.s.peek() {
- Some('u') if self.s.eat_if("u{") => {
- let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
- if self.s.eat_if('}') {
- if let Some(c) = resolve_hex(sequence) {
- SyntaxKind::Escape(c)
- } else {
- SyntaxKind::Error(
- ErrorPos::Full,
- "invalid unicode escape sequence".into(),
- )
- }
- } else {
- self.terminated = false;
- SyntaxKind::Error(ErrorPos::End, "expected closing brace".into())
- }
+ if self.s.eat_if("u{") {
+ let hex = self.s.eat_while(char::is_ascii_alphanumeric);
+ if !self.s.eat_if('}') {
+ self.terminated = false;
+ return self.error_at_end("expected closing brace");
}
- // Linebreaks.
- Some(c) if c.is_whitespace() => SyntaxKind::Linebreak,
- None => SyntaxKind::Linebreak,
-
- // Escapes.
- Some(c) => {
- self.s.expect(c);
- SyntaxKind::Escape(c)
+ if u32::from_str_radix(hex, 16)
+ .ok()
+ .and_then(std::char::from_u32)
+ .is_none()
+ {
+ return self.error("invalid unicode escape sequence");
}
- }
- }
- fn hash(&mut self, start: usize) -> SyntaxKind {
- if self.s.eat_if('{') {
- SyntaxKind::LeftBrace
- } else if self.s.eat_if('[') {
- SyntaxKind::LeftBracket
- } else if self.s.at(is_id_start) {
- let read = self.s.eat_while(is_id_continue);
- match keyword(read) {
- Some(keyword) => keyword,
- None => SyntaxKind::Ident(read.into()),
- }
- } else if self.mode == LexMode::Markup {
- self.text(start)
- } else {
- SyntaxKind::Atom("#".into())
+ return SyntaxKind::Escape;
}
- }
- fn hyph(&mut self) -> SyntaxKind {
- if self.s.eat_if('-') {
- if self.s.eat_if('-') {
- SyntaxKind::Shorthand('\u{2014}')
- } else {
- SyntaxKind::Shorthand('\u{2013}')
- }
- } else if self.s.eat_if('?') {
- SyntaxKind::Shorthand('\u{00AD}')
+ if self.s.done() || self.s.at(char::is_whitespace) {
+ SyntaxKind::Linebreak
} else {
- SyntaxKind::Minus
+ self.s.eat();
+ SyntaxKind::Escape
}
}
- fn colon(&mut self) -> SyntaxKind {
+ fn maybe_symbol(&mut self) -> SyntaxKind {
let start = self.s.cursor();
let mut end = start;
- while !self.s.eat_while(char::is_ascii_alphanumeric).is_empty() && self.s.at(':')
- {
+ while !self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') {
end = self.s.cursor();
self.s.eat();
}
@@ -338,15 +316,15 @@ impl Lexer<'_> {
if start < end {
self.s.expect(':');
- SyntaxKind::Symbol(self.s.get(start..end).into())
+ SyntaxKind::Symbol
} else if self.mode == LexMode::Markup {
SyntaxKind::Colon
} else {
- SyntaxKind::Atom(":".into())
+ SyntaxKind::Atom
}
}
- fn link(&mut self, start: usize) -> SyntaxKind {
+ fn link(&mut self) -> SyntaxKind {
#[rustfmt::skip]
self.s.eat_while(|c: char| matches!(c,
| '0' ..= '9'
@@ -355,10 +333,12 @@ impl Lexer<'_> {
| '~' | '/' | '%' | '?' | '#' | '&' | '+' | '='
| '\'' | '.' | ',' | ';'
));
+
if self.s.scout(-1) == Some('.') {
self.s.uneat();
}
- SyntaxKind::Link(self.s.from(start).into())
+
+ SyntaxKind::Link
}
fn raw(&mut self) -> SyntaxKind {
@@ -369,16 +349,10 @@ impl Lexer<'_> {
backticks += 1;
}
- // Special case for empty inline block.
if backticks == 2 {
- return SyntaxKind::Raw(Arc::new(RawFields {
- text: EcoString::new(),
- lang: None,
- block: false,
- }));
+ return SyntaxKind::Raw { column };
}
- let start = self.s.cursor();
let mut found = 0;
while found < backticks {
match self.s.eat() {
@@ -388,45 +362,40 @@ impl Lexer<'_> {
}
}
- if found == backticks {
- let end = self.s.cursor() - found as usize;
- SyntaxKind::Raw(Arc::new(resolve_raw(
- column,
- backticks,
- self.s.get(start..end),
- )))
- } else {
+ if found != backticks {
self.terminated = false;
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
- SyntaxKind::Error(
- ErrorPos::End,
- if found == 0 {
- format_eco!("expected {} {}", remaining, noun)
- } else {
- format_eco!("expected {} more {}", remaining, noun)
- },
- )
+ return self.error_at_end(if found == 0 {
+ format_eco!("expected {} {}", remaining, noun)
+ } else {
+ format_eco!("expected {} more {}", remaining, noun)
+ });
}
+
+ SyntaxKind::Raw { column }
}
fn numbering(&mut self, start: usize) -> SyntaxKind {
self.s.eat_while(char::is_ascii_digit);
+
let read = self.s.from(start);
if self.s.eat_if('.') {
if let Ok(number) = read.parse::<usize>() {
- return match NonZeroUsize::new(number) {
- Some(number) => SyntaxKind::EnumNumbering(number),
- None => SyntaxKind::Error(ErrorPos::Full, "must be positive".into()),
- };
+ if number == 0 {
+ return self.error("must be positive");
+ }
+
+ return SyntaxKind::EnumNumbering;
}
}
- self.text(start)
+ self.text()
}
fn reference(&mut self) -> SyntaxKind {
- SyntaxKind::Ref(self.s.eat_while(is_id_continue).into())
+ self.s.eat_while(is_id_continue);
+ SyntaxKind::Ref
}
fn in_word(&self) -> bool {
@@ -439,70 +408,70 @@ impl Lexer<'_> {
/// Math.
impl Lexer<'_> {
- fn math(&mut self, start: usize, c: char) -> SyntaxKind {
+ fn math(&mut self, c: char) -> SyntaxKind {
match c {
- // Symbol shorthands.
- '|' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{21A6}'),
- '<' if self.s.eat_if("->") => SyntaxKind::Shorthand('\u{2194}'),
- '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand('\u{21D4}'),
- '!' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2260}'),
- '<' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2264}'),
- '>' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2265}'),
- '<' if self.s.eat_if('-') => SyntaxKind::Shorthand('\u{2190}'),
- '-' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{2192}'),
- '=' if self.s.eat_if('>') => SyntaxKind::Shorthand('\u{21D2}'),
- ':' if self.s.eat_if('=') => SyntaxKind::Shorthand('\u{2254}'),
-
- // Multi-char things.
- '#' => self.hash(start),
-
- // Escape sequences.
'\\' => self.backslash(),
+ ':' if self.s.at(is_id_start) => self.maybe_symbol(),
+ '"' => self.string(),
+ '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace,
+ '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket,
+ '#' if self.s.at(is_id_start) => {
+ match keyword(self.s.eat_while(is_id_continue)) {
+ Some(keyword) => keyword,
+ None => SyntaxKind::Ident,
+ }
+ }
+
+ '|' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("->") => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if("=>") => SyntaxKind::Shorthand,
+ '!' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '>' if self.s.eat_if('=') => SyntaxKind::Shorthand,
+ '<' if self.s.eat_if('-') => SyntaxKind::Shorthand,
+ '-' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ '=' if self.s.eat_if('>') => SyntaxKind::Shorthand,
+ ':' if self.s.eat_if('=') => SyntaxKind::Shorthand,
- // Single-char things.
'_' => SyntaxKind::Underscore,
- '^' => SyntaxKind::Hat,
- '/' => SyntaxKind::Slash,
- '&' => SyntaxKind::Amp,
'$' => SyntaxKind::Dollar,
-
- // Symbol notation.
- ':' => self.colon(),
-
- // Strings.
- '"' => self.string(),
+ '/' => SyntaxKind::Slash,
+ '^' => SyntaxKind::Hat,
+ '&' => SyntaxKind::AlignPoint,
// Identifiers and symbol notation.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
- self.s.eat_while(is_math_id_continue);
+ self.math_ident()
+ }
- let mut symbol = false;
- while self.s.eat_if(':')
- && !self.s.eat_while(char::is_alphanumeric).is_empty()
- {
- symbol = true;
+ // Other math atoms.
+ _ => {
+ // Keep numbers together.
+ if c.is_numeric() {
+ self.s.eat_while(char::is_numeric);
}
+ SyntaxKind::Atom
+ }
+ }
+ }
- if symbol {
- SyntaxKind::Symbol(self.s.from(start).into())
- } else {
- if self.s.scout(-1) == Some(':') {
- self.s.uneat();
- }
+ fn math_ident(&mut self) -> SyntaxKind {
+ self.s.eat_while(is_math_id_continue);
- SyntaxKind::Ident(self.s.from(start).into())
- }
- }
+ let mut symbol = false;
+ while self.s.eat_if(':') && !self.s.eat_while(char::is_alphanumeric).is_empty() {
+ symbol = true;
+ }
- // Numbers.
- c if c.is_numeric() => {
- self.s.eat_while(char::is_numeric);
- SyntaxKind::Atom(self.s.from(start).into())
- }
+ if symbol {
+ return SyntaxKind::Symbol;
+ }
- // Other math atoms.
- c => SyntaxKind::Atom(c.into()),
+ if self.s.scout(-1) == Some(':') {
+ self.s.uneat();
}
+
+ SyntaxKind::Ident
}
}
@@ -510,24 +479,12 @@ impl Lexer<'_> {
impl Lexer<'_> {
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
- // Blocks.
- '{' => SyntaxKind::LeftBrace,
- '}' => SyntaxKind::RightBrace,
- '[' => SyntaxKind::LeftBracket,
- ']' => SyntaxKind::RightBracket,
-
- // Parentheses.
- '(' => SyntaxKind::LeftParen,
- ')' => SyntaxKind::RightParen,
-
- // Math.
- '$' => SyntaxKind::Dollar,
-
- // Labels and raw.
- '<' if self.s.at(is_id_continue) => self.label(),
'`' => self.raw(),
+ '<' if self.s.at(is_id_continue) => self.label(),
+ '0'..='9' => self.number(start, c),
+ '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
+ '"' => self.string(),
- // Two-char operators.
'=' if self.s.eat_if('=') => SyntaxKind::EqEq,
'!' if self.s.eat_if('=') => SyntaxKind::ExclEq,
'<' if self.s.eat_if('=') => SyntaxKind::LtEq,
@@ -539,10 +496,17 @@ impl Lexer<'_> {
'.' if self.s.eat_if('.') => SyntaxKind::Dots,
'=' if self.s.eat_if('>') => SyntaxKind::Arrow,
- // Single-char operators.
+ '{' => SyntaxKind::LeftBrace,
+ '}' => SyntaxKind::RightBrace,
+ '[' => SyntaxKind::LeftBracket,
+ ']' => SyntaxKind::RightBracket,
+ '(' => SyntaxKind::LeftParen,
+ ')' => SyntaxKind::RightParen,
+ '$' => SyntaxKind::Dollar,
',' => SyntaxKind::Comma,
';' => SyntaxKind::Semicolon,
':' => SyntaxKind::Colon,
+ '.' => SyntaxKind::Dot,
'+' => SyntaxKind::Plus,
'-' => SyntaxKind::Minus,
'*' => SyntaxKind::Star,
@@ -550,21 +514,10 @@ impl Lexer<'_> {
'=' => SyntaxKind::Eq,
'<' => SyntaxKind::Lt,
'>' => SyntaxKind::Gt,
- '.' if !self.s.at(char::is_ascii_digit) => SyntaxKind::Dot,
- // Identifiers.
c if is_id_start(c) => self.ident(start),
- // Numbers.
- c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
- self.number(start, c)
- }
-
- // Strings.
- '"' => self.string(),
-
- // Invalid token.
- _ => SyntaxKind::Error(ErrorPos::Full, "not valid here".into()),
+ _ => self.error("not valid here"),
}
}
@@ -573,9 +526,9 @@ impl Lexer<'_> {
match self.s.from(start) {
"none" => SyntaxKind::None,
"auto" => SyntaxKind::Auto,
- "true" => SyntaxKind::Bool(true),
- "false" => SyntaxKind::Bool(false),
- id => keyword(id).unwrap_or_else(|| SyntaxKind::Ident(id.into())),
+ "true" => SyntaxKind::Bool,
+ "false" => SyntaxKind::Bool,
+ id => keyword(id).unwrap_or(SyntaxKind::Ident),
}
}
@@ -604,64 +557,54 @@ impl Lexer<'_> {
let number = self.s.get(start..suffix_start);
let suffix = self.s.from(suffix_start);
- // Find out whether it is a simple number.
if suffix.is_empty() {
- if let Ok(i) = number.parse::<i64>() {
- return SyntaxKind::Int(i);
- }
+ return if number.parse::<i64>().is_ok() {
+ SyntaxKind::Int
+ } else if number.parse::<f64>().is_ok() {
+ SyntaxKind::Float
+ } else {
+ self.error("invalid number")
+ };
}
- let Ok(v) = number.parse::<f64>() else {
- return SyntaxKind::Error(ErrorPos::Full, "invalid number".into());
- };
-
- match suffix {
- "" => SyntaxKind::Float(v),
- "pt" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Pt)),
- "mm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Mm)),
- "cm" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::Cm)),
- "in" => SyntaxKind::Numeric(v, Unit::Length(AbsUnit::In)),
- "deg" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Deg)),
- "rad" => SyntaxKind::Numeric(v, Unit::Angle(AngleUnit::Rad)),
- "em" => SyntaxKind::Numeric(v, Unit::Em),
- "fr" => SyntaxKind::Numeric(v, Unit::Fr),
- "%" => SyntaxKind::Numeric(v, Unit::Percent),
- _ => SyntaxKind::Error(ErrorPos::Full, "invalid number suffix".into()),
+ if !matches!(
+ suffix,
+ "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%"
+ ) {
+ return self.error("invalid number suffix");
}
+
+ SyntaxKind::Numeric
}
fn string(&mut self) -> SyntaxKind {
let mut escaped = false;
- let verbatim = self.s.eat_until(|c| {
- if c == '"' && !escaped {
- true
- } else {
- escaped = c == '\\' && !escaped;
- false
- }
+ self.s.eat_until(|c| {
+ let stop = c == '"' && !escaped;
+ escaped = c == '\\' && !escaped;
+ stop
});
- let string = resolve_string(verbatim);
- if self.s.eat_if('"') {
- SyntaxKind::Str(string)
- } else {
+ if !self.s.eat_if('"') {
self.terminated = false;
- SyntaxKind::Error(ErrorPos::End, "expected quote".into())
+ return self.error_at_end("expected quote");
}
+
+ SyntaxKind::Str
}
fn label(&mut self) -> SyntaxKind {
let label = self.s.eat_while(is_id_continue);
- if self.s.eat_if('>') {
- if !label.is_empty() {
- SyntaxKind::Label(label.into())
- } else {
- SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
- }
- } else {
+ if label.is_empty() {
+ return self.error("label cannot be empty");
+ }
+
+ if !self.s.eat_if('>') {
self.terminated = false;
- SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
+ return self.error_at_end("expected closing angle bracket");
}
+
+ SyntaxKind::Label
}
}
@@ -729,6 +672,29 @@ pub fn is_newline(character: char) -> bool {
)
}
+/// Split text at newlines.
+pub(super) fn split_newlines(text: &str) -> Vec<&str> {
+ let mut s = Scanner::new(text);
+ let mut lines = Vec::new();
+ let mut start = 0;
+ let mut end = 0;
+
+ while let Some(c) = s.eat() {
+ if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
+
+ lines.push(&text[start..end]);
+ start = s.cursor();
+ }
+ end = s.cursor();
+ }
+
+ lines.push(&text[start..]);
+ lines
+}
+
/// Whether a string is a valid unicode identifier.
///
/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
@@ -746,13 +712,13 @@ pub fn is_ident(string: &str) -> bool {
/// Whether a character can start an identifier.
#[inline]
-fn is_id_start(c: char) -> bool {
+pub(super) fn is_id_start(c: char) -> bool {
c.is_xid_start() || c == '_'
}
/// Whether a character can continue an identifier.
#[inline]
-fn is_id_continue(c: char) -> bool {
+pub(super) fn is_id_continue(c: char) -> bool {
c.is_xid_continue() || c == '_' || c == '-'
}
diff --git a/src/syntax/linked.rs b/src/syntax/linked.rs
deleted file mode 100644
index e69de29b..00000000
--- a/src/syntax/linked.rs
+++ /dev/null
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 81524aa2..a2bb5766 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -2,22 +2,17 @@
pub mod ast;
-mod incremental;
mod kind;
mod lexer;
mod node;
mod parser;
-mod parsing;
-mod resolve;
+mod reparse;
mod source;
mod span;
pub use self::kind::*;
pub use self::lexer::*;
pub use self::node::*;
-pub use self::parsing::*;
+pub use self::parser::*;
pub use self::source::*;
pub use self::span::*;
-
-use incremental::reparse;
-use parser::*;
diff --git a/src/syntax/node.rs b/src/syntax/node.rs
index 13556ede..283d55b4 100644
--- a/src/syntax/node.rs
+++ b/src/syntax/node.rs
@@ -6,6 +6,7 @@ use std::sync::Arc;
use super::ast::AstNode;
use super::{SourceId, Span, SyntaxKind};
use crate::diag::SourceError;
+use crate::util::EcoString;
/// A node in the untyped syntax tree.
#[derive(Clone, PartialEq, Hash)]
@@ -15,84 +16,106 @@ pub struct SyntaxNode(Repr);
#[derive(Clone, PartialEq, Hash)]
enum Repr {
/// A leaf node.
- Leaf(NodeData),
+ Leaf(LeafNode),
/// A reference-counted inner node.
Inner(Arc<InnerNode>),
+ /// An error.
+ Error(ErrorNode),
}
impl SyntaxNode {
/// Create a new leaf node.
- pub fn leaf(kind: SyntaxKind, len: usize) -> Self {
- Self(Repr::Leaf(NodeData::new(kind, len)))
+ pub fn leaf(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
+ Self(Repr::Leaf(LeafNode::new(kind, text)))
}
/// Create a new inner node with children.
pub fn inner(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
- Self(Repr::Inner(Arc::new(InnerNode::with_children(kind, children))))
+ Self(Repr::Inner(Arc::new(InnerNode::new(kind, children))))
}
- /// The type of the node.
- pub fn kind(&self) -> &SyntaxKind {
- &self.data().kind
+ /// Create a new error node.
+ pub fn error(message: impl Into<EcoString>, pos: ErrorPos, len: usize) -> Self {
+ Self(Repr::Error(ErrorNode::new(message, pos, len)))
}
- /// Take the kind out of the node.
- pub fn take(self) -> SyntaxKind {
- match self.0 {
+ /// The type of the node.
+ pub fn kind(&self) -> SyntaxKind {
+ match &self.0 {
Repr::Leaf(leaf) => leaf.kind,
- Repr::Inner(inner) => inner.data.kind.clone(),
+ Repr::Inner(inner) => inner.kind,
+ Repr::Error(_) => SyntaxKind::Error,
}
}
- /// The length of the node.
+ /// The byte length of the node in the source text.
pub fn len(&self) -> usize {
- self.data().len
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.len(),
+ Repr::Inner(inner) => inner.len,
+ Repr::Error(error) => error.len,
+ }
}
/// The span of the node.
pub fn span(&self) -> Span {
- self.data().span
+ match &self.0 {
+ Repr::Leaf(leaf) => leaf.span,
+ Repr::Inner(inner) => inner.span,
+ Repr::Error(error) => error.span,
+ }
}
- /// The number of descendants, including the node itself.
- pub fn descendants(&self) -> usize {
+ /// The text of the node if it is a leaf node.
+ ///
+ /// Returns an empty string if this is an inner or error node.
+ pub fn text(&self) -> &EcoString {
+ static EMPTY: EcoString = EcoString::new();
match &self.0 {
- Repr::Inner(inner) => inner.descendants,
- Repr::Leaf(_) => 1,
+ Repr::Leaf(leaf) => &leaf.text,
+ Repr::Inner(_) | Repr::Error(_) => &EMPTY,
+ }
+ }
+
+ /// Extract the text from the node.
+ ///
+ /// Returns an empty string if this is an inner or error node.
+ pub fn into_text(self) -> EcoString {
+ match self.0 {
+ Repr::Leaf(leaf) => leaf.text,
+ Repr::Inner(_) | Repr::Error(_) => EcoString::new(),
}
}
/// The node's children.
pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
match &self.0 {
+ Repr::Leaf(_) | Repr::Error(_) => [].iter(),
Repr::Inner(inner) => inner.children.iter(),
- Repr::Leaf(_) => [].iter(),
}
}
- /// Convert the node to a typed AST node.
- pub fn cast<T>(&self) -> Option<T>
- where
- T: AstNode,
- {
+ /// Try to convert the node to a typed AST node.
+ pub fn cast<T: AstNode>(&self) -> Option<T> {
T::from_untyped(self)
}
- /// Get the first child that can cast to the AST type `T`.
- pub fn cast_first_child<T: AstNode>(&self) -> Option<T> {
+ /// Cast the first child that can cast to the AST type `T`.
+ pub fn cast_first_match<T: AstNode>(&self) -> Option<T> {
self.children().find_map(Self::cast)
}
- /// Get the last child that can cast to the AST type `T`.
- pub fn cast_last_child<T: AstNode>(&self) -> Option<T> {
+ /// Cast the last child that can cast to the AST type `T`.
+ pub fn cast_last_match<T: AstNode>(&self) -> Option<T> {
self.children().rev().find_map(Self::cast)
}
/// Whether the node or its children contain an error.
pub fn erroneous(&self) -> bool {
match &self.0 {
+ Repr::Leaf(_) => false,
Repr::Inner(node) => node.erroneous,
- Repr::Leaf(data) => data.kind.is_error(),
+ Repr::Error(_) => true,
}
}
@@ -102,35 +125,41 @@ impl SyntaxNode {
return vec![];
}
- match self.kind() {
- SyntaxKind::Error(pos, message) => {
- vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)]
- }
- _ => self
- .children()
+ if let Repr::Error(error) = &self.0 {
+ vec![SourceError::new(error.span, error.message.clone()).with_pos(error.pos)]
+ } else {
+ self.children()
.filter(|node| node.erroneous())
.flat_map(|node| node.errors())
- .collect(),
+ .collect()
}
}
/// Change the type of the node.
- pub(super) fn convert(&mut self, kind: SyntaxKind) {
+ pub(super) fn convert_to(&mut self, kind: SyntaxKind) {
+ debug_assert!(!kind.is_error());
match &mut self.0 {
+ Repr::Leaf(leaf) => leaf.kind = kind,
Repr::Inner(inner) => {
let node = Arc::make_mut(inner);
- node.erroneous |= kind.is_error();
- node.data.kind = kind;
+ node.kind = kind;
}
- Repr::Leaf(leaf) => leaf.kind = kind,
+ Repr::Error(_) => {}
}
}
+ /// Convert the child to an error.
+ pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) {
+ let len = self.len();
+ *self = SyntaxNode::error(message, ErrorPos::Full, len);
+ }
+
/// Set a synthetic span for the node and all its descendants.
pub(super) fn synthesize(&mut self, span: Span) {
match &mut self.0 {
+ Repr::Leaf(leaf) => leaf.span = span,
Repr::Inner(inner) => Arc::make_mut(inner).synthesize(span),
- Repr::Leaf(leaf) => leaf.synthesize(span),
+ Repr::Error(error) => error.span = span,
}
}
@@ -140,17 +169,25 @@ impl SyntaxNode {
id: SourceId,
within: Range<u64>,
) -> NumberingResult {
+ if within.start >= within.end {
+ return Err(Unnumberable);
+ }
+
+ let mid = Span::new(id, (within.start + within.end) / 2);
match &mut self.0 {
- Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
- Repr::Leaf(leaf) => leaf.numberize(id, within),
+ Repr::Leaf(leaf) => leaf.span = mid,
+ Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?,
+ Repr::Error(error) => error.span = mid,
}
+
+ Ok(())
}
/// If the span points into this node, convert it to a byte range.
pub(super) fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
match &self.0 {
Repr::Inner(inner) => inner.range(span, offset),
- Repr::Leaf(leaf) => leaf.range(span, offset),
+ _ => (self.span() == span).then(|| offset..offset + self.len()),
}
}
@@ -159,10 +196,18 @@ impl SyntaxNode {
matches!(self.0, Repr::Leaf(_))
}
+ /// The number of descendants, including the node itself.
+ pub(super) fn descendants(&self) -> usize {
+ match &self.0 {
+ Repr::Leaf(_) | Repr::Error(_) => 1,
+ Repr::Inner(inner) => inner.descendants,
+ }
+ }
+
/// The node's children, mutably.
pub(super) fn children_mut(&mut self) -> &mut [SyntaxNode] {
match &mut self.0 {
- Repr::Leaf(_) => &mut [],
+ Repr::Leaf(_) | Repr::Error(_) => &mut [],
Repr::Inner(inner) => &mut Arc::make_mut(inner).children,
}
}
@@ -199,19 +244,12 @@ impl SyntaxNode {
}
}
- /// The metadata of the node.
- fn data(&self) -> &NodeData {
- match &self.0 {
- Repr::Inner(inner) => &inner.data,
- Repr::Leaf(leaf) => leaf,
- }
- }
-
/// The upper bound of assigned numbers in this subtree.
fn upper(&self) -> u64 {
match &self.0 {
Repr::Inner(inner) => inner.upper,
Repr::Leaf(leaf) => leaf.span.number() + 1,
+ Repr::Error(error) => error.span.number() + 1,
}
}
}
@@ -221,21 +259,64 @@ impl Debug for SyntaxNode {
match &self.0 {
Repr::Inner(node) => node.fmt(f),
Repr::Leaf(node) => node.fmt(f),
+ Repr::Error(node) => node.fmt(f),
}
}
}
impl Default for SyntaxNode {
fn default() -> Self {
- Self::leaf(SyntaxKind::None, 0)
+ Self::error("", ErrorPos::Full, 0)
+ }
+}
+
+/// A leaf node in the untyped syntax tree.
+#[derive(Clone, Hash)]
+struct LeafNode {
+ /// What kind of node this is (each kind would have its own struct in a
+ /// strongly typed AST).
+ kind: SyntaxKind,
+ /// The source text of the node.
+ text: EcoString,
+ /// The node's span.
+ span: Span,
+}
+
+impl LeafNode {
+ /// Create a new leaf node.
+ fn new(kind: SyntaxKind, text: impl Into<EcoString>) -> Self {
+ debug_assert!(!kind.is_error());
+ Self { kind, text: text.into(), span: Span::detached() }
+ }
+
+ /// The byte length of the node in the source text.
+ fn len(&self) -> usize {
+ self.text.len()
+ }
+}
+
+impl Debug for LeafNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{:?}: {}", self.kind, self.len())
+ }
+}
+
+impl PartialEq for LeafNode {
+ fn eq(&self, other: &Self) -> bool {
+ self.kind == other.kind && self.text == other.text
}
}
/// An inner node in the untyped syntax tree.
#[derive(Clone, Hash)]
struct InnerNode {
- /// Node metadata.
- data: NodeData,
+ /// What kind of node this is (each kind would have its own struct in a
+ /// strongly typed AST).
+ kind: SyntaxKind,
+ /// The byte length of the node in the source.
+ len: usize,
+ /// The node's span.
+ span: Span,
/// The number of nodes in the whole subtree, including this node.
descendants: usize,
/// Whether this node or any of its children are erroneous.
@@ -248,10 +329,12 @@ struct InnerNode {
impl InnerNode {
/// Create a new inner node with the given kind and children.
- fn with_children(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
+ fn new(kind: SyntaxKind, children: Vec<SyntaxNode>) -> Self {
+ debug_assert!(!kind.is_error());
+
let mut len = 0;
let mut descendants = 1;
- let mut erroneous = kind.is_error();
+ let mut erroneous = false;
for child in &children {
len += child.len();
@@ -260,7 +343,9 @@ impl InnerNode {
}
Self {
- data: NodeData::new(kind, len),
+ kind,
+ len,
+ span: Span::detached(),
descendants,
erroneous,
upper: 0,
@@ -270,7 +355,7 @@ impl InnerNode {
/// Set a synthetic span for the node and all its descendants.
fn synthesize(&mut self, span: Span) {
- self.data.synthesize(span);
+ self.span = span;
for child in &mut self.children {
child.synthesize(span);
}
@@ -310,7 +395,7 @@ impl InnerNode {
let mut start = within.start;
if range.is_none() {
let end = start + stride;
- self.data.numberize(id, start..end)?;
+ self.span = Span::new(id, (start + end) / 2);
self.upper = within.end;
start = end;
}
@@ -329,14 +414,14 @@ impl InnerNode {
/// If the span points into this node, convert it to a byte range.
fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> {
// Check whether we found it.
- if let Some(range) = self.data.range(span, offset) {
- return Some(range);
+ if span == self.span {
+ return Some(offset..offset + self.len);
}
// The parent of a subtree has a smaller span number than all of its
// descendants. Therefore, we can bail out early if the target span's
// number is smaller than our number.
- if span.number() < self.data.span.number() {
+ if span.number() < self.span.number() {
return None;
}
@@ -371,8 +456,7 @@ impl InnerNode {
let superseded = &self.children[range.clone()];
// Compute the new byte length.
- self.data.len = self.data.len
- + replacement.iter().map(SyntaxNode::len).sum::<usize>()
+ self.len = self.len + replacement.iter().map(SyntaxNode::len).sum::<usize>()
- superseded.iter().map(SyntaxNode::len).sum::<usize>();
// Compute the new number of descendants.
@@ -412,7 +496,7 @@ impl InnerNode {
.start
.checked_sub(1)
.and_then(|i| self.children.get(i))
- .map_or(self.data.span.number() + 1, |child| child.upper());
+ .map_or(self.span.number() + 1, |child| child.upper());
// The upper bound for renumbering is either
// - the span number of the first child after the to-be-renumbered
@@ -426,7 +510,7 @@ impl InnerNode {
// Try to renumber.
let within = start_number..end_number;
- let id = self.data.span.source();
+ let id = self.span.source();
if self.numberize(id, Some(renumber), within).is_ok() {
return Ok(());
}
@@ -450,7 +534,7 @@ impl InnerNode {
prev_descendants: usize,
new_descendants: usize,
) {
- self.data.len = self.data.len + new_len - prev_len;
+ self.len = self.len + new_len - prev_len;
self.descendants = self.descendants + new_descendants - prev_descendants;
self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
}
@@ -458,7 +542,7 @@ impl InnerNode {
impl Debug for InnerNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- self.data.fmt(f)?;
+ write!(f, "{:?}: {}", self.kind, self.len)?;
if !self.children.is_empty() {
f.write_str(" ")?;
f.debug_list().entries(&self.children).finish()?;
@@ -469,64 +553,62 @@ impl Debug for InnerNode {
impl PartialEq for InnerNode {
fn eq(&self, other: &Self) -> bool {
- self.data == other.data
+ self.kind == other.kind
+ && self.len == other.len
&& self.descendants == other.descendants
&& self.erroneous == other.erroneous
&& self.children == other.children
}
}
-/// Data shared between leaf and inner nodes.
+/// An error node in the untyped syntax tree.
#[derive(Clone, Hash)]
-struct NodeData {
- /// What kind of node this is (each kind would have its own struct in a
- /// strongly typed AST).
- kind: SyntaxKind,
- /// The byte length of the node in the source.
+struct ErrorNode {
+ /// The error message.
+ message: EcoString,
+ /// Where in the node an error should be annotated.
+ pos: ErrorPos,
+ /// The byte length of the error in the source.
len: usize,
/// The node's span.
span: Span,
}
-impl NodeData {
- /// Create new node metadata.
- fn new(kind: SyntaxKind, len: usize) -> Self {
- Self { len, kind, span: Span::detached() }
- }
-
- /// Set a synthetic span for the node.
- fn synthesize(&mut self, span: Span) {
- self.span = span;
- }
-
- /// Assign a span to the node.
- fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
- if within.start < within.end {
- self.span = Span::new(id, (within.start + within.end) / 2);
- Ok(())
- } else {
- Err(Unnumberable)
+impl ErrorNode {
+ /// Create a new error node.
+ fn new(message: impl Into<EcoString>, pos: ErrorPos, len: usize) -> Self {
+ Self {
+ message: message.into(),
+ pos,
+ len,
+ span: Span::detached(),
}
}
-
- /// If the span points into this node, convert it to a byte range.
- fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
- (self.span == span).then(|| offset..offset + self.len)
- }
}
-impl Debug for NodeData {
+impl Debug for ErrorNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{:?}: {}", self.kind, self.len)
+ write!(f, "({}): {}", self.message, self.len)
}
}
-impl PartialEq for NodeData {
+impl PartialEq for ErrorNode {
fn eq(&self, other: &Self) -> bool {
- self.kind == other.kind && self.len == other.len
+ self.message == other.message && self.pos == other.pos && self.len == other.len
}
}
+/// Where in a node an error should be annotated.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum ErrorPos {
+ /// Over the full width of the node.
+ Full,
+ /// At the start of the node.
+ Start,
+ /// At the end of the node.
+ End,
+}
+
/// A syntax node in a context.
///
/// Knows its exact offset in the file and provides access to its
@@ -542,7 +624,7 @@ pub struct LinkedNode<'a> {
}
impl<'a> LinkedNode<'a> {
- /// Start a new traversal at the source's root node.
+ /// Start a new traversal at a root node.
pub fn new(root: &'a SyntaxNode) -> Self {
Self { node: root, parent: None, index: 0, offset: 0 }
}
@@ -557,17 +639,17 @@ impl<'a> LinkedNode<'a> {
self.index
}
- /// The absolute byte offset of the this node in the source file.
+ /// The absolute byte offset of this node in the source file.
pub fn offset(&self) -> usize {
self.offset
}
- /// The byte range of the this node in the source file.
+ /// The byte range of this node in the source file.
pub fn range(&self) -> Range<usize> {
self.offset..self.offset + self.node.len()
}
- /// Get this node's children.
+ /// An iterator over this node's children.
pub fn children(&self) -> LinkedChildren<'a> {
LinkedChildren {
parent: Rc::new(self.clone()),
@@ -586,7 +668,7 @@ impl<'a> LinkedNode<'a> {
}
/// Get the kind of this node's parent.
- pub fn parent_kind(&self) -> Option<&'a SyntaxKind> {
+ pub fn parent_kind(&self) -> Option<SyntaxKind> {
Some(self.parent()?.node.kind())
}
@@ -648,7 +730,7 @@ impl<'a> LinkedNode<'a> {
None
}
- /// Get the leaf at the specified cursor position.
+ /// Get the leaf at the specified byte offset.
pub fn leaf_at(&self, cursor: usize) -> Option<Self> {
if self.node.children().len() == 0 && cursor <= self.offset + self.len() {
return Some(self.clone());
@@ -784,13 +866,13 @@ mod tests {
let node = LinkedNode::new(source.root()).leaf_at(7).unwrap();
assert_eq!(node.offset(), 5);
assert_eq!(node.len(), 4);
- assert_eq!(node.kind(), &SyntaxKind::Ident("text".into()));
+ assert_eq!(node.kind(), SyntaxKind::Ident);
// Go back to "#set". Skips the space.
let prev = node.prev_sibling().unwrap();
assert_eq!(prev.offset(), 0);
assert_eq!(prev.len(), 4);
- assert_eq!(prev.kind(), &SyntaxKind::Set);
+ assert_eq!(prev.kind(), SyntaxKind::Set);
}
#[test]
@@ -798,15 +880,15 @@ mod tests {
let source = Source::detached("#set fun(12pt, red)");
let leaf = LinkedNode::new(source.root()).leaf_at(6).unwrap();
let prev = leaf.prev_leaf().unwrap();
- assert_eq!(leaf.kind(), &SyntaxKind::Ident("fun".into()));
- assert_eq!(prev.kind(), &SyntaxKind::Set);
+ assert_eq!(leaf.kind(), SyntaxKind::Ident);
+ assert_eq!(prev.kind(), SyntaxKind::Set);
let source = Source::detached("#let x = 10");
let leaf = LinkedNode::new(source.root()).leaf_at(9).unwrap();
let prev = leaf.prev_leaf().unwrap();
let next = leaf.next_leaf().unwrap();
- assert_eq!(prev.kind(), &SyntaxKind::Eq);
- assert_eq!(leaf.kind(), &SyntaxKind::Space { newlines: 0 });
- assert_eq!(next.kind(), &SyntaxKind::Int(10));
+ assert_eq!(prev.kind(), SyntaxKind::Eq);
+ assert_eq!(leaf.kind(), SyntaxKind::Space { newlines: 0 });
+ assert_eq!(next.kind(), SyntaxKind::Int);
}
}
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
index d2ef6e0e..1584e59b 100644
--- a/src/syntax/parser.rs
+++ b/src/syntax/parser.rs
@@ -1,14 +1,1118 @@
+use std::collections::HashSet;
use std::fmt::{self, Display, Formatter};
use std::mem;
-use std::ops::Range;
+use super::ast::{self, Assoc, BinOp, UnOp};
use super::{ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode};
use crate::util::{format_eco, EcoString};
+/// Parse a source file.
+pub fn parse(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, LexMode::Markup);
+ markup(&mut p, true);
+ p.finish().into_iter().next().unwrap()
+}
+
+/// Parse code directly, only used for syntax highlighting.
+pub fn parse_code(text: &str) -> SyntaxNode {
+ let mut p = Parser::new(text, LexMode::Code);
+ p.perform(SyntaxKind::CodeBlock, code);
+ p.finish().into_iter().next().unwrap()
+}
+
+/// Reparse a code block.
+///
+/// Returns `Some` if all of the input was consumed.
+pub(super) fn reparse_code_block(
+ prefix: &str,
+ text: &str,
+ end_pos: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+ let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
+ if !p.at(SyntaxKind::LeftBrace) {
+ return None;
+ }
+
+ code_block(&mut p);
+
+ let (mut node, terminated) = p.consume()?;
+ let first = node.remove(0);
+ if first.len() != end_pos {
+ return None;
+ }
+
+ Some((vec![first], terminated, 1))
+}
+
+/// Reparse a content block.
+///
+/// Returns `Some` if all of the input was consumed.
+pub(super) fn reparse_content_block(
+ prefix: &str,
+ text: &str,
+ end_pos: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+ let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
+ if !p.at(SyntaxKind::LeftBracket) {
+ return None;
+ }
+
+ content_block(&mut p);
+
+ let (mut node, terminated) = p.consume()?;
+ let first = node.remove(0);
+ if first.len() != end_pos {
+ return None;
+ }
+
+ Some((vec![first], terminated, 1))
+}
+
+/// Reparse a sequence of markup elements without the topmost node.
+///
+/// Returns `Some` if all of the input was consumed.
+pub(super) fn reparse_markup_elements(
+ prefix: &str,
+ text: &str,
+ end_pos: usize,
+ differential: isize,
+ reference: &[SyntaxNode],
+ mut at_start: bool,
+ min_indent: usize,
+) -> Option<(Vec<SyntaxNode>, bool, usize)> {
+ let mut p = Parser::with_prefix(prefix, text, LexMode::Markup);
+
+ let mut node: Option<&SyntaxNode> = None;
+ let mut iter = reference.iter();
+ let mut offset = differential;
+ let mut replaced = 0;
+ let mut stopped = false;
+
+ 'outer: while !p.eof() {
+ if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() {
+ if p.column(p.current_end()) < min_indent {
+ return None;
+ }
+ }
+
+ markup_node(&mut p, &mut at_start);
+
+ if p.prev_end() <= end_pos {
+ continue;
+ }
+
+ let recent = p.marker().before(&p).unwrap();
+ let recent_start = p.prev_end() - recent.len();
+
+ while offset <= recent_start as isize {
+ if let Some(node) = node {
+ // The nodes are equal, at the same position and have the
+ // same content. The parsing trees have converged again, so
+ // the reparse may stop here.
+ if offset == recent_start as isize && node == recent {
+ replaced -= 1;
+ stopped = true;
+ break 'outer;
+ }
+ }
+
+ if let Some(node) = node {
+ offset += node.len() as isize;
+ }
+
+ node = iter.next();
+ if node.is_none() {
+ break;
+ }
+
+ replaced += 1;
+ }
+ }
+
+ if p.eof() && !stopped {
+ replaced = reference.len();
+ }
+
+ let (mut res, terminated) = p.consume()?;
+ if stopped {
+ res.pop().unwrap();
+ }
+
+ Some((res, terminated, replaced))
+}
+
+/// Parse markup.
+///
+/// If `at_start` is true, things like headings that may only appear at the
+/// beginning of a line or content block are initially allowed.
+fn markup(p: &mut Parser, mut at_start: bool) {
+ p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| {
+ while !p.eof() {
+ markup_node(p, &mut at_start);
+ }
+ });
+}
+
+/// Parse markup that stays right of the given `min_indent` column.
+fn markup_indented(p: &mut Parser, min_indent: usize) {
+ p.eat_while(|t| match t {
+ SyntaxKind::Space { newlines } => newlines == 0,
+ SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
+ _ => false,
+ });
+
+ let marker = p.marker();
+ let mut at_start = false;
+
+ while !p.eof() {
+ match p.peek() {
+ Some(SyntaxKind::Space { newlines: (1..) })
+ if p.column(p.current_end()) < min_indent =>
+ {
+ break;
+ }
+ _ => {}
+ }
+
+ markup_node(p, &mut at_start);
+ }
+
+ marker.end(p, SyntaxKind::Markup { min_indent });
+}
+
+/// Parse a line of markup that can prematurely end if `f` returns true.
+fn markup_line<F>(p: &mut Parser, mut f: F)
+where
+ F: FnMut(SyntaxKind) -> bool,
+{
+ p.eat_while(|t| match t {
+ SyntaxKind::Space { newlines } => newlines == 0,
+ SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
+ _ => false,
+ });
+
+ p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| {
+ let mut at_start = false;
+ while let Some(kind) = p.peek() {
+ if let SyntaxKind::Space { newlines: (1..) } = kind {
+ break;
+ }
+
+ if f(kind) {
+ break;
+ }
+
+ markup_node(p, &mut at_start);
+ }
+ });
+}
+
+fn markup_node(p: &mut Parser, at_start: &mut bool) {
+ let Some(token) = p.peek() else { return };
+ match token {
+ // Whitespace.
+ SyntaxKind::Space { newlines } => {
+ *at_start |= newlines > 0;
+ p.eat();
+ return;
+ }
+
+ // Comments.
+ SyntaxKind::LineComment | SyntaxKind::BlockComment => {
+ p.eat();
+ return;
+ }
+
+ // Text and markup.
+ SyntaxKind::Text
+ | SyntaxKind::Linebreak
+ | SyntaxKind::SmartQuote { .. }
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
+ | SyntaxKind::Symbol
+ | SyntaxKind::Link
+ | SyntaxKind::Raw { .. }
+ | SyntaxKind::Ref => p.eat(),
+
+ // Math.
+ SyntaxKind::Dollar => math(p),
+
+ // Strong, emph, heading.
+ SyntaxKind::Star => strong(p),
+ SyntaxKind::Underscore => emph(p),
+ SyntaxKind::Eq => heading(p, *at_start),
+
+ // Lists.
+ SyntaxKind::Minus => list_item(p, *at_start),
+ SyntaxKind::Plus | SyntaxKind::EnumNumbering => enum_item(p, *at_start),
+ SyntaxKind::Slash => {
+ term_item(p, *at_start).ok();
+ }
+ SyntaxKind::Colon => {
+ let marker = p.marker();
+ p.eat();
+ marker.convert(p, SyntaxKind::Text);
+ }
+
+ // Hashtag + keyword / identifier.
+ SyntaxKind::Ident
+ | SyntaxKind::Label
+ | SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::If
+ | SyntaxKind::While
+ | SyntaxKind::For
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ | SyntaxKind::Break
+ | SyntaxKind::Continue
+ | SyntaxKind::Return => embedded_expr(p),
+
+ // Code and content block.
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+
+ SyntaxKind::Error => p.eat(),
+ _ => p.unexpected(),
+ };
+
+ *at_start = false;
+}
+
+fn strong(p: &mut Parser) {
+ p.perform(SyntaxKind::Strong, |p| {
+ p.start_group(Group::Strong);
+ markup(p, false);
+ p.end_group();
+ })
+}
+
+fn emph(p: &mut Parser) {
+ p.perform(SyntaxKind::Emph, |p| {
+ p.start_group(Group::Emph);
+ markup(p, false);
+ p.end_group();
+ })
+}
+
+fn heading(p: &mut Parser, at_start: bool) {
+ let marker = p.marker();
+ let mut markers = vec![];
+ while p.at(SyntaxKind::Eq) {
+ markers.push(p.marker());
+ p.eat();
+ }
+
+ if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
+ p.eat_while(|kind| kind == SyntaxKind::Space { newlines: 0 });
+ markup_line(p, |kind| matches!(kind, SyntaxKind::Label));
+ marker.end(p, SyntaxKind::Heading);
+ } else {
+ for marker in markers {
+ marker.convert(p, SyntaxKind::Text);
+ }
+ }
+}
+
+fn list_item(p: &mut Parser, at_start: bool) {
+ let marker = p.marker();
+ p.assert(SyntaxKind::Minus);
+
+ let min_indent = p.column(p.prev_end());
+ if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
+ markup_indented(p, min_indent);
+ marker.end(p, SyntaxKind::ListItem);
+ } else {
+ marker.convert(p, SyntaxKind::Text);
+ }
+}
+
+fn enum_item(p: &mut Parser, at_start: bool) {
+ let marker = p.marker();
+ p.eat();
+
+ let min_indent = p.column(p.prev_end());
+ if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
+ markup_indented(p, min_indent);
+ marker.end(p, SyntaxKind::EnumItem);
+ } else {
+ marker.convert(p, SyntaxKind::Text);
+ }
+}
+
+fn term_item(p: &mut Parser, at_start: bool) -> ParseResult {
+ let marker = p.marker();
+ p.eat();
+
+ let min_indent = p.column(p.prev_end());
+ if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
+ markup_line(p, |node| matches!(node, SyntaxKind::Colon));
+ p.expect(SyntaxKind::Colon)?;
+ markup_indented(p, min_indent);
+ marker.end(p, SyntaxKind::TermItem);
+ } else {
+ marker.convert(p, SyntaxKind::Text);
+ }
+
+ Ok(())
+}
+
+fn embedded_expr(p: &mut Parser) {
+ // Does the expression need termination or can content follow directly?
+ let stmt = matches!(
+ p.peek(),
+ Some(
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ )
+ );
+
+ p.start_group(Group::Expr);
+ let res = expr_prec(p, true, 0);
+ if stmt && res.is_ok() && !p.eof() {
+ p.expected("semicolon or line break");
+ }
+ p.end_group();
+}
+
+fn math(p: &mut Parser) {
+ p.perform(SyntaxKind::Math, |p| {
+ p.start_group(Group::Math);
+ while !p.eof() {
+ math_node(p);
+ }
+ p.end_group();
+ });
+}
+
+fn math_node(p: &mut Parser) {
+ math_node_prec(p, 0, None)
+}
+
+fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
+ let marker = p.marker();
+ math_primary(p);
+
+ loop {
+ let (kind, mut prec, assoc, stop) = match p.peek() {
+ v if v == stop => break,
+ Some(SyntaxKind::Underscore) => {
+ (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat))
+ }
+ Some(SyntaxKind::Hat) => {
+ (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore))
+ }
+ Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None),
+ _ => break,
+ };
+
+ if prec < min_prec {
+ break;
+ }
+
+ match assoc {
+ Assoc::Left => prec += 1,
+ Assoc::Right => {}
+ }
+
+ p.eat();
+ math_node_prec(p, prec, stop);
+
+ // Allow up to two different scripts. We do not risk encountering the
+ // previous script kind again here due to right-associativity.
+ if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) {
+ math_node_prec(p, prec, None);
+ }
+
+ marker.end(p, kind);
+ }
+}
+
+/// Parse a primary math node.
+fn math_primary(p: &mut Parser) {
+ let Some(token) = p.peek() else { return };
+ match token {
+ // Spaces and expressions.
+ SyntaxKind::Space { .. }
+ | SyntaxKind::Linebreak
+ | SyntaxKind::Escape
+ | SyntaxKind::Str
+ | SyntaxKind::Shorthand
+ | SyntaxKind::AlignPoint
+ | SyntaxKind::Symbol => p.eat(),
+
+ // Atoms.
+ SyntaxKind::Atom => match p.peek_src() {
+ "(" => math_group(p, Group::MathRow('(', ')')),
+ "{" => math_group(p, Group::MathRow('{', '}')),
+ "[" => math_group(p, Group::MathRow('[', ']')),
+ _ => p.eat(),
+ },
+
+ // Identifiers and math calls.
+ SyntaxKind::Ident => {
+ let marker = p.marker();
+ p.eat();
+
+ // A directly following parenthesis means this is a function call.
+ if matches!(p.peek_direct(), Some(SyntaxKind::Atom) if p.peek_src() == "(") {
+ marker.perform(p, SyntaxKind::FuncCall, math_args);
+ }
+ }
+
+ // Hashtag + keyword / identifier.
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::If
+ | SyntaxKind::While
+ | SyntaxKind::For
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ | SyntaxKind::Break
+ | SyntaxKind::Continue
+ | SyntaxKind::Return => embedded_expr(p),
+
+ // Code and content block.
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+
+ _ => p.unexpected(),
+ }
+}
+
+fn math_group(p: &mut Parser, group: Group) {
+ p.perform(SyntaxKind::Math, |p| {
+ p.start_group(group);
+ while !p.eof() {
+ math_node(p);
+ }
+ p.end_group();
+ })
+}
+
+fn expr(p: &mut Parser) -> ParseResult {
+ expr_prec(p, false, 0)
+}
+
+/// Parse an expression with operators having at least the minimum precedence.
+///
+/// If `atomic` is true, this does not parse binary operations and arrow
+/// functions, which is exactly what we want in a shorthand expression directly
+/// in markup.
+///
+/// Stops parsing at operations with lower precedence than `min_prec`.
+fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
+ let marker = p.marker();
+
+ // Start the unary expression.
+ match p.peek().and_then(UnOp::from_token) {
+ Some(op) if !atomic => {
+ p.eat();
+ let prec = op.precedence();
+ expr_prec(p, atomic, prec)?;
+ marker.end(p, SyntaxKind::Unary);
+ }
+ _ => primary(p, atomic)?,
+ };
+
+ loop {
+ // Parenthesis or bracket means this is a function call.
+ if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() {
+ marker.perform(p, SyntaxKind::FuncCall, args)?;
+ continue;
+ }
+
+ if atomic {
+ break;
+ }
+
+ // Method call or field access.
+ if p.eat_if(SyntaxKind::Dot) {
+ ident(p)?;
+ if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct()
+ {
+ marker.perform(p, SyntaxKind::MethodCall, args)?;
+ } else {
+ marker.end(p, SyntaxKind::FieldAccess);
+ }
+ continue;
+ }
+
+ let op = if p.eat_if(SyntaxKind::Not) {
+ if p.at(SyntaxKind::In) {
+ BinOp::NotIn
+ } else {
+ p.expected("keyword `in`");
+ return Err(ParseError);
+ }
+ } else {
+ match p.peek().and_then(BinOp::from_token) {
+ Some(binop) => binop,
+ None => break,
+ }
+ };
+
+ let mut prec = op.precedence();
+ if prec < min_prec {
+ break;
+ }
+
+ p.eat();
+
+ match op.assoc() {
+ Assoc::Left => prec += 1,
+ Assoc::Right => {}
+ }
+
+ marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?;
+ }
+
+ Ok(())
+}
+
+fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
+ match p.peek() {
+ // Literals and few other things.
+ Some(
+ SyntaxKind::None
+ | SyntaxKind::Auto
+ | SyntaxKind::Int
+ | SyntaxKind::Float
+ | SyntaxKind::Bool
+ | SyntaxKind::Numeric
+ | SyntaxKind::Str
+ | SyntaxKind::Label
+ | SyntaxKind::Raw { .. },
+ ) => {
+ p.eat();
+ Ok(())
+ }
+
+ // Things that start with an identifier.
+ Some(SyntaxKind::Ident) => {
+ let marker = p.marker();
+ p.eat();
+
+ // Arrow means this is a closure's lone parameter.
+ if !atomic && p.at(SyntaxKind::Arrow) {
+ marker.end(p, SyntaxKind::Params);
+ p.assert(SyntaxKind::Arrow);
+ marker.perform(p, SyntaxKind::Closure, expr)
+ } else {
+ Ok(())
+ }
+ }
+
+ // Structures.
+ Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
+ Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
+ Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
+ Some(SyntaxKind::Dollar) => Ok(math(p)),
+
+ // Keywords.
+ Some(SyntaxKind::Let) => let_binding(p),
+ Some(SyntaxKind::Set) => set_rule(p),
+ Some(SyntaxKind::Show) => show_rule(p),
+ Some(SyntaxKind::If) => conditional(p),
+ Some(SyntaxKind::While) => while_loop(p),
+ Some(SyntaxKind::For) => for_loop(p),
+ Some(SyntaxKind::Import) => module_import(p),
+ Some(SyntaxKind::Include) => module_include(p),
+ Some(SyntaxKind::Break) => break_stmt(p),
+ Some(SyntaxKind::Continue) => continue_stmt(p),
+ Some(SyntaxKind::Return) => return_stmt(p),
+
+ Some(SyntaxKind::Error) => {
+ p.eat();
+ Err(ParseError)
+ }
+
+ // Nothing.
+ _ => {
+ p.expected_found("expression");
+ Err(ParseError)
+ }
+ }
+}
+
+fn ident(p: &mut Parser) -> ParseResult {
+ match p.peek() {
+ Some(SyntaxKind::Ident) => {
+ p.eat();
+ Ok(())
+ }
+ _ => {
+ p.expected_found("identifier");
+ Err(ParseError)
+ }
+ }
+}
+
+/// Parse something that starts with a parenthesis, which can be either of:
+/// - Array literal
+/// - Dictionary literal
+/// - Parenthesized expression
+/// - Parameter list of closure expression
+fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
+ let marker = p.marker();
+
+ p.start_group(Group::Paren);
+ let colon = p.eat_if(SyntaxKind::Colon);
+ let kind = collection(p, true).0;
+ p.end_group();
+
+ // Leading colon makes this a dictionary.
+ if colon {
+ dict(p, marker);
+ return Ok(());
+ }
+
+ // Arrow means this is a closure's parameter list.
+ if !atomic && p.at(SyntaxKind::Arrow) {
+ params(p, marker);
+ p.assert(SyntaxKind::Arrow);
+ return marker.perform(p, SyntaxKind::Closure, expr);
+ }
+
+ // Transform into the identified collection.
+ match kind {
+ CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized),
+ CollectionKind::Positional => array(p, marker),
+ CollectionKind::Named => dict(p, marker),
+ }
+
+ Ok(())
+}
+
+/// The type of a collection.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum CollectionKind {
+ /// The collection is only one item and has no comma.
+ Group,
+ /// The collection starts with a positional item and has multiple items or a
+ /// trailing comma.
+ Positional,
+ /// The collection starts with a colon or named item.
+ Named,
+}
+
+/// Parse a collection.
+///
+/// Returns the kind of the collection (group, positional or named) and the
+/// number of items it contains.
+fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) {
+ let mut collection_kind = None;
+ let mut items = 0;
+ let mut can_group = true;
+ let mut missing_coma: Option<Marker> = None;
+
+ while !p.eof() {
+ let Ok(item_kind) = item(p, keyed) else {
+ p.eat_if(SyntaxKind::Comma);
+ collection_kind = Some(CollectionKind::Group);
+ continue;
+ };
+
+ match item_kind {
+ SyntaxKind::Spread => can_group = false,
+ SyntaxKind::Named if collection_kind.is_none() => {
+ collection_kind = Some(CollectionKind::Named);
+ can_group = false;
+ }
+ _ if collection_kind.is_none() => {
+ collection_kind = Some(CollectionKind::Positional);
+ }
+ _ => {}
+ }
+
+ items += 1;
+
+ if let Some(marker) = missing_coma.take() {
+ p.expected_at(marker, "comma");
+ }
+
+ if p.eof() {
+ break;
+ }
+
+ if p.eat_if(SyntaxKind::Comma) {
+ can_group = false;
+ } else {
+ missing_coma = Some(p.trivia_start());
+ }
+ }
+
+ let kind = if can_group && items == 1 {
+ CollectionKind::Group
+ } else {
+ collection_kind.unwrap_or(CollectionKind::Positional)
+ };
+
+ (kind, items)
+}
+
+fn item(p: &mut Parser, keyed: bool) -> ParseResult<SyntaxKind> {
+ let marker = p.marker();
+ if p.eat_if(SyntaxKind::Dots) {
+ marker.perform(p, SyntaxKind::Spread, expr)?;
+ return Ok(SyntaxKind::Spread);
+ }
+
+ expr(p)?;
+
+ if p.at(SyntaxKind::Colon) {
+ match marker.after(p).map(|c| c.kind()) {
+ Some(SyntaxKind::Ident) => {
+ p.eat();
+ marker.perform(p, SyntaxKind::Named, expr)?;
+ }
+ Some(SyntaxKind::Str) if keyed => {
+ p.eat();
+ marker.perform(p, SyntaxKind::Keyed, expr)?;
+ }
+ kind => {
+ let mut msg = EcoString::from("expected identifier");
+ if keyed {
+ msg.push_str(" or string");
+ }
+ if let Some(kind) = kind {
+ msg.push_str(", found ");
+ msg.push_str(kind.name());
+ }
+ marker.to_error(p, msg);
+ p.eat();
+ marker.perform(p, SyntaxKind::Named, expr).ok();
+ return Err(ParseError);
+ }
+ }
+
+ Ok(SyntaxKind::Named)
+ } else {
+ Ok(SyntaxKind::None)
+ }
+}
+
+fn array(p: &mut Parser, marker: Marker) {
+ marker.filter_children(p, |x| match x.kind() {
+ SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"),
+ _ => Ok(()),
+ });
+ marker.end(p, SyntaxKind::Array);
+}
+
+fn dict(p: &mut Parser, marker: Marker) {
+ let mut used = HashSet::new();
+ marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
+ SyntaxKind::Named | SyntaxKind::Keyed => {
+ if let Some(child) = x.children().next() {
+ let key = match child.cast::<ast::Str>() {
+ Some(str) => str.get(),
+ None => child.text().clone(),
+ };
+
+ if !used.insert(key) {
+ return Err("pair has duplicate key");
+ }
+ }
+ Ok(())
+ }
+ SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()),
+ _ => Err("expected named or keyed pair"),
+ });
+ marker.end(p, SyntaxKind::Dict);
+}
+
+fn params(p: &mut Parser, marker: Marker) {
+ marker.filter_children(p, |x| match x.kind() {
+ kind if kind.is_paren() => Ok(()),
+ SyntaxKind::Named | SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
+ SyntaxKind::Spread
+ if matches!(
+ x.children().last().map(|child| child.kind()),
+ Some(SyntaxKind::Ident)
+ ) =>
+ {
+ Ok(())
+ }
+ _ => Err("expected identifier, named pair or argument sink"),
+ });
+ marker.end(p, SyntaxKind::Params);
+}
+
+/// Parse a code block: `{...}`.
+fn code_block(p: &mut Parser) {
+ p.perform(SyntaxKind::CodeBlock, |p| {
+ p.start_group(Group::Brace);
+ code(p);
+ p.end_group();
+ });
+}
+
+fn code(p: &mut Parser) {
+ while !p.eof() {
+ p.start_group(Group::Expr);
+ if expr(p).is_ok() && !p.eof() {
+ p.expected("semicolon or line break");
+ }
+ p.end_group();
+
+ // Forcefully skip over newlines since the group's contents can't.
+ p.eat_while(SyntaxKind::is_space);
+ }
+}
+
+fn content_block(p: &mut Parser) {
+ p.perform(SyntaxKind::ContentBlock, |p| {
+ p.start_group(Group::Bracket);
+ markup(p, true);
+ p.end_group();
+ });
+}
+
+fn args(p: &mut Parser) -> ParseResult {
+ match p.peek_direct() {
+ Some(SyntaxKind::LeftParen) => {}
+ Some(SyntaxKind::LeftBracket) => {}
+ _ => {
+ p.expected_found("argument list");
+ return Err(ParseError);
+ }
+ }
+
+ p.perform(SyntaxKind::Args, |p| {
+ if p.at(SyntaxKind::LeftParen) {
+ let marker = p.marker();
+ p.start_group(Group::Paren);
+ collection(p, false);
+ p.end_group();
+
+ let mut used = HashSet::new();
+ marker.filter_children(p, |x| match x.kind() {
+ SyntaxKind::Named => {
+ if let Some(ident) =
+ x.children().next().and_then(|child| child.cast::<ast::Ident>())
+ {
+ if !used.insert(ident.take()) {
+ return Err("duplicate argument");
+ }
+ }
+ Ok(())
+ }
+ _ => Ok(()),
+ });
+ }
+
+ while p.peek_direct() == Some(SyntaxKind::LeftBracket) {
+ content_block(p);
+ }
+ });
+
+ Ok(())
+}
+
+fn math_args(p: &mut Parser) {
+ p.start_group(Group::MathRow('(', ')'));
+ p.perform(SyntaxKind::Args, |p| {
+ let mut marker = p.marker();
+ while !p.eof() {
+ if matches!(p.peek(), Some(SyntaxKind::Atom) if p.peek_src() == ",") {
+ marker.end(p, SyntaxKind::Math);
+ let comma = p.marker();
+ p.eat();
+ comma.convert(p, SyntaxKind::Comma);
+ marker = p.marker();
+ } else {
+ math_node(p);
+ }
+ }
+ if marker != p.marker() {
+ marker.end(p, SyntaxKind::Math);
+ }
+ });
+ p.end_group();
+}
+
+fn let_binding(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::LetBinding, |p| {
+ p.assert(SyntaxKind::Let);
+
+ let marker = p.marker();
+ ident(p)?;
+
+ // If a parenthesis follows, this is a function definition.
+ let has_params = p.peek_direct() == Some(SyntaxKind::LeftParen);
+ if has_params {
+ let marker = p.marker();
+ p.start_group(Group::Paren);
+ collection(p, false);
+ p.end_group();
+ params(p, marker);
+ }
+
+ if p.eat_if(SyntaxKind::Eq) {
+ expr(p)?;
+ } else if has_params {
+ // Function definitions must have a body.
+ p.expected("body");
+ }
+
+ // Rewrite into a closure expression if it's a function definition.
+ if has_params {
+ marker.end(p, SyntaxKind::Closure);
+ }
+
+ Ok(())
+ })
+}
+
+fn set_rule(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::SetRule, |p| {
+ p.assert(SyntaxKind::Set);
+ ident(p)?;
+ args(p)?;
+ if p.eat_if(SyntaxKind::If) {
+ expr(p)?;
+ }
+ Ok(())
+ })
+}
+
+fn show_rule(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::ShowRule, |p| {
+ p.assert(SyntaxKind::Show);
+ expr(p)?;
+ if p.eat_if(SyntaxKind::Colon) {
+ expr(p)?;
+ }
+ Ok(())
+ })
+}
+
+fn conditional(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::Conditional, |p| {
+ p.assert(SyntaxKind::If);
+
+ expr(p)?;
+ body(p)?;
+
+ if p.eat_if(SyntaxKind::Else) {
+ if p.at(SyntaxKind::If) {
+ conditional(p)?;
+ } else {
+ body(p)?;
+ }
+ }
+
+ Ok(())
+ })
+}
+
+fn while_loop(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::WhileLoop, |p| {
+ p.assert(SyntaxKind::While);
+ expr(p)?;
+ body(p)
+ })
+}
+
+fn for_loop(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::ForLoop, |p| {
+ p.assert(SyntaxKind::For);
+ for_pattern(p)?;
+ p.expect(SyntaxKind::In)?;
+ expr(p)?;
+ body(p)
+ })
+}
+
+fn for_pattern(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::ForPattern, |p| {
+ ident(p)?;
+ if p.eat_if(SyntaxKind::Comma) {
+ ident(p)?;
+ }
+ Ok(())
+ })
+}
+
+fn module_import(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::ModuleImport, |p| {
+ p.assert(SyntaxKind::Import);
+ expr(p)?;
+
+ if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) {
+ return Ok(());
+ }
+
+ // This is the list of identifiers scenario.
+ p.perform(SyntaxKind::ImportItems, |p| {
+ let marker = p.marker();
+ let items = collection(p, false).1;
+ if items == 0 {
+ p.expected("import items");
+ }
+ marker.filter_children(p, |n| match n.kind() {
+ SyntaxKind::Ident | SyntaxKind::Comma => Ok(()),
+ _ => Err("expected identifier"),
+ });
+ });
+
+ Ok(())
+ })
+}
+
+fn module_include(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::ModuleInclude, |p| {
+ p.assert(SyntaxKind::Include);
+ expr(p)
+ })
+}
+
+fn break_stmt(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::LoopBreak, |p| {
+ p.assert(SyntaxKind::Break);
+ Ok(())
+ })
+}
+
+fn continue_stmt(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::LoopContinue, |p| {
+ p.assert(SyntaxKind::Continue);
+ Ok(())
+ })
+}
+
+fn return_stmt(p: &mut Parser) -> ParseResult {
+ p.perform(SyntaxKind::FuncReturn, |p| {
+ p.assert(SyntaxKind::Return);
+ if !p.at(SyntaxKind::Comma) && !p.eof() {
+ expr(p)?;
+ }
+ Ok(())
+ })
+}
+
+fn body(p: &mut Parser) -> ParseResult {
+ match p.peek() {
+ Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
+ Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
+ _ => {
+ p.expected("body");
+ Err(ParseError)
+ }
+ }
+}
+
/// A convenient token-based parser.
-pub struct Parser<'s> {
+struct Parser<'s> {
/// An iterator over the source tokens.
- tokens: Lexer<'s>,
+ lexer: Lexer<'s>,
/// Whether we are at the end of the file or of a group.
eof: bool,
/// The current token.
@@ -29,18 +1133,18 @@ pub struct Parser<'s> {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(text: &'s str, mode: LexMode) -> Self {
+ fn new(text: &'s str, mode: LexMode) -> Self {
Self::with_prefix("", text, mode)
}
/// Create a new parser for the source string that is prefixed by some text
/// that does not need to be parsed but taken into account for column
/// calculation.
- pub fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
- let mut tokens = Lexer::with_prefix(prefix, text, mode);
- let current = tokens.next();
+ fn with_prefix(prefix: &str, text: &'s str, mode: LexMode) -> Self {
+ let mut lexer = Lexer::with_prefix(prefix, text, mode);
+ let current = lexer.next();
Self {
- tokens,
+ lexer,
eof: current.is_none(),
current,
prev_end: 0,
@@ -53,7 +1157,7 @@ impl<'s> Parser<'s> {
}
/// End the parsing process and return the parsed children.
- pub fn finish(self) -> Vec<SyntaxNode> {
+ fn finish(self) -> Vec<SyntaxNode> {
self.children
}
@@ -61,17 +1165,17 @@ impl<'s> Parser<'s> {
/// - the parsed children and whether the last token was terminated, if all
/// groups were terminated correctly, or
/// - `None` otherwise.
- pub fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> {
- self.terminated().then(|| (self.children, self.tokens.terminated()))
+ fn consume(self) -> Option<(Vec<SyntaxNode>, bool)> {
+ self.terminated().then(|| (self.children, self.lexer.terminated()))
}
/// Create a new marker.
- pub fn marker(&mut self) -> Marker {
+ fn marker(&mut self) -> Marker {
Marker(self.children.len())
}
/// Create a marker right before the trailing trivia.
- pub fn trivia_start(&self) -> Marker {
+ fn trivia_start(&self) -> Marker {
let count = self
.children
.iter()
@@ -82,7 +1186,7 @@ impl<'s> Parser<'s> {
}
/// Perform a subparse that wraps its result in a node with the given kind.
- pub fn perform<F, T>(&mut self, kind: SyntaxKind, f: F) -> T
+ fn perform<F, T>(&mut self, kind: SyntaxKind, f: F) -> T
where
F: FnOnce(&mut Self) -> T,
{
@@ -91,7 +1195,7 @@ impl<'s> Parser<'s> {
let until = self.trivia_start();
let mut children = mem::replace(&mut self.children, prev);
- if self.tokens.mode() == LexMode::Markup {
+ if self.lexer.mode() == LexMode::Markup {
self.children.push(SyntaxNode::inner(kind, children));
} else {
// Trailing trivia should not be wrapped into the new node.
@@ -105,12 +1209,12 @@ impl<'s> Parser<'s> {
}
/// Whether the end of the source string or group is reached.
- pub fn eof(&self) -> bool {
+ fn eof(&self) -> bool {
self.eof
}
/// Consume the current token and also trailing trivia.
- pub fn eat(&mut self) {
+ fn eat(&mut self) {
self.stray_terminator |= match self.current {
Some(SyntaxKind::RightParen) => !self.inside(Group::Paren),
Some(SyntaxKind::RightBracket) => !self.inside(Group::Bracket),
@@ -118,12 +1222,12 @@ impl<'s> Parser<'s> {
_ => false,
};
- self.prev_end = self.tokens.cursor();
+ self.prev_end = self.lexer.cursor();
self.bump();
- if self.tokens.mode() != LexMode::Markup {
+ if self.lexer.mode() != LexMode::Markup {
// Skip whitespace and comments.
- while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
+ while self.current.map_or(false, |kind| self.is_trivia(kind)) {
self.bump();
}
}
@@ -132,7 +1236,7 @@ impl<'s> Parser<'s> {
}
/// Consume the current token if it is the given one.
- pub fn eat_if(&mut self, kind: SyntaxKind) -> bool {
+ fn eat_if(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
if at {
self.eat();
@@ -141,9 +1245,9 @@ impl<'s> Parser<'s> {
}
/// Eat tokens while the condition is true.
- pub fn eat_while<F>(&mut self, mut f: F)
+ fn eat_while<F>(&mut self, mut f: F)
where
- F: FnMut(&SyntaxKind) -> bool,
+ F: FnMut(SyntaxKind) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
@@ -152,8 +1256,8 @@ impl<'s> Parser<'s> {
/// Consume the current token if it is the given one and produce an error if
/// not.
- pub fn expect(&mut self, kind: SyntaxKind) -> ParseResult {
- let at = self.peek() == Some(&kind);
+ fn expect(&mut self, kind: SyntaxKind) -> ParseResult {
+ let at = self.peek() == Some(kind);
if at {
self.eat();
Ok(())
@@ -165,28 +1269,28 @@ impl<'s> Parser<'s> {
/// Consume the current token, debug-asserting that it is the given one.
#[track_caller]
- pub fn assert(&mut self, kind: SyntaxKind) {
- debug_assert_eq!(self.peek(), Some(&kind));
+ fn assert(&mut self, kind: SyntaxKind) {
+ debug_assert_eq!(self.peek(), Some(kind));
self.eat();
}
/// Whether the current token is of the given type.
- pub fn at(&self, kind: SyntaxKind) -> bool {
- self.peek() == Some(&kind)
+ fn at(&self, kind: SyntaxKind) -> bool {
+ self.peek() == Some(kind)
}
/// Peek at the current token without consuming it.
- pub fn peek(&self) -> Option<&SyntaxKind> {
+ fn peek(&self) -> Option<SyntaxKind> {
if self.eof {
None
} else {
- self.current.as_ref()
+ self.current
}
}
/// Peek at the current token, but only if it follows immediately after the
/// last one without any trivia in between.
- pub fn peek_direct(&self) -> Option<&SyntaxKind> {
+ fn peek_direct(&self) -> Option<SyntaxKind> {
if self.prev_end() == self.current_start() {
self.peek()
} else {
@@ -194,34 +1298,34 @@ impl<'s> Parser<'s> {
}
}
- /// Peek at the source of the current token.
- pub fn peek_src(&self) -> &'s str {
- self.get(self.current_start()..self.current_end())
- }
-
- /// Obtain a range of the source code.
- pub fn get(&self, range: Range<usize>) -> &'s str {
- self.tokens.scanner().get(range)
- }
-
/// The byte index at which the last non-trivia token ended.
- pub fn prev_end(&self) -> usize {
+ fn prev_end(&self) -> usize {
self.prev_end
}
/// The byte index at which the current token starts.
- pub fn current_start(&self) -> usize {
+ fn current_start(&self) -> usize {
self.current_start
}
/// The byte index at which the current token ends.
- pub fn current_end(&self) -> usize {
- self.tokens.cursor()
+ fn current_end(&self) -> usize {
+ self.lexer.cursor()
+ }
+
+ /// The byte length of the current token.
+ fn current_len(&self) -> usize {
+ self.current_end() - self.current_start()
+ }
+
+ /// The text of the current token.
+ fn peek_src(&self) -> &str {
+ self.lexer.scanner().from(self.current_start)
}
/// Determine the column index for the given byte index.
- pub fn column(&self, index: usize) -> usize {
- self.tokens.column(index)
+ fn column(&self, index: usize) -> usize {
+ self.lexer.column(index)
}
/// Continue parsing in a group.
@@ -232,9 +1336,9 @@ impl<'s> Parser<'s> {
///
/// This panics if the current token does not start the given group.
#[track_caller]
- pub fn start_group(&mut self, kind: Group) {
- self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
- self.tokens.set_mode(match kind {
+ fn start_group(&mut self, kind: Group) {
+ self.groups.push(GroupEntry { kind, prev_mode: self.lexer.mode() });
+ self.lexer.set_mode(match kind {
Group::Bracket | Group::Strong | Group::Emph => LexMode::Markup,
Group::Math | Group::MathRow(_, _) => LexMode::Math,
Group::Brace | Group::Paren | Group::Expr => LexMode::Code,
@@ -247,7 +1351,7 @@ impl<'s> Parser<'s> {
Group::Strong => self.assert(SyntaxKind::Star),
Group::Emph => self.assert(SyntaxKind::Underscore),
Group::Math => self.assert(SyntaxKind::Dollar),
- Group::MathRow(l, _) => self.assert(SyntaxKind::Atom(l.into())),
+ Group::MathRow(..) => self.assert(SyntaxKind::Atom),
Group::Expr => self.repeek(),
}
}
@@ -256,12 +1360,12 @@ impl<'s> Parser<'s> {
///
/// This panics if no group was started.
#[track_caller]
- pub fn end_group(&mut self) {
- let group_mode = self.tokens.mode();
+ fn end_group(&mut self) {
+ let group_mode = self.lexer.mode();
let group = self.groups.pop().expect("no started group");
- self.tokens.set_mode(group.prev_mode);
+ self.lexer.set_mode(group.prev_mode);
- let mut rescan = self.tokens.mode() != group_mode;
+ let mut rescan = self.lexer.mode() != group_mode;
// Eat the end delimiter if there is one.
if let Some((end, required)) = match group.kind {
@@ -271,7 +1375,7 @@ impl<'s> Parser<'s> {
Group::Strong => Some((SyntaxKind::Star, true)),
Group::Emph => Some((SyntaxKind::Underscore, true)),
Group::Math => Some((SyntaxKind::Dollar, true)),
- Group::MathRow(_, r) => Some((SyntaxKind::Atom(r.into()), true)),
+ Group::MathRow(..) => Some((SyntaxKind::Atom, true)),
Group::Expr => Some((SyntaxKind::Semicolon, false)),
} {
if self.current.as_ref() == Some(&end) {
@@ -303,10 +1407,10 @@ impl<'s> Parser<'s> {
self.children.truncate(start);
}
- self.tokens.jump(target);
- self.prev_end = self.tokens.cursor();
- self.current_start = self.tokens.cursor();
- self.current = self.tokens.next();
+ self.lexer.jump(target);
+ self.prev_end = self.lexer.cursor();
+ self.current_start = self.lexer.cursor();
+ self.current = self.lexer.next();
}
self.repeek();
@@ -320,11 +1424,16 @@ impl<'s> Parser<'s> {
/// Low-level bump that consumes exactly one token without special trivia
/// handling.
fn bump(&mut self) {
- let kind = self.current.take().unwrap();
- let len = self.tokens.cursor() - self.current_start;
- self.children.push(SyntaxNode::leaf(kind, len));
- self.current_start = self.tokens.cursor();
- self.current = self.tokens.next();
+ if let Some((message, pos)) = self.lexer.last_error() {
+ let len = self.current_len();
+ self.children.push(SyntaxNode::error(message, pos, len))
+ } else {
+ let kind = self.current.unwrap();
+ let text = self.peek_src();
+ self.children.push(SyntaxNode::leaf(kind, text));
+ }
+ self.current_start = self.lexer.cursor();
+ self.current = self.lexer.next();
}
/// Take another look at the current token to recheck whether it ends a
@@ -344,7 +1453,7 @@ impl<'s> Parser<'s> {
.next()
.map_or(false, |group| group.kind == Group::Math),
Some(SyntaxKind::Semicolon) => self.inside(Group::Expr),
- Some(SyntaxKind::Atom(s)) => match s.as_str() {
+ Some(SyntaxKind::Atom) => match self.peek_src() {
")" => self.inside(Group::MathRow('(', ')')),
"}" => self.inside(Group::MathRow('{', '}')),
"]" => self.inside(Group::MathRow('[', ']')),
@@ -357,9 +1466,9 @@ impl<'s> Parser<'s> {
}
/// Returns whether the given type can be skipped over.
- fn is_trivia(&self, token: &SyntaxKind) -> bool {
+ fn is_trivia(&self, token: SyntaxKind) -> bool {
match token {
- SyntaxKind::Space { newlines } => !self.space_ends_group(*newlines),
+ SyntaxKind::Space { newlines } => !self.space_ends_group(newlines),
SyntaxKind::LineComment => true,
SyntaxKind::BlockComment => true,
_ => false,
@@ -379,7 +1488,7 @@ impl<'s> Parser<'s> {
self.groups.iter().nth_back(1).map(|group| group.kind)
!= Some(Group::Brace)
|| !matches!(
- self.tokens.clone().next(),
+ self.lexer.clone().next(),
Some(SyntaxKind::Else | SyntaxKind::Dot)
)
}
@@ -400,35 +1509,37 @@ impl<'s> Parser<'s> {
/// Error handling.
impl Parser<'_> {
/// Eat the current token and add an error that it is unexpected.
- pub fn unexpected(&mut self) {
+ fn unexpected(&mut self) {
if let Some(found) = self.peek() {
+ let marker = self.marker();
let msg = format_eco!("unexpected {}", found.name());
- let error = SyntaxKind::Error(ErrorPos::Full, msg);
- self.perform(error, Self::eat);
+ self.eat();
+ marker.to_error(self, msg);
}
}
/// Add an error that the `thing` was expected at the end of the last
/// non-trivia token.
- pub fn expected(&mut self, thing: &str) {
+ fn expected(&mut self, thing: &str) {
self.expected_at(self.trivia_start(), thing);
}
/// Insert an error message that `what` was expected at the marker position.
- pub fn expected_at(&mut self, marker: Marker, what: &str) {
+ fn expected_at(&mut self, marker: Marker, what: &str) {
let msg = format_eco!("expected {}", what);
- let error = SyntaxKind::Error(ErrorPos::Full, msg);
- self.children.insert(marker.0, SyntaxNode::leaf(error, 0));
+ self.children
+ .insert(marker.0, SyntaxNode::error(msg, ErrorPos::Full, 0));
}
/// Eat the current token and add an error that it is not the expected
/// `thing`.
- pub fn expected_found(&mut self, thing: &str) {
+ fn expected_found(&mut self, thing: &str) {
match self.peek() {
Some(found) => {
+ let marker = self.marker();
let msg = format_eco!("expected {}, found {}", thing, found.name());
- let error = SyntaxKind::Error(ErrorPos::Full, msg);
- self.perform(error, Self::eat);
+ self.eat();
+ marker.to_error(self, msg);
}
None => self.expected(thing),
}
@@ -437,29 +1548,36 @@ impl Parser<'_> {
/// Marks a location in a parser's child list.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct Marker(usize);
+struct Marker(usize);
impl Marker {
/// Peek at the child directly before the marker.
- pub fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
+ fn before<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
p.children.get(self.0.checked_sub(1)?)
}
/// Peek at the child directly after the marker.
- pub fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
+ fn after<'a>(self, p: &'a Parser) -> Option<&'a SyntaxNode> {
p.children.get(self.0)
}
/// Convert the child directly after marker.
- pub fn convert(self, p: &mut Parser, kind: SyntaxKind) {
+ fn convert(self, p: &mut Parser, kind: SyntaxKind) {
+ if let Some(child) = p.children.get_mut(self.0) {
+ child.convert_to(kind);
+ }
+ }
+
+ /// Convert the child directly after marker into an error with the given message.
+ fn to_error(self, p: &mut Parser, message: impl Into<EcoString>) {
if let Some(child) = p.children.get_mut(self.0) {
- child.convert(kind);
+ child.convert_to_error(message);
}
}
/// Perform a subparse that wraps all children after the marker in a node
/// with the given kind.
- pub fn perform<T, F>(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T
+ fn perform<T, F>(self, p: &mut Parser, kind: SyntaxKind, f: F) -> T
where
F: FnOnce(&mut Parser) -> T,
{
@@ -470,14 +1588,14 @@ impl Marker {
/// Wrap all children after the marker (excluding trailing trivia) in a node
/// with the given `kind`.
- pub fn end(self, p: &mut Parser, kind: SyntaxKind) {
+ fn end(self, p: &mut Parser, kind: SyntaxKind) {
let until = p.trivia_start().0.max(self.0);
let children = p.children.drain(self.0..until).collect();
p.children.insert(self.0, SyntaxNode::inner(kind, children));
}
/// Wrap all children that do not fulfill the predicate in error nodes.
- pub fn filter_children<F>(self, p: &mut Parser, mut f: F)
+ fn filter_children<F>(self, p: &mut Parser, mut f: F)
where
F: FnMut(&SyntaxNode) -> Result<(), &'static str>,
{
@@ -488,7 +1606,7 @@ impl Marker {
}
// Don't expose trivia in code.
- if p.tokens.mode() != LexMode::Markup && child.kind().is_trivia() {
+ if p.lexer.mode() != LexMode::Markup && child.kind().is_trivia() {
continue;
}
@@ -498,9 +1616,8 @@ impl Marker {
msg.push_str(", found ");
msg.push_str(child.kind().name());
}
- let error = SyntaxKind::Error(ErrorPos::Full, msg);
- let inner = mem::take(child);
- *child = SyntaxNode::inner(error, vec![inner]);
+ let len = child.len();
+ *child = SyntaxNode::error(msg, ErrorPos::Full, len);
}
}
}
@@ -512,15 +1629,15 @@ struct GroupEntry {
/// The kind of group this is. This decides which token(s) will end the
/// group. For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`].
- pub kind: Group,
+ kind: Group,
/// The mode the parser was in _before_ the group started (to which we go
/// back once the group ends).
- pub prev_mode: LexMode,
+ prev_mode: LexMode,
}
/// A group, confined by optional start and end delimiters.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Group {
+enum Group {
/// A curly-braced group: `{...}`.
Brace,
/// A bracketed group: `[...]`.
@@ -548,11 +1665,11 @@ impl Group {
/// Allows parser methods to use the try operator. Never returned top-level
/// because the parser recovers from all errors.
-pub type ParseResult<T = ()> = Result<T, ParseError>;
+type ParseResult<T = ()> = Result<T, ParseError>;
/// The error type for parsing.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct ParseError;
+struct ParseError;
impl Display for ParseError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
deleted file mode 100644
index a6e6c861..00000000
--- a/src/syntax/parsing.rs
+++ /dev/null
@@ -1,1118 +0,0 @@
-use std::collections::HashSet;
-
-use super::ast::{Assoc, BinOp, UnOp};
-use super::{
- ErrorPos, Group, LexMode, Marker, ParseError, ParseResult, Parser, SyntaxKind,
- SyntaxNode,
-};
-use crate::util::EcoString;
-
-/// Parse a source file.
-pub fn parse(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Markup);
- markup(&mut p, true);
- p.finish().into_iter().next().unwrap()
-}
-
-/// Parse code directly, only used for syntax highlighting.
-pub fn parse_code(text: &str) -> SyntaxNode {
- let mut p = Parser::new(text, LexMode::Code);
- p.perform(SyntaxKind::CodeBlock, code);
- p.finish().into_iter().next().unwrap()
-}
-
-/// Reparse a code block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(crate) fn reparse_code_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBrace) {
- return None;
- }
-
- code_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a content block.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(crate) fn reparse_content_block(
- prefix: &str,
- text: &str,
- end_pos: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Code);
- if !p.at(SyntaxKind::LeftBracket) {
- return None;
- }
-
- content_block(&mut p);
-
- let (mut node, terminated) = p.consume()?;
- let first = node.remove(0);
- if first.len() != end_pos {
- return None;
- }
-
- Some((vec![first], terminated, 1))
-}
-
-/// Reparse a sequence markup elements without the topmost node.
-///
-/// Returns `Some` if all of the input was consumed.
-pub(crate) fn reparse_markup_elements(
- prefix: &str,
- text: &str,
- end_pos: usize,
- differential: isize,
- reference: &[SyntaxNode],
- mut at_start: bool,
- min_indent: usize,
-) -> Option<(Vec<SyntaxNode>, bool, usize)> {
- let mut p = Parser::with_prefix(prefix, text, LexMode::Markup);
-
- let mut node: Option<&SyntaxNode> = None;
- let mut iter = reference.iter();
- let mut offset = differential;
- let mut replaced = 0;
- let mut stopped = false;
-
- 'outer: while !p.eof() {
- if let Some(SyntaxKind::Space { newlines: (1..) }) = p.peek() {
- if p.column(p.current_end()) < min_indent {
- return None;
- }
- }
-
- markup_node(&mut p, &mut at_start);
-
- if p.prev_end() <= end_pos {
- continue;
- }
-
- let recent = p.marker().before(&p).unwrap();
- let recent_start = p.prev_end() - recent.len();
-
- while offset <= recent_start as isize {
- if let Some(node) = node {
- // The nodes are equal, at the same position and have the
- // same content. The parsing trees have converged again, so
- // the reparse may stop here.
- if offset == recent_start as isize && node == recent {
- replaced -= 1;
- stopped = true;
- break 'outer;
- }
- }
-
- if let Some(node) = node {
- offset += node.len() as isize;
- }
-
- node = iter.next();
- if node.is_none() {
- break;
- }
-
- replaced += 1;
- }
- }
-
- if p.eof() && !stopped {
- replaced = reference.len();
- }
-
- let (mut res, terminated) = p.consume()?;
- if stopped {
- res.pop().unwrap();
- }
-
- Some((res, terminated, replaced))
-}
-
-/// Parse markup.
-///
-/// If `at_start` is true, things like headings that may only appear at the
-/// beginning of a line or content block are initially allowed.
-fn markup(p: &mut Parser, mut at_start: bool) {
- p.perform(SyntaxKind::Markup { min_indent: 0 }, |p| {
- while !p.eof() {
- markup_node(p, &mut at_start);
- }
- });
-}
-
-/// Parse markup that stays right of the given `column`.
-fn markup_indented(p: &mut Parser, min_indent: usize) {
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => *newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- let marker = p.marker();
- let mut at_start = false;
-
- while !p.eof() {
- match p.peek() {
- Some(SyntaxKind::Space { newlines: (1..) })
- if p.column(p.current_end()) < min_indent =>
- {
- break;
- }
- _ => {}
- }
-
- markup_node(p, &mut at_start);
- }
-
- marker.end(p, SyntaxKind::Markup { min_indent });
-}
-
-/// Parse a line of markup that can prematurely end if `f` returns true.
-fn markup_line<F>(p: &mut Parser, mut f: F)
-where
- F: FnMut(&SyntaxKind) -> bool,
-{
- p.eat_while(|t| match t {
- SyntaxKind::Space { newlines } => *newlines == 0,
- SyntaxKind::LineComment | SyntaxKind::BlockComment => true,
- _ => false,
- });
-
- p.perform(SyntaxKind::Markup { min_indent: usize::MAX }, |p| {
- let mut at_start = false;
- while let Some(kind) = p.peek() {
- if let SyntaxKind::Space { newlines: (1..) } = kind {
- break;
- }
-
- if f(kind) {
- break;
- }
-
- markup_node(p, &mut at_start);
- }
- });
-}
-
-fn markup_node(p: &mut Parser, at_start: &mut bool) {
- let Some(token) = p.peek() else { return };
- match token {
- // Whitespace.
- SyntaxKind::Space { newlines } => {
- *at_start |= *newlines > 0;
- p.eat();
- return;
- }
-
- // Comments.
- SyntaxKind::LineComment | SyntaxKind::BlockComment => {
- p.eat();
- return;
- }
-
- // Text and markup.
- SyntaxKind::Text(_)
- | SyntaxKind::Linebreak
- | SyntaxKind::SmartQuote { .. }
- | SyntaxKind::Escape(_)
- | SyntaxKind::Shorthand(_)
- | SyntaxKind::Symbol(_)
- | SyntaxKind::Link(_)
- | SyntaxKind::Raw(_)
- | SyntaxKind::Ref(_) => p.eat(),
-
- // Math.
- SyntaxKind::Dollar => math(p),
-
- // Strong, emph, heading.
- SyntaxKind::Star => strong(p),
- SyntaxKind::Underscore => emph(p),
- SyntaxKind::Eq => heading(p, *at_start),
-
- // Lists.
- SyntaxKind::Minus => list_item(p, *at_start),
- SyntaxKind::Plus | SyntaxKind::EnumNumbering(_) => enum_item(p, *at_start),
- SyntaxKind::Slash => {
- term_item(p, *at_start).ok();
- }
- SyntaxKind::Colon => {
- let marker = p.marker();
- p.eat();
- marker.convert(p, SyntaxKind::Text(':'.into()));
- }
-
- // Hashtag + keyword / identifier.
- SyntaxKind::Ident(_)
- | SyntaxKind::Label(_)
- | SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::If
- | SyntaxKind::While
- | SyntaxKind::For
- | SyntaxKind::Import
- | SyntaxKind::Include
- | SyntaxKind::Break
- | SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
-
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
-
- SyntaxKind::Error(_, _) => p.eat(),
- _ => p.unexpected(),
- };
-
- *at_start = false;
-}
-
-fn strong(p: &mut Parser) {
- p.perform(SyntaxKind::Strong, |p| {
- p.start_group(Group::Strong);
- markup(p, false);
- p.end_group();
- })
-}
-
-fn emph(p: &mut Parser) {
- p.perform(SyntaxKind::Emph, |p| {
- p.start_group(Group::Emph);
- markup(p, false);
- p.end_group();
- })
-}
-
-fn heading(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- let current_start = p.current_start();
- p.assert(SyntaxKind::Eq);
- while p.eat_if(SyntaxKind::Eq) {}
-
- if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
- p.eat_while(|kind| *kind == SyntaxKind::Space { newlines: 0 });
- markup_line(p, |kind| matches!(kind, SyntaxKind::Label(_)));
- marker.end(p, SyntaxKind::Heading);
- } else {
- let text = p.get(current_start..p.prev_end()).into();
- marker.convert(p, SyntaxKind::Text(text));
- }
-}
-
-fn list_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- let text: EcoString = p.peek_src().into();
- p.assert(SyntaxKind::Minus);
-
- let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::ListItem);
- } else {
- marker.convert(p, SyntaxKind::Text(text));
- }
-}
-
-fn enum_item(p: &mut Parser, at_start: bool) {
- let marker = p.marker();
- let text: EcoString = p.peek_src().into();
- p.eat();
-
- let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::EnumItem);
- } else {
- marker.convert(p, SyntaxKind::Text(text));
- }
-}
-
-fn term_item(p: &mut Parser, at_start: bool) -> ParseResult {
- let marker = p.marker();
- let text: EcoString = p.peek_src().into();
- p.eat();
-
- let min_indent = p.column(p.prev_end());
- if at_start && p.eat_if(SyntaxKind::Space { newlines: 0 }) && !p.eof() {
- markup_line(p, |node| matches!(node, SyntaxKind::Colon));
- p.expect(SyntaxKind::Colon)?;
- markup_indented(p, min_indent);
- marker.end(p, SyntaxKind::TermItem);
- } else {
- marker.convert(p, SyntaxKind::Text(text));
- }
-
- Ok(())
-}
-
-fn embedded_expr(p: &mut Parser) {
- // Does the expression need termination or can content follow directly?
- let stmt = matches!(
- p.peek(),
- Some(
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::Import
- | SyntaxKind::Include
- )
- );
-
- p.start_group(Group::Expr);
- let res = expr_prec(p, true, 0);
- if stmt && res.is_ok() && !p.eof() {
- p.expected("semicolon or line break");
- }
- p.end_group();
-}
-
-fn math(p: &mut Parser) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(Group::Math);
- while !p.eof() {
- math_node(p);
- }
- p.end_group();
- });
-}
-
-fn math_node(p: &mut Parser) {
- math_node_prec(p, 0, None)
-}
-
-fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
- let marker = p.marker();
- math_primary(p);
-
- loop {
- let (kind, mut prec, assoc, stop) = match p.peek() {
- v if v == stop.as_ref() => break,
- Some(SyntaxKind::Underscore) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Hat))
- }
- Some(SyntaxKind::Hat) => {
- (SyntaxKind::Script, 2, Assoc::Right, Some(SyntaxKind::Underscore))
- }
- Some(SyntaxKind::Slash) => (SyntaxKind::Frac, 1, Assoc::Left, None),
- _ => break,
- };
-
- if prec < min_prec {
- break;
- }
-
- match assoc {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
- }
-
- p.eat();
- math_node_prec(p, prec, stop);
-
- // Allow up to two different scripts. We do not risk encountering the
- // previous script kind again here due to right-associativity.
- if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) {
- math_node_prec(p, prec, None);
- }
-
- marker.end(p, kind);
- }
-}
-
-/// Parse a primary math node.
-fn math_primary(p: &mut Parser) {
- let Some(token) = p.peek() else { return };
- match token {
- // Spaces and expressions.
- SyntaxKind::Space { .. }
- | SyntaxKind::Linebreak
- | SyntaxKind::Escape(_)
- | SyntaxKind::Str(_)
- | SyntaxKind::Shorthand(_)
- | SyntaxKind::Symbol(_) => p.eat(),
-
- // Atoms.
- SyntaxKind::Atom(s) => match s.as_str() {
- "(" => math_group(p, Group::MathRow('(', ')')),
- "{" => math_group(p, Group::MathRow('{', '}')),
- "[" => math_group(p, Group::MathRow('[', ']')),
- _ => p.eat(),
- },
-
- // Alignment indactor.
- SyntaxKind::Amp => math_align(p),
-
- // Identifiers and math calls.
- SyntaxKind::Ident(_) => {
- let marker = p.marker();
- p.eat();
-
- // Parenthesis or bracket means this is a function call.
- if matches!(p.peek_direct(), Some(SyntaxKind::Atom(s)) if s == "(") {
- marker.perform(p, SyntaxKind::FuncCall, math_args);
- }
- }
-
- // Hashtag + keyword / identifier.
- SyntaxKind::Let
- | SyntaxKind::Set
- | SyntaxKind::Show
- | SyntaxKind::If
- | SyntaxKind::While
- | SyntaxKind::For
- | SyntaxKind::Import
- | SyntaxKind::Include
- | SyntaxKind::Break
- | SyntaxKind::Continue
- | SyntaxKind::Return => embedded_expr(p),
-
- // Code and content block.
- SyntaxKind::LeftBrace => code_block(p),
- SyntaxKind::LeftBracket => content_block(p),
-
- _ => p.unexpected(),
- }
-}
-
-fn math_group(p: &mut Parser, group: Group) {
- p.perform(SyntaxKind::Math, |p| {
- p.start_group(group);
- while !p.eof() {
- math_node(p);
- }
- p.end_group();
- })
-}
-
-fn math_align(p: &mut Parser) {
- p.perform(SyntaxKind::AlignPoint, |p| {
- p.assert(SyntaxKind::Amp);
- while p.eat_if(SyntaxKind::Amp) {}
- })
-}
-
-fn expr(p: &mut Parser) -> ParseResult {
- expr_prec(p, false, 0)
-}
-
-/// Parse an expression with operators having at least the minimum precedence.
-///
-/// If `atomic` is true, this does not parse binary operations and arrow
-/// functions, which is exactly what we want in a shorthand expression directly
-/// in markup.
-///
-/// Stops parsing at operations with lower precedence than `min_prec`,
-fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
- let marker = p.marker();
-
- // Start the unary expression.
- match p.peek().and_then(UnOp::from_token) {
- Some(op) if !atomic => {
- p.eat();
- let prec = op.precedence();
- expr_prec(p, atomic, prec)?;
- marker.end(p, SyntaxKind::Unary);
- }
- _ => primary(p, atomic)?,
- };
-
- loop {
- // Parenthesis or bracket means this is a function call.
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct() {
- marker.perform(p, SyntaxKind::FuncCall, args)?;
- continue;
- }
-
- if atomic {
- break;
- }
-
- // Method call or field access.
- if p.eat_if(SyntaxKind::Dot) {
- ident(p)?;
- if let Some(SyntaxKind::LeftParen | SyntaxKind::LeftBracket) = p.peek_direct()
- {
- marker.perform(p, SyntaxKind::MethodCall, args)?;
- } else {
- marker.end(p, SyntaxKind::FieldAccess);
- }
- continue;
- }
-
- let op = if p.eat_if(SyntaxKind::Not) {
- if p.at(SyntaxKind::In) {
- BinOp::NotIn
- } else {
- p.expected("keyword `in`");
- return Err(ParseError);
- }
- } else {
- match p.peek().and_then(BinOp::from_token) {
- Some(binop) => binop,
- None => break,
- }
- };
-
- let mut prec = op.precedence();
- if prec < min_prec {
- break;
- }
-
- p.eat();
-
- match op.assoc() {
- Assoc::Left => prec += 1,
- Assoc::Right => {}
- }
-
- marker.perform(p, SyntaxKind::Binary, |p| expr_prec(p, atomic, prec))?;
- }
-
- Ok(())
-}
-
-fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
- match p.peek() {
- // Literals and few other things.
- Some(
- SyntaxKind::None
- | SyntaxKind::Auto
- | SyntaxKind::Int(_)
- | SyntaxKind::Float(_)
- | SyntaxKind::Bool(_)
- | SyntaxKind::Numeric(_, _)
- | SyntaxKind::Str(_)
- | SyntaxKind::Label(_)
- | SyntaxKind::Raw(_),
- ) => {
- p.eat();
- Ok(())
- }
-
- // Things that start with an identifier.
- Some(SyntaxKind::Ident(_)) => {
- let marker = p.marker();
- p.eat();
-
- // Arrow means this is a closure's lone parameter.
- if !atomic && p.at(SyntaxKind::Arrow) {
- marker.end(p, SyntaxKind::Params);
- p.assert(SyntaxKind::Arrow);
- marker.perform(p, SyntaxKind::Closure, expr)
- } else {
- Ok(())
- }
- }
-
- // Structures.
- Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::Dollar) => Ok(math(p)),
-
- // Keywords.
- Some(SyntaxKind::Let) => let_binding(p),
- Some(SyntaxKind::Set) => set_rule(p),
- Some(SyntaxKind::Show) => show_rule(p),
- Some(SyntaxKind::If) => conditional(p),
- Some(SyntaxKind::While) => while_loop(p),
- Some(SyntaxKind::For) => for_loop(p),
- Some(SyntaxKind::Import) => module_import(p),
- Some(SyntaxKind::Include) => module_include(p),
- Some(SyntaxKind::Break) => break_stmt(p),
- Some(SyntaxKind::Continue) => continue_stmt(p),
- Some(SyntaxKind::Return) => return_stmt(p),
-
- Some(SyntaxKind::Error(_, _)) => {
- p.eat();
- Err(ParseError)
- }
-
- // Nothing.
- _ => {
- p.expected_found("expression");
- Err(ParseError)
- }
- }
-}
-
-fn ident(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::Ident(_)) => {
- p.eat();
- Ok(())
- }
- _ => {
- p.expected_found("identifier");
- Err(ParseError)
- }
- }
-}
-
-/// Parse something that starts with a parenthesis, which can be either of:
-/// - Array literal
-/// - Dictionary literal
-/// - Parenthesized expression
-/// - Parameter list of closure expression
-fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult {
- let marker = p.marker();
-
- p.start_group(Group::Paren);
- let colon = p.eat_if(SyntaxKind::Colon);
- let kind = collection(p, true).0;
- p.end_group();
-
- // Leading colon makes this a dictionary.
- if colon {
- dict(p, marker);
- return Ok(());
- }
-
- // Arrow means this is a closure's parameter list.
- if !atomic && p.at(SyntaxKind::Arrow) {
- params(p, marker);
- p.assert(SyntaxKind::Arrow);
- return marker.perform(p, SyntaxKind::Closure, expr);
- }
-
- // Transform into the identified collection.
- match kind {
- CollectionKind::Group => marker.end(p, SyntaxKind::Parenthesized),
- CollectionKind::Positional => array(p, marker),
- CollectionKind::Named => dict(p, marker),
- }
-
- Ok(())
-}
-
-/// The type of a collection.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum CollectionKind {
- /// The collection is only one item and has no comma.
- Group,
- /// The collection starts with a positional item and has multiple items or a
- /// trailing comma.
- Positional,
- /// The collection starts with a colon or named item.
- Named,
-}
-
-/// Parse a collection.
-///
-/// Returns the length of the collection and whether the literal contained any
-/// commas.
-fn collection(p: &mut Parser, keyed: bool) -> (CollectionKind, usize) {
- let mut collection_kind = None;
- let mut items = 0;
- let mut can_group = true;
- let mut missing_coma: Option<Marker> = None;
-
- while !p.eof() {
- let Ok(item_kind) = item(p, keyed) else {
- p.eat_if(SyntaxKind::Comma);
- collection_kind = Some(CollectionKind::Group);
- continue;
- };
-
- match item_kind {
- SyntaxKind::Spread => can_group = false,
- SyntaxKind::Named if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Named);
- can_group = false;
- }
- _ if collection_kind.is_none() => {
- collection_kind = Some(CollectionKind::Positional);
- }
- _ => {}
- }
-
- items += 1;
-
- if let Some(marker) = missing_coma.take() {
- p.expected_at(marker, "comma");
- }
-
- if p.eof() {
- break;
- }
-
- if p.eat_if(SyntaxKind::Comma) {
- can_group = false;
- } else {
- missing_coma = Some(p.trivia_start());
- }
- }
-
- let kind = if can_group && items == 1 {
- CollectionKind::Group
- } else {
- collection_kind.unwrap_or(CollectionKind::Positional)
- };
-
- (kind, items)
-}
-
-fn item(p: &mut Parser, keyed: bool) -> ParseResult<SyntaxKind> {
- let marker = p.marker();
- if p.eat_if(SyntaxKind::Dots) {
- marker.perform(p, SyntaxKind::Spread, expr)?;
- return Ok(SyntaxKind::Spread);
- }
-
- expr(p)?;
-
- if p.at(SyntaxKind::Colon) {
- match marker.after(p).map(|c| c.kind()) {
- Some(SyntaxKind::Ident(_)) => {
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr)?;
- }
- Some(SyntaxKind::Str(_)) if keyed => {
- p.eat();
- marker.perform(p, SyntaxKind::Keyed, expr)?;
- }
- kind => {
- let mut msg = EcoString::from("expected identifier");
- if keyed {
- msg.push_str(" or string");
- }
- if let Some(kind) = kind {
- msg.push_str(", found ");
- msg.push_str(kind.name());
- }
- let error = SyntaxKind::Error(ErrorPos::Full, msg);
- marker.end(p, error);
- p.eat();
- marker.perform(p, SyntaxKind::Named, expr).ok();
- return Err(ParseError);
- }
- }
-
- Ok(SyntaxKind::Named)
- } else {
- Ok(SyntaxKind::None)
- }
-}
-
-fn array(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named | SyntaxKind::Keyed => Err("expected expression"),
- _ => Ok(()),
- });
- marker.end(p, SyntaxKind::Array);
-}
-
-fn dict(p: &mut Parser, marker: Marker) {
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Keyed => {
- if let Some(SyntaxKind::Ident(key) | SyntaxKind::Str(key)) =
- x.children().next().map(|child| child.kind())
- {
- if !used.insert(key.clone()) {
- return Err("pair has duplicate key");
- }
- }
- Ok(())
- }
- SyntaxKind::Spread | SyntaxKind::Comma | SyntaxKind::Colon => Ok(()),
- _ => Err("expected named or keyed pair"),
- });
- marker.end(p, SyntaxKind::Dict);
-}
-
-fn params(p: &mut Parser, marker: Marker) {
- marker.filter_children(p, |x| match x.kind() {
- kind if kind.is_paren() => Ok(()),
- SyntaxKind::Named | SyntaxKind::Ident(_) | SyntaxKind::Comma => Ok(()),
- SyntaxKind::Spread
- if matches!(
- x.children().last().map(|child| child.kind()),
- Some(&SyntaxKind::Ident(_))
- ) =>
- {
- Ok(())
- }
- _ => Err("expected identifier, named pair or argument sink"),
- });
- marker.end(p, SyntaxKind::Params);
-}
-
-/// Parse a code block: `{...}`.
-fn code_block(p: &mut Parser) {
- p.perform(SyntaxKind::CodeBlock, |p| {
- p.start_group(Group::Brace);
- code(p);
- p.end_group();
- });
-}
-
-fn code(p: &mut Parser) {
- while !p.eof() {
- p.start_group(Group::Expr);
- if expr(p).is_ok() && !p.eof() {
- p.expected("semicolon or line break");
- }
- p.end_group();
-
- // Forcefully skip over newlines since the group's contents can't.
- p.eat_while(SyntaxKind::is_space);
- }
-}
-
-fn content_block(p: &mut Parser) {
- p.perform(SyntaxKind::ContentBlock, |p| {
- p.start_group(Group::Bracket);
- markup(p, true);
- p.end_group();
- });
-}
-
-fn args(p: &mut Parser) -> ParseResult {
- match p.peek_direct() {
- Some(SyntaxKind::LeftParen) => {}
- Some(SyntaxKind::LeftBracket) => {}
- _ => {
- p.expected_found("argument list");
- return Err(ParseError);
- }
- }
-
- p.perform(SyntaxKind::Args, |p| {
- if p.at(SyntaxKind::LeftParen) {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
-
- let mut used = HashSet::new();
- marker.filter_children(p, |x| match x.kind() {
- SyntaxKind::Named => {
- if let Some(SyntaxKind::Ident(ident)) =
- x.children().next().map(|child| child.kind())
- {
- if !used.insert(ident.clone()) {
- return Err("duplicate argument");
- }
- }
- Ok(())
- }
- _ => Ok(()),
- });
- }
-
- while p.peek_direct() == Some(&SyntaxKind::LeftBracket) {
- content_block(p);
- }
- });
-
- Ok(())
-}
-
-fn math_args(p: &mut Parser) {
- p.start_group(Group::MathRow('(', ')'));
- p.perform(SyntaxKind::Args, |p| {
- let mut marker = p.marker();
- while !p.eof() {
- if matches!(p.peek(), Some(SyntaxKind::Atom(s)) if s == ",") {
- marker.end(p, SyntaxKind::Math);
- let comma = p.marker();
- p.eat();
- comma.convert(p, SyntaxKind::Comma);
- marker = p.marker();
- } else {
- math_node(p);
- }
- }
- if marker != p.marker() {
- marker.end(p, SyntaxKind::Math);
- }
- });
- p.end_group();
-}
-
-fn let_binding(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LetBinding, |p| {
- p.assert(SyntaxKind::Let);
-
- let marker = p.marker();
- ident(p)?;
-
- // If a parenthesis follows, this is a function definition.
- let has_params = p.peek_direct() == Some(&SyntaxKind::LeftParen);
- if has_params {
- let marker = p.marker();
- p.start_group(Group::Paren);
- collection(p, false);
- p.end_group();
- params(p, marker);
- }
-
- if p.eat_if(SyntaxKind::Eq) {
- expr(p)?;
- } else if has_params {
- // Function definitions must have a body.
- p.expected("body");
- }
-
- // Rewrite into a closure expression if it's a function definition.
- if has_params {
- marker.end(p, SyntaxKind::Closure);
- }
-
- Ok(())
- })
-}
-
-fn set_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::SetRule, |p| {
- p.assert(SyntaxKind::Set);
- ident(p)?;
- args(p)?;
- if p.eat_if(SyntaxKind::If) {
- expr(p)?;
- }
- Ok(())
- })
-}
-
-fn show_rule(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ShowRule, |p| {
- p.assert(SyntaxKind::Show);
- expr(p)?;
- if p.eat_if(SyntaxKind::Colon) {
- expr(p)?;
- }
- Ok(())
- })
-}
-
-fn conditional(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::Conditional, |p| {
- p.assert(SyntaxKind::If);
-
- expr(p)?;
- body(p)?;
-
- if p.eat_if(SyntaxKind::Else) {
- if p.at(SyntaxKind::If) {
- conditional(p)?;
- } else {
- body(p)?;
- }
- }
-
- Ok(())
- })
-}
-
-fn while_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::WhileLoop, |p| {
- p.assert(SyntaxKind::While);
- expr(p)?;
- body(p)
- })
-}
-
-fn for_loop(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForLoop, |p| {
- p.assert(SyntaxKind::For);
- for_pattern(p)?;
- p.expect(SyntaxKind::In)?;
- expr(p)?;
- body(p)
- })
-}
-
-fn for_pattern(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ForPattern, |p| {
- ident(p)?;
- if p.eat_if(SyntaxKind::Comma) {
- ident(p)?;
- }
- Ok(())
- })
-}
-
-fn module_import(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleImport, |p| {
- p.assert(SyntaxKind::Import);
- expr(p)?;
-
- if !p.eat_if(SyntaxKind::Colon) || p.eat_if(SyntaxKind::Star) {
- return Ok(());
- }
-
- // This is the list of identifiers scenario.
- p.perform(SyntaxKind::ImportItems, |p| {
- let marker = p.marker();
- let items = collection(p, false).1;
- if items == 0 {
- p.expected("import items");
- }
- marker.filter_children(p, |n| match n.kind() {
- SyntaxKind::Ident(_) | SyntaxKind::Comma => Ok(()),
- _ => Err("expected identifier"),
- });
- });
-
- Ok(())
- })
-}
-
-fn module_include(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::ModuleInclude, |p| {
- p.assert(SyntaxKind::Include);
- expr(p)
- })
-}
-
-fn break_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopBreak, |p| {
- p.assert(SyntaxKind::Break);
- Ok(())
- })
-}
-
-fn continue_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::LoopContinue, |p| {
- p.assert(SyntaxKind::Continue);
- Ok(())
- })
-}
-
-fn return_stmt(p: &mut Parser) -> ParseResult {
- p.perform(SyntaxKind::FuncReturn, |p| {
- p.assert(SyntaxKind::Return);
- if !p.at(SyntaxKind::Comma) && !p.eof() {
- expr(p)?;
- }
- Ok(())
- })
-}
-
-fn body(p: &mut Parser) -> ParseResult {
- match p.peek() {
- Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
- Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
- _ => {
- p.expected("body");
- Err(ParseError)
- }
- }
-}
diff --git a/src/syntax/incremental.rs b/src/syntax/reparse.rs
index 606daa2e..e72192ff 100644
--- a/src/syntax/incremental.rs
+++ b/src/syntax/reparse.rs
@@ -87,8 +87,8 @@ fn try_reparse(
// reject text that points to the special case for URL
// evasion and line comments.
if !child.kind().is_space()
- && child.kind() != &SyntaxKind::Semicolon
- && child.kind() != &SyntaxKind::Text('/'.into())
+ && child.kind() != SyntaxKind::Semicolon
+ && (child.kind() != SyntaxKind::Text || child.text() != "/")
&& (ahead.is_none() || change.replaced.start > child_span.end)
&& !ahead.map_or(false, Ahead::is_compulsory)
{
@@ -177,7 +177,7 @@ fn try_reparse(
// Make sure this is a markup node and that we may replace. If so, save
// the current indent.
let min_indent = match node.kind() {
- SyntaxKind::Markup { min_indent } if safe_to_replace => *min_indent,
+ SyntaxKind::Markup { min_indent } if safe_to_replace => min_indent,
_ => return None,
};
@@ -375,23 +375,23 @@ enum ReparseMode {
/// Whether changes _inside_ this node are safely encapsulated, so that only
/// this node must be reparsed.
-fn is_bounded(kind: &SyntaxKind) -> bool {
+fn is_bounded(kind: SyntaxKind) -> bool {
matches!(
kind,
SyntaxKind::CodeBlock
| SyntaxKind::ContentBlock
| SyntaxKind::Linebreak
- | SyntaxKind::SmartQuote { .. }
+ | SyntaxKind::SmartQuote
| SyntaxKind::BlockComment
| SyntaxKind::Space { .. }
- | SyntaxKind::Escape(_)
- | SyntaxKind::Shorthand(_)
+ | SyntaxKind::Escape
+ | SyntaxKind::Shorthand
)
}
/// Whether `at_start` would still be true after this node given the
/// previous value of the property.
-fn next_at_start(kind: &SyntaxKind, prev: bool) -> bool {
+fn next_at_start(kind: SyntaxKind, prev: bool) -> bool {
match kind {
SyntaxKind::Space { newlines: (1..) } => true,
SyntaxKind::Space { .. } | SyntaxKind::LineComment | SyntaxKind::BlockComment => {
diff --git a/src/syntax/resolve.rs b/src/syntax/resolve.rs
deleted file mode 100644
index 3ba9a252..00000000
--- a/src/syntax/resolve.rs
+++ /dev/null
@@ -1,233 +0,0 @@
-use unscanny::Scanner;
-
-use super::{is_ident, is_newline, RawFields};
-use crate::util::EcoString;
-
-/// Resolve all escape sequences in a string.
-pub fn resolve_string(string: &str) -> EcoString {
- let mut out = EcoString::with_capacity(string.len());
- let mut s = Scanner::new(string);
-
- while let Some(c) = s.eat() {
- if c != '\\' {
- out.push(c);
- continue;
- }
-
- let start = s.locate(-1);
- match s.eat() {
- Some('\\') => out.push('\\'),
- Some('"') => out.push('"'),
- Some('n') => out.push('\n'),
- Some('r') => out.push('\r'),
- Some('t') => out.push('\t'),
- Some('u') if s.eat_if('{') => {
- // TODO: Error if closing brace is missing.
- let sequence = s.eat_while(char::is_ascii_hexdigit);
- let _terminated = s.eat_if('}');
- match resolve_hex(sequence) {
- Some(c) => out.push(c),
- None => out.push_str(s.from(start)),
- }
- }
- _ => out.push_str(s.from(start)),
- }
- }
-
- out
-}
-
-/// Resolve a hexadecimal escape sequence into a character
-/// (only the inner hex letters without braces or `\u`).
-pub fn resolve_hex(sequence: &str) -> Option<char> {
- u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
-}
-
-/// Resolve the language tag and trim the raw text.
-pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawFields {
- if backticks > 1 {
- let (tag, inner) = split_at_lang_tag(text);
- let (text, block) = trim_and_split_raw(column, inner);
- RawFields {
- lang: is_ident(tag).then(|| tag.into()),
- text: text.into(),
- block,
- }
- } else {
- RawFields {
- lang: None,
- text: split_lines(text).join("\n").into(),
- block: false,
- }
- }
-}
-
-/// Parse the lang tag and return it alongside the remaining inner raw text.
-fn split_at_lang_tag(raw: &str) -> (&str, &str) {
- let mut s = Scanner::new(raw);
- (s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)), s.after())
-}
-
-/// Trim raw text and splits it into lines.
-///
-/// Also returns whether at least one newline was contained in `raw`.
-fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) {
- // Trims one space at the start.
- raw = raw.strip_prefix(' ').unwrap_or(raw);
-
- // Trim one space at the end if the last non-whitespace char is a backtick.
- if raw.trim_end().ends_with('`') {
- raw = raw.strip_suffix(' ').unwrap_or(raw);
- }
-
- let mut lines = split_lines(raw);
-
- // Dedent based on column, but not for the first line.
- for line in lines.iter_mut().skip(1) {
- let offset = line
- .chars()
- .take(column)
- .take_while(|c| c.is_whitespace())
- .map(char::len_utf8)
- .sum();
- *line = &line[offset..];
- }
-
- let had_newline = lines.len() > 1;
- let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
-
- // Trims a sequence of whitespace followed by a newline at the start.
- if lines.first().map_or(false, is_whitespace) {
- lines.remove(0);
- }
-
- // Trims a newline followed by a sequence of whitespace at the end.
- if lines.last().map_or(false, is_whitespace) {
- lines.pop();
- }
-
- (lines.join("\n"), had_newline)
-}
-
-/// Split a string into a vector of lines
-/// (respecting Unicode, Unix, Mac and Windows line breaks).
-fn split_lines(text: &str) -> Vec<&str> {
- let mut s = Scanner::new(text);
- let mut lines = Vec::new();
- let mut start = 0;
- let mut end = 0;
-
- while let Some(c) = s.eat() {
- if is_newline(c) {
- if c == '\r' {
- s.eat_if('\n');
- }
-
- lines.push(&text[start..end]);
- start = s.cursor();
- }
- end = s.cursor();
- }
-
- lines.push(&text[start..]);
- lines
-}
-
-#[cfg(test)]
-#[rustfmt::skip]
-mod tests {
- use super::*;
-
- #[test]
- fn test_resolve_strings() {
- #[track_caller]
- fn test(string: &str, expected: &str) {
- assert_eq!(resolve_string(string), expected);
- }
-
- test(r#"hello world"#, "hello world");
- test(r#"hello\nworld"#, "hello\nworld");
- test(r#"a\"bc"#, "a\"bc");
- test(r#"a\u{2603}bc"#, "a☃bc");
- test(r#"a\u{26c3bg"#, "a𦰻g");
- test(r#"av\u{6797"#, "av林");
- test(r#"a\\"#, "a\\");
- test(r#"a\\\nbc"#, "a\\\nbc");
- test(r#"a\t\r\nbc"#, "a\t\r\nbc");
- test(r"🌎", "🌎");
- test(r"🌎\", r"🌎\");
- test(r"\🌎", r"\🌎");
- }
-
- #[test]
- fn test_split_at_lang_tag() {
- #[track_caller]
- fn test(text: &str, lang: &str, inner: &str) {
- assert_eq!(split_at_lang_tag(text), (lang, inner));
- }
-
- test("typst it!", "typst", " it!");
- test("typst\n it!", "typst", "\n it!");
- test("typst\n it!", "typst", "\n it!");
- test("abc`", "abc", "`");
- test(" hi", "", " hi");
- test("`", "", "`");
- }
-
- #[test]
- fn test_resolve_raw() {
- #[track_caller]
- fn test(
- column: usize,
- backticks: usize,
- raw: &str,
- lang: Option<&str>,
- text: &str,
- block: bool,
- ) {
- let node = resolve_raw(column, backticks, raw);
- assert_eq!(node.lang.as_deref(), lang);
- assert_eq!(node.text, text);
- assert_eq!(node.block, block);
- }
-
- // Just one backtick.
- test(0, 1, "py", None, "py", false);
- test(0, 1, "1\n2", None, "1\n2", false);
- test(0, 1, "1\r\n2", None, "1\n2", false);
-
- // More than one backtick with lang tag.
- test(0, 2, "js alert()", Some("js"), "alert()", false);
- test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true);
- test(0, 2, "♥", None, "", false);
-
- // Trimming of whitespace (tested more thoroughly in separate test).
- test(0, 2, " a", None, "a", false);
- test(0, 2, " a", None, " a", false);
- test(0, 2, " \na", None, "a", true);
-
- // Dedenting
- test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true);
- }
-
- #[test]
- fn test_trim_raw() {
- #[track_caller]
- fn test(text: &str, expected: &str) {
- assert_eq!(trim_and_split_raw(0, text).0, expected);
- }
-
- test(" hi", "hi");
- test(" hi", " hi");
- test("\nhi", "hi");
- test(" \n hi", " hi");
- test("hi` ", "hi`");
- test("hi` ", "hi` ");
- test("hi` ", "hi` ");
- test("hi ", "hi ");
- test("hi ", "hi ");
- test("hi\n", "hi");
- test("hi \n ", "hi ");
- test(" \n hi \n ", " hi ");
- }
-}
diff --git a/src/syntax/source.rs b/src/syntax/source.rs
index 9b76af12..41805a60 100644
--- a/src/syntax/source.rs
+++ b/src/syntax/source.rs
@@ -8,10 +8,10 @@ use std::path::{Path, PathBuf};
use comemo::Prehashed;
use unscanny::Scanner;
+use super::ast::Markup;
+use super::reparse::reparse;
+use super::{is_newline, parse, Span, SyntaxNode};
use crate::diag::SourceResult;
-use crate::syntax::ast::Markup;
-use crate::syntax::{is_newline, parse, reparse};
-use crate::syntax::{Span, SyntaxNode};
use crate::util::{PathExt, StrExt};
/// A source file.
@@ -124,11 +124,8 @@ impl Source {
}
// Recalculate the line starts after the edit.
- self.lines.extend(lines_from(
- start_byte,
- start_utf16,
- &self.text[start_byte..],
- ));
+ self.lines
+ .extend(lines_from(start_byte, start_utf16, &self.text[start_byte..]));
// Incrementally reparse the replaced range.
let mut root = std::mem::take(&mut self.root).into_inner();