summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-12-06 12:37:08 +0100
committerLaurenz <laurmaedje@gmail.com>2022-12-06 12:37:08 +0100
commit3ecb0c754bc1777e002a43e4c34b27e676f9a95c (patch)
tree49dd299b6671058dd47b7dae84b748f117a962d3
parentc2e458a133772a94009733040b39d58e781af977 (diff)
More math syntax
-rw-r--r--Cargo.lock1
-rw-r--r--library/Cargo.toml1
-rw-r--r--library/src/lib.rs6
-rw-r--r--library/src/math/mod.rs284
-rw-r--r--library/src/math/tex.rs33
-rw-r--r--library/src/text/raw.rs2
-rw-r--r--src/model/eval.rs30
-rw-r--r--src/syntax/ast.rs13
-rw-r--r--src/syntax/highlight.rs11
-rw-r--r--src/syntax/mod.rs17
-rw-r--r--src/syntax/parser.rs28
-rw-r--r--src/syntax/parsing.rs78
-rw-r--r--src/syntax/tests.rs483
-rw-r--r--src/syntax/tokens.rs581
-rw-r--r--tests/ref/math/simple.pngbin6554 -> 6555 bytes
-rw-r--r--tests/ref/math/syntax.pngbin0 -> 53462 bytes
-rw-r--r--tests/typ/math/syntax.typ24
-rw-r--r--tools/test-helper/extension.js8
18 files changed, 945 insertions, 655 deletions
diff --git a/Cargo.lock b/Cargo.lock
index fe522d84..37fe60d1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1188,6 +1188,7 @@ dependencies = [
"unicode-bidi",
"unicode-math",
"unicode-script",
+ "unicode-segmentation",
"xi-unicode",
]
diff --git a/library/Cargo.toml b/library/Cargo.toml
index 2410cb0c..f5377d64 100644
--- a/library/Cargo.toml
+++ b/library/Cargo.toml
@@ -28,4 +28,5 @@ typed-arena = "2"
unicode-bidi = "0.3.5"
unicode-math = { git = "https://github.com/s3bk/unicode-math/" }
unicode-script = "0.5"
+unicode-segmentation = "1"
xi-unicode = "0.3"
diff --git a/library/src/lib.rs b/library/src/lib.rs
index d549c1cd..af5c252b 100644
--- a/library/src/lib.rs
+++ b/library/src/lib.rs
@@ -52,11 +52,7 @@ fn scope() -> Scope {
std.def_node::<math::MathNode>("math");
std.def_node::<math::AtomNode>("atom");
std.def_node::<math::FracNode>("frac");
- std.define("sum", "∑");
- std.define("in", "∈");
- std.define("arrow", "→");
- std.define("NN", "ℕ");
- std.define("RR", "ℝ");
+ std.def_node::<math::SqrtNode>("sqrt");
// Layout.
std.def_node::<layout::PageNode>("page");
diff --git a/library/src/math/mod.rs b/library/src/math/mod.rs
index a276908d..1e8145cc 100644
--- a/library/src/math/mod.rs
+++ b/library/src/math/mod.rs
@@ -2,13 +2,12 @@
mod tex;
-use std::fmt::Write;
+use typst::model::{Guard, SequenceNode};
+use unicode_segmentation::UnicodeSegmentation;
-use typst::model::Guard;
-
-use self::tex::{layout_tex, Texify};
+use self::tex::layout_tex;
use crate::prelude::*;
-use crate::text::FontFamily;
+use crate::text::{FontFamily, LinebreakNode, SpaceNode, SymbolNode, TextNode};
/// A piece of a mathematical formula.
#[derive(Debug, Clone, Hash)]
@@ -55,15 +54,182 @@ impl Layout for MathNode {
styles: StyleChain,
_: &Regions,
) -> SourceResult<Fragment> {
- layout_tex(vt, &self.texify(), self.display, styles)
+ let mut t = Texifier::new();
+ self.texify(&mut t)?;
+ layout_tex(vt, &t.finish(), self.display, styles)
}
}
impl Inline for MathNode {}
+/// Turn a math node into TeX math code.
+#[capability]
+trait Texify {
+ /// Perform the conversion.
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()>;
+
+ /// Texify the node, but trim parentheses.
+ fn texify_unparen(&self, t: &mut Texifier) -> SourceResult<()> {
+ let s = {
+ let mut sub = Texifier::new();
+ self.texify(&mut sub)?;
+ sub.finish()
+ };
+
+ let unparened = if s.starts_with("\\left(") && s.ends_with("\\right)") {
+ s[6..s.len() - 7].into()
+ } else {
+ s
+ };
+
+ t.push_str(&unparened);
+ Ok(())
+ }
+}
+
+/// Builds the TeX representation of the formula.
+struct Texifier {
+ tex: EcoString,
+ support: bool,
+ space: bool,
+}
+
+impl Texifier {
+ /// Create a new texifier.
+ fn new() -> Self {
+ Self {
+ tex: EcoString::new(),
+ support: false,
+ space: false,
+ }
+ }
+
+ /// Finish texifier and return the TeX string.
+ fn finish(self) -> EcoString {
+ self.tex
+ }
+
+ /// Push a weak space.
+ fn push_space(&mut self) {
+ self.space = !self.tex.is_empty();
+ }
+
+ /// Mark this position as supportive. This allows a space before or after
+ /// to exist.
+ fn support(&mut self) {
+ self.support = true;
+ }
+
+ /// Flush a space.
+ fn flush(&mut self) {
+ if self.space && self.support {
+ self.tex.push_str("\\ ");
+ }
+
+ self.space = false;
+ self.support = false;
+ }
+
+ /// Push a string.
+ fn push_str(&mut self, s: &str) {
+ self.flush();
+ self.tex.push_str(s);
+ }
+
+ /// Escape and push a char for TeX usage.
+ #[rustfmt::skip]
+ fn push_escaped(&mut self, c: char) {
+ self.flush();
+ match c {
+ ' ' => self.tex.push_str("\\ "),
+ '%' | '&' | '$' | '#' => {
+ self.tex.push('\\');
+ self.tex.push(c);
+ self.tex.push(' ');
+ }
+ '{' => self.tex.push_str("\\left\\{"),
+ '}' => self.tex.push_str("\\right\\}"),
+ '[' | '(' => {
+ self.tex.push_str("\\left");
+ self.tex.push(c);
+ }
+ ']' | ')' => {
+ self.tex.push_str("\\right");
+ self.tex.push(c);
+ }
+ 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | 'Α' ..= 'Ω' | 'α' ..= 'ω' |
+ '*' | '+' | '-' | '?' | '!' | '=' | '<' | '>' |
+ ':' | ',' | ';' | '|' | '/' | '@' | '.' | '"' => self.tex.push(c),
+ c => {
+ if let Some(sym) = unicode_math::SYMBOLS
+ .iter()
+ .find(|sym| sym.codepoint == c) {
+ self.tex.push('\\');
+ self.tex.push_str(sym.name);
+ self.tex.push(' ');
+ }
+ }
+ }
+ }
+}
+
impl Texify for MathNode {
- fn texify(&self) -> EcoString {
- self.children.iter().map(Texify::texify).collect()
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ for child in &self.children {
+ child.texify(t)?;
+ }
+ Ok(())
+ }
+}
+
+impl Texify for Content {
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ if self.is::<SpaceNode>() {
+ t.push_space();
+ return Ok(());
+ }
+
+ if self.is::<LinebreakNode>() {
+ t.push_str("\\");
+ return Ok(());
+ }
+
+ if let Some(node) = self.to::<SymbolNode>() {
+ if let Some(c) = symmie::get(&node.0) {
+ t.push_escaped(c);
+ return Ok(());
+ } else if let Some(span) = self.span() {
+ bail!(span, "unknown symbol");
+ }
+ }
+
+ if let Some(node) = self.to::<TextNode>() {
+ t.support();
+ t.push_str("\\mathrm{");
+ for c in node.0.chars() {
+ t.push_escaped(c);
+ }
+ t.push_str("}");
+ t.support();
+ return Ok(());
+ }
+
+ if let Some(node) = self.to::<SequenceNode>() {
+ for child in &node.0 {
+ child.texify(t)?;
+ }
+ return Ok(());
+ }
+
+ if let Some(node) = self.with::<dyn Texify>() {
+ return node.texify(t);
+ }
+
+ if let Some(span) = self.span() {
+ bail!(span, "not allowed here");
+ }
+
+ Ok(())
}
}
@@ -72,11 +238,35 @@ impl Texify for MathNode {
pub struct AtomNode(pub EcoString);
#[node(Texify)]
-impl AtomNode {}
+impl AtomNode {
+ fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("text")?).pack())
+ }
+}
impl Texify for AtomNode {
- fn texify(&self) -> EcoString {
- self.0.chars().map(escape_char).collect()
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ let multi = self.0.graphemes(true).count() > 1;
+ if multi {
+ t.push_str("\\mathrm{");
+ }
+
+ for c in self.0.chars() {
+ let supportive = c == '|';
+ if supportive {
+ t.support();
+ }
+ t.push_escaped(c);
+ if supportive {
+ t.support();
+ }
+ }
+
+ if multi {
+ t.push_str("}");
+ }
+
+ Ok(())
}
}
@@ -90,15 +280,22 @@ pub struct FracNode {
}
#[node(Texify)]
-impl FracNode {}
+impl FracNode {
+ fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
+ let num = args.expect("numerator")?;
+ let denom = args.expect("denominator")?;
+ Ok(Self { num, denom }.pack())
+ }
+}
impl Texify for FracNode {
- fn texify(&self) -> EcoString {
- format_eco!(
- "\\frac{{{}}}{{{}}}",
- unparen(self.num.texify()),
- unparen(self.denom.texify())
- )
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ t.push_str("\\frac{");
+ self.num.texify_unparen(t)?;
+ t.push_str("}{");
+ self.denom.texify_unparen(t)?;
+ t.push_str("}");
+ Ok(())
}
}
@@ -117,18 +314,22 @@ pub struct ScriptNode {
impl ScriptNode {}
impl Texify for ScriptNode {
- fn texify(&self) -> EcoString {
- let mut tex = self.base.texify();
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ self.base.texify(t)?;
if let Some(sub) = &self.sub {
- write!(tex, "_{{{}}}", unparen(sub.texify())).unwrap();
+ t.push_str("_{");
+ sub.texify_unparen(t)?;
+ t.push_str("}");
}
if let Some(sup) = &self.sup {
- write!(tex, "^{{{}}}", unparen(sup.texify())).unwrap();
+ t.push_str("^{");
+ sup.texify_unparen(t)?;
+ t.push_str("}");
}
- tex
+ Ok(())
}
}
@@ -140,32 +341,27 @@ pub struct AlignNode(pub usize);
impl AlignNode {}
impl Texify for AlignNode {
- fn texify(&self) -> EcoString {
- EcoString::new()
+ fn texify(&self, _: &mut Texifier) -> SourceResult<()> {
+ Ok(())
}
}
-/// Escape a char for TeX usage.
-#[rustfmt::skip]
-fn escape_char(c: char) -> EcoString {
- match c {
- '{' | '}' | '%' | '&' | '$' | '#' => format_eco!(" \\{c} "),
- 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | 'Α' ..= 'Ω' | 'α' ..= 'ω' |
- '*' | '+' | '-' | '[' | '(' | ']' | ')' | '?' | '!' | '=' | '<' | '>' |
- ':' | ',' | ';' | '|' | '/' | '@' | '.' | '"' => c.into(),
- c => unicode_math::SYMBOLS
- .iter()
- .find(|sym| sym.codepoint == c)
- .map(|sym| format_eco!("\\{} ", sym.name))
- .unwrap_or_default(),
+/// A square root node.
+#[derive(Debug, Hash)]
+pub struct SqrtNode(Content);
+
+#[node(Texify)]
+impl SqrtNode {
+ fn construct(_: &Vm, args: &mut Args) -> SourceResult<Content> {
+ Ok(Self(args.expect("body")?).pack())
}
}
-/// Trim grouping parenthesis≤.
-fn unparen(s: EcoString) -> EcoString {
- if s.starts_with('(') && s.ends_with(')') {
- s[1..s.len() - 1].into()
- } else {
- s
+impl Texify for SqrtNode {
+ fn texify(&self, t: &mut Texifier) -> SourceResult<()> {
+ t.push_str("\\sqrt{");
+ self.0.texify_unparen(t)?;
+ t.push_str("}");
+ Ok(())
}
}
diff --git a/library/src/math/tex.rs b/library/src/math/tex.rs
index b2b6486e..da07f1d6 100644
--- a/library/src/math/tex.rs
+++ b/library/src/math/tex.rs
@@ -6,32 +6,7 @@ use rex::render::{Backend, Cursor, Renderer};
use typst::font::Font;
use crate::prelude::*;
-use crate::text::{families, variant, LinebreakNode, SpaceNode, TextNode};
-
-/// Turn a math node into TeX math code.
-#[capability]
-pub trait Texify {
- /// Perform the conversion.
- fn texify(&self) -> EcoString;
-}
-
-impl Texify for Content {
- fn texify(&self) -> EcoString {
- if self.is::<SpaceNode>() {
- return EcoString::new();
- }
-
- if self.is::<LinebreakNode>() {
- return r"\\".into();
- }
-
- if let Some(node) = self.with::<dyn Texify>() {
- return node.texify();
- }
-
- panic!("{self:?} is not math");
- }
-}
+use crate::text::{families, variant, TextNode};
/// Layout a TeX formula into a frame.
pub fn layout_tex(
@@ -63,13 +38,15 @@ pub fn layout_tex(
let style = if display { Style::Display } else { Style::Text };
let settings = LayoutSettings::new(&ctx, em.to_pt(), style);
let renderer = Renderer::new();
- let layout = renderer
+ let Ok(layout) = renderer
.layout(&tex, settings)
.map_err(|err| match err {
Error::Parse(err) => err.to_string(),
Error::Layout(LayoutError::Font(err)) => err.to_string(),
})
- .expect("failed to layout with rex");
+ else {
+ panic!("failed to layout with rex: {tex}");
+ };
// Determine the metrics.
let (x0, y0, x1, y1) = renderer.size(&layout);
diff --git a/library/src/text/raw.rs b/library/src/text/raw.rs
index a043019a..7c1e3600 100644
--- a/library/src/text/raw.rs
+++ b/library/src/text/raw.rs
@@ -169,7 +169,7 @@ pub static THEME: Lazy<Theme> = Lazy::new(|| Theme {
item("entity.name, variable.function, support", Some("#4b69c6"), None),
item("support.macro", Some("#16718d"), None),
item("meta.annotation", Some("#301414"), None),
- item("entity.other, meta.interpolation", Some("#8b41b1"), None),
+ item("entity.other, meta.interpolation, constant.symbol.typst", Some("#8b41b1"), None),
item("invalid", Some("#ff0000"), None),
],
});
diff --git a/src/model/eval.rs b/src/model/eval.rs
index 1d942dd0..a32b0cd2 100644
--- a/src/model/eval.rs
+++ b/src/model/eval.rs
@@ -271,7 +271,6 @@ impl Eval for ast::MarkupNode {
Self::Emph(v) => v.eval(vm)?,
Self::Link(v) => v.eval(vm)?,
Self::Raw(v) => v.eval(vm)?,
- Self::Math(v) => v.eval(vm)?,
Self::Heading(v) => v.eval(vm)?,
Self::List(v) => v.eval(vm)?,
Self::Enum(v) => v.eval(vm)?,
@@ -426,19 +425,29 @@ impl Eval for ast::MathNode {
Self::Linebreak(v) => v.eval(vm)?,
Self::Escape(v) => (vm.items.math_atom)(v.get().into()),
Self::Atom(v) => v.eval(vm)?,
+ Self::Symbol(v) => (vm.items.symbol)(v.get().clone()),
Self::Script(v) => v.eval(vm)?,
Self::Frac(v) => v.eval(vm)?,
Self::Align(v) => v.eval(vm)?,
Self::Group(v) => v.eval(vm)?,
- Self::Expr(v) => match v.eval(vm)? {
- Value::None => Content::empty(),
- Value::Int(v) => (vm.items.math_atom)(format_eco!("{}", v)),
- Value::Float(v) => (vm.items.math_atom)(format_eco!("{}", v)),
- Value::Str(v) => (vm.items.math_atom)(v.into()),
- Value::Content(v) => v,
- _ => bail!(v.span(), "unexpected garbage"),
- },
- })
+ Self::Expr(v) => {
+ if let ast::Expr::Ident(ident) = v {
+ if self.as_untyped().len() == ident.len()
+ && !vm.scopes.get(ident).is_ok()
+ {
+ let node = (vm.items.symbol)(ident.get().clone());
+ return Ok(node.spanned(self.span()));
+ }
+ }
+
+ match v.eval(vm)? {
+ Value::Int(v) => (vm.items.math_atom)(format_eco!("{}", v)),
+ Value::Float(v) => (vm.items.math_atom)(format_eco!("{}", v)),
+ v => v.display(),
+ }
+ }
+ }
+ .spanned(self.span()))
}
}
@@ -494,6 +503,7 @@ impl Eval for ast::Expr {
Self::Ident(v) => v.eval(vm),
Self::Code(v) => v.eval(vm),
Self::Content(v) => v.eval(vm).map(Value::Content),
+ Self::Math(v) => v.eval(vm).map(Value::Content),
Self::Array(v) => v.eval(vm).map(Value::Array),
Self::Dict(v) => v.eval(vm).map(Value::Dict),
Self::Parenthesized(v) => v.eval(vm),
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index c44fa2a0..55586feb 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -107,8 +107,6 @@ pub enum MarkupNode {
Enum(EnumItem),
/// An item in a description list: `/ Term: Details`.
Desc(DescItem),
- /// A math formula: `$x$`, `$ x^2 $`.
- Math(Math),
/// An expression.
Expr(Expr),
}
@@ -132,7 +130,6 @@ impl AstNode for MarkupNode {
SyntaxKind::ListItem => node.cast().map(Self::List),
SyntaxKind::EnumItem => node.cast().map(Self::Enum),
SyntaxKind::DescItem => node.cast().map(Self::Desc),
- SyntaxKind::Math => node.cast().map(Self::Math),
_ => node.cast().map(Self::Expr),
}
}
@@ -155,7 +152,6 @@ impl AstNode for MarkupNode {
Self::List(v) => v.as_untyped(),
Self::Enum(v) => v.as_untyped(),
Self::Desc(v) => v.as_untyped(),
- Self::Math(v) => v.as_untyped(),
Self::Expr(v) => v.as_untyped(),
}
}
@@ -447,6 +443,9 @@ pub enum MathNode {
Escape(Escape),
/// An atom: `x`, `+`, `12`.
Atom(Atom),
+ /// Symbol notation: `:arrow:l:` or `arrow:l`. Notations without any colons
+ /// are parsed as identifier expression and handled during evaluation.
+ Symbol(Symbol),
/// A base with optional sub- and superscripts: `a_1^2`.
Script(Script),
/// A fraction: `x/2`.
@@ -466,6 +465,7 @@ impl AstNode for MathNode {
SyntaxKind::Linebreak => node.cast().map(Self::Linebreak),
SyntaxKind::Escape(_) => node.cast().map(Self::Escape),
SyntaxKind::Atom(_) => node.cast().map(Self::Atom),
+ SyntaxKind::Symbol(_) => node.cast().map(Self::Symbol),
SyntaxKind::Script => node.cast().map(Self::Script),
SyntaxKind::Frac => node.cast().map(Self::Frac),
SyntaxKind::Align => node.cast().map(Self::Align),
@@ -480,6 +480,7 @@ impl AstNode for MathNode {
Self::Linebreak(v) => v.as_untyped(),
Self::Escape(v) => v.as_untyped(),
Self::Atom(v) => v.as_untyped(),
+ Self::Symbol(v) => v.as_untyped(),
Self::Script(v) => v.as_untyped(),
Self::Frac(v) => v.as_untyped(),
Self::Align(v) => v.as_untyped(),
@@ -574,6 +575,8 @@ pub enum Expr {
Code(CodeBlock),
/// A content block: `[*Hi* there!]`.
Content(ContentBlock),
+ /// A math formula: `$x$`, `$ x^2 $`.
+ Math(Math),
/// A grouped expression: `(1 + 2)`.
Parenthesized(Parenthesized),
/// An array: `(1, "hi", 12cm)`.
@@ -622,6 +625,7 @@ impl AstNode for Expr {
SyntaxKind::Ident(_) => node.cast().map(Self::Ident),
SyntaxKind::CodeBlock => node.cast().map(Self::Code),
SyntaxKind::ContentBlock => node.cast().map(Self::Content),
+ SyntaxKind::Math => node.cast().map(Self::Math),
SyntaxKind::Parenthesized => node.cast().map(Self::Parenthesized),
SyntaxKind::Array => node.cast().map(Self::Array),
SyntaxKind::Dict => node.cast().map(Self::Dict),
@@ -651,6 +655,7 @@ impl AstNode for Expr {
Self::Lit(v) => v.as_untyped(),
Self::Code(v) => v.as_untyped(),
Self::Content(v) => v.as_untyped(),
+ Self::Math(v) => v.as_untyped(),
Self::Ident(v) => v.as_untyped(),
Self::Array(v) => v.as_untyped(),
Self::Dict(v) => v.as_untyped(),
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs
index d4da7b3e..3fed905f 100644
--- a/src/syntax/highlight.rs
+++ b/src/syntax/highlight.rs
@@ -163,8 +163,6 @@ pub enum Category {
ListMarker,
/// A term in a description list.
ListTerm,
- /// A full math formula.
- Math,
/// The delimiters of a math formula.
MathDelimiter,
/// An operator with special meaning in a math formula.
@@ -300,15 +298,17 @@ impl Category {
SyntaxKind::EnumItem => Some(Category::ListItem),
SyntaxKind::EnumNumbering(_) => Some(Category::ListMarker),
SyntaxKind::DescItem => Some(Category::ListItem),
- SyntaxKind::Math => Some(Category::Math),
+ SyntaxKind::Math => None,
SyntaxKind::Atom(_) => None,
SyntaxKind::Script => None,
SyntaxKind::Frac => None,
SyntaxKind::Align => None,
SyntaxKind::Ident(_) => match parent.kind() {
- SyntaxKind::Markup { .. } => Some(Category::Interpolated),
- SyntaxKind::Math => Some(Category::Interpolated),
+ SyntaxKind::Markup { .. }
+ | SyntaxKind::Math
+ | SyntaxKind::Script
+ | SyntaxKind::Frac => Some(Category::Interpolated),
SyntaxKind::FuncCall => Some(Category::Function),
SyntaxKind::MethodCall if i > 0 => Some(Category::Function),
SyntaxKind::Closure if i == 0 => Some(Category::Function),
@@ -378,7 +378,6 @@ impl Category {
Self::Emph => "markup.italic.typst",
Self::Link => "markup.underline.link.typst",
Self::Raw => "markup.raw.typst",
- Self::Math => "string.other.math.typst",
Self::MathDelimiter => "punctuation.definition.math.typst",
Self::MathOperator => "keyword.operator.math.typst",
Self::Heading => "markup.heading.typst",
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 2ef49322..c461a589 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -24,19 +24,4 @@ use incremental::reparse;
use parser::*;
#[cfg(test)]
-mod tests {
- use std::fmt::Debug;
-
- #[track_caller]
- pub fn check<T>(text: &str, found: T, expected: T)
- where
- T: Debug + PartialEq,
- {
- if found != expected {
- println!("source: {text:?}");
- println!("expected: {expected:#?}");
- println!("found: {found:#?}");
- panic!("test failed");
- }
- }
-}
+mod tests;
diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs
index ff4a4952..3e133fe1 100644
--- a/src/syntax/parser.rs
+++ b/src/syntax/parser.rs
@@ -235,17 +235,9 @@ impl<'s> Parser<'s> {
pub fn start_group(&mut self, kind: Group) {
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
self.tokens.set_mode(match kind {
- Group::Strong | Group::Emph => TokenMode::Markup,
- Group::Bracket => match self.tokens.mode() {
- TokenMode::Math => TokenMode::Math,
- _ => TokenMode::Markup,
- },
- Group::Brace | Group::Paren => match self.tokens.mode() {
- TokenMode::Math => TokenMode::Math,
- _ => TokenMode::Code,
- },
- Group::Math => TokenMode::Math,
- Group::Expr | Group::Imports => TokenMode::Code,
+ Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup,
+ Group::Math | Group::MathRow(_, _) => TokenMode::Math,
+ Group::Brace | Group::Paren | Group::Expr | Group::Imports => TokenMode::Code,
});
match kind {
@@ -255,6 +247,7 @@ impl<'s> Parser<'s> {
Group::Strong => self.assert(SyntaxKind::Star),
Group::Emph => self.assert(SyntaxKind::Underscore),
Group::Math => self.assert(SyntaxKind::Dollar),
+ Group::MathRow(l, _) => self.assert(SyntaxKind::Atom(l.into())),
Group::Expr => self.repeek(),
Group::Imports => self.repeek(),
}
@@ -279,6 +272,7 @@ impl<'s> Parser<'s> {
Group::Strong => Some((SyntaxKind::Star, true)),
Group::Emph => Some((SyntaxKind::Underscore, true)),
Group::Math => Some((SyntaxKind::Dollar, true)),
+ Group::MathRow(_, r) => Some((SyntaxKind::Atom(r.into()), true)),
Group::Expr => Some((SyntaxKind::Semicolon, false)),
Group::Imports => None,
} {
@@ -344,9 +338,17 @@ impl<'s> Parser<'s> {
Some(SyntaxKind::RightParen) => self.inside(Group::Paren),
Some(SyntaxKind::Star) => self.inside(Group::Strong),
Some(SyntaxKind::Underscore) => self.inside(Group::Emph),
- Some(SyntaxKind::Dollar) => self.inside(Group::Math),
+ Some(SyntaxKind::Dollar) => {
+ self.groups.last().map(|group| group.kind) == Some(Group::Math)
+ }
Some(SyntaxKind::Semicolon) => self.inside(Group::Expr),
Some(SyntaxKind::From) => self.inside(Group::Imports),
+ Some(SyntaxKind::Atom(s)) => match s.as_str() {
+ ")" => self.inside(Group::MathRow('(', ')')),
+ "}" => self.inside(Group::MathRow('{', '}')),
+ "]" => self.inside(Group::MathRow('[', ']')),
+ _ => false,
+ },
Some(SyntaxKind::Space { newlines }) => self.space_ends_group(*newlines),
Some(_) => false,
None => true,
@@ -531,6 +533,8 @@ pub enum Group {
Emph,
/// A group surrounded by dollar signs: `$...$`.
Math,
+ /// A group surrounded by math delimiters.
+ MathRow(char, char),
/// A group ended by a semicolon or a line break: `;`, `\n`.
Expr,
/// A group for import items, ended by a semicolon, line break or `from`.
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index 59e066a6..5bd5e63b 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -268,7 +268,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Include
| SyntaxKind::Break
| SyntaxKind::Continue
- | SyntaxKind::Return => markup_expr(p),
+ | SyntaxKind::Return => embedded_expr(p),
// Code and content block.
SyntaxKind::LeftBrace => code_block(p),
@@ -359,7 +359,7 @@ fn desc_item(p: &mut Parser, at_start: bool) -> ParseResult {
Ok(())
}
-fn markup_expr(p: &mut Parser) {
+fn embedded_expr(p: &mut Parser) {
// Does the expression need termination or can content follow directly?
let stmt = matches!(
p.peek(),
@@ -437,36 +437,63 @@ fn math_node_prec(p: &mut Parser, min_prec: usize, stop: Option<SyntaxKind>) {
fn math_primary(p: &mut Parser) {
let Some(token) = p.peek() else { return };
match token {
- // Spaces, atoms and expressions.
+ // Spaces and expressions.
SyntaxKind::Space { .. }
| SyntaxKind::Linebreak
| SyntaxKind::Escape(_)
- | SyntaxKind::Atom(_)
- | SyntaxKind::Ident(_) => p.eat(),
+ | SyntaxKind::Str(_)
+ | SyntaxKind::Symbol(_) => p.eat(),
- // Groups.
- SyntaxKind::LeftParen => math_group(p, Group::Paren, '(', ')'),
- SyntaxKind::LeftBracket => math_group(p, Group::Bracket, '[', ']'),
- SyntaxKind::LeftBrace => math_group(p, Group::Brace, '{', '}'),
+ // Atoms.
+ SyntaxKind::Atom(s) => match s.as_str() {
+ "(" => math_group(p, Group::MathRow('(', ')')),
+ "{" => math_group(p, Group::MathRow('{', '}')),
+ "[" => math_group(p, Group::MathRow('[', ']')),
+ _ => p.eat(),
+ },
// Alignment indicator.
SyntaxKind::Amp => math_align(p),
+ // Identifiers and math calls.
+ SyntaxKind::Ident(_) => {
+ let marker = p.marker();
+ p.eat();
+
+ // Parenthesis or bracket means this is a function call.
+ if matches!(p.peek_direct(), Some(SyntaxKind::Atom(s)) if s == "(") {
+ marker.perform(p, SyntaxKind::FuncCall, math_args);
+ }
+ }
+
+ // Hashtag + keyword / identifier.
+ SyntaxKind::Let
+ | SyntaxKind::Set
+ | SyntaxKind::Show
+ | SyntaxKind::If
+ | SyntaxKind::While
+ | SyntaxKind::For
+ | SyntaxKind::Import
+ | SyntaxKind::Include
+ | SyntaxKind::Break
+ | SyntaxKind::Continue
+ | SyntaxKind::Return => embedded_expr(p),
+
+ // Code and content block.
+ SyntaxKind::LeftBrace => code_block(p),
+ SyntaxKind::LeftBracket => content_block(p),
+
_ => p.unexpected(),
}
}
-fn math_group(p: &mut Parser, group: Group, l: char, r: char) {
+fn math_group(p: &mut Parser, group: Group) {
p.perform(SyntaxKind::Math, |p| {
- let marker = p.marker();
p.start_group(group);
- marker.convert(p, SyntaxKind::Atom(l.into()));
while !p.eof() {
math_node(p);
}
- let marker = p.marker();
p.end_group();
- marker.convert(p, SyntaxKind::Atom(r.into()));
})
}
@@ -582,6 +609,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
Some(SyntaxKind::LeftParen) => parenthesized(p, atomic),
Some(SyntaxKind::LeftBrace) => Ok(code_block(p)),
Some(SyntaxKind::LeftBracket) => Ok(content_block(p)),
+ Some(SyntaxKind::Dollar) => Ok(math(p)),
// Keywords.
Some(SyntaxKind::Let) => let_binding(p),
@@ -902,6 +930,28 @@ fn args(p: &mut Parser) -> ParseResult {
Ok(())
}
+fn math_args(p: &mut Parser) {
+ p.start_group(Group::MathRow('(', ')'));
+ p.perform(SyntaxKind::Args, |p| {
+ let mut marker = p.marker();
+ while !p.eof() {
+ if matches!(p.peek(), Some(SyntaxKind::Atom(s)) if s == ",") {
+ marker.end(p, SyntaxKind::Math);
+ let comma = p.marker();
+ p.eat();
+ comma.convert(p, SyntaxKind::Comma);
+ marker = p.marker();
+ } else {
+ math_node(p);
+ }
+ }
+ if marker != p.marker() {
+ marker.end(p, SyntaxKind::Math);
+ }
+ });
+ p.end_group();
+}
+
fn let_binding(p: &mut Parser) -> ParseResult {
p.perform(SyntaxKind::LetBinding, |p| {
p.assert(SyntaxKind::Let);
diff --git a/src/syntax/tests.rs b/src/syntax/tests.rs
new file mode 100644
index 00000000..7b5dd870
--- /dev/null
+++ b/src/syntax/tests.rs
@@ -0,0 +1,483 @@
+#![allow(non_snake_case)]
+
+use std::num::NonZeroUsize;
+use std::sync::Arc;
+
+use super::*;
+use crate::geom::{AbsUnit, AngleUnit};
+
+use ErrorPos::*;
+use Option::None;
+use SyntaxKind::*;
+use TokenMode::{Code, Markup};
+
+use std::fmt::Debug;
+
+#[track_caller]
+pub fn check<T>(text: &str, found: T, expected: T)
+where
+ T: Debug + PartialEq,
+{
+ if found != expected {
+ println!("source: {text:?}");
+ println!("expected: {expected:#?}");
+ println!("found: {found:#?}");
+ panic!("test failed");
+ }
+}
+
+fn Space(newlines: usize) -> SyntaxKind {
+ SyntaxKind::Space { newlines }
+}
+
+fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
+ SyntaxKind::Raw(Arc::new(RawFields {
+ text: text.into(),
+ lang: lang.map(Into::into),
+ block,
+ }))
+}
+
+fn Str(string: &str) -> SyntaxKind {
+ SyntaxKind::Str(string.into())
+}
+
+fn Text(string: &str) -> SyntaxKind {
+ SyntaxKind::Text(string.into())
+}
+
+fn Ident(ident: &str) -> SyntaxKind {
+ SyntaxKind::Ident(ident.into())
+}
+
+fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
+ SyntaxKind::Error(pos, message.into())
+}
+
+/// Building blocks for suffix testing.
+///
+/// We extend each test case with a collection of different suffixes to make
+/// sure tokens end at the correct position. These suffixes are split into
+/// blocks, which can be disabled/enabled per test case. For example, when
+/// testing identifiers we disable letter suffixes because these would
+/// mingle with the identifiers.
+///
+/// Suffix blocks:
+/// - ' ': spacing
+/// - 'a': letters
+/// - '1': numbers
+/// - '/': symbols
+const BLOCKS: &str = " a1/";
+
+// Suffixes described by four-tuples of:
+//
+// - block the suffix is part of
+// - mode in which the suffix is applicable
+// - the suffix string
+// - the resulting suffix NodeKind
+fn suffixes() -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)>
+{
+ [
+ // Whitespace suffixes.
+ (' ', None, " ", Space(0)),
+ (' ', None, "\n", Space(1)),
+ (' ', None, "\r", Space(1)),
+ (' ', None, "\r\n", Space(1)),
+ // Letter suffixes.
+ ('a', Some(Markup), "hello", Text("hello")),
+ ('a', Some(Markup), "💚", Text("💚")),
+ ('a', Some(Code), "val", Ident("val")),
+ ('a', Some(Code), "α", Ident("α")),
+ ('a', Some(Code), "_", Ident("_")),
+ // Number suffixes.
+ ('1', Some(Code), "2", Int(2)),
+ ('1', Some(Code), ".2", Float(0.2)),
+ // Symbol suffixes.
+ ('/', None, "[", LeftBracket),
+ ('/', None, "//", LineComment),
+ ('/', None, "/**/", BlockComment),
+ ('/', Some(Markup), "*", Star),
+ ('/', Some(Markup), r"\\", Escape('\\')),
+ ('/', Some(Markup), "#let", Let),
+ ('/', Some(Code), "(", LeftParen),
+ ('/', Some(Code), ":", Colon),
+ ('/', Some(Code), "+=", PlusEq),
+ ]
+ .into_iter()
+}
+
+macro_rules! t {
+ (Both $($tts:tt)*) => {
+ t!(Markup $($tts)*);
+ t!(Code $($tts)*);
+ };
+ ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
+ // Test without suffix.
+ t!(@$mode: $text => $($token),*);
+
+ // Test with each applicable suffix.
+ for (block, mode, suffix, ref token) in suffixes() {
+ let text = $text;
+ #[allow(unused_variables)]
+ let blocks = BLOCKS;
+ $(let blocks = $blocks;)?
+ assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
+ if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
+ t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
+ }
+ }
+ }};
+ (@$mode:ident: $text:expr => $($token:expr),*) => {{
+ let text = $text;
+ let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
+ let expected = vec![$($token.clone()),*];
+ check(&text, found, expected);
+ }};
+}
+
+#[test]
+fn test_tokenize_brackets() {
+ // Test in markup.
+ t!(Markup: "{" => LeftBrace);
+ t!(Markup: "}" => RightBrace);
+ t!(Markup: "[" => LeftBracket);
+ t!(Markup: "]" => RightBracket);
+ t!(Markup[" /"]: "(" => Text("("));
+ t!(Markup[" /"]: ")" => Text(")"));
+
+ // Test in code.
+ t!(Code: "{" => LeftBrace);
+ t!(Code: "}" => RightBrace);
+ t!(Code: "[" => LeftBracket);
+ t!(Code: "]" => RightBracket);
+ t!(Code: "(" => LeftParen);
+ t!(Code: ")" => RightParen);
+}
+
+#[test]
+fn test_tokenize_whitespace() {
+ // Test basic whitespace.
+ t!(Both["a1/"]: "" => );
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: " " => Space(0));
+ t!(Both["a1/"]: "\t" => Space(0));
+ t!(Both["a1/"]: " \t" => Space(0));
+ t!(Both["a1/"]: "\u{202F}" => Space(0));
+
+ // Test newline counting.
+ t!(Both["a1/"]: "\n" => Space(1));
+ t!(Both["a1/"]: "\n " => Space(1));
+ t!(Both["a1/"]: " \n" => Space(1));
+ t!(Both["a1/"]: " \n " => Space(1));
+ t!(Both["a1/"]: "\r\n" => Space(1));
+ t!(Both["a1/"]: "\r\n\r" => Space(2));
+ t!(Both["a1/"]: " \n\t \n " => Space(2));
+ t!(Both["a1/"]: "\n\r" => Space(2));
+ t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
+}
+
+#[test]
+fn test_tokenize_text() {
+ // Test basic text.
+ t!(Markup[" /"]: "hello" => Text("hello"));
+ t!(Markup[" /"]: "reha-world" => Text("reha-world"));
+
+ // Test code symbols in text.
+ t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
+ t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
+ t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
+ t!(Markup[" "]: "#123" => Text("#123"));
+
+ // Test text ends.
+ t!(Markup[""]: "hello " => Text("hello"), Space(0));
+ t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
+}
+
+#[test]
+fn test_tokenize_escape_sequences() {
+ // Test escapable symbols.
+ t!(Markup: r"\\" => Escape('\\'));
+ t!(Markup: r"\/" => Escape('/'));
+ t!(Markup: r"\[" => Escape('['));
+ t!(Markup: r"\]" => Escape(']'));
+ t!(Markup: r"\{" => Escape('{'));
+ t!(Markup: r"\}" => Escape('}'));
+ t!(Markup: r"\*" => Escape('*'));
+ t!(Markup: r"\_" => Escape('_'));
+ t!(Markup: r"\=" => Escape('='));
+ t!(Markup: r"\~" => Escape('~'));
+ t!(Markup: r"\'" => Escape('\''));
+ t!(Markup: r#"\""# => Escape('"'));
+ t!(Markup: r"\`" => Escape('`'));
+ t!(Markup: r"\$" => Escape('$'));
+ t!(Markup: r"\#" => Escape('#'));
+ t!(Markup: r"\a" => Escape('a'));
+ t!(Markup: r"\u" => Escape('u'));
+ t!(Markup: r"\1" => Escape('1'));
+
+ // Test basic unicode escapes.
+ t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
+ t!(Markup: r"\u{2603}" => Escape('☃'));
+ t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
+
+ // Test unclosed unicode escapes.
+ t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
+ t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
+}
+
+#[test]
+fn test_tokenize_markup_symbols() {
+ // Test markup tokens.
+ t!(Markup[" a1"]: "*" => Star);
+ t!(Markup: "_" => Underscore);
+ t!(Markup[""]: "===" => Eq, Eq, Eq);
+ t!(Markup["a1/"]: "= " => Eq, Space(0));
+ t!(Markup[" "]: r"\" => Linebreak);
+ t!(Markup: "~" => Shorthand('\u{00A0}'));
+ t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
+ t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
+ t!(Markup["a1/"]: "- " => Minus, Space(0));
+ t!(Markup[" "]: "+" => Plus);
+ t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
+ t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
+ t!(Markup[" /"]: "a1." => Text("a1."));
+}
+
+#[test]
+fn test_tokenize_code_symbols() {
+ // Test all symbols.
+ t!(Code: "," => Comma);
+ t!(Code: ";" => Semicolon);
+ t!(Code: ":" => Colon);
+ t!(Code: "+" => Plus);
+ t!(Code: "-" => Minus);
+ t!(Code[" a1"]: "*" => Star);
+ t!(Code[" a1"]: "/" => Slash);
+ t!(Code[" a/"]: "." => Dot);
+ t!(Code: "=" => Eq);
+ t!(Code: "==" => EqEq);
+ t!(Code: "!=" => ExclEq);
+ t!(Code[" /"]: "<" => Lt);
+ t!(Code: "<=" => LtEq);
+ t!(Code: ">" => Gt);
+ t!(Code: ">=" => GtEq);
+ t!(Code: "+=" => PlusEq);
+ t!(Code: "-=" => HyphEq);
+ t!(Code: "*=" => StarEq);
+ t!(Code: "/=" => SlashEq);
+ t!(Code: ".." => Dots);
+ t!(Code: "=>" => Arrow);
+
+ // Test combinations.
+ t!(Code: "<=>" => LtEq, Gt);
+ t!(Code[" a/"]: "..." => Dots, Dot);
+
+ // Test hyphen as symbol vs part of identifier.
+ t!(Code[" /"]: "-1" => Minus, Int(1));
+ t!(Code[" /"]: "-a" => Minus, Ident("a"));
+ t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
+ t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
+ t!(Code[" /"]: "a-b" => Ident("a-b"));
+
+ // Test invalid.
+ t!(Code: r"\" => Error(Full, "not valid here"));
+}
+
+#[test]
+fn test_tokenize_keywords() {
+ // A list of a few (not all) keywords.
+ let list = [
+ ("not", Not),
+ ("let", Let),
+ ("if", If),
+ ("else", Else),
+ ("for", For),
+ ("in", In),
+ ("import", Import),
+ ];
+
+ for (s, t) in list.clone() {
+ t!(Markup[" "]: format!("#{}", s) => t);
+ t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
+ t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
+ }
+
+ for (s, t) in list {
+ t!(Code[" "]: s => t);
+ t!(Markup[" /"]: s => Text(s));
+ }
+
+ // Test simple identifier.
+ t!(Markup[" "]: "#letter" => Ident("letter"));
+ t!(Code[" /"]: "falser" => Ident("falser"));
+ t!(Code[" /"]: "None" => Ident("None"));
+ t!(Code[" /"]: "True" => Ident("True"));
+}
+
+#[test]
+fn test_tokenize_raw_blocks() {
+ // Test basic raw block.
+ t!(Markup: "``" => Raw("", None, false));
+ t!(Markup: "`raw`" => Raw("raw", None, false));
+ t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
+
+ // Test special symbols in raw block.
+ t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
+ t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
+
+ // Test separated closing backticks.
+ t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
+
+ // Test more backticks.
+ t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
+ t!(Markup: "````🚀````" => Raw("", None, false));
+ t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
+ t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
+}
+
+#[test]
+fn test_tokenize_idents() {
+ // Test valid identifiers.
+ t!(Code[" /"]: "x" => Ident("x"));
+ t!(Code[" /"]: "value" => Ident("value"));
+ t!(Code[" /"]: "__main__" => Ident("__main__"));
+ t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));
+
+ // Test non-ascii.
+ t!(Code[" /"]: "α" => Ident("α"));
+ t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));
+
+ // Test hyphen parsed as identifier.
+ t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
+ t!(Code[" /"]: "one-10" => Ident("one-10"));
+}
+
+#[test]
+fn test_tokenize_numeric() {
+ let ints = [("7", 7), ("012", 12)];
+ let floats = [
+ (".3", 0.3),
+ ("0.3", 0.3),
+ ("3.", 3.0),
+ ("3.0", 3.0),
+ ("14.3", 14.3),
+ ("10e2", 1000.0),
+ ("10e+0", 10.0),
+ ("10e+1", 100.0),
+ ("10e-2", 0.1),
+ ("10.e1", 100.0),
+ ("10.e-1", 1.0),
+ (".1e1", 1.0),
+ ("10E2", 1000.0),
+ ];
+
+ // Test integers.
+ for &(s, v) in &ints {
+ t!(Code[" /"]: s => Int(v));
+ }
+
+ // Test floats.
+ for &(s, v) in &floats {
+ t!(Code[" /"]: s => Float(v));
+ }
+
+ // Test attached numbers.
+ t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
+ t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
+ t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
+
+ // Test float from too large integer.
+ let large = i64::MAX as f64 + 1.0;
+ t!(Code[" /"]: large.to_string() => Float(large));
+
+ // Combined integers and floats.
+ let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
+
+ let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[
+ ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
+ ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
+ ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
+ ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
+ ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
+ ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
+ ("em", |x| Numeric(x, Unit::Em)),
+ ("fr", |x| Numeric(x, Unit::Fr)),
+ ("%", |x| Numeric(x, Unit::Percent)),
+ ];
+
+ // Numeric types.
+ for &(suffix, build) in suffixes {
+ for (s, v) in nums.clone() {
+ t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
+ }
+ }
+
+ // Multiple dots close the number.
+ t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
+ t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
+ t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
+
+ // Test invalid.
+ t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
+}
+
+#[test]
+fn test_tokenize_strings() {
+ // Test basic strings.
+ t!(Code: "\"hi\"" => Str("hi"));
+ t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
+ t!(Code: "\"🌎\"" => Str("🌎"));
+
+ // Test unterminated.
+ t!(Code[""]: "\"hi" => Error(End, "expected quote"));
+
+ // Test escaped quote.
+ t!(Code: r#""a\"bc""# => Str("a\"bc"));
+ t!(Code[""]: r#""\""# => Error(End, "expected quote"));
+}
+
+#[test]
+fn test_tokenize_line_comments() {
+ // Test line comment with no trailing newline.
+ t!(Both[""]: "//" => LineComment);
+
+ // Test line comment ends at newline.
+ t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
+ t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
+ t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
+
+ // Test nested line comments.
+ t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
+}
+
+#[test]
+fn test_tokenize_block_comments() {
+ // Test basic block comments.
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both: "/**/" => BlockComment);
+ t!(Both: "/*🏞*/" => BlockComment);
+ t!(Both: "/*\n*/" => BlockComment);
+
+ // Test depth 1 and 2 nested block comments.
+ t!(Both: "/* /* */ */" => BlockComment);
+ t!(Both: "/*/*/**/*/*/" => BlockComment);
+
+ // Test two nested, one unclosed block comments.
+ t!(Both[""]: "/*/*/**/*/" => BlockComment);
+
+ // Test all combinations of up to two following slashes and stars.
+ t!(Both[""]: "/*" => BlockComment);
+ t!(Both[""]: "/*/" => BlockComment);
+ t!(Both[""]: "/**" => BlockComment);
+ t!(Both[""]: "/*//" => BlockComment);
+ t!(Both[""]: "/*/*" => BlockComment);
+ t!(Both[""]: "/**/" => BlockComment);
+ t!(Both[""]: "/***" => BlockComment);
+
+ // Test unexpected terminator.
+ t!(Both: "/*Hi*/*/" => BlockComment,
+ Error(Full, "unexpected end of block comment"));
+}
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index 130ad668..57188096 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -35,14 +35,12 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
- #[inline]
pub fn new(text: &'s str, mode: TokenMode) -> Self {
Self::with_prefix("", text, mode)
}
/// Create a new token iterator with the given mode and a prefix to offset
/// column calculations.
- #[inline]
pub fn with_prefix(prefix: &str, text: &'s str, mode: TokenMode) -> Self {
Self {
s: Scanner::new(text),
@@ -53,54 +51,46 @@ impl<'s> Tokens<'s> {
}
/// Get the current token mode.
- #[inline]
pub fn mode(&self) -> TokenMode {
self.mode
}
/// Change the token mode.
- #[inline]
pub fn set_mode(&mut self, mode: TokenMode) {
self.mode = mode;
}
/// The index in the string at which the last token ends and next token
/// will start.
- #[inline]
pub fn cursor(&self) -> usize {
self.s.cursor()
}
/// Jump to the given index in the string.
- #[inline]
pub fn jump(&mut self, index: usize) {
self.s.jump(index);
}
/// The underlying scanner.
- #[inline]
pub fn scanner(&self) -> Scanner<'s> {
self.s
}
/// Whether the last token was terminated.
- #[inline]
pub fn terminated(&self) -> bool {
self.terminated
}
/// The column index of a given index in the source string.
- #[inline]
pub fn column(&self, index: usize) -> usize {
column(self.s.string(), index, self.column_offset)
}
}
-impl<'s> Iterator for Tokens<'s> {
+impl Iterator for Tokens<'_> {
type Item = SyntaxKind;
/// Parse the next token in the source code.
- #[inline]
fn next(&mut self) -> Option<Self::Item> {
let start = self.s.cursor();
let c = self.s.eat()?;
@@ -124,7 +114,8 @@ impl<'s> Iterator for Tokens<'s> {
}
}
-impl<'s> Tokens<'s> {
+/// Shared.
+impl Tokens<'_> {
fn line_comment(&mut self) -> SyntaxKind {
self.s.eat_until(is_newline);
if self.s.peek().is_none() {
@@ -189,8 +180,9 @@ impl<'s> Tokens<'s> {
SyntaxKind::Space { newlines }
}
+}
- #[inline]
+impl Tokens<'_> {
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
@@ -231,7 +223,6 @@ impl<'s> Tokens<'s> {
}
}
- #[inline]
fn text(&mut self, start: usize) -> SyntaxKind {
macro_rules! table {
($(|$c:literal)*) => {{
@@ -303,7 +294,11 @@ impl<'s> Tokens<'s> {
}
fn hash(&mut self, start: usize) -> SyntaxKind {
- if self.s.at(is_id_start) {
+ if self.s.eat_if('{') {
+ SyntaxKind::LeftBrace
+ } else if self.s.eat_if('[') {
+ SyntaxKind::LeftBracket
+ } else if self.s.at(is_id_start) {
let read = self.s.eat_while(is_id_continue);
match keyword(read) {
Some(keyword) => keyword,
@@ -342,8 +337,10 @@ impl<'s> Tokens<'s> {
if start < end {
self.s.expect(':');
SyntaxKind::Symbol(self.s.get(start..end).into())
- } else {
+ } else if self.mode == TokenMode::Markup {
SyntaxKind::Colon
+ } else {
+ SyntaxKind::Atom(":".into())
}
}
@@ -426,26 +423,25 @@ impl<'s> Tokens<'s> {
self.text(start)
}
- fn label(&mut self) -> SyntaxKind {
- let label = self.s.eat_while(is_id_continue);
- if self.s.eat_if('>') {
- if !label.is_empty() {
- SyntaxKind::Label(label.into())
- } else {
- SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
- }
- } else {
- self.terminated = false;
- SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
- }
- }
-
fn reference(&mut self) -> SyntaxKind {
SyntaxKind::Ref(self.s.eat_while(is_id_continue).into())
}
+ fn in_word(&self) -> bool {
+ let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.scout(-2);
+ let next = self.s.peek();
+ alphanumeric(prev) && alphanumeric(next)
+ }
+}
+
+/// Math.
+impl Tokens<'_> {
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
+ // Multi-char things.
+ '#' => self.hash(start),
+
// Escape sequences.
'\\' => self.backslash(),
@@ -456,18 +452,32 @@ impl<'s> Tokens<'s> {
'&' => SyntaxKind::Amp,
'$' => SyntaxKind::Dollar,
- // Brackets.
- '{' => SyntaxKind::LeftBrace,
- '}' => SyntaxKind::RightBrace,
- '[' => SyntaxKind::LeftBracket,
- ']' => SyntaxKind::RightBracket,
- '(' => SyntaxKind::LeftParen,
- ')' => SyntaxKind::RightParen,
+ // Symbol notation.
+ ':' => self.colon(),
- // Identifiers.
+ // Strings.
+ '"' => self.string(),
+
+ // Identifiers and symbol notation.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
self.s.eat_while(is_math_id_continue);
- SyntaxKind::Ident(self.s.from(start).into())
+
+ let mut symbol = false;
+ while self.s.eat_if(':')
+ && !self.s.eat_while(char::is_alphanumeric).is_empty()
+ {
+ symbol = true;
+ }
+
+ if symbol {
+ SyntaxKind::Symbol(self.s.from(start).into())
+ } else {
+ if self.s.scout(-1) == Some(':') {
+ self.s.uneat();
+ }
+
+ SyntaxKind::Ident(self.s.from(start).into())
+ }
}
// Numbers.
@@ -480,7 +490,10 @@ impl<'s> Tokens<'s> {
c => SyntaxKind::Atom(c.into()),
}
}
+}
+/// Code.
+impl Tokens<'_> {
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
// Blocks.
@@ -493,6 +506,9 @@ impl<'s> Tokens<'s> {
'(' => SyntaxKind::LeftParen,
')' => SyntaxKind::RightParen,
+ // Math.
+ '$' => SyntaxKind::Dollar,
+
// Labels.
'<' if self.s.at(is_id_continue) => self.label(),
@@ -619,14 +635,22 @@ impl<'s> Tokens<'s> {
}
}
- fn in_word(&self) -> bool {
- let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
- let prev = self.s.scout(-2);
- let next = self.s.peek();
- alphanumeric(prev) && alphanumeric(next)
+ fn label(&mut self) -> SyntaxKind {
+ let label = self.s.eat_while(is_id_continue);
+ if self.s.eat_if('>') {
+ if !label.is_empty() {
+ SyntaxKind::Label(label.into())
+ } else {
+ SyntaxKind::Error(ErrorPos::Full, "label cannot be empty".into())
+ }
+ } else {
+ self.terminated = false;
+ SyntaxKind::Error(ErrorPos::End, "expected closing angle bracket".into())
+ }
}
}
+/// Try to parse an identifier into a keyword.
fn keyword(ident: &str) -> Option<SyntaxKind> {
Some(match ident {
"not" => SyntaxKind::Not,
@@ -652,7 +676,6 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
/// The column index of a given index in the source string, given a column
/// offset for the first line.
-#[inline]
fn column(string: &str, index: usize, offset: usize) -> usize {
let mut apply_offset = false;
let res = string[..index]
@@ -729,471 +752,3 @@ fn is_math_id_start(c: char) -> bool {
fn is_math_id_continue(c: char) -> bool {
c.is_xid_continue() && c != '_'
}
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
- use super::super::tests::check;
- use super::*;
-
- use ErrorPos::*;
- use Option::None;
- use SyntaxKind::*;
- use TokenMode::{Code, Markup};
-
- fn Space(newlines: usize) -> SyntaxKind {
- SyntaxKind::Space { newlines }
- }
-
- fn Raw(text: &str, lang: Option<&str>, block: bool) -> SyntaxKind {
- SyntaxKind::Raw(Arc::new(RawFields {
- text: text.into(),
- lang: lang.map(Into::into),
- block,
- }))
- }
-
- fn Str(string: &str) -> SyntaxKind {
- SyntaxKind::Str(string.into())
- }
-
- fn Text(string: &str) -> SyntaxKind {
- SyntaxKind::Text(string.into())
- }
-
- fn Ident(ident: &str) -> SyntaxKind {
- SyntaxKind::Ident(ident.into())
- }
-
- fn Error(pos: ErrorPos, message: &str) -> SyntaxKind {
- SyntaxKind::Error(pos, message.into())
- }
-
- /// Building blocks for suffix testing.
- ///
- /// We extend each test case with a collection of different suffixes to make
- /// sure tokens end at the correct position. These suffixes are split into
- /// blocks, which can be disabled/enabled per test case. For example, when
- /// testing identifiers we disable letter suffixes because these would
- /// mingle with the identifiers.
- ///
- /// Suffix blocks:
- /// - ' ': spacing
- /// - 'a': letters
- /// - '1': numbers
- /// - '/': symbols
- const BLOCKS: &str = " a1/";
-
- // Suffixes described by four-tuples of:
- //
- // - block the suffix is part of
- // - mode in which the suffix is applicable
- // - the suffix string
- // - the resulting suffix NodeKind
- fn suffixes(
- ) -> impl Iterator<Item = (char, Option<TokenMode>, &'static str, SyntaxKind)> {
- [
- // Whitespace suffixes.
- (' ', None, " ", Space(0)),
- (' ', None, "\n", Space(1)),
- (' ', None, "\r", Space(1)),
- (' ', None, "\r\n", Space(1)),
- // Letter suffixes.
- ('a', Some(Markup), "hello", Text("hello")),
- ('a', Some(Markup), "💚", Text("💚")),
- ('a', Some(Code), "val", Ident("val")),
- ('a', Some(Code), "α", Ident("α")),
- ('a', Some(Code), "_", Ident("_")),
- // Number suffixes.
- ('1', Some(Code), "2", Int(2)),
- ('1', Some(Code), ".2", Float(0.2)),
- // Symbol suffixes.
- ('/', None, "[", LeftBracket),
- ('/', None, "//", LineComment),
- ('/', None, "/**/", BlockComment),
- ('/', Some(Markup), "*", Star),
- ('/', Some(Markup), r"\\", Escape('\\')),
- ('/', Some(Markup), "#let", Let),
- ('/', Some(Code), "(", LeftParen),
- ('/', Some(Code), ":", Colon),
- ('/', Some(Code), "+=", PlusEq),
- ]
- .into_iter()
- }
-
- macro_rules! t {
- (Both $($tts:tt)*) => {
- t!(Markup $($tts)*);
- t!(Code $($tts)*);
- };
- ($mode:ident $([$blocks:literal])?: $text:expr => $($token:expr),*) => {{
- // Test without suffix.
- t!(@$mode: $text => $($token),*);
-
- // Test with each applicable suffix.
- for (block, mode, suffix, ref token) in suffixes() {
- let text = $text;
- #[allow(unused_variables)]
- let blocks = BLOCKS;
- $(let blocks = $blocks;)?
- assert!(!blocks.contains(|c| !BLOCKS.contains(c)));
- if (mode.is_none() || mode == Some($mode)) && blocks.contains(block) {
- t!(@$mode: format!("{}{}", text, suffix) => $($token,)* token);
- }
- }
- }};
- (@$mode:ident: $text:expr => $($token:expr),*) => {{
- let text = $text;
- let found = Tokens::new(&text, $mode).collect::<Vec<_>>();
- let expected = vec![$($token.clone()),*];
- check(&text, found, expected);
- }};
- }
-
- #[test]
- fn test_tokenize_brackets() {
- // Test in markup.
- t!(Markup: "{" => LeftBrace);
- t!(Markup: "}" => RightBrace);
- t!(Markup: "[" => LeftBracket);
- t!(Markup: "]" => RightBracket);
- t!(Markup[" /"]: "(" => Text("("));
- t!(Markup[" /"]: ")" => Text(")"));
-
- // Test in code.
- t!(Code: "{" => LeftBrace);
- t!(Code: "}" => RightBrace);
- t!(Code: "[" => LeftBracket);
- t!(Code: "]" => RightBracket);
- t!(Code: "(" => LeftParen);
- t!(Code: ")" => RightParen);
- }
-
- #[test]
- fn test_tokenize_whitespace() {
- // Test basic whitespace.
- t!(Both["a1/"]: "" => );
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: " " => Space(0));
- t!(Both["a1/"]: "\t" => Space(0));
- t!(Both["a1/"]: " \t" => Space(0));
- t!(Both["a1/"]: "\u{202F}" => Space(0));
-
- // Test newline counting.
- t!(Both["a1/"]: "\n" => Space(1));
- t!(Both["a1/"]: "\n " => Space(1));
- t!(Both["a1/"]: " \n" => Space(1));
- t!(Both["a1/"]: " \n " => Space(1));
- t!(Both["a1/"]: "\r\n" => Space(1));
- t!(Both["a1/"]: "\r\n\r" => Space(2));
- t!(Both["a1/"]: " \n\t \n " => Space(2));
- t!(Both["a1/"]: "\n\r" => Space(2));
- t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
- }
-
- #[test]
- fn test_tokenize_text() {
- // Test basic text.
- t!(Markup[" /"]: "hello" => Text("hello"));
- t!(Markup[" /"]: "reha-world" => Text("reha-world"));
-
- // Test code symbols in text.
- t!(Markup[" /"]: "a():\"b" => Text("a()"), Colon, SmartQuote { double: true }, Text("b"));
- t!(Markup[" /"]: ";,|/+" => Text(";,|/+"));
- t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a"));
- t!(Markup[" "]: "#123" => Text("#123"));
-
- // Test text ends.
- t!(Markup[""]: "hello " => Text("hello"), Space(0));
- t!(Markup[""]: "hello~" => Text("hello"), Shorthand('\u{00A0}'));
- }
-
- #[test]
- fn test_tokenize_escape_sequences() {
- // Test escapable symbols.
- t!(Markup: r"\\" => Escape('\\'));
- t!(Markup: r"\/" => Escape('/'));
- t!(Markup: r"\[" => Escape('['));
- t!(Markup: r"\]" => Escape(']'));
- t!(Markup: r"\{" => Escape('{'));
- t!(Markup: r"\}" => Escape('}'));
- t!(Markup: r"\*" => Escape('*'));
- t!(Markup: r"\_" => Escape('_'));
- t!(Markup: r"\=" => Escape('='));
- t!(Markup: r"\~" => Escape('~'));
- t!(Markup: r"\'" => Escape('\''));
- t!(Markup: r#"\""# => Escape('"'));
- t!(Markup: r"\`" => Escape('`'));
- t!(Markup: r"\$" => Escape('$'));
- t!(Markup: r"\#" => Escape('#'));
- t!(Markup: r"\a" => Escape('a'));
- t!(Markup: r"\u" => Escape('u'));
- t!(Markup: r"\1" => Escape('1'));
-
- // Test basic unicode escapes.
- t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
- t!(Markup: r"\u{2603}" => Escape('☃'));
- t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
-
- // Test unclosed unicode escapes.
- t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
- t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
- t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
- }
-
- #[test]
- fn test_tokenize_markup_symbols() {
- // Test markup tokens.
- t!(Markup[" a1"]: "*" => Star);
- t!(Markup: "_" => Underscore);
- t!(Markup[""]: "===" => Eq, Eq, Eq);
- t!(Markup["a1/"]: "= " => Eq, Space(0));
- t!(Markup[" "]: r"\" => Linebreak);
- t!(Markup: "~" => Shorthand('\u{00A0}'));
- t!(Markup["a1/"]: "-?" => Shorthand('\u{00AD}'));
- t!(Markup["a "]: r"a--" => Text("a"), Shorthand('\u{2013}'));
- t!(Markup["a1/"]: "- " => Minus, Space(0));
- t!(Markup[" "]: "+" => Plus);
- t!(Markup[" "]: "1." => EnumNumbering(NonZeroUsize::new(1).unwrap()));
- t!(Markup[" "]: "1.a" => EnumNumbering(NonZeroUsize::new(1).unwrap()), Text("a"));
- t!(Markup[" /"]: "a1." => Text("a1."));
- }
-
- #[test]
- fn test_tokenize_code_symbols() {
- // Test all symbols.
- t!(Code: "," => Comma);
- t!(Code: ";" => Semicolon);
- t!(Code: ":" => Colon);
- t!(Code: "+" => Plus);
- t!(Code: "-" => Minus);
- t!(Code[" a1"]: "*" => Star);
- t!(Code[" a1"]: "/" => Slash);
- t!(Code[" a/"]: "." => Dot);
- t!(Code: "=" => Eq);
- t!(Code: "==" => EqEq);
- t!(Code: "!=" => ExclEq);
- t!(Code[" /"]: "<" => Lt);
- t!(Code: "<=" => LtEq);
- t!(Code: ">" => Gt);
- t!(Code: ">=" => GtEq);
- t!(Code: "+=" => PlusEq);
- t!(Code: "-=" => HyphEq);
- t!(Code: "*=" => StarEq);
- t!(Code: "/=" => SlashEq);
- t!(Code: ".." => Dots);
- t!(Code: "=>" => Arrow);
-
- // Test combinations.
- t!(Code: "<=>" => LtEq, Gt);
- t!(Code[" a/"]: "..." => Dots, Dot);
-
- // Test hyphen as symbol vs part of identifier.
- t!(Code[" /"]: "-1" => Minus, Int(1));
- t!(Code[" /"]: "-a" => Minus, Ident("a"));
- t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
- t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
- t!(Code[" /"]: "a-b" => Ident("a-b"));
-
- // Test invalid.
- t!(Code: r"\" => Error(Full, "not valid here"));
- }
-
- #[test]
- fn test_tokenize_keywords() {
- // A list of a few (not all) keywords.
- let list = [
- ("not", Not),
- ("let", Let),
- ("if", If),
- ("else", Else),
- ("for", For),
- ("in", In),
- ("import", Import),
- ];
-
- for (s, t) in list.clone() {
- t!(Markup[" "]: format!("#{}", s) => t);
- t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
- t!(Markup[" /"]: format!("# {}", s) => Text(&format!("# {s}")));
- }
-
- for (s, t) in list {
- t!(Code[" "]: s => t);
- t!(Markup[" /"]: s => Text(s));
- }
-
- // Test simple identifier.
- t!(Markup[" "]: "#letter" => Ident("letter"));
- t!(Code[" /"]: "falser" => Ident("falser"));
- t!(Code[" /"]: "None" => Ident("None"));
- t!(Code[" /"]: "True" => Ident("True"));
- }
-
- #[test]
- fn test_tokenize_raw_blocks() {
- // Test basic raw block.
- t!(Markup: "``" => Raw("", None, false));
- t!(Markup: "`raw`" => Raw("raw", None, false));
- t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
-
- // Test special symbols in raw block.
- t!(Markup: "`[brackets]`" => Raw("[brackets]", None, false));
- t!(Markup[""]: r"`\`` " => Raw(r"\", None, false), Error(End, "expected 1 backtick"));
-
- // Test separated closing backticks.
- t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), false));
-
- // Test more backticks.
- t!(Markup: "``nope``" => Raw("", None, false), Text("nope"), Raw("", None, false));
- t!(Markup: "````🚀````" => Raw("", None, false));
- t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
- t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), false), Raw("", None, false));
- }
-
- #[test]
- fn test_tokenize_idents() {
- // Test valid identifiers.
- t!(Code[" /"]: "x" => Ident("x"));
- t!(Code[" /"]: "value" => Ident("value"));
- t!(Code[" /"]: "__main__" => Ident("__main__"));
- t!(Code[" /"]: "_snake_case" => Ident("_snake_case"));
-
- // Test non-ascii.
- t!(Code[" /"]: "α" => Ident("α"));
- t!(Code[" /"]: "ម្តាយ" => Ident("ម្តាយ"));
-
- // Test hyphen parsed as identifier.
- t!(Code[" /"]: "kebab-case" => Ident("kebab-case"));
- t!(Code[" /"]: "one-10" => Ident("one-10"));
- }
-
- #[test]
- fn test_tokenize_numeric() {
- let ints = [("7", 7), ("012", 12)];
- let floats = [
- (".3", 0.3),
- ("0.3", 0.3),
- ("3.", 3.0),
- ("3.0", 3.0),
- ("14.3", 14.3),
- ("10e2", 1000.0),
- ("10e+0", 10.0),
- ("10e+1", 100.0),
- ("10e-2", 0.1),
- ("10.e1", 100.0),
- ("10.e-1", 1.0),
- (".1e1", 1.0),
- ("10E2", 1000.0),
- ];
-
- // Test integers.
- for &(s, v) in &ints {
- t!(Code[" /"]: s => Int(v));
- }
-
- // Test floats.
- for &(s, v) in &floats {
- t!(Code[" /"]: s => Float(v));
- }
-
- // Test attached numbers.
- t!(Code[" /"]: ".2.3" => Float(0.2), Float(0.3));
- t!(Code[" /"]: "1.2.3" => Float(1.2), Float(0.3));
- t!(Code[" /"]: "1e-2+3" => Float(0.01), Plus, Int(3));
-
- // Test float from too large integer.
- let large = i64::MAX as f64 + 1.0;
- t!(Code[" /"]: large.to_string() => Float(large));
-
- // Combined integers and floats.
- let nums = ints.iter().map(|&(k, v)| (k, v as f64)).chain(floats);
-
- let suffixes: &[(&str, fn(f64) -> SyntaxKind)] = &[
- ("mm", |x| Numeric(x, Unit::Length(AbsUnit::Mm))),
- ("pt", |x| Numeric(x, Unit::Length(AbsUnit::Pt))),
- ("cm", |x| Numeric(x, Unit::Length(AbsUnit::Cm))),
- ("in", |x| Numeric(x, Unit::Length(AbsUnit::In))),
- ("rad", |x| Numeric(x, Unit::Angle(AngleUnit::Rad))),
- ("deg", |x| Numeric(x, Unit::Angle(AngleUnit::Deg))),
- ("em", |x| Numeric(x, Unit::Em)),
- ("fr", |x| Numeric(x, Unit::Fr)),
- ("%", |x| Numeric(x, Unit::Percent)),
- ];
-
- // Numeric types.
- for &(suffix, build) in suffixes {
- for (s, v) in nums.clone() {
- t!(Code[" /"]: format!("{}{}", s, suffix) => build(v));
- }
- }
-
- // Multiple dots close the number.
- t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
- t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
- t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
-
- // Test invalid.
- t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
- }
-
- #[test]
- fn test_tokenize_strings() {
- // Test basic strings.
- t!(Code: "\"hi\"" => Str("hi"));
- t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
- t!(Code: "\"🌎\"" => Str("🌎"));
-
- // Test unterminated.
- t!(Code[""]: "\"hi" => Error(End, "expected quote"));
-
- // Test escaped quote.
- t!(Code: r#""a\"bc""# => Str("a\"bc"));
- t!(Code[""]: r#""\""# => Error(End, "expected quote"));
- }
-
- #[test]
- fn test_tokenize_line_comments() {
- // Test line comment with no trailing newline.
- t!(Both[""]: "//" => LineComment);
-
- // Test line comment ends at newline.
- t!(Both["a1/"]: "//bc\n" => LineComment, Space(1));
- t!(Both["a1/"]: "// bc \n" => LineComment, Space(1));
- t!(Both["a1/"]: "//bc\r\n" => LineComment, Space(1));
-
- // Test nested line comments.
- t!(Both["a1/"]: "//a//b\n" => LineComment, Space(1));
- }
-
- #[test]
- fn test_tokenize_block_comments() {
- // Test basic block comments.
- t!(Both[""]: "/*" => BlockComment);
- t!(Both: "/**/" => BlockComment);
- t!(Both: "/*🏞*/" => BlockComment);
- t!(Both: "/*\n*/" => BlockComment);
-
- // Test depth 1 and 2 nested block comments.
- t!(Both: "/* /* */ */" => BlockComment);
- t!(Both: "/*/*/**/*/*/" => BlockComment);
-
- // Test two nested, one unclosed block comments.
- t!(Both[""]: "/*/*/**/*/" => BlockComment);
-
- // Test all combinations of up to two following slashes and stars.
- t!(Both[""]: "/*" => BlockComment);
- t!(Both[""]: "/*/" => BlockComment);
- t!(Both[""]: "/**" => BlockComment);
- t!(Both[""]: "/*//" => BlockComment);
- t!(Both[""]: "/*/*" => BlockComment);
- t!(Both[""]: "/**/" => BlockComment);
- t!(Both[""]: "/***" => BlockComment);
-
- // Test unexpected terminator.
- t!(Both: "/*Hi*/*/" => BlockComment,
- Error(Full, "unexpected end of block comment"));
- }
-}
diff --git a/tests/ref/math/simple.png b/tests/ref/math/simple.png
index 902354df..72f9c1c6 100644
--- a/tests/ref/math/simple.png
+++ b/tests/ref/math/simple.png
Binary files differ
diff --git a/tests/ref/math/syntax.png b/tests/ref/math/syntax.png
new file mode 100644
index 00000000..eaf18528
--- /dev/null
+++ b/tests/ref/math/syntax.png
Binary files differ
diff --git a/tests/typ/math/syntax.typ b/tests/typ/math/syntax.typ
new file mode 100644
index 00000000..79c306a2
--- /dev/null
+++ b/tests/typ/math/syntax.typ
@@ -0,0 +1,24 @@
+#set page(width: auto)
+#set text("Latin Modern Roman")
+#show <table>: it => table(
+ columns: 2,
+ padding: 8pt,
+ ..it.text
+ .split("\n")
+ .map(line => (text(10pt, raw(line, lang: "typ")), eval(line) + [ ]))
+ .flatten()
+)
+
+```
+Let $x in NN$ be ...
+$ (1 + x/2)^2 $
+$ x arrow:l y $
+$ sum_(n=1)^mu 1 + (2pi (5 + n)) / k $
+$ { x in RR | x "is natural" and x < 10 } $
+$ sqrt(x^2) = frac(x, 1) $
+$ "profit" = "income" - "expenses" $
+$ x < #for i in range(5) [$ #i < $] y $
+$ 1 + 2 = #{1 + 2} $
+$ A sub:eq:not B $
+```
+<table>
diff --git a/tools/test-helper/extension.js b/tools/test-helper/extension.js
index 253c78c7..60fbe71d 100644
--- a/tools/test-helper/extension.js
+++ b/tools/test-helper/extension.js
@@ -132,15 +132,19 @@ function getWebviewContent(pngSrc, refSrc, stdout, stderr) {
</div>
<h1>Standard output</h1>
- <pre>${stdout}</pre>
+ <pre>${escape(stdout)}</pre>
<h1>Standard error</h1>
- <pre>${stderr}</pre>
+ <pre>${escape(stderr)}</pre>
</body>
</html>
`
}
+function escape(text) {
+ return text.replace(/</g, "&lt;").replace(/>/g, "&gt;");
+}
+
function deactivate() {}
module.exports = { activate, deactivate }