From 5a8534a395b500a25cbc46ee15ec031c8231de59 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 4 Oct 2022 13:42:49 +0200 Subject: Parse basic math syntax --- src/syntax/ast.rs | 231 ++++++++++++++++++++++++++++---------- src/syntax/highlight.rs | 120 ++++++++++---------- src/syntax/mod.rs | 292 +++++++++++++++++++++++++++--------------------- 3 files changed, 400 insertions(+), 243 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 8d3696a8..6a016e79 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -5,7 +5,7 @@ use std::num::NonZeroUsize; use std::ops::Deref; -use super::{NodeData, NodeKind, Span, Spanned, SyntaxNode}; +use super::{NodeData, NodeKind, Span, SyntaxNode}; use crate::geom::{AngleUnit, LengthUnit}; use crate::util::EcoString; @@ -60,34 +60,7 @@ node! { impl Markup { /// The markup nodes. pub fn nodes(&self) -> impl Iterator + '_ { - self.0.children().filter_map(|node| match node.kind() { - NodeKind::Space { newlines: (2 ..) } => Some(MarkupNode::Parbreak), - NodeKind::Space { .. } => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), - NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), - NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), - NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), - NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), - NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())), - &NodeKind::Quote { double } => Some(MarkupNode::Quote { double }), - NodeKind::Strong => node.cast().map(MarkupNode::Strong), - NodeKind::Emph => node.cast().map(MarkupNode::Emph), - NodeKind::Link(url) => Some(MarkupNode::Link(url.clone())), - NodeKind::Raw(raw) => Some(MarkupNode::Raw(raw.as_ref().clone())), - NodeKind::Math(math) => Some(MarkupNode::Math(Spanned::new( - math.as_ref().clone(), - node.span(), - ))), - NodeKind::Heading => node.cast().map(MarkupNode::Heading), - NodeKind::List => node.cast().map(MarkupNode::List), - NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Desc => node.cast().map(MarkupNode::Desc), - NodeKind::Label(v) => Some(MarkupNode::Label(v.clone())), - NodeKind::Ref(v) => Some(MarkupNode::Ref(v.clone())), - _ => node.cast().map(MarkupNode::Expr), - }) + self.0.children().filter_map(SyntaxNode::cast) } } @@ -113,7 +86,7 @@ pub enum MarkupNode { /// A raw block with optional syntax highlighting: `` `...` ``. Raw(RawNode), /// A math formula: `$a^2 = b^2 + c^2$`. - Math(Spanned), + Math(Math), /// A section heading: `= Introduction`. Heading(HeadingNode), /// An item in an unordered list: `- ...`. @@ -130,6 +103,40 @@ pub enum MarkupNode { Expr(Expr), } +impl TypedNode for MarkupNode { + fn from_untyped(node: &SyntaxNode) -> Option { + match node.kind() { + NodeKind::Space { newlines: (2 ..) } => Some(Self::Parbreak), + NodeKind::Space { .. } => Some(Self::Space), + NodeKind::Linebreak => Some(Self::Linebreak), + NodeKind::Text(s) => Some(Self::Text(s.clone())), + NodeKind::Escape(c) => Some(Self::Text((*c).into())), + NodeKind::Tilde => Some(Self::Text('\u{00A0}'.into())), + NodeKind::HyphQuest => Some(Self::Text('\u{00AD}'.into())), + NodeKind::Hyph2 => Some(Self::Text('\u{2013}'.into())), + NodeKind::Hyph3 => Some(Self::Text('\u{2014}'.into())), + NodeKind::Dot3 => Some(Self::Text('\u{2026}'.into())), + NodeKind::Quote { double } => Some(Self::Quote { double: *double }), + NodeKind::Strong => node.cast().map(Self::Strong), + NodeKind::Emph => node.cast().map(Self::Emph), + NodeKind::Link(url) => Some(Self::Link(url.clone())), + NodeKind::Raw(raw) => Some(Self::Raw(raw.as_ref().clone())), + NodeKind::Math => node.cast().map(Self::Math), + NodeKind::Heading => node.cast().map(Self::Heading), + NodeKind::List => node.cast().map(Self::List), + NodeKind::Enum => node.cast().map(Self::Enum), + NodeKind::Desc => node.cast().map(Self::Desc), + NodeKind::Label(v) => Some(Self::Label(v.clone())), + NodeKind::Ref(v) => Some(Self::Ref(v.clone())), + _ => node.cast().map(Self::Expr), + } + } + + fn as_untyped(&self) -> &SyntaxNode { + unimplemented!("MarkupNode::as_untyped") + } +} + node! { /// Strong content: `*Strong*`. StrongNode: Strong @@ -169,14 +176,122 @@ pub struct RawNode { pub block: bool, } -/// A math formula: `$x$`, `$[x^2]$`. +node! { + /// A math formula: `$x$`, `$ x^2 $`. + Math: NodeKind::Math { .. } +} + +impl Math { + /// The math nodes. + pub fn nodes(&self) -> impl Iterator + '_ { + self.0.children().filter_map(SyntaxNode::cast) + } +} + +/// A single piece of a math formula. #[derive(Debug, Clone, PartialEq, Hash)] -pub struct MathNode { - /// The formula between the dollars / brackets. - pub formula: EcoString, - /// Whether the formula is display-level, that is, it contains whitespace - /// after the starting dollar sign and before the ending dollar sign. - pub display: bool, +pub enum MathNode { + /// Whitespace. + Space, + /// A forced line break. + Linebreak, + /// An atom: `x`, `+`, `12`. + Atom(EcoString), + /// A base with an optional sub- and superscript: `a_1^2`. + Script(ScriptNode), + /// A fraction: `x/2`. + Frac(FracNode), + /// A math alignment indicator: `&`, `&&`. + Align(AlignNode), + /// Grouped mathematical material. + Group(Math), + /// An expression. + Expr(Expr), +} + +impl TypedNode for MathNode { + fn from_untyped(node: &SyntaxNode) -> Option { + match node.kind() { + NodeKind::Space { .. } => Some(Self::Space), + NodeKind::LeftBrace => Some(Self::Atom('{'.into())), + NodeKind::RightBrace => Some(Self::Atom('}'.into())), + NodeKind::LeftBracket => Some(Self::Atom('['.into())), + NodeKind::RightBracket => Some(Self::Atom(']'.into())), + NodeKind::LeftParen => Some(Self::Atom('('.into())), + NodeKind::RightParen => Some(Self::Atom(')'.into())), + NodeKind::Linebreak => Some(Self::Linebreak), + NodeKind::Escape(c) => Some(Self::Atom((*c).into())), + NodeKind::Atom(atom) => Some(Self::Atom(atom.clone())), + NodeKind::Script => node.cast().map(Self::Script), + NodeKind::Frac => node.cast().map(Self::Frac), + NodeKind::Align => node.cast().map(Self::Align), + NodeKind::Math => node.cast().map(Self::Group), + _ => node.cast().map(Self::Expr), + } + } + + fn as_untyped(&self) -> &SyntaxNode { + unimplemented!("MathNode::as_untyped") + } +} + +node! { + /// A base with an optional sub- and superscript in a formula: `a_1^2`. + ScriptNode: Script +} + +impl ScriptNode { + /// The base of the script. + pub fn base(&self) -> MathNode { + self.0.cast_first_child().expect("subscript is missing base") + } + + /// The subscript. + pub fn sub(&self) -> Option { + self.0 + .children() + .skip_while(|node| !matches!(node.kind(), NodeKind::Underscore)) + .nth(1) + .map(|node| node.cast().expect("script node has invalid subscript")) + } + + /// The superscript. + pub fn sup(&self) -> Option { + self.0 + .children() + .skip_while(|node| !matches!(node.kind(), NodeKind::Hat)) + .nth(1) + .map(|node| node.cast().expect("script node has invalid superscript")) + } +} + +node! { + /// A fraction in a formula: `x/2` + FracNode: Frac +} + +impl FracNode { + /// The numerator. + pub fn num(&self) -> MathNode { + self.0.cast_first_child().expect("fraction is missing numerator") + } + + /// The denominator. + pub fn denom(&self) -> MathNode { + self.0.cast_last_child().expect("fraction is missing denominator") + } +} + +node! { + /// A math alignment indicator: `&`, `&&`. + AlignNode: Align +} + +impl AlignNode { + /// The number of ampersands. + pub fn count(&self) -> usize { + self.0.children().filter(|n| n.kind() == &NodeKind::Amp).count() + } } node! { @@ -799,27 +914,27 @@ impl BinOp { } /// The associativity of this operator. - pub fn associativity(self) -> Associativity { + pub fn assoc(self) -> Assoc { match self { - Self::Add => Associativity::Left, - Self::Sub => Associativity::Left, - Self::Mul => Associativity::Left, - Self::Div => Associativity::Left, - Self::And => Associativity::Left, - Self::Or => Associativity::Left, - Self::Eq => Associativity::Left, - Self::Neq => Associativity::Left, - Self::Lt => Associativity::Left, - Self::Leq => Associativity::Left, - Self::Gt => Associativity::Left, - Self::Geq => Associativity::Left, - Self::In => Associativity::Left, - Self::NotIn => Associativity::Left, - Self::Assign => Associativity::Right, - Self::AddAssign => Associativity::Right, - Self::SubAssign => Associativity::Right, - Self::MulAssign => Associativity::Right, - Self::DivAssign => Associativity::Right, + Self::Add => Assoc::Left, + Self::Sub => Assoc::Left, + Self::Mul => Assoc::Left, + Self::Div => Assoc::Left, + Self::And => Assoc::Left, + Self::Or => Assoc::Left, + Self::Eq => Assoc::Left, + Self::Neq => Assoc::Left, + Self::Lt => Assoc::Left, + Self::Leq => Assoc::Left, + Self::Gt => Assoc::Left, + Self::Geq => Assoc::Left, + Self::In => Assoc::Left, + Self::NotIn => Assoc::Left, + Self::Assign => Assoc::Right, + Self::AddAssign => Assoc::Right, + Self::SubAssign => Assoc::Right, + Self::MulAssign => Assoc::Right, + Self::DivAssign => Assoc::Right, } } @@ -851,7 +966,7 @@ impl BinOp { /// The associativity of a binary operator. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum Associativity { +pub enum Assoc { /// Left-associative: `a + b + c` is equivalent to `(a + b) + c`. Left, /// Right-associative: `a = b = c` is equivalent to `a = (b = c)`. diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 4a453480..e3640562 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -1,11 +1,10 @@ use std::fmt::Write; use std::ops::Range; -use std::sync::Arc; use syntect::highlighting::{Color, FontStyle, Highlighter, Style, Theme}; use syntect::parsing::Scope; -use super::{InnerNode, NodeKind, SyntaxNode}; +use super::{NodeKind, SyntaxNode}; use crate::parse::TokenMode; /// Provide highlighting categories for the descendants of a node that fall into @@ -47,13 +46,8 @@ where { let root = match mode { TokenMode::Markup => crate::parse::parse(text), - TokenMode::Code => { - let children = crate::parse::parse_code(text); - SyntaxNode::Inner(Arc::new(InnerNode::with_children( - NodeKind::CodeBlock, - children, - ))) - } + TokenMode::Math => crate::parse::parse_math(text), + TokenMode::Code => crate::parse::parse_code(text), }; let highlighter = Highlighter::new(&theme); @@ -169,8 +163,8 @@ pub enum Category { Math, /// A section heading. Heading, - /// A symbol of a list, enumeration, or description list. - List, + /// A marker of a list, enumeration, or description list. + ListMarker, /// A term in a description list. Term, /// A label. @@ -210,71 +204,50 @@ impl Category { match child.kind() { NodeKind::LineComment => Some(Category::Comment), NodeKind::BlockComment => Some(Category::Comment), + NodeKind::Space { .. } => None, + NodeKind::LeftBrace => Some(Category::Bracket), NodeKind::RightBrace => Some(Category::Bracket), NodeKind::LeftBracket => Some(Category::Bracket), NodeKind::RightBracket => Some(Category::Bracket), NodeKind::LeftParen => Some(Category::Bracket), NodeKind::RightParen => Some(Category::Bracket), - - NodeKind::Markup { .. } => match parent.kind() { - NodeKind::Desc - if parent - .children() - .take_while(|child| child.kind() != &NodeKind::Colon) - .find(|c| matches!(c.kind(), NodeKind::Markup { .. })) - .map_or(false, |ident| std::ptr::eq(ident, child)) => - { - Some(Category::Term) - } - _ => None, - }, - NodeKind::Space { .. } => None, - NodeKind::Linebreak { .. } => Some(Category::Shortcut), - NodeKind::Text(_) => None, - NodeKind::Escape(_) => Some(Category::Escape), - NodeKind::NonBreakingSpace => Some(Category::Shortcut), - NodeKind::Shy => Some(Category::Shortcut), - NodeKind::EnDash => Some(Category::Shortcut), - NodeKind::EmDash => Some(Category::Shortcut), - NodeKind::Ellipsis => Some(Category::Shortcut), - NodeKind::Quote { .. } => None, - NodeKind::Star => match parent.kind() { - NodeKind::Strong => None, - _ => Some(Category::Operator), - }, - NodeKind::Underscore => None, - NodeKind::Strong => Some(Category::Strong), - NodeKind::Emph => Some(Category::Emph), - NodeKind::Link(_) => Some(Category::Link), - NodeKind::Raw(_) => Some(Category::Raw), - NodeKind::Math(_) => Some(Category::Math), - NodeKind::Heading => Some(Category::Heading), - NodeKind::List => None, - NodeKind::Enum => None, - NodeKind::EnumNumbering(_) => Some(Category::List), - NodeKind::Desc => None, - NodeKind::Label(_) => Some(Category::Label), - NodeKind::Ref(_) => Some(Category::Ref), - NodeKind::Comma => Some(Category::Punctuation), NodeKind::Semicolon => Some(Category::Punctuation), NodeKind::Colon => match parent.kind() { NodeKind::Desc => Some(Category::Term), _ => Some(Category::Punctuation), }, + NodeKind::Star => match parent.kind() { + NodeKind::Strong => None, + _ => Some(Category::Operator), + }, + NodeKind::Underscore => match parent.kind() { + NodeKind::Script => Some(Category::Shortcut), + _ => None, + }, + NodeKind::Dollar => Some(Category::Math), + NodeKind::Tilde => Some(Category::Shortcut), + NodeKind::HyphQuest => Some(Category::Shortcut), + NodeKind::Hyph2 => Some(Category::Shortcut), + NodeKind::Hyph3 => Some(Category::Shortcut), + NodeKind::Dot3 => Some(Category::Shortcut), + NodeKind::Quote { .. } => None, NodeKind::Plus => match parent.kind() { - NodeKind::Enum => Some(Category::List), + NodeKind::Enum => Some(Category::ListMarker), _ => Some(Category::Operator), }, NodeKind::Minus => match parent.kind() { - NodeKind::List => Some(Category::List), + NodeKind::List => Some(Category::ListMarker), _ => Some(Category::Operator), }, NodeKind::Slash => match parent.kind() { - NodeKind::Desc => Some(Category::List), + NodeKind::Desc => Some(Category::ListMarker), + NodeKind::Frac => Some(Category::Shortcut), _ => Some(Category::Operator), }, + NodeKind::Hat => Some(Category::Shortcut), + NodeKind::Amp => Some(Category::Shortcut), NodeKind::Dot => Some(Category::Punctuation), NodeKind::Eq => match parent.kind() { NodeKind::Heading => None, @@ -292,6 +265,7 @@ impl Category { NodeKind::SlashEq => Some(Category::Operator), NodeKind::Dots => Some(Category::Operator), NodeKind::Arrow => Some(Category::Operator), + NodeKind::Not => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword), NodeKind::Or => Some(Category::Keyword), @@ -314,8 +288,42 @@ impl Category { NodeKind::From => Some(Category::Keyword), NodeKind::As => Some(Category::Keyword), + NodeKind::Markup { .. } => match parent.kind() { + NodeKind::Desc + if parent + .children() + .take_while(|child| child.kind() != &NodeKind::Colon) + .find(|c| matches!(c.kind(), NodeKind::Markup { .. })) + .map_or(false, |ident| std::ptr::eq(ident, child)) => + { + Some(Category::Term) + } + _ => None, + }, + NodeKind::Linebreak { .. } => Some(Category::Shortcut), + NodeKind::Text(_) => None, + NodeKind::Escape(_) => Some(Category::Escape), + NodeKind::Strong => Some(Category::Strong), + NodeKind::Emph => Some(Category::Emph), + NodeKind::Link(_) => Some(Category::Link), + NodeKind::Raw(_) => Some(Category::Raw), + NodeKind::Math => None, + NodeKind::Heading => Some(Category::Heading), + NodeKind::List => None, + NodeKind::Enum => None, + NodeKind::EnumNumbering(_) => Some(Category::ListMarker), + NodeKind::Desc => None, + NodeKind::Label(_) => Some(Category::Label), + NodeKind::Ref(_) => Some(Category::Ref), + + NodeKind::Atom(_) => None, + NodeKind::Script => None, + NodeKind::Frac => None, + NodeKind::Align => None, + NodeKind::Ident(_) => match parent.kind() { NodeKind::Markup { .. } => Some(Category::Interpolated), + NodeKind::Math => Some(Category::Interpolated), NodeKind::FuncCall => Some(Category::Function), NodeKind::MethodCall if i > 0 => Some(Category::Function), NodeKind::ClosureExpr if i == 0 => Some(Category::Function), @@ -388,7 +396,7 @@ impl Category { Self::Raw => "markup.raw.typst", Self::Math => "string.other.math.typst", Self::Heading => "markup.heading.typst", - Self::List => "markup.list.typst", + Self::ListMarker => "markup.list.typst", Self::Term => "markup.list.term.typst", Self::Label => "entity.name.label.typst", Self::Ref => "markup.other.reference.typst", diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 6c6f690c..367d0062 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -12,7 +12,7 @@ use std::sync::Arc; pub use highlight::*; pub use span::*; -use self::ast::{MathNode, RawNode, TypedNode, Unit}; +use self::ast::{RawNode, TypedNode, Unit}; use crate::diag::SourceError; use crate::source::SourceId; use crate::util::EcoString; @@ -579,6 +579,12 @@ pub enum NodeKind { /// /// The comment can contain nested block comments. BlockComment, + /// One or more whitespace characters. Single spaces are collapsed into text + /// nodes if they would otherwise be surrounded by text nodes. + /// + /// Also stores how many newlines are contained. + Space { newlines: usize }, + /// A left curly brace, starting a code block: `{`. LeftBrace, /// A right curly brace, terminating a code block: `}`. @@ -593,69 +599,6 @@ pub enum NodeKind { /// A right round parenthesis, terminating a grouped expression, collection, /// argument or parameter list: `)`. RightParen, - - /// Markup of which all lines must have a minimal indentation. - /// - /// Notably, the number does not determine in which column the markup - /// started, but to the right of which column all markup elements must be, - /// so it is zero except for headings and lists. - Markup { min_indent: usize }, - /// One or more whitespace characters. Single spaces are collapsed into text - /// nodes if they would otherwise be surrounded by text nodes. - /// - /// Also stores how many newlines are contained. - Space { newlines: usize }, - /// A forced line break. - Linebreak, - /// Consecutive text without markup. While basic text with just single - /// spaces is collapsed into a single node, certain symbols that could - /// possibly be markup force text into multiple nodes. - Text(EcoString), - /// A slash and the letter "u" followed by a hexadecimal unicode entity - /// enclosed in curly braces: `\u{1F5FA}`. - Escape(char), - /// A non-breaking space: `~`. - NonBreakingSpace, - /// A soft hyphen: `-?`. - Shy, - /// An en-dash: `--`. - EnDash, - /// An em-dash: `---`. - EmDash, - /// An ellipsis: `...`. - Ellipsis, - /// A smart quote: `'` or `"`. - Quote { double: bool }, - /// The strong text toggle, multiplication operator, and wildcard import - /// symbol: `*`. - Star, - /// Toggles emphasized text: `_`. - Underscore, - /// Strong content: `*Strong*`. - Strong, - /// Emphasized content: `_Emphasized_`. - Emph, - /// A hyperlink. - Link(EcoString), - /// A raw block with optional syntax highlighting: `` `...` ``. - Raw(Arc), - /// A math formula: `$x$`, `$[x^2]$`. - Math(Arc), - /// A section heading: `= Introduction`. - Heading, - /// An item in an unordered list: `- ...`. - List, - /// An item in an enumeration (ordered list): `+ ...` or `1. ...`. - Enum, - /// An explicit enumeration numbering: `23.`. - EnumNumbering(usize), - /// An item in a description list: `/ Term: Details. - Desc, - /// A label: `