diff options
| author | Martin Haug <mhaug@live.de> | 2021-11-01 10:57:45 +0100 |
|---|---|---|
| committer | Martin Haug <mhaug@live.de> | 2021-11-05 13:44:50 +0100 |
| commit | 7d34a548ccd14debe0668e23454e1ced70e485ec (patch) | |
| tree | fe99070ed803a976dfc3f52c9ed8468cc98234d4 /src | |
| parent | 2e7d359e59a45849f53eea6e022ca83295f5a6e7 (diff) | |
Reorganize syntax module
Diffstat (limited to 'src')
| -rw-r--r-- | src/parse/mod.rs | 4 | ||||
| -rw-r--r-- | src/syntax/ast.rs (renamed from src/syntax/expr.rs) | 158 | ||||
| -rw-r--r-- | src/syntax/markup.rs | 159 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 779 | ||||
| -rw-r--r-- | src/syntax/token.rs | 40 |
5 files changed, 566 insertions, 574 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8775e8a1..22288d01 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -520,7 +520,7 @@ fn dict(p: &mut Parser, items: usize) { p.child_count() - items, |x| { x.kind() == &NodeKind::Named - || x.kind().is_parenthesis() + || x.kind().is_paren() || x.kind() == &NodeKind::Comma || x.kind() == &NodeKind::Colon }, @@ -550,7 +550,7 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) { ), _ => false, } - || (allow_parens && x.kind().is_parenthesis()), + || (allow_parens && x.kind().is_paren()), |_| (ErrorPosition::Full, "expected identifier".into()), ); } diff --git a/src/syntax/expr.rs b/src/syntax/ast.rs index 1439cbdb..bdd0767d 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/ast.rs @@ -1,8 +1,164 @@ -use super::{Ident, Markup, NodeKind, RedNode, RedRef, Span, TypedNode}; +use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; use crate::geom::{AngularUnit, LengthUnit}; use crate::node; use crate::util::EcoString; +node! { + /// The syntactical root capable of representing a full parsed document. + Markup +} + +impl Markup { + pub fn nodes<'a>(&'a self) -> impl Iterator<Item = MarkupNode> + 'a { + self.0.children().filter_map(RedRef::cast) + } +} + +/// A single piece of markup. +#[derive(Debug, Clone, PartialEq)] +pub enum MarkupNode { + /// Whitespace containing less than two newlines. + Space, + /// A forced line break: `\`. + Linebreak, + /// A paragraph break: Two or more newlines. + Parbreak, + /// Strong text was enabled / disabled: `*`. + Strong, + /// Emphasized text was enabled / disabled: `_`. + Emph, + /// Plain text. + Text(EcoString), + /// A raw block with optional syntax highlighting: `` `...` ``. + Raw(RawNode), + /// A section heading: `= Introduction`. + Heading(HeadingNode), + /// An item in an unordered list: `- ...`. + List(ListNode), + /// An item in an enumeration (ordered list): `1. ...`. + Enum(EnumNode), + /// An expression. + Expr(Expr), +} + +impl TypedNode for MarkupNode { + fn cast_from(node: RedRef) -> Option<Self> { + match node.kind() { + NodeKind::Space(_) => Some(MarkupNode::Space), + NodeKind::Linebreak => Some(MarkupNode::Linebreak), + NodeKind::Parbreak => Some(MarkupNode::Parbreak), + NodeKind::Strong => Some(MarkupNode::Strong), + NodeKind::Emph => Some(MarkupNode::Emph), + NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), + NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), + NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), + NodeKind::NonBreakingSpace => { + Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) + } + NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), + NodeKind::Heading => node.cast().map(MarkupNode::Heading), + NodeKind::List => node.cast().map(MarkupNode::List), + NodeKind::Enum => node.cast().map(MarkupNode::Enum), + NodeKind::Error(_, _) => None, + _ => node.cast().map(MarkupNode::Expr), + } + } +} + +/// A raw block with optional syntax highlighting: `` `...` ``. +#[derive(Debug, Clone, PartialEq)] +pub struct RawNode { + /// An optional identifier specifying the language to syntax-highlight in. + pub lang: Option<Ident>, + /// The raw text, determined as the raw string between the backticks trimmed + /// according to the above rules. + pub text: EcoString, + /// Whether the element is block-level, that is, it has 3+ backticks + /// and contains at least one newline. + pub block: bool, +} + +impl TypedNode for RawNode { + fn cast_from(node: RedRef) -> Option<Self> { + match node.kind() { + NodeKind::Raw(raw) => { + let span = node.span(); + let start = span.start + raw.backticks as usize; + Some(Self { + block: raw.block, + lang: raw.lang.as_ref().and_then(|x| { + let span = Span::new(span.source, start, start + x.len()); + Ident::new(x, span) + }), + text: raw.text.clone(), + }) + } + _ => None, + } + } +} + +node! { + /// A section heading: `= Introduction`. + Heading => HeadingNode +} + +impl HeadingNode { + /// The contents of the heading. + pub fn body(&self) -> Markup { + self.0 + .cast_first_child() + .expect("heading node is missing markup body") + } + + /// The section depth (numer of equals signs). + pub fn level(&self) -> u8 { + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::HeadingLevel(heading) => Some(*heading), + _ => None, + }) + .expect("heading node is missing heading level") + } +} + +node! { + /// An item in an unordered list: `- ...`. + List => ListNode +} + +impl ListNode { + /// The contents of the list item. + pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("list node is missing body") + } +} + +node! { + /// An item in an enumeration (ordered list): `1. ...`. + Enum => EnumNode +} + +impl EnumNode { + /// The contents of the list item. + pub fn body(&self) -> Markup { + self.0.cast_first_child().expect("enumeration node is missing body") + } + + /// The number, if any. + pub fn number(&self) -> Option<usize> { + self.0 + .children() + .find_map(|node| match node.kind() { + NodeKind::EnumNumbering(num) => Some(num.clone()), + _ => None, + }) + .expect("enumeration node is missing number") + } +} + /// An expression. #[derive(Debug, Clone, PartialEq)] pub enum Expr { diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs deleted file mode 100644 index f43a618a..00000000 --- a/src/syntax/markup.rs +++ /dev/null @@ -1,159 +0,0 @@ -use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; -use crate::node; -use crate::util::EcoString; - -node! { - /// The syntactical root capable of representing a full parsed document. - Markup -} - -impl Markup { - pub fn nodes<'a>(&'a self) -> impl Iterator<Item = MarkupNode> + 'a { - self.0.children().filter_map(RedRef::cast) - } -} - -/// A single piece of markup. -#[derive(Debug, Clone, PartialEq)] -pub enum MarkupNode { - /// Whitespace containing less than two newlines. - Space, - /// A forced line break: `\`. - Linebreak, - /// A paragraph break: Two or more newlines. - Parbreak, - /// Strong text was enabled / disabled: `*`. - Strong, - /// Emphasized text was enabled / disabled: `_`. - Emph, - /// Plain text. - Text(EcoString), - /// A raw block with optional syntax highlighting: `` `...` ``. - Raw(RawNode), - /// A section heading: `= Introduction`. - Heading(HeadingNode), - /// An item in an unordered list: `- ...`. - List(ListNode), - /// An item in an enumeration (ordered list): `1. ...`. - Enum(EnumNode), - /// An expression. - Expr(Expr), -} - -impl TypedNode for MarkupNode { - fn cast_from(node: RedRef) -> Option<Self> { - match node.kind() { - NodeKind::Space(_) => Some(MarkupNode::Space), - NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Parbreak => Some(MarkupNode::Parbreak), - NodeKind::Strong => Some(MarkupNode::Strong), - NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), - NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), - NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), - NodeKind::NonBreakingSpace => { - Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) - } - NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), - NodeKind::Heading => node.cast().map(MarkupNode::Heading), - NodeKind::List => node.cast().map(MarkupNode::List), - NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Error(_, _) => None, - _ => node.cast().map(MarkupNode::Expr), - } - } -} - -/// A raw block with optional syntax highlighting: `` `...` ``. -#[derive(Debug, Clone, PartialEq)] -pub struct RawNode { - /// An optional identifier specifying the language to syntax-highlight in. - pub lang: Option<Ident>, - /// The raw text, determined as the raw string between the backticks trimmed - /// according to the above rules. - pub text: EcoString, - /// Whether the element is block-level, that is, it has 3+ backticks - /// and contains at least one newline. - pub block: bool, -} - -impl TypedNode for RawNode { - fn cast_from(node: RedRef) -> Option<Self> { - match node.kind() { - NodeKind::Raw(raw) => { - let span = node.span(); - let start = span.start + raw.backticks as usize; - Some(Self { - block: raw.block, - lang: raw.lang.as_ref().and_then(|x| { - let span = Span::new(span.source, start, start + x.len()); - Ident::new(x, span) - }), - text: raw.text.clone(), - }) - } - _ => None, - } - } -} - -node! { - /// A section heading: `= Introduction`. - Heading => HeadingNode -} - -impl HeadingNode { - /// The contents of the heading. - pub fn body(&self) -> Markup { - self.0 - .cast_first_child() - .expect("heading node is missing markup body") - } - - /// The section depth (numer of equals signs). - pub fn level(&self) -> u8 { - self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::HeadingLevel(heading) => Some(*heading), - _ => None, - }) - .expect("heading node is missing heading level") - } -} - -node! { - /// An item in an unordered list: `- ...`. - List => ListNode -} - -impl ListNode { - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("list node is missing body") - } -} - -node! { - /// An item in an enumeration (ordered list): `1. ...`. - Enum => EnumNode -} - -impl EnumNode { - /// The contents of the list item. - pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("enumeration node is missing body") - } - - /// The number, if any. - pub fn number(&self) -> Option<usize> { - self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::EnumNumbering(num) => Some(num.clone()), - _ => None, - }) - .expect("enumeration node is missing number") - } -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9d4beb6c..9fd2b21d 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,28 +1,316 @@ //! Syntax types. -mod expr; +mod ast; mod ident; -mod markup; mod pretty; mod span; -mod token; use std::fmt; use std::fmt::{Debug, Display, Formatter}; use std::mem; use std::rc::Rc; -pub use expr::*; +pub use ast::*; pub use ident::*; -pub use markup::*; pub use pretty::*; pub use span::*; -pub use token::*; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; use crate::util::EcoString; +/// Children of a [`GreenNode`]. +#[derive(Clone, PartialEq)] +pub enum Green { + /// A non-terminal node in an Rc. + Node(Rc<GreenNode>), + /// A terminal owned token. + Token(GreenData), +} + +impl Green { + fn data(&self) -> &GreenData { + match self { + Green::Node(n) => &n.data, + Green::Token(t) => &t, + } + } + + pub fn kind(&self) -> &NodeKind { + self.data().kind() + } + + pub fn len(&self) -> usize { + self.data().len() + } + + pub fn erroneous(&self) -> bool { + self.data().erroneous() + } + + pub fn children(&self) -> &[Green] { + match self { + Green::Node(n) => &n.children(), + Green::Token(_) => &[], + } + } +} + +impl Default for Green { + fn default() -> Self { + Self::Token(GreenData::new(NodeKind::None, 0)) + } +} + +impl Debug for Green { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {}", self.kind(), self.len())?; + if let Self::Node(n) = self { + if !n.children.is_empty() { + f.write_str(" ")?; + f.debug_list().entries(&n.children).finish()?; + } + } + + Ok(()) + } +} + +/// A syntactical node. +#[derive(Clone, PartialEq)] +pub struct GreenNode { + /// Node metadata. + data: GreenData, + /// This node's children, losslessly make up this node. + children: Vec<Green>, +} + +impl GreenNode { + pub fn new(kind: NodeKind, len: usize) -> Self { + Self { + data: GreenData::new(kind, len), + children: Vec::new(), + } + } + + pub fn with_children(kind: NodeKind, len: usize, children: Vec<Green>) -> Self { + let mut meta = GreenData::new(kind, len); + meta.erroneous |= children.iter().any(|c| c.erroneous()); + Self { data: meta, children } + } + + pub fn with_child(kind: NodeKind, len: usize, child: impl Into<Green>) -> Self { + Self::with_children(kind, len, vec![child.into()]) + } + + pub fn children(&self) -> &[Green] { + &self.children + } +} + +impl From<GreenNode> for Green { + fn from(node: GreenNode) -> Self { + Rc::new(node).into() + } +} + +impl From<Rc<GreenNode>> for Green { + fn from(node: Rc<GreenNode>) -> Self { + Self::Node(node) + } +} + +/// Data shared between [`GreenNode`]s and [`GreenToken`]s. +#[derive(Clone, PartialEq)] +pub struct GreenData { + /// What kind of node this is (each kind would have its own struct in a + /// strongly typed AST). + kind: NodeKind, + /// The byte length of the node in the source. + len: usize, + /// Whether this node or any of its children are erroneous. + erroneous: bool, +} + +impl GreenData { + pub fn new(kind: NodeKind, len: usize) -> Self { + Self { len, erroneous: kind.is_error(), kind } + } + + pub fn kind(&self) -> &NodeKind { + &self.kind + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn erroneous(&self) -> bool { + self.erroneous + } +} + +impl From<GreenData> for Green { + fn from(token: GreenData) -> Self { + Self::Token(token) + } +} + +#[derive(Copy, Clone, PartialEq)] +pub struct RedRef<'a> { + id: SourceId, + offset: usize, + green: &'a Green, +} + +impl<'a> RedRef<'a> { + pub fn own(self) -> RedNode { + RedNode { + id: self.id, + offset: self.offset, + green: self.green.clone(), + } + } + + pub fn kind(&self) -> &NodeKind { + self.green.kind() + } + + pub fn span(&self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) + } + + pub fn cast<T>(self) -> Option<T> + where + T: TypedNode, + { + T::cast_from(self) + } + + pub fn erroneous(&self) -> bool { + self.green.erroneous() + } + + pub fn children(self) -> impl Iterator<Item = RedRef<'a>> + Clone { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + let mut offset = self.offset; + children.iter().map(move |green| { + let child_offset = offset; + offset += green.len(); + RedRef { id: self.id, offset: child_offset, green } + }) + } + + pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedRef> { + self.children() + .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) + } + + pub(crate) fn cast_first_child<T: TypedNode>(&self) -> Option<T> { + self.children().find_map(RedRef::cast) + } + + pub(crate) fn cast_last_child<T: TypedNode>(&self) -> Option<T> { + self.children().filter_map(RedRef::cast).last() + } +} + +#[derive(Clone, PartialEq)] +pub struct RedNode { + id: SourceId, + offset: usize, + green: Green, +} + +impl RedNode { + pub fn new_root(root: Rc<GreenNode>, id: SourceId) -> Self { + Self { id, offset: 0, green: root.into() } + } + + pub fn span(&self) -> Span { + self.as_ref().span() + } + + pub fn len(&self) -> usize { + self.green.len() + } + + pub fn kind(&self) -> &NodeKind { + self.green.kind() + } + + pub fn children<'a>(&'a self) -> impl Iterator<Item = RedRef<'a>> + Clone { + self.as_ref().children() + } + + pub fn errors(&self) -> Vec<(Span, EcoString)> { + if !self.green.erroneous() { + return vec![]; + } + + match self.kind() { + NodeKind::Error(pos, msg) => { + let span = match pos { + ErrorPosition::Start => self.span().at_start(), + ErrorPosition::Full => self.span(), + ErrorPosition::End => self.span().at_end(), + }; + + vec![(span, msg.clone())] + } + _ => self + .as_ref() + .children() + .filter(|red| red.green.erroneous()) + .flat_map(|red| red.own().errors()) + .collect(), + } + } + + pub fn as_ref<'a>(&'a self) -> RedRef<'a> { + RedRef { + id: self.id, + offset: self.offset, + green: &self.green, + } + } + + pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedNode> { + self.as_ref().typed_child(kind).map(RedRef::own) + } + + pub(crate) fn cast_first_child<T: TypedNode>(&self) -> Option<T> { + self.as_ref().cast_first_child() + } + + pub(crate) fn cast_last_child<T: TypedNode>(&self) -> Option<T> { + self.as_ref().cast_last_child() + } +} + +impl Debug for RedNode { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {:?}", self.kind(), self.span())?; + let children = self.as_ref().children().collect::<Vec<_>>(); + if !children.is_empty() { + f.write_str(" ")?; + f.debug_list() + .entries(children.into_iter().map(RedRef::own)) + .finish()?; + } + Ok(()) + } +} + +pub trait TypedNode: Sized { + /// Performs the conversion. + fn cast_from(value: RedRef) -> Option<Self>; +} + #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { /// A left square bracket: `[`. @@ -73,10 +361,6 @@ pub enum NodeKind { StarEq, /// A slash followed by an equals sign: `/=`. SlashEq, - /// Two dots: `..`. - Dots, - /// An equals sign followed by a greater-than sign: `=>`. - Arrow, /// The `not` operator. Not, /// The `and` operator. @@ -85,8 +369,10 @@ pub enum NodeKind { Or, /// The `with` operator. With, - /// The `with` expression: `with (1)`. - WithExpr, + /// Two dots: `..`. + Dots, + /// An equals sign followed by a greater-than sign: `=>`. + Arrow, /// The none literal: `none`. None, /// The auto literal: `auto`. @@ -115,22 +401,48 @@ pub enum NodeKind { Include, /// The `from` keyword. From, + /// Template markup. + Markup, /// One or more whitespace characters. Space(usize), + /// A forced line break: `\`. + Linebreak, + /// A paragraph break: Two or more newlines. + Parbreak, /// A consecutive non-markup string. Text(EcoString), + /// A non-breaking space: `~`. + NonBreakingSpace, + /// An en-dash: `--`. + EnDash, + /// An em-dash: `---`. + EmDash, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. UnicodeEscape(UnicodeEscapeToken), + /// Strong text was enabled / disabled: `*`. + Strong, + /// Emphasized text was enabled / disabled: `_`. + Emph, + /// A section heading: `= Introduction`. + Heading, + /// A heading's level: `=`, `==`, `===`, etc. + HeadingLevel(u8), + /// An item in an enumeration (ordered list): `1. ...`. + Enum, + /// A numbering: `23.`. + /// + /// Can also exist without the number: `.`. + EnumNumbering(Option<usize>), + /// An item in an unordered list: `- ...`. + List, + /// The bullet character of an item in an unordered list: `-`. + ListBullet, /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. Raw(Rc<RawToken>), /// Dollar signs surrounding inner contents. Math(Rc<MathToken>), - /// A numbering: `23.`. - /// - /// Can also exist without the number: `.`. - EnumNumbering(Option<usize>), /// An identifier: `center`. Ident(EcoString), /// A boolean: `true`, `false`. @@ -152,56 +464,14 @@ pub enum NodeKind { Fraction(f64), /// A quoted string: `"..."`. Str(StrToken), - /// Two slashes followed by inner contents, terminated with a newline: - /// `//<str>\n`. - LineComment, - /// A slash and a star followed by inner contents, terminated with a star - /// and a slash: `/*<str>*/`. - /// - /// The comment can contain nested block comments. - BlockComment, - /// Tokens that appear in the wrong place. - Error(ErrorPosition, EcoString), - /// Unknown character sequences. - Unknown(EcoString), - /// Template markup. - Markup, - /// A forced line break: `\`. - Linebreak, - /// A paragraph break: Two or more newlines. - Parbreak, - /// Strong text was enabled / disabled: `*`. - Strong, - /// Emphasized text was enabled / disabled: `_`. - Emph, - /// A non-breaking space: `~`. - NonBreakingSpace, - /// An en-dash: `--`. - EnDash, - /// An em-dash: `---`. - EmDash, - /// A section heading: `= Introduction`. - Heading, - /// A heading's level: `=`, `==`, `===`, etc. - HeadingLevel(u8), - /// An item in an unordered list: `- ...`. - List, - /// The bullet character of an item in an unordered list: `-`. - ListBullet, - /// An item in an enumeration (ordered list): `1. ...`. - Enum, /// An array expression: `(1, "hi", 12cm)`. Array, /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. Dict, /// A named argument: `thickness: 3pt`. Named, - /// A template expression: `[*Hi* there!]`. - Template, /// A grouped expression: `(1 + 2)`. Group, - /// A block expression: `{ let x = 1; x + 2 }`. - Block, /// A unary operation: `-x`. Unary, /// A binary operation: `a + b`. @@ -216,6 +486,10 @@ pub enum NodeKind { ClosureParams, /// A parameter sink: `..x`. ParameterSink, + /// A template expression: `[*Hi* there!]`. + Template, + /// A block expression: `{ let x = 1; x + 2 }`. + Block, /// A for loop expression: `for x in y { ... }`. ForExpr, /// A while loop expression: `while x { ... }`. @@ -224,6 +498,8 @@ pub enum NodeKind { IfExpr, /// A let expression: `let x = 1`. LetExpr, + /// The `with` expression: `with (1)`. + WithExpr, /// A for loop's destructuring pattern: `x` or `x, y`. ForPattern, /// The import expression: `import x from "foo.typ"`. @@ -232,6 +508,18 @@ pub enum NodeKind { ImportItems, /// The include expression: `include "foo.typ"`. IncludeExpr, + /// Two slashes followed by inner contents, terminated with a newline: + /// `//<str>\n`. + LineComment, + /// A slash and a star followed by inner contents, terminated with a star + /// and a slash: `/*<str>*/`. + /// + /// The comment can contain nested block comments. + BlockComment, + /// Tokens that appear in the wrong place. + Error(ErrorPosition, EcoString), + /// Unknown character sequences. + Unknown(EcoString), } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -244,6 +532,45 @@ pub enum ErrorPosition { End, } +/// A quoted string token: `"..."`. +#[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] +pub struct StrToken { + /// The string inside the quotes. + pub string: EcoString, +} + +/// A raw block token: `` `...` ``. +#[derive(Debug, Clone, PartialEq)] +pub struct RawToken { + /// The raw text in the block. + pub text: EcoString, + /// The programming language of the raw text. + pub lang: Option<EcoString>, + /// The number of opening backticks. + pub backticks: u8, + /// Whether to display this as a block. + pub block: bool, +} + +/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. +#[derive(Debug, Clone, PartialEq)] +pub struct MathToken { + /// The formula between the dollars. + pub formula: EcoString, + /// Whether the formula is display-level, that is, it is surrounded by + /// `$[..]`. + pub display: bool, +} + +/// A unicode escape sequence token: `\u{1F5FA}`. +#[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] +pub struct UnicodeEscapeToken { + /// The resulting unicode character. + pub character: char, +} + impl Display for NodeKind { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.pad(self.as_str()) @@ -251,7 +578,7 @@ impl Display for NodeKind { } impl NodeKind { - pub fn is_parenthesis(&self) -> bool { + pub fn is_paren(&self) -> bool { match self { Self::LeftParen => true, Self::RightParen => true, @@ -305,13 +632,12 @@ impl NodeKind { Self::HyphEq => "subtract-assign operator", Self::StarEq => "multiply-assign operator", Self::SlashEq => "divide-assign operator", - Self::Dots => "dots", - Self::Arrow => "arrow", Self::Not => "operator `not`", Self::And => "operator `and`", Self::Or => "operator `or`", Self::With => "operator `with`", - Self::WithExpr => "`with` expression", + Self::Dots => "dots", + Self::Arrow => "arrow", Self::None => "`none`", Self::Auto => "`auto`", Self::Let => "keyword `let`", @@ -326,28 +652,25 @@ impl NodeKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::From => "keyword `from`", - Self::Space(_) => "space", - Self::Math(_) => "math formula", - Self::EnumNumbering(_) => "numbering", - Self::Str(_) => "string", - Self::LineComment => "line comment", - Self::BlockComment => "block comment", Self::Markup => "markup", + Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", - Self::Strong => "strong", - Self::Emph => "emphasis", Self::Text(_) => "text", Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", Self::EmDash => "em dash", Self::UnicodeEscape(_) => "unicode escape sequence", - Self::Raw(_) => "raw block", + Self::Strong => "strong", + Self::Emph => "emphasis", Self::Heading => "heading", Self::HeadingLevel(_) => "heading level", - Self::List => "list", + Self::Enum => "enumeration item", + Self::EnumNumbering(_) => "enumeration item numbering", + Self::List => "list item", Self::ListBullet => "list bullet", - Self::Enum => "enum", + Self::Raw(_) => "raw block", + Self::Math(_) => "math formula", Self::Ident(_) => "identifier", Self::Bool(_) => "boolean", Self::Int(_) => "integer", @@ -356,12 +679,11 @@ impl NodeKind { Self::Angle(_, _) => "angle", Self::Percentage(_) => "percentage", Self::Fraction(_) => "`fr` value", + Self::Str(_) => "string", Self::Array => "array", Self::Dict => "dictionary", Self::Named => "named argument", - Self::Template => "template", Self::Group => "group", - Self::Block => "block", Self::Unary => "unary expression", Self::Binary => "binary expression", Self::Call => "call", @@ -369,313 +691,26 @@ impl NodeKind { Self::Closure => "closure", Self::ClosureParams => "closure parameters", Self::ParameterSink => "parameter sink", + Self::Template => "template", + Self::Block => "block", Self::ForExpr => "for-loop expression", Self::WhileExpr => "while-loop expression", - Self::IfExpr => "if expression", - Self::LetExpr => "let expression", + Self::IfExpr => "`if` expression", + Self::LetExpr => "`let` expression", + Self::WithExpr => "`with` expression", Self::ForPattern => "for-loop destructuring pattern", - Self::ImportExpr => "import expression", + Self::ImportExpr => "`import` expression", Self::ImportItems => "import items", - Self::IncludeExpr => "include expression", + Self::IncludeExpr => "`include` expression", + Self::LineComment => "line comment", + Self::BlockComment => "block comment", + Self::Error(_, _) => "parse error", Self::Unknown(src) => match src.as_str() { "*/" => "end of block comment", _ => "invalid token", }, - Self::Error(_, _) => "parse error", - } - } -} - -/// A syntactical node. -#[derive(Clone, PartialEq)] -pub struct GreenNode { - /// Node metadata. - data: GreenData, - /// This node's children, losslessly make up this node. - children: Vec<Green>, -} - -/// Data shared between [`GreenNode`]s and [`GreenToken`]s. -#[derive(Clone, PartialEq)] -pub struct GreenData { - /// What kind of node this is (each kind would have its own struct in a - /// strongly typed AST). - kind: NodeKind, - /// The byte length of the node in the source. - len: usize, - /// Whether this node or any of its children are erroneous. - erroneous: bool, -} - -impl GreenData { - pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, erroneous: kind.is_error(), kind } - } - - pub fn kind(&self) -> &NodeKind { - &self.kind - } - - pub fn len(&self) -> usize { - self.len - } - - pub fn erroneous(&self) -> bool { - self.erroneous - } -} - -impl From<GreenData> for Green { - fn from(token: GreenData) -> Self { - Self::Token(token) - } -} - -/// Children of a [`GreenNode`]. -#[derive(Clone, PartialEq)] -pub enum Green { - /// A terminal owned token. - Token(GreenData), - /// A non-terminal node in an Rc. - Node(Rc<GreenNode>), -} - -impl Green { - fn data(&self) -> &GreenData { - match self { - Green::Token(t) => &t, - Green::Node(n) => &n.data, - } - } - - pub fn kind(&self) -> &NodeKind { - self.data().kind() - } - - pub fn len(&self) -> usize { - self.data().len() - } - - pub fn erroneous(&self) -> bool { - self.data().erroneous() - } - - pub fn children(&self) -> &[Green] { - match self { - Green::Token(_) => &[], - Green::Node(n) => &n.children(), - } - } -} - -impl GreenNode { - pub fn new(kind: NodeKind, len: usize) -> Self { - Self { - data: GreenData::new(kind, len), - children: Vec::new(), - } - } - - pub fn with_children(kind: NodeKind, len: usize, children: Vec<Green>) -> Self { - let mut meta = GreenData::new(kind, len); - meta.erroneous |= children.iter().any(|c| c.erroneous()); - Self { data: meta, children } - } - - pub fn with_child(kind: NodeKind, len: usize, child: impl Into<Green>) -> Self { - Self::with_children(kind, len, vec![child.into()]) - } - - pub fn children(&self) -> &[Green] { - &self.children - } -} - -impl From<GreenNode> for Green { - fn from(node: GreenNode) -> Self { - Rc::new(node).into() - } -} - -impl From<Rc<GreenNode>> for Green { - fn from(node: Rc<GreenNode>) -> Self { - Self::Node(node) - } -} - -impl Default for Green { - fn default() -> Self { - Self::Token(GreenData::new(NodeKind::None, 0)) - } -} - -impl Debug for Green { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind(), self.len())?; - if let Self::Node(n) = self { - if !n.children.is_empty() { - f.write_str(" ")?; - f.debug_list().entries(&n.children).finish()?; - } - } - - Ok(()) - } -} - -#[derive(Copy, Clone, PartialEq)] -pub struct RedRef<'a> { - id: SourceId, - offset: usize, - green: &'a Green, -} - -impl<'a> RedRef<'a> { - pub fn own(self) -> RedNode { - RedNode { - id: self.id, - offset: self.offset, - green: self.green.clone(), - } - } - - pub fn kind(&self) -> &NodeKind { - self.green.kind() - } - - pub fn span(&self) -> Span { - Span::new(self.id, self.offset, self.offset + self.green.len()) - } - - pub fn cast<T>(self) -> Option<T> - where - T: TypedNode, - { - T::cast_from(self) - } - - pub fn erroneous(&self) -> bool { - self.green.erroneous() - } - - pub fn children(self) -> impl Iterator<Item = RedRef<'a>> + Clone { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - let mut offset = self.offset; - children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } - }) - } - - pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedRef> { - self.children() - .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) - } - - pub(crate) fn cast_first_child<T: TypedNode>(&self) -> Option<T> { - self.children().find_map(RedRef::cast) - } - - pub(crate) fn cast_last_child<T: TypedNode>(&self) -> Option<T> { - self.children().filter_map(RedRef::cast).last() - } -} - -#[derive(Clone, PartialEq)] -pub struct RedNode { - id: SourceId, - offset: usize, - green: Green, -} - -impl RedNode { - pub fn new_root(root: Rc<GreenNode>, id: SourceId) -> Self { - Self { id, offset: 0, green: root.into() } - } - - pub fn span(&self) -> Span { - self.as_ref().span() - } - - pub fn len(&self) -> usize { - self.green.len() - } - - pub fn kind(&self) -> &NodeKind { - self.green.kind() - } - - pub fn children<'a>(&'a self) -> impl Iterator<Item = RedRef<'a>> + Clone { - self.as_ref().children() - } - - pub fn errors(&self) -> Vec<(Span, EcoString)> { - if !self.green.erroneous() { - return vec![]; - } - - match self.kind() { - NodeKind::Error(pos, msg) => { - let span = match pos { - ErrorPosition::Start => self.span().at_start(), - ErrorPosition::Full => self.span(), - ErrorPosition::End => self.span().at_end(), - }; - - vec![(span, msg.clone())] - } - _ => self - .as_ref() - .children() - .filter(|red| red.green.erroneous()) - .flat_map(|red| red.own().errors()) - .collect(), } } - - pub fn as_ref<'a>(&'a self) -> RedRef<'a> { - RedRef { - id: self.id, - offset: self.offset, - green: &self.green, - } - } - - pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedNode> { - self.as_ref().typed_child(kind).map(RedRef::own) - } - - pub(crate) fn cast_first_child<T: TypedNode>(&self) -> Option<T> { - self.as_ref().cast_first_child() - } - - pub(crate) fn cast_last_child<T: TypedNode>(&self) -> Option<T> { - self.as_ref().cast_last_child() - } -} - -impl Debug for RedNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {:?}", self.kind(), self.span())?; - let children = self.as_ref().children().collect::<Vec<_>>(); - if !children.is_empty() { - f.write_str(" ")?; - f.debug_list() - .entries(children.into_iter().map(RedRef::own)) - .finish()?; - } - Ok(()) - } -} - -pub trait TypedNode: Sized { - /// Performs the conversion. - fn cast_from(value: RedRef) -> Option<Self>; } #[macro_export] diff --git a/src/syntax/token.rs b/src/syntax/token.rs deleted file mode 100644 index 4f43bb4f..00000000 --- a/src/syntax/token.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::util::EcoString; - -/// A quoted string token: `"..."`. -#[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct StrToken { - /// The string inside the quotes. - pub string: EcoString, -} - -/// A raw block token: `` `...` ``. -#[derive(Debug, Clone, PartialEq)] -pub struct RawToken { - /// The raw text in the block. - pub text: EcoString, - /// The programming language of the raw text. - pub lang: Option<EcoString>, - /// The number of opening backticks. - pub backticks: u8, - /// Whether to display this as a block. - pub block: bool, -} - -/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. -#[derive(Debug, Clone, PartialEq)] -pub struct MathToken { - /// The formula between the dollars. - pub formula: EcoString, - /// Whether the formula is display-level, that is, it is surrounded by - /// `$[..]`. - pub display: bool, -} - -/// A unicode escape sequence token: `\u{1F5FA}`. -#[derive(Debug, Clone, PartialEq)] -#[repr(transparent)] -pub struct UnicodeEscapeToken { - /// The resulting unicode character. - pub character: char, -} |
