From 704f2fbaf1b4483caa12f249a222c49e44f08961 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 26 Sep 2022 15:39:32 +0200 Subject: Description lists, link syntax, and new enum syntax --- src/syntax/ast.rs | 54 +++++++--- src/syntax/highlight.rs | 130 ++++++++++++++--------- src/syntax/mod.rs | 272 ++++++++++++++++++++++++++---------------------- 3 files changed, 264 insertions(+), 192 deletions(-) (limited to 'src/syntax') diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 10bee4e8..8d3696a8 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -63,9 +63,7 @@ impl Markup { self.0.children().filter_map(|node| match node.kind() { NodeKind::Space { newlines: (2 ..) } => Some(MarkupNode::Parbreak), NodeKind::Space { .. } => Some(MarkupNode::Space), - &NodeKind::Linebreak { justified } => { - Some(MarkupNode::Linebreak { justified }) - } + NodeKind::Linebreak => Some(MarkupNode::Linebreak), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), @@ -76,6 +74,7 @@ impl Markup { &NodeKind::Quote { double } => Some(MarkupNode::Quote { double }), NodeKind::Strong => node.cast().map(MarkupNode::Strong), NodeKind::Emph => node.cast().map(MarkupNode::Emph), + NodeKind::Link(url) => Some(MarkupNode::Link(url.clone())), NodeKind::Raw(raw) => Some(MarkupNode::Raw(raw.as_ref().clone())), NodeKind::Math(math) => Some(MarkupNode::Math(Spanned::new( math.as_ref().clone(), @@ -84,6 +83,7 @@ impl Markup { NodeKind::Heading => node.cast().map(MarkupNode::Heading), NodeKind::List => node.cast().map(MarkupNode::List), NodeKind::Enum => node.cast().map(MarkupNode::Enum), + NodeKind::Desc => node.cast().map(MarkupNode::Desc), NodeKind::Label(v) => Some(MarkupNode::Label(v.clone())), NodeKind::Ref(v) => Some(MarkupNode::Ref(v.clone())), _ => node.cast().map(MarkupNode::Expr), @@ -96,8 +96,8 @@ impl Markup { pub enum MarkupNode { /// Whitespace 
containing less than two newlines. Space, - /// A forced line break: `\` or `\+` if justified. - Linebreak { justified: bool }, + /// A forced line break. + Linebreak, /// A paragraph break: Two or more newlines. Parbreak, /// Plain text. @@ -108,6 +108,8 @@ pub enum MarkupNode { Strong(StrongNode), /// Emphasized content: `_Emphasized_`. Emph(EmphNode), + /// A hyperlink. + Link(EcoString), /// A raw block with optional syntax highlighting: `` `...` ``. Raw(RawNode), /// A math formula: `$a^2 = b^2 + c^2$`. @@ -116,8 +118,10 @@ pub enum MarkupNode { Heading(HeadingNode), /// An item in an unordered list: `- ...`. List(ListNode), - /// An item in an enumeration (ordered list): `1. ...`. + /// An item in an enumeration (ordered list): `+ ...` or `1. ...`. Enum(EnumNode), + /// An item in a description list: `/ Term: Details`. + Desc(DescNode), /// A label. Label(EcoString), /// A reference. @@ -170,8 +174,8 @@ pub struct RawNode { pub struct MathNode { /// The formula between the dollars / brackets. pub formula: EcoString, - /// Whether the formula is display-level, that is, it is surrounded by - /// `$[..]$`. + /// Whether the formula is display-level, that is, it contains whitespace + /// after the starting dollar sign and before the ending dollar sign. pub display: bool, } @@ -205,7 +209,7 @@ node! { impl ListNode { /// The contents of the list item. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("list node is missing body") + self.0.cast_first_child().expect("list item is missing body") } } @@ -217,18 +221,36 @@ node! { impl EnumNode { /// The contents of the list item. pub fn body(&self) -> Markup { - self.0.cast_first_child().expect("enum node is missing body") + self.0.cast_first_child().expect("enum item is missing body") } /// The number, if any. pub fn number(&self) -> Option<usize> { + self.0.children().find_map(|node| match node.kind() { + NodeKind::EnumNumbering(num) => Some(*num), + _ => None, + }) + } +} + +node! 
{ + /// An item in a description list: `/ Term: Details`. + DescNode: Desc +} + +impl DescNode { + /// The term described by the list item. + pub fn term(&self) -> Markup { self.0 - .children() - .find_map(|node| match node.kind() { - NodeKind::EnumNumbering(num) => Some(*num), - _ => None, - }) - .expect("enum node is missing number") + .cast_first_child() + .expect("description list item is missing term") + } + + /// The description of the term. + pub fn body(&self) -> Markup { + self.0 + .cast_last_child() + .expect("description list item is missing body") } } diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 7f5ee083..de7c70a2 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -147,12 +147,12 @@ pub fn highlight_pre(text: &str, mode: TokenMode, theme: &Theme) -> String { /// The syntax highlighting category of a node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum Category { + /// A line or block comment. + Comment, /// Any kind of bracket, parenthesis or brace. Bracket, /// Punctuation in code. Punctuation, - /// A line or block comment. - Comment, /// An easily typable shortcut to a unicode codepoint. Shortcut, /// An escape sequence. @@ -161,14 +161,18 @@ pub enum Category { Strong, /// Emphasized text. Emph, + /// A hyperlink. + Link, /// Raw text or code. Raw, /// A math formula. Math, /// A section heading. Heading, - /// A list or enumeration. + /// A symbol of a list, enumeration, or description list. List, + /// A term in a description list. + Term, /// A label. Label, /// A reference. 
@@ -204,66 +208,74 @@ impl Category { i: usize, ) -> Option { match child.kind() { + NodeKind::LineComment => Some(Category::Comment), + NodeKind::BlockComment => Some(Category::Comment), NodeKind::LeftBrace => Some(Category::Bracket), NodeKind::RightBrace => Some(Category::Bracket), NodeKind::LeftBracket => Some(Category::Bracket), NodeKind::RightBracket => Some(Category::Bracket), NodeKind::LeftParen => Some(Category::Bracket), NodeKind::RightParen => Some(Category::Bracket), - NodeKind::Comma => Some(Category::Punctuation), - NodeKind::Semicolon => Some(Category::Punctuation), - NodeKind::Colon => Some(Category::Punctuation), - NodeKind::Dot => Some(Category::Punctuation), - NodeKind::LineComment => Some(Category::Comment), - NodeKind::BlockComment => Some(Category::Comment), + + NodeKind::Markup { .. } => match parent.kind() { + NodeKind::Desc + if parent + .children() + .take_while(|child| child.kind() != &NodeKind::Colon) + .find(|c| matches!(c.kind(), NodeKind::Markup { .. })) + .map_or(false, |ident| std::ptr::eq(ident, child)) => + { + Some(Category::Term) + } + _ => None, + }, + NodeKind::Space { .. } => None, NodeKind::Linebreak { .. } => Some(Category::Shortcut), + NodeKind::Text(_) => None, + NodeKind::Escape(_) => Some(Category::Escape), NodeKind::NonBreakingSpace => Some(Category::Shortcut), NodeKind::Shy => Some(Category::Shortcut), NodeKind::EnDash => Some(Category::Shortcut), NodeKind::EmDash => Some(Category::Shortcut), NodeKind::Ellipsis => Some(Category::Shortcut), - NodeKind::Escape(_) => Some(Category::Escape), + NodeKind::Quote { .. 
} => None, + NodeKind::Star => match parent.kind() { + NodeKind::Strong => None, + _ => Some(Category::Operator), + }, + NodeKind::Underscore => None, NodeKind::Strong => Some(Category::Strong), NodeKind::Emph => Some(Category::Emph), + NodeKind::Link(_) => Some(Category::Link), NodeKind::Raw(_) => Some(Category::Raw), NodeKind::Math(_) => Some(Category::Math), NodeKind::Heading => Some(Category::Heading), + NodeKind::List => None, + NodeKind::Enum => None, + NodeKind::EnumNumbering(_) => Some(Category::List), + NodeKind::Desc => None, + NodeKind::Label(_) => Some(Category::Label), + NodeKind::Ref(_) => Some(Category::Ref), + + NodeKind::Comma => Some(Category::Punctuation), + NodeKind::Semicolon => Some(Category::Punctuation), + NodeKind::Colon => match parent.kind() { + NodeKind::Desc => Some(Category::Term), + _ => Some(Category::Punctuation), + }, + NodeKind::Plus => match parent.kind() { + NodeKind::Enum => Some(Category::List), + _ => Some(Category::Operator), + }, NodeKind::Minus => match parent.kind() { NodeKind::List => Some(Category::List), _ => Some(Category::Operator), }, - NodeKind::EnumNumbering(_) => Some(Category::List), - NodeKind::Label(_) => Some(Category::Label), - NodeKind::Ref(_) => Some(Category::Ref), - NodeKind::Not => Some(Category::Keyword), - NodeKind::And => Some(Category::Keyword), - NodeKind::Or => Some(Category::Keyword), - NodeKind::Let => Some(Category::Keyword), - NodeKind::Set => Some(Category::Keyword), - NodeKind::Show => Some(Category::Keyword), - NodeKind::Wrap => Some(Category::Keyword), - NodeKind::If => Some(Category::Keyword), - NodeKind::Else => Some(Category::Keyword), - NodeKind::While => Some(Category::Keyword), - NodeKind::For => Some(Category::Keyword), - NodeKind::In => Some(Category::Keyword), - NodeKind::As => Some(Category::Keyword), - NodeKind::Break => Some(Category::Keyword), - NodeKind::Continue => Some(Category::Keyword), - NodeKind::Return => Some(Category::Keyword), - NodeKind::Import => 
Some(Category::Keyword), - NodeKind::From => Some(Category::Keyword), - NodeKind::Include => Some(Category::Keyword), - NodeKind::Plus => Some(Category::Operator), - NodeKind::Star => match parent.kind() { - NodeKind::Strong => None, + NodeKind::Slash => match parent.kind() { + NodeKind::Desc => Some(Category::List), _ => Some(Category::Operator), }, - NodeKind::Slash => Some(Category::Operator), - NodeKind::PlusEq => Some(Category::Operator), - NodeKind::HyphEq => Some(Category::Operator), - NodeKind::StarEq => Some(Category::Operator), - NodeKind::SlashEq => Some(Category::Operator), + NodeKind::Dot => Some(Category::Punctuation), NodeKind::Eq => match parent.kind() { NodeKind::Heading => None, _ => Some(Category::Operator), @@ -274,10 +286,34 @@ impl Category { NodeKind::LtEq => Some(Category::Operator), NodeKind::Gt => Some(Category::Operator), NodeKind::GtEq => Some(Category::Operator), + NodeKind::PlusEq => Some(Category::Operator), + NodeKind::HyphEq => Some(Category::Operator), + NodeKind::StarEq => Some(Category::Operator), + NodeKind::SlashEq => Some(Category::Operator), NodeKind::Dots => Some(Category::Operator), NodeKind::Arrow => Some(Category::Operator), + NodeKind::Not => Some(Category::Keyword), + NodeKind::And => Some(Category::Keyword), + NodeKind::Or => Some(Category::Keyword), NodeKind::None => Some(Category::None), NodeKind::Auto => Some(Category::Auto), + NodeKind::Let => Some(Category::Keyword), + NodeKind::Set => Some(Category::Keyword), + NodeKind::Show => Some(Category::Keyword), + NodeKind::Wrap => Some(Category::Keyword), + NodeKind::If => Some(Category::Keyword), + NodeKind::Else => Some(Category::Keyword), + NodeKind::For => Some(Category::Keyword), + NodeKind::In => Some(Category::Keyword), + NodeKind::While => Some(Category::Keyword), + NodeKind::Break => Some(Category::Keyword), + NodeKind::Continue => Some(Category::Keyword), + NodeKind::Return => Some(Category::Keyword), + NodeKind::Import => Some(Category::Keyword), + 
NodeKind::Include => Some(Category::Keyword), + NodeKind::From => Some(Category::Keyword), + NodeKind::As => Some(Category::Keyword), + NodeKind::Ident(_) => match parent.kind() { NodeKind::Markup { .. } => Some(Category::Interpolated), NodeKind::FuncCall => Some(Category::Function), @@ -302,15 +338,6 @@ impl Category { NodeKind::Float(_) => Some(Category::Number), NodeKind::Numeric(_, _) => Some(Category::Number), NodeKind::Str(_) => Some(Category::String), - NodeKind::Error(_, _) => Some(Category::Invalid), - NodeKind::Unknown(_) => Some(Category::Invalid), - NodeKind::Underscore => None, - NodeKind::Markup { .. } => None, - NodeKind::Space { .. } => None, - NodeKind::Text(_) => None, - NodeKind::Quote { .. } => None, - NodeKind::List => None, - NodeKind::Enum => None, NodeKind::CodeBlock => None, NodeKind::ContentBlock => None, NodeKind::GroupExpr => None, @@ -341,6 +368,9 @@ impl Category { NodeKind::BreakExpr => None, NodeKind::ContinueExpr => None, NodeKind::ReturnExpr => None, + + NodeKind::Error(_, _) => Some(Category::Invalid), + NodeKind::Unknown(_) => Some(Category::Invalid), } } @@ -354,10 +384,12 @@ impl Category { Self::Escape => "constant.character.escape.content.typst", Self::Strong => "markup.bold.typst", Self::Emph => "markup.italic.typst", + Self::Link => "markup.underline.link.typst", Self::Raw => "markup.raw.typst", Self::Math => "string.other.math.typst", Self::Heading => "markup.heading.typst", Self::List => "markup.list.typst", + Self::Term => "markup.bold.typst", Self::Label => "entity.name.label.typst", Self::Ref => "markup.other.reference.typst", Self::Keyword => "keyword.typst", diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 89937f2c..6c6f690c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -571,6 +571,14 @@ impl PartialEq for NodeData { /// the parser. #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { + /// A line comment, two slashes followed by inner contents, terminated with + /// a newline: `//\n`. 
+ LineComment, + /// A block comment, a slash and a star followed by inner contents, + /// terminated with a star and a slash: `/**/`. + /// + /// The comment can contain nested block comments. + BlockComment, /// A left curly brace, starting a code block: `{`. LeftBrace, /// A right curly brace, terminating a code block: `}`. @@ -585,23 +593,83 @@ pub enum NodeKind { /// A right round parenthesis, terminating a grouped expression, collection, /// argument or parameter list: `)`. RightParen, + + /// Markup of which all lines must have a minimal indentation. + /// + /// Notably, the number does not determine in which column the markup + /// started, but to the right of which column all markup elements must be, + /// so it is zero except for headings and lists. + Markup { min_indent: usize }, + /// One or more whitespace characters. Single spaces are collapsed into text + /// nodes if they would otherwise be surrounded by text nodes. + /// + /// Also stores how many newlines are contained. + Space { newlines: usize }, + /// A forced line break. + Linebreak, + /// Consecutive text without markup. While basic text with just single + /// spaces is collapsed into a single node, certain symbols that could + /// possibly be markup force text into multiple nodes. + Text(EcoString), + /// A slash and the letter "u" followed by a hexadecimal unicode entity + /// enclosed in curly braces: `\u{1F5FA}`. + Escape(char), + /// A non-breaking space: `~`. + NonBreakingSpace, + /// A soft hyphen: `-?`. + Shy, + /// An en-dash: `--`. + EnDash, + /// An em-dash: `---`. + EmDash, + /// An ellipsis: `...`. + Ellipsis, + /// A smart quote: `'` or `"`. + Quote { double: bool }, /// The strong text toggle, multiplication operator, and wildcard import /// symbol: `*`. Star, /// Toggles emphasized text: `_`. Underscore, + /// Strong content: `*Strong*`. + Strong, + /// Emphasized content: `_Emphasized_`. + Emph, + /// A hyperlink. 
+ Link(EcoString), + /// A raw block with optional syntax highlighting: `` `...` ``. + Raw(Arc<RawNode>), + /// A math formula: `$x$`, `$[x^2]$`. + Math(Arc<MathNode>), + /// A section heading: `= Introduction`. + Heading, + /// An item in an unordered list: `- ...`. + List, + /// An item in an enumeration (ordered list): `+ ...` or `1. ...`. + Enum, + /// An explicit enumeration numbering: `23.`. + EnumNumbering(usize), + /// An item in a description list: `/ Term: Details`. + Desc, + /// A label: `