From 3533268b1f7a31581e7b8f44dff6d4f553ef348f Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 2 Oct 2020 15:43:29 +0200 Subject: =?UTF-8?q?Refactor=20parser=20=F0=9F=8F=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/syntax/ast/expr.rs | 123 +++++++++++++++++++++++++++++++++++++++++ src/syntax/ast/lit.rs | 98 +++++++++++++++++++++++++++++++++ src/syntax/ast/mod.rs | 9 +++ src/syntax/ast/tree.rs | 121 ++++++++++++++++++++++++++++++++++++++++ src/syntax/expr.rs | 124 ----------------------------------------- src/syntax/lit.rs | 98 --------------------------------- src/syntax/mod.rs | 15 +---- src/syntax/span.rs | 44 ++++++++++----- src/syntax/token.rs | 147 +++++++++++++++++++++++++++---------------------- src/syntax/tree.rs | 119 --------------------------------------- 10 files changed, 465 insertions(+), 433 deletions(-) create mode 100644 src/syntax/ast/expr.rs create mode 100644 src/syntax/ast/lit.rs create mode 100644 src/syntax/ast/mod.rs create mode 100644 src/syntax/ast/tree.rs delete mode 100644 src/syntax/expr.rs delete mode 100644 src/syntax/lit.rs delete mode 100644 src/syntax/tree.rs (limited to 'src/syntax') diff --git a/src/syntax/ast/expr.rs b/src/syntax/ast/expr.rs new file mode 100644 index 00000000..c07c6216 --- /dev/null +++ b/src/syntax/ast/expr.rs @@ -0,0 +1,123 @@ +//! Expressions. + +use crate::eval::Value; +use crate::layout::LayoutContext; +use crate::syntax::{Decoration, Ident, Lit, LitDict, SpanWith, Spanned}; +use crate::Feedback; + +/// An expression. +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + /// A literal: `true`, `1cm`, `"hi"`, `{_Hey!_}`. + Lit(Lit), + /// A unary operation: `-x`. + Unary(ExprUnary), + /// A binary operation: `a + b`, `a / b`. + Binary(ExprBinary), + /// An invocation of a function: `[foo: ...]`, `foo(...)`. + Call(ExprCall), +} + +impl Expr { + /// Evaluate the expression to a value. + pub async fn eval(&self, ctx: &LayoutContext<'_>, f: &mut Feedback) -> Value { + match self { + Self::Lit(lit) => lit.eval(ctx, f).await, + Self::Unary(unary) => unary.eval(ctx, f).await, + Self::Binary(binary) => binary.eval(ctx, f).await, + Self::Call(call) => call.eval(ctx, f).await, + } + } +} + +/// A unary operation: `-x`. +#[derive(Debug, Clone, PartialEq)] +pub struct ExprUnary { + /// The operator: `-`. + pub op: Spanned, + /// The expression to operator on: `x`. + pub expr: Spanned>, +} + +impl ExprUnary { + /// Evaluate the expression to a value. + pub async fn eval(&self, _: &LayoutContext<'_>, _: &mut Feedback) -> Value { + match self.op.v { + UnOp::Neg => todo!("eval neg"), + } + } +} + +/// A unary operator. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum UnOp { + /// The negation operator: `-`. + Neg, +} + +/// A binary operation: `a + b`, `a / b`. +#[derive(Debug, Clone, PartialEq)] +pub struct ExprBinary { + /// The left-hand side of the operation: `a`. + pub lhs: Spanned>, + /// The operator: `+`. + pub op: Spanned, + /// The right-hand side of the operation: `b`. + pub rhs: Spanned>, +} + +impl ExprBinary { + /// Evaluate the expression to a value. + pub async fn eval(&self, _: &LayoutContext<'_>, _: &mut Feedback) -> Value { + match self.op.v { + BinOp::Add => todo!("eval add"), + BinOp::Sub => todo!("eval sub"), + BinOp::Mul => todo!("eval mul"), + BinOp::Div => todo!("eval div"), + } + } +} + +/// A binary operator. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum BinOp { + /// The addition operator: `+`. + Add, + /// The subtraction operator: `-`. + Sub, + /// The multiplication operator: `*`. + Mul, + /// The division operator: `/`. + Div, +} + +/// An invocation of a function: `[foo: ...]`, `foo(...)`. +#[derive(Debug, Clone, PartialEq)] +pub struct ExprCall { + /// The name of the function. + pub name: Spanned, + /// The arguments to the function. + pub args: LitDict, +} + +impl ExprCall { + /// Evaluate the call expression to a value. + pub async fn eval(&self, ctx: &LayoutContext<'_>, f: &mut Feedback) -> Value { + let name = &self.name.v; + let span = self.name.span; + let args = self.args.eval(ctx, f).await; + + if let Some(func) = ctx.scope.func(name) { + let pass = func(span, args, ctx.clone()).await; + f.extend(pass.feedback); + f.decorations.push(Decoration::Resolved.span_with(span)); + pass.output + } else { + if !name.is_empty() { + error!(@f, span, "unknown function"); + f.decorations.push(Decoration::Unresolved.span_with(span)); + } + Value::Dict(args) + } + } +} diff --git a/src/syntax/ast/lit.rs b/src/syntax/ast/lit.rs new file mode 100644 index 00000000..bbdd0c81 --- /dev/null +++ b/src/syntax/ast/lit.rs @@ -0,0 +1,98 @@ +//! Literals. + +use crate::color::RgbaColor; +use crate::eval::{DictKey, DictValue, SpannedEntry, Value}; +use crate::layout::LayoutContext; +use crate::length::Length; +use crate::syntax::{Expr, Ident, SpanWith, Spanned, SynTree}; +use crate::{DynFuture, Feedback}; + +/// A literal. +#[derive(Debug, Clone, PartialEq)] +pub enum Lit { + /// A identifier literal: `left`. + Ident(Ident), + /// A boolean literal: `true`, `false`. + Bool(bool), + /// An integer literal: `120`. + Int(i64), + /// A floating-point literal: `1.2`, `10e-4`. + Float(f64), + /// A percent literal: `50%`. + Percent(f64), + /// A length literal: `12pt`, `3cm`. + Length(Length), + /// A color literal: `#ffccee`. + Color(RgbaColor), + /// A string literal: `"hello!"`. + Str(String), + /// A dictionary literal: `(false, 12cm, greeting = "hi")`. + Dict(LitDict), + /// A content literal: `{*Hello* there!}`. + Content(SynTree), +} + +impl Lit { + /// Evaluate the dictionary literal to a dictionary value. + pub async fn eval<'a>( + &'a self, + ctx: &'a LayoutContext<'a>, + f: &'a mut Feedback, + ) -> Value { + match *self { + Lit::Ident(ref i) => Value::Ident(i.clone()), + Lit::Bool(b) => Value::Bool(b), + Lit::Int(i) => Value::Number(i as f64), + Lit::Float(f) => Value::Number(f as f64), + Lit::Percent(p) => Value::Number(p as f64 / 100.0), + Lit::Length(l) => Value::Length(l), + Lit::Color(c) => Value::Color(c), + Lit::Str(ref s) => Value::Str(s.clone()), + Lit::Dict(ref d) => Value::Dict(d.eval(ctx, f).await), + Lit::Content(ref c) => Value::Tree(c.clone()), + } + } +} + +/// A dictionary literal: `(false, 12cm, greeting = "hi")`. +#[derive(Debug, Clone, PartialEq)] +pub struct LitDict(pub Vec); + +impl LitDict { + /// Create an empty dict literal. + pub fn new() -> Self { + Self(vec![]) + } + + /// Evaluate the dictionary literal to a dictionary value. + pub fn eval<'a>( + &'a self, + ctx: &'a LayoutContext<'a>, + f: &'a mut Feedback, + ) -> DynFuture<'a, DictValue> { + Box::pin(async move { + let mut dict = DictValue::new(); + + for entry in &self.0 { + let val = entry.expr.v.eval(ctx, f).await; + let spanned = val.span_with(entry.expr.span); + if let Some(key) = &entry.key { + dict.insert(&key.v, SpannedEntry::new(key.span, spanned)); + } else { + dict.push(SpannedEntry::val(spanned)); + } + } + + dict + }) + } +} + +/// An entry in a dictionary literal: `false` or `greeting = "hi"`. +#[derive(Debug, Clone, PartialEq)] +pub struct LitDictEntry { + /// The key of the entry if there was one: `greeting`. + pub key: Option>, + /// The value of the entry: `"hi"`. + pub expr: Spanned, +} diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs new file mode 100644 index 00000000..56ae4134 --- /dev/null +++ b/src/syntax/ast/mod.rs @@ -0,0 +1,9 @@ +//! Abstract syntax tree definition. + +mod expr; +mod lit; +mod tree; + +pub use expr::*; +pub use lit::*; +pub use tree::*; diff --git a/src/syntax/ast/tree.rs b/src/syntax/ast/tree.rs new file mode 100644 index 00000000..03aa3439 --- /dev/null +++ b/src/syntax/ast/tree.rs @@ -0,0 +1,121 @@ +//! The syntax tree. + +use crate::syntax::{Expr, Ident, SpanVec, Spanned}; + +/// A collection of nodes which form a tree together with the nodes' children. +pub type SynTree = SpanVec; + +/// A syntax node, which encompasses a single logical entity of parsed source +/// code. +#[derive(Debug, Clone, PartialEq)] +pub enum SynNode { + /// Whitespace containing less than two newlines. + Space, + /// Plain text. + Text(String), + + /// A forced line break. + Linebreak, + /// A paragraph break. + Parbreak, + /// Italics were enabled / disabled. + ToggleItalic, + /// Bolder was enabled / disabled. + ToggleBolder, + + /// A section heading. + Heading(NodeHeading), + /// An optionally syntax-highlighted raw block. + Raw(NodeRaw), + + /// An expression. + Expr(Expr), +} + +/// A section heading. +#[derive(Debug, Clone, PartialEq)] +pub struct NodeHeading { + /// The section depth (how many hashtags minus 1). + pub level: Spanned, + /// The contents of the heading. + pub contents: SynTree, +} + +/// A raw block, rendered in monospace with optional syntax highlighting. +/// +/// Raw blocks start with an arbitrary number of backticks and end with the same +/// number of backticks. If you want to include a sequence of backticks in a raw +/// block, simply surround the block with more backticks. +/// +/// When using at least two backticks, an optional language tag may follow +/// directly after the backticks. This tag defines which language to +/// syntax-highlight the text in. Apart from the language tag and some +/// whitespace trimming discussed below, everything inside a raw block is +/// rendered verbatim, in particular, there are no escape sequences. +/// +/// # Examples +/// - Raw text is surrounded by backticks. +/// ```typst +/// `raw` +/// ``` +/// - An optional language tag may follow directly at the start when the block +/// is surrounded by at least two backticks. +/// ```typst +/// ``rust println!("hello!")``; +/// ``` +/// - Blocks can span multiple lines. Two backticks suffice to be able to +/// specify the language tag, but three are fine, too. +/// ```typst +/// ``rust +/// loop { +/// find_yak().shave(); +/// } +/// `` +/// ``` +/// - Start with a space to omit the language tag (the space will be trimmed +/// from the output) and use more backticks to allow backticks in the raw +/// text. +/// `````typst +/// ```` This contains ```backticks``` and has no leading & trailing spaces. ```` +/// ````` +/// +/// # Trimming +/// If we would always render the raw text between the backticks exactly as +/// given, a few things would become problematic or even impossible: +/// - Typical multiline code blocks (like in the example above) would have an +/// additional newline before and after the code. +/// - Raw text wrapped in more than one backtick could not exist without +/// leading whitespace since the first word would be interpreted as a +/// language tag. +/// - A single backtick without surrounding spaces could not exist as raw text +/// since it would be interpreted as belonging to the opening or closing +/// backticks. +/// +/// To fix these problems, we trim text in multi-backtick blocks as follows: +/// - We trim a single space or a sequence of whitespace followed by a newline +/// at the start. +/// - We trim a single space or a newline followed by a sequence of whitespace +/// at the end. +/// +/// With these rules, a single raw backtick can be produced by the sequence +/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding +/// spaces and multiline code blocks don't have extra empty lines. Note that +/// you can always force leading or trailing whitespace simply by adding more +/// spaces. +#[derive(Debug, Clone, PartialEq)] +pub struct NodeRaw { + /// An optional identifier specifying the language to syntax-highlight in. + pub lang: Option, + /// The lines of raw text, determined as the raw string between the + /// backticks trimmed according to the above rules and split at newlines. + pub lines: Vec, + /// Whether the element can be layouted inline. + /// + /// - When true, it will be layouted integrated within the surrounding + /// paragraph. + /// - When false, it will be separated into its own paragraph. + /// + /// Single-backtick blocks are always inline-level. Multi-backtick blocks + /// are inline-level when they contain no newlines. + pub inline: bool, +} diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs deleted file mode 100644 index 7f4d03d5..00000000 --- a/src/syntax/expr.rs +++ /dev/null @@ -1,124 +0,0 @@ -//! Expressions. - -use super::span::{SpanWith, Spanned}; -use super::{Decoration, Ident, Lit, LitDict}; -use crate::eval::Value; -use crate::layout::LayoutContext; -use crate::Feedback; - -/// An expression. -#[derive(Debug, Clone, PartialEq)] -pub enum Expr { - /// A literal: `true`, `1cm`, `"hi"`, `{_Hey!_}`. - Lit(Lit), - /// A unary operation: `-x`. - Unary(ExprUnary), - /// A binary operation: `a + b`, `a / b`. - Binary(ExprBinary), - /// An invocation of a function: `[foo: ...]`, `foo(...)`. - Call(ExprCall), -} - -impl Expr { - /// Evaluate the expression to a value. - pub async fn eval(&self, ctx: &LayoutContext<'_>, f: &mut Feedback) -> Value { - match self { - Self::Lit(lit) => lit.eval(ctx, f).await, - Self::Unary(unary) => unary.eval(ctx, f).await, - Self::Binary(binary) => binary.eval(ctx, f).await, - Self::Call(call) => call.eval(ctx, f).await, - } - } -} - -/// A unary operation: `-x`. -#[derive(Debug, Clone, PartialEq)] -pub struct ExprUnary { - /// The operator: `-`. - pub op: Spanned, - /// The expression to operator on: `x`. - pub expr: Spanned>, -} - -impl ExprUnary { - /// Evaluate the expression to a value. - pub async fn eval(&self, _: &LayoutContext<'_>, _: &mut Feedback) -> Value { - match self.op.v { - UnOp::Neg => todo!("eval neg"), - } - } -} - -/// A unary operator. -#[derive(Debug, Clone, PartialEq)] -pub enum UnOp { - /// The negation operator: `-`. - Neg, -} - -/// A binary operation: `a + b`, `a / b`. -#[derive(Debug, Clone, PartialEq)] -pub struct ExprBinary { - /// The left-hand side of the operation: `a`. - pub lhs: Spanned>, - /// The operator: `+`. - pub op: Spanned, - /// The right-hand side of the operation: `b`. - pub rhs: Spanned>, -} - -impl ExprBinary { - /// Evaluate the expression to a value. - pub async fn eval(&self, _: &LayoutContext<'_>, _: &mut Feedback) -> Value { - match self.op.v { - BinOp::Add => todo!("eval add"), - BinOp::Sub => todo!("eval sub"), - BinOp::Mul => todo!("eval mul"), - BinOp::Div => todo!("eval div"), - } - } -} - -/// A binary operator. -#[derive(Debug, Clone, PartialEq)] -pub enum BinOp { - /// The addition operator: `+`. - Add, - /// The subtraction operator: `-`. - Sub, - /// The multiplication operator: `*`. - Mul, - /// The division operator: `/`. - Div, -} - -/// An invocation of a function: `[foo: ...]`, `foo(...)`. -#[derive(Debug, Clone, PartialEq)] -pub struct ExprCall { - /// The name of the function. - pub name: Spanned, - /// The arguments to the function. - pub args: LitDict, -} - -impl ExprCall { - /// Evaluate the call expression to a value. - pub async fn eval(&self, ctx: &LayoutContext<'_>, f: &mut Feedback) -> Value { - let name = &self.name.v; - let span = self.name.span; - let args = self.args.eval(ctx, f).await; - - if let Some(func) = ctx.scope.func(name) { - let pass = func(span, args, ctx.clone()).await; - f.extend(pass.feedback); - f.decorations.push(Decoration::Resolved.span_with(span)); - pass.output - } else { - if !name.is_empty() { - error!(@f, span, "unknown function"); - f.decorations.push(Decoration::Unresolved.span_with(span)); - } - Value::Dict(args) - } - } -} diff --git a/src/syntax/lit.rs b/src/syntax/lit.rs deleted file mode 100644 index 3cd94583..00000000 --- a/src/syntax/lit.rs +++ /dev/null @@ -1,98 +0,0 @@ -//! Literals. - -use super::{Expr, Ident, SpanWith, Spanned, SynTree}; -use crate::color::RgbaColor; -use crate::eval::{DictKey, DictValue, SpannedEntry, Value}; -use crate::layout::LayoutContext; -use crate::length::Length; -use crate::{DynFuture, Feedback}; - -/// A literal. -#[derive(Debug, Clone, PartialEq)] -pub enum Lit { - /// A identifier literal: `left`. - Ident(Ident), - /// A boolean literal: `true`, `false`. - Bool(bool), - /// An integer literal: `120`. - Int(i64), - /// A floating-point literal: `1.2`, `10e-4`. - Float(f64), - /// A percent literal: `50%`. - Percent(f64), - /// A length literal: `12pt`, `3cm`. - Length(Length), - /// A color literal: `#ffccee`. - Color(RgbaColor), - /// A string literal: `"hello!"`. - Str(String), - /// A dictionary literal: `(false, 12cm, greeting = "hi")`. - Dict(LitDict), - /// A content literal: `{*Hello* there!}`. - Content(SynTree), -} - -impl Lit { - /// Evaluate the dictionary literal to a dictionary value. - pub async fn eval<'a>( - &'a self, - ctx: &'a LayoutContext<'a>, - f: &'a mut Feedback, - ) -> Value { - match *self { - Lit::Ident(ref i) => Value::Ident(i.clone()), - Lit::Bool(b) => Value::Bool(b), - Lit::Int(i) => Value::Number(i as f64), - Lit::Float(f) => Value::Number(f as f64), - Lit::Percent(p) => Value::Number(p as f64 / 100.0), - Lit::Length(l) => Value::Length(l), - Lit::Color(c) => Value::Color(c), - Lit::Str(ref s) => Value::Str(s.clone()), - Lit::Dict(ref d) => Value::Dict(d.eval(ctx, f).await), - Lit::Content(ref c) => Value::Tree(c.clone()), - } - } -} - -/// A dictionary literal: `(false, 12cm, greeting = "hi")`. -#[derive(Debug, Default, Clone, PartialEq)] -pub struct LitDict(pub Vec); - -impl LitDict { - /// Create an empty dict literal. - pub fn new() -> Self { - Self(vec![]) - } - - /// Evaluate the dictionary literal to a dictionary value. - pub fn eval<'a>( - &'a self, - ctx: &'a LayoutContext<'a>, - f: &'a mut Feedback, - ) -> DynFuture<'a, DictValue> { - Box::pin(async move { - let mut dict = DictValue::new(); - - for entry in &self.0 { - let val = entry.value.v.eval(ctx, f).await; - let spanned = val.span_with(entry.value.span); - if let Some(key) = &entry.key { - dict.insert(&key.v, SpannedEntry::new(key.span, spanned)); - } else { - dict.push(SpannedEntry::val(spanned)); - } - } - - dict - }) - } -} - -/// An entry in a dictionary literal: `false` or `greeting = "hi"`. -#[derive(Debug, Clone, PartialEq)] -pub struct LitDictEntry { - /// The key of the entry if there was one: `greeting`. - pub key: Option>, - /// The value of the entry: `"hi"`. - pub value: Spanned, -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index f4472df5..98e1b4d7 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,19 +1,10 @@ //! Syntax types. -mod expr; +pub mod ast; +pub mod token; + mod ident; -mod lit; mod span; -mod token; -mod tree; - -/// Abstract syntax tree definition. -pub mod ast { - use super::*; - pub use expr::*; - pub use lit::*; - pub use tree::*; -} pub use ast::*; pub use ident::*; diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 62929706..179c46de 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -13,7 +13,7 @@ thread_local! { /// Annotate a value with a span. pub trait SpanWith: Sized { /// Wraps `self` in a `Spanned` with the given span. - fn span_with(self, span: Span) -> Spanned { + fn span_with(self, span: impl Into) -> Spanned { Spanned::new(self, span) } } @@ -50,8 +50,8 @@ pub struct Spanned { impl Spanned { /// Create a new instance from a value and its span. - pub fn new(v: T, span: Span) -> Self { - Self { v, span } + pub fn new(v: T, span: impl Into) -> Self { + Self { v, span: span.into() } } /// Create a new instance from a value with the zero span. @@ -123,16 +123,16 @@ impl Span { } /// Create a new span with the earlier start and later end position. - pub fn merge(a: Self, b: Self) -> Self { + pub fn join(self, other: Self) -> Self { Self { - start: a.start.min(b.start), - end: a.end.max(b.end), + start: self.start.min(other.start), + end: self.end.max(other.end), } } /// Expand a span by merging it with another span. pub fn expand(&mut self, other: Self) { - *self = Self::merge(*self, other) + *self = self.join(other) } /// When set to `false` comparisons with `PartialEq` ignore spans. @@ -164,6 +164,24 @@ impl PartialEq for Span { } } +impl From for Span +where + T: Into + Copy, +{ + fn from(pos: T) -> Self { + Self::at(pos) + } +} + +impl From<(T, T)> for Span +where + T: Into, +{ + fn from((start, end): (T, T)) -> Self { + Self::new(start, end) + } +} + impl Debug for Span { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "<{:?}-{:?}>", self.start, self.end) @@ -185,6 +203,12 @@ impl Pos { } } +impl Offset for Pos { + fn offset(self, by: Self) -> Self { + Pos(self.0 + by.0) + } +} + impl From for Pos { fn from(index: u32) -> Self { Self(index) @@ -197,12 +221,6 @@ impl From for Pos { } } -impl Offset for Pos { - fn offset(self, by: Self) -> Self { - Pos(self.0 + by.0) - } -} - impl Debug for Pos { fn fmt(&self, f: &mut Formatter) -> fmt::Result { Debug::fmt(&self.0, f) diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 4cb8501f..5c159bbd 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,4 +1,4 @@ -//! Tokenization. +//! Token definition. use crate::length::Length; @@ -8,6 +8,8 @@ pub enum Token<'s> { /// One or more whitespace characters. The contained `usize` denotes the /// number of newlines that were contained in the whitespace. Space(usize), + /// A consecutive non-markup string. + Text(&'s str), /// A line comment with inner string contents `//\n`. LineComment(&'s str), @@ -15,6 +17,20 @@ pub enum Token<'s> { /// can contain nested block comments. BlockComment(&'s str), + /// A star. It can appear in a function header where it signifies the + /// multiplication of expressions or the body where it modifies the styling. + Star, + /// An underscore in body-text. + Underscore, + /// A backslash followed by whitespace in text. + Backslash, + /// A hashtag indicating a section heading. + Hashtag, + /// A raw block. + Raw(TokenRaw<'s>), + /// A unicode escape sequence. + UnicodeEscape(TokenUnicodeEscape<'s>), + /// A left bracket starting a function invocation or body: `[`. LeftBracket, /// A right bracket ending a function invocation or body: `]`. @@ -28,29 +44,24 @@ pub enum Token<'s> { /// A right parenthesis in a function header: `)`. RightParen, - /// A double forward chevron in a function header: `>>`. - Chain, - /// A colon in a function header: `:`. Colon, /// A comma in a function header: `,`. Comma, /// An equals sign in a function header: `=`. Equals, + /// A double forward chevron in a function header: `>>`. + Chain, + /// A plus in a function header, signifying the addition of expressions. + Plus, + /// A hyphen in a function header, signifying the subtraction of + /// expressions. + Hyphen, + /// A slash in a function header, signifying the division of expressions. + Slash, /// An identifier in a function header: `center`. Ident(&'s str), - /// A quoted string in a function header: `"..."`. - Str { - /// The string inside the quotes. - /// - /// _Note_: If the string contains escape sequences these are not yet - /// applied to be able to just store a string slice here instead of - /// a String. The escaping is done later in the parser. - string: &'s str, - /// Whether the closing quote was present. - terminated: bool, - }, /// A boolean in a function header: `true | false`. Bool(bool), /// A number in a function header: `3.14`. @@ -59,48 +70,44 @@ pub enum Token<'s> { Length(Length), /// A hex value in a function header: `#20d82a`. Hex(&'s str), - /// A plus in a function header, signifying the addition of expressions. - Plus, - /// A hyphen in a function header, signifying the subtraction of - /// expressions. - Hyphen, - /// A slash in a function header, signifying the division of expressions. - Slash, + /// A quoted string in a function header: `"..."`. + Str(TokenStr<'s>), - /// A star. It can appear in a function header where it signifies the - /// multiplication of expressions or the body where it modifies the styling. - Star, - /// An underscore in body-text. - Underscore, - /// A backslash followed by whitespace in text. - Backslash, + /// Things that are not valid in the context they appeared in. + Invalid(&'s str), +} - /// A hashtag token in the body can indicate compute mode or headings. - Hashtag, +/// A quoted string in a function header: `"..."`. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TokenStr<'s> { + /// The string inside the quotes. + /// + /// _Note_: If the string contains escape sequences these are not yet + /// applied to be able to just store a string slice here instead of + /// a `String`. The resolving is done later in the parser. + pub string: &'s str, + /// Whether the closing quote was present. + pub terminated: bool, +} - /// A unicode escape sequence. - UnicodeEscape { - /// The escape sequence between two braces. - sequence: &'s str, - /// Whether the closing brace was present. - terminated: bool, - }, - - /// Raw block. - Raw { - /// The raw text between the backticks. - raw: &'s str, - /// The number of opening backticks. - backticks: usize, - /// Whether all closing backticks were present. - terminated: bool, - }, - - /// Any other consecutive string. - Text(&'s str), +/// A unicode escape sequence. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TokenUnicodeEscape<'s> { + /// The escape sequence between two braces. + pub sequence: &'s str, + /// Whether the closing brace was present. + pub terminated: bool, +} - /// Things that are not valid in the context they appeared in. - Invalid(&'s str), +/// A raw block. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TokenRaw<'s> { + /// The raw text between the backticks. + pub text: &'s str, + /// The number of opening backticks. + pub backticks: usize, + /// Whether all closing backticks were present. + pub terminated: bool, } impl<'s> Token<'s> { @@ -108,34 +115,40 @@ impl<'s> Token<'s> { pub fn name(self) -> &'static str { match self { Self::Space(_) => "space", + Self::Text(_) => "text", + Self::LineComment(_) => "line comment", Self::BlockComment(_) => "block comment", + + Self::Star => "star", + Self::Underscore => "underscore", + Self::Backslash => "backslash", + Self::Hashtag => "hashtag", + Self::Raw { .. } => "raw block", + Self::UnicodeEscape { .. } => "unicode escape sequence", + Self::LeftBracket => "opening bracket", Self::RightBracket => "closing bracket", - Self::LeftParen => "opening paren", - Self::RightParen => "closing paren", Self::LeftBrace => "opening brace", Self::RightBrace => "closing brace", - Self::Chain => "function chain operator", + Self::LeftParen => "opening paren", + Self::RightParen => "closing paren", + Self::Colon => "colon", Self::Comma => "comma", Self::Equals => "equals sign", + Self::Chain => "function chaining operator", + Self::Plus => "plus sign", + Self::Hyphen => "minus sign", + Self::Slash => "slash", + Self::Ident(_) => "identifier", - Self::Str { .. } => "string", Self::Bool(_) => "bool", Self::Number(_) => "number", Self::Length(_) => "length", Self::Hex(_) => "hex value", - Self::Plus => "plus", - Self::Hyphen => "minus", - Self::Slash => "slash", - Self::Star => "star", - Self::Underscore => "underscore", - Self::Backslash => "backslash", - Self::Hashtag => "hashtag", - Self::UnicodeEscape { .. } => "unicode escape sequence", - Self::Raw { .. } => "raw block", - Self::Text(_) => "text", + Self::Str { .. } => "string", + Self::Invalid("*/") => "end of block comment", Self::Invalid(_) => "invalid token", } diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs deleted file mode 100644 index 80bca399..00000000 --- a/src/syntax/tree.rs +++ /dev/null @@ -1,119 +0,0 @@ -//! The syntax tree. - -use super::span::{SpanVec, Spanned}; -use super::{Expr, Ident}; - -/// A collection of nodes which form a tree together with the nodes' children. -pub type SynTree = SpanVec; - -/// A syntax node, which encompasses a single logical entity of parsed source -/// code. -#[derive(Debug, Clone, PartialEq)] -pub enum SynNode { - /// Whitespace containing less than two newlines. - Spacing, - /// A forced line break. - Linebreak, - /// A paragraph break. - Parbreak, - /// Italics were enabled / disabled. - ToggleItalic, - /// Bolder was enabled / disabled. - ToggleBolder, - /// Plain text. - Text(String), - /// An optionally syntax-highlighted raw block. - Raw(NodeRaw), - /// A section heading. - Heading(NodeHeading), - /// An expression. - Expr(Expr), -} - -/// A raw block, rendered in monospace with optional syntax highlighting. -/// -/// Raw blocks start with an arbitrary number of backticks and end with the same -/// number of backticks. If you want to include a sequence of backticks in a raw -/// block, simply surround the block with more backticks. -/// -/// When using at least two backticks, an optional language tag may follow -/// directly after the backticks. This tag defines which language to -/// syntax-highlight the text in. Apart from the language tag and some -/// whitespace trimming discussed below, everything inside a raw block is -/// rendered verbatim, in particular, there are no escape sequences. -/// -/// # Examples -/// - Raw text is surrounded by backticks. -/// ```typst -/// `raw` -/// ``` -/// - An optional language tag may follow directly at the start when the block -/// is surrounded by at least two backticks. -/// ```typst -/// ``rust println!("hello!")``; -/// ``` -/// - Blocks can span multiple lines. Two backticks suffice to be able to -/// specify the language tag, but three are fine, too. -/// ```typst -/// ``rust -/// loop { -/// find_yak().shave(); -/// } -/// `` -/// ``` -/// - Start with a space to omit the language tag (the space will be trimmed -/// from the output) and use more backticks to allow backticks in the raw -/// text. -/// `````typst -/// ```` This contains ```backticks``` and has no leading & trailing spaces. ```` -/// ````` -/// -/// # Trimming -/// If we would always render the raw text between the backticks exactly as -/// given, a few things would become problematic or even impossible: -/// - Typical multiline code blocks (like in the example above) would have an -/// additional newline before and after the code. -/// - Raw text wrapped in more than one backtick could not exist without -/// leading whitespace since the first word would be interpreted as a -/// language tag. -/// - A single backtick without surrounding spaces could not exist as raw text -/// since it would be interpreted as belonging to the opening or closing -/// backticks. -/// -/// To fix these problems, we trim text in multi-backtick blocks as follows: -/// - We trim a single space or a sequence of whitespace followed by a newline -/// at the start. -/// - We trim a single space or a newline followed by a sequence of whitespace -/// at the end. -/// -/// With these rules, a single raw backtick can be produced by the sequence -/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding -/// spaces and multiline code blocks don't have extra empty lines. Note that -/// you can always force leading or trailing whitespace simply by adding more -/// spaces. -#[derive(Debug, Clone, PartialEq)] -pub struct NodeRaw { - /// An optional identifier specifying the language to syntax-highlight in. - pub lang: Option, - /// The lines of raw text, determined as the raw string between the - /// backticks trimmed according to the above rules and split at newlines. - pub lines: Vec, - /// Whether the element can be layouted inline. - /// - /// - When true, it will be layouted integrated within the surrounding - /// paragraph. - /// - When false, it will be separated into its own paragraph. - /// - /// Single-backtick blocks are always inline-level. Multi-backtick blocks - /// are inline-level when they contain no newlines. - pub inline: bool, -} - -/// A section heading. -#[derive(Debug, Clone, PartialEq)] -pub struct NodeHeading { - /// The section depth (how many hashtags minus 1). - pub level: Spanned, - /// The contents of the heading. - pub contents: SynTree, -} -- cgit v1.2.3