summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Myriad-Dreamin <35292584+Myriad-Dreamin@users.noreply.github.com> 2024-03-01 17:17:41 +0800
committer: GitHub <noreply@github.com> 2024-03-01 09:17:41 +0000
commit: 030041466b5b8453ca23e43a6385f4592f78a56c (patch)
tree: 7af9f2d34c349980881a2b9908a5ad8decce1616
parent: 57ab6d09248ba036e7feb32f8b9527ec643f826c (diff)
Provide more fine-grained spans in raw blocks (#3257)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
-rw-r--r-- crates/typst-syntax/src/ast.rs 100
-rw-r--r-- crates/typst-syntax/src/highlight.rs 3
-rw-r--r-- crates/typst-syntax/src/kind.rs 9
-rw-r--r-- crates/typst-syntax/src/lexer.rs 134
-rw-r--r-- crates/typst-syntax/src/parser.rs 22
-rw-r--r-- crates/typst-syntax/src/set.rs 16
-rw-r--r-- crates/typst/src/eval/markup.rs 9
-rw-r--r-- crates/typst/src/foundations/value.rs 4
-rw-r--r-- crates/typst/src/layout/inline/mod.rs 2
-rw-r--r-- crates/typst/src/layout/inline/shaping.rs 9
-rw-r--r-- crates/typst/src/text/mod.rs 6
-rw-r--r-- crates/typst/src/text/raw.rs 140
-rw-r--r-- tests/typ/compiler/raw.typ 170
13 files changed, 500 insertions, 124 deletions
diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs
index 8f8eaac4..fc689a68 100644
--- a/crates/typst-syntax/src/ast.rs
+++ b/crates/typst-syntax/src/ast.rs
@@ -8,9 +8,7 @@ use std::ops::Deref;
use ecow::EcoString;
use unscanny::Scanner;
-use crate::{
- is_id_continue, is_id_start, is_newline, split_newlines, Span, SyntaxKind, SyntaxNode,
-};
+use crate::{is_newline, Span, SyntaxKind, SyntaxNode};
/// A typed AST node.
pub trait AstNode<'a>: Sized {
@@ -558,87 +556,51 @@ node! {
}
impl<'a> Raw<'a> {
- /// The trimmed raw text.
- pub fn text(self) -> EcoString {
- let mut text = self.0.text().as_str();
- let blocky = text.starts_with("```");
- text = text.trim_matches('`');
-
- // Trim tag, one space at the start, and one space at the end if the
- // last non-whitespace char is a backtick.
- if blocky {
- let mut s = Scanner::new(text);
- if s.eat_if(is_id_start) {
- s.eat_while(is_id_continue);
- }
- text = s.after();
- text = text.strip_prefix(' ').unwrap_or(text);
- if text.trim_end().ends_with('`') {
- text = text.strip_suffix(' ').unwrap_or(text);
- }
- }
-
- // Split into lines.
- let mut lines = split_newlines(text);
-
- if blocky {
- let dedent = lines
- .iter()
- .skip(1)
- .filter(|line| !line.chars().all(char::is_whitespace))
- // The line with the closing ``` is always taken into account
- .chain(lines.last())
- .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
- .min()
- .unwrap_or(0);
-
- // Dedent based on column, but not for the first line.
- for line in lines.iter_mut().skip(1) {
- let offset = line.chars().take(dedent).map(char::len_utf8).sum();
- *line = &line[offset..];
- }
-
- let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
-
- // Trims a sequence of whitespace followed by a newline at the start.
- if lines.first().is_some_and(is_whitespace) {
- lines.remove(0);
- }
-
- // Trims a newline followed by a sequence of whitespace at the end.
- if lines.last().is_some_and(is_whitespace) {
- lines.pop();
- }
- }
-
- lines.join("\n").into()
+ /// The lines in the raw block.
+ pub fn lines(self) -> impl DoubleEndedIterator<Item = Text<'a>> {
+ self.0.children().filter_map(SyntaxNode::cast)
}
/// An optional identifier specifying the language to syntax-highlight in.
- pub fn lang(self) -> Option<&'a str> {
- let text = self.0.text();
-
+ pub fn lang(self) -> Option<RawLang<'a>> {
// Only blocky literals are supposed to contain a language.
- if !text.starts_with("```") {
+ let delim: RawDelim = self.0.cast_first_match()?;
+ if delim.0.len() < 3 {
return Option::None;
}
- let inner = text.trim_start_matches('`');
- let mut s = Scanner::new(inner);
- s.eat_if(is_id_start).then(|| {
- s.eat_while(is_id_continue);
- s.before()
- })
+ self.0.cast_first_match()
}
/// Whether the raw text should be displayed in a separate block.
pub fn block(self) -> bool {
- let text = self.0.text();
- text.starts_with("```") && text.chars().any(is_newline)
+ self.0
+ .cast_first_match()
+ .is_some_and(|delim: RawDelim| delim.0.len() >= 3)
+ && self.0.children().any(|e| {
+ e.kind() == SyntaxKind::RawTrimmed && e.text().chars().any(is_newline)
+ })
}
}
node! {
+ /// A language tag at the start of a raw element: ``typ ``.
+ RawLang
+}
+
+impl<'a> RawLang<'a> {
+ /// Get the language tag.
+ pub fn get(self) -> &'a EcoString {
+ self.0.text()
+ }
+}
+
+node! {
+ /// A raw delimiter in single or 3+ backticks: `` ` ``.
+ RawDelim
+}
+
+node! {
/// A hyperlink: `https://typst.org`.
Link
}
diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs
index 19d35d0a..f1c8a298 100644
--- a/crates/typst-syntax/src/highlight.rs
+++ b/crates/typst-syntax/src/highlight.rs
@@ -153,6 +153,9 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
SyntaxKind::Strong => Some(Tag::Strong),
SyntaxKind::Emph => Some(Tag::Emph),
SyntaxKind::Raw => Some(Tag::Raw),
+ SyntaxKind::RawLang => None,
+ SyntaxKind::RawTrimmed => None,
+ SyntaxKind::RawDelim => None,
SyntaxKind::Link => Some(Tag::Link),
SyntaxKind::Label => Some(Tag::Label),
SyntaxKind::Ref => Some(Tag::Ref),
diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs
index e5dd4e9b..c34f6002 100644
--- a/crates/typst-syntax/src/kind.rs
+++ b/crates/typst-syntax/src/kind.rs
@@ -28,6 +28,12 @@ pub enum SyntaxKind {
Emph,
/// Raw text with optional syntax highlighting: `` `...` ``.
Raw,
+ /// A language tag at the start of raw text: ``typ ``.
+ RawLang,
+ /// A raw delimiter consisting of 1 or 3+ backticks: `` ` ``.
+ RawDelim,
+ /// A sequence of whitespace to ignore in a raw block: ` `.
+ RawTrimmed,
/// A hyperlink: `https://typst.org`.
Link,
/// A label: `<intro>`.
@@ -369,6 +375,9 @@ impl SyntaxKind {
Self::Strong => "strong content",
Self::Emph => "emphasized content",
Self::Raw => "raw block",
+ Self::RawLang => "raw language tag",
+ Self::RawTrimmed => "raw trimmed",
+ Self::RawDelim => "raw delimiter",
Self::Link => "link",
Self::Label => "label",
Self::Ref => "reference",
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index 300a8353..aacbee62 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -16,6 +16,8 @@ pub(super) struct Lexer<'s> {
mode: LexMode,
/// Whether the last token contained a newline.
newline: bool,
+ /// The state held by raw line lexing.
+ raw: Vec<(SyntaxKind, usize)>,
/// An error for the last token.
error: Option<EcoString>,
}
@@ -29,6 +31,8 @@ pub(super) enum LexMode {
Math,
/// Keywords, literals and operators.
Code,
+ /// The contents of a raw block.
+ Raw,
}
impl<'s> Lexer<'s> {
@@ -40,6 +44,7 @@ impl<'s> Lexer<'s> {
mode,
newline: false,
error: None,
+ raw: Vec::new(),
}
}
@@ -86,6 +91,14 @@ impl Lexer<'_> {
/// Shared.
impl Lexer<'_> {
pub fn next(&mut self) -> SyntaxKind {
+ if self.mode == LexMode::Raw {
+ let Some((kind, end)) = self.raw.pop() else {
+ return SyntaxKind::Eof;
+ };
+ self.s.jump(end);
+ return kind;
+ }
+
self.newline = false;
self.error = None;
let start = self.s.cursor();
@@ -101,6 +114,7 @@ impl Lexer<'_> {
LexMode::Markup => self.markup(start, c),
LexMode::Math => self.math(start, c),
LexMode::Code => self.code(start, c),
+ LexMode::Raw => unreachable!(),
},
None => SyntaxKind::Eof,
@@ -224,15 +238,23 @@ impl Lexer<'_> {
}
fn raw(&mut self) -> SyntaxKind {
+ let start = self.s.cursor() - 1;
+ self.raw.clear();
+
+ // Determine number of opening backticks.
let mut backticks = 1;
while self.s.eat_if('`') {
backticks += 1;
}
+ // Special case for ``.
if backticks == 2 {
- return SyntaxKind::Raw;
+ self.push_raw(SyntaxKind::RawDelim);
+ self.s.jump(start + 1);
+ return SyntaxKind::RawDelim;
}
+ // Find end of raw text.
let mut found = 0;
while found < backticks {
match self.s.eat() {
@@ -246,12 +268,99 @@ impl Lexer<'_> {
return self.error("unclosed raw text");
}
- SyntaxKind::Raw
+ let end = self.s.cursor();
+ if backticks >= 3 {
+ self.blocky_raw(start, end, backticks);
+ } else {
+ // Single backtick needs no trimming or extra fanciness.
+ self.s.jump(end - backticks);
+ self.push_raw(SyntaxKind::Text);
+ self.s.jump(end);
+ }
+
+ // Closing delimiter.
+ self.push_raw(SyntaxKind::RawDelim);
+
+ // The saved tokens will be removed in reverse.
+ self.raw.reverse();
+
+ // Opening delimiter.
+ self.s.jump(start + backticks);
+ SyntaxKind::RawDelim
+ }
+
+ fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+ // Language tag.
+ self.s.jump(start + backticks);
+ if self.s.eat_if(is_id_start) {
+ self.s.eat_while(is_id_continue);
+ self.push_raw(SyntaxKind::RawLang);
+ }
+
+ // Determine inner content between backticks and with trimmed
+ // single spaces (line trimming comes later).
+ self.s.eat_if(' ');
+ let mut inner = self.s.to(end - backticks);
+ if inner.trim_end().ends_with('`') {
+ inner = inner.strip_suffix(' ').unwrap_or(inner);
+ }
+
+ // Determine dedent level.
+ let lines = split_newlines(inner);
+ let dedent = lines
+ .iter()
+ .skip(1)
+ .filter(|line| !line.chars().all(char::is_whitespace))
+ // The line with the closing ``` is always taken into account
+ .chain(lines.last())
+ .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+ .min()
+ .unwrap_or(0);
+
+ let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
+ let starts_whitespace = lines.first().is_some_and(is_whitespace);
+ let ends_whitespace = lines.last().is_some_and(is_whitespace);
+
+ let mut lines = lines.into_iter();
+ let mut skipped = false;
+
+ // Trim whitespace + newline at start.
+ if starts_whitespace {
+ self.s.advance(lines.next().unwrap().len());
+ skipped = true;
+ }
+ // Trim whitespace + newline at end.
+ if ends_whitespace {
+ lines.next_back();
+ }
+
+ // Add lines.
+ for (i, line) in lines.enumerate() {
+ let dedent = if i == 0 && !skipped { 0 } else { dedent };
+ let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
+ self.s.eat_newline();
+ self.s.advance(offset);
+ self.push_raw(SyntaxKind::RawTrimmed);
+ self.s.advance(line.len() - offset);
+ self.push_raw(SyntaxKind::Text);
+ }
+
+ // Add final trimmed.
+ if self.s.cursor() < end - backticks {
+ self.s.jump(end - backticks);
+ self.push_raw(SyntaxKind::RawTrimmed);
+ }
+ self.s.jump(end);
+ }
+
+ fn push_raw(&mut self, kind: SyntaxKind) {
+ let end = self.s.cursor();
+ self.raw.push((kind, end));
}
fn link(&mut self) -> SyntaxKind {
let (link, balanced) = link_prefix(self.s.after());
- self.s.jump(self.s.cursor() + link.len());
+ self.s.advance(link.len());
if !balanced {
return self.error(
@@ -632,6 +741,25 @@ fn keyword(ident: &str) -> Option<SyntaxKind> {
})
}
+trait ScannerExt {
+ fn advance(&mut self, by: usize);
+ fn eat_newline(&mut self) -> bool;
+}
+
+impl ScannerExt for Scanner<'_> {
+ fn advance(&mut self, by: usize) {
+ self.jump(self.cursor() + by);
+ }
+
+ fn eat_newline(&mut self) -> bool {
+ let ate = self.eat_if(is_newline);
+ if ate && self.before().ends_with('\r') {
+ self.eat_if('\n');
+ }
+ ate
+ }
+}
+
/// Whether a character will become a Space token in Typst
#[inline]
fn is_space(character: char, mode: LexMode) -> bool {
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index f4bb19e1..4785b8a1 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -116,13 +116,13 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Escape
| SyntaxKind::Shorthand
| SyntaxKind::SmartQuote
- | SyntaxKind::Raw
| SyntaxKind::Link
| SyntaxKind::Label => p.eat(),
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
+ SyntaxKind::RawDelim => raw(p),
SyntaxKind::HeadingMarker if *at_start => heading(p),
SyntaxKind::ListMarker if *at_start => list_item(p),
SyntaxKind::EnumMarker if *at_start => enum_item(p),
@@ -172,6 +172,22 @@ fn emph(p: &mut Parser) {
p.wrap(m, SyntaxKind::Emph);
}
+/// Parses raw text with optional syntax highlighting: `` `...` ``.
+fn raw(p: &mut Parser) {
+ let m = p.marker();
+ p.enter(LexMode::Raw);
+ p.assert(SyntaxKind::RawDelim);
+
+ // Eats until the closing delimiter.
+ while !p.eof() && !p.at(SyntaxKind::RawDelim) {
+ p.eat();
+ }
+
+ p.expect(SyntaxKind::RawDelim);
+ p.exit();
+ p.wrap(m, SyntaxKind::Raw);
+}
+
/// Parses a section heading: `= Introduction`.
fn heading(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
@@ -747,6 +763,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
SyntaxKind::LeftBrace => code_block(p),
SyntaxKind::LeftBracket => content_block(p),
SyntaxKind::LeftParen => expr_with_paren(p, atomic),
+ SyntaxKind::RawDelim => raw(p),
SyntaxKind::Dollar => equation(p),
SyntaxKind::Let => let_binding(p),
SyntaxKind::Set => set_rule(p),
@@ -768,8 +785,7 @@ fn code_primary(p: &mut Parser, atomic: bool) {
| SyntaxKind::Bool
| SyntaxKind::Numeric
| SyntaxKind::Str
- | SyntaxKind::Label
- | SyntaxKind::Raw => p.eat(),
+ | SyntaxKind::Label => p.eat(),
_ => p.expected("expression"),
}
diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs
index 906d5fac..39e64651 100644
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@@ -15,7 +15,10 @@ impl SyntaxSet {
}
/// Insert a syntax kind into the set.
+ ///
+ /// You can only add kinds with discriminant < 128.
pub const fn add(self, kind: SyntaxKind) -> Self {
+ assert!((kind as u8) < BITS);
Self(self.0 | bit(kind))
}
@@ -26,10 +29,12 @@ impl SyntaxSet {
/// Whether the set contains the given syntax kind.
pub const fn contains(&self, kind: SyntaxKind) -> bool {
- (self.0 & bit(kind)) != 0
+ (kind as u8) < BITS && (self.0 & bit(kind)) != 0
}
}
+const BITS: u8 = 128;
+
const fn bit(kind: SyntaxKind) -> u128 {
1 << (kind as usize)
}
@@ -54,7 +59,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Escape)
.add(SyntaxKind::Shorthand)
.add(SyntaxKind::SmartQuote)
- .add(SyntaxKind::Raw)
+ .add(SyntaxKind::RawDelim)
.add(SyntaxKind::Link)
.add(SyntaxKind::Label)
.add(SyntaxKind::Hash)
@@ -119,7 +124,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Numeric)
.add(SyntaxKind::Str)
.add(SyntaxKind::Label)
- .add(SyntaxKind::Raw);
+ .add(SyntaxKind::RawDelim);
/// Syntax kinds that are unary operators.
pub const UNARY_OP: SyntaxSet = SyntaxSet::new()
@@ -172,11 +177,6 @@ mod tests {
use super::*;
#[test]
- fn test_size() {
- assert!((SyntaxKind::Eof as usize) < 128);
- }
-
- #[test]
fn test_set() {
let set = SyntaxSet::new().add(SyntaxKind::And).add(SyntaxKind::Or);
assert!(set.contains(SyntaxKind::And));
diff --git a/crates/typst/src/eval/markup.rs b/crates/typst/src/eval/markup.rs
index 1bb12d49..d43e4495 100644
--- a/crates/typst/src/eval/markup.rs
+++ b/crates/typst/src/eval/markup.rs
@@ -8,7 +8,9 @@ use crate::model::{
};
use crate::symbols::Symbol;
use crate::syntax::ast::{self, AstNode};
-use crate::text::{LinebreakElem, RawElem, SmartQuoteElem, SpaceElem, TextElem};
+use crate::text::{
+ LinebreakElem, RawContent, RawElem, SmartQuoteElem, SpaceElem, TextElem,
+};
impl Eval for ast::Markup<'_> {
type Output = Content;
@@ -165,9 +167,10 @@ impl Eval for ast::Raw<'_> {
type Output = Content;
fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
- let mut elem = RawElem::new(self.text()).with_block(self.block());
+ let lines = self.lines().map(|line| (line.get().clone(), line.span())).collect();
+ let mut elem = RawElem::new(RawContent::Lines(lines)).with_block(self.block());
if let Some(lang) = self.lang() {
- elem.push_lang(Some(lang.into()));
+ elem.push_lang(Some(lang.get().clone()));
}
Ok(elem.pack())
}
diff --git a/crates/typst/src/foundations/value.rs b/crates/typst/src/foundations/value.rs
index b5f143d2..f661228a 100644
--- a/crates/typst/src/foundations/value.rs
+++ b/crates/typst/src/foundations/value.rs
@@ -19,7 +19,7 @@ use crate::foundations::{
use crate::layout::{Abs, Angle, Em, Fr, Length, Ratio, Rel};
use crate::symbols::Symbol;
use crate::syntax::{ast, Span};
-use crate::text::{RawElem, TextElem};
+use crate::text::{RawContent, RawElem, TextElem};
use crate::util::ArcExt;
use crate::visualize::{Color, Gradient, Pattern};
@@ -209,7 +209,7 @@ impl Value {
Self::Symbol(v) => TextElem::packed(v.get()),
Self::Content(v) => v,
Self::Module(module) => module.content(),
- _ => RawElem::new(self.repr())
+ _ => RawElem::new(RawContent::Text(self.repr()))
.with_lang(Some("typc".into()))
.with_block(false)
.pack(),
diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs
index 2802bbcb..6add4310 100644
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
@@ -287,7 +287,7 @@ impl SpanMapper {
fn span_at(&self, offset: usize) -> (Span, u16) {
let mut cursor = 0;
for &(len, span) in &self.0 {
- if (cursor..=cursor + len).contains(&offset) {
+ if (cursor..cursor + len).contains(&offset) {
return (span, u16::try_from(offset - cursor).unwrap_or(0));
}
cursor += len;
diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs
index b558d5ad..f914d347 100644
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@@ -231,6 +231,7 @@ impl<'a> ShapedText<'a> {
let decos = TextElem::deco_in(self.styles);
let fill = TextElem::fill_in(self.styles);
let stroke = TextElem::stroke_in(self.styles);
+ let span_offset = TextElem::span_offset_in(self.styles);
for ((font, y_offset), group) in
self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
@@ -267,6 +268,12 @@ impl<'a> ShapedText<'a> {
frame.size_mut().x += justification_left.at(self.size)
+ justification_right.at(self.size);
+ // We may not be able to reach the offset completely if
+ // it exceeds u16, but better to have a roughly correct
+ // span offset than nothing.
+ let mut span = shaped.span;
+ span.1 = span.1.saturating_add(span_offset.saturating_as());
+
// |<---- a Glyph ---->|
// -->|ShapedGlyph|<--
// +---+-----------+---+
@@ -293,7 +300,7 @@ impl<'a> ShapedText<'a> {
x_offset: shaped.x_offset + justification_left,
range: (shaped.range.start - range.start).saturating_as()
..(shaped.range.end - range.start).saturating_as(),
- span: shaped.span,
+ span,
}
})
.collect();
diff --git a/crates/typst/src/text/mod.rs b/crates/typst/src/text/mod.rs
index 13049b12..13193fe8 100644
--- a/crates/typst/src/text/mod.rs
+++ b/crates/typst/src/text/mod.rs
@@ -622,6 +622,12 @@ pub struct TextElem {
#[required]
pub text: EcoString,
+ /// The offset of the text in the text syntax node referenced by this
+ /// element's span.
+ #[internal]
+ #[ghost]
+ pub span_offset: usize,
+
/// A delta to apply on the font weight.
#[internal]
#[fold]
diff --git a/crates/typst/src/text/raw.rs b/crates/typst/src/text/raw.rs
index c71b16b1..d47cd947 100644
--- a/crates/typst/src/text/raw.rs
+++ b/crates/typst/src/text/raw.rs
@@ -17,7 +17,7 @@ use crate::foundations::{
};
use crate::layout::{BlockElem, Em, HAlignment};
use crate::model::Figurable;
-use crate::syntax::{split_newlines, LinkedNode, Spanned};
+use crate::syntax::{split_newlines, LinkedNode, Span, Spanned};
use crate::text::{
FontFamily, FontList, Hyphenate, Lang, LinebreakElem, LocalName, Region,
SmartQuoteElem, TextElem, TextSize,
@@ -27,8 +27,9 @@ use crate::visualize::Color;
use crate::{syntax, World};
// Shorthand for highlighter closures.
-type StyleFn<'a> = &'a mut dyn FnMut(&LinkedNode, Range<usize>, synt::Style) -> Content;
-type LineFn<'a> = &'a mut dyn FnMut(i64, Range<usize>, &mut Vec<Content>);
+type StyleFn<'a> =
+ &'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
+type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
/// Raw text with optional syntax highlighting.
///
@@ -101,7 +102,7 @@ pub struct RawElem {
/// ```
/// ````
#[required]
- pub text: EcoString,
+ pub text: RawContent,
/// Whether the raw text is displayed as a separate block.
///
@@ -300,17 +301,24 @@ impl Packed<RawElem> {
#[comemo::memoize]
fn highlight(&self, styles: StyleChain) -> Vec<Packed<RawLine>> {
let elem = self.as_ref();
- let span = self.span();
- let mut text = elem.text().clone();
- if text.contains('\t') {
- let tab_size = RawElem::tab_size_in(styles);
- text = align_tabs(&text, tab_size);
- }
+ let text = elem.text();
+ let lines = match text {
+ RawContent::Lines(lines) if !lines.iter().any(|(s, _)| s.contains('\t')) => {
+ lines.clone()
+ }
+ _ => {
+ let mut text = text.get();
+ if text.contains('\t') {
+ let tab_size = RawElem::tab_size_in(styles);
+ text = align_tabs(&text, tab_size);
+ }
+ let lines = split_newlines(&text);
+ lines.into_iter().map(|line| (line.into(), self.span())).collect()
+ }
+ };
- let lines = split_newlines(&text);
let count = lines.len() as i64;
-
let lang = elem
.lang(styles)
.as_ref()
@@ -332,6 +340,7 @@ impl Packed<RawElem> {
let mut seq = vec![];
if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) {
+ let text = text.get();
let root = match lang.as_deref() {
Some("typc") => syntax::parse_code(&text),
_ => syntax::parse(&text),
@@ -341,16 +350,23 @@ impl Packed<RawElem> {
&text,
LinkedNode::new(&root),
synt::Highlighter::new(theme),
- &mut |_, range, style| styled(&text[range], foreground, style),
+ &mut |i, _, range, style| {
+ // Find start of line.
+ // Note: Dedent is already applied to the text
+ let span_offset = text[..range.start]
+ .rfind('\n')
+ .map_or(0, |i| range.start - (i + 1));
+ styled(&text[range], foreground, style, lines[i].1, span_offset)
+ },
&mut |i, range, line| {
seq.push(
Packed::new(RawLine::new(
- i + 1,
+ (i + 1) as i64,
count,
EcoString::from(&text[range]),
Content::sequence(line.drain(..)),
))
- .spanned(span),
+ .spanned(lines[i].1),
);
},
)
@@ -366,33 +382,43 @@ impl Packed<RawElem> {
})
}) {
let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
- for (i, line) in lines.into_iter().enumerate() {
+ for (i, (line, line_span)) in lines.into_iter().enumerate() {
let mut line_content = vec![];
- for (style, piece) in
- highlighter.highlight_line(line, syntax_set).into_iter().flatten()
+ let mut span_offset = 0;
+ for (style, piece) in highlighter
+ .highlight_line(line.as_str(), syntax_set)
+ .into_iter()
+ .flatten()
{
- line_content.push(styled(piece, foreground, style));
+ line_content.push(styled(
+ piece,
+ foreground,
+ style,
+ line_span,
+ span_offset,
+ ));
+ span_offset += piece.len();
}
seq.push(
Packed::new(RawLine::new(
i as i64 + 1,
count,
- EcoString::from(line),
+ line,
Content::sequence(line_content),
))
- .spanned(span),
+ .spanned(line_span),
);
}
} else {
- seq.extend(lines.into_iter().enumerate().map(|(i, line)| {
+ seq.extend(lines.into_iter().enumerate().map(|(i, (line, line_span))| {
Packed::new(RawLine::new(
i as i64 + 1,
count,
- EcoString::from(line),
- TextElem::packed(line),
+ line.clone(),
+ TextElem::packed(line).spanned(line_span),
))
- .spanned(span)
+ .spanned(line_span)
}));
};
@@ -478,10 +504,42 @@ impl Figurable for Packed<RawElem> {}
impl PlainText for Packed<RawElem> {
fn plain_text(&self, text: &mut EcoString) {
- text.push_str(self.text());
+ text.push_str(&self.text().get());
+ }
+}
+
+/// The content of the raw text.
+#[derive(Debug, Clone, Hash, PartialEq)]
+pub enum RawContent {
+ /// From a string.
+ Text(EcoString),
+ /// From lines of text.
+ Lines(EcoVec<(EcoString, Span)>),
+}
+
+impl RawContent {
+ /// Returns or synthesizes the text content of the raw text.
+ fn get(&self) -> EcoString {
+ match self.clone() {
+ RawContent::Text(text) => text,
+ RawContent::Lines(lines) => {
+ let mut lines = lines.into_iter().map(|(s, _)| s);
+ if lines.len() <= 1 {
+ lines.next().unwrap_or_default()
+ } else {
+ lines.collect::<Vec<_>>().join("\n").into()
+ }
+ }
+ }
}
}
+cast! {
+ RawContent,
+ self => self.get().into_value(),
+ v: EcoString => Self::Text(v),
+}
+
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by [`raw`]($raw) elements.
@@ -536,7 +594,7 @@ struct ThemedHighlighter<'a> {
/// The range of the current line.
range: Range<usize>,
/// The current line number.
- line: i64,
+ line: usize,
/// The function to style a piece of text.
style_fn: StyleFn<'a>,
/// The function to append a line.
@@ -597,8 +655,12 @@ impl<'a> ThemedHighlighter<'a> {
let offset = self.node.range().start + len;
let token_range = offset..(offset + line.len());
- self.current_line
- .push((self.style_fn)(&self.node, token_range, style));
+ self.current_line.push((self.style_fn)(
+ self.line,
+ &self.node,
+ token_range,
+ style,
+ ));
len += line.len() + 1;
}
@@ -621,23 +683,33 @@ impl<'a> ThemedHighlighter<'a> {
}
/// Style a piece of text with a syntect style.
-fn styled(piece: &str, foreground: synt::Color, style: synt::Style) -> Content {
- let mut body = TextElem::packed(piece);
+fn styled(
+ piece: &str,
+ foreground: synt::Color,
+ style: synt::Style,
+ span: Span,
+ span_offset: usize,
+) -> Content {
+ let mut body = TextElem::packed(piece).spanned(span);
+
+ if span_offset > 0 {
+ body = body.styled(TextElem::set_span_offset(span_offset));
+ }
if style.foreground != foreground {
body = body.styled(TextElem::set_fill(to_typst(style.foreground).into()));
}
if style.font_style.contains(synt::FontStyle::BOLD) {
- body = body.strong();
+ body = body.strong().spanned(span);
}
if style.font_style.contains(synt::FontStyle::ITALIC) {
- body = body.emph();
+ body = body.emph().spanned(span);
}
if style.font_style.contains(synt::FontStyle::UNDERLINE) {
- body = body.underlined();
+ body = body.underlined().spanned(span);
}
body
diff --git a/tests/typ/compiler/raw.typ b/tests/typ/compiler/raw.typ
new file mode 100644
index 00000000..3084146d
--- /dev/null
+++ b/tests/typ/compiler/raw.typ
@@ -0,0 +1,170 @@
+// Test new raw parser
+// Ref: false
+
+---
+#let empty = (
+ name: "empty",
+ input: ``,
+ text: "",
+)
+
+#let backtick = (
+ name: "backtick",
+ input: ``` ` ```,
+ text: "`",
+ block: false,
+)
+
+#let lang-backtick = (
+ name: "lang-backtick",
+ input: ```js ` ```,
+ lang: "js",
+ text: "`",
+ block: false,
+)
+
+// The language tag stops on space
+#let lang-space = (
+ name: "lang-space",
+ input: ```js test ```,
+ lang: "js",
+ text: "test ",
+ block: false,
+)
+
+// The language tag stops on newline
+#let lang-newline = (
+ name: "lang-newline",
+ input: ```js
+test
+```,
+ lang: "js",
+ text: "test",
+ block: true,
+)
+
+// The first line and the last line are ignored
+#let blocky = (
+ name: "blocky",
+ input: {
+```
+test
+```
+},
+ text: "test",
+ block: true,
+)
+
+// A blocky raw should handle dedents
+#let blocky-dedent = (
+ name: "blocky-dedent",
+ input: {
+```
+ test
+ ```
+ },
+ text: "test",
+ block: true,
+)
+
+// When there is content in the first line, it should eat exactly one whitespace char.
+#let blocky-dedent-firstline = (
+ name: "blocky-dedent-firstline",
+ input: ``` test
+ ```,
+ text: "test",
+ block: true,
+)
+
+// When there is content in the first line, it should eat exactly one whitespace char.
+#let blocky-dedent-firstline2 = (
+ name: "blocky-dedent-firstline2",
+ input: ``` test
+```,
+ text: "test",
+ block: true,
+)
+
+// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
+#let blocky-dedent-firstline3 = (
+ name: "blocky-dedent-firstline3",
+ input: ``` test
+ test2
+ ```,
+ text: "test\n test2",
+ block: true,
+)
+
+// The first line is not affected by dedent, and the middle lines don't consider the whitespace prefix of the first line.
+#let blocky-dedent-firstline4 = (
+ name: "blocky-dedent-firstline4",
+ input: ``` test
+ test2
+ ```,
+ text: " test\ntest2",
+ block: true,
+)
+
+#let blocky-dedent-lastline = (
+ name: "blocky-dedent-lastline",
+ input: ```
+ test
+ ```,
+ text: " test",
+ block: true,
+)
+
+#let blocky-dedent-lastline2 = (
+ name: "blocky-dedent-lastline2",
+ input: ```
+ test
+ ```,
+ text: "test",
+ block: true,
+)
+
+#let blocky-tab = (
+ name: "blocky-tab",
+ input: {
+```
+ test
+```
+},
+ text: "\ttest",
+ block: true,
+)
+
+#let blocky-tab-dedent = (
+ name: "blocky-tab-dedent",
+ input: {
+```
+ test
+
+ ```
+},
+ text: "test\n ",
+ block: true,
+)
+
+#let cases = (
+ empty,
+ backtick,
+ lang-backtick,
+ lang-space,
+ lang-newline,
+ blocky,
+ blocky-dedent,
+ blocky-dedent-firstline,
+ blocky-dedent-firstline2,
+ blocky-dedent-firstline3,
+ blocky-dedent-lastline,
+ blocky-dedent-lastline2,
+ blocky-tab,
+ blocky-tab-dedent,
+)
+
+#for c in cases {
+ assert.eq(c.text, c.input.text, message: "in point " + c.name + ", expect " + repr(c.text) + ", got " + repr(c.input.text) + "")
+ let block = c.at("block", default: false)
+ assert.eq(block, c.input.block, message: "in point " + c.name + ", expect " + repr(block) + ", got " + repr(c.input.block) + "")
+}