summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Haug <mhaug@live.de>2021-11-01 13:03:18 +0100
committerMartin Haug <mhaug@live.de>2021-11-05 13:44:50 +0100
commit49fb3cd4e2a5d6997ad4046d3514f154d8c866dd (patch)
tree4fb2a245a4cb84a6ef238ac1bc71786a0996913d
parent7d34a548ccd14debe0668e23454e1ced70e485ec (diff)
Code Review: Life is Like a Box of Iterators
-rw-r--r--benches/oneshot.rs8
-rw-r--r--src/parse/mod.rs3
-rw-r--r--src/parse/parser.rs24
-rw-r--r--src/parse/resolve.rs8
-rw-r--r--src/parse/scanner.rs10
-rw-r--r--src/parse/tokens.rs30
-rw-r--r--src/source.rs20
-rw-r--r--src/syntax/ast.rs33
-rw-r--r--src/syntax/mod.rs156
9 files changed, 150 insertions, 142 deletions
diff --git a/benches/oneshot.rs b/benches/oneshot.rs
index a42a710d..63f201ac 100644
--- a/benches/oneshot.rs
+++ b/benches/oneshot.rs
@@ -6,7 +6,7 @@ use typst::eval::eval;
use typst::layout::layout;
use typst::loading::MemLoader;
use typst::parse::{parse, Scanner, TokenMode, Tokens};
-use typst::source::{SourceFile, SourceId};
+use typst::source::SourceId;
use typst::Context;
const SRC: &str = include_str!("bench.typ");
@@ -44,13 +44,11 @@ fn bench_scan(iai: &mut Iai) {
}
fn bench_tokenize(iai: &mut Iai) {
- let src = SourceFile::detached(SRC);
- iai.run(|| Tokens::new(black_box(&src), black_box(TokenMode::Markup)).count());
+ iai.run(|| Tokens::new(black_box(&SRC), black_box(TokenMode::Markup)).count());
}
fn bench_parse(iai: &mut Iai) {
- let src = SourceFile::detached(SRC);
- iai.run(|| parse(&src));
+ iai.run(|| parse(&SRC));
}
fn bench_eval(iai: &mut Iai) {
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 22288d01..c6def4dc 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -12,12 +12,11 @@ pub use tokens::*;
use std::rc::Rc;
-use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a source file.
-pub fn parse(source: &SourceFile) -> Rc<GreenNode> {
+pub fn parse(source: &str) -> Rc<GreenNode> {
let mut p = Parser::new(source);
markup(&mut p);
p.finish()
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 240de43d..374e7c09 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,14 @@
use std::ops::Range;
use std::rc::Rc;
-use super::{TokenMode, Tokens};
-use crate::source::{SourceFile, SourceId};
+use super::{is_newline, TokenMode, Tokens};
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
use crate::util::EcoString;
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The parsed file.
- source: &'s SourceFile,
+ src: &'s str,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// The stack of open groups.
@@ -61,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(source: &'s SourceFile) -> Self {
- let mut tokens = Tokens::new(source, TokenMode::Markup);
+ pub fn new(src: &'s str) -> Self {
+ let mut tokens = Tokens::new(src, TokenMode::Markup);
let next = tokens.next();
Self {
- source,
+ src,
tokens,
groups: vec![],
next: next.clone(),
@@ -78,11 +77,6 @@ impl<'s> Parser<'s> {
}
}
- /// The id of the parsed source file.
- pub fn id(&self) -> SourceId {
- self.source.id()
- }
-
/// Start a nested node.
///
/// Each start call has to be matched with a call to `end`,
@@ -366,12 +360,16 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.source.byte_to_column(index).unwrap()
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
}
/// Slice out part of the source string.
pub fn get(&self, range: Range<usize>) -> &'s str {
- self.source.get(range).unwrap()
+ self.src.get(range).unwrap()
}
/// Continue parsing in a group.
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 8d4c04d4..3fab98a4 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,5 +1,5 @@
use super::{is_newline, Scanner};
-use crate::syntax::RawToken;
+use crate::syntax::RawData;
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
@@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trims the raw text.
-pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
+pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
- RawToken {
+ RawData {
lang: Some(tag.into()),
text: text.into(),
backticks,
block,
}
} else {
- RawToken {
+ RawData {
lang: None,
text: split_lines(text).join("\n").into(),
backticks,
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 8e3e4278..edf28e17 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -106,6 +106,16 @@ impl<'s> Scanner<'s> {
self.index
}
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
+ }
+
/// Jump to an index in the source string.
#[inline]
pub fn jump(&mut self, index: usize) {
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 1d2e32ec..ef2678d4 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,7 +1,6 @@
use super::{is_newline, resolve_raw, Scanner};
use crate::geom::{AngularUnit, LengthUnit};
use crate::parse::resolve::{resolve_hex, resolve_string};
-use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
@@ -9,7 +8,6 @@ use std::rc::Rc;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
- source: &'s SourceFile,
s: Scanner<'s>,
mode: TokenMode,
}
@@ -26,12 +24,8 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self {
- Self {
- s: Scanner::new(source.src()),
- source,
- mode,
- }
+ pub fn new(source: &'s str, mode: TokenMode) -> Self {
+ Self { s: Scanner::new(source), mode }
}
/// Get the current token mode.
@@ -244,7 +238,7 @@ impl<'s> Tokens<'s> {
if self.s.eat_if('}') {
if let Some(character) = resolve_hex(&sequence) {
- NodeKind::UnicodeEscape(UnicodeEscapeToken {
+ NodeKind::UnicodeEscape(UnicodeEscapeData {
character,
})
} else {
@@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
- let column = self.source.byte_to_column(self.s.index() - 1).unwrap();
+ let column = self.s.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') && backticks < u8::MAX {
backticks += 1;
@@ -322,7 +316,7 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block.
if backticks == 2 {
- return NodeKind::Raw(Rc::new(RawToken {
+ return NodeKind::Raw(Rc::new(RawData {
text: EcoString::new(),
lang: None,
backticks: 1,
@@ -397,7 +391,7 @@ impl<'s> Tokens<'s> {
};
if terminated {
- NodeKind::Math(Rc::new(MathToken {
+ NodeKind::Math(Rc::new(MathData {
formula: self.s.get(start .. end).into(),
display,
}))
@@ -492,7 +486,7 @@ impl<'s> Tokens<'s> {
}
}));
if self.s.eat_if('"') {
- NodeKind::Str(StrToken { string })
+ NodeKind::Str(StrData { string })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -567,7 +561,7 @@ mod tests {
use TokenMode::{Code, Markup};
fn UnicodeEscape(character: char) -> NodeKind {
- NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
+ NodeKind::UnicodeEscape(UnicodeEscapeData { character })
}
fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
@@ -575,7 +569,7 @@ mod tests {
}
fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
- NodeKind::Raw(Rc::new(RawToken {
+ NodeKind::Raw(Rc::new(RawData {
text: text.into(),
lang: lang.map(Into::into),
backticks: backticks_left,
@@ -586,7 +580,7 @@ mod tests {
fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
match err_msg {
None => {
- NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
+ NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
}
Some(msg) => NodeKind::Error(
ErrorPosition::End,
@@ -597,7 +591,7 @@ mod tests {
fn Str(string: &str, terminated: bool) -> NodeKind {
if terminated {
- NodeKind::Str(StrToken { string: string.into() })
+ NodeKind::Str(StrData { string: string.into() })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -687,7 +681,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
- let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::<Vec<_>>();
+ let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};
diff --git a/src/source.rs b/src/source.rs
index e3803f57..3b721251 100644
--- a/src/source.rs
+++ b/src/source.rs
@@ -8,10 +8,10 @@ use std::rc::Rc;
use serde::{Deserialize, Serialize};
-use crate::diag::{Error, TypResult};
+use crate::diag::TypResult;
use crate::loading::{FileHash, Loader};
use crate::parse::{is_newline, parse, Scanner};
-use crate::syntax::{GreenNode, Markup, NodeKind, RedNode};
+use crate::syntax::{GreenNode, Markup, RedNode};
use crate::util::PathExt;
#[cfg(feature = "codespan-reporting")]
@@ -134,28 +134,22 @@ impl SourceFile {
pub fn new(id: SourceId, path: &Path, src: String) -> Self {
let mut line_starts = vec![0];
line_starts.extend(newlines(&src));
- let mut init = Self {
+ Self {
id,
path: path.normalize(),
+ root: parse(&src),
src,
line_starts,
- root: Rc::new(GreenNode::new(NodeKind::Markup, 0)),
- };
-
- let root = parse(&init);
- init.root = root;
- init
+ }
}
pub fn ast(&self) -> TypResult<Markup> {
let red = RedNode::new_root(self.root.clone(), self.id);
let errors = red.errors();
if errors.is_empty() {
- Ok(red.as_ref().cast().unwrap())
+ Ok(red.cast().unwrap())
} else {
- Err(Box::new(
- errors.into_iter().map(|(span, msg)| Error::new(span, msg)).collect(),
- ))
+ Err(Box::new(errors))
}
}
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
index bdd0767d..6ca271a9 100644
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@@ -1,8 +1,39 @@
use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
use crate::geom::{AngularUnit, LengthUnit};
-use crate::node;
use crate::util::EcoString;
+macro_rules! node {
+ ($(#[$attr:meta])* $name:ident) => {
+ node!{$(#[$attr])* $name => $name}
+ };
+ ($(#[$attr:meta])* $variant:ident => $name:ident) => {
+ #[derive(Debug, Clone, PartialEq)]
+ #[repr(transparent)]
+ $(#[$attr])*
+ pub struct $name(RedNode);
+
+ impl TypedNode for $name {
+ fn cast_from(node: RedRef) -> Option<Self> {
+ if node.kind() != &NodeKind::$variant {
+ return None;
+ }
+
+ Some(Self(node.own()))
+ }
+ }
+
+ impl $name {
+ pub fn span(&self) -> Span {
+ self.0.span()
+ }
+
+ pub fn underlying(&self) -> RedRef {
+ self.0.as_ref()
+ }
+ }
+ };
+}
+
node! {
/// The syntactical root capable of representing a full parsed document.
Markup
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index 9fd2b21d..ca41d33f 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -15,6 +15,7 @@ pub use ident::*;
pub use pretty::*;
pub use span::*;
+use crate::diag::Error;
use crate::geom::{AngularUnit, LengthUnit};
use crate::source::SourceId;
use crate::util::EcoString;
@@ -94,9 +95,9 @@ impl GreenNode {
}
pub fn with_children(kind: NodeKind, len: usize, children: Vec<Green>) -> Self {
- let mut meta = GreenData::new(kind, len);
- meta.erroneous |= children.iter().any(|c| c.erroneous());
- Self { data: meta, children }
+ let mut data = GreenData::new(kind, len);
+ data.erroneous |= children.iter().any(|c| c.erroneous());
+ Self { data, children }
}
pub fn with_child(kind: NodeKind, len: usize, child: impl Into<Green>) -> Self {
@@ -180,6 +181,10 @@ impl<'a> RedRef<'a> {
Span::new(self.id, self.offset, self.offset + self.green.len())
}
+ pub fn len(&self) -> usize {
+ self.green.len()
+ }
+
pub fn cast<T>(self) -> Option<T>
where
T: TypedNode,
@@ -205,6 +210,29 @@ impl<'a> RedRef<'a> {
})
}
+ pub fn errors(&self) -> Vec<Error> {
+ if !self.green.erroneous() {
+ return vec![];
+ }
+
+ match self.kind() {
+ NodeKind::Error(pos, msg) => {
+ let span = match pos {
+ ErrorPosition::Start => self.span().at_start(),
+ ErrorPosition::Full => self.span(),
+ ErrorPosition::End => self.span().at_end(),
+ };
+
+ vec![Error::new(span, msg.to_string())]
+ }
+ _ => self
+ .children()
+ .filter(|red| red.green.erroneous())
+ .flat_map(|red| red.errors())
+ .collect(),
+ }
+ }
+
pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedRef> {
self.children()
.find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind))
@@ -219,6 +247,18 @@ impl<'a> RedRef<'a> {
}
}
+impl Debug for RedRef<'_> {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{:?}: {:?}", self.kind(), self.span())?;
+ let mut children = self.children().peekable();
+ if children.peek().is_some() {
+ f.write_str(" ")?;
+ f.debug_list().entries(children.map(RedRef::own)).finish()?;
+ }
+ Ok(())
+ }
+}
+
#[derive(Clone, PartialEq)]
pub struct RedNode {
id: SourceId,
@@ -231,12 +271,27 @@ impl RedNode {
Self { id, offset: 0, green: root.into() }
}
+ pub fn as_ref<'a>(&'a self) -> RedRef<'a> {
+ RedRef {
+ id: self.id,
+ offset: self.offset,
+ green: &self.green,
+ }
+ }
+
pub fn span(&self) -> Span {
self.as_ref().span()
}
pub fn len(&self) -> usize {
- self.green.len()
+ self.as_ref().len()
+ }
+
+ pub fn cast<T>(self) -> Option<T>
+ where
+ T: TypedNode,
+ {
+ T::cast_from(self.as_ref())
}
pub fn kind(&self) -> &NodeKind {
@@ -247,36 +302,8 @@ impl RedNode {
self.as_ref().children()
}
- pub fn errors(&self) -> Vec<(Span, EcoString)> {
- if !self.green.erroneous() {
- return vec![];
- }
-
- match self.kind() {
- NodeKind::Error(pos, msg) => {
- let span = match pos {
- ErrorPosition::Start => self.span().at_start(),
- ErrorPosition::Full => self.span(),
- ErrorPosition::End => self.span().at_end(),
- };
-
- vec![(span, msg.clone())]
- }
- _ => self
- .as_ref()
- .children()
- .filter(|red| red.green.erroneous())
- .flat_map(|red| red.own().errors())
- .collect(),
- }
- }
-
- pub fn as_ref<'a>(&'a self) -> RedRef<'a> {
- RedRef {
- id: self.id,
- offset: self.offset,
- green: &self.green,
- }
+ pub fn errors<'a>(&'a self) -> Vec<Error> {
+ self.as_ref().errors()
}
pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedNode> {
@@ -294,15 +321,7 @@ impl RedNode {
impl Debug for RedNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{:?}: {:?}", self.kind(), self.span())?;
- let children = self.as_ref().children().collect::<Vec<_>>();
- if !children.is_empty() {
- f.write_str(" ")?;
- f.debug_list()
- .entries(children.into_iter().map(RedRef::own))
- .finish()?;
- }
- Ok(())
+ self.as_ref().fmt(f)
}
}
@@ -419,7 +438,7 @@ pub enum NodeKind {
EmDash,
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
- UnicodeEscape(UnicodeEscapeToken),
+ UnicodeEscape(UnicodeEscapeData),
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
@@ -440,9 +459,9 @@ pub enum NodeKind {
ListBullet,
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
- Raw(Rc<RawToken>),
+ Raw(Rc<RawData>),
/// Dollar signs surrounding inner contents.
- Math(Rc<MathToken>),
+ Math(Rc<MathData>),
/// An identifier: `center`.
Ident(EcoString),
/// A boolean: `true`, `false`.
@@ -463,7 +482,7 @@ pub enum NodeKind {
/// A fraction unit: `3fr`.
Fraction(f64),
/// A quoted string: `"..."`.
- Str(StrToken),
+ Str(StrData),
/// An array expression: `(1, "hi", 12cm)`.
Array,
/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
@@ -534,15 +553,14 @@ pub enum ErrorPosition {
/// A quoted string token: `"..."`.
#[derive(Debug, Clone, PartialEq)]
-#[repr(transparent)]
-pub struct StrToken {
+pub struct StrData {
/// The string inside the quotes.
pub string: EcoString,
}
/// A raw block token: `` `...` ``.
#[derive(Debug, Clone, PartialEq)]
-pub struct RawToken {
+pub struct RawData {
/// The raw text in the block.
pub text: EcoString,
/// The programming language of the raw text.
@@ -555,7 +573,7 @@ pub struct RawToken {
/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`.
#[derive(Debug, Clone, PartialEq)]
-pub struct MathToken {
+pub struct MathData {
/// The formula between the dollars.
pub formula: EcoString,
/// Whether the formula is display-level, that is, it is surrounded by
@@ -565,8 +583,7 @@ pub struct MathToken {
/// A unicode escape sequence token: `\u{1F5FA}`.
#[derive(Debug, Clone, PartialEq)]
-#[repr(transparent)]
-pub struct UnicodeEscapeToken {
+pub struct UnicodeEscapeData {
/// The resulting unicode character.
pub character: char,
}
@@ -712,36 +729,3 @@ impl NodeKind {
}
}
}
-
-#[macro_export]
-macro_rules! node {
- ($(#[$attr:meta])* $name:ident) => {
- node!{$(#[$attr])* $name => $name}
- };
- ($(#[$attr:meta])* $variant:ident => $name:ident) => {
- #[derive(Debug, Clone, PartialEq)]
- #[repr(transparent)]
- $(#[$attr])*
- pub struct $name(RedNode);
-
- impl TypedNode for $name {
- fn cast_from(node: RedRef) -> Option<Self> {
- if node.kind() != &NodeKind::$variant {
- return None;
- }
-
- Some(Self(node.own()))
- }
- }
-
- impl $name {
- pub fn span(&self) -> Span {
- self.0.span()
- }
-
- pub fn underlying(&self) -> RedRef {
- self.0.as_ref()
- }
- }
- };
-}