summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
author Martin Haug <mhaug@live.de> 2021-11-01 13:03:18 +0100
committer Martin Haug <mhaug@live.de> 2021-11-05 13:44:50 +0100
commit 49fb3cd4e2a5d6997ad4046d3514f154d8c866dd (patch)
tree 4fb2a245a4cb84a6ef238ac1bc71786a0996913d /src/parse
parent 7d34a548ccd14debe0668e23454e1ced70e485ec (diff)
Code Review: Life is Like a Box of Iterators
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 3
-rw-r--r-- src/parse/parser.rs 24
-rw-r--r-- src/parse/resolve.rs 8
-rw-r--r-- src/parse/scanner.rs 10
-rw-r--r-- src/parse/tokens.rs 30
5 files changed, 38 insertions, 37 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 22288d01..c6def4dc 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -12,12 +12,11 @@ pub use tokens::*;
use std::rc::Rc;
-use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a source file.
-pub fn parse(source: &SourceFile) -> Rc<GreenNode> {
+pub fn parse(source: &str) -> Rc<GreenNode> {
let mut p = Parser::new(source);
markup(&mut p);
p.finish()
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 240de43d..374e7c09 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,14 @@
use std::ops::Range;
use std::rc::Rc;
-use super::{TokenMode, Tokens};
-use crate::source::{SourceFile, SourceId};
+use super::{is_newline, TokenMode, Tokens};
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
use crate::util::EcoString;
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The parsed file.
- source: &'s SourceFile,
+ src: &'s str,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// The stack of open groups.
@@ -61,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(source: &'s SourceFile) -> Self {
- let mut tokens = Tokens::new(source, TokenMode::Markup);
+ pub fn new(src: &'s str) -> Self {
+ let mut tokens = Tokens::new(src, TokenMode::Markup);
let next = tokens.next();
Self {
- source,
+ src,
tokens,
groups: vec![],
next: next.clone(),
@@ -78,11 +77,6 @@ impl<'s> Parser<'s> {
}
}
- /// The id of the parsed source file.
- pub fn id(&self) -> SourceId {
- self.source.id()
- }
-
/// Start a nested node.
///
/// Each start call has to be matched with a call to `end`,
@@ -366,12 +360,16 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.source.byte_to_column(index).unwrap()
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
}
/// Slice out part of the source string.
pub fn get(&self, range: Range<usize>) -> &'s str {
- self.source.get(range).unwrap()
+ self.src.get(range).unwrap()
}
/// Continue parsing in a group.
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 8d4c04d4..3fab98a4 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,5 +1,5 @@
use super::{is_newline, Scanner};
-use crate::syntax::RawToken;
+use crate::syntax::RawData;
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
@@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trims the raw text.
-pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
+pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
- RawToken {
+ RawData {
lang: Some(tag.into()),
text: text.into(),
backticks,
block,
}
} else {
- RawToken {
+ RawData {
lang: None,
text: split_lines(text).join("\n").into(),
backticks,
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 8e3e4278..edf28e17 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -106,6 +106,16 @@ impl<'s> Scanner<'s> {
self.index
}
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ self.src[.. index]
+ .chars()
+ .rev()
+ .take_while(|&c| !is_newline(c))
+ .count()
+ }
+
/// Jump to an index in the source string.
#[inline]
pub fn jump(&mut self, index: usize) {
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 1d2e32ec..ef2678d4 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,7 +1,6 @@
use super::{is_newline, resolve_raw, Scanner};
use crate::geom::{AngularUnit, LengthUnit};
use crate::parse::resolve::{resolve_hex, resolve_string};
-use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
@@ -9,7 +8,6 @@ use std::rc::Rc;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
- source: &'s SourceFile,
s: Scanner<'s>,
mode: TokenMode,
}
@@ -26,12 +24,8 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self {
- Self {
- s: Scanner::new(source.src()),
- source,
- mode,
- }
+ pub fn new(source: &'s str, mode: TokenMode) -> Self {
+ Self { s: Scanner::new(source), mode }
}
/// Get the current token mode.
@@ -244,7 +238,7 @@ impl<'s> Tokens<'s> {
if self.s.eat_if('}') {
if let Some(character) = resolve_hex(&sequence) {
- NodeKind::UnicodeEscape(UnicodeEscapeToken {
+ NodeKind::UnicodeEscape(UnicodeEscapeData {
character,
})
} else {
@@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
- let column = self.source.byte_to_column(self.s.index() - 1).unwrap();
+ let column = self.s.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') && backticks < u8::MAX {
backticks += 1;
@@ -322,7 +316,7 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block.
if backticks == 2 {
- return NodeKind::Raw(Rc::new(RawToken {
+ return NodeKind::Raw(Rc::new(RawData {
text: EcoString::new(),
lang: None,
backticks: 1,
@@ -397,7 +391,7 @@ impl<'s> Tokens<'s> {
};
if terminated {
- NodeKind::Math(Rc::new(MathToken {
+ NodeKind::Math(Rc::new(MathData {
formula: self.s.get(start .. end).into(),
display,
}))
@@ -492,7 +486,7 @@ impl<'s> Tokens<'s> {
}
}));
if self.s.eat_if('"') {
- NodeKind::Str(StrToken { string })
+ NodeKind::Str(StrData { string })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -567,7 +561,7 @@ mod tests {
use TokenMode::{Code, Markup};
fn UnicodeEscape(character: char) -> NodeKind {
- NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
+ NodeKind::UnicodeEscape(UnicodeEscapeData { character })
}
fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
@@ -575,7 +569,7 @@ mod tests {
}
fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
- NodeKind::Raw(Rc::new(RawToken {
+ NodeKind::Raw(Rc::new(RawData {
text: text.into(),
lang: lang.map(Into::into),
backticks: backticks_left,
@@ -586,7 +580,7 @@ mod tests {
fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
match err_msg {
None => {
- NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
+ NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
}
Some(msg) => NodeKind::Error(
ErrorPosition::End,
@@ -597,7 +591,7 @@ mod tests {
fn Str(string: &str, terminated: bool) -> NodeKind {
if terminated {
- NodeKind::Str(StrToken { string: string.into() })
+ NodeKind::Str(StrData { string: string.into() })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -687,7 +681,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
- let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::<Vec<_>>();
+ let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};