diff options
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 3 | ||||
| -rw-r--r-- | src/parse/parser.rs | 24 | ||||
| -rw-r--r-- | src/parse/resolve.rs | 8 | ||||
| -rw-r--r-- | src/parse/scanner.rs | 10 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 30 |
5 files changed, 38 insertions, 37 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 22288d01..c6def4dc 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,12 +12,11 @@ pub use tokens::*; use std::rc::Rc; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; /// Parse a source file. -pub fn parse(source: &SourceFile) -> Rc<GreenNode> { +pub fn parse(source: &str) -> Rc<GreenNode> { let mut p = Parser::new(source); markup(&mut p); p.finish() diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 240de43d..374e7c09 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,15 +1,14 @@ use std::ops::Range; use std::rc::Rc; -use super::{TokenMode, Tokens}; -use crate::source::{SourceFile, SourceId}; +use super::{is_newline, TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { /// The parsed file. - source: &'s SourceFile, + src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, /// The stack of open groups. @@ -61,11 +60,11 @@ pub enum Group { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(source: &'s SourceFile) -> Self { - let mut tokens = Tokens::new(source, TokenMode::Markup); + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); let next = tokens.next(); Self { - source, + src, tokens, groups: vec![], next: next.clone(), @@ -78,11 +77,6 @@ impl<'s> Parser<'s> { } } - /// The id of the parsed source file. - pub fn id(&self) -> SourceId { - self.source.id() - } - /// Start a nested node. /// /// Each start call has to be matched with a call to `end`, @@ -366,12 +360,16 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.source.byte_to_column(index).unwrap() + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() } /// Slice out part of the source string. pub fn get(&self, range: Range<usize>) -> &'s str { - self.source.get(range).unwrap() + self.src.get(range).unwrap() } /// Continue parsing in a group. diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 8d4c04d4..3fab98a4 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -1,5 +1,5 @@ use super::{is_newline, Scanner}; -use crate::syntax::RawToken; +use crate::syntax::RawData; use crate::util::EcoString; /// Resolve all escape sequences in a string. @@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option<char> { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); - RawToken { + RawData { lang: Some(tag.into()), text: text.into(), backticks, block, } } else { - RawToken { + RawData { lang: None, text: split_lines(text).join("\n").into(), backticks, diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 8e3e4278..edf28e17 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -106,6 +106,16 @@ impl<'s> Scanner<'s> { self.index } + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } + /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 1d2e32ec..ef2678d4 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,7 +1,6 @@ use super::{is_newline, resolve_raw, Scanner}; use crate::geom::{AngularUnit, LengthUnit}; use crate::parse::resolve::{resolve_hex, resolve_string}; -use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; @@ -9,7 +8,6 @@ use std::rc::Rc; /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { - source: &'s SourceFile, s: Scanner<'s>, mode: TokenMode, } @@ -26,12 +24,8 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self { - Self { - s: Scanner::new(source.src()), - source, - mode, - } + pub fn new(source: &'s str, mode: TokenMode) -> Self { + Self { s: Scanner::new(source), mode } } /// Get the current token mode. @@ -244,7 +238,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('}') { if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeToken { + NodeKind::UnicodeEscape(UnicodeEscapeData { character, }) } else { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.source.byte_to_column(self.s.index() - 1).unwrap(); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -322,7 +316,7 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(Rc::new(RawToken { + return NodeKind::Raw(Rc::new(RawData { text: EcoString::new(), lang: None, backticks: 1, @@ -397,7 +391,7 @@ impl<'s> Tokens<'s> { }; if terminated { - NodeKind::Math(Rc::new(MathToken { + NodeKind::Math(Rc::new(MathData { formula: self.s.get(start .. end).into(), display, })) @@ -492,7 +486,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrToken { string }) + NodeKind::Str(StrData { string }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -567,7 +561,7 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { character }) + NodeKind::UnicodeEscape(UnicodeEscapeData { character }) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -575,7 +569,7 @@ mod tests { } fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind { - NodeKind::Raw(Rc::new(RawToken { + NodeKind::Raw(Rc::new(RawData { text: text.into(), lang: lang.map(Into::into), backticks: backticks_left, @@ -586,7 +580,7 @@ mod tests { fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { match err_msg { None => { - NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } Some(msg) => NodeKind::Error( ErrorPosition::End, @@ -597,7 +591,7 @@ mod tests { fn Str(string: &str, terminated: bool) -> NodeKind { if terminated { - NodeKind::Str(StrToken { string: string.into() }) + NodeKind::Str(StrData { string: string.into() }) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -687,7 +681,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::<Vec<_>>(); + let found = Tokens::new(&src, $mode).collect::<Vec<_>>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; |
