Code Review: Life is Like a Box of Iterators

author: Martin Haug <mhaug@live.de> 2021-11-01 13:03:18 +0100
committer: Martin Haug <mhaug@live.de> 2021-11-05 13:44:50 +0100
commit: 49fb3cd4e2a5d6997ad4046d3514f154d8c866dd (patch)
tree: 4fb2a245a4cb84a6ef238ac1bc71786a0996913d /src/parse
parent: 7d34a548ccd14debe0668e23454e1ced70e485ec (diff)
5 files changed, 38 insertions, 37 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 22288d01..c6def4dc 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -12,12 +12,11 @@ pub use tokens::*;
 
 use std::rc::Rc;
 
-use crate::source::SourceFile;
 use crate::syntax::*;
 use crate::util::EcoString;
 
 /// Parse a source file.
-pub fn parse(source: &SourceFile) -> Rc<GreenNode> {
+pub fn parse(source: &str) -> Rc<GreenNode> {
     let mut p = Parser::new(source);
     markup(&mut p);
     p.finish()
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 240de43d..374e7c09 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,14 @@
 use std::ops::Range;
 use std::rc::Rc;
 
-use super::{TokenMode, Tokens};
-use crate::source::{SourceFile, SourceId};
+use super::{is_newline, TokenMode, Tokens};
 use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
     /// The parsed file.
-    source: &'s SourceFile,
+    src: &'s str,
     /// An iterator over the source tokens.
     tokens: Tokens<'s>,
     /// The stack of open groups.
@@ -61,11 +60,11 @@ pub enum Group {
 
 impl<'s> Parser<'s> {
     /// Create a new parser for the source string.
-    pub fn new(source: &'s SourceFile) -> Self {
-        let mut tokens = Tokens::new(source, TokenMode::Markup);
+    pub fn new(src: &'s str) -> Self {
+        let mut tokens = Tokens::new(src, TokenMode::Markup);
         let next = tokens.next();
         Self {
-            source,
+            src,
             tokens,
             groups: vec![],
             next: next.clone(),
@@ -78,11 +77,6 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// The id of the parsed source file.
-    pub fn id(&self) -> SourceId {
-        self.source.id()
-    }
-
     /// Start a nested node.
     ///
     /// Each start call has to be matched with a call to `end`,
@@ -366,12 +360,16 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.source.byte_to_column(index).unwrap()
+        self.src[.. index]
+            .chars()
+            .rev()
+            .take_while(|&c| !is_newline(c))
+            .count()
     }
 
     /// Slice out part of the source string.
     pub fn get(&self, range: Range<usize>) -> &'s str {
-        self.source.get(range).unwrap()
+        self.src.get(range).unwrap()
     }
 
     /// Continue parsing in a group.
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index 8d4c04d4..3fab98a4 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -1,5 +1,5 @@
 use super::{is_newline, Scanner};
-use crate::syntax::RawToken;
+use crate::syntax::RawData;
 use crate::util::EcoString;
 
 /// Resolve all escape sequences in a string.
@@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
 }
 
 /// Resolve the language tag and trims the raw text.
-pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
+pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
     if backticks > 1 {
         let (tag, inner) = split_at_lang_tag(text);
         let (text, block) = trim_and_split_raw(column, inner);
-        RawToken {
+        RawData {
             lang: Some(tag.into()),
             text: text.into(),
             backticks,
             block,
         }
     } else {
-        RawToken {
+        RawData {
             lang: None,
             text: split_lines(text).join("\n").into(),
             backticks,
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 8e3e4278..edf28e17 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -106,6 +106,16 @@ impl<'s> Scanner<'s> {
         self.index
     }
 
+    /// The column index of a given index in the source string.
+    #[inline]
+    pub fn column(&self, index: usize) -> usize {
+        self.src[.. index]
+            .chars()
+            .rev()
+            .take_while(|&c| !is_newline(c))
+            .count()
+    }
+
     /// Jump to an index in the source string.
     #[inline]
     pub fn jump(&mut self, index: usize) {
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 1d2e32ec..ef2678d4 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,7 +1,6 @@
 use super::{is_newline, resolve_raw, Scanner};
 use crate::geom::{AngularUnit, LengthUnit};
 use crate::parse::resolve::{resolve_hex, resolve_string};
-use crate::source::SourceFile;
 use crate::syntax::*;
 use crate::util::EcoString;
 
@@ -9,7 +8,6 @@ use std::rc::Rc;
 
 /// An iterator over the tokens of a string of source code.
 pub struct Tokens<'s> {
-    source: &'s SourceFile,
     s: Scanner<'s>,
     mode: TokenMode,
 }
@@ -26,12 +24,8 @@ pub enum TokenMode {
 impl<'s> Tokens<'s> {
     /// Create a new token iterator with the given mode.
     #[inline]
-    pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self {
-        Self {
-            s: Scanner::new(source.src()),
-            source,
-            mode,
-        }
+    pub fn new(source: &'s str, mode: TokenMode) -> Self {
+        Self { s: Scanner::new(source), mode }
     }
 
     /// Get the current token mode.
@@ -244,7 +238,7 @@ impl<'s> Tokens<'s> {
 
                     if self.s.eat_if('}') {
                         if let Some(character) = resolve_hex(&sequence) {
-                            NodeKind::UnicodeEscape(UnicodeEscapeToken {
+                            NodeKind::UnicodeEscape(UnicodeEscapeData {
                                 character,
                             })
                         } else {
@@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
     }
 
     fn raw(&mut self) -> NodeKind {
-        let column = self.source.byte_to_column(self.s.index() - 1).unwrap();
+        let column = self.s.column(self.s.index() - 1);
         let mut backticks = 1;
         while self.s.eat_if('`') && backticks < u8::MAX {
             backticks += 1;
@@ -322,7 +316,7 @@ impl<'s> Tokens<'s> {
 
         // Special case for empty inline block.
         if backticks == 2 {
-            return NodeKind::Raw(Rc::new(RawToken {
+            return NodeKind::Raw(Rc::new(RawData {
                 text: EcoString::new(),
                 lang: None,
                 backticks: 1,
@@ -397,7 +391,7 @@ impl<'s> Tokens<'s> {
             };
 
         if terminated {
-            NodeKind::Math(Rc::new(MathToken {
+            NodeKind::Math(Rc::new(MathData {
                 formula: self.s.get(start .. end).into(),
                 display,
             }))
@@ -492,7 +486,7 @@ impl<'s> Tokens<'s> {
             }
         }));
         if self.s.eat_if('"') {
-            NodeKind::Str(StrToken { string })
+            NodeKind::Str(StrData { string })
         } else {
             NodeKind::Error(ErrorPosition::End, "expected quote".into())
         }
@@ -567,7 +561,7 @@ mod tests {
     use TokenMode::{Code, Markup};
 
     fn UnicodeEscape(character: char) -> NodeKind {
-        NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
+        NodeKind::UnicodeEscape(UnicodeEscapeData { character })
     }
 
     fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
@@ -575,7 +569,7 @@ mod tests {
     }
 
     fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
-        NodeKind::Raw(Rc::new(RawToken {
+        NodeKind::Raw(Rc::new(RawData {
             text: text.into(),
             lang: lang.map(Into::into),
             backticks: backticks_left,
@@ -586,7 +580,7 @@ mod tests {
     fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
         match err_msg {
             None => {
-                NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
+                NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
             }
             Some(msg) => NodeKind::Error(
                 ErrorPosition::End,
@@ -597,7 +591,7 @@ mod tests {
 
     fn Str(string: &str, terminated: bool) -> NodeKind {
         if terminated {
-            NodeKind::Str(StrToken { string: string.into() })
+            NodeKind::Str(StrData { string: string.into() })
         } else {
             NodeKind::Error(ErrorPosition::End, "expected quote".into())
         }
@@ -687,7 +681,7 @@ mod tests {
         }};
         (@$mode:ident: $src:expr => $($token:expr),*) => {{
             let src = $src;
-            let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::<Vec<_>>();
+            let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
             let expected = vec![$($token.clone()),*];
             check(&src, found, expected);
         }};
author	Martin Haug <mhaug@live.de>	2021-11-01 13:03:18 +0100
committer	Martin Haug <mhaug@live.de>	2021-11-05 13:44:50 +0100
commit	49fb3cd4e2a5d6997ad4046d3514f154d8c866dd (patch)
tree	4fb2a245a4cb84a6ef238ac1bc71786a0996913d /src/parse
parent	7d34a548ccd14debe0668e23454e1ced70e485ec (diff)