Refactoring

Co-Authored-By: Martin <mhaug@live.de>
author: Laurenz <laurmaedje@gmail.com> 2021-11-02 12:13:45 +0100
committer: Martin Haug <mhaug@live.de> 2021-11-05 13:46:41 +0100
commit: 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc (patch)
tree: 8ed11d7cefd4e64f523b975f077e4b10f67a7cb9 /src/parse
parent: 42afb27cef5540535420fb6d8d9d2fcda7300a47 (diff)
4 files changed, 95 insertions, 73 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index c6def4dc..bfe93896 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -12,7 +12,8 @@ pub use tokens::*;
 
 use std::rc::Rc;
 
-use crate::syntax::*;
+use crate::syntax::ast::{Associativity, BinOp, UnOp};
+use crate::syntax::{ErrorPosition, GreenNode, NodeKind};
 use crate::util::EcoString;
 
 /// Parse a source file.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 8c68d630..5833c724 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,7 @@
 use std::ops::Range;
 use std::rc::Rc;
 
-use super::{is_newline, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
 use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
@@ -375,11 +375,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.src[.. index]
-            .chars()
-            .rev()
-            .take_while(|&c| !is_newline(c))
-            .count()
+        self.tokens.column(index)
     }
 
     /// Slice out part of the source string.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index edf28e17..92a2333d 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -1,5 +1,7 @@
 use std::slice::SliceIndex;
 
+use unicode_xid::UnicodeXID;
+
 /// A featureful char-based scanner.
 #[derive(Copy, Clone)]
 pub struct Scanner<'s> {
@@ -106,16 +108,6 @@ impl<'s> Scanner<'s> {
         self.index
     }
 
-    /// The column index of a given index in the source string.
-    #[inline]
-    pub fn column(&self, index: usize) -> usize {
-        self.src[.. index]
-            .chars()
-            .rev()
-            .take_while(|&c| !is_newline(c))
-            .count()
-    }
-
     /// Jump to an index in the source string.
     #[inline]
     pub fn jump(&mut self, index: usize) {
@@ -124,6 +116,12 @@ impl<'s> Scanner<'s> {
         self.index = index;
     }
 
+    /// The full source string.
+    #[inline]
+    pub fn src(&self) -> &'s str {
+        &self.src
+    }
+
     /// Slice out part of the source string.
     #[inline]
     pub fn get<I>(&self, index: I) -> &'s str
@@ -160,6 +158,16 @@ impl<'s> Scanner<'s> {
         // optimized away in some cases.
         self.src.get(start .. self.index).unwrap_or_default()
     }
+
+    /// The column index of a given index in the source string.
+    #[inline]
+    pub fn column(&self, index: usize) -> usize {
+        self.src[.. index]
+            .chars()
+            .rev()
+            .take_while(|&c| !is_newline(c))
+            .count()
+    }
 }
 
 /// Whether this character denotes a newline.
@@ -173,3 +181,24 @@ pub fn is_newline(character: char) -> bool {
         '\u{0085}' | '\u{2028}' | '\u{2029}'
     )
 }
+
+/// Whether a string is a valid identifier.
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+    let mut chars = string.chars();
+    chars
+        .next()
+        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+    c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+    c.is_xid_continue() || c == '_' || c == '-'
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index ef2678d4..aa28e1f5 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -1,11 +1,13 @@
-use super::{is_newline, resolve_raw, Scanner};
+use std::rc::Rc;
+
+use super::{
+    is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string,
+    Scanner,
+};
 use crate::geom::{AngularUnit, LengthUnit};
-use crate::parse::resolve::{resolve_hex, resolve_string};
 use crate::syntax::*;
 use crate::util::EcoString;
 
-use std::rc::Rc;
-
 /// An iterator over the tokens of a string of source code.
 pub struct Tokens<'s> {
     s: Scanner<'s>,
@@ -55,6 +57,12 @@ impl<'s> Tokens<'s> {
         self.s.jump(index);
     }
 
+    /// The column of a given index in the source string.
+    #[inline]
+    pub fn column(&self, index: usize) -> usize {
+        self.s.column(index)
+    }
+
     /// The underlying scanner.
     #[inline]
     pub fn scanner(&self) -> Scanner<'s> {
@@ -237,10 +245,8 @@ impl<'s> Tokens<'s> {
                     let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
 
                     if self.s.eat_if('}') {
-                        if let Some(character) = resolve_hex(&sequence) {
-                            NodeKind::UnicodeEscape(UnicodeEscapeData {
-                                character,
-                            })
+                        if let Some(c) = resolve_hex(&sequence) {
+                            NodeKind::UnicodeEscape(c)
                         } else {
                             NodeKind::Error(
                                 ErrorPosition::Full,
@@ -308,7 +314,8 @@ impl<'s> Tokens<'s> {
     }
 
     fn raw(&mut self) -> NodeKind {
-        let column = self.s.column(self.s.index() - 1);
+        let column = self.column(self.s.index() - 1);
+
         let mut backticks = 1;
         while self.s.eat_if('`') && backticks < u8::MAX {
             backticks += 1;
@@ -486,7 +493,7 @@ impl<'s> Tokens<'s> {
             }
         }));
         if self.s.eat_if('"') {
-            NodeKind::Str(StrData { string })
+            NodeKind::Str(string)
         } else {
             NodeKind::Error(ErrorPosition::End, "expected quote".into())
         }
@@ -556,12 +563,13 @@ mod tests {
 
     use super::*;
 
+    use ErrorPosition::*;
     use NodeKind::*;
     use Option::None;
     use TokenMode::{Code, Markup};
 
-    fn UnicodeEscape(character: char) -> NodeKind {
-        NodeKind::UnicodeEscape(UnicodeEscapeData { character })
+    fn UnicodeEscape(c: char) -> NodeKind {
+        NodeKind::UnicodeEscape(c)
     }
 
     fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
@@ -577,24 +585,12 @@ mod tests {
         }))
     }
 
-    fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
-        match err_msg {
-            None => {
-                NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
-            }
-            Some(msg) => NodeKind::Error(
-                ErrorPosition::End,
-                format!("expected closing {}", msg).into(),
-            ),
-        }
+    fn Math(formula: &str, display: bool) -> NodeKind {
+        NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
     }
 
-    fn Str(string: &str, terminated: bool) -> NodeKind {
-        if terminated {
-            NodeKind::Str(StrData { string: string.into() })
-        } else {
-            NodeKind::Error(ErrorPosition::End, "expected quote".into())
-        }
+    fn Str(string: &str) -> NodeKind {
+        NodeKind::Str(string.into())
     }
 
     fn Text(string: &str) -> NodeKind {
@@ -659,7 +655,7 @@ mod tests {
                 ('/', None, "//", LineComment),
                 ('/', None, "/**/", BlockComment),
                 ('/', Some(Markup), "*", Strong),
-                ('/', Some(Markup), "$ $", Math(" ", false, None)),
+                ('/', Some(Markup), "$ $", Math(" ", false)),
                 ('/', Some(Markup), r"\\", Text("\\")),
                 ('/', Some(Markup), "#let", Let),
                 ('/', Some(Code), "(", LeftParen),
@@ -781,16 +777,16 @@ mod tests {
         t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
 
         // Test basic unicode escapes.
-        t!(Markup: r"\u{}"     => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
+        t!(Markup: r"\u{}"     => Error(Full, "invalid unicode escape sequence"));
         t!(Markup: r"\u{2603}" => UnicodeEscape('☃'));
-        t!(Markup: r"\u{P}"    => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
+        t!(Markup: r"\u{P}"    => Error(Full, "invalid unicode escape sequence"));
 
         // Test unclosed unicode escapes.
-        t!(Markup[" /"]: r"\u{"     => Error(ErrorPosition::End, "expected closing brace"));
-        t!(Markup[" /"]: r"\u{1"    => Error(ErrorPosition::End, "expected closing brace"));
-        t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace"));
-        t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace"));
-        t!(Markup: r"\u{1🏕}"       => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace);
+        t!(Markup[" /"]: r"\u{"     => Error(End, "expected closing brace"));
+        t!(Markup[" /"]: r"\u{1"    => Error(End, "expected closing brace"));
+        t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace"));
+        t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace"));
+        t!(Markup: r"\u{1🏕}"       => Error(End, "expected closing brace"), Text("🏕"), RightBrace);
     }
 
     #[test]
@@ -882,11 +878,11 @@ mod tests {
         // Test basic raw block.
         t!(Markup: "``"     => Raw("", None, 1, false));
         t!(Markup: "`raw`"  => Raw("raw", None, 1, false));
-        t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick"));
+        t!(Markup[""]: "`]" => Error(End, "expected 1 backtick"));
 
         // Test special symbols in raw block.
         t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false));
-        t!(Markup[""]: r"`\`` "   => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick"));
+        t!(Markup[""]: r"`\`` "   => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick"));
 
         // Test separated closing backticks.
         t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false));
@@ -894,28 +890,28 @@ mod tests {
         // Test more backticks.
         t!(Markup: "``nope``"             => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false));
         t!(Markup: "````🚀````"           => Raw("", Some("🚀"), 4, false));
-        t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks"));
+        t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks"));
         t!(Markup[""]: "````raw``````"    => Raw("", Some("raw"), 4, false), Raw("", None, 1, false));
     }
 
     #[test]
     fn test_tokenize_math_formulas() {
         // Test basic formula.
-        t!(Markup: "$$"        => Math("", false, None));
-        t!(Markup: "$x$"       => Math("x", false, None));
-        t!(Markup: r"$\\$"     => Math(r"\\", false, None));
-        t!(Markup: "$[x + y]$" => Math("x + y", true, None));
-        t!(Markup: r"$[\\]$"   => Math(r"\\", true, None));
+        t!(Markup: "$$"        => Math("", false));
+        t!(Markup: "$x$"       => Math("x", false));
+        t!(Markup: r"$\\$"     => Math(r"\\", false));
+        t!(Markup: "$[x + y]$" => Math("x + y", true));
+        t!(Markup: r"$[\\]$"   => Math(r"\\", true));
 
         // Test unterminated.
-        t!(Markup[""]: "$x"      => Math("x", false, Some("dollar sign")));
-        t!(Markup[""]: "$[x"     => Math("x", true, Some("bracket and dollar sign")));
-        t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign")));
+        t!(Markup[""]: "$x"      => Error(End, "expected closing dollar sign"));
+        t!(Markup[""]: "$[x"     => Error(End, "expected closing bracket and dollar sign"));
+        t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign"));
 
         // Test escape sequences.
-        t!(Markup: r"$\$x$"       => Math(r"\$x", false, None));
-        t!(Markup: r"$[\\\]$]$"   => Math(r"\\\]$", true, None));
-        t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign")));
+        t!(Markup: r"$\$x$"       => Math(r"\$x", false));
+        t!(Markup: r"$[\\\]$]$"   => Math(r"\\\]$", true));
+        t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign"));
     }
 
     #[test]
@@ -1003,16 +999,16 @@ mod tests {
     #[test]
     fn test_tokenize_strings() {
         // Test basic strings.
-        t!(Code: "\"hi\""        => Str("hi", true));
-        t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true));
-        t!(Code: "\"🌎\""        => Str("🌎", true));
+        t!(Code: "\"hi\""        => Str("hi"));
+        t!(Code: "\"hi\nthere\"" => Str("hi\nthere"));
+        t!(Code: "\"🌎\""        => Str("🌎"));
 
         // Test unterminated.
-        t!(Code[""]: "\"hi"      => Str("hi", false));
+        t!(Code[""]: "\"hi" => Error(End, "expected quote"));
 
         // Test escaped quote.
-        t!(Code: r#""a\"bc""# => Str("a\"bc", true));
-        t!(Code[""]: r#""\""# => Str("\"", false));
+        t!(Code: r#""a\"bc""# => Str("a\"bc"));
+        t!(Code[""]: r#""\""# => Error(End, "expected quote"));
     }
 
     #[test]
author	Laurenz <laurmaedje@gmail.com>	2021-11-02 12:13:45 +0100
committer	Martin Haug <mhaug@live.de>	2021-11-05 13:46:41 +0100
commit	65fac0e57c9852eb2131aa06c0bac43b70bfbfbc (patch)
tree	8ed11d7cefd4e64f523b975f077e4b10f67a7cb9 /src/parse
parent	42afb27cef5540535420fb6d8d9d2fcda7300a47 (diff)