diff options
| author | Laurenz <laurmaedje@gmail.com> | 2021-11-02 12:13:45 +0100 |
|---|---|---|
| committer | Martin Haug <mhaug@live.de> | 2021-11-05 13:46:41 +0100 |
| commit | 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc (patch) | |
| tree | 8ed11d7cefd4e64f523b975f077e4b10f67a7cb9 /src/parse | |
| parent | 42afb27cef5540535420fb6d8d9d2fcda7300a47 (diff) | |
Refactoring
Co-Authored-By: Martin <mhaug@live.de>
Diffstat (limited to 'src/parse')
| -rw-r--r-- | src/parse/mod.rs | 3 | ||||
| -rw-r--r-- | src/parse/parser.rs | 8 | ||||
| -rw-r--r-- | src/parse/scanner.rs | 49 | ||||
| -rw-r--r-- | src/parse/tokens.rs | 108 |
4 files changed, 95 insertions, 73 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c6def4dc..bfe93896 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,7 +12,8 @@ pub use tokens::*; use std::rc::Rc; -use crate::syntax::*; +use crate::syntax::ast::{Associativity, BinOp, UnOp}; +use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; /// Parse a source file. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8c68d630..5833c724 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{is_newline, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -375,11 +375,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() + self.tokens.column(index) } /// Slice out part of the source string. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index edf28e17..92a2333d 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -1,5 +1,7 @@ use std::slice::SliceIndex; +use unicode_xid::UnicodeXID; + /// A featureful char-based scanner. #[derive(Copy, Clone)] pub struct Scanner<'s> { @@ -106,16 +108,6 @@ impl<'s> Scanner<'s> { self.index } - /// The column index of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() - } - /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { @@ -124,6 +116,12 @@ impl<'s> Scanner<'s> { self.index = index; } + /// The full source string. + #[inline] + pub fn src(&self) -> &'s str { + &self.src + } + /// Slice out part of the source string. #[inline] pub fn get<I>(&self, index: I) -> &'s str @@ -160,6 +158,16 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } + + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } } /// Whether this character denotes a newline. @@ -173,3 +181,24 @@ pub fn is_newline(character: char) -> bool { '\u{0085}' | '\u{2028}' | '\u{2029}' ) } + +/// Whether a string is a valid identifier. +#[inline] +pub fn is_ident(string: &str) -> bool { + let mut chars = string.chars(); + chars + .next() + .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) +} + +/// Whether a character can start an identifier. +#[inline] +pub fn is_id_start(c: char) -> bool { + c.is_xid_start() || c == '_' +} + +/// Whether a character can continue an identifier. +#[inline] +pub fn is_id_continue(c: char) -> bool { + c.is_xid_continue() || c == '_' || c == '-' +} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ef2678d4..aa28e1f5 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,11 +1,13 @@ -use super::{is_newline, resolve_raw, Scanner}; +use std::rc::Rc; + +use super::{ + is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string, + Scanner, +}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::resolve::{resolve_hex, resolve_string}; use crate::syntax::*; use crate::util::EcoString; -use std::rc::Rc; - /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { s: Scanner<'s>, @@ -55,6 +57,12 @@ impl<'s> Tokens<'s> { self.s.jump(index); } + /// The column of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.s.column(index) + } + /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -237,10 +245,8 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeData { - character, - }) + if let Some(c) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(c) } else { NodeKind::Error( ErrorPosition::Full, @@ -308,7 +314,8 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); + let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -486,7 +493,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrData { string }) + NodeKind::Str(string) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -556,12 +563,13 @@ mod tests { use super::*; + use ErrorPosition::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeData { character }) + fn UnicodeEscape(c: char) -> NodeKind { + NodeKind::UnicodeEscape(c) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -577,24 +585,12 @@ mod tests { })) } - fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { - match err_msg { - None => { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) - } - Some(msg) => NodeKind::Error( - ErrorPosition::End, - format!("expected closing {}", msg).into(), - ), - } + fn Math(formula: &str, display: bool) -> NodeKind { + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } - fn Str(string: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Str(StrData { string: string.into() }) - } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) - } + fn Str(string: &str) -> NodeKind { + NodeKind::Str(string.into()) } fn Text(string: &str) -> NodeKind { @@ -659,7 +655,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, None)), + ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -781,16 +777,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); - t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -882,11 +878,11 @@ mod tests { // Test basic raw block. t!(Markup: "``" => Raw("", None, 1, false)); t!(Markup: "`raw`" => Raw("raw", None, 1, false)); - t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); @@ -894,28 +890,28 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); - t!(Markup[""]: "`````👩🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "`````👩🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, None)); - t!(Markup: "$x$" => Math("x", false, None)); - t!(Markup: r"$\\$" => Math(r"\\", false, None)); - t!(Markup: "$[x + y]$" => Math("x + y", true, None)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); + t!(Markup: "$$" => Math("", false)); + t!(Markup: "$x$" => Math("x", false)); + t!(Markup: r"$\\$" => Math(r"\\", false)); + t!(Markup: "$[x + y]$" => Math("x + y", true)); + t!(Markup: r"$[\\]$" => Math(r"\\", true)); // Test unterminated. - t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); - t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign")); + t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign")); + t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign")); // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); + t!(Markup: r"$\$x$" => Math(r"\$x", false)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true)); + t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign")); } #[test] @@ -1003,16 +999,16 @@ mod tests { #[test] fn test_tokenize_strings() { // Test basic strings. - t!(Code: "\"hi\"" => Str("hi", true)); - t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true)); - t!(Code: "\"🌎\"" => Str("🌎", true)); + t!(Code: "\"hi\"" => Str("hi")); + t!(Code: "\"hi\nthere\"" => Str("hi\nthere")); + t!(Code: "\"🌎\"" => Str("🌎")); // Test unterminated. - t!(Code[""]: "\"hi" => Str("hi", false)); + t!(Code[""]: "\"hi" => Error(End, "expected quote")); // Test escaped quote. - t!(Code: r#""a\"bc""# => Str("a\"bc", true)); - t!(Code[""]: r#""\""# => Str("\"", false)); + t!(Code: r#""a\"bc""# => Str("a\"bc")); + t!(Code[""]: r#""\""# => Error(End, "expected quote")); } #[test] |
