summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Martin Haug <mhaug@live.de> 2021-10-31 18:52:48 +0100
committer: Martin Haug <mhaug@live.de> 2021-11-05 13:44:49 +0100
commit: 2e7d359e59a45849f53eea6e022ca83295f5a6e7 (patch)
tree: 6fc2b3e3f1c12f1326061cd09a374fc6dca3026a /src
parent: c569e14c07902b23b7b3e29df4076cea1f4496cf (diff)
Unicode escape error moved to tokenizer
Diffstat (limited to 'src')
-rw-r--r-- src/parse/mod.rs 36
-rw-r--r-- src/parse/parser.rs 24
-rw-r--r-- src/parse/tokens.rs 92
-rw-r--r-- src/syntax/markup.rs 10
-rw-r--r-- src/syntax/mod.rs 2
-rw-r--r-- src/syntax/token.rs 5
6 files changed, 78 insertions, 91 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index ce992834..8775e8a1 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -54,7 +54,10 @@ where
while !p.eof() && f(p) {
markup_node(p, &mut at_start);
if let Some(node) = p.last_child() {
- at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment);
+ at_start &= matches!(node.kind(),
+ &NodeKind::Space(_) | &NodeKind::Parbreak |
+ &NodeKind::LineComment | &NodeKind::BlockComment
+ );
}
}
@@ -88,22 +91,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Emph
| NodeKind::Strong
| NodeKind::Linebreak
- | NodeKind::Raw(_) => p.eat(),
-
- NodeKind::UnicodeEscape(u) => {
- if u.character.is_none() {
- let src = p.peek_src();
- p.convert(NodeKind::Error(
- ErrorPosition::Full,
- "invalid unicode escape sequence".into(),
- ));
- p.start();
- p.end(NodeKind::Text(src.into()));
- return;
- }
-
- p.eat();
- }
+ | NodeKind::Raw(_)
+ | NodeKind::UnicodeEscape(_) => p.eat(),
NodeKind::Eq if *at_start => heading(p),
NodeKind::ListBullet if *at_start => list_node(p),
@@ -503,9 +492,8 @@ fn item(p: &mut Parser) -> NodeKind {
/// Convert a collection into an array, producing errors for anything other than
/// expressions.
fn array(p: &mut Parser, items: usize) {
- p.start_with(items);
p.filter_children(
- 0,
+ p.child_count() - items,
|x| match x.kind() {
NodeKind::Named | NodeKind::ParameterSink => false,
_ => true,
@@ -522,15 +510,14 @@ fn array(p: &mut Parser, items: usize) {
},
);
- p.end(NodeKind::Array)
+ p.convert_with(items, NodeKind::Array);
}
/// Convert a collection into a dictionary, producing errors for anything other
/// than named pairs.
fn dict(p: &mut Parser, items: usize) {
- p.start_with(items);
p.filter_children(
- 0,
+ p.child_count() - items,
|x| {
x.kind() == &NodeKind::Named
|| x.kind().is_parenthesis()
@@ -547,7 +534,7 @@ fn dict(p: &mut Parser, items: usize) {
),
},
);
- p.end(NodeKind::Dict);
+ p.convert_with(items, NodeKind::Dict);
}
/// Convert a collection into a list of parameters, producing errors for
@@ -684,8 +671,7 @@ fn let_expr(p: &mut Parser) {
return;
}
- p.start_with(p.child_count() - offset);
- p.end(NodeKind::Closure)
+ p.convert_with(p.child_count() - offset, NodeKind::Closure);
}
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index e6fcc1ae..240de43d 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -186,9 +186,27 @@ impl<'s> Parser<'s> {
}
pub fn convert(&mut self, kind: NodeKind) {
- self.start();
- self.eat();
- self.end(kind);
+ let len = self.tokens.index() - self.next_start;
+
+ self.children.push(
+ GreenNode::with_child(
+ kind,
+ len,
+ GreenData::new(self.next.clone().unwrap(), len),
+ )
+ .into(),
+ );
+ self.fast_forward();
+ self.success = true;
+ }
+
+ pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) {
+ let preserved: Vec<_> =
+ self.children.drain(self.children.len() - preserve ..).collect();
+ let len = preserved.iter().map(|c| c.len()).sum();
+ self.children
+ .push(GreenNode::with_children(kind, len, preserved).into());
+ self.success = true;
}
/// End the current node and undo its existence, inling all accumulated
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 7c500ce7..1d2e32ec 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -200,7 +200,7 @@ impl<'s> Tokens<'s> {
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
});
- NodeKind::Text(resolve_string(self.s.eaten_from(start)))
+ NodeKind::Text(self.s.eaten_from(start).into())
}
fn whitespace(&mut self) -> NodeKind {
@@ -243,10 +243,16 @@ impl<'s> Tokens<'s> {
let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
if self.s.eat_if('}') {
- NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
- character: resolve_hex(&sequence),
- sequence,
- }))
+ if let Some(character) = resolve_hex(&sequence) {
+ NodeKind::UnicodeEscape(UnicodeEscapeToken {
+ character,
+ })
+ } else {
+ NodeKind::Error(
+ ErrorPosition::Full,
+ "invalid unicode escape sequence".into(),
+ )
+ }
} else {
NodeKind::Error(
ErrorPosition::End,
@@ -560,35 +566,21 @@ mod tests {
use Option::None;
use TokenMode::{Code, Markup};
- fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind {
- if terminated {
- NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
- character: resolve_hex(sequence),
- sequence: sequence.into(),
- }))
- } else {
- NodeKind::Error(ErrorPosition::End, "expected closing brace".into())
- }
+ fn UnicodeEscape(character: char) -> NodeKind {
+ NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
}
- fn Raw(
- text: &str,
- lang: Option<&str>,
- backticks_left: u8,
- err_msg: Option<&str>,
- block: bool,
- ) -> NodeKind {
- match err_msg {
- None => NodeKind::Raw(Rc::new(RawToken {
- text: text.into(),
- lang: lang.map(Into::into),
- backticks: backticks_left,
- block,
- })),
- Some(msg) => {
- NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into())
- }
- }
+ fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
+ NodeKind::Error(pos, message.into())
+ }
+
+ fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
+ NodeKind::Raw(Rc::new(RawToken {
+ text: text.into(),
+ lang: lang.map(Into::into),
+ backticks: backticks_left,
+ block,
+ }))
}
fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
@@ -795,16 +787,16 @@ mod tests {
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
- t!(Markup: r"\u{}" => UnicodeEscape("", true));
- t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
- t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
+ t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
+ t!(Markup: r"\u{2603}" => UnicodeEscape('☃'));
+ t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
// Test unclosed unicode escapes.
- t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
- t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
- t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
- t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
- t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
+ t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace"));
+ t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace"));
+ t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace);
}
#[test]
@@ -894,22 +886,22 @@ mod tests {
#[test]
fn test_tokenize_raw_blocks() {
// Test basic raw block.
- t!(Markup: "``" => Raw("", None, 1, None, false));
- t!(Markup: "`raw`" => Raw("raw", None, 1, None, false));
- t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false));
+ t!(Markup: "``" => Raw("", None, 1, false));
+ t!(Markup: "`raw`" => Raw("raw", None, 1, false));
+ t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick"));
// Test special symbols in raw block.
- t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false));
- t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false));
+ t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false));
+ t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick"));
// Test separated closing backticks.
- t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false));
+ t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false));
// Test more backticks.
- t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false));
- t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false));
- t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, Some("5 backticks"), false));
- t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false));
+ t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false));
+ t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false));
+ t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks"));
+ t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false));
}
#[test]
diff --git a/src/syntax/markup.rs b/src/syntax/markup.rs
index 49b2a519..f43a618a 100644
--- a/src/syntax/markup.rs
+++ b/src/syntax/markup.rs
@@ -1,7 +1,6 @@
use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
use crate::node;
use crate::util::EcoString;
-use std::fmt::Write;
node! {
/// The syntactical root capable of representing a full parsed document.
@@ -50,14 +49,7 @@ impl TypedNode for MarkupNode {
NodeKind::Strong => Some(MarkupNode::Strong),
NodeKind::Emph => Some(MarkupNode::Emph),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
- NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(match u.character {
- Some(c) => c.into(),
- None => {
- let mut eco = EcoString::with_capacity(u.sequence.len() + 4);
- write!(&mut eco, "\\u{{{}}}", u.sequence).unwrap();
- eco
- }
- })),
+ NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())),
NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))),
NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))),
NodeKind::NonBreakingSpace => {
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index afa0ab86..9d4beb6c 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -121,7 +121,7 @@ pub enum NodeKind {
Text(EcoString),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
- UnicodeEscape(Rc<UnicodeEscapeToken>),
+ UnicodeEscape(UnicodeEscapeToken),
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(Rc<RawToken>),
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index 5a621495..4f43bb4f 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -33,9 +33,8 @@ pub struct MathToken {
/// A unicode escape sequence token: `\u{1F5FA}`.
#[derive(Debug, Clone, PartialEq)]
+#[repr(transparent)]
pub struct UnicodeEscapeToken {
- /// The escape sequence between the braces.
- pub sequence: EcoString,
/// The resulting unicode character.
- pub character: Option<char>,
+ pub character: char,
}