summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r--src/parse/incremental.rs69
-rw-r--r--src/parse/mod.rs55
-rw-r--r--src/parse/parser.rs54
-rw-r--r--src/parse/tokens.rs16
4 files changed, 145 insertions, 49 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 0e2d196c..1ee37a51 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -47,6 +47,10 @@ pub enum Precondition {
/// safe left neighbors has to check this invariant. Otherwise, this node is
/// safe.
NotAtStart,
+ /// These nodes could end up somewhere else up the tree if the parse was
+ /// happening from scratch. The parse result has to be checked for such
+ /// nodes. They are safe to add if followed by other nodes.
+ NotAtEnd,
/// No additional requirements.
None,
}
@@ -88,6 +92,12 @@ impl Reparser<'_> {
let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
let child_count = green.children().len();
+ // Save the current indent if this is a markup node.
+ let indent = match green.kind() {
+ NodeKind::Markup(n) => *n,
+ _ => 0,
+ };
+
let mut first = None;
let mut at_start = true;
@@ -170,12 +180,29 @@ impl Reparser<'_> {
}
// We now have a child that we can replace and a function to do so.
- let func = last_kind.reparsing_func(child_mode)?;
+ let func = last_kind.reparsing_func(child_mode, indent)?;
let post = last_kind.post();
+ let mut column = if mode == TokenMode::Markup {
+ // In this case, we want to pass the indentation to the function.
+ Scanner::new(self.src).column(children_span.start)
+ } else {
+ 0
+ };
+
+ // If this is a markup node, we want to save its indent instead to pass
+ // the right indent argument.
+ if children_range.len() == 1 {
+ let child = &mut green.children_mut()[children_range.start];
+ if let NodeKind::Markup(n) = child.kind() {
+ column = *n;
+ }
+ }
+
// The span of the to-be-reparsed children in the new source.
let replace_span = children_span.start
- .. children_span.end + self.replace_len - self.replace_range.len();
+ ..
+ children_span.end + self.replace_len - self.replace_range.len();
// For atomic primaries we need to pass in the whole remaining string to
// check whether the parser would eat more stuff illicitly.
@@ -186,7 +213,7 @@ impl Reparser<'_> {
};
// Do the reparsing!
- let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?;
+ let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?;
// Make sure that atomic primaries ate only what they were supposed to.
if post == Postcondition::AtomicPrimary {
@@ -311,6 +338,14 @@ fn validate(
at_start = child.kind().is_at_start(at_start);
}
+ // Verify that the last of the newborns is not `NotAtEnd`.
+ if newborns
+ .last()
+ .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd)
+ {
+ return false;
+ }
+
// We have to check whether the last non-trivia newborn is `AtStart` and
// verify the indent of its right neighbors in order to make sure its
// indentation requirements are fulfilled.
@@ -351,21 +386,22 @@ impl NodeKind {
fn reparsing_func(
&self,
parent_mode: TokenMode,
- ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
+ indent: usize,
+ ) -> Option<fn(&str, bool, usize) -> Option<(Vec<Green>, bool)>> {
let mode = self.mode().unwrap_or(parent_mode);
match self.post() {
Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
Postcondition::AtomicPrimary => Some(parse_atomic_markup),
Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+ NodeKind::Markup(_) => Some(parse_markup),
NodeKind::Template => Some(parse_template),
NodeKind::Block => Some(parse_block),
NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment),
_ => None,
},
_ => match mode {
- TokenMode::Markup if self == &Self::Markup => Some(parse_markup),
- TokenMode::Markup => Some(parse_markup_elements),
+ TokenMode::Markup if indent == 0 => Some(parse_markup_elements),
_ => return None,
},
}
@@ -452,8 +488,9 @@ impl NodeKind {
Postcondition::UnsafeLayer
}
- // Only markup is expected at the points where it does occur.
- Self::Markup => Postcondition::SameKind(None),
+ // Only markup is expected at the points where it does occur. The
+ // indentation must be preserved as well, also for the children.
+ Self::Markup(_) => Postcondition::SameKind(None),
// These can appear everywhere and must not change to other stuff
// because that could change the outer expression.
@@ -493,6 +530,10 @@ impl NodeKind {
| Self::ImportExpr
| Self::IncludeExpr => Postcondition::AtomicPrimary,
+ // This element always has to remain in the same column so better
+ // reparse the whole parent.
+ Self::Raw(_) => Postcondition::Unsafe,
+
// These are all replaceable by other tokens.
Self::Parbreak
| Self::Linebreak
@@ -507,7 +548,6 @@ impl NodeKind {
| Self::Heading
| Self::Enum
| Self::List
- | Self::Raw(_)
| Self::Math(_) => Postcondition::Safe,
}
}
@@ -517,6 +557,7 @@ impl NodeKind {
match self {
Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
Self::TextInLine(_) => Precondition::NotAtStart,
+ Self::Error(_, _) => Precondition::NotAtEnd,
_ => Precondition::None,
}
}
@@ -557,12 +598,12 @@ mod tests {
test("a d e", 1 .. 3, " b c d", 0 .. 8);
test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
- test("= A heading", 3 .. 3, "n evocative", 2 .. 15);
+ test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
test("your thing", 5 .. 5, "a", 4 .. 11);
test("a your thing a", 6 .. 7, "a", 2 .. 12);
test("{call(); abc}", 7 .. 7, "[]", 0 .. 15);
test("#call() abc", 7 .. 7, "[]", 0 .. 10);
- test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35);
+ test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 3 .. 34);
test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32);
test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20);
test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42);
@@ -571,7 +612,7 @@ mod tests {
test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32);
test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
- test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
+ test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
@@ -596,7 +637,7 @@ mod tests {
test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
- test("a b c", 1 .. 1, "{[}", 0 .. 5);
+ test("a b c", 1 .. 1, "{[}", 0 .. 8);
// Test unclosed things.
test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
@@ -610,6 +651,6 @@ mod tests {
// Test raw tokens.
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
- test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
+ test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
}
}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 2c5afb6b..f4826730 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -16,6 +16,7 @@ use std::rc::Rc;
use crate::syntax::ast::{Associativity, BinOp, UnOp};
use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
+use crate::util::EcoString;
/// Parse a source file.
pub fn parse(src: &str) -> Rc<GreenNode> {
@@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
}
/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(src, TokenMode::Code);
primary(&mut p, true).ok()?;
p.eject_partial()
}
/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(src, TokenMode::Markup);
markup_expr(&mut p);
p.eject_partial()
}
/// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(src, TokenMode::Markup);
- markup(&mut p);
+ if column == 0 {
+ markup(&mut p);
+ } else {
+ markup_indented(&mut p, column);
+ }
p.eject()
}
@@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec<Green>, bool)> {
pub fn parse_markup_elements(
src: &str,
mut at_start: bool,
+ column: usize,
) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(src, TokenMode::Markup);
+ p.offset(column);
while !p.eof() {
markup_node(&mut p, &mut at_start);
}
@@ -62,7 +69,7 @@ pub fn parse_markup_elements(
}
/// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(source, TokenMode::Code);
if !p.at(&NodeKind::LeftBracket) {
return None;
@@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
}
/// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(source, TokenMode::Code);
if !p.at(&NodeKind::LeftBrace) {
return None;
@@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
}
/// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
+pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::new(source, TokenMode::Code);
comment(&mut p).ok()?;
p.eject()
@@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec<Green>, bool)> {
/// Parse markup.
fn markup(p: &mut Parser) {
- markup_while(p, true, &mut |_| true)
+ markup_while(p, true, 0, &mut |_| true)
}
/// Parse markup that stays right of the given column.
@@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) {
_ => false,
});
- markup_while(p, false, &mut |p| match p.peek() {
- Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
+ markup_while(p, false, column, &mut |p| match p.peek() {
+ Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column,
_ => true,
})
}
@@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) {
///
/// If `at_start` is true, things like headings that may only appear at the
/// beginning of a line or template are allowed.
-fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
+fn markup_while<F>(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F)
where
F: FnMut(&mut Parser) -> bool,
{
- p.perform(NodeKind::Markup, |p| {
+ p.perform(NodeKind::Markup(column), |p| {
while !p.eof() && f(p) {
markup_node(p, &mut at_start);
}
@@ -205,20 +212,32 @@ fn heading(p: &mut Parser) {
/// Parse a single list item.
fn list_node(p: &mut Parser) {
- p.perform(NodeKind::List, |p| {
- p.eat_assert(&NodeKind::Minus);
+ let marker = p.marker();
+ let src: EcoString = p.peek_src().into();
+ p.eat_assert(&NodeKind::Minus);
+
+ if p.peek().map_or(true, |kind| kind.is_whitespace()) {
let column = p.column(p.prev_end());
markup_indented(p, column);
- });
+ marker.end(p, NodeKind::List);
+ } else {
+ marker.convert(p, NodeKind::TextInLine(src));
+ }
}
/// Parse a single enum item.
fn enum_node(p: &mut Parser) {
- p.perform(NodeKind::Enum, |p| {
- p.eat();
+ let marker = p.marker();
+ let src: EcoString = p.peek_src().into();
+ p.eat();
+
+ if p.peek().map_or(true, |kind| kind.is_whitespace()) {
let column = p.column(p.prev_end());
markup_indented(p, column);
- });
+ marker.end(p, NodeKind::Enum);
+ } else {
+ marker.convert(p, NodeKind::TextInLine(src));
+ }
}
/// Parse an expression within markup mode.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index ade9b5df..b31f69d3 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -21,8 +21,12 @@ pub struct Parser<'s> {
groups: Vec<GroupEntry>,
/// The children of the currently built node.
children: Vec<Green>,
- /// Whether the last group was terminated.
- last_terminated: bool,
+ /// Is `Some` if there is an unterminated group at the last position where
+ /// groups were terminated.
+ last_unterminated: Option<usize>,
+ /// Offset the indentation. This can be used if the parser is processing a
+ /// subslice of the source and there was leading indent.
+ column_offset: usize,
}
impl<'s> Parser<'s> {
@@ -38,7 +42,8 @@ impl<'s> Parser<'s> {
current_start: 0,
groups: vec![],
children: vec![],
- last_terminated: true,
+ last_unterminated: None,
+ column_offset: 0,
}
}
@@ -102,6 +107,11 @@ impl<'s> Parser<'s> {
.then(|| (self.children, self.tokens.was_terminated()))
}
+ /// Set an indentation offset.
+ pub fn offset(&mut self, columns: usize) {
+ self.column_offset = columns;
+ }
+
/// Whether the end of the source string or group is reached.
pub fn eof(&self) -> bool {
self.eof
@@ -206,6 +216,12 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
+ self.tokens.scanner().column(index) + self.column_offset
+ }
+
+ /// Determine the column index for the given byte index while ignoring the
+ /// offset.
+ pub fn clean_column(&self, index: usize) -> usize {
self.tokens.scanner().column(index)
}
@@ -244,7 +260,11 @@ impl<'s> Parser<'s> {
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
self.repeek();
- self.last_terminated = true;
+ if let Some(n) = self.last_unterminated {
+ if n != self.prev_end() {
+ self.last_unterminated = None;
+ }
+ }
let mut rescan = self.tokens.mode() != group_mode;
@@ -262,8 +282,14 @@ impl<'s> Parser<'s> {
self.eat();
rescan = false;
} else if required {
+ // FIXME The error has to be inserted before any space rolls
+ // around because the rescan will set the cursor back in front
+ // of the space and reconsume it. Supressing the rescan is not
+ // an option since additional rescans (e.g. for statements) can
+ // be triggered directly afterwards, without processing any
+ // other token.
self.push_error(format_eco!("expected {}", end));
- self.last_terminated = false;
+ self.last_unterminated = Some(self.prev_end());
}
}
@@ -283,13 +309,21 @@ impl<'s> Parser<'s> {
/// Check if the group processing was successfully terminated.
pub fn group_success(&self) -> bool {
- self.last_terminated && self.groups.is_empty()
+ self.last_unterminated.is_none() && self.groups.is_empty()
}
/// Low-level bump that consumes exactly one token without special trivia
/// handling.
fn bump(&mut self) {
let kind = self.current.take().unwrap();
+ if match kind {
+ NodeKind::Space(n) if n > 0 => true,
+ NodeKind::Parbreak => true,
+ _ => false,
+ } {
+ self.column_offset = 0;
+ }
+
let len = self.tokens.index() - self.current_start;
self.children.push(GreenData::new(kind, len).into());
self.current_start = self.tokens.index();
@@ -346,6 +380,13 @@ impl Parser<'_> {
/// Push an error into the children list.
pub fn push_error(&mut self, msg: impl Into<EcoString>) {
let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ for i in (0 .. self.children.len()).rev() {
+ if Self::is_trivia_ext(self.children[i].kind(), false) {
+ self.children.remove(i);
+ } else {
+ break;
+ }
+ }
self.children.push(GreenData::new(error, 0).into());
}
@@ -445,6 +486,7 @@ impl Marker {
}
/// A logical group of tokens, e.g. `[...]`.
+#[derive(Debug)]
struct GroupEntry {
/// The kind of group this is. This decides which tokens will end the group.
/// For example, a [`Group::Paren`] will be ended by
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 836e8cf1..3a0ad1ad 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -293,10 +293,8 @@ impl<'s> Tokens<'s> {
} else {
NodeKind::EnDash
}
- } else if self.s.check_or(true, char::is_whitespace) {
- NodeKind::Minus
} else {
- NodeKind::Text('-'.into())
+ NodeKind::Minus
}
}
@@ -312,11 +310,7 @@ impl<'s> Tokens<'s> {
None
};
- if self.s.check_or(true, char::is_whitespace) {
- NodeKind::EnumNumbering(number)
- } else {
- NodeKind::Text(self.s.eaten_from(start).into())
- }
+ NodeKind::EnumNumbering(number)
}
fn raw(&mut self) -> NodeKind {
@@ -742,12 +736,12 @@ mod tests {
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
- t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
+ t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+"));
- t!(Markup[" /"]: "=-a" => Text("="), Text("-"), Text("a"));
+ t!(Markup[" /"]: "=-a" => Text("="), Minus, Text("a"));
t!(Markup[" "]: "#123" => Text("#"), Text("123"));
// Test text ends.
@@ -804,7 +798,7 @@ mod tests {
t!(Markup["a1/"]: "- " => Minus, Space(0));
t!(Markup[" "]: "." => EnumNumbering(None));
t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
- t!(Markup[" "]: "1.a" => Text("1."), Text("a"));
+ t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a"));
t!(Markup[" /"]: "a1." => Text("a1."));
}