summaryrefslogtreecommitdiff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/incremental.rs 151
-rw-r--r-- src/parse/mod.rs 72
-rw-r--r-- src/parse/parser.rs 20
-rw-r--r-- src/parse/scanner.rs 34
-rw-r--r-- src/parse/tokens.rs 51
5 files changed, 170 insertions, 158 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index d3edff6e..4736845f 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -4,19 +4,10 @@ use std::sync::Arc;
use crate::syntax::{Green, GreenNode, NodeKind};
use super::{
- is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode,
+ is_newline, parse, reparse_block, reparse_markup_elements, reparse_template,
+ TokenMode,
};
-type ReparseFunc = fn(
- &str,
- &str,
- usize,
- isize,
- &[Green],
- bool,
- usize,
-) -> Option<(Vec<Green>, bool, usize)>;
-
/// Allows partial refreshes of the [`Green`] node tree.
///
/// This struct holds a description of a change. Its methods can be used to try
@@ -55,16 +46,12 @@ impl Reparser<'_> {
let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
let original_count = green.children().len();
- // Save the current indent if this is a markup node.
- let indent = match green.kind() {
- NodeKind::Markup(n) => *n,
- _ => 0,
- };
-
let mut search = SearchState::default();
let mut ahead_nontrivia = None;
+
// Whether the first node that should be replaced is at start.
let mut at_start = true;
+ // Whether the last searched child is the outermost child.
let mut child_outermost = false;
// Find the first child in the range of children to reparse.
@@ -83,18 +70,17 @@ impl Reparser<'_> {
search = if child_span.end == self.replace_range.end
&& child_mode == TokenMode::Markup
{
- SearchState::RequireNonWS(pos)
+ SearchState::RequireNonTrivia(pos)
} else {
SearchState::Contained(pos)
};
} else if child_span.contains(&self.replace_range.start) {
search = SearchState::Inside(pos);
} else {
- if (self.replace_range.len() != 0
- || self.replace_range.end != child_span.end
- || ahead_nontrivia.is_none())
- && (!child.kind().is_space()
- && child.kind() != &NodeKind::Semicolon)
+ if (!child.kind().is_space()
+ && child.kind() != &NodeKind::Semicolon)
+ && (ahead_nontrivia.is_none()
+ || self.replace_range.start > child_span.end)
{
ahead_nontrivia = Some((pos, at_start));
}
@@ -103,12 +89,12 @@ impl Reparser<'_> {
}
SearchState::Inside(start) => {
if child_span.end == self.replace_range.end {
- search = SearchState::RequireNonWS(start);
+ search = SearchState::RequireNonTrivia(start);
} else if child_span.end > self.replace_range.end {
search = SearchState::SpanFound(start, pos);
}
}
- SearchState::RequireNonWS(start) => {
+ SearchState::RequireNonTrivia(start) => {
if !child.kind().is_trivia() {
search = SearchState::SpanFound(start, pos);
}
@@ -118,11 +104,21 @@ impl Reparser<'_> {
offset += child.len();
child_outermost = outermost && i + 1 == original_count;
- if search.end().is_some() {
+
+ if search.done().is_some() {
break;
}
}
+ // If we were looking for a non-trivia element and hit the end of
+ // the file here, we instead use EOF as the end of the span.
+ if let SearchState::RequireNonTrivia(start) = search {
+ search = SearchState::SpanFound(start, GreenPos {
+ idx: green.children().len() - 1,
+ offset: offset - green.children().last().unwrap().len(),
+ })
+ }
+
if let SearchState::Contained(pos) = search {
let child = &mut green.children_mut()[pos.idx];
let prev_len = child.len();
@@ -139,20 +135,20 @@ impl Reparser<'_> {
}
let superseded_span = pos.offset .. pos.offset + prev_len;
- let func: Option<ReparseFunc> = match child.kind() {
- NodeKind::Template => Some(parse_template),
- NodeKind::Block => Some(parse_block),
+ let func: Option<ReparseMode> = match child.kind() {
+ NodeKind::Template => Some(ReparseMode::Template),
+ NodeKind::Block => Some(ReparseMode::Block),
_ => None,
};
+ // Return if the element was reparsable on its own, otherwise try to
+ // treat it as a markup element.
if let Some(func) = func {
if let Some(result) = self.replace(
green,
func,
pos.idx .. pos.idx + 1,
superseded_span,
- at_start,
- indent,
outermost,
) {
return Some(result);
@@ -160,11 +156,13 @@ impl Reparser<'_> {
}
}
- if !matches!(green.kind(), NodeKind::Markup(_)) {
- return None;
- }
+ // Save the current indent if this is a markup node and stop otherwise.
+ let indent = match green.kind() {
+ NodeKind::Markup(n) => *n,
+ _ => return None,
+ };
- let (mut start, end) = search.end()?;
+ let (mut start, end) = search.done()?;
if let Some((ahead, ahead_at_start)) = ahead_nontrivia {
let ahead_kind = green.children()[ahead.idx].kind();
@@ -179,13 +177,12 @@ impl Reparser<'_> {
let superseded_span =
start.offset .. end.offset + green.children()[end.idx].len();
+
self.replace(
green,
- parse_markup_elements,
+ ReparseMode::MarkupElements(at_start, indent),
start.idx .. end.idx + 1,
superseded_span,
- at_start,
- indent,
outermost,
)
}
@@ -193,19 +190,17 @@ impl Reparser<'_> {
fn replace(
&self,
green: &mut GreenNode,
- func: ReparseFunc,
+ mode: ReparseMode,
superseded_idx: Range<usize>,
superseded_span: Range<usize>,
- at_start: bool,
- indent: usize,
outermost: bool,
) -> Option<Range<usize>> {
+ let superseded_start = superseded_idx.start;
+
let differential: isize =
self.replace_len as isize - self.replace_range.len() as isize;
- let newborn_span = superseded_span.start
- ..
- (superseded_span.end as isize + differential) as usize;
- let superseded_start = superseded_idx.start;
+ let newborn_end = (superseded_span.end as isize + differential) as usize;
+ let newborn_span = superseded_span.start .. newborn_end;
let mut prefix = "";
for (i, c) in self.src[.. newborn_span.start].char_indices().rev() {
@@ -215,15 +210,27 @@ impl Reparser<'_> {
prefix = &self.src[i .. newborn_span.start];
}
- let (newborns, terminated, amount) = func(
- &prefix,
- &self.src[newborn_span.start ..],
- newborn_span.len(),
- differential,
- &green.children()[superseded_start ..],
- at_start,
- indent,
- )?;
+ let (newborns, terminated, amount) = match mode {
+ ReparseMode::Block => reparse_block(
+ &prefix,
+ &self.src[newborn_span.start ..],
+ newborn_span.len(),
+ ),
+ ReparseMode::Template => reparse_template(
+ &prefix,
+ &self.src[newborn_span.start ..],
+ newborn_span.len(),
+ ),
+ ReparseMode::MarkupElements(at_start, indent) => reparse_markup_elements(
+ &prefix,
+ &self.src[newborn_span.start ..],
+ newborn_span.len(),
+ differential,
+ &green.children()[superseded_start ..],
+ at_start,
+ indent,
+ ),
+ }?;
// Do not accept unclosed nodes if the old node wasn't at the right edge
// of the tree.
@@ -236,6 +243,8 @@ impl Reparser<'_> {
}
}
+/// The position of a green node in terms of its string offset and index within
+/// the parent node.
#[derive(Clone, Copy, Debug, PartialEq)]
struct GreenPos {
idx: usize,
@@ -256,7 +265,7 @@ enum SearchState {
Inside(GreenPos),
/// The search has found the end of the modified nodes but the change
/// touched its boundaries so another non-trivia node is needed.
- RequireNonWS(GreenPos),
+ RequireNonTrivia(GreenPos),
/// The search has concluded by finding a start and an end index for nodes
/// with a pending reparse.
SpanFound(GreenPos, GreenPos),
@@ -269,17 +278,29 @@ impl Default for SearchState {
}
impl SearchState {
- fn end(&self) -> Option<(GreenPos, GreenPos)> {
+ fn done(self) -> Option<(GreenPos, GreenPos)> {
match self {
Self::NoneFound => None,
- Self::Contained(s) => Some((*s, *s)),
+ Self::Contained(s) => Some((s, s)),
Self::Inside(_) => None,
- Self::RequireNonWS(_) => None,
- Self::SpanFound(s, e) => Some((*s, *e)),
+ Self::RequireNonTrivia(_) => None,
+ Self::SpanFound(s, e) => Some((s, e)),
}
}
}
+/// Which reparse function to choose for a span of elements.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum ReparseMode {
+ /// Reparse a code block with its braces.
+ Block,
+ /// Reparse a template, including its square brackets.
+ Template,
+ /// Reparse elements of the markup. The variant carries whether the node is
+ /// `at_start` and the minimum indent of the containing markup node.
+ MarkupElements(bool, usize),
+}
+
impl NodeKind {
/// Whether this node has to appear at the start of a line.
pub fn only_at_start(&self) -> bool {
@@ -330,7 +351,7 @@ mod tests {
test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33);
- test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
+ test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11);
test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25);
test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
@@ -344,8 +365,8 @@ mod tests {
#[test]
fn test_parse_incremental_whitespace_invariants() {
- test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
- test("hello \\ world", 7 .. 8, " a", 6 .. 14);
+ test("hello \\ world", 7 .. 8, "a ", 0 .. 14);
+ test("hello \\ world", 7 .. 8, " a", 0 .. 14);
test("x = y", 1 .. 1, " + y", 0 .. 6);
test("x = y", 1 .. 1, " + y\n", 0 .. 7);
test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
@@ -353,13 +374,13 @@ mod tests {
test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23);
test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
- test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11);
- test("= foo\n bar\n baz", 6..8, "", 0..15);
+ test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11);
+ test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 15);
}
#[test]
fn test_parse_incremental_type_invariants() {
- test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
+ test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11);
test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index c08c5d6f..11ce872f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -30,7 +30,7 @@ pub fn parse(src: &str) -> Arc<GreenNode> {
/// Parse some markup without the topmost node. Returns `Some` if all of the
/// input was consumed.
-pub fn parse_markup_elements(
+pub fn reparse_markup_elements(
prefix: &str,
src: &str,
end_pos: usize,
@@ -43,11 +43,11 @@ pub fn parse_markup_elements(
let mut node: Option<&Green> = None;
let mut iter = reference.iter();
- let mut offset = 0;
+ let mut offset = differential;
let mut replaced = 0;
let mut stopped = false;
- while !p.eof() {
+ 'outer: while !p.eof() {
if let Some(NodeKind::Space(1 ..)) = p.peek() {
if p.column(p.current_end()) < column {
return None;
@@ -56,44 +56,36 @@ pub fn parse_markup_elements(
markup_node(&mut p, &mut at_start);
- if p.prev_end() >= end_pos {
- let recent = p.children.last().unwrap();
- let recent_start = p.prev_end() - recent.len();
-
- while offset <= recent_start {
- if let Some(node) = node {
- // The nodes are equal, at the same position and have the
- // same content. The parsing trees have converged again, so
- // the reparse may stop here.
- if (offset as isize + differential) as usize == recent_start
- && node == recent
- {
- replaced -= 1;
- stopped = true;
- break;
- }
- }
+ if p.prev_end() < end_pos {
+ continue;
+ }
- let result = iter.next();
- if let Some(node) = node {
- offset += node.len();
- }
- node = result;
- if node.is_none() {
- break;
- } else {
- replaced += 1;
+ let recent = p.children.last().unwrap();
+ let recent_start = p.prev_end() - recent.len();
+
+ while offset <= recent_start as isize {
+ if let Some(node) = node {
+ // The nodes are equal, at the same position and have the
+ // same content. The parsing trees have converged again, so
+ // the reparse may stop here.
+ if offset == recent_start as isize && node == recent {
+ replaced -= 1;
+ stopped = true;
+ break 'outer;
}
}
- if stopped {
+ if let Some(node) = node {
+ offset += node.len() as isize;
+ }
+
+ node = iter.next();
+ if node.is_none() {
break;
}
- }
- }
- if p.prev_end() < end_pos {
- return None;
+ replaced += 1;
+ }
}
if p.eof() && !stopped {
@@ -109,14 +101,10 @@ pub fn parse_markup_elements(
}
/// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(
+pub fn reparse_template(
prefix: &str,
src: &str,
end_pos: usize,
- _: isize,
- _: &[Green],
- _: bool,
- _: usize,
) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBracket) {
@@ -135,14 +123,10 @@ pub fn parse_template(
}
/// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(
+pub fn reparse_block(
prefix: &str,
src: &str,
end_pos: usize,
- _: isize,
- _: &[Green],
- _: bool,
- _: usize,
) -> Option<(Vec<Green>, bool, usize)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
if !p.at(&NodeKind::LeftBrace) {
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 8588e586..123871a5 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -2,7 +2,7 @@ use core::slice::SliceIndex;
use std::fmt::{self, Display, Formatter};
use std::mem;
-use super::{Scanner, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
/// A convenient token-based parser.
@@ -30,11 +30,14 @@ pub struct Parser<'s> {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str, mode: TokenMode) -> Self {
- Self::with_offset(src, mode, 0)
+ Self::with_prefix("", src, mode)
}
- fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self {
- let mut tokens = Tokens::new(src, mode, offset);
+ /// Create a new parser for the source string that is prefixed by some text
+ /// that does not need to be parsed but is taken into account for column
+ /// calculation.
+ pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
+ let mut tokens = Tokens::with_prefix(prefix, src, mode);
let current = tokens.next();
Self {
tokens,
@@ -49,13 +52,6 @@ impl<'s> Parser<'s> {
}
}
- /// Create a new parser for the source string that is prefixed by some text
- /// that does not need to be parsed but taken into account for column
- /// calculation.
- pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
- Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len()))
- }
-
/// End the parsing process and return the parsed children.
pub fn finish(self) -> Vec<Green> {
self.children
@@ -218,7 +214,7 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
- self.tokens.scanner().column(index)
+ self.tokens.column(index)
}
/// Continue parsing in a group.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 15060c7b..e4cf56e9 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -10,21 +10,13 @@ pub struct Scanner<'s> {
/// The index at which the peekable character starts. Must be in bounds and
/// at a codepoint boundary to guarantee safety.
index: usize,
- /// Offsets the indentation on the first line of the source.
- column_offset: usize,
}
impl<'s> Scanner<'s> {
/// Create a new char scanner.
#[inline]
pub fn new(src: &'s str) -> Self {
- Self { src, index: 0, column_offset: 0 }
- }
-
- /// Create a new char scanner with an offset for the first line indent.
- #[inline]
- pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self {
- Self { src, index: 0, column_offset }
+ Self { src, index: 0 }
}
/// Whether the end of the string is reached.
@@ -177,30 +169,6 @@ impl<'s> Scanner<'s> {
// optimized away in some cases.
self.src.get(start .. self.index).unwrap_or_default()
}
-
- /// The column index of a given index in the source string.
- #[inline]
- pub fn column(&self, index: usize) -> usize {
- let mut apply_offset = false;
- let res = self.src[.. index]
- .char_indices()
- .rev()
- .take_while(|&(_, c)| !is_newline(c))
- .inspect(|&(i, _)| {
- if i == 0 {
- apply_offset = true
- }
- })
- .count();
-
- // The loop is never executed if the slice is empty, but we are of
- // course still at the start of the first line.
- if self.src[.. index].len() == 0 {
- apply_offset = true;
- }
-
- if apply_offset { res + self.column_offset } else { res }
- }
}
/// Whether this character denotes a newline.
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 4a13694a..91bbf9e8 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -11,9 +11,14 @@ use crate::util::EcoString;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
+ /// The underlying scanner.
s: Scanner<'s>,
+ /// The mode the scanner is in. This determines what tokens it recognizes.
mode: TokenMode,
+ /// Whether the last token has been terminated.
terminated: bool,
+ /// Offsets the indentation on the first line of the source.
+ column_offset: usize,
}
/// What kind of tokens to emit.
@@ -28,11 +33,19 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
- pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self {
+ pub fn new(src: &'s str, mode: TokenMode) -> Self {
+ Self::with_prefix("", src, mode)
+ }
+
+ /// Create a new token iterator with the given mode and a prefix to offset
+ /// column calculations.
+ #[inline]
+ pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
Self {
- s: Scanner::with_indent_offset(src, offset),
+ s: Scanner::new(src),
mode,
terminated: true,
+ column_offset: column(prefix, prefix.len(), 0),
}
}
@@ -74,6 +87,12 @@ impl<'s> Tokens<'s> {
pub fn terminated(&self) -> bool {
self.terminated
}
+
+ /// The column index of a given index in the source string.
+ #[inline]
+ pub fn column(&self, index: usize) -> usize {
+ column(self.s.src(), index, self.column_offset)
+ }
}
impl<'s> Iterator for Tokens<'s> {
@@ -321,7 +340,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
- let column = self.s.column(self.s.index() - 1);
+ let column = self.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') {
@@ -574,6 +593,30 @@ fn keyword(ident: &str) -> Option<NodeKind> {
})
}
+/// The column index of a given index in the source string, given a column offset for the first line.
+#[inline]
+fn column(string: &str, index: usize, offset: usize) -> usize {
+ let mut apply_offset = false;
+ let res = string[.. index]
+ .char_indices()
+ .rev()
+ .take_while(|&(_, c)| !is_newline(c))
+ .inspect(|&(i, _)| {
+ if i == 0 {
+ apply_offset = true
+ }
+ })
+ .count();
+
+ // The loop is never executed if the slice is empty, but we are of
+ // course still at the start of the first line.
+ if index == 0 {
+ apply_offset = true;
+ }
+
+ if apply_offset { res + offset } else { res }
+}
+
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
@@ -689,7 +732,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
- let found = Tokens::new(&src, $mode, 0).collect::<Vec<_>>();
+ let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};