Code Review: Your parsers were so preoccupied with whether they could

author: Martin Haug <mhaug@live.de> 2022-01-03 23:18:21 +0100
committer: Martin Haug <mhaug@live.de> 2022-01-04 00:21:33 +0100
commit: c994cfa7d814e3909682b19322867ed5c676c453 (patch)
tree: 03349230f74786c7128876889c07a31a4932f108 /src/parse
parent: 98c96ba1cb8a46e327de313118e4ce1a84795ae9 (diff)
5 files changed, 291 insertions, 243 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 5cb016d2..4c82f158 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -4,8 +4,8 @@ use std::rc::Rc;
 use crate::syntax::{Green, GreenNode, NodeKind};
 
 use super::{
-    parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup,
-    parse_markup_elements, parse_template, Scanner, TokenMode,
+    is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment,
+    parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode,
 };
 
 /// The conditions that a node has to fulfill in order to be replaced.
@@ -13,21 +13,21 @@ use super::{
 /// This can dictate if a node can be replaced at all and if yes, what can take
 /// its place.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Postcondition {
+pub enum SuccessionRule {
     /// Changing this node can never have an influence on the other nodes.
     Safe,
     /// This node has to be replaced with a single token of the same kind.
     SameKind(Option<TokenMode>),
-    /// Changing this node into a single atomic expression is allowed if it
-    /// appears in code mode, otherwise it is safe.
+    /// In code mode, this node can only be changed into a single atomic
+    /// expression, otherwise it is safe.
     AtomicPrimary,
-    /// Changing an unsafe layer node changes what the parents or the
-    /// surrounding nodes would be and is therefore disallowed. Change the
+    /// Changing an unsafe layer node in code mode changes what the parents or
+    /// the surrounding nodes would be and is therefore disallowed. Change the
     /// parents or children instead. If it appears in Markup, however, it is
     /// safe to change.
     UnsafeLayer,
-    /// Changing an unsafe node or any of its children will trigger undefined
-    /// behavior. Change the parents instead.
+    /// Changing an unsafe node or any of its children is not allowed. Change
+    /// the parents instead.
     Unsafe,
 }
 
@@ -37,11 +37,12 @@ pub enum Postcondition {
 /// existence is plausible with them present. This can be used to encode some
 /// context-free language components for incremental parsing.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Precondition {
+pub enum NeighbourRule {
     /// These nodes depend on being at the start of a line. Reparsing of safe
-    /// left neighbors has to check this invariant. Otherwise, this node is
-    /// safe. Additionally, the indentation of the first right non-trivia,
-    /// non-whitespace sibling must not be greater than the current indentation.
+    /// left neighbors has to check this invariant. Additionally, when
+    /// exchanging the right sibling or inserting such a node the indentation of
+    /// the first right non-trivia, non-whitespace sibling must not be greater
+    /// than the current indentation.
     AtStart,
     /// These nodes depend on not being at the start of a line. Reparsing of
     /// safe left neighbors has to check this invariant. Otherwise, this node is
@@ -77,8 +78,12 @@ impl<'a> Reparser<'a> {
 
 impl Reparser<'_> {
     /// Find the innermost child that is incremental safe.
-    pub fn reparse(&self, green: &mut GreenNode) -> Option<Range<usize>> {
-        self.reparse_step(green, 0, TokenMode::Markup, true)
+    pub fn reparse(&self, green: &mut Rc<GreenNode>) -> Range<usize> {
+        self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true)
+            .unwrap_or_else(|| {
+                *green = parse(self.src);
+                0 .. self.src.len()
+            })
     }
 
     fn reparse_step(
@@ -90,7 +95,7 @@ impl Reparser<'_> {
     ) -> Option<Range<usize>> {
         let mode = green.kind().mode().unwrap_or(parent_mode);
         let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
-        let child_count = green.children().len();
+        let original_count = green.children().len();
 
         // Save the current indent if this is a markup node.
         let indent = match green.kind() {
@@ -134,12 +139,14 @@ impl Reparser<'_> {
             // neighbor!
             if child_span.contains(&self.replace_range.end)
                 || self.replace_range.end == child_span.end
-                    && (mode != TokenMode::Markup || i + 1 == child_count)
+                    && (mode != TokenMode::Markup || i + 1 == original_count)
             {
-                outermost &= i + 1 == child_count;
+                outermost &= i + 1 == original_count;
                 last = Some((i, offset + child.len()));
                 break;
-            } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() {
+            } else if mode != TokenMode::Markup
+                || !child.kind().succession_rule().safe_in_markup()
+            {
                 break;
             }
 
@@ -147,17 +154,17 @@ impl Reparser<'_> {
         }
 
         let (last_idx, last_end) = last?;
-        let children_range = first_idx .. last_idx + 1;
-        let children_span = first_start .. last_end;
+        let superseded_range = first_idx .. last_idx + 1;
+        let superseded_span = first_start .. last_end;
         let last_kind = green.children()[last_idx].kind().clone();
 
         // First, we try if the child itself has another, more specific
         // applicable child.
-        if children_range.len() == 1 {
-            let child = &mut green.children_mut()[children_range.start];
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
             let prev_len = child.len();
 
-            if last_kind.post() != Postcondition::Unsafe {
+            if last_kind.succession_rule() != SuccessionRule::Unsafe {
                 if let Some(range) = match child {
                     Green::Node(node) => self.reparse_step(
                         Rc::make_mut(node),
@@ -168,56 +175,64 @@ impl Reparser<'_> {
                     Green::Token(_) => None,
                 } {
                     let new_len = child.len();
-                    green.update_child_len(new_len, prev_len);
+                    green.update_parent(new_len, prev_len);
                     return Some(range);
                 }
             }
         }
 
         // We only replace multiple children in markup mode.
-        if children_range.len() > 1 && mode == TokenMode::Code {
+        if superseded_range.len() > 1 && mode == TokenMode::Code {
             return None;
         }
 
         // We now have a child that we can replace and a function to do so.
         let func = last_kind.reparsing_func(child_mode, indent)?;
-        let post = last_kind.post();
+        let succession = last_kind.succession_rule();
 
-        let mut column = if mode == TokenMode::Markup {
-            // In this case, we want to pass the indentation to the function.
-            Scanner::new(self.src).column(children_span.start)
-        } else {
-            0
-        };
+        let mut markup_min_column = 0;
 
         // If this is a markup node, we want to save its indent instead to pass
         // the right indent argument.
-        if children_range.len() == 1 {
-            let child = &mut green.children_mut()[children_range.start];
+        if superseded_range.len() == 1 {
+            let child = &mut green.children_mut()[superseded_range.start];
             if let NodeKind::Markup(n) = child.kind() {
-                column = *n;
+                markup_min_column = *n;
             }
         }
 
         // The span of the to-be-reparsed children in the new source.
-        let replace_span = children_span.start
+        let newborn_span = superseded_span.start
             ..
-            children_span.end + self.replace_len - self.replace_range.len();
+            superseded_span.end + self.replace_len - self.replace_range.len();
 
         // For atomic primaries we need to pass in the whole remaining string to
         // check whether the parser would eat more stuff illicitly.
-        let reparse_span = if post == Postcondition::AtomicPrimary {
-            replace_span.start .. self.src.len()
+        let reparse_span = if succession == SuccessionRule::AtomicPrimary {
+            newborn_span.start .. self.src.len()
         } else {
-            replace_span.clone()
+            newborn_span.clone()
         };
 
+        let mut prefix = "";
+        for (i, c) in self.src[.. reparse_span.start].char_indices().rev() {
+            if is_newline(c) {
+                break;
+            }
+            prefix = &self.src[i .. reparse_span.start];
+        }
+
         // Do the reparsing!
-        let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?;
+        let (mut newborns, terminated) = func(
+            &prefix,
+            &self.src[reparse_span.clone()],
+            at_start,
+            markup_min_column,
+        )?;
 
         // Make sure that atomic primaries ate only what they were supposed to.
-        if post == Postcondition::AtomicPrimary {
-            let len = replace_span.len();
+        if succession == SuccessionRule::AtomicPrimary {
+            let len = newborn_span.len();
             if newborns.len() > 1 && newborns[0].len() == len {
                 newborns.truncate(1);
             } else if newborns.iter().map(Green::len).sum::<usize>() != len {
@@ -234,16 +249,16 @@ impl Reparser<'_> {
         // If all post- and preconditions match, we are good to go!
         if validate(
             green.children(),
-            children_range.clone(),
+            superseded_range.clone(),
             at_start,
             &newborns,
             mode,
-            post,
-            replace_span.clone(),
+            succession,
+            newborn_span.clone(),
             self.src,
         ) {
-            green.replace_child_range(children_range, newborns);
-            Some(replace_span)
+            green.replace_children(superseded_range, newborns);
+            Some(newborn_span)
         } else {
             None
         }
@@ -252,27 +267,27 @@ impl Reparser<'_> {
 
 /// Validate that a node replacement is allowed by post- and preconditions.
 fn validate(
-    prev_children: &[Green],
-    children_range: Range<usize>,
+    superseded: &[Green],
+    superseded_range: Range<usize>,
     mut at_start: bool,
     newborns: &[Green],
     mode: TokenMode,
-    post: Postcondition,
-    replace_span: Range<usize>,
+    post: SuccessionRule,
+    newborn_span: Range<usize>,
     src: &str,
 ) -> bool {
     // Atomic primaries must only generate one new child.
-    if post == Postcondition::AtomicPrimary && newborns.len() != 1 {
+    if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 {
         return false;
     }
 
     // Same kind in mode `inside` must generate only one child and that child
     // must be of the same kind as previously.
-    if let Postcondition::SameKind(inside) = post {
-        let prev_kind = prev_children[children_range.start].kind();
-        let prev_mode = prev_kind.mode().unwrap_or(mode);
-        if inside.map_or(true, |m| m == prev_mode)
-            && (newborns.len() != 1 || prev_kind != newborns[0].kind())
+    if let SuccessionRule::SameKind(inside) = post {
+        let superseded_kind = superseded[superseded_range.start].kind();
+        let superseded_mode = superseded_kind.mode().unwrap_or(mode);
+        if inside.map_or(true, |m| m == superseded_mode)
+            && (newborns.len() != 1 || superseded_kind != newborns[0].kind())
         {
             return false;
         }
@@ -286,15 +301,15 @@ fn validate(
     // Check if there are any `AtStart` predecessors which require a certain
     // indentation.
     let s = Scanner::new(src);
-    let mut prev_pos = replace_span.start;
-    for child in (&prev_children[.. children_range.start]).iter().rev() {
+    let mut prev_pos = newborn_span.start;
+    for child in (&superseded[.. superseded_range.start]).iter().rev() {
         prev_pos -= child.len();
         if !child.kind().is_trivia() {
-            if child.kind().pre() == Precondition::AtStart {
+            if child.kind().neighbour_rule() == NeighbourRule::AtStart {
                 let left_col = s.column(prev_pos);
 
                 // Search for the first non-trivia newborn.
-                let mut new_pos = replace_span.start;
+                let mut new_pos = newborn_span.start;
                 let mut child_col = None;
                 for child in newborns {
                     if !child.kind().is_trivia() {
@@ -323,15 +338,15 @@ fn validate(
 
     // Ensure that a possible at-start or not-at-start precondition of
     // a node after the replacement range is satisfied.
-    for child in &prev_children[children_range.end ..] {
-        if !child.kind().is_trivia() {
-            let pre = child.kind().pre();
-            if (pre == Precondition::AtStart && !at_start)
-                || (pre == Precondition::NotAtStart && at_start)
-            {
-                return false;
-            }
+    for child in &superseded[superseded_range.end ..] {
+        let neighbour_rule = child.kind().neighbour_rule();
+        if (neighbour_rule == NeighbourRule::AtStart && !at_start)
+            || (neighbour_rule == NeighbourRule::NotAtStart && at_start)
+        {
+            return false;
+        }
 
+        if !child.kind().is_trivia() {
             break;
         }
 
@@ -339,42 +354,40 @@ fn validate(
     }
 
     // Verify that the last of the newborns is not `NotAtEnd`.
-    if newborns
-        .last()
-        .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd)
-    {
+    if newborns.last().map_or(false, |child| {
+        child.kind().neighbour_rule() == NeighbourRule::NotAtEnd
+    }) {
         return false;
     }
 
     // We have to check whether the last non-trivia newborn is `AtStart` and
     // verify the indent of its right neighbors in order to make sure its
     // indentation requirements are fulfilled.
-    let mut child_pos = replace_span.end;
-    let mut child_col = None;
+    let mut child_pos = newborn_span.end;
     for child in newborns.iter().rev() {
         child_pos -= child.len();
 
-        if !child.kind().is_trivia() {
-            if child.kind().pre() == Precondition::AtStart {
-                child_col = Some(s.column(child_pos));
-            }
-            break;
+        if child.kind().is_trivia() {
+            continue;
         }
-    }
 
-    if let Some(child_col) = child_col {
-        let mut right_pos = replace_span.end;
-        for child in &prev_children[children_range.end ..] {
-            if !child.kind().is_trivia() {
+        if child.kind().neighbour_rule() == NeighbourRule::AtStart {
+            let child_col = s.column(child_pos);
+
+            let mut right_pos = newborn_span.end;
+            for child in &superseded[superseded_range.end ..] {
+                if child.kind().is_trivia() {
+                    right_pos += child.len();
+                    continue;
+                }
+
                 if s.column(right_pos) > child_col {
                     return false;
                 }
-
                 break;
             }
-
-            right_pos += child.len();
         }
+        break;
     }
 
     true
@@ -387,13 +400,15 @@ impl NodeKind {
         &self,
         parent_mode: TokenMode,
         indent: usize,
-    ) -> Option<fn(&str, bool, usize) -> Option<(Vec<Green>, bool)>> {
+    ) -> Option<fn(&str, &str, bool, usize) -> Option<(Vec<Green>, bool)>> {
         let mode = self.mode().unwrap_or(parent_mode);
-        match self.post() {
-            Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
-            Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
-            Postcondition::AtomicPrimary => Some(parse_atomic_markup),
-            Postcondition::SameKind(x) if x == None || x == Some(mode) => match self {
+        match self.succession_rule() {
+            SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
+            SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
+                Some(parse_atomic)
+            }
+            SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
+            SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
                 NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
                 NodeKind::Block => Some(parse_block),
@@ -409,7 +424,7 @@ impl NodeKind {
 
     /// Whether it is safe to do incremental parsing on this node. Never allow
     /// non-termination errors if this is not already the last leaf node.
-    pub fn post(&self) -> Postcondition {
+    pub fn succession_rule(&self) -> SuccessionRule {
         match self {
             // Replacing parenthesis changes if the expression is balanced and
             // is therefore not safe.
@@ -418,7 +433,7 @@ impl NodeKind {
             | Self::LeftBrace
             | Self::RightBrace
             | Self::LeftParen
-            | Self::RightParen => Postcondition::Unsafe,
+            | Self::RightParen => SuccessionRule::Unsafe,
 
             // Replacing an operator can change whether the parent is an
             // operation which makes it unsafe. The star can appear in markup.
@@ -445,7 +460,7 @@ impl NodeKind {
             | Self::Or
             | Self::With
             | Self::Dots
-            | Self::Arrow => Postcondition::Unsafe,
+            | Self::Arrow => SuccessionRule::Unsafe,
 
             // These keywords change what kind of expression the parent is and
             // how far the expression would go.
@@ -461,14 +476,14 @@ impl NodeKind {
             | Self::Return
             | Self::Import
             | Self::Include
-            | Self::From => Postcondition::Unsafe,
+            | Self::From => SuccessionRule::Unsafe,
 
             // Changing the heading level, enum numbering, or list bullet
             // changes the next layer.
-            Self::EnumNumbering(_) => Postcondition::Unsafe,
+            Self::EnumNumbering(_) => SuccessionRule::Unsafe,
 
             // This can be anything, so we don't make any promises.
-            Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
+            Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe,
 
             // These are complex expressions which may screw with their
             // environments.
@@ -477,33 +492,33 @@ impl NodeKind {
             | Self::Binary
             | Self::CallArgs
             | Self::Named
-            | Self::Spread => Postcondition::UnsafeLayer,
+            | Self::Spread => SuccessionRule::UnsafeLayer,
 
             // The closure is a bit magic with the let expression, and also it
             // is not atomic.
-            Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer,
+            Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer,
 
             // Missing these creates errors for the parents.
             Self::WithExpr | Self::ForPattern | Self::ImportItems => {
-                Postcondition::UnsafeLayer
+                SuccessionRule::UnsafeLayer
             }
 
             // Only markup is expected at the points where it does occur. The
             // indentation must be preserved as well, also for the children.
-            Self::Markup(_) => Postcondition::SameKind(None),
+            Self::Markup(_) => SuccessionRule::SameKind(None),
 
             // These can appear everywhere and must not change to other stuff
             // because that could change the outer expression.
-            Self::LineComment | Self::BlockComment => Postcondition::SameKind(None),
+            Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None),
 
             // These can appear as bodies and would trigger an error if they
             // became something else.
-            Self::Template => Postcondition::SameKind(None),
-            Self::Block => Postcondition::SameKind(Some(TokenMode::Code)),
+            Self::Template => SuccessionRule::SameKind(None),
+            Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)),
 
             // Whitespace in code mode has to remain whitespace or else the type
             // of things would change.
-            Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)),
+            Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)),
 
             // These are expressions that can be replaced by other expressions.
             Self::Ident(_)
@@ -519,7 +534,7 @@ impl NodeKind {
             | Self::Dict
             | Self::Group
             | Self::None
-            | Self::Auto => Postcondition::AtomicPrimary,
+            | Self::Auto => SuccessionRule::AtomicPrimary,
 
             // More complex, but still an expression.
             Self::ForExpr
@@ -528,11 +543,11 @@ impl NodeKind {
             | Self::LetExpr
             | Self::SetExpr
             | Self::ImportExpr
-            | Self::IncludeExpr => Postcondition::AtomicPrimary,
+            | Self::IncludeExpr => SuccessionRule::AtomicPrimary,
 
             // This element always has to remain in the same column so better
             // reparse the whole parent.
-            Self::Raw(_) => Postcondition::Unsafe,
+            Self::Raw(_) => SuccessionRule::Unsafe,
 
             // These are all replaceable by other tokens.
             Self::Parbreak
@@ -548,22 +563,22 @@ impl NodeKind {
             | Self::Heading
             | Self::Enum
             | Self::List
-            | Self::Math(_) => Postcondition::Safe,
+            | Self::Math(_) => SuccessionRule::Safe,
         }
     }
 
     /// The appropriate precondition for the type.
-    pub fn pre(&self) -> Precondition {
+    pub fn neighbour_rule(&self) -> NeighbourRule {
         match self {
-            Self::Heading | Self::Enum | Self::List => Precondition::AtStart,
-            Self::TextInLine(_) => Precondition::NotAtStart,
-            Self::Error(_, _) => Precondition::NotAtEnd,
-            _ => Precondition::None,
+            Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart,
+            Self::TextInLine(_) => NeighbourRule::NotAtStart,
+            Self::Error(_, _) => NeighbourRule::NotAtEnd,
+            _ => NeighbourRule::None,
         }
     }
 }
 
-impl Postcondition {
+impl SuccessionRule {
     /// Whether a node with this condition can be reparsed in markup mode.
     pub fn safe_in_markup(&self) -> bool {
         match self {
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f4826730..a9752645 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -29,72 +29,102 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Code);
+pub fn parse_atomic(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     primary(&mut p, true).ok()?;
-    p.eject_partial()
+    p.consume_unterminated()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
-pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
+pub fn parse_atomic_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     markup_expr(&mut p);
-    p.eject_partial()
+    p.consume_unterminated()
 }
 
 /// Parse some markup. Returns `Some` if all of the input was consumed.
-pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
-    if column == 0 {
+pub fn parse_markup(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    min_column: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
+    if min_column == 0 {
         markup(&mut p);
     } else {
-        markup_indented(&mut p, column);
+        markup_indented(&mut p, min_column);
     }
-    p.eject()
+    p.consume()
 }
 
 /// Parse some markup without the topmost node. Returns `Some` if all of the
 /// input was consumed.
 pub fn parse_markup_elements(
+    prefix: &str,
     src: &str,
     mut at_start: bool,
-    column: usize,
+    _: usize,
 ) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(src, TokenMode::Markup);
-    p.offset(column);
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     while !p.eof() {
         markup_node(&mut p, &mut at_start);
     }
-    p.eject()
+    p.consume()
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_template(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     if !p.at(&NodeKind::LeftBracket) {
         return None;
     }
 
     template(&mut p);
-    p.eject()
+    p.consume()
 }
 
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_block(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     if !p.at(&NodeKind::LeftBrace) {
         return None;
     }
 
     block(&mut p);
-    p.eject()
+    p.consume()
 }
 
 /// Parse a comment. Returns `Some` if all of the input was consumed.
-pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec<Green>, bool)> {
-    let mut p = Parser::new(source, TokenMode::Code);
+pub fn parse_comment(
+    prefix: &str,
+    src: &str,
+    _: bool,
+    _: usize,
+) -> Option<(Vec<Green>, bool)> {
+    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     comment(&mut p).ok()?;
-    p.eject()
+    p.consume()
 }
 
 /// Parse markup.
@@ -111,7 +141,7 @@ fn markup_indented(p: &mut Parser, column: usize) {
     });
 
     markup_while(p, false, column, &mut |p| match p.peek() {
-        Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column,
+        Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
         _ => true,
     })
 }
@@ -170,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
             p.eat();
         }
 
-        NodeKind::Eq if *at_start => heading(p),
-        NodeKind::Minus if *at_start => list_node(p),
-        NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
-
-        // Line-based markup that is not currently at the start of the line.
-        NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => {
-            p.convert(NodeKind::TextInLine(p.peek_src().into()))
-        }
+        NodeKind::Eq => heading(p, *at_start),
+        NodeKind::Minus => list_node(p, *at_start),
+        NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
 
         // Hashtag + keyword / identifier.
         NodeKind::Ident(_)
@@ -201,42 +226,49 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
 }
 
 /// Parse a heading.
-fn heading(p: &mut Parser) {
-    p.perform(NodeKind::Heading, |p| {
-        p.eat_assert(&NodeKind::Eq);
-        while p.eat_if(&NodeKind::Eq) {}
+fn heading(p: &mut Parser, at_start: bool) {
+    let marker = p.marker();
+    let current_start = p.current_start();
+    p.eat_assert(&NodeKind::Eq);
+    while p.eat_if(&NodeKind::Eq) {}
+
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
-    });
+        marker.end(p, NodeKind::Heading);
+    } else {
+        let text = p.get(current_start .. p.prev_end()).into();
+        marker.convert(p, NodeKind::TextInLine(text));
+    }
 }
 
 /// Parse a single list item.
-fn list_node(p: &mut Parser) {
+fn list_node(p: &mut Parser, at_start: bool) {
     let marker = p.marker();
-    let src: EcoString = p.peek_src().into();
+    let text: EcoString = p.peek_src().into();
     p.eat_assert(&NodeKind::Minus);
 
-    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
         marker.end(p, NodeKind::List);
     } else {
-        marker.convert(p, NodeKind::TextInLine(src));
+        marker.convert(p, NodeKind::TextInLine(text));
     }
 }
 
 /// Parse a single enum item.
-fn enum_node(p: &mut Parser) {
+fn enum_node(p: &mut Parser, at_start: bool) {
     let marker = p.marker();
-    let src: EcoString = p.peek_src().into();
+    let text: EcoString = p.peek_src().into();
     p.eat();
 
-    if p.peek().map_or(true, |kind| kind.is_whitespace()) {
+    if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
         let column = p.column(p.prev_end());
         markup_indented(p, column);
         marker.end(p, NodeKind::Enum);
     } else {
-        marker.convert(p, NodeKind::TextInLine(src));
+        marker.convert(p, NodeKind::TextInLine(text));
     }
 }
 
@@ -582,23 +614,18 @@ fn template(p: &mut Parser) {
 fn block(p: &mut Parser) {
     p.perform(NodeKind::Block, |p| {
         p.start_group(Group::Brace);
-        expr_list(p);
-        p.end_group();
-    });
-}
+        while !p.eof() {
+            p.start_group(Group::Stmt);
+            if expr(p).is_ok() && !p.eof() {
+                p.expected_at("semicolon or line break");
+            }
+            p.end_group();
 
-/// Parse a number of code expressions.
-fn expr_list(p: &mut Parser) {
-    while !p.eof() {
-        p.start_group(Group::Stmt);
-        if expr(p).is_ok() && !p.eof() {
-            p.expected_at("semicolon or line break");
+            // Forcefully skip over newlines since the group's contents can't.
+            p.eat_while(|t| matches!(t, NodeKind::Space(_)));
         }
         p.end_group();
-
-        // Forcefully skip over newlines since the group's contents can't.
-        p.eat_while(|t| matches!(t, NodeKind::Space(_)));
-    }
+    });
 }
 
 /// Parse a function call.
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index f36155d5..4e5b277d 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,7 +1,8 @@
+use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
 
-use super::{TokenMode, Tokens};
+use super::{Scanner, TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
 use crate::util::EcoString;
 
@@ -24,8 +25,7 @@ pub struct Parser<'s> {
     /// Is `Some` if there is an unterminated group at the last position where
     /// groups were terminated.
     last_unterminated: Option<usize>,
-    /// Offset the indentation. This can be used if the parser is processing a
-    /// subslice of the source and there was leading indent.
+    /// Offsets the indentation on the first line of the source.
     column_offset: usize,
 }
 
@@ -47,18 +47,31 @@ impl<'s> Parser<'s> {
         }
     }
 
+    /// Create a new parser for a source string that is preceded by some text
+    /// which is not parsed itself but must be taken into account when
+    /// calculating columns.
+    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
+        let mut p = Self::new(src, mode);
+        p.column_offset = Scanner::new(prefix).column(prefix.len());
+        p
+    }
+
     /// End the parsing process and return the last child.
     pub fn finish(self) -> Vec<Green> {
         self.children
     }
 
-    /// End the parsing process and return multiple children.
-    pub fn eject(self) -> Option<(Vec<Green>, bool)> {
-        if self.eof() && self.group_success() {
-            Some((self.children, self.tokens.was_terminated()))
-        } else {
-            None
-        }
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated.
+    pub fn consume(self) -> Option<(Vec<Green>, bool)> {
+        (self.eof() && self.terminated())
+            .then(|| (self.children, self.tokens.terminated()))
+    }
+
+    /// End the parsing process and return multiple children and whether the
+    /// last token was terminated, even if unparsed source text remains.
+    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+        self.terminated().then(|| (self.children, self.tokens.terminated()))
     }
 
     /// Create a new marker.
@@ -100,18 +113,6 @@ impl<'s> Parser<'s> {
         output
     }
 
-    /// End the parsing process and return multiple children, even if there
-    /// remains stuff in the string.
-    pub fn eject_partial(self) -> Option<(Vec<Green>, bool)> {
-        self.group_success()
-            .then(|| (self.children, self.tokens.was_terminated()))
-    }
-
-    /// Set an indentation offset.
-    pub fn offset(&mut self, columns: usize) {
-        self.column_offset = columns;
-    }
-
     /// Whether the end of the source string or group is reached.
     pub fn eof(&self) -> bool {
         self.eof
@@ -199,6 +200,14 @@ impl<'s> Parser<'s> {
         self.tokens.scanner().get(self.current_start() .. self.current_end())
     }
 
+    /// Obtain a range of the source code.
+    pub fn get<I>(&self, index: I) -> &'s str
+    where
+        I: SliceIndex<str, Output = str>,
+    {
+        self.tokens.scanner().get(index)
+    }
+
     /// The byte index at which the last non-trivia token ended.
     pub fn prev_end(&self) -> usize {
         self.prev_end
@@ -216,13 +225,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column index for the given byte index.
     pub fn column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index) + self.column_offset
-    }
-
-    /// Determine the column index for the given byte index while ignoring the
-    /// offset.
-    pub fn clean_column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index)
+        self.tokens.scanner().column_offset(index, self.column_offset)
     }
 
     /// Continue parsing in a group.
@@ -260,10 +263,8 @@ impl<'s> Parser<'s> {
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
         self.repeek();
-        if let Some(n) = self.last_unterminated {
-            if n != self.prev_end() {
-                self.last_unterminated = None;
-            }
+        if self.last_unterminated != Some(self.prev_end()) {
+            self.last_unterminated = None;
         }
 
         let mut rescan = self.tokens.mode() != group_mode;
@@ -301,23 +302,15 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// Check if the group processing was successfully terminated.
-    pub fn group_success(&self) -> bool {
-        self.last_unterminated.is_none() && self.groups.is_empty()
+    /// Checks if all groups were correctly terminated.
+    pub fn terminated(&self) -> bool {
+        self.groups.is_empty() && self.last_unterminated.is_none()
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
     /// handling.
     fn bump(&mut self) {
         let kind = self.current.take().unwrap();
-        if match kind {
-            NodeKind::Space(n) if n > 0 => true,
-            NodeKind::Parbreak => true,
-            _ => false,
-        } {
-            self.column_offset = 0;
-        }
-
         let len = self.tokens.index() - self.current_start;
         self.children.push(GreenData::new(kind, len).into());
         self.current_start = self.tokens.index();
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index c735be40..6db89132 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -162,11 +162,26 @@ impl<'s> Scanner<'s> {
     /// The column index of a given index in the source string.
     #[inline]
     pub fn column(&self, index: usize) -> usize {
-        self.src[.. index]
-            .chars()
+        self.column_offset(index, 0)
+    }
+
+    /// The column index of a given index in the source string when an offset is
+    /// applied to the first line of the string.
+    #[inline]
+    pub fn column_offset(&self, index: usize, offset: usize) -> usize {
+        // An empty prefix (`index == 0`) yields no chars to inspect below, yet
+        // it still lies on the first line, so the offset must apply there too.
+        let mut apply_offset = index == 0;
+        let res = self.src[.. index]
+            .char_indices()
             .rev()
-            .take_while(|&c| !is_newline(c))
-            .count()
+            .take_while(|&(_, c)| !is_newline(c))
+            .inspect(|&(i, _)| {
+                // Reaching byte 0 means no newline precedes `index`.
+                apply_offset |= i == 0;
+            })
+            .count();
+        if apply_offset { res + offset } else { res }
     }
 }
 
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 7dfca2bf..69c4d2de 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -13,7 +13,7 @@ use crate::util::EcoString;
 pub struct Tokens<'s> {
     s: Scanner<'s>,
     mode: TokenMode,
-    was_terminated: bool,
+    terminated: bool,
 }
 
 /// What kind of tokens to emit.
@@ -32,7 +32,7 @@ impl<'s> Tokens<'s> {
         Self {
             s: Scanner::new(src),
             mode,
-            was_terminated: true,
+            terminated: true,
         }
     }
 
@@ -71,8 +71,8 @@ impl<'s> Tokens<'s> {
 
     /// Whether the last token was terminated.
     #[inline]
-    pub fn was_terminated(&self) -> bool {
-        self.was_terminated
+    pub fn terminated(&self) -> bool {
+        self.terminated
     }
 }
 
@@ -128,9 +128,7 @@ impl<'s> Tokens<'s> {
             '`' => self.raw(),
             '$' => self.math(),
             '-' => self.hyph(),
-            '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => {
-                NodeKind::Eq
-            }
+            '=' => NodeKind::Eq,
             c if c == '.' || c.is_ascii_digit() => self.numbering(start, c),
 
             // Plain text.
@@ -259,7 +257,7 @@ impl<'s> Tokens<'s> {
                             )
                         }
                     } else {
-                        self.was_terminated = false;
+                        self.terminated = false;
                         NodeKind::Error(
                             ErrorPos::End,
                             "expected closing brace".into(),
@@ -352,7 +350,7 @@ impl<'s> Tokens<'s> {
             let remaining = backticks - found;
             let noun = if remaining == 1 { "backtick" } else { "backticks" };
 
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if found == 0 {
@@ -400,7 +398,7 @@ impl<'s> Tokens<'s> {
                 display,
             }))
         } else {
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(
                 ErrorPos::End,
                 if !display || (!escaped && dollar) {
@@ -489,7 +487,7 @@ impl<'s> Tokens<'s> {
         if self.s.eat_if('"') {
             NodeKind::Str(string)
         } else {
-            self.was_terminated = false;
+            self.terminated = false;
             NodeKind::Error(ErrorPos::End, "expected quote".into())
         }
     }
@@ -497,7 +495,7 @@ impl<'s> Tokens<'s> {
     fn line_comment(&mut self) -> NodeKind {
         self.s.eat_until(is_newline);
         if self.s.peek().is_none() {
-            self.was_terminated = false;
+            self.terminated = false;
         }
         NodeKind::LineComment
     }
@@ -505,7 +503,7 @@ impl<'s> Tokens<'s> {
     fn block_comment(&mut self) -> NodeKind {
         let mut state = '_';
         let mut depth = 1;
-        self.was_terminated = false;
+        self.terminated = false;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
         while let Some(c) = self.s.eat() {
@@ -513,7 +511,7 @@ impl<'s> Tokens<'s> {
                 ('*', '/') => {
                     depth -= 1;
                     if depth == 0 {
-                        self.was_terminated = true;
+                        self.terminated = true;
                         break;
                     }
                     '_'
@@ -742,7 +740,7 @@ mod tests {
         // Test code symbols in text.
         t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
         t!(Markup[" /"]: ";:,|/+"  => Text(";:,|"), Text("/+"));
-        t!(Markup[" /"]: "=-a"     => Text("="), Minus, Text("a"));
+        t!(Markup[" /"]: "=-a"     => Eq, Minus, Text("a"));
         t!(Markup[" "]: "#123"     => Text("#"), Text("123"));
 
         // Test text ends.
author	Martin Haug <mhaug@live.de>	2022-01-03 23:18:21 +0100
committer	Martin Haug <mhaug@live.de>	2022-01-04 00:21:33 +0100
commit	c994cfa7d814e3909682b19322867ed5c676c453 (patch)
tree	03349230f74786c7128876889c07a31a4932f108 /src/parse
parent	98c96ba1cb8a46e327de313118e4ce1a84795ae9 (diff)