diff options
| author | Martin Haug <mhaug@live.de> | 2022-06-01 16:57:38 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-06-01 16:57:38 +0200 |
| commit | a937462491a63f5cff3551b5bb8bc45fb350f0b6 (patch) | |
| tree | 7916fca31328fef3e21d3bd62eca132369da81b0 /src/syntax/mod.rs | |
| parent | 665ed12825918bd02a6d6dbcb67860a83dd41600 (diff) | |
| parent | af10b08cc1bd5ef78c5c048a0cbc83b123f1ffd4 (diff) | |
Merge pull request #73 from typst/span-numbers
Diffstat (limited to 'src/syntax/mod.rs')
| -rw-r--r-- | src/syntax/mod.rs | 725 |
1 files changed, 379 insertions, 346 deletions
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 69bcb0a0..4a163d78 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -13,25 +13,25 @@ pub use highlight::*; pub use span::*; use self::ast::{MathNode, RawNode, TypedNode, Unit}; -use crate::diag::Error; +use crate::diag::{Error, ErrorPos}; use crate::source::SourceId; use crate::util::EcoString; -/// An inner or leaf node in the untyped green tree. +/// An inner or leaf node in the untyped syntax tree. #[derive(Clone, PartialEq, Hash)] -pub enum Green { +pub enum SyntaxNode { /// A reference-counted inner node. - Node(Arc<GreenNode>), - /// A terminal, owned token. - Token(GreenData), + Inner(Arc<InnerNode>), + /// A leaf token. + Leaf(NodeData), } -impl Green { +impl SyntaxNode { /// Returns the metadata of the node. - fn data(&self) -> &GreenData { + pub fn data(&self) -> &NodeData { match self { - Green::Node(n) => &n.data, - Green::Token(t) => t, + Self::Inner(inner) => &inner.data, + Self::Leaf(leaf) => leaf, } } @@ -45,106 +45,191 @@ impl Green { self.data().len() } - /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { + /// The number of descendants, including the node itself. + pub fn descendants(&self) -> usize { match self { - Self::Node(node) => node.erroneous, - Self::Token(data) => data.kind.is_error(), + Self::Inner(inner) => inner.descendants(), + Self::Leaf(_) => 1, } } + /// The span of the node. + pub fn span(&self) -> Span { + self.data().span() + } + /// The node's children. - pub fn children(&self) -> &[Green] { + pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { match self { - Green::Node(n) => n.children(), - Green::Token(_) => &[], + Self::Inner(inner) => inner.children(), + Self::Leaf(_) => [].iter(), } } - /// Whether the node is a leaf node in the green tree. - pub fn is_leaf(&self) -> bool { + /// Whether the node or its children contain an error. + pub fn erroneous(&self) -> bool { match self { - Green::Node(n) => n.children().is_empty(), - Green::Token(_) => true, + Self::Inner(node) => node.erroneous, + Self::Leaf(data) => data.kind.is_error(), } } + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec<Error> { + if !self.erroneous() { + return vec![]; + } + + match self.kind() { + &NodeKind::Error(pos, ref message) => { + vec![Error { pos, ..Error::new(self.span(), message) }] + } + _ => self + .children() + .filter(|node| node.erroneous()) + .flat_map(|node| node.errors()) + .collect(), + } + } + + /// Convert the node to a typed AST node. + pub fn cast<T>(&self) -> Option<T> + where + T: TypedNode, + { + T::from_untyped(self) + } + + /// Get the first child that can cast to some AST type. + pub fn cast_first_child<T: TypedNode>(&self) -> Option<T> { + self.children().find_map(Self::cast) + } + + /// Get the last child that can cast to some AST type. + pub fn cast_last_child<T: TypedNode>(&self) -> Option<T> { + self.children().rev().find_map(Self::cast) + } + /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { - Self::Node(node) => { - let node = Arc::make_mut(node); + Self::Inner(inner) => { + let node = Arc::make_mut(inner); node.erroneous |= kind.is_error(); node.data.kind = kind; } - Self::Token(data) => data.kind = kind, + Self::Leaf(leaf) => leaf.kind = kind, + } + } + + /// Set a synthetic span for the node and all its descendants. + pub fn synthesize(&mut self, span: Span) { + match self { + Self::Inner(inner) => Arc::make_mut(inner).synthesize(span), + Self::Leaf(leaf) => leaf.synthesize(span), + } + } + + /// Assign spans to each node. + pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult { + match self { + Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within), + Self::Leaf(leaf) => leaf.numberize(id, within), + } + } + + /// The upper bound of assigned numbers in this subtree. + pub fn upper(&self) -> u64 { + match self { + Self::Inner(inner) => inner.upper(), + Self::Leaf(leaf) => leaf.span().number() + 1, } } - /// Set a synthetic span for the node and all its children. - pub fn synthesize(&mut self, span: Arc<Span>) { + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> { match self { - Green::Node(n) => Arc::make_mut(n).synthesize(span), - Green::Token(t) => t.synthesize(span), + Self::Inner(inner) => inner.range(span, offset), + Self::Leaf(leaf) => { + (span == leaf.span).then(|| offset .. offset + self.len()) + } + } + } + + /// Returns all leaf descendants of this node (may include itself). + /// + /// This method is slow and only intended for testing. + pub fn leafs(&self) -> Vec<Self> { + if match self { + Self::Inner(inner) => inner.children.is_empty(), + Self::Leaf(_) => true, + } { + vec![self.clone()] + } else { + self.children().flat_map(Self::leafs).collect() } } } -impl Default for Green { +impl Default for SyntaxNode { fn default() -> Self { - Self::Token(GreenData::new(NodeKind::None, 0)) + Self::Leaf(NodeData::new(NodeKind::None, 0)) } } -impl Debug for Green { +impl Debug for SyntaxNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Self::Node(node) => node.fmt(f), - Self::Token(token) => token.fmt(f), + Self::Inner(node) => node.fmt(f), + Self::Leaf(token) => token.fmt(f), } } } -/// An inner node in the untyped green tree. -#[derive(Clone, PartialEq, Hash)] -pub struct GreenNode { +/// An inner node in the untyped syntax tree. +#[derive(Clone, Hash)] +pub struct InnerNode { /// Node metadata. - data: GreenData, - /// This node's children, losslessly make up this node. - children: Vec<Green>, + data: NodeData, + /// The number of nodes in the whole subtree, including this node. + descendants: usize, /// Whether this node or any of its children are erroneous. erroneous: bool, + /// The upper bound of this node's numbering range. + upper: u64, + /// This node's children, losslessly make up this node. + children: Vec<SyntaxNode>, } -impl GreenNode { +impl InnerNode { /// Creates a new node with the given kind and a single child. - pub fn with_child(kind: NodeKind, child: impl Into<Green>) -> Self { + pub fn with_child(kind: NodeKind, child: impl Into<SyntaxNode>) -> Self { Self::with_children(kind, vec![child.into()]) } /// Creates a new node with the given kind and children. - pub fn with_children(kind: NodeKind, children: Vec<Green>) -> Self { + pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self { + let mut len = 0; + let mut descendants = 1; let mut erroneous = kind.is_error(); - let len = children - .iter() - .inspect(|c| erroneous |= c.erroneous()) - .map(Green::len) - .sum(); + + for child in &children { + len += child.len(); + descendants += child.descendants(); + erroneous |= child.erroneous(); + } Self { - data: GreenData::new(kind, len), - children, + data: NodeData::new(kind, len), + descendants, erroneous, + upper: 0, + children, } } - /// The node's children. - pub fn children(&self) -> &[Green] { - &self.children - } - /// The node's metadata. - fn data(&self) -> &GreenData { + pub fn data(&self) -> &NodeData { &self.data } @@ -158,59 +243,233 @@ impl GreenNode { self.data().len() } - /// Set a synthetic span for the node and all its children. - pub fn synthesize(&mut self, span: Arc<Span>) { - self.data.synthesize(span.clone()); + /// The node's span. + pub fn span(&self) -> Span { + self.data().span() + } + + /// The number of descendants, including the node itself. + pub fn descendants(&self) -> usize { + self.descendants + } + + /// The node's children. + pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { + self.children.iter() + } + + /// Set a synthetic span for the node and all its descendants. + pub fn synthesize(&mut self, span: Span) { + self.data.synthesize(span); for child in &mut self.children { - child.synthesize(span.clone()); + child.synthesize(span); + } + } + + /// Assign span numbers `within` an interval to this node's subtree or just + /// a `range` of its children. + pub fn numberize( + &mut self, + id: SourceId, + range: Option<Range<usize>>, + within: Range<u64>, + ) -> NumberingResult { + // Determine how many nodes we will number. + let descendants = match &range { + Some(range) if range.is_empty() => return Ok(()), + Some(range) => self.children[range.clone()] + .iter() + .map(SyntaxNode::descendants) + .sum::<usize>(), + None => self.descendants, + }; + + // Determine the distance between two neighbouring assigned numbers. If + // possible, we try to fit all numbers into the left half of `within` + // so that there is space for future insertions. + let space = within.end - within.start; + let mut stride = space / (2 * descendants as u64); + if stride == 0 { + stride = space / self.descendants as u64; + if stride == 0 { + return Err(Unnumberable); + } + } + + // Number this node itself. + let mut start = within.start; + if range.is_none() { + let end = start + stride; + self.data.numberize(id, start .. end)?; + self.upper = within.end; + start = end; + } + + // Number the children. + let len = self.children.len(); + for child in &mut self.children[range.unwrap_or(0 .. len)] { + let end = start + child.descendants() as u64 * stride; + child.numberize(id, start .. end)?; + start = end; + } + + Ok(()) + } + + /// The upper bound of assigned numbers in this subtree. + pub fn upper(&self) -> u64 { + self.upper + } + + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> { + // Check whether we found it. + if self.data.span == span { + return Some(offset .. offset + self.len()); } + + // The parent of a subtree has a smaller span number than all of its + // descendants. Therefore, we can bail out early if the target span's + // number is smaller than our number. + if span.number() < self.span().number() { + return None; + } + + let mut children = self.children.iter().peekable(); + while let Some(child) = children.next() { + // Every node in this child's subtree has a smaller span number than + // the next sibling. Therefore we only need to recurse if the next + // sibling's span number is larger than the target span's number. + if children + .peek() + .map_or(true, |next| next.span().number() > span.number()) + { + if let Some(range) = child.range(span, offset) { + return Some(range); + } + } + + offset += child.len(); + } + + None } /// The node's children, mutably. - pub(crate) fn children_mut(&mut self) -> &mut [Green] { + pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] { &mut self.children } /// Replaces a range of children with some replacement. + /// + /// May have mutated the children if it returns `Err(_)`. pub(crate) fn replace_children( &mut self, - range: Range<usize>, - replacement: Vec<Green>, - ) { + mut range: Range<usize>, + replacement: Vec<SyntaxNode>, + ) -> NumberingResult { let superseded = &self.children[range.clone()]; - let superseded_len: usize = superseded.iter().map(Green::len).sum(); - let replacement_len: usize = replacement.iter().map(Green::len).sum(); - // If we're erroneous, but not due to the superseded range, then we will - // still be erroneous after the replacement. - let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous); + // Compute the new byte length. + self.data.len = self.data.len + + replacement.iter().map(SyntaxNode::len).sum::<usize>() + - superseded.iter().map(SyntaxNode::len).sum::<usize>(); + + // Compute the new number of descendants. + self.descendants = self.descendants + + replacement.iter().map(SyntaxNode::descendants).sum::<usize>() + - superseded.iter().map(SyntaxNode::descendants).sum::<usize>(); + + // Determine whether we're still erroneous after the replacement. That's + // the case if + // - any of the new nodes is erroneous, + // - or if we were erroneous before due to a non-superseded node. + self.erroneous = replacement.iter().any(SyntaxNode::erroneous) + || (self.erroneous + && (self.children[.. range.start].iter().any(SyntaxNode::erroneous)) + || self.children[range.end ..].iter().any(SyntaxNode::erroneous)); + + // Perform the replacement. + let replacement_count = replacement.len(); + self.children.splice(range.clone(), replacement); + range.end = range.start + replacement_count; + + // Renumber the new children. Retries until it works, taking + // exponentially more children into account. + let mut left = 0; + let mut right = 0; + let max_left = range.start; + let max_right = self.children.len() - range.end; + loop { + let renumber = range.start - left .. range.end + right; + + // The minimum assignable number is either + // - the upper bound of the node right before the to-be-renumbered + // children, + // - or this inner node's span number plus one if renumbering starts + // at the first child. + let start_number = renumber + .start + .checked_sub(1) + .and_then(|i| self.children.get(i)) + .map_or(self.span().number() + 1, |child| child.upper()); + + // The upper bound for renumbering is either + // - the span number of the first child after the to-be-renumbered + // children, + // - or this node's upper bound if renumbering ends behind the last + // child. + let end_number = self + .children + .get(renumber.end) + .map_or(self.upper(), |next| next.span().number()); + + // Try to renumber. + let within = start_number .. end_number; + let id = self.span().source(); + if self.numberize(id, Some(renumber), within).is_ok() { + return Ok(()); + } - self.children.splice(range, replacement); - self.data.len = self.data.len + replacement_len - superseded_len; - self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous); + // If it didn't even work with all children, we give up. + if left == max_left && right == max_right { + return Err(Unnumberable); + } + + // Exponential expansion to both sides. + left = (left + 1).next_power_of_two().min(max_left); + right = (right + 1).next_power_of_two().min(max_right); + } } - /// Update the length of this node given the old and new length of - /// replaced children. - pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { - self.data.len = self.data.len() + new_len - old_len; - self.erroneous = self.children.iter().any(Green::erroneous); + /// Update the this node given after changes were made to one of its + /// children. + pub(crate) fn update_parent( + &mut self, + prev_len: usize, + new_len: usize, + prev_descendants: usize, + new_descendants: usize, + ) { + self.data.len = self.data.len + new_len - prev_len; + self.descendants = self.descendants + new_descendants - prev_descendants; + self.erroneous = self.children.iter().any(SyntaxNode::erroneous); } } -impl From<GreenNode> for Green { - fn from(node: GreenNode) -> Self { +impl From<InnerNode> for SyntaxNode { + fn from(node: InnerNode) -> Self { Arc::new(node).into() } } -impl From<Arc<GreenNode>> for Green { - fn from(node: Arc<GreenNode>) -> Self { - Self::Node(node) +impl From<Arc<InnerNode>> for SyntaxNode { + fn from(node: Arc<InnerNode>) -> Self { + Self::Inner(node) } } -impl Debug for GreenNode { +impl Debug for InnerNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { self.data.fmt(f)?; if !self.children.is_empty() { @@ -221,300 +480,85 @@ impl Debug for GreenNode { } } +impl PartialEq for InnerNode { + fn eq(&self, other: &Self) -> bool { + self.data == other.data + && self.descendants == other.descendants + && self.erroneous == other.erroneous + && self.children == other.children + } +} + /// Data shared between inner and leaf nodes. -#[derive(Clone, PartialEq, Hash)] -pub struct GreenData { +#[derive(Clone, Hash)] +pub struct NodeData { /// What kind of node this is (each kind would have its own struct in a /// strongly typed AST). kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// A synthetic span for the node, usually this is `None`. - span: Option<Arc<Span>>, + /// The node's span. + span: Span, } -impl GreenData { +impl NodeData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, kind, span: None } + Self { len, kind, span: Span::detached() } } - /// The type of the node. + /// The node's type. pub fn kind(&self) -> &NodeKind { &self.kind } - /// The length of the node. + /// The node's length. pub fn len(&self) -> usize { self.len } - /// Set a synthetic span for the node. - pub fn synthesize(&mut self, span: Arc<Span>) { - self.span = Some(span) - } -} - -impl From<GreenData> for Green { - fn from(token: GreenData) -> Self { - Self::Token(token) - } -} - -impl Debug for GreenData { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len) - } -} - -/// A owned wrapper for a green node with span information. -/// -/// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST node. -#[derive(Clone, PartialEq, Hash)] -pub struct RedNode { - id: SourceId, - offset: usize, - green: Green, -} - -impl RedNode { - /// Create a new red node from a root [`GreenNode`]. - pub fn from_root(root: Arc<GreenNode>, id: SourceId) -> Self { - Self { id, offset: 0, green: root.into() } - } - - /// Convert to a borrowed representation. - pub fn as_ref(&self) -> RedRef<'_> { - RedRef { - id: self.id, - offset: self.offset, - green: &self.green, - } - } - - /// The node's metadata. - pub fn data(&self) -> &GreenData { - self.as_ref().data() - } - - /// The type of the node. - pub fn kind(&self) -> &NodeKind { - self.as_ref().kind() - } - - /// The length of the node. - pub fn len(&self) -> usize { - self.as_ref().len() - } - - /// The span of the node. + /// The node's span. pub fn span(&self) -> Span { - self.as_ref().span() - } - - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec<Error> { - self.as_ref().errors() - } - - /// Convert the node to a typed AST node. - pub fn cast<T>(self) -> Option<T> - where - T: TypedNode, - { - self.as_ref().cast() - } - - /// The children of the node. - pub fn children(&self) -> Children<'_> { - self.as_ref().children() - } - - /// Get the first child that can cast to some AST type. - pub fn cast_first_child<T: TypedNode>(&self) -> Option<T> { - self.as_ref().cast_first_child() - } - - /// Get the last child that can cast to some AST type. - pub fn cast_last_child<T: TypedNode>(&self) -> Option<T> { - self.as_ref().cast_last_child() - } -} - -impl Debug for RedNode { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.as_ref().fmt(f) - } -} - -/// A borrowed wrapper for a [`GreenNode`] with span information. -/// -/// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. -#[derive(Copy, Clone, PartialEq, Hash)] -pub struct RedRef<'a> { - id: SourceId, - offset: usize, - green: &'a Green, -} - -impl<'a> RedRef<'a> { - /// Convert to an owned representation. - pub fn own(self) -> RedNode { - RedNode { - id: self.id, - offset: self.offset, - green: self.green.clone(), - } - } - - /// The node's metadata. - pub fn data(self) -> &'a GreenData { - self.green.data() - } - - /// The type of the node. - pub fn kind(self) -> &'a NodeKind { - self.green.kind() + self.span } - /// The length of the node. - pub fn len(self) -> usize { - self.green.len() - } - - /// The span of the node. - pub fn span(self) -> Span { - match self.data().span.as_deref() { - Some(&span) => span, - None => Span::new(self.id, self.offset, self.offset + self.len()), - } - } - - /// Whether the node is a leaf node. - pub fn is_leaf(self) -> bool { - self.green.is_leaf() - } - - /// The error messages for this node and its descendants. - pub fn errors(self) -> Vec<Error> { - if !self.green.erroneous() { - return vec![]; - } - - match self.kind() { - NodeKind::Error(pos, msg) => { - let mut span = self.span(); - if self.data().span.is_none() { - span = match pos { - ErrorPos::Start => span.at_start(), - ErrorPos::Full => span, - ErrorPos::End => span.at_end(), - }; - } - - vec![Error::new(span, msg.to_string())] - } - _ => self - .children() - .filter(|red| red.green.erroneous()) - .flat_map(|red| red.errors()) - .collect(), - } + /// Set a synthetic span for the node. + pub fn synthesize(&mut self, span: Span) { + self.span = span; } - /// Returns all leaf descendants of this node (may include itself). - pub fn leafs(self) -> Vec<Self> { - if self.is_leaf() { - vec![self] + /// Assign a span to the node. + pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult { + if within.start < within.end { + self.span = Span::new(id, (within.start + within.end) / 2); + Ok(()) } else { - self.children().flat_map(Self::leafs).collect() - } - } - - /// Convert the node to a typed AST node. - pub fn cast<T>(self) -> Option<T> - where - T: TypedNode, - { - T::from_red(self) - } - - /// The node's children. - pub fn children(self) -> Children<'a> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - Children { - id: self.id, - iter: children.iter(), - front: self.offset, - back: self.offset + self.len(), + Err(Unnumberable) } } - - /// Get the first child that can cast to some AST type. - pub fn cast_first_child<T: TypedNode>(self) -> Option<T> { - self.children().find_map(RedRef::cast) - } - - /// Get the last child that can cast to some AST type. - pub fn cast_last_child<T: TypedNode>(self) -> Option<T> { - self.children().rev().find_map(RedRef::cast) - } } -impl Debug for RedRef<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {:?}", self.kind(), self.span())?; - let mut children = self.children().peekable(); - if children.peek().is_some() { - f.write_str(" ")?; - f.debug_list().entries(children.map(RedRef::own)).finish()?; - } - Ok(()) +impl From<NodeData> for SyntaxNode { + fn from(token: NodeData) -> Self { + Self::Leaf(token) } } -/// An iterator over the children of a red node. -pub struct Children<'a> { - id: SourceId, - iter: std::slice::Iter<'a, Green>, - front: usize, - back: usize, -} - -impl<'a> Iterator for Children<'a> { - type Item = RedRef<'a>; - - fn next(&mut self) -> Option<Self::Item> { - self.iter.next().map(|green| { - let offset = self.front; - self.front += green.len(); - RedRef { id: self.id, offset, green } - }) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.iter.size_hint() +impl Debug for NodeData { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:?}: {}", self.kind, self.len) } } -impl DoubleEndedIterator for Children<'_> { - fn next_back(&mut self) -> Option<Self::Item> { - self.iter.next_back().map(|green| { - self.back -= green.len(); - RedRef { id: self.id, offset: self.back, green } - }) +impl PartialEq for NodeData { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.len == other.len } } -impl ExactSizeIterator for Children<'_> {} - /// All syntactical building blocks that can be part of a Typst document. /// -/// Can be emitted as a token by the tokenizer or as part of a green node by +/// Can be emitted as a token by the tokenizer or as part of a syntax node by /// the parser. #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { @@ -748,17 +792,6 @@ pub enum NodeKind { Unknown(EcoString), } -/// Where in a node an error should be annotated. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum ErrorPos { - /// At the start of the node. - Start, - /// Over the full width of the node. - Full, - /// At the end of the node. - End, -} - impl NodeKind { /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { |
