summaryrefslogtreecommitdiff
path: root/src/syntax/node.rs
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2022-10-17 19:26:24 +0200
committerLaurenz <laurmaedje@gmail.com>2022-10-17 20:04:22 +0200
commite21822665591dc19766275da1e185215a6b945ef (patch)
tree7788e211c3c33c8b5a8ad7d5eb7574e33631eb16 /src/syntax/node.rs
parent4fd031a256b2ecfe524859d5599fafb386395572 (diff)
Merge some modules
Diffstat (limited to 'src/syntax/node.rs')
-rw-r--r--src/syntax/node.rs548
1 files changed, 548 insertions, 0 deletions
diff --git a/src/syntax/node.rs b/src/syntax/node.rs
new file mode 100644
index 00000000..6a7d424a
--- /dev/null
+++ b/src/syntax/node.rs
@@ -0,0 +1,548 @@
+use std::fmt::{self, Debug, Formatter};
+use std::ops::Range;
+use std::sync::Arc;
+
+use super::ast::TypedNode;
+use super::{NodeKind, NumberingResult, SourceId, Span, Unnumberable};
+use crate::diag::SourceError;
+
+/// An inner or leaf node in the untyped syntax tree.
+#[derive(Clone, PartialEq, Hash)]
+pub enum SyntaxNode {
+ /// A reference-counted inner node.
+ Inner(Arc<InnerNode>),
+ /// A leaf token.
+ Leaf(NodeData),
+}
+
+impl SyntaxNode {
+ /// The metadata of the node.
+ pub fn data(&self) -> &NodeData {
+ match self {
+ Self::Inner(inner) => &inner.data,
+ Self::Leaf(leaf) => leaf,
+ }
+ }
+
+ /// The type of the node.
+ pub fn kind(&self) -> &NodeKind {
+ self.data().kind()
+ }
+
+ /// The length of the node.
+ pub fn len(&self) -> usize {
+ self.data().len()
+ }
+
+ /// The number of descendants, including the node itself.
+ pub fn descendants(&self) -> usize {
+ match self {
+ Self::Inner(inner) => inner.descendants(),
+ Self::Leaf(_) => 1,
+ }
+ }
+
+ /// The span of the node.
+ pub fn span(&self) -> Span {
+ self.data().span()
+ }
+
+ /// Whether the node or its children contain an error.
+ pub fn erroneous(&self) -> bool {
+ match self {
+ Self::Inner(node) => node.erroneous,
+ Self::Leaf(data) => data.kind.is_error(),
+ }
+ }
+
+ /// The error messages for this node and its descendants.
+ pub fn errors(&self) -> Vec<SourceError> {
+ if !self.erroneous() {
+ return vec![];
+ }
+
+ match self.kind() {
+ NodeKind::Error(pos, message) => {
+ vec![SourceError::new(self.span(), message.clone()).with_pos(*pos)]
+ }
+ _ => self
+ .children()
+ .filter(|node| node.erroneous())
+ .flat_map(|node| node.errors())
+ .collect(),
+ }
+ }
+
+ /// The node's children.
+ pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
+ match self {
+ Self::Inner(inner) => inner.children(),
+ Self::Leaf(_) => [].iter(),
+ }
+ }
+
+ /// Convert the node to a typed AST node.
+ pub fn cast<T>(&self) -> Option<T>
+ where
+ T: TypedNode,
+ {
+ T::from_untyped(self)
+ }
+
+ /// Get the first child that can cast to the AST type `T`.
+ pub fn cast_first_child<T: TypedNode>(&self) -> Option<T> {
+ self.children().find_map(Self::cast)
+ }
+
+ /// Get the last child that can cast to the AST type `T`.
+ pub fn cast_last_child<T: TypedNode>(&self) -> Option<T> {
+ self.children().rev().find_map(Self::cast)
+ }
+
+ /// Change the type of the node.
+ pub fn convert(&mut self, kind: NodeKind) {
+ match self {
+ Self::Inner(inner) => {
+ let node = Arc::make_mut(inner);
+ node.erroneous |= kind.is_error();
+ node.data.kind = kind;
+ }
+ Self::Leaf(leaf) => leaf.kind = kind,
+ }
+ }
+
+ /// Set a synthetic span for the node and all its descendants.
+ pub fn synthesize(&mut self, span: Span) {
+ match self {
+ Self::Inner(inner) => Arc::make_mut(inner).synthesize(span),
+ Self::Leaf(leaf) => leaf.synthesize(span),
+ }
+ }
+
+ /// Assign spans to each node.
+ pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
+ match self {
+ Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
+ Self::Leaf(leaf) => leaf.numberize(id, within),
+ }
+ }
+
+ /// The upper bound of assigned numbers in this subtree.
+ pub fn upper(&self) -> u64 {
+ match self {
+ Self::Inner(inner) => inner.upper(),
+ Self::Leaf(leaf) => leaf.span().number() + 1,
+ }
+ }
+
+ /// If the span points into this node, convert it to a byte range.
+ pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
+ match self {
+ Self::Inner(inner) => inner.range(span, offset),
+ Self::Leaf(leaf) => leaf.range(span, offset),
+ }
+ }
+
+ /// Returns all leaf descendants of this node (may include itself).
+ ///
+ /// This method is slow and only intended for testing.
+ pub fn leafs(&self) -> Vec<Self> {
+ if match self {
+ Self::Inner(inner) => inner.children.is_empty(),
+ Self::Leaf(_) => true,
+ } {
+ vec![self.clone()]
+ } else {
+ self.children().flat_map(Self::leafs).collect()
+ }
+ }
+}
+
+impl Default for SyntaxNode {
+ fn default() -> Self {
+ Self::Leaf(NodeData::new(NodeKind::None, 0))
+ }
+}
+
+impl Debug for SyntaxNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ match self {
+ Self::Inner(node) => node.fmt(f),
+ Self::Leaf(token) => token.fmt(f),
+ }
+ }
+}
+
+/// An inner node in the untyped syntax tree.
+#[derive(Clone, Hash)]
+pub struct InnerNode {
+ /// Node metadata.
+ data: NodeData,
+ /// The number of nodes in the whole subtree, including this node.
+ descendants: usize,
+ /// Whether this node or any of its children are erroneous.
+ erroneous: bool,
+ /// The upper bound of this node's numbering range.
+ upper: u64,
+ /// This node's children, losslessly make up this node.
+ children: Vec<SyntaxNode>,
+}
+
+impl InnerNode {
+ /// Creates a new node with the given kind and a single child.
+ pub fn with_child(kind: NodeKind, child: impl Into<SyntaxNode>) -> Self {
+ Self::with_children(kind, vec![child.into()])
+ }
+
+ /// Creates a new node with the given kind and children.
+ pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self {
+ let mut len = 0;
+ let mut descendants = 1;
+ let mut erroneous = kind.is_error();
+
+ for child in &children {
+ len += child.len();
+ descendants += child.descendants();
+ erroneous |= child.erroneous();
+ }
+
+ Self {
+ data: NodeData::new(kind, len),
+ descendants,
+ erroneous,
+ upper: 0,
+ children,
+ }
+ }
+
+ /// The node's metadata.
+ pub fn data(&self) -> &NodeData {
+ &self.data
+ }
+
+ /// The node's type.
+ pub fn kind(&self) -> &NodeKind {
+ self.data().kind()
+ }
+
+ /// The node's length.
+ pub fn len(&self) -> usize {
+ self.data().len()
+ }
+
+ /// The node's span.
+ pub fn span(&self) -> Span {
+ self.data().span()
+ }
+
+ /// The number of descendants, including the node itself.
+ pub fn descendants(&self) -> usize {
+ self.descendants
+ }
+
+ /// The node's children.
+ pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
+ self.children.iter()
+ }
+
+ /// Set a synthetic span for the node and all its descendants.
+ pub fn synthesize(&mut self, span: Span) {
+ self.data.synthesize(span);
+ for child in &mut self.children {
+ child.synthesize(span);
+ }
+ }
+
+ /// Assign span numbers `within` an interval to this node's subtree or just
+ /// a `range` of its children.
+ pub fn numberize(
+ &mut self,
+ id: SourceId,
+ range: Option<Range<usize>>,
+ within: Range<u64>,
+ ) -> NumberingResult {
+ // Determine how many nodes we will number.
+ let descendants = match &range {
+ Some(range) if range.is_empty() => return Ok(()),
+ Some(range) => self.children[range.clone()]
+ .iter()
+ .map(SyntaxNode::descendants)
+ .sum::<usize>(),
+ None => self.descendants,
+ };
+
+ // Determine the distance between two neighbouring assigned numbers. If
+ // possible, we try to fit all numbers into the left half of `within`
+ // so that there is space for future insertions.
+ let space = within.end - within.start;
+ let mut stride = space / (2 * descendants as u64);
+ if stride == 0 {
+ stride = space / self.descendants as u64;
+ if stride == 0 {
+ return Err(Unnumberable);
+ }
+ }
+
+ // Number this node itself.
+ let mut start = within.start;
+ if range.is_none() {
+ let end = start + stride;
+ self.data.numberize(id, start .. end)?;
+ self.upper = within.end;
+ start = end;
+ }
+
+ // Number the children.
+ let len = self.children.len();
+ for child in &mut self.children[range.unwrap_or(0 .. len)] {
+ let end = start + child.descendants() as u64 * stride;
+ child.numberize(id, start .. end)?;
+ start = end;
+ }
+
+ Ok(())
+ }
+
+ /// The upper bound of assigned numbers in this subtree.
+ pub fn upper(&self) -> u64 {
+ self.upper
+ }
+
+ /// If the span points into this node, convert it to a byte range.
+ pub fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> {
+ // Check whether we found it.
+ if let Some(range) = self.data.range(span, offset) {
+ return Some(range);
+ }
+
+ // The parent of a subtree has a smaller span number than all of its
+ // descendants. Therefore, we can bail out early if the target span's
+ // number is smaller than our number.
+ if span.number() < self.span().number() {
+ return None;
+ }
+
+ let mut children = self.children.iter().peekable();
+ while let Some(child) = children.next() {
+ // Every node in this child's subtree has a smaller span number than
+ // the next sibling. Therefore we only need to recurse if the next
+ // sibling's span number is larger than the target span's number.
+ if children
+ .peek()
+ .map_or(true, |next| next.span().number() > span.number())
+ {
+ if let Some(range) = child.range(span, offset) {
+ return Some(range);
+ }
+ }
+
+ offset += child.len();
+ }
+
+ None
+ }
+
+ /// The node's children, mutably.
+ pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] {
+ &mut self.children
+ }
+
+ /// Replaces a range of children with a replacement.
+ ///
+ /// May have mutated the children if it returns `Err(_)`.
+ pub(crate) fn replace_children(
+ &mut self,
+ mut range: Range<usize>,
+ replacement: Vec<SyntaxNode>,
+ ) -> NumberingResult {
+ let superseded = &self.children[range.clone()];
+
+ // Compute the new byte length.
+ self.data.len = self.data.len
+ + replacement.iter().map(SyntaxNode::len).sum::<usize>()
+ - superseded.iter().map(SyntaxNode::len).sum::<usize>();
+
+ // Compute the new number of descendants.
+ self.descendants = self.descendants
+ + replacement.iter().map(SyntaxNode::descendants).sum::<usize>()
+ - superseded.iter().map(SyntaxNode::descendants).sum::<usize>();
+
+ // Determine whether we're still erroneous after the replacement. That's
+ // the case if
+ // - any of the new nodes is erroneous,
+ // - or if we were erroneous before due to a non-superseded node.
+ self.erroneous = replacement.iter().any(SyntaxNode::erroneous)
+ || (self.erroneous
+ && (self.children[.. range.start].iter().any(SyntaxNode::erroneous))
+ || self.children[range.end ..].iter().any(SyntaxNode::erroneous));
+
+ // Perform the replacement.
+ let replacement_count = replacement.len();
+ self.children.splice(range.clone(), replacement);
+ range.end = range.start + replacement_count;
+
+ // Renumber the new children. Retries until it works, taking
+ // exponentially more children into account.
+ let mut left = 0;
+ let mut right = 0;
+ let max_left = range.start;
+ let max_right = self.children.len() - range.end;
+ loop {
+ let renumber = range.start - left .. range.end + right;
+
+ // The minimum assignable number is either
+ // - the upper bound of the node right before the to-be-renumbered
+ // children,
+ // - or this inner node's span number plus one if renumbering starts
+ // at the first child.
+ let start_number = renumber
+ .start
+ .checked_sub(1)
+ .and_then(|i| self.children.get(i))
+ .map_or(self.span().number() + 1, |child| child.upper());
+
+ // The upper bound for renumbering is either
+ // - the span number of the first child after the to-be-renumbered
+ // children,
+ // - or this node's upper bound if renumbering ends behind the last
+ // child.
+ let end_number = self
+ .children
+ .get(renumber.end)
+ .map_or(self.upper(), |next| next.span().number());
+
+ // Try to renumber.
+ let within = start_number .. end_number;
+ let id = self.span().source();
+ if self.numberize(id, Some(renumber), within).is_ok() {
+ return Ok(());
+ }
+
+ // If it didn't even work with all children, we give up.
+ if left == max_left && right == max_right {
+ return Err(Unnumberable);
+ }
+
+ // Exponential expansion to both sides.
+ left = (left + 1).next_power_of_two().min(max_left);
+ right = (right + 1).next_power_of_two().min(max_right);
+ }
+ }
+
+ /// Update this node after changes were made to one of its children.
+ pub(crate) fn update_parent(
+ &mut self,
+ prev_len: usize,
+ new_len: usize,
+ prev_descendants: usize,
+ new_descendants: usize,
+ ) {
+ self.data.len = self.data.len + new_len - prev_len;
+ self.descendants = self.descendants + new_descendants - prev_descendants;
+ self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
+ }
+}
+
+impl From<InnerNode> for SyntaxNode {
+ fn from(node: InnerNode) -> Self {
+ Arc::new(node).into()
+ }
+}
+
+impl From<Arc<InnerNode>> for SyntaxNode {
+ fn from(node: Arc<InnerNode>) -> Self {
+ Self::Inner(node)
+ }
+}
+
+impl Debug for InnerNode {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ self.data.fmt(f)?;
+ if !self.children.is_empty() {
+ f.write_str(" ")?;
+ f.debug_list().entries(&self.children).finish()?;
+ }
+ Ok(())
+ }
+}
+
+impl PartialEq for InnerNode {
+ fn eq(&self, other: &Self) -> bool {
+ self.data == other.data
+ && self.descendants == other.descendants
+ && self.erroneous == other.erroneous
+ && self.children == other.children
+ }
+}
+
+/// Data shared between inner and leaf nodes.
+#[derive(Clone, Hash)]
+pub struct NodeData {
+ /// What kind of node this is (each kind would have its own struct in a
+ /// strongly typed AST).
+ pub(super) kind: NodeKind,
+ /// The byte length of the node in the source.
+ len: usize,
+ /// The node's span.
+ span: Span,
+}
+
+impl NodeData {
+ /// Create new node metadata.
+ pub fn new(kind: NodeKind, len: usize) -> Self {
+ Self { len, kind, span: Span::detached() }
+ }
+
+ /// The node's type.
+ pub fn kind(&self) -> &NodeKind {
+ &self.kind
+ }
+
+ /// The node's length.
+ pub fn len(&self) -> usize {
+ self.len
+ }
+
+ /// The node's span.
+ pub fn span(&self) -> Span {
+ self.span
+ }
+
+ /// Set a synthetic span for the node.
+ pub fn synthesize(&mut self, span: Span) {
+ self.span = span;
+ }
+
+ /// Assign a span to the node.
+ pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
+ if within.start < within.end {
+ self.span = Span::new(id, (within.start + within.end) / 2);
+ Ok(())
+ } else {
+ Err(Unnumberable)
+ }
+ }
+
+ /// If the span points into this node, convert it to a byte range.
+ pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
+ (self.span == span).then(|| offset .. offset + self.len())
+ }
+}
+
+impl From<NodeData> for SyntaxNode {
+ fn from(token: NodeData) -> Self {
+ Self::Leaf(token)
+ }
+}
+
+impl Debug for NodeData {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ write!(f, "{:?}: {}", self.kind, self.len)
+ }
+}
+
+impl PartialEq for NodeData {
+ fn eq(&self, other: &Self) -> bool {
+ self.kind == other.kind && self.len == other.len
+ }
+}