diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-05-31 12:37:05 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-05-31 12:37:05 +0200 |
| commit | 9bbebd69ddb4a7d7da98c3a79ff7d0cb187873fd (patch) | |
| tree | 0fc651f43337d65e13cccb2bbe85ab1b79666725 /src/syntax | |
| parent | 08a6188123ad0806986fa4f5477b728a07d081cc (diff) | |
Numbered spans
Diffstat (limited to 'src/syntax')
| -rw-r--r-- | src/syntax/highlight.rs | 34 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 210 | ||||
| -rw-r--r-- | src/syntax/span.rs | 145 |
3 files changed, 217 insertions, 172 deletions
diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 1b84fdba..630a451d 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -10,24 +10,38 @@ use crate::parse::TokenMode; /// Provide highlighting categories for the descendants of a node that fall into /// a range. -pub fn highlight_node<F>(node: &SyntaxNode, range: Range<usize>, f: &mut F) +pub fn highlight_node<F>(root: &SyntaxNode, range: Range<usize>, mut f: F) where F: FnMut(Range<usize>, Category), { + highlight_node_impl(0, root, range, &mut f) +} + +/// Provide highlighting categories for the descendants of a node that fall into +/// a range. +pub fn highlight_node_impl<F>( + mut offset: usize, + node: &SyntaxNode, + range: Range<usize>, + f: &mut F, +) where + F: FnMut(Range<usize>, Category), +{ for (i, child) in node.children().enumerate() { - let span = child.span(); + let span = offset .. offset + child.len(); if range.start <= span.end && range.end >= span.start { if let Some(category) = Category::determine(child, node, i) { - f(span.to_range(), category); + f(span, category); } - highlight_node(child, range.clone(), f); + highlight_node_impl(offset, child, range.clone(), f); } + offset += child.len(); } } /// Highlight source text in a theme by calling `f` with each consecutive piece /// and its style. -pub fn highlight_themed<F>(text: &str, mode: TokenMode, theme: &Theme, f: &mut F) +pub fn highlight_themed<F>(text: &str, mode: TokenMode, theme: &Theme, mut f: F) where F: FnMut(&str, Style), { @@ -43,12 +57,13 @@ where }; let highlighter = Highlighter::new(&theme); - highlight_themed_impl(text, &root, vec![], &highlighter, f); + highlight_themed_impl(text, 0, &root, vec![], &highlighter, &mut f); } /// Recursive implementation for returning syntect styles. fn highlight_themed_impl<F>( text: &str, + mut offset: usize, node: &SyntaxNode, scopes: Vec<Scope>, highlighter: &Highlighter, @@ -57,7 +72,7 @@ fn highlight_themed_impl<F>( F: FnMut(&str, Style), { if node.children().len() == 0 { - let piece = &text[node.span().to_range()]; + let piece = &text[offset .. offset + node.len()]; let style = highlighter.style_for_stack(&scopes); f(piece, style); return; @@ -68,7 +83,8 @@ fn highlight_themed_impl<F>( if let Some(category) = Category::determine(child, node, i) { scopes.push(Scope::new(category.tm_scope()).unwrap()) } - highlight_themed_impl(text, child, scopes, highlighter, f); + highlight_themed_impl(text, offset, child, scopes, highlighter, f); + offset += child.len(); } } @@ -92,7 +108,7 @@ pub fn highlight_pre(text: &str, mode: TokenMode, theme: &Theme) -> String { let mut buf = String::new(); buf.push_str("<pre>\n"); - highlight_themed(text, mode, theme, &mut |piece, style| { + highlight_themed(text, mode, theme, |piece, style| { let styled = style != Style::default(); if styled { buf.push_str("<span style=\""); diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 841c16ff..6cecfd2a 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -13,7 +13,8 @@ pub use highlight::*; pub use span::*; use self::ast::{MathNode, RawNode, TypedNode, Unit}; -use crate::diag::Error; +use crate::diag::{Error, ErrorPos}; +use crate::source::SourceId; use crate::util::EcoString; /// An inner or leaf node in the untyped syntax tree. @@ -29,8 +30,8 @@ impl SyntaxNode { /// Returns the metadata of the node. pub fn data(&self) -> &NodeData { match self { - SyntaxNode::Inner(n) => &n.data, - SyntaxNode::Leaf(t) => t, + SyntaxNode::Inner(inner) => &inner.data, + SyntaxNode::Leaf(leaf) => leaf, } } @@ -44,15 +45,31 @@ impl SyntaxNode { self.data().len() } + /// The number of descendants, including the node itself. + pub fn count(&self) -> usize { + match self { + SyntaxNode::Inner(inner) => inner.count(), + SyntaxNode::Leaf(_) => 1, + } + } + /// The span of the node. pub fn span(&self) -> Span { - todo!() + self.data().span() + } + + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> { + match self { + SyntaxNode::Inner(inner) => inner.range(span, offset), + SyntaxNode::Leaf(leaf) => leaf.range(span, offset), + } } /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { match self { - SyntaxNode::Inner(n) => n.children(), + SyntaxNode::Inner(inner) => inner.children(), SyntaxNode::Leaf(_) => [].iter(), } } @@ -62,7 +79,7 @@ impl SyntaxNode { /// This method is slow and only intended for testing. pub fn leafs(&self) -> Vec<Self> { if match self { - SyntaxNode::Inner(n) => n.children().len() == 0, + SyntaxNode::Inner(inner) => inner.children().len() == 0, SyntaxNode::Leaf(_) => true, } { vec![self.clone()] @@ -86,7 +103,9 @@ impl SyntaxNode { } match self.kind() { - NodeKind::Error(..) => todo!(), + &NodeKind::Error(pos, ref message) => { + vec![Error { pos, ..Error::new(self.span(), message) }] + } _ => self .children() .filter(|node| node.erroneous()) @@ -116,20 +135,28 @@ impl SyntaxNode { /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { - Self::Inner(node) => { - let node = Arc::make_mut(node); + Self::Inner(inner) => { + let node = Arc::make_mut(inner); node.erroneous |= kind.is_error(); node.data.kind = kind; } - Self::Leaf(data) => data.kind = kind, + Self::Leaf(leaf) => leaf.kind = kind, } } - /// Set a synthetic span for the node and all its children. - pub fn synthesize(&mut self, span: Arc<Span>) { + /// Assign spans to each node. + pub fn number(&mut self, id: SourceId, from: u64) { match self { - SyntaxNode::Inner(n) => Arc::make_mut(n).synthesize(span), - SyntaxNode::Leaf(t) => t.synthesize(span), + SyntaxNode::Inner(inner) => Arc::make_mut(inner).number(id, from), + SyntaxNode::Leaf(leaf) => leaf.number(id, from), + } + } + + /// Set a synthetic node id for the node and all its descendants. + pub fn synthesize(&mut self, span: Span) { + match self { + SyntaxNode::Inner(inner) => Arc::make_mut(inner).synthesize(span), + SyntaxNode::Leaf(leaf) => leaf.synthesize(span), } } } @@ -154,10 +181,12 @@ impl Debug for SyntaxNode { pub struct InnerNode { /// Node metadata. data: NodeData, - /// This node's children, losslessly make up this node. - children: Vec<SyntaxNode>, + /// The number of nodes in the whole subtree, including this node. + count: usize, /// Whether this node or any of its children are erroneous. erroneous: bool, + /// This node's children, losslessly make up this node. + children: Vec<SyntaxNode>, } impl InnerNode { @@ -168,17 +197,21 @@ impl InnerNode { /// Creates a new node with the given kind and children. pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self { + let mut len = 0; + let mut count = 1; let mut erroneous = kind.is_error(); - let len = children - .iter() - .inspect(|c| erroneous |= c.erroneous()) - .map(SyntaxNode::len) - .sum(); + + for child in &children { + len += child.len(); + count += child.count(); + erroneous |= child.erroneous(); + } Self { data: NodeData::new(kind, len), - children, + count, erroneous, + children, } } @@ -197,16 +230,54 @@ impl InnerNode { self.data().len() } + /// The node's span. + pub fn span(&self) -> Span { + self.data().span() + } + + /// The number of descendants, including the node itself. + pub fn count(&self) -> usize { + self.count + } + + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, mut offset: usize) -> Option<Range<usize>> { + if let Some(range) = self.data.range(span, offset) { + return Some(range); + } + + for child in &self.children { + if let Some(range) = child.range(span, offset) { + return Some(range); + } + + offset += child.len(); + } + + None + } + /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { self.children.iter() } - /// Set a synthetic span for the node and all its children. - pub fn synthesize(&mut self, span: Arc<Span>) { - self.data.synthesize(span.clone()); + /// Assign spans to each node. + pub fn number(&mut self, id: SourceId, mut from: u64) { + self.data.number(id, from); + from += 1; + for child in &mut self.children { - child.synthesize(span.clone()); + child.number(id, from); + from += child.count() as u64; + } + } + + /// Set a synthetic node id for the node and all its descendants. + pub fn synthesize(&mut self, span: Span) { + self.data.synthesize(span); + for child in &mut self.children { + child.synthesize(span); } } @@ -222,24 +293,41 @@ impl InnerNode { replacement: Vec<SyntaxNode>, ) { let superseded = &self.children[range.clone()]; - let superseded_len: usize = superseded.iter().map(SyntaxNode::len).sum(); - let replacement_len: usize = replacement.iter().map(SyntaxNode::len).sum(); - - // If we're erroneous, but not due to the superseded range, then we will - // still be erroneous after the replacement. - let still_erroneous = - self.erroneous && !superseded.iter().any(SyntaxNode::erroneous); + // Compute the new byte length. + self.data.len = self.data.len + + replacement.iter().map(SyntaxNode::len).sum::<usize>() + - superseded.iter().map(SyntaxNode::len).sum::<usize>(); + + // Compute the new descendant count. + self.count = self.count + + replacement.iter().map(SyntaxNode::count).sum::<usize>() + - superseded.iter().map(SyntaxNode::count).sum::<usize>(); + + // Determine whether we're still erroneous after the replacement. That's + // the case if + // - any of the new nodes is erroneous, + // - or if we were erroneous before due to a non-superseded node. + self.erroneous = replacement.iter().any(SyntaxNode::erroneous) + || (self.erroneous + && (self.children[.. range.start].iter().any(SyntaxNode::erroneous) + || self.children[range.end ..].iter().any(SyntaxNode::erroneous))); + + // Perform the replacement. self.children.splice(range, replacement); - self.data.len = self.data.len + replacement_len - superseded_len; - self.erroneous = - still_erroneous || self.children.iter().any(SyntaxNode::erroneous); } /// Update the length of this node given the old and new length of /// replaced children. - pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { - self.data.len = self.data.len() + new_len - old_len; + pub(crate) fn update_parent( + &mut self, + prev_len: usize, + new_len: usize, + prev_count: usize, + new_count: usize, + ) { + self.data.len = self.data.len + new_len - prev_len; + self.count = self.count + new_count - prev_count; self.erroneous = self.children.iter().any(SyntaxNode::erroneous); } } @@ -268,34 +356,51 @@ impl Debug for InnerNode { } /// Data shared between inner and leaf nodes. -#[derive(Clone, PartialEq, Hash)] +#[derive(Clone, Hash)] pub struct NodeData { /// What kind of node this is (each kind would have its own struct in a /// strongly typed AST). kind: NodeKind, /// The byte length of the node in the source. len: usize, + /// The node's span. + span: Span, } impl NodeData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, kind } + Self { len, kind, span: Span::detached() } } - /// The type of the node. + /// The node's type. pub fn kind(&self) -> &NodeKind { &self.kind } - /// The length of the node. + /// The node's length. pub fn len(&self) -> usize { self.len } + /// The node's span. + pub fn span(&self) -> Span { + self.span + } + + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> { + (self.span == span).then(|| offset .. offset + self.len()) + } + + /// Assign spans to each node. + pub fn number(&mut self, id: SourceId, from: u64) { + self.span = Span::new(id, from); + } + /// Set a synthetic span for the node. - pub fn synthesize(&mut self, _: Arc<Span>) { - todo!() + pub fn synthesize(&mut self, span: Span) { + self.span = span; } } @@ -311,6 +416,12 @@ impl Debug for NodeData { } } +impl PartialEq for NodeData { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.len == other.len + } +} + /// All syntactical building blocks that can be part of a Typst document. /// /// Can be emitted as a token by the tokenizer or as part of a syntax node by @@ -547,17 +658,6 @@ pub enum NodeKind { Unknown(EcoString), } -/// Where in a node an error should be annotated. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum ErrorPos { - /// At the start of the node. - Start, - /// Over the full width of the node. - Full, - /// At the end of the node. - End, -} - impl NodeKind { /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { diff --git a/src/syntax/span.rs b/src/syntax/span.rs index d1e29dd3..3f6e6824 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,8 +1,7 @@ -use std::cmp::Ordering; use std::fmt::{self, Debug, Formatter}; -use std::ops::Range; +use std::num::NonZeroU64; -use crate::source::SourceId; +use crate::syntax::SourceId; /// A value with the span it corresponds to in the source code. #[derive(Copy, Clone, Eq, PartialEq, Hash)] @@ -35,122 +34,52 @@ impl<T> Spanned<T> { impl<T: Debug> Debug for Spanned<T> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.v.fmt(f)?; - if f.alternate() { - f.write_str(" <")?; - self.span.fmt(f)?; - f.write_str(">")?; - } - Ok(()) + self.v.fmt(f) } } -/// Bounds of a slice of source code. -#[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub struct Span { - /// The id of the source file. - pub source: SourceId, - /// The inclusive start position. - pub start: usize, - /// The inclusive end position. - pub end: usize, -} +/// A unique identifier for a syntax node. +/// +/// This is used throughout the compiler to track which source section an error +/// or element stems from. Can be mapped back to a source id + byte range for +/// user facing display. +/// +/// Node ids are ordered in the tree to enable quickly finding the node with +/// some id: +/// - The id of a parent is always smaller than the ids of any of its children. +/// - The id of a node is always greater than any id in the subtrees of any left +/// sibling and smaller than any id the subtrees of any right sibling. +/// +/// Node ids stay mostly stable, even for nodes behind an insertion. This is not +/// true for simple spans/ranges as they shift. Node ids can be used as inputs +/// to memoized functions without hurting cache performance when text is +/// inserted somewhere in the document other than the end. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Span(NonZeroU64); impl Span { - /// Create a new span from start and end positions. - pub fn new(source: SourceId, start: usize, end: usize) -> Self { - Self { source, start, end } - } - - /// Create a span including just a single position. - pub fn at(source: SourceId, pos: usize) -> Self { - Self::new(source, pos, pos) - } - - /// Create a span without real location information, usually for testing. - pub fn detached() -> Self { - Self { - source: SourceId::from_raw(0), - start: 0, - end: 0, - } - } - - /// Create a span with a different start position. - pub fn with_start(self, start: usize) -> Self { - Self { start, ..self } - } - - /// Create a span with a different end position. - pub fn with_end(self, end: usize) -> Self { - Self { end, ..self } - } - - /// Whether the span is a single point. - pub fn is_empty(self) -> bool { - self.start == self.end - } - - /// The byte length of the spanned region. - pub fn len(self) -> usize { - self.end - self.start + /// Create a new span from a source id and a unique number. + pub const fn new(id: SourceId, number: u64) -> Self { + assert!(number > 0 && number < (1 << 48)); + let bits = ((id.into_raw() as u64) << 48) | number; + Self(nonzero(bits)) } - /// A new span at the position of this span's start. - pub fn at_start(&self) -> Span { - Self::at(self.source, self.start) + /// A node that does not belong to any source file. + pub const fn detached() -> Self { + Self(nonzero(1)) } - /// A new span at the position of this span's end. - pub fn at_end(&self) -> Span { - Self::at(self.source, self.end) - } - - /// Create a new span with the earlier start and later end position. - /// - /// This panics if the spans come from different files. - pub fn join(self, other: Self) -> Self { - debug_assert_eq!(self.source, other.source); - Self { - source: self.source, - start: self.start.min(other.start), - end: self.end.max(other.end), - } - } - - /// Expand a span by merging it with another span. - pub fn expand(&mut self, other: Self) { - *self = self.join(other) - } - - /// Test whether a position is within the span. - pub fn contains(&self, pos: usize) -> bool { - self.start <= pos && self.end >= pos - } - - /// Test whether one span complete contains the other span. - pub fn surrounds(self, other: Self) -> bool { - self.source == other.source && self.start <= other.start && self.end >= other.end - } - - /// Convert to a `Range<usize>` for indexing. - pub fn to_range(self) -> Range<usize> { - self.start .. self.end - } -} - -impl Debug for Span { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}-{:?}", self.start, self.end) + /// The id of the source file the span points into. + pub const fn source(self) -> SourceId { + SourceId::from_raw((self.0.get() >> 48) as u16) } } -impl PartialOrd for Span { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - if self.source == other.source { - Some(self.start.cmp(&other.start).then(self.end.cmp(&other.end))) - } else { - None - } +/// Convert to a non zero u64. +const fn nonzero(v: u64) -> NonZeroU64 { + match NonZeroU64::new(v) { + Some(v) => v, + None => unreachable!(), } } |
