summaryrefslogtreecommitdiff
path: root/crates/typst-syntax/src/span.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/typst-syntax/src/span.rs')
-rw-r--r--crates/typst-syntax/src/span.rs170
1 files changed, 129 insertions, 41 deletions
diff --git a/crates/typst-syntax/src/span.rs b/crates/typst-syntax/src/span.rs
index 0847ceea..3618b8f2 100644
--- a/crates/typst-syntax/src/span.rs
+++ b/crates/typst-syntax/src/span.rs
@@ -1,21 +1,37 @@
use std::fmt::{self, Debug, Formatter};
-use std::num::NonZeroU64;
+use std::num::{NonZeroU16, NonZeroU64};
use std::ops::Range;
use ecow::EcoString;
use crate::FileId;
-/// A unique identifier for a syntax node.
+/// Defines a range in a file.
///
-/// This is used throughout the compiler to track which source section an error
-/// or element stems from. Can be [mapped back](crate::Source::range) to a byte
-/// range for user facing display.
+/// This is used throughout the compiler to track which source section an
+/// element stems from or an error applies to.
///
-/// During editing, the span values stay mostly stable, even for nodes behind an
-/// insertion. This is not true for simple ranges as they would shift. Spans can
-/// be used as inputs to memoized functions without hurting cache performance
-/// when text is inserted somewhere in the document other than the end.
+/// - The [`.id()`](Self::id) function can be used to get the `FileId` for the
+/// span and, by extension, its file system path.
+/// - The `WorldExt::range` function can be used to map the span to a
+/// `Range<usize>`.
+///
+/// This type takes up 8 bytes and is copyable and null-optimized (i.e.
+/// `Option<Span>` also takes 8 bytes).
+///
+/// Spans come in two flavors: Numbered spans and raw range spans. The
+/// `WorldExt::range` function automatically handles both cases, yielding a
+/// `Range<usize>`.
+///
+/// # Numbered spans
+/// Typst source files use _numbered spans._ Rather than using byte ranges,
+/// which shift a lot as you type, each AST node gets a unique number.
+///
+/// During editing, the span numbers stay mostly stable, even for nodes behind
+/// an insertion. This is not true for simple ranges as they would shift. Spans
+/// can be used as inputs to memoized functions without hurting cache
+/// performance when text is inserted somewhere in the document other than the
+/// end.
///
/// Span ids are ordered in the syntax tree to enable quickly finding the node
/// with some id:
@@ -23,41 +39,81 @@ use crate::FileId;
/// - The id of a node is always greater than any id in the subtrees of any left
/// sibling and smaller than any id in the subtrees of any right sibling.
///
-/// This type takes up 8 bytes and is null-optimized (i.e. `Option<Span>` also
-/// takes 8 bytes).
+/// # Raw range spans
+/// Non Typst-files use raw ranges instead of numbered spans. The maximum
+/// encodable value for start and end is 2^23. Larger values will be saturated.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Span(NonZeroU64);
impl Span {
- /// The full range of numbers available for span numbering.
- pub(super) const FULL: Range<u64> = 2..(1 << Self::BITS);
+ /// The full range of numbers available for source file span numbering.
+ pub(crate) const FULL: Range<u64> = 2..(1 << 47);
/// The value reserved for the detached span.
const DETACHED: u64 = 1;
/// Data layout:
- /// | 16 bits source id | 48 bits number |
- const BITS: usize = 48;
+ /// | 16 bits file id | 48 bits number |
+ ///
+ /// Number =
+ /// - 1 means detached
+ /// - 2..2^47-1 is a numbered span
+ /// - 2^47..2^48-1 is a raw range span. To retrieve it, you must subtract
+ /// `RANGE_BASE` and then use shifting/bitmasking to extract the
+ /// components.
+ const NUMBER_BITS: usize = 48;
+ const FILE_ID_SHIFT: usize = Self::NUMBER_BITS;
+ const NUMBER_MASK: u64 = (1 << Self::NUMBER_BITS) - 1;
+ const RANGE_BASE: u64 = Self::FULL.end;
+ const RANGE_PART_BITS: usize = 23;
+ const RANGE_PART_SHIFT: usize = Self::RANGE_PART_BITS;
+ const RANGE_PART_MASK: u64 = (1 << Self::RANGE_PART_BITS) - 1;
- /// Create a new span from a source id and a unique number.
+ /// Create a span that does not point into any file.
+ pub const fn detached() -> Self {
+ match NonZeroU64::new(Self::DETACHED) {
+ Some(v) => Self(v),
+ None => unreachable!(),
+ }
+ }
+
+ /// Create a new span from a file id and a number.
///
/// Returns `None` if `number` is not contained in `FULL`.
- pub(super) const fn new(id: FileId, number: u64) -> Option<Self> {
+ pub(crate) const fn from_number(id: FileId, number: u64) -> Option<Self> {
if number < Self::FULL.start || number >= Self::FULL.end {
return None;
}
+ Some(Self::pack(id, number))
+ }
- let bits = ((id.into_raw() as u64) << Self::BITS) | number;
- match NonZeroU64::new(bits) {
- Some(v) => Some(Self(v)),
- None => unreachable!(),
- }
+ /// Create a new span from a raw byte range instead of a span number.
+ ///
+ /// If one of the range's parts exceeds the maximum value (2^23), it is
+ /// saturated.
+ pub const fn from_range(id: FileId, range: Range<usize>) -> Self {
+ let max = 1 << Self::RANGE_PART_BITS;
+ let start = if range.start > max { max } else { range.start } as u64;
+ let end = if range.end > max { max } else { range.end } as u64;
+ let number = (start << Self::RANGE_PART_SHIFT) | end;
+ Self::pack(id, Self::RANGE_BASE + number)
}
- /// Create a span that does not point into any source file.
- pub const fn detached() -> Self {
- match NonZeroU64::new(Self::DETACHED) {
+ /// Construct from a raw number.
+ ///
+ /// Should only be used with numbers retrieved via
+ /// [`into_raw`](Self::into_raw). Misuse may results in panics, but no
+ /// unsafety.
+ pub const fn from_raw(v: NonZeroU64) -> Self {
+ Self(v)
+ }
+
+ /// Pack a file ID and the low bits into a span.
+ const fn pack(id: FileId, low: u64) -> Self {
+ let bits = ((id.into_raw().get() as u64) << Self::FILE_ID_SHIFT) | low;
+ match NonZeroU64::new(bits) {
Some(v) => Self(v),
+ // The file ID is non-zero.
None => unreachable!(),
}
}
@@ -67,29 +123,34 @@ impl Span {
self.0.get() == Self::DETACHED
}
- /// The id of the source file the span points into.
+ /// The id of the file the span points into.
///
/// Returns `None` if the span is detached.
pub const fn id(self) -> Option<FileId> {
- if self.is_detached() {
- return None;
+ // Detached span has only zero high bits, so it will trigger the
+ // `None` case.
+ match NonZeroU16::new((self.0.get() >> Self::FILE_ID_SHIFT) as u16) {
+ Some(v) => Some(FileId::from_raw(v)),
+ None => None,
}
- let bits = (self.0.get() >> Self::BITS) as u16;
- Some(FileId::from_raw(bits))
}
/// The unique number of the span within its [`Source`](crate::Source).
- pub const fn number(self) -> u64 {
- self.0.get() & ((1 << Self::BITS) - 1)
+ pub(crate) const fn number(self) -> u64 {
+ self.0.get() & Self::NUMBER_MASK
}
- /// Construct from a raw number.
+ /// Extract a raw byte range from the span, if it is a raw range span.
///
- /// Should only be used with numbers retrieved via
- /// [`into_raw`](Self::into_raw). Misuse may results in panics, but no
- /// unsafety.
- pub const fn from_raw(v: NonZeroU64) -> Self {
- Self(v)
+ /// Typically, you should use `WorldExt::range` instead.
+ pub const fn range(self) -> Option<Range<usize>> {
+ let Some(number) = self.number().checked_sub(Self::RANGE_BASE) else {
+ return None;
+ };
+
+ let start = (number >> Self::RANGE_PART_SHIFT) as usize;
+ let end = (number & Self::RANGE_PART_MASK) as usize;
+ Some(start..end)
}
/// Extract the raw underlying number.
@@ -159,13 +220,40 @@ impl<T: Debug> Debug for Spanned<T> {
#[cfg(test)]
mod tests {
+ use std::num::NonZeroU16;
+ use std::ops::Range;
+
use crate::{FileId, Span};
#[test]
- fn test_span_encoding() {
- let id = FileId::from_raw(5);
- let span = Span::new(id, 10).unwrap();
+ fn test_span_detached() {
+ let span = Span::detached();
+ assert!(span.is_detached());
+ assert_eq!(span.id(), None);
+ assert_eq!(span.range(), None);
+ }
+
+ #[test]
+ fn test_span_number_encoding() {
+ let id = FileId::from_raw(NonZeroU16::new(5).unwrap());
+ let span = Span::from_number(id, 10).unwrap();
assert_eq!(span.id(), Some(id));
assert_eq!(span.number(), 10);
+ assert_eq!(span.range(), None);
+ }
+
+ #[test]
+ fn test_span_range_encoding() {
+ let id = FileId::from_raw(NonZeroU16::new(u16::MAX).unwrap());
+ let roundtrip = |range: Range<usize>| {
+ let span = Span::from_range(id, range.clone());
+ assert_eq!(span.id(), Some(id));
+ assert_eq!(span.range(), Some(range));
+ };
+
+ roundtrip(0..0);
+ roundtrip(177..233);
+ roundtrip(0..8388607);
+ roundtrip(8388606..8388607);
}
}