diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-06-26 13:57:21 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-06-27 18:40:17 +0200 |
| commit | 7b92bd7c340d9f9c094ed2fa57912049317d9b20 (patch) | |
| tree | b91399526ba94d87309d09d864df2935dd7a4d0a /src/syntax/source.rs | |
| parent | 9c7f31870b4e1bf37df79ebbe1df9a56df83d878 (diff) | |
Basic package management
Diffstat (limited to 'src/syntax/source.rs')
| -rw-r--r-- | src/syntax/source.rs | 179 |
1 files changed, 79 insertions, 100 deletions
diff --git a/src/syntax/source.rs b/src/syntax/source.rs index 277271db..6eb6fd5d 100644 --- a/src/syntax/source.rs +++ b/src/syntax/source.rs @@ -3,105 +3,107 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::Range; -use std::path::{Path, PathBuf}; +use std::sync::Arc; use comemo::Prehashed; -use unscanny::Scanner; use super::ast::Markup; use super::reparser::reparse; use super::{is_newline, parse, LinkedNode, Span, SyntaxNode}; use crate::diag::SourceResult; -use crate::util::{PathExt, StrExt}; +use crate::file::FileId; +use crate::util::StrExt; /// A source file. /// /// All line and column indices start at zero, just like byte indices. Only for /// user-facing display, you should add 1 to them. +/// +/// Values of this type are cheap to clone and hash. #[derive(Clone)] -pub struct Source { - id: SourceId, - path: PathBuf, - lines: Vec<Line>, +pub struct Source(Arc<Repr>); + +/// The internal representation. +#[derive(Clone)] +struct Repr { + id: FileId, text: Prehashed<String>, root: Prehashed<SyntaxNode>, + lines: Vec<Line>, } impl Source { /// Create a new source file. + /// + /// The path must be canonical, so that the same source file has the same + /// id even if accessed through different paths. #[tracing::instrument(skip_all)] - pub fn new(id: SourceId, path: &Path, text: String) -> Self { + pub fn new(id: FileId, text: String) -> Self { let mut root = parse(&text); root.numberize(id, Span::FULL).unwrap(); - Self { + Self(Arc::new(Repr { id, - path: path.normalize(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// Create a source file without a real id and path, usually for testing. pub fn detached(text: impl Into<String>) -> Self { - Self::new(SourceId::detached(), Path::new(""), text.into()) + Self::new(FileId::detached(), text.into()) } /// Create a source file with the same synthetic span for all nodes. pub fn synthesized(text: String, span: Span) -> Self { let mut root = parse(&text); root.synthesize(span); - Self { - id: SourceId::detached(), - path: PathBuf::new(), + Self(Arc::new(Repr { + id: FileId::detached(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// The root node of the file's untyped syntax tree. pub fn root(&self) -> &SyntaxNode { - &self.root + &self.0.root } /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> SourceResult<Markup> { - let errors = self.root.errors(); + let errors = self.root().errors(); if errors.is_empty() { - Ok(self.root.cast().expect("root node must be markup")) + Ok(self.root().cast().expect("root node must be markup")) } else { Err(Box::new(errors)) } } /// The id of the source file. - pub fn id(&self) -> SourceId { - self.id - } - - /// The normalized path to the source file. - pub fn path(&self) -> &Path { - &self.path + pub fn id(&self) -> FileId { + self.0.id } /// The whole source as a string slice. pub fn text(&self) -> &str { - &self.text + &self.0.text } /// Slice out the part of the source code enclosed by the range. pub fn get(&self, range: Range<usize>) -> Option<&str> { - self.text.get(range) + self.text().get(range) } /// Fully replace the source text. pub fn replace(&mut self, text: String) { - self.text = Prehashed::new(text); - self.lines = lines(&self.text); - let mut root = parse(&self.text); - root.numberize(self.id, Span::FULL).unwrap(); - self.root = Prehashed::new(root); + let inner = Arc::make_mut(&mut self.0); + inner.text = Prehashed::new(text); + inner.lines = lines(&inner.text); + let mut root = parse(&inner.text); + root.numberize(inner.id, Span::FULL).unwrap(); + inner.root = Prehashed::new(root); } /// Edit the source file by replacing the given range. @@ -112,72 +114,70 @@ impl Source { #[track_caller] pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> { let start_byte = replace.start; - let start_utf16 = self.byte_to_utf16(replace.start).unwrap(); - self.text.update(|text| text.replace_range(replace.clone(), with)); + let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); + let line = self.byte_to_line(start_byte).unwrap(); + + let inner = Arc::make_mut(&mut self.0); + + // Update the text itself. + inner.text.update(|text| text.replace_range(replace.clone(), with)); // Remove invalidated line starts. - let line = self.byte_to_line(start_byte).unwrap(); - self.lines.truncate(line + 1); + inner.lines.truncate(line + 1); // Handle adjoining of \r and \n. - if self.text[..start_byte].ends_with('\r') && with.starts_with('\n') { - self.lines.pop(); + if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') { + inner.lines.pop(); } // Recalculate the line starts after the edit. - self.lines - .extend(lines_from(start_byte, start_utf16, &self.text[start_byte..])); + inner.lines.extend(lines_from( + start_byte, + start_utf16, + &inner.text[start_byte..], + )); // Incrementally reparse the replaced range. - self.root - .update(|root| reparse(root, &self.text, replace, with.len())) + inner + .root + .update(|root| reparse(root, &inner.text, replace, with.len())) } /// Get the length of the file in UTF-8 encoded bytes. pub fn len_bytes(&self) -> usize { - self.text.len() + self.text().len() } /// Get the length of the file in UTF-16 code units. pub fn len_utf16(&self) -> usize { - let last = self.lines.last().unwrap(); - last.utf16_idx + self.text[last.byte_idx..].len_utf16() + let last = self.0.lines.last().unwrap(); + last.utf16_idx + self.0.text[last.byte_idx..].len_utf16() } /// Get the length of the file in lines. pub fn len_lines(&self) -> usize { - self.lines.len() + self.0.lines.len() } /// Find the node with the given span. /// /// Returns `None` if the span does not point into this source file. pub fn find(&self, span: Span) -> Option<LinkedNode<'_>> { - LinkedNode::new(&self.root).find(span) - } - - /// Map a span that points into this source file to a byte range. - /// - /// Panics if the span does not point into this source file. - #[track_caller] - pub fn range(&self, span: Span) -> Range<usize> { - self.find(span) - .expect("span does not point into this source file") - .range() + LinkedNode::new(self.root()).find(span) } /// Return the index of the UTF-16 code unit at the byte index. pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> { let line_idx = self.byte_to_line(byte_idx)?; - let line = self.lines.get(line_idx)?; - let head = self.text.get(line.byte_idx..byte_idx)?; + let line = self.0.lines.get(line_idx)?; + let head = self.0.text.get(line.byte_idx..byte_idx)?; Some(line.utf16_idx + head.len_utf16()) } /// Return the index of the line that contains the given byte index. pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> { - (byte_idx <= self.text.len()).then(|| { - match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { + (byte_idx <= self.0.text.len()).then(|| { + match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { Ok(i) => i, Err(i) => i - 1, } @@ -197,33 +197,33 @@ impl Source { /// Return the byte index at the UTF-16 code unit. pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> { - let line = self.lines.get( - match self.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { + let line = self.0.lines.get( + match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { Ok(i) => i, Err(i) => i - 1, }, )?; let mut k = line.utf16_idx; - for (i, c) in self.text[line.byte_idx..].char_indices() { + for (i, c) in self.0.text[line.byte_idx..].char_indices() { if k >= utf16_idx { return Some(line.byte_idx + i); } k += c.len_utf16(); } - (k == utf16_idx).then_some(self.text.len()) + (k == utf16_idx).then_some(self.0.text.len()) } /// Return the byte position at which the given line starts. pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> { - self.lines.get(line_idx).map(|line| line.byte_idx) + self.0.lines.get(line_idx).map(|line| line.byte_idx) } /// Return the range which encloses the given line. pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> { let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len()); + let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len()); Some(start..end) } @@ -248,42 +248,21 @@ impl Source { impl Debug for Source { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Source({})", self.path.display()) + write!(f, "Source({})", self.id().path().display()) } } impl Hash for Source { fn hash<H: Hasher>(&self, state: &mut H) { - self.id.hash(state); - self.path.hash(state); - self.text.hash(state); - self.root.hash(state); + self.0.id.hash(state); + self.0.text.hash(state); + self.0.root.hash(state); } } -/// A unique identifier for a loaded source file. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct SourceId(u16); - -impl SourceId { - /// Create a new source id for a file that is not part of the world. - pub const fn detached() -> Self { - Self(u16::MAX) - } - - /// Whether the source id is the detached. - pub const fn is_detached(self) -> bool { - self.0 == Self::detached().0 - } - - /// Create a source id from a number. - pub const fn from_u16(v: u16) -> Self { - Self(v) - } - - /// Extract the underlying number. - pub const fn as_u16(self) -> u16 { - self.0 +impl AsRef<str> for Source { + fn as_ref(&self) -> &str { + self.text() } } @@ -309,7 +288,7 @@ fn lines_from( utf16_offset: usize, text: &str, ) -> impl Iterator<Item = Line> + '_ { - let mut s = Scanner::new(text); + let mut s = unscanny::Scanner::new(text); let mut utf16_idx = utf16_offset; std::iter::from_fn(move || { @@ -340,7 +319,7 @@ mod tests { fn test_source_file_new() { let source = Source::detached(TEST); assert_eq!( - source.lines, + source.0.lines, [ Line { byte_idx: 0, utf16_idx: 0 }, Line { byte_idx: 7, utf16_idx: 6 }, @@ -421,8 +400,8 @@ mod tests { let mut source = Source::detached(prev); let result = Source::detached(after); source.edit(range, with); - assert_eq!(source.text, result.text); - assert_eq!(source.lines, result.lines); + assert_eq!(source.text(), result.text()); + assert_eq!(source.0.lines, result.0.lines); } // Test inserting at the beginning. |
