diff options
Diffstat (limited to 'src/source.rs')
| -rw-r--r-- | src/source.rs | 278 |
1 files changed, 55 insertions, 223 deletions
diff --git a/src/source.rs b/src/source.rs index 24d830ad..c3648e11 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,158 +1,16 @@ //! Source file management. -use std::collections::HashMap; -use std::io; use std::ops::Range; use std::path::{Path, PathBuf}; -use std::sync::Arc; use unscanny::Scanner; -use crate::diag::{failed_to_load, StrResult, TypResult}; -use crate::loading::{FileHash, Loader}; +use crate::diag::TypResult; use crate::parse::{is_newline, parse, reparse}; use crate::syntax::ast::Markup; use crate::syntax::{Span, SyntaxNode}; use crate::util::{PathExt, StrExt}; -#[cfg(feature = "codespan-reporting")] -use codespan_reporting::files::{self, Files}; - -/// A unique identifier for a loaded source file. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct SourceId(u16); - -impl SourceId { - /// Create a new source id for a file that is not part of a store. - pub const fn detached() -> Self { - Self(u16::MAX) - } - - /// Create a source id from the raw underlying value. - /// - /// This should only be called with values returned by - /// [`into_raw`](Self::into_raw). - pub const fn from_raw(v: u16) -> Self { - Self(v) - } - - /// Convert into the raw underlying value. - pub const fn into_raw(self) -> u16 { - self.0 - } -} - -/// Storage for loaded source files. -pub struct SourceStore { - loader: Arc<dyn Loader>, - files: HashMap<FileHash, SourceId>, - sources: Vec<Source>, -} - -impl SourceStore { - /// Create a new, empty source store. - pub fn new(loader: Arc<dyn Loader>) -> Self { - Self { - loader, - files: HashMap::new(), - sources: vec![], - } - } - - /// Get a reference to a loaded source file. - /// - /// This panics if no source file with this `id` exists. This function - /// should only be called with ids returned by this store's - /// [`load()`](Self::load) and [`provide()`](Self::provide) methods. - #[track_caller] - pub fn get(&self, id: SourceId) -> &Source { - &self.sources[id.0 as usize] - } - - /// Load a source file from a path relative to the compilation environment's - /// root. - /// - /// If there already exists a source file for this path, it is - /// [replaced](Source::replace). - pub fn load(&mut self, path: &Path) -> StrResult<SourceId> { - let mut try_load = || -> io::Result<SourceId> { - let hash = self.loader.resolve(path)?; - if let Some(&id) = self.files.get(&hash) { - return Ok(id); - } - - let data = self.loader.file(path)?; - let src = String::from_utf8(data.to_vec()).map_err(|_| { - io::Error::new(io::ErrorKind::InvalidData, "file is not valid utf-8") - })?; - - Ok(self.provide(path, src)) - }; - - try_load().map_err(|err| failed_to_load("source file", path, err)) - } - - /// Directly provide a source file. - /// - /// The `path` does not need to be [resolvable](Loader::resolve) through the - /// `loader`. If it is though, imports that resolve to the same file hash - /// will use the inserted file instead of going through [`Loader::file`]. - /// - /// If the path is resolvable and points to an existing source file, it is - /// [replaced](Source::replace). - pub fn provide(&mut self, path: impl AsRef<Path>, src: String) -> SourceId { - let path = path.as_ref(); - let hash = self.loader.resolve(path).ok(); - - // Check for existing file and replace if one exists. - if let Some(&id) = hash.and_then(|hash| self.files.get(&hash)) { - self.replace(id, src); - return id; - } - - // No existing file yet, so we allocate a new id. - let id = SourceId(self.sources.len() as u16); - self.sources.push(Source::new(id, path, src)); - - // Register in file map if the path was known to the loader. - if let Some(hash) = hash { - self.files.insert(hash, id); - } - - id - } - - /// Fully [replace](Source::replace) the source text of a file. - /// - /// This panics if no source file with this `id` exists. - #[track_caller] - pub fn replace(&mut self, id: SourceId, src: String) { - self.sources[id.0 as usize].replace(src) - } - - /// [Edit](Source::edit) a source file by replacing the given range. - /// - /// This panics if no source file with this `id` exists or if the `replace` - /// range is out of bounds. - #[track_caller] - pub fn edit( - &mut self, - id: SourceId, - replace: Range<usize>, - with: &str, - ) -> Range<usize> { - self.sources[id.0 as usize].edit(replace, with) - } - - /// Map a span that points into a [file](Source::range) stored in this - /// source store to a byte range. - /// - /// Panics if the span does not point into this source store. - pub fn range(&self, span: Span) -> Range<usize> { - self.get(span.source()).range(span) - } -} - /// A single source file. /// /// _Note_: All line and column indices start at zero, just like byte indices. @@ -160,7 +18,7 @@ impl SourceStore { pub struct Source { id: SourceId, path: PathBuf, - src: String, + text: String, lines: Vec<Line>, root: SyntaxNode, rev: usize, @@ -168,32 +26,32 @@ pub struct Source { impl Source { /// Create a new source file. - pub fn new(id: SourceId, path: &Path, src: String) -> Self { + pub fn new(id: SourceId, path: &Path, text: String) -> Self { let lines = std::iter::once(Line { byte_idx: 0, utf16_idx: 0 }) - .chain(lines(0, 0, &src)) + .chain(lines(0, 0, &text)) .collect(); - let mut root = parse(&src); + let mut root = parse(&text); root.numberize(id, Span::FULL).unwrap(); Self { id, path: path.normalize(), root, - src, + text, lines, rev: 0, } } /// Create a source file without a real id and path, usually for testing. - pub fn detached(src: impl Into<String>) -> Self { - Self::new(SourceId::detached(), Path::new(""), src.into()) + pub fn detached(text: impl Into<String>) -> Self { + Self::new(SourceId::detached(), Path::new(""), text.into()) } /// Create a source file with the same synthetic span for all nodes. - pub fn synthesized(src: impl Into<String>, span: Span) -> Self { - let mut file = Self::detached(src); + pub fn synthesized(text: impl Into<String>, span: Span) -> Self { + let mut file = Self::detached(text); file.root.synthesize(span); file.id = span.source(); file @@ -225,8 +83,8 @@ impl Source { } /// The whole source as a string slice. - pub fn src(&self) -> &str { - &self.src + pub fn text(&self) -> &str { + &self.text } /// The revision number of the file. @@ -239,15 +97,15 @@ impl Source { /// Slice out the part of the source code enclosed by the range. pub fn get(&self, range: Range<usize>) -> Option<&str> { - self.src.get(range) + self.text.get(range) } /// Fully replace the source text and increase the revision number. - pub fn replace(&mut self, src: String) { - self.src = src; + pub fn replace(&mut self, text: String) { + self.text = text; self.lines = vec![Line { byte_idx: 0, utf16_idx: 0 }]; - self.lines.extend(lines(0, 0, &self.src)); - self.root = parse(&self.src); + self.lines.extend(lines(0, 0, &self.text)); + self.root = parse(&self.text); self.root.numberize(self.id(), Span::FULL).unwrap(); self.rev = self.rev.wrapping_add(1); } @@ -263,34 +121,34 @@ impl Source { let start_byte = replace.start; let start_utf16 = self.byte_to_utf16(replace.start).unwrap(); - self.src.replace_range(replace.clone(), with); + self.text.replace_range(replace.clone(), with); // Remove invalidated line starts. let line = self.byte_to_line(start_byte).unwrap(); self.lines.truncate(line + 1); // Handle adjoining of \r and \n. - if self.src[.. start_byte].ends_with('\r') && with.starts_with('\n') { + if self.text[.. start_byte].ends_with('\r') && with.starts_with('\n') { self.lines.pop(); } // Recalculate the line starts after the edit. self.lines - .extend(lines(start_byte, start_utf16, &self.src[start_byte ..])); + .extend(lines(start_byte, start_utf16, &self.text[start_byte ..])); // Incrementally reparse the replaced range. - reparse(&mut self.root, &self.src, replace, with.len()) + reparse(&mut self.root, &self.text, replace, with.len()) } /// Get the length of the file in UTF-8 encoded bytes. pub fn len_bytes(&self) -> usize { - self.src.len() + self.text.len() } /// Get the length of the file in UTF-16 code units. pub fn len_utf16(&self) -> usize { let last = self.lines.last().unwrap(); - last.utf16_idx + self.src[last.byte_idx ..].len_utf16() + last.utf16_idx + self.text[last.byte_idx ..].len_utf16() } /// Get the length of the file in lines. @@ -311,13 +169,13 @@ impl Source { pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> { let line_idx = self.byte_to_line(byte_idx)?; let line = self.lines.get(line_idx)?; - let head = self.src.get(line.byte_idx .. byte_idx)?; + let head = self.text.get(line.byte_idx .. byte_idx)?; Some(line.utf16_idx + head.len_utf16()) } /// Return the index of the line that contains the given byte index. pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> { - (byte_idx <= self.src.len()).then(|| { + (byte_idx <= self.text.len()).then(|| { match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { Ok(i) => i, Err(i) => i - 1, @@ -346,14 +204,14 @@ impl Source { )?; let mut k = line.utf16_idx; - for (i, c) in self.src[line.byte_idx ..].char_indices() { + for (i, c) in self.text[line.byte_idx ..].char_indices() { if k >= utf16_idx { return Some(line.byte_idx + i); } k += c.len_utf16(); } - (k == utf16_idx).then(|| self.src.len()) + (k == utf16_idx).then(|| self.text.len()) } @@ -365,7 +223,7 @@ impl Source { /// Return the range which encloses the given line. pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> { let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.src.len()); + let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len()); Some(start .. end) } @@ -388,6 +246,30 @@ impl Source { } } +/// A unique identifier for a loaded source file. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct SourceId(u16); + +impl SourceId { + /// Create a new source id for a file that is not part of a store. + pub const fn detached() -> Self { + Self(u16::MAX) + } + + /// Create a source id from the raw underlying value. + /// + /// This should only be called with values returned by + /// [`into_raw`](Self::into_raw). + pub const fn from_raw(v: u16) -> Self { + Self(v) + } + + /// Convert into the raw underlying value. + pub const fn into_raw(self) -> u16 { + self.0 + } +} + /// Metadata about a line. #[derive(Debug, Copy, Clone, Eq, PartialEq)] struct Line { @@ -401,9 +283,9 @@ struct Line { fn lines( byte_offset: usize, utf16_offset: usize, - string: &str, + text: &str, ) -> impl Iterator<Item = Line> + '_ { - let mut s = Scanner::new(string); + let mut s = Scanner::new(text); let mut utf16_idx = utf16_offset; std::iter::from_fn(move || { @@ -427,56 +309,6 @@ fn lines( }) } -#[cfg(feature = "codespan-reporting")] -impl<'a> Files<'a> for SourceStore { - type FileId = SourceId; - type Name = std::path::Display<'a>; - type Source = &'a str; - - fn name(&'a self, id: SourceId) -> Result<Self::Name, files::Error> { - Ok(self.get(id).path().display()) - } - - fn source(&'a self, id: SourceId) -> Result<Self::Source, files::Error> { - Ok(self.get(id).src()) - } - - fn line_index(&'a self, id: SourceId, given: usize) -> Result<usize, files::Error> { - let source = self.get(id); - source - .byte_to_line(given) - .ok_or_else(|| files::Error::IndexTooLarge { given, max: source.len_bytes() }) - } - - fn line_range( - &'a self, - id: SourceId, - given: usize, - ) -> Result<std::ops::Range<usize>, files::Error> { - let source = self.get(id); - source - .line_to_range(given) - .ok_or_else(|| files::Error::LineTooLarge { given, max: source.len_lines() }) - } - - fn column_number( - &'a self, - id: SourceId, - _: usize, - given: usize, - ) -> Result<usize, files::Error> { - let source = self.get(id); - source.byte_to_column(given).ok_or_else(|| { - let max = source.len_bytes(); - if given <= max { - files::Error::InvalidCharBoundary { given } - } else { - files::Error::IndexTooLarge { given, max } - } - }) - } -} - #[cfg(test)] mod tests { use super::*; @@ -563,7 +395,7 @@ mod tests { let mut source = Source::detached(prev); let result = Source::detached(after); source.edit(range, with); - assert_eq!(source.src, result.src); + assert_eq!(source.text, result.text); assert_eq!(source.root, result.root); assert_eq!(source.lines, result.lines); } |
