summary | refs | log | tree | commit | diff
path: root/src/source.rs
diff options
context:
space:
mode:
author: Laurenz <laurmaedje@gmail.com> 2022-09-20 13:05:55 +0200
committer: Laurenz <laurmaedje@gmail.com> 2022-09-20 16:37:15 +0200
commit: 757a701c1aa2a6fb80033c7e75666661818da6f9 (patch)
tree: 0415fec94d3856f4ebc97a1744cf2ba75fe8e7aa /src/source.rs
parent: e29f55bb294cc298daad97accf6d8a76976b409c (diff)
A New World
Diffstat (limited to 'src/source.rs')
-rw-r--r-- src/source.rs 278
1 file changed, 55 insertions, 223 deletions
diff --git a/src/source.rs b/src/source.rs
index 24d830ad..c3648e11 100644
--- a/src/source.rs
+++ b/src/source.rs
@@ -1,158 +1,16 @@
//! Source file management.
-use std::collections::HashMap;
-use std::io;
use std::ops::Range;
use std::path::{Path, PathBuf};
-use std::sync::Arc;
use unscanny::Scanner;
-use crate::diag::{failed_to_load, StrResult, TypResult};
-use crate::loading::{FileHash, Loader};
+use crate::diag::TypResult;
use crate::parse::{is_newline, parse, reparse};
use crate::syntax::ast::Markup;
use crate::syntax::{Span, SyntaxNode};
use crate::util::{PathExt, StrExt};
-#[cfg(feature = "codespan-reporting")]
-use codespan_reporting::files::{self, Files};
-
-/// A unique identifier for a loaded source file.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub struct SourceId(u16);
-
-impl SourceId {
- /// Create a new source id for a file that is not part of a store.
- pub const fn detached() -> Self {
- Self(u16::MAX)
- }
-
- /// Create a source id from the raw underlying value.
- ///
- /// This should only be called with values returned by
- /// [`into_raw`](Self::into_raw).
- pub const fn from_raw(v: u16) -> Self {
- Self(v)
- }
-
- /// Convert into the raw underlying value.
- pub const fn into_raw(self) -> u16 {
- self.0
- }
-}
-
-/// Storage for loaded source files.
-pub struct SourceStore {
- loader: Arc<dyn Loader>,
- files: HashMap<FileHash, SourceId>,
- sources: Vec<Source>,
-}
-
-impl SourceStore {
- /// Create a new, empty source store.
- pub fn new(loader: Arc<dyn Loader>) -> Self {
- Self {
- loader,
- files: HashMap::new(),
- sources: vec![],
- }
- }
-
- /// Get a reference to a loaded source file.
- ///
- /// This panics if no source file with this `id` exists. This function
- /// should only be called with ids returned by this store's
- /// [`load()`](Self::load) and [`provide()`](Self::provide) methods.
- #[track_caller]
- pub fn get(&self, id: SourceId) -> &Source {
- &self.sources[id.0 as usize]
- }
-
- /// Load a source file from a path relative to the compilation environment's
- /// root.
- ///
- /// If there already exists a source file for this path, it is
- /// [replaced](Source::replace).
- pub fn load(&mut self, path: &Path) -> StrResult<SourceId> {
- let mut try_load = || -> io::Result<SourceId> {
- let hash = self.loader.resolve(path)?;
- if let Some(&id) = self.files.get(&hash) {
- return Ok(id);
- }
-
- let data = self.loader.file(path)?;
- let src = String::from_utf8(data.to_vec()).map_err(|_| {
- io::Error::new(io::ErrorKind::InvalidData, "file is not valid utf-8")
- })?;
-
- Ok(self.provide(path, src))
- };
-
- try_load().map_err(|err| failed_to_load("source file", path, err))
- }
-
- /// Directly provide a source file.
- ///
- /// The `path` does not need to be [resolvable](Loader::resolve) through the
- /// `loader`. If it is though, imports that resolve to the same file hash
- /// will use the inserted file instead of going through [`Loader::file`].
- ///
- /// If the path is resolvable and points to an existing source file, it is
- /// [replaced](Source::replace).
- pub fn provide(&mut self, path: impl AsRef<Path>, src: String) -> SourceId {
- let path = path.as_ref();
- let hash = self.loader.resolve(path).ok();
-
- // Check for existing file and replace if one exists.
- if let Some(&id) = hash.and_then(|hash| self.files.get(&hash)) {
- self.replace(id, src);
- return id;
- }
-
- // No existing file yet, so we allocate a new id.
- let id = SourceId(self.sources.len() as u16);
- self.sources.push(Source::new(id, path, src));
-
- // Register in file map if the path was known to the loader.
- if let Some(hash) = hash {
- self.files.insert(hash, id);
- }
-
- id
- }
-
- /// Fully [replace](Source::replace) the source text of a file.
- ///
- /// This panics if no source file with this `id` exists.
- #[track_caller]
- pub fn replace(&mut self, id: SourceId, src: String) {
- self.sources[id.0 as usize].replace(src)
- }
-
- /// [Edit](Source::edit) a source file by replacing the given range.
- ///
- /// This panics if no source file with this `id` exists or if the `replace`
- /// range is out of bounds.
- #[track_caller]
- pub fn edit(
- &mut self,
- id: SourceId,
- replace: Range<usize>,
- with: &str,
- ) -> Range<usize> {
- self.sources[id.0 as usize].edit(replace, with)
- }
-
- /// Map a span that points into a [file](Source::range) stored in this
- /// source store to a byte range.
- ///
- /// Panics if the span does not point into this source store.
- pub fn range(&self, span: Span) -> Range<usize> {
- self.get(span.source()).range(span)
- }
-}
-
/// A single source file.
///
/// _Note_: All line and column indices start at zero, just like byte indices.
@@ -160,7 +18,7 @@ impl SourceStore {
pub struct Source {
id: SourceId,
path: PathBuf,
- src: String,
+ text: String,
lines: Vec<Line>,
root: SyntaxNode,
rev: usize,
@@ -168,32 +26,32 @@ pub struct Source {
impl Source {
/// Create a new source file.
- pub fn new(id: SourceId, path: &Path, src: String) -> Self {
+ pub fn new(id: SourceId, path: &Path, text: String) -> Self {
let lines = std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
- .chain(lines(0, 0, &src))
+ .chain(lines(0, 0, &text))
.collect();
- let mut root = parse(&src);
+ let mut root = parse(&text);
root.numberize(id, Span::FULL).unwrap();
Self {
id,
path: path.normalize(),
root,
- src,
+ text,
lines,
rev: 0,
}
}
/// Create a source file without a real id and path, usually for testing.
- pub fn detached(src: impl Into<String>) -> Self {
- Self::new(SourceId::detached(), Path::new(""), src.into())
+ pub fn detached(text: impl Into<String>) -> Self {
+ Self::new(SourceId::detached(), Path::new(""), text.into())
}
/// Create a source file with the same synthetic span for all nodes.
- pub fn synthesized(src: impl Into<String>, span: Span) -> Self {
- let mut file = Self::detached(src);
+ pub fn synthesized(text: impl Into<String>, span: Span) -> Self {
+ let mut file = Self::detached(text);
file.root.synthesize(span);
file.id = span.source();
file
@@ -225,8 +83,8 @@ impl Source {
}
/// The whole source as a string slice.
- pub fn src(&self) -> &str {
- &self.src
+ pub fn text(&self) -> &str {
+ &self.text
}
/// The revision number of the file.
@@ -239,15 +97,15 @@ impl Source {
/// Slice out the part of the source code enclosed by the range.
pub fn get(&self, range: Range<usize>) -> Option<&str> {
- self.src.get(range)
+ self.text.get(range)
}
/// Fully replace the source text and increase the revision number.
- pub fn replace(&mut self, src: String) {
- self.src = src;
+ pub fn replace(&mut self, text: String) {
+ self.text = text;
self.lines = vec![Line { byte_idx: 0, utf16_idx: 0 }];
- self.lines.extend(lines(0, 0, &self.src));
- self.root = parse(&self.src);
+ self.lines.extend(lines(0, 0, &self.text));
+ self.root = parse(&self.text);
self.root.numberize(self.id(), Span::FULL).unwrap();
self.rev = self.rev.wrapping_add(1);
}
@@ -263,34 +121,34 @@ impl Source {
let start_byte = replace.start;
let start_utf16 = self.byte_to_utf16(replace.start).unwrap();
- self.src.replace_range(replace.clone(), with);
+ self.text.replace_range(replace.clone(), with);
// Remove invalidated line starts.
let line = self.byte_to_line(start_byte).unwrap();
self.lines.truncate(line + 1);
// Handle adjoining of \r and \n.
- if self.src[.. start_byte].ends_with('\r') && with.starts_with('\n') {
+ if self.text[.. start_byte].ends_with('\r') && with.starts_with('\n') {
self.lines.pop();
}
// Recalculate the line starts after the edit.
self.lines
- .extend(lines(start_byte, start_utf16, &self.src[start_byte ..]));
+ .extend(lines(start_byte, start_utf16, &self.text[start_byte ..]));
// Incrementally reparse the replaced range.
- reparse(&mut self.root, &self.src, replace, with.len())
+ reparse(&mut self.root, &self.text, replace, with.len())
}
/// Get the length of the file in UTF-8 encoded bytes.
pub fn len_bytes(&self) -> usize {
- self.src.len()
+ self.text.len()
}
/// Get the length of the file in UTF-16 code units.
pub fn len_utf16(&self) -> usize {
let last = self.lines.last().unwrap();
- last.utf16_idx + self.src[last.byte_idx ..].len_utf16()
+ last.utf16_idx + self.text[last.byte_idx ..].len_utf16()
}
/// Get the length of the file in lines.
@@ -311,13 +169,13 @@ impl Source {
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
let line_idx = self.byte_to_line(byte_idx)?;
let line = self.lines.get(line_idx)?;
- let head = self.src.get(line.byte_idx .. byte_idx)?;
+ let head = self.text.get(line.byte_idx .. byte_idx)?;
Some(line.utf16_idx + head.len_utf16())
}
/// Return the index of the line that contains the given byte index.
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
- (byte_idx <= self.src.len()).then(|| {
+ (byte_idx <= self.text.len()).then(|| {
match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
Ok(i) => i,
Err(i) => i - 1,
@@ -346,14 +204,14 @@ impl Source {
)?;
let mut k = line.utf16_idx;
- for (i, c) in self.src[line.byte_idx ..].char_indices() {
+ for (i, c) in self.text[line.byte_idx ..].char_indices() {
if k >= utf16_idx {
return Some(line.byte_idx + i);
}
k += c.len_utf16();
}
- (k == utf16_idx).then(|| self.src.len())
+ (k == utf16_idx).then(|| self.text.len())
}
@@ -365,7 +223,7 @@ impl Source {
/// Return the range which encloses the given line.
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
let start = self.line_to_byte(line_idx)?;
- let end = self.line_to_byte(line_idx + 1).unwrap_or(self.src.len());
+ let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len());
Some(start .. end)
}
@@ -388,6 +246,30 @@ impl Source {
}
}
+/// A unique identifier for a loaded source file.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct SourceId(u16);
+
+impl SourceId {
+ /// Create a new source id for a file that is not part of a store.
+ pub const fn detached() -> Self {
+ Self(u16::MAX)
+ }
+
+ /// Create a source id from the raw underlying value.
+ ///
+ /// This should only be called with values returned by
+ /// [`into_raw`](Self::into_raw).
+ pub const fn from_raw(v: u16) -> Self {
+ Self(v)
+ }
+
+ /// Convert into the raw underlying value.
+ pub const fn into_raw(self) -> u16 {
+ self.0
+ }
+}
+
/// Metadata about a line.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Line {
@@ -401,9 +283,9 @@ struct Line {
fn lines(
byte_offset: usize,
utf16_offset: usize,
- string: &str,
+ text: &str,
) -> impl Iterator<Item = Line> + '_ {
- let mut s = Scanner::new(string);
+ let mut s = Scanner::new(text);
let mut utf16_idx = utf16_offset;
std::iter::from_fn(move || {
@@ -427,56 +309,6 @@ fn lines(
})
}
-#[cfg(feature = "codespan-reporting")]
-impl<'a> Files<'a> for SourceStore {
- type FileId = SourceId;
- type Name = std::path::Display<'a>;
- type Source = &'a str;
-
- fn name(&'a self, id: SourceId) -> Result<Self::Name, files::Error> {
- Ok(self.get(id).path().display())
- }
-
- fn source(&'a self, id: SourceId) -> Result<Self::Source, files::Error> {
- Ok(self.get(id).src())
- }
-
- fn line_index(&'a self, id: SourceId, given: usize) -> Result<usize, files::Error> {
- let source = self.get(id);
- source
- .byte_to_line(given)
- .ok_or_else(|| files::Error::IndexTooLarge { given, max: source.len_bytes() })
- }
-
- fn line_range(
- &'a self,
- id: SourceId,
- given: usize,
- ) -> Result<std::ops::Range<usize>, files::Error> {
- let source = self.get(id);
- source
- .line_to_range(given)
- .ok_or_else(|| files::Error::LineTooLarge { given, max: source.len_lines() })
- }
-
- fn column_number(
- &'a self,
- id: SourceId,
- _: usize,
- given: usize,
- ) -> Result<usize, files::Error> {
- let source = self.get(id);
- source.byte_to_column(given).ok_or_else(|| {
- let max = source.len_bytes();
- if given <= max {
- files::Error::InvalidCharBoundary { given }
- } else {
- files::Error::IndexTooLarge { given, max }
- }
- })
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -563,7 +395,7 @@ mod tests {
let mut source = Source::detached(prev);
let result = Source::detached(after);
source.edit(range, with);
- assert_eq!(source.src, result.src);
+ assert_eq!(source.text, result.text);
assert_eq!(source.root, result.root);
assert_eq!(source.lines, result.lines);
}