diff options
| author | Laurenz <laurmaedje@gmail.com> | 2023-06-26 13:57:21 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2023-06-27 18:40:17 +0200 |
| commit | 7b92bd7c340d9f9c094ed2fa57912049317d9b20 (patch) | |
| tree | b91399526ba94d87309d09d864df2935dd7a4d0a /src | |
| parent | 9c7f31870b4e1bf37df79ebbe1df9a56df83d878 (diff) | |
Basic package management
Diffstat (limited to 'src')
| -rw-r--r-- | src/diag.rs | 83 | ||||
| -rw-r--r-- | src/eval/func.rs | 8 | ||||
| -rw-r--r-- | src/eval/library.rs | 7 | ||||
| -rw-r--r-- | src/eval/mod.rs | 105 | ||||
| -rw-r--r-- | src/eval/module.rs | 38 | ||||
| -rw-r--r-- | src/eval/value.rs | 9 | ||||
| -rw-r--r-- | src/export/pdf/font.rs | 4 | ||||
| -rw-r--r-- | src/export/pdf/image.rs | 4 | ||||
| -rw-r--r-- | src/file.rs | 285 | ||||
| -rw-r--r-- | src/font/mod.rs | 12 | ||||
| -rw-r--r-- | src/ide/analyze.rs | 20 | ||||
| -rw-r--r-- | src/ide/jump.rs | 11 | ||||
| -rw-r--r-- | src/image.rs | 14 | ||||
| -rw-r--r-- | src/lib.rs | 53 | ||||
| -rw-r--r-- | src/syntax/lexer.rs | 8 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 4 | ||||
| -rw-r--r-- | src/syntax/node.rs | 43 | ||||
| -rw-r--r-- | src/syntax/parser.rs | 10 | ||||
| -rw-r--r-- | src/syntax/reparser.rs | 2 | ||||
| -rw-r--r-- | src/syntax/source.rs | 179 | ||||
| -rw-r--r-- | src/syntax/span.rs | 52 | ||||
| -rw-r--r-- | src/util/bytes.rs (renamed from src/util/buffer.rs) | 18 | ||||
| -rw-r--r-- | src/util/mod.rs | 42 |
23 files changed, 680 insertions, 331 deletions
diff --git a/src/diag.rs b/src/diag.rs index 9399058a..b5995be4 100644 --- a/src/diag.rs +++ b/src/diag.rs @@ -2,14 +2,14 @@ use std::fmt::{self, Display, Formatter}; use std::io; -use std::ops::Range; use std::path::{Path, PathBuf}; use std::str::Utf8Error; use std::string::FromUtf8Error; use comemo::Tracked; -use crate::syntax::{ErrorPos, Span, Spanned}; +use crate::file::PackageSpec; +use crate::syntax::{Span, Spanned}; use crate::World; /// Early-return with a [`StrResult`] or [`SourceResult`]. @@ -76,8 +76,6 @@ pub type SourceResult<T> = Result<T, Box<Vec<SourceError>>>; pub struct SourceError { /// The span of the erroneous node in the source code. pub span: Span, - /// The position in the node where the error should be annotated. - pub pos: ErrorPos, /// A diagnostic message describing the problem. pub message: EcoString, /// The trace of function calls leading to the error. @@ -92,36 +90,17 @@ impl SourceError { pub fn new(span: Span, message: impl Into<EcoString>) -> Self { Self { span, - pos: ErrorPos::Full, trace: vec![], message: message.into(), hints: vec![], } } - /// Adjust the position in the node where the error should be annotated. - pub fn with_pos(mut self, pos: ErrorPos) -> Self { - self.pos = pos; - self - } - /// Adds user-facing hints to the error. pub fn with_hints(mut self, hints: impl IntoIterator<Item = EcoString>) -> Self { self.hints.extend(hints); self } - - /// The range in the source file identified by - /// [`self.span.source()`](Span::source) where the error should be - /// annotated. - pub fn range(&self, world: &dyn World) -> Range<usize> { - let full = world.source(self.span.source()).range(self.span); - match self.pos { - ErrorPos::Full => full, - ErrorPos::Start => full.start..full.start, - ErrorPos::End => full.end..full.end, - } - } } /// A part of an error's [trace](SourceError::trace). @@ -171,12 +150,17 @@ impl<T> Trace<T> for SourceResult<T> { if span.is_detached() { return errors; } - let range = world.source(span.source()).range(span); + + let trace_range = span.range(&*world); for error in errors.iter_mut().filter(|e| !e.span.is_detached()) { // Skip traces that surround the error. - let error_range = world.source(error.span.source()).range(error.span); - if range.start <= error_range.start && range.end >= error_range.end { - continue; + if error.span.id() == span.id() { + let error_range = error.span.range(&*world); + if trace_range.start <= error_range.start + && trace_range.end >= error_range.end + { + continue; + } } error.trace.push(Spanned::new(make_point(), span)); @@ -262,6 +246,8 @@ pub enum FileError { NotSource, /// The file was not valid UTF-8, but should have been. InvalidUtf8, + /// The package the file is part of could not be loaded. + Package(PackageError), /// Another error. Other, } @@ -294,6 +280,7 @@ impl Display for FileError { Self::IsDirectory => f.pad("failed to load file (is a directory)"), Self::NotSource => f.pad("not a typst source file"), Self::InvalidUtf8 => f.pad("file is not valid utf-8"), + Self::Package(error) => error.fmt(f), Self::Other => f.pad("failed to load file"), } } @@ -311,12 +298,54 @@ impl From<FromUtf8Error> for FileError { } } +impl From<PackageError> for FileError { + fn from(error: PackageError) -> Self { + Self::Package(error) + } +} + impl From<FileError> for EcoString { fn from(error: FileError) -> Self { eco_format!("{error}") } } +/// A result type with a package-related error. +pub type PackageResult<T> = Result<T, PackageError>; + +/// An error that occured while trying to load a package. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum PackageError { + /// The specified package does not exist. + NotFound(PackageSpec), + /// Failed to retrieve the package through the network. + NetworkFailed, + /// The package archive was malformed. + MalformedArchive, + /// Another error. + Other, +} + +impl std::error::Error for PackageError {} + +impl Display for PackageError { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Self::NotFound(spec) => { + write!(f, "package not found (searched for {spec})",) + } + Self::NetworkFailed => f.pad("failed to load package (network failed)"), + Self::MalformedArchive => f.pad("failed to load package (archive malformed)"), + Self::Other => f.pad("failed to load package"), + } + } +} + +impl From<PackageError> for EcoString { + fn from(error: PackageError) -> Self { + eco_format!("{error}") + } +} /// Format a user-facing error message for an XML-like file format. pub fn format_xml_like_error(format: &str, error: roxmltree::Error) -> EcoString { match error { diff --git a/src/eval/func.rs b/src/eval/func.rs index 28d4a40d..22f948ce 100644 --- a/src/eval/func.rs +++ b/src/eval/func.rs @@ -11,9 +11,10 @@ use super::{ Value, Vm, }; use crate::diag::{bail, SourceResult, StrResult}; +use crate::file::FileId; use crate::model::{DelayedErrors, ElemFunc, Introspector, Locator, Vt}; use crate::syntax::ast::{self, AstNode, Expr, Ident}; -use crate::syntax::{SourceId, Span, SyntaxNode}; +use crate::syntax::{Span, SyntaxNode}; use crate::World; /// An evaluatable function. @@ -125,7 +126,6 @@ impl Func { args: impl IntoIterator<Item = T>, ) -> SourceResult<Value> { let route = Route::default(); - let id = SourceId::detached(); let scopes = Scopes::new(None); let mut locator = Locator::chained(vt.locator.track()); let vt = Vt { @@ -135,7 +135,7 @@ impl Func { delayed: TrackedMut::reborrow_mut(&mut vt.delayed), tracer: TrackedMut::reborrow_mut(&mut vt.tracer), }; - let mut vm = Vm::new(vt, route.track(), id, scopes); + let mut vm = Vm::new(vt, route.track(), FileId::detached(), scopes); let args = Args::new(self.span(), args); self.call_vm(&mut vm, args) } @@ -297,7 +297,7 @@ pub struct ParamInfo { #[derive(Hash)] pub(super) struct Closure { /// The source file where the closure was defined. - pub location: SourceId, + pub location: FileId, /// The name of the closure. pub name: Option<Ident>, /// Captured values from outer scopes. diff --git a/src/eval/library.rs b/src/eval/library.rs index 4978ada2..1b05de83 100644 --- a/src/eval/library.rs +++ b/src/eval/library.rs @@ -13,7 +13,6 @@ use crate::geom::{Abs, Dir}; use crate::model::{Content, ElemFunc, Introspector, Label, StyleChain, Styles, Vt}; use crate::syntax::Span; use crate::util::hash128; -use crate::World; /// Definition of Typst's standard library. #[derive(Debug, Clone, Hash)] @@ -66,10 +65,8 @@ pub struct LangItems { pub reference: fn(target: Label, supplement: Option<Content>) -> Content, /// The keys contained in the bibliography and short descriptions of them. #[allow(clippy::type_complexity)] - pub bibliography_keys: fn( - world: Tracked<dyn World + '_>, - introspector: Tracked<Introspector>, - ) -> Vec<(EcoString, Option<EcoString>)>, + pub bibliography_keys: + fn(introspector: Tracked<Introspector>) -> Vec<(EcoString, Option<EcoString>)>, /// A section heading: `= Introduction`. pub heading: fn(level: NonZeroUsize, body: Content) -> Content, /// The heading function. diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 93a73ea4..0805f9cc 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -55,27 +55,24 @@ pub use self::value::{Dynamic, Type, Value}; use std::collections::HashSet; use std::mem; -use std::path::{Path, PathBuf}; +use std::path::Path; use comemo::{Track, Tracked, TrackedMut, Validate}; use ecow::{EcoString, EcoVec}; use unicode_segmentation::UnicodeSegmentation; use self::func::{CapturesVisitor, Closure}; -use crate::model::{ - Content, Introspector, Label, Locator, Recipe, ShowableSelector, Styles, Transform, - Unlabellable, Vt, +use crate::diag::{ + bail, error, At, SourceError, SourceResult, StrResult, Trace, Tracepoint, }; -use crate::syntax::ast::AstNode; -use crate::syntax::{ - ast, parse_code, Source, SourceId, Span, Spanned, SyntaxKind, SyntaxNode, +use crate::file::{FileId, PackageManifest, PackageSpec}; +use crate::model::{ + Content, DelayedErrors, Introspector, Label, Locator, Recipe, ShowableSelector, + Styles, Transform, Unlabellable, Vt, }; -use crate::util::PathExt; +use crate::syntax::ast::{self, AstNode}; +use crate::syntax::{parse_code, Source, Span, Spanned, SyntaxKind, SyntaxNode}; use crate::World; -use crate::{ - diag::{bail, error, At, SourceError, SourceResult, StrResult, Trace, Tracepoint}, - model::DelayedErrors, -}; const MAX_ITERATIONS: usize = 10_000; const MAX_CALL_DEPTH: usize = 64; @@ -91,9 +88,8 @@ pub fn eval( ) -> SourceResult<Module> { // Prevent cyclic evaluation. let id = source.id(); - let path = if id.is_detached() { Path::new("") } else { world.source(id).path() }; if route.contains(id) { - panic!("Tried to cyclicly evaluate {}", path.display()); + panic!("Tried to cyclicly evaluate {}", id.path().display()); } // Hook up the lang items. @@ -130,7 +126,7 @@ pub fn eval( } // Assemble the module. - let name = path.file_stem().unwrap_or_default().to_string_lossy(); + let name = id.path().file_stem().unwrap_or_default().to_string_lossy(); Ok(Module::new(name).with_scope(vm.scopes.top).with_content(result?)) } @@ -166,7 +162,7 @@ pub fn eval_string( // Prepare VM. let route = Route::default(); - let id = SourceId::detached(); + let id = FileId::detached(); let scopes = Scopes::new(Some(world.library())); let mut vm = Vm::new(vt, route.track(), id, scopes); @@ -194,7 +190,7 @@ pub struct Vm<'a> { /// The route of source ids the VM took to reach its current location. route: Tracked<'a, Route<'a>>, /// The current location. - location: SourceId, + location: FileId, /// A control flow event that is currently happening. flow: Option<FlowEvent>, /// The stack of scopes. @@ -210,7 +206,7 @@ impl<'a> Vm<'a> { fn new( vt: Vt<'a>, route: Tracked<'a, Route>, - location: SourceId, + location: FileId, scopes: Scopes<'a>, ) -> Self { let traced = vt.tracer.span(location); @@ -232,6 +228,11 @@ impl<'a> Vm<'a> { self.vt.world } + /// The location to which paths are relative currently. + pub fn location(&self) -> FileId { + self.location + } + /// Define a variable in the current scope. #[tracing::instrument(skip_all)] pub fn define(&mut self, var: ast::Ident, value: impl IntoValue) { @@ -241,23 +242,6 @@ impl<'a> Vm<'a> { } self.scopes.top.define(var.take(), value); } - - /// Resolve a user-entered path to be relative to the compilation - /// environment's root. - #[tracing::instrument(skip_all)] - pub fn locate(&self, path: &str) -> StrResult<PathBuf> { - if !self.location.is_detached() { - if let Some(path) = path.strip_prefix('/') { - return Ok(self.world().root().join(path).normalize()); - } - - if let Some(dir) = self.world().source(self.location).path().parent() { - return Ok(dir.join(path).normalize()); - } - } - - bail!("cannot access file system from here") - } } /// A control flow event that occurred during evaluation. @@ -296,12 +280,12 @@ pub struct Route<'a> { // covariant over the constraint. If it becomes invariant, we're in for a // world of lifetime pain. outer: Option<Tracked<'a, Self, <Route<'static> as Validate>::Constraint>>, - id: Option<SourceId>, + id: Option<FileId>, } impl<'a> Route<'a> { /// Create a new route with just one entry. - pub fn new(id: SourceId) -> Self { + pub fn new(id: FileId) -> Self { Self { id: Some(id), outer: None } } @@ -309,7 +293,7 @@ impl<'a> Route<'a> { /// /// You must guarantee that `outer` lives longer than the resulting /// route is ever used. - pub fn insert(outer: Tracked<'a, Self>, id: SourceId) -> Self { + pub fn insert(outer: Tracked<'a, Self>, id: FileId) -> Self { Route { outer: Some(outer), id: Some(id) } } @@ -328,7 +312,7 @@ impl<'a> Route<'a> { #[comemo::track] impl<'a> Route<'a> { /// Whether the given id is part of the route. - fn contains(&self, id: SourceId) -> bool { + fn contains(&self, id: FileId) -> bool { self.id == Some(id) || self.outer.map_or(false, |outer| outer.contains(id)) } } @@ -358,8 +342,8 @@ impl Tracer { #[comemo::track] impl Tracer { /// The traced span if it is part of the given source file. - fn span(&self, id: SourceId) -> Option<Span> { - if self.span.map(Span::source) == Some(id) { + fn span(&self, id: FileId) -> Option<Span> { + if self.span.map(Span::id) == Some(id) { self.span } else { None @@ -1764,20 +1748,49 @@ fn import( } }; + // Handle package and file imports. + let path = path.as_str(); + if path.starts_with('@') { + let spec = path.parse::<PackageSpec>().at(span)?; + import_package(vm, spec, span) + } else { + import_file(vm, path, span) + } +} + +/// Import an external package. +fn import_package(vm: &mut Vm, spec: PackageSpec, span: Span) -> SourceResult<Module> { + // Evaluate the manifest. + let manifest_id = FileId::new(Some(spec.clone()), Path::new("/typst.toml")); + let bytes = vm.world().file(manifest_id).at(span)?; + let manifest = PackageManifest::parse(&bytes).at(span)?; + manifest.validate(&spec).at(span)?; + + // Evaluate the entry point. + let entrypoint = Path::new("/").join(manifest.package.entrypoint.as_str()); + let entrypoint_id = FileId::new(Some(spec), &entrypoint); + let source = vm.world().source(entrypoint_id).at(span)?; + let point = || Tracepoint::Import; + Ok(eval(vm.world(), vm.route, TrackedMut::reborrow_mut(&mut vm.vt.tracer), &source) + .trace(vm.world(), point, span)? + .with_name(manifest.package.name)) +} + +/// Import a file from a path. +fn import_file(vm: &mut Vm, path: &str, span: Span) -> SourceResult<Module> { // Load the source file. let world = vm.world(); - let full = vm.locate(&path).at(span)?; - let id = world.resolve(&full).at(span)?; + let id = vm.location().join(path).at(span)?; + let source = world.source(id).at(span)?; // Prevent cyclic importing. - if vm.route.contains(id) { + if vm.route.contains(source.id()) { bail!(span, "cyclic import"); } // Evaluate the file. - let source = world.source(id); let point = || Tracepoint::Import; - eval(world, vm.route, TrackedMut::reborrow_mut(&mut vm.vt.tracer), source) + eval(world, vm.route, TrackedMut::reborrow_mut(&mut vm.vt.tracer), &source) .trace(world, point, span) } diff --git a/src/eval/module.rs b/src/eval/module.rs index fbfdd4e6..0bc6bf38 100644 --- a/src/eval/module.rs +++ b/src/eval/module.rs @@ -7,15 +7,20 @@ use super::{Content, Scope, Value}; use crate::diag::StrResult; /// An evaluated module, ready for importing or typesetting. +/// +/// Values of this type are cheap to clone and hash. #[derive(Clone, Hash)] #[allow(clippy::derived_hash_with_manual_eq)] -pub struct Module(Arc<Repr>); +pub struct Module { + /// The module's name. + name: EcoString, + /// The reference-counted inner fields. + inner: Arc<Repr>, +} /// The internal representation. #[derive(Clone, Hash)] struct Repr { - /// The module's name. - name: EcoString, /// The top-level definitions that were bound in this module. scope: Scope, /// The module's layoutable contents. @@ -25,38 +30,43 @@ struct Repr { impl Module { /// Create a new module. pub fn new(name: impl Into<EcoString>) -> Self { - Self(Arc::new(Repr { + Self { name: name.into(), - scope: Scope::new(), - content: Content::empty(), - })) + inner: Arc::new(Repr { scope: Scope::new(), content: Content::empty() }), + } + } + + /// Update the module's name. + pub fn with_name(mut self, name: impl Into<EcoString>) -> Self { + self.name = name.into(); + self } /// Update the module's scope. pub fn with_scope(mut self, scope: Scope) -> Self { - Arc::make_mut(&mut self.0).scope = scope; + Arc::make_mut(&mut self.inner).scope = scope; self } /// Update the module's content. pub fn with_content(mut self, content: Content) -> Self { - Arc::make_mut(&mut self.0).content = content; + Arc::make_mut(&mut self.inner).content = content; self } /// Get the module's name. pub fn name(&self) -> &EcoString { - &self.0.name + &self.name } /// Access the module's scope. pub fn scope(&self) -> &Scope { - &self.0.scope + &self.inner.scope } /// Access the module's scope, mutably. pub fn scope_mut(&mut self) -> &mut Scope { - &mut Arc::make_mut(&mut self.0).scope + &mut Arc::make_mut(&mut self.inner).scope } /// Try to access a definition in the module. @@ -68,7 +78,7 @@ impl Module { /// Extract the module's content. pub fn content(self) -> Content { - match Arc::try_unwrap(self.0) { + match Arc::try_unwrap(self.inner) { Ok(repr) => repr.content, Err(arc) => arc.content.clone(), } @@ -83,6 +93,6 @@ impl Debug for Module { impl PartialEq for Module { fn eq(&self, other: &Self) -> bool { - Arc::ptr_eq(&self.0, &other.0) + self.name == other.name && Arc::ptr_eq(&self.inner, &other.inner) } } diff --git a/src/eval/value.rs b/src/eval/value.rs index 91fdadbe..b1782cab 100644 --- a/src/eval/value.rs +++ b/src/eval/value.rs @@ -15,9 +15,10 @@ use crate::diag::StrResult; use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel}; use crate::model::{Label, Styles}; use crate::syntax::{ast, Span}; +use crate::util::Bytes; /// A computational value. -#[derive(Clone, Default)] +#[derive(Default, Clone)] pub enum Value { /// The value that indicates the absence of a meaningful value. #[default] @@ -46,6 +47,8 @@ pub enum Value { Symbol(Symbol), /// A string: `"string"`. Str(Str), + /// Raw bytes. + Bytes(Bytes), /// A label: `<intro>`. Label(Label), /// A content value: `[*Hi* there]`. @@ -103,6 +106,7 @@ impl Value { Self::Color(_) => Color::TYPE_NAME, Self::Symbol(_) => Symbol::TYPE_NAME, Self::Str(_) => Str::TYPE_NAME, + Self::Bytes(_) => Bytes::TYPE_NAME, Self::Label(_) => Label::TYPE_NAME, Self::Content(_) => Content::TYPE_NAME, Self::Styles(_) => Styles::TYPE_NAME, @@ -186,6 +190,7 @@ impl Debug for Value { Self::Color(v) => Debug::fmt(v, f), Self::Symbol(v) => Debug::fmt(v, f), Self::Str(v) => Debug::fmt(v, f), + Self::Bytes(v) => Debug::fmt(v, f), Self::Label(v) => Debug::fmt(v, f), Self::Content(v) => Debug::fmt(v, f), Self::Styles(v) => Debug::fmt(v, f), @@ -228,6 +233,7 @@ impl Hash for Value { Self::Color(v) => v.hash(state), Self::Symbol(v) => v.hash(state), Self::Str(v) => v.hash(state), + Self::Bytes(v) => v.hash(state), Self::Label(v) => v.hash(state), Self::Content(v) => v.hash(state), Self::Styles(v) => v.hash(state), @@ -400,6 +406,7 @@ primitive! { Str, Symbol(symbol) => symbol.get().into() } +primitive! { Bytes: "bytes", Bytes } primitive! { Label: "label", Label } primitive! { Content: "content", Content, diff --git a/src/export/pdf/font.rs b/src/export/pdf/font.rs index d5de5118..f0676d8f 100644 --- a/src/export/pdf/font.rs +++ b/src/export/pdf/font.rs @@ -8,7 +8,7 @@ use unicode_general_category::GeneralCategory; use super::{deflate, EmExt, PdfContext, RefExt}; use crate::font::Font; -use crate::util::{Buffer, SliceExt}; +use crate::util::{Bytes, SliceExt}; const CMAP_NAME: Name = Name(b"Custom"); const SYSTEM_INFO: SystemInfo = SystemInfo { @@ -154,7 +154,7 @@ pub fn write_fonts(ctx: &mut PdfContext) { /// Subset a font to the given glyphs. #[comemo::memoize] -fn subset_font(font: &Font, glyphs: &[u16]) -> Buffer { +fn subset_font(font: &Font, glyphs: &[u16]) -> Bytes { let data = font.data(); let profile = subsetter::Profile::pdf(glyphs); let subsetted = subsetter::subset(data, font.index(), profile); diff --git a/src/export/pdf/image.rs b/src/export/pdf/image.rs index a7ec4744..48472d9f 100644 --- a/src/export/pdf/image.rs +++ b/src/export/pdf/image.rs @@ -5,7 +5,7 @@ use pdf_writer::{Filter, Finish}; use super::{deflate, PdfContext, RefExt}; use crate::image::{DecodedImage, Image, RasterFormat}; -use crate::util::Buffer; +use crate::util::Bytes; /// Embed all used images into the PDF. #[tracing::instrument(skip_all)] @@ -89,7 +89,7 @@ pub fn write_images(ctx: &mut PdfContext) { /// Skips the alpha channel as that's encoded separately. #[comemo::memoize] #[tracing::instrument(skip_all)] -fn encode_image(image: &Image) -> (Buffer, Filter, bool) { +fn encode_image(image: &Image) -> (Bytes, Filter, bool) { let decoded = image.decoded(); let (dynamic, format) = match decoded.as_ref() { DecodedImage::Raster(dynamic, _, format) => (dynamic, *format), diff --git a/src/file.rs b/src/file.rs new file mode 100644 index 00000000..a9d3c85e --- /dev/null +++ b/src/file.rs @@ -0,0 +1,285 @@ +//! File and package management. + +use std::collections::HashMap; +use std::fmt::{self, Debug, Display, Formatter}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::RwLock; + +use ecow::{eco_format, EcoString}; +use once_cell::sync::Lazy; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use crate::diag::{bail, FileError, StrResult}; +use crate::syntax::is_ident; +use crate::util::PathExt; + +/// The global package-path interner. +static INTERNER: Lazy<RwLock<Interner>> = + Lazy::new(|| RwLock::new(Interner { to_id: HashMap::new(), from_id: Vec::new() })); + +/// A package-path interner. +struct Interner { + to_id: HashMap<Pair, FileId>, + from_id: Vec<Pair>, +} + +/// An interned pair of a package specification and a path. +type Pair = &'static (Option<PackageSpec>, PathBuf); + +/// Identifies a file. +/// +/// This type is interned and thus cheap to clone, compare, and hash. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct FileId(u16); + +impl FileId { + /// Create a new interned file specification. + /// + /// Normalizes the path before interning. + pub fn new(package: Option<PackageSpec>, path: &Path) -> Self { + let pair = (package, path.normalize()); + let mut interner = INTERNER.write().unwrap(); + interner.to_id.get(&pair).copied().unwrap_or_else(|| { + let leaked = Box::leak(Box::new(pair)); + let len = interner.from_id.len(); + if len >= usize::from(u16::MAX) { + panic!("too many file specifications"); + } + let id = FileId(len as u16); + interner.to_id.insert(leaked, id); + interner.from_id.push(leaked); + id + }) + } + + /// Get an id that does not identify any real file. + pub const fn detached() -> Self { + Self(u16::MAX) + } + + /// Whether the id is the detached. + pub const fn is_detached(self) -> bool { + self.0 == Self::detached().0 + } + + /// The package the file resides in, if any. + pub fn package(&self) -> Option<&'static PackageSpec> { + if self.is_detached() { + None + } else { + self.pair().0.as_ref() + } + } + + /// The normalized path to the file (within the package if there's a + /// package). + pub fn path(&self) -> &'static Path { + if self.is_detached() { + Path::new("<detached>") + } else { + &self.pair().1 + } + } + + /// Resolve a file location relative to this file. + pub fn join(self, path: &str) -> StrResult<Self> { + if self.is_detached() { + bail!("cannot access file system from here"); + } + + let package = self.package().cloned(); + let base = self.path(); + Ok(if let Some(parent) = base.parent() { + Self::new(package, &parent.join(path)) + } else { + Self::new(package, Path::new(path)) + }) + } + + /// Construct from a raw number. + pub(crate) const fn from_u16(v: u16) -> Self { + Self(v) + } + + /// Extract the raw underlying number. + pub(crate) const fn as_u16(self) -> u16 { + self.0 + } + + /// Get the static pair. + fn pair(&self) -> Pair { + INTERNER.read().unwrap().from_id[usize::from(self.0)] + } +} + +impl Display for FileId { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let path = self.path().display(); + match self.package() { + Some(package) => write!(f, "{package}/{path}"), + None => write!(f, "{path}"), + } + } +} + +impl Debug for FileId { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(self, f) + } +} + +/// Identifies a package. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct PackageSpec { + /// The namespace the package lives in. + pub namespace: EcoString, + /// The name of the package within its namespace. + pub name: EcoString, + /// The package's version. + pub version: Version, +} + +impl FromStr for PackageSpec { + type Err = EcoString; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let mut s = unscanny::Scanner::new(s); + if !s.eat_if('@') { + bail!("package specification must start with '@'"); + } + + let namespace = s.eat_until('/'); + if namespace.is_empty() { + bail!("package specification is missing namespace"); + } else if !is_ident(namespace) { + bail!("`{namespace}` is not a valid package namespace"); + } + + s.eat_if('/'); + + let name = s.eat_until(':'); + if name.is_empty() { + bail!("package specification is missing name"); + } else if !is_ident(name) { + bail!("`{name}` is not a valid package name"); + } + + s.eat_if(':'); + + let version = s.after(); + if version.is_empty() { + bail!("package specification is missing version"); + } + + Ok(Self { + namespace: namespace.into(), + name: name.into(), + version: version.parse()?, + }) + } +} + +impl Display for PackageSpec { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "@{}/{}:{}", self.namespace, self.name, self.version) + } +} + +/// A package's version. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Version { + /// The package's major version. + pub major: u32, + /// The package's minor version. + pub minor: u32, + /// The package's patch version. + pub patch: u32, +} + +impl FromStr for Version { + type Err = EcoString; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let mut parts = s.split('.'); + let mut next = |kind| { + let Some(part) = parts.next().filter(|s| !s.is_empty()) else { + bail!("version number is missing {kind} version"); + }; + part.parse::<u32>() + .map_err(|_| eco_format!("`{part}` is not a valid {kind} version")) + }; + + let major = next("major")?; + let minor = next("minor")?; + let patch = next("patch")?; + if let Some(rest) = parts.next() { + bail!("version number has unexpected fourth component: `{rest}`"); + } + + Ok(Self { major, minor, patch }) + } +} + +impl Display for Version { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}.{}.{}", self.major, self.minor, self.patch) + } +} + +impl Serialize for Version { + fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> { + s.collect_str(self) + } +} + +impl<'de> Deserialize<'de> for Version { + fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> { + let string = EcoString::deserialize(d)?; + string.parse().map_err(serde::de::Error::custom) + } +} + +/// A parsed package manifest. +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct PackageManifest { + /// Details about the package itself. + pub package: PackageInfo, +} + +impl PackageManifest { + /// Parse the manifest from raw bytes. + pub fn parse(bytes: &[u8]) -> StrResult<Self> { + let string = std::str::from_utf8(bytes).map_err(FileError::from)?; + toml::from_str(string).map_err(|err| { + eco_format!("package manifest is malformed: {}", err.message()) + }) + } + + /// Ensure that this manifest is indeed for the specified package. + pub fn validate(&self, spec: &PackageSpec) -> StrResult<()> { + if self.package.name != spec.name { + bail!("package manifest contains mismatched name `{}`", self.package.name); + } + + if self.package.version != spec.version { + bail!( + "package manifest contains mismatched version {}", + self.package.version + ); + } + + Ok(()) + } +} + +/// The `package` key in the manifest. +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct PackageInfo { + /// The name of the package within its namespace. + pub name: EcoString, + /// The package's version. + pub version: Version, + /// The path of the entrypoint into the package. + pub entrypoint: EcoString, +} diff --git a/src/font/mod.rs b/src/font/mod.rs index 032783e4..2353e51c 100644 --- a/src/font/mod.rs +++ b/src/font/mod.rs @@ -15,9 +15,11 @@ use ttf_parser::GlyphId; use self::book::find_name; use crate::eval::Cast; use crate::geom::Em; -use crate::util::Buffer; +use crate::util::Bytes; /// An OpenType font. +/// +/// Values of this type are cheap to clone and hash. #[derive(Clone)] pub struct Font(Arc<Repr>); @@ -26,7 +28,7 @@ struct Repr { /// The raw font data, possibly shared with other fonts from the same /// collection. The vector's allocation must not move, because `ttf` points /// into it using unsafe code. - data: Buffer, + data: Bytes, /// The font's index in the buffer. index: u32, /// Metadata about the font. @@ -41,7 +43,7 @@ struct Repr { impl Font { /// Parse a font from data and collection index. - pub fn new(data: Buffer, index: u32) -> Option<Self> { + pub fn new(data: Bytes, index: u32) -> Option<Self> { // Safety: // - The slices's location is stable in memory: // - We don't move the underlying vector @@ -60,13 +62,13 @@ impl Font { } /// Parse all fonts in the given data. - pub fn iter(data: Buffer) -> impl Iterator<Item = Self> { + pub fn iter(data: Bytes) -> impl Iterator<Item = Self> { let count = ttf_parser::fonts_in_collection(&data).unwrap_or(1); (0..count).filter_map(move |index| Self::new(data.clone(), index)) } /// The underlying buffer. - pub fn data(&self) -> &Buffer { + pub fn data(&self) -> &Bytes { &self.0.data } diff --git a/src/ide/analyze.rs b/src/ide/analyze.rs index ba3a9b78..55ec8281 100644 --- a/src/ide/analyze.rs +++ b/src/ide/analyze.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use comemo::Track; use ecow::EcoString; @@ -7,7 +5,6 @@ use crate::doc::Frame; use crate::eval::{eval, Module, Route, Tracer, Value}; use crate::model::{Introspector, Label}; use crate::syntax::{ast, LinkedNode, Source, SyntaxKind}; -use crate::util::PathExt; use crate::World; /// Try to determine a set of possible values for an expression. @@ -42,7 +39,7 @@ pub fn analyze_expr(world: &(dyn World + 'static), node: &LinkedNode) -> Vec<Val world.track(), route.track(), tracer.track_mut(), - world.main(), + &world.main(), ) .and_then(|module| { typst::model::typeset( @@ -66,18 +63,11 @@ pub fn analyze_import( source: &Source, path: &str, ) -> Option<Module> { - let full: PathBuf = if let Some(path) = path.strip_prefix('/') { - world.root().join(path).normalize() - } else if let Some(dir) = source.path().parent() { - dir.join(path).normalize() - } else { - path.into() - }; let route = Route::default(); let mut tracer = Tracer::default(); - let id = world.resolve(&full).ok()?; - let source = world.source(id); - eval(world.track(), route.track(), tracer.track_mut(), source).ok() + let id = source.id().join(path).ok()?; + let source = world.source(id).ok()?; + eval(world.track(), route.track(), tracer.track_mut(), &source).ok() } /// Find all labels and details for them. @@ -112,7 +102,7 @@ pub fn analyze_labels( let split = output.len(); // Bibliography keys. - for (key, detail) in (items.bibliography_keys)(world.track(), introspector.track()) { + for (key, detail) in (items.bibliography_keys)(introspector.track()) { output.push((Label(key), detail)); } diff --git a/src/ide/jump.rs b/src/ide/jump.rs index 42ed2ab5..14a82e26 100644 --- a/src/ide/jump.rs +++ b/src/ide/jump.rs @@ -3,16 +3,17 @@ use std::num::NonZeroUsize; use ecow::EcoString; use crate::doc::{Destination, Frame, FrameItem, Meta, Position}; +use crate::file::FileId; use crate::geom::{Geometry, Point, Size}; use crate::model::Introspector; -use crate::syntax::{LinkedNode, Source, SourceId, Span, SyntaxKind}; +use crate::syntax::{LinkedNode, Source, Span, SyntaxKind}; use crate::World; /// Where to [jump](jump_from_click) to. #[derive(Debug, Clone, Eq, PartialEq)] pub enum Jump { /// Jump to a position in a source file. - Source(SourceId, usize), + Source(FileId, usize), /// Jump to an external URL. Url(EcoString), /// Jump to a point on a page. @@ -21,9 +22,9 @@ pub enum Jump { impl Jump { fn from_span(world: &dyn World, span: Span) -> Option<Self> { - let source = world.source(span.source()); + let source = world.source(span.id()).ok()?; let node = source.find(span)?; - Some(Self::Source(source.id(), node.offset())) + Some(Self::Source(span.id(), node.offset())) } } @@ -78,7 +79,7 @@ pub fn jump_from_click( Size::new(width, text.size), click, ) { - let source = world.source(span.source()); + let source = world.source(span.id()).ok()?; let node = source.find(span)?; let pos = if node.kind() == SyntaxKind::Text { let range = node.range(); diff --git a/src/image.rs b/src/image.rs index c6bf3198..3a245c14 100644 --- a/src/image.rs +++ b/src/image.rs @@ -18,7 +18,7 @@ use usvg::{TreeParsing, TreeTextToPath}; use crate::diag::{format_xml_like_error, StrResult}; use crate::font::Font; use crate::geom::Axes; -use crate::util::Buffer; +use crate::util::Bytes; use crate::World; /// A raster or vector image. @@ -31,7 +31,7 @@ pub struct Image(Arc<Prehashed<Repr>>); #[derive(Hash)] struct Repr { /// The raw, undecoded image data. - data: Buffer, + data: Bytes, /// The format of the encoded `buffer`. format: ImageFormat, /// The size of the image. @@ -47,7 +47,7 @@ impl Image { /// Create an image from a buffer and a format. #[comemo::memoize] pub fn new( - data: Buffer, + data: Bytes, format: ImageFormat, alt: Option<EcoString>, ) -> StrResult<Self> { @@ -71,7 +71,7 @@ impl Image { /// Create a font-dependant image from a buffer and a format. #[comemo::memoize] pub fn with_fonts( - data: Buffer, + data: Bytes, format: ImageFormat, world: Tracked<dyn World + '_>, fallback_family: Option<&str>, @@ -95,7 +95,7 @@ impl Image { } /// The raw image data. - pub fn data(&self) -> &Buffer { + pub fn data(&self) -> &Bytes { &self.0.data } @@ -234,7 +234,7 @@ pub struct IccProfile(pub Vec<u8>); /// Decode a raster image. #[comemo::memoize] -fn decode_raster(data: &Buffer, format: RasterFormat) -> StrResult<Arc<DecodedImage>> { +fn decode_raster(data: &Bytes, format: RasterFormat) -> StrResult<Arc<DecodedImage>> { fn decode_with<'a, T: ImageDecoder<'a>>( decoder: ImageResult<T>, ) -> ImageResult<(image::DynamicImage, Option<IccProfile>)> { @@ -259,7 +259,7 @@ fn decode_raster(data: &Buffer, format: RasterFormat) -> StrResult<Arc<DecodedIm /// Decode an SVG image. #[comemo::memoize] fn decode_svg( - data: &Buffer, + data: &Bytes, loader: Tracked<dyn SvgFontLoader + '_>, ) -> StrResult<Arc<DecodedImage>> { // Disable usvg's default to "Times New Roman". Instead, we default to @@ -45,6 +45,7 @@ pub mod diag; pub mod eval; pub mod doc; pub mod export; +pub mod file; pub mod font; pub mod geom; pub mod ide; @@ -52,16 +53,15 @@ pub mod image; pub mod model; pub mod syntax; -use std::path::Path; - use comemo::{Prehashed, Track, TrackedMut}; use crate::diag::{FileResult, SourceResult}; use crate::doc::Document; use crate::eval::{Datetime, Library, Route, Tracer}; +use crate::file::FileId; use crate::font::{Font, FontBook}; -use crate::syntax::{Source, SourceId}; -use crate::util::Buffer; +use crate::syntax::Source; +use crate::util::Bytes; /// Compile a source file into a fully layouted document. #[tracing::instrument(skip(world))] @@ -79,7 +79,7 @@ pub fn compile(world: &dyn World) -> SourceResult<Document> { world, route.track(), TrackedMut::reborrow_mut(&mut tracer), - world.main(), + &world.main(), )?; // Typeset the module's contents. @@ -87,35 +87,38 @@ pub fn compile(world: &dyn World) -> SourceResult<Document> { } /// The environment in which typesetting occurs. +/// +/// All loading functions (`main`, `source`, `file`, `font`) should perform +/// internal caching so that they are relatively cheap on repeated invocations +/// with the same argument. [`Source`], [`Bytes`], and [`Font`] are +/// all reference-counted and thus cheap to clone. +/// +/// The compiler doesn't do the caching itself because the world has much more +/// information on when something can change. For example, fonts typically don't +/// change and can thus even be cached across multiple compilations (for +/// long-running applications like `typst watch`). Source files on the other +/// hand can change and should thus be cleared after. Advanced clients like +/// language servers can also retain the source files and [edited](Source::edit) +/// them in-place to benefit from better incremental performance. #[comemo::track] pub trait World { - /// The path relative to which absolute paths are. - /// - /// Defaults to the empty path. - fn root(&self) -> &Path { - Path::new("") - } - /// The standard library. fn library(&self) -> &Prehashed<Library>; - /// The main source file. - fn main(&self) -> &Source; - - /// Try to resolve the unique id of a source file. - fn resolve(&self, path: &Path) -> FileResult<SourceId>; - - /// Access a source file by id. - fn source(&self, id: SourceId) -> &Source; - /// Metadata about all known fonts. fn book(&self) -> &Prehashed<FontBook>; - /// Try to access the font with the given id. - fn font(&self, id: usize) -> Option<Font>; + /// Access the main source file. + fn main(&self) -> Source; + + /// Try to access the specified source file. + fn source(&self, id: FileId) -> FileResult<Source>; + + /// Try to access the specified file. + fn file(&self, id: FileId) -> FileResult<Bytes>; - /// Try to access a file at a path. - fn file(&self, path: &Path) -> FileResult<Buffer>; + /// Try to access the font with the given index in the font book. + fn font(&self, index: usize) -> Option<Font>; /// Get the current date. /// diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs index ae4462d9..d95b5b7b 100644 --- a/src/syntax/lexer.rs +++ b/src/syntax/lexer.rs @@ -3,7 +3,7 @@ use unicode_ident::{is_xid_continue, is_xid_start}; use unicode_segmentation::UnicodeSegmentation; use unscanny::Scanner; -use super::{ErrorPos, SyntaxKind}; +use super::SyntaxKind; /// Splits up a string of source code into tokens. #[derive(Clone)] @@ -16,7 +16,7 @@ pub(super) struct Lexer<'s> { /// Whether the last token contained a newline. newline: bool, /// An error for the last token. - error: Option<(EcoString, ErrorPos)>, + error: Option<EcoString>, } /// What kind of tokens to emit. @@ -69,7 +69,7 @@ impl<'s> Lexer<'s> { } /// Take out the last error, if any. - pub fn take_error(&mut self) -> Option<(EcoString, ErrorPos)> { + pub fn take_error(&mut self) -> Option<EcoString> { self.error.take() } } @@ -77,7 +77,7 @@ impl<'s> Lexer<'s> { impl Lexer<'_> { /// Construct a full-positioned syntax error. fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind { - self.error = Some((message.into(), ErrorPos::Full)); + self.error = Some(message.into()); SyntaxKind::Error } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d4aee9d3..1ce1e4c0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -12,9 +12,9 @@ mod span; pub use self::kind::SyntaxKind; pub use self::lexer::{is_ident, is_newline}; -pub use self::node::{ErrorPos, LinkedChildren, LinkedNode, SyntaxNode}; +pub use self::node::{LinkedChildren, LinkedNode, SyntaxNode}; pub use self::parser::{parse, parse_code}; -pub use self::source::{Source, SourceId}; +pub use self::source::Source; pub use self::span::{Span, Spanned}; pub(crate) use self::lexer::{is_id_continue, is_id_start}; diff --git a/src/syntax/node.rs b/src/syntax/node.rs index d2adc13a..6a66416d 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -6,8 +6,9 @@ use std::sync::Arc; use ecow::EcoString; use super::ast::AstNode; -use super::{SourceId, Span, SyntaxKind}; +use super::{Span, SyntaxKind}; use crate::diag::SourceError; +use crate::file::FileId; /// A node in the untyped syntax tree. #[derive(Clone, Eq, PartialEq, Hash)] @@ -36,12 +37,8 @@ impl SyntaxNode { } /// Create a new error node. - pub fn error( - message: impl Into<EcoString>, - text: impl Into<EcoString>, - pos: ErrorPos, - ) -> Self { - Self(Repr::Error(Arc::new(ErrorNode::new(message, text, pos)))) + pub fn error(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self { + Self(Repr::Error(Arc::new(ErrorNode::new(message, text)))) } /// The type of the node. @@ -145,7 +142,7 @@ impl SyntaxNode { } if let Repr::Error(error) = &self.0 { - vec![SourceError::new(error.span, error.message.clone()).with_pos(error.pos)] + vec![SourceError::new(error.span, error.message.clone())] } else { self.children() .filter(|node| node.erroneous()) @@ -186,14 +183,14 @@ impl SyntaxNode { /// Convert the child to an error. pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) { let text = std::mem::take(self).into_text(); - *self = SyntaxNode::error(message, text, ErrorPos::Full); + *self = SyntaxNode::error(message, text); } /// Assign spans to each node. #[tracing::instrument(skip_all)] pub(super) fn numberize( &mut self, - id: SourceId, + id: FileId, within: Range<u64>, ) -> NumberingResult { if within.start >= within.end { @@ -285,7 +282,7 @@ impl Debug for SyntaxNode { impl Default for SyntaxNode { fn default() -> Self { - Self::error("", "", ErrorPos::Full) + Self::error("", "") } } @@ -381,7 +378,7 @@ impl InnerNode { /// a `range` of its children. fn numberize( &mut self, - id: SourceId, + id: FileId, range: Option<Range<usize>>, within: Range<u64>, ) -> NumberingResult { @@ -492,7 +489,7 @@ impl InnerNode { // Try to renumber. let within = start_number..end_number; - let id = self.span.source(); + let id = self.span.id(); if self.numberize(id, Some(renumber), within).is_ok() { return Ok(()); } @@ -540,23 +537,16 @@ struct ErrorNode { message: EcoString, /// The source text of the node. text: EcoString, - /// Where in the node an error should be annotated. - pos: ErrorPos, /// The node's span. span: Span, } impl ErrorNode { /// Create new error node. - fn new( - message: impl Into<EcoString>, - text: impl Into<EcoString>, - pos: ErrorPos, - ) -> Self { + fn new(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self { Self { message: message.into(), text: text.into(), - pos, span: Span::detached(), } } @@ -573,17 +563,6 @@ impl Debug for ErrorNode { } } -/// Where in a node an error should be annotated, -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum ErrorPos { - /// Over the full width of the node. - Full, - /// At the start of the node. - Start, - /// At the end of the node. - End, -} - /// A syntax node in a context. /// /// Knows its exact offset in the file and provides access to its diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 7d057ab9..54670df5 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -4,7 +4,7 @@ use std::ops::Range; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; -use super::{ast, is_newline, ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode}; +use super::{ast, is_newline, LexMode, Lexer, SyntaxKind, SyntaxNode}; /// Parse a source file. pub fn parse(text: &str) -> SyntaxNode { @@ -1560,8 +1560,8 @@ impl<'s> Parser<'s> { fn save(&mut self) { let text = self.current_text(); if self.at(SyntaxKind::Error) { - let (message, pos) = self.lexer.take_error().unwrap(); - self.nodes.push(SyntaxNode::error(message, text, pos)); + let message = self.lexer.take_error().unwrap(); + self.nodes.push(SyntaxNode::error(message, text)); } else { self.nodes.push(SyntaxNode::leaf(self.current, text)); } @@ -1608,14 +1608,14 @@ impl<'s> Parser<'s> { .map_or(true, |child| child.kind() != SyntaxKind::Error) { let message = eco_format!("expected {}", thing); - self.nodes.push(SyntaxNode::error(message, "", ErrorPos::Full)); + self.nodes.push(SyntaxNode::error(message, "")); } self.skip(); } fn expected_at(&mut self, m: Marker, thing: &str) { let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, "", ErrorPos::Full); + let error = SyntaxNode::error(message, ""); self.nodes.insert(m.0, error); } diff --git a/src/syntax/reparser.rs b/src/syntax/reparser.rs index 9e2b0a1b..a4186fa7 100644 --- a/src/syntax/reparser.rs +++ b/src/syntax/reparser.rs @@ -19,7 +19,7 @@ pub fn reparse( replacement_len: usize, ) -> Range<usize> { try_reparse(text, replaced, replacement_len, None, root, 0).unwrap_or_else(|| { - let id = root.span().source(); + let id = root.span().id(); *root = parse(text); root.numberize(id, Span::FULL).unwrap(); 0..text.len() diff --git a/src/syntax/source.rs b/src/syntax/source.rs index 277271db..6eb6fd5d 100644 --- a/src/syntax/source.rs +++ b/src/syntax/source.rs @@ -3,105 +3,107 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::Range; -use std::path::{Path, PathBuf}; +use std::sync::Arc; use comemo::Prehashed; -use unscanny::Scanner; use super::ast::Markup; use super::reparser::reparse; use super::{is_newline, parse, LinkedNode, Span, SyntaxNode}; use crate::diag::SourceResult; -use crate::util::{PathExt, StrExt}; +use crate::file::FileId; +use crate::util::StrExt; /// A source file. /// /// All line and column indices start at zero, just like byte indices. Only for /// user-facing display, you should add 1 to them. +/// +/// Values of this type are cheap to clone and hash. #[derive(Clone)] -pub struct Source { - id: SourceId, - path: PathBuf, - lines: Vec<Line>, +pub struct Source(Arc<Repr>); + +/// The internal representation. +#[derive(Clone)] +struct Repr { + id: FileId, text: Prehashed<String>, root: Prehashed<SyntaxNode>, + lines: Vec<Line>, } impl Source { /// Create a new source file. + /// + /// The path must be canonical, so that the same source file has the same + /// id even if accessed through different paths. #[tracing::instrument(skip_all)] - pub fn new(id: SourceId, path: &Path, text: String) -> Self { + pub fn new(id: FileId, text: String) -> Self { let mut root = parse(&text); root.numberize(id, Span::FULL).unwrap(); - Self { + Self(Arc::new(Repr { id, - path: path.normalize(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// Create a source file without a real id and path, usually for testing. pub fn detached(text: impl Into<String>) -> Self { - Self::new(SourceId::detached(), Path::new(""), text.into()) + Self::new(FileId::detached(), text.into()) } /// Create a source file with the same synthetic span for all nodes. pub fn synthesized(text: String, span: Span) -> Self { let mut root = parse(&text); root.synthesize(span); - Self { - id: SourceId::detached(), - path: PathBuf::new(), + Self(Arc::new(Repr { + id: FileId::detached(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// The root node of the file's untyped syntax tree. pub fn root(&self) -> &SyntaxNode { - &self.root + &self.0.root } /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> SourceResult<Markup> { - let errors = self.root.errors(); + let errors = self.root().errors(); if errors.is_empty() { - Ok(self.root.cast().expect("root node must be markup")) + Ok(self.root().cast().expect("root node must be markup")) } else { Err(Box::new(errors)) } } /// The id of the source file. - pub fn id(&self) -> SourceId { - self.id - } - - /// The normalized path to the source file. - pub fn path(&self) -> &Path { - &self.path + pub fn id(&self) -> FileId { + self.0.id } /// The whole source as a string slice. pub fn text(&self) -> &str { - &self.text + &self.0.text } /// Slice out the part of the source code enclosed by the range. pub fn get(&self, range: Range<usize>) -> Option<&str> { - self.text.get(range) + self.text().get(range) } /// Fully replace the source text. pub fn replace(&mut self, text: String) { - self.text = Prehashed::new(text); - self.lines = lines(&self.text); - let mut root = parse(&self.text); - root.numberize(self.id, Span::FULL).unwrap(); - self.root = Prehashed::new(root); + let inner = Arc::make_mut(&mut self.0); + inner.text = Prehashed::new(text); + inner.lines = lines(&inner.text); + let mut root = parse(&inner.text); + root.numberize(inner.id, Span::FULL).unwrap(); + inner.root = Prehashed::new(root); } /// Edit the source file by replacing the given range. @@ -112,72 +114,70 @@ impl Source { #[track_caller] pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> { let start_byte = replace.start; - let start_utf16 = self.byte_to_utf16(replace.start).unwrap(); - self.text.update(|text| text.replace_range(replace.clone(), with)); + let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); + let line = self.byte_to_line(start_byte).unwrap(); + + let inner = Arc::make_mut(&mut self.0); + + // Update the text itself. + inner.text.update(|text| text.replace_range(replace.clone(), with)); // Remove invalidated line starts. - let line = self.byte_to_line(start_byte).unwrap(); - self.lines.truncate(line + 1); + inner.lines.truncate(line + 1); // Handle adjoining of \r and \n. - if self.text[..start_byte].ends_with('\r') && with.starts_with('\n') { - self.lines.pop(); + if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') { + inner.lines.pop(); } // Recalculate the line starts after the edit. - self.lines - .extend(lines_from(start_byte, start_utf16, &self.text[start_byte..])); + inner.lines.extend(lines_from( + start_byte, + start_utf16, + &inner.text[start_byte..], + )); // Incrementally reparse the replaced range. - self.root - .update(|root| reparse(root, &self.text, replace, with.len())) + inner + .root + .update(|root| reparse(root, &inner.text, replace, with.len())) } /// Get the length of the file in UTF-8 encoded bytes. pub fn len_bytes(&self) -> usize { - self.text.len() + self.text().len() } /// Get the length of the file in UTF-16 code units. pub fn len_utf16(&self) -> usize { - let last = self.lines.last().unwrap(); - last.utf16_idx + self.text[last.byte_idx..].len_utf16() + let last = self.0.lines.last().unwrap(); + last.utf16_idx + self.0.text[last.byte_idx..].len_utf16() } /// Get the length of the file in lines. pub fn len_lines(&self) -> usize { - self.lines.len() + self.0.lines.len() } /// Find the node with the given span. /// /// Returns `None` if the span does not point into this source file. pub fn find(&self, span: Span) -> Option<LinkedNode<'_>> { - LinkedNode::new(&self.root).find(span) - } - - /// Map a span that points into this source file to a byte range. - /// - /// Panics if the span does not point into this source file. - #[track_caller] - pub fn range(&self, span: Span) -> Range<usize> { - self.find(span) - .expect("span does not point into this source file") - .range() + LinkedNode::new(self.root()).find(span) } /// Return the index of the UTF-16 code unit at the byte index. pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> { let line_idx = self.byte_to_line(byte_idx)?; - let line = self.lines.get(line_idx)?; - let head = self.text.get(line.byte_idx..byte_idx)?; + let line = self.0.lines.get(line_idx)?; + let head = self.0.text.get(line.byte_idx..byte_idx)?; Some(line.utf16_idx + head.len_utf16()) } /// Return the index of the line that contains the given byte index. pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> { - (byte_idx <= self.text.len()).then(|| { - match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { + (byte_idx <= self.0.text.len()).then(|| { + match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { Ok(i) => i, Err(i) => i - 1, } @@ -197,33 +197,33 @@ impl Source { /// Return the byte index at the UTF-16 code unit. pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> { - let line = self.lines.get( - match self.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { + let line = self.0.lines.get( + match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { Ok(i) => i, Err(i) => i - 1, }, )?; let mut k = line.utf16_idx; - for (i, c) in self.text[line.byte_idx..].char_indices() { + for (i, c) in self.0.text[line.byte_idx..].char_indices() { if k >= utf16_idx { return Some(line.byte_idx + i); } k += c.len_utf16(); } - (k == utf16_idx).then_some(self.text.len()) + (k == utf16_idx).then_some(self.0.text.len()) } /// Return the byte position at which the given line starts. pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> { - self.lines.get(line_idx).map(|line| line.byte_idx) + self.0.lines.get(line_idx).map(|line| line.byte_idx) } /// Return the range which encloses the given line. pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> { let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len()); + let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len()); Some(start..end) } @@ -248,42 +248,21 @@ impl Source { impl Debug for Source { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Source({})", self.path.display()) + write!(f, "Source({})", self.id().path().display()) } } impl Hash for Source { fn hash<H: Hasher>(&self, state: &mut H) { - self.id.hash(state); - self.path.hash(state); - self.text.hash(state); - self.root.hash(state); + self.0.id.hash(state); + self.0.text.hash(state); + self.0.root.hash(state); } } -/// A unique identifier for a loaded source file. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct SourceId(u16); - -impl SourceId { - /// Create a new source id for a file that is not part of the world. - pub const fn detached() -> Self { - Self(u16::MAX) - } - - /// Whether the source id is the detached. - pub const fn is_detached(self) -> bool { - self.0 == Self::detached().0 - } - - /// Create a source id from a number. - pub const fn from_u16(v: u16) -> Self { - Self(v) - } - - /// Extract the underlying number. - pub const fn as_u16(self) -> u16 { - self.0 +impl AsRef<str> for Source { + fn as_ref(&self) -> &str { + self.text() } } @@ -309,7 +288,7 @@ fn lines_from( utf16_offset: usize, text: &str, ) -> impl Iterator<Item = Line> + '_ { - let mut s = Scanner::new(text); + let mut s = unscanny::Scanner::new(text); let mut utf16_idx = utf16_offset; std::iter::from_fn(move || { @@ -340,7 +319,7 @@ mod tests { fn test_source_file_new() { let source = Source::detached(TEST); assert_eq!( - source.lines, + source.0.lines, [ Line { byte_idx: 0, utf16_idx: 0 }, Line { byte_idx: 7, utf16_idx: 6 }, @@ -421,8 +400,8 @@ mod tests { let mut source = Source::detached(prev); let result = Source::detached(after); source.edit(range, with); - assert_eq!(source.text, result.text); - assert_eq!(source.lines, result.lines); + assert_eq!(source.text(), result.text()); + assert_eq!(source.0.lines, result.0.lines); } // Test inserting at the beginning. diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 91e0a3cf..5c220252 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -2,13 +2,15 @@ use std::fmt::{self, Debug, Formatter}; use std::num::NonZeroU64; use std::ops::Range; -use super::SourceId; +use super::Source; +use crate::file::FileId; +use crate::World; /// A unique identifier for a syntax node. /// /// This is used throughout the compiler to track which source section an error -/// or element stems from. Can be [mapped back](super::Source::range) to a byte -/// range for user facing display. +/// or element stems from. Can be [mapped back](Self::range) to a byte range for +/// user facing display. /// /// During editing, the span values stay mostly stable, even for nodes behind an /// insertion. This is not true for simple ranges as they would shift. Spans can @@ -39,7 +41,7 @@ impl Span { /// /// Panics if the `number` is not contained in `FULL`. #[track_caller] - pub const fn new(id: SourceId, number: u64) -> Self { + pub const fn new(id: FileId, number: u64) -> Self { assert!( Self::FULL.start <= number && number < Self::FULL.end, "span number outside valid range" @@ -50,12 +52,12 @@ impl Span { /// A span that does not point into any source file. pub const fn detached() -> Self { - Self::pack(SourceId::detached(), Self::DETACHED) + Self::pack(FileId::detached(), Self::DETACHED) } /// Pack the components into a span. #[track_caller] - const fn pack(id: SourceId, number: u64) -> Span { + const fn pack(id: FileId, number: u64) -> Span { let bits = ((id.as_u16() as u64) << Self::BITS) | number; match NonZeroU64::new(bits) { Some(v) => Self(v), @@ -63,20 +65,38 @@ impl Span { } } - /// Whether the span is detached. - pub const fn is_detached(self) -> bool { - self.source().is_detached() - } - /// The id of the source file the span points into. - pub const fn source(self) -> SourceId { - SourceId::from_u16((self.0.get() >> Self::BITS) as u16) + pub const fn id(self) -> FileId { + FileId::from_u16((self.0.get() >> Self::BITS) as u16) } /// The unique number of the span within its source file. pub const fn number(self) -> u64 { self.0.get() & ((1 << Self::BITS) - 1) } + + /// Whether the span is detached. + pub const fn is_detached(self) -> bool { + self.id().is_detached() + } + + /// Get the byte range for this span. + #[track_caller] + pub fn range(self, world: &dyn World) -> Range<usize> { + let source = world + .source(self.id()) + .expect("span does not point into any source file"); + self.range_in(&source) + } + + /// Get the byte range for this span in the given source file. + #[track_caller] + pub fn range_in(self, source: &Source) -> Range<usize> { + source + .find(self) + .expect("span does not point into this source file") + .range() + } } /// A value with a span locating it in the source code. @@ -116,13 +136,13 @@ impl<T: Debug> Debug for Spanned<T> { #[cfg(test)] mod tests { - use super::{SourceId, Span}; + use super::{FileId, Span}; #[test] fn test_span_encoding() { - let id = SourceId::from_u16(5); + let id = FileId::from_u16(5); let span = Span::new(id, 10); - assert_eq!(span.source(), id); + assert_eq!(span.id(), id); assert_eq!(span.number(), 10); } } diff --git a/src/util/buffer.rs b/src/util/bytes.rs index 23fb9802..9165467b 100644 --- a/src/util/buffer.rs +++ b/src/util/bytes.rs @@ -5,11 +5,11 @@ use std::sync::Arc; use comemo::Prehashed; -/// A shared buffer that is cheap to clone and hash. +/// A shared byte buffer that is cheap to clone and hash. #[derive(Clone, Hash, Eq, PartialEq)] -pub struct Buffer(Arc<Prehashed<Cow<'static, [u8]>>>); +pub struct Bytes(Arc<Prehashed<Cow<'static, [u8]>>>); -impl Buffer { +impl Bytes { /// Create a buffer from a static byte slice. pub fn from_static(slice: &'static [u8]) -> Self { Self(Arc::new(Prehashed::new(Cow::Borrowed(slice)))) @@ -26,19 +26,19 @@ impl Buffer { } } -impl From<&[u8]> for Buffer { +impl From<&[u8]> for Bytes { fn from(slice: &[u8]) -> Self { Self(Arc::new(Prehashed::new(slice.to_vec().into()))) } } -impl From<Vec<u8>> for Buffer { +impl From<Vec<u8>> for Bytes { fn from(vec: Vec<u8>) -> Self { Self(Arc::new(Prehashed::new(vec.into()))) } } -impl Deref for Buffer { +impl Deref for Bytes { type Target = [u8]; fn deref(&self) -> &Self::Target { @@ -46,14 +46,14 @@ impl Deref for Buffer { } } -impl AsRef<[u8]> for Buffer { +impl AsRef<[u8]> for Bytes { fn as_ref(&self) -> &[u8] { self } } -impl Debug for Buffer { +impl Debug for Bytes { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad("Buffer(..)") + write!(f, "bytes({})", self.len()) } } diff --git a/src/util/mod.rs b/src/util/mod.rs index 71c5aefc..78c7bedf 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,9 +2,9 @@ pub mod fat; -mod buffer; +mod bytes; -pub use buffer::Buffer; +pub use bytes::Bytes; use std::fmt::{self, Debug, Formatter}; use std::hash::Hash; @@ -125,26 +125,60 @@ where pub trait PathExt { /// Lexically normalize a path. fn normalize(&self) -> PathBuf; + + /// Treat `self` as a virtual root relative to which the `path` is resolved. + /// + /// Returns `None` if the path lexically escapes the root. The path + /// might still escape through symlinks. + fn join_rooted(&self, path: &Path) -> Option<PathBuf>; } impl PathExt for Path { - #[tracing::instrument(skip_all)] fn normalize(&self) -> PathBuf { let mut out = PathBuf::new(); for component in self.components() { match component { Component::CurDir => {} Component::ParentDir => match out.components().next_back() { + Some(Component::RootDir) => {} Some(Component::Normal(_)) => { out.pop(); } _ => out.push(component), }, - _ => out.push(component), + Component::Prefix(_) | Component::RootDir | Component::Normal(_) => { + out.push(component) + } } } + if out.as_os_str().is_empty() { + out.push(Component::CurDir); + } out } + + fn join_rooted(&self, path: &Path) -> Option<PathBuf> { + let mut parts: Vec<_> = self.components().collect(); + let root = parts.len(); + for component in path.components() { + match component { + Component::Prefix(_) => return None, + Component::RootDir => parts.truncate(root), + Component::CurDir => {} + Component::ParentDir => { + if parts.len() <= root { + return None; + } + parts.pop(); + } + Component::Normal(_) => parts.push(component), + } + } + if parts.len() < root { + return None; + } + Some(parts.into_iter().collect()) + } } /// Format pieces separated with commas and a final "and" or "or". |
