summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2021-07-31 22:59:14 +0200
committerLaurenz <laurmaedje@gmail.com>2021-08-01 00:00:36 +0200
commit3c92bad9a7cd6b880de197806443ffcce2cac9d8 (patch)
tree1faf79c66e23bc37711af16ad690a9878e28d348 /src
parentfbd3d191137aac8188ab8c6503d257d65d873972 (diff)
Pretty-printed diagnostics with traceback
Diffstat (limited to 'src')
-rw-r--r--src/diag.rs14
-rw-r--r--src/eval/capture.rs4
-rw-r--r--src/eval/mod.rs69
-rw-r--r--src/font.rs4
-rw-r--r--src/image.rs4
-rw-r--r--src/layout/frame.rs5
-rw-r--r--src/layout/incremental.rs8
-rw-r--r--src/lib.rs20
-rw-r--r--src/loading/fs.rs7
-rw-r--r--src/loading/mod.rs4
-rw-r--r--src/main.rs169
-rw-r--r--src/parse/lines.rs145
-rw-r--r--src/parse/mod.rs8
-rw-r--r--src/parse/parser.rs20
-rw-r--r--src/parse/resolve.rs17
-rw-r--r--src/parse/scanner.rs23
-rw-r--r--src/parse/tokens.rs9
-rw-r--r--src/pretty.rs4
-rw-r--r--src/source.rs195
-rw-r--r--src/syntax/span.rs95
-rw-r--r--src/util/eco.rs18
21 files changed, 510 insertions, 332 deletions
diff --git a/src/diag.rs b/src/diag.rs
index 76d7c6b7..397a833f 100644
--- a/src/diag.rs
+++ b/src/diag.rs
@@ -16,10 +16,21 @@ pub type StrResult<T> = Result<T, String>;
pub struct Error {
/// The file that contains the error.
pub file: FileId,
- /// The erronous location in the source code.
+ /// The erroneous location in the source code.
pub span: Span,
/// A diagnostic message describing the problem.
pub message: String,
+ /// The trace of function calls leading to the error.
+ pub trace: Vec<(FileId, Span, Tracepoint)>,
+}
+
+/// A part of an error's [trace](Error::trace).
+#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
+pub enum Tracepoint {
+ /// A function call.
+ Call(Option<String>),
+ /// A module import.
+ Import,
}
impl Error {
@@ -28,6 +39,7 @@ impl Error {
Self {
file,
span: span.into(),
+ trace: vec![],
message: message.into(),
}
}
diff --git a/src/eval/capture.rs b/src/eval/capture.rs
index 10f7ec83..a6e543f9 100644
--- a/src/eval/capture.rs
+++ b/src/eval/capture.rs
@@ -43,8 +43,8 @@ impl<'ast> Visit<'ast> for CapturesVisitor<'_> {
}
}
- fn visit_binding(&mut self, id: &'ast Ident) {
- self.internal.def_mut(id.as_str(), Value::None);
+ fn visit_binding(&mut self, ident: &'ast Ident) {
+ self.internal.def_mut(ident.as_str(), Value::None);
}
fn visit_enter(&mut self) {
diff --git a/src/eval/mod.rs b/src/eval/mod.rs
index decd4281..8f5532eb 100644
--- a/src/eval/mod.rs
+++ b/src/eval/mod.rs
@@ -25,11 +25,12 @@ use std::mem;
use std::path::Path;
use std::rc::Rc;
-use crate::diag::{Error, StrResult, TypResult};
+use crate::diag::{Error, StrResult, Tracepoint, TypResult};
use crate::geom::{Angle, Fractional, Length, Relative};
use crate::image::ImageCache;
use crate::loading::{FileId, Loader};
use crate::parse::parse;
+use crate::source::{SourceFile, SourceMap};
use crate::syntax::visit::Visit;
use crate::syntax::*;
use crate::util::EcoString;
@@ -67,6 +68,8 @@ pub trait Eval {
pub struct EvalContext<'a> {
/// The loader from which resources (files and images) are loaded.
pub loader: &'a dyn Loader,
+ /// The store for source files.
+ pub sources: &'a mut SourceMap,
/// The cache for decoded images.
pub images: &'a mut ImageCache,
/// The cache for loaded modules.
@@ -86,6 +89,7 @@ impl<'a> EvalContext<'a> {
pub fn new(ctx: &'a mut Context, file: FileId) -> Self {
Self {
loader: ctx.loader.as_ref(),
+ sources: &mut ctx.sources,
images: &mut ctx.images,
modules: &mut ctx.modules,
scopes: Scopes::new(Some(&ctx.std)),
@@ -106,49 +110,58 @@ impl<'a> EvalContext<'a> {
/// Process an import of a module relative to the current location.
pub fn import(&mut self, path: &str, span: Span) -> TypResult<FileId> {
- let id = self.resolve(path, span)?;
+ let file = self.resolve(path, span)?;
// Prevent cyclic importing.
- if self.file == id || self.route.contains(&id) {
+ if self.file == file || self.route.contains(&file) {
bail!(self.file, span, "cyclic import");
}
// Check whether the module was already loaded.
- if self.modules.get(&id).is_some() {
- return Ok(id);
+ if self.modules.get(&file).is_some() {
+ return Ok(file);
}
// Load the source file.
let buffer = self
.loader
- .load_file(id)
+ .load_file(file)
.map_err(|_| Error::boxed(self.file, span, "failed to load file"))?;
// Decode UTF-8.
- let string = std::str::from_utf8(&buffer)
+ let string = String::from_utf8(buffer)
.map_err(|_| Error::boxed(self.file, span, "file is not valid utf-8"))?;
// Parse the file.
- let ast = parse(id, string)?;
+ let source = self.sources.insert(SourceFile::new(file, string));
+ let ast = parse(&source)?;
// Prepare the new context.
let new_scopes = Scopes::new(self.scopes.base);
let old_scopes = mem::replace(&mut self.scopes, new_scopes);
self.route.push(self.file);
- self.file = id;
+ self.file = file;
// Evaluate the module.
- let template = Rc::new(ast).eval(self)?;
+ let result = Rc::new(ast).eval(self);
// Restore the old context.
let new_scopes = mem::replace(&mut self.scopes, old_scopes);
self.file = self.route.pop().unwrap();
+ // Add a tracepoint to the errors.
+ let template = result.map_err(|mut errors| {
+ for error in errors.iter_mut() {
+ error.trace.push((self.file, span, Tracepoint::Import));
+ }
+ errors
+ })?;
+
// Save the evaluated module.
let module = Module { scope: new_scopes.top, template };
- self.modules.insert(id, module);
+ self.modules.insert(file, module);
- Ok(id)
+ Ok(file)
}
}
@@ -399,7 +412,22 @@ impl Eval for CallExpr {
.map_err(Error::partial(ctx.file, self.callee.span()))?;
let mut args = self.args.eval(ctx)?;
- let returned = callee(ctx, &mut args)?;
+ let returned = callee(ctx, &mut args).map_err(|mut errors| {
+ for error in errors.iter_mut() {
+ // Skip errors directly related to arguments.
+ if error.file == ctx.file && self.span.contains(error.span) {
+ continue;
+ }
+
+ error.trace.push((
+ ctx.file,
+ self.span,
+ Tracepoint::Call(callee.name().map(Into::into)),
+ ));
+ }
+ errors
+ })?;
+
args.finish()?;
Ok(returned)
@@ -445,6 +473,7 @@ impl Eval for ClosureExpr {
type Output = Value;
fn eval(&self, ctx: &mut EvalContext) -> TypResult<Self::Output> {
+ let file = ctx.file;
let params = Rc::clone(&self.params);
let body = Rc::clone(&self.body);
@@ -459,7 +488,8 @@ impl Eval for ClosureExpr {
let func = Function::new(name, move |ctx, args| {
// Don't leak the scopes from the call site. Instead, we use the
// scope of captured variables we collected earlier.
- let prev = mem::take(&mut ctx.scopes);
+ let prev_scopes = mem::take(&mut ctx.scopes);
+ let prev_file = mem::replace(&mut ctx.file, file);
ctx.scopes.top = captured.clone();
for param in params.iter() {
@@ -468,7 +498,8 @@ impl Eval for ClosureExpr {
}
let result = body.eval(ctx);
- ctx.scopes = prev;
+ ctx.scopes = prev_scopes;
+ ctx.file = prev_file;
result
});
@@ -630,8 +661,8 @@ impl Eval for ImportExpr {
.cast::<EcoString>()
.map_err(Error::partial(ctx.file, self.path.span()))?;
- let id = ctx.import(&path, self.path.span())?;
- let module = &ctx.modules[&id];
+ let file = ctx.import(&path, self.path.span())?;
+ let module = &ctx.modules[&file];
match &self.imports {
Imports::Wildcard => {
@@ -664,8 +695,8 @@ impl Eval for IncludeExpr {
.cast::<EcoString>()
.map_err(Error::partial(ctx.file, self.path.span()))?;
- let id = ctx.import(&path, self.path.span())?;
- let module = &ctx.modules[&id];
+ let file = ctx.import(&path, self.path.span())?;
+ let module = &ctx.modules[&file];
Ok(Value::Template(module.template.clone()))
}
diff --git a/src/font.rs b/src/font.rs
index 94548699..a609e934 100644
--- a/src/font.rs
+++ b/src/font.rs
@@ -325,12 +325,12 @@ impl FaceId {
///
/// This should only be called with values returned by
/// [`into_raw`](Self::into_raw).
- pub fn from_raw(v: u64) -> Self {
+ pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
- pub fn into_raw(self) -> u64 {
+ pub const fn into_raw(self) -> u64 {
self.0
}
}
diff --git a/src/image.rs b/src/image.rs
index 93b95bda..f041fac1 100644
--- a/src/image.rs
+++ b/src/image.rs
@@ -112,12 +112,12 @@ impl ImageId {
///
/// This should only be called with values returned by
/// [`into_raw`](Self::into_raw).
- pub fn from_raw(v: u64) -> Self {
+ pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
- pub fn into_raw(self) -> u64 {
+ pub const fn into_raw(self) -> u64 {
self.0
}
}
diff --git a/src/layout/frame.rs b/src/layout/frame.rs
index 82ac71a9..6d5cc2f3 100644
--- a/src/layout/frame.rs
+++ b/src/layout/frame.rs
@@ -146,9 +146,8 @@ impl Text {
pub fn encode_glyphs_be(&self) -> Vec<u8> {
let mut bytes = Vec::with_capacity(2 * self.glyphs.len());
for glyph in &self.glyphs {
- let id = glyph.id;
- bytes.push((id >> 8) as u8);
- bytes.push((id & 0xff) as u8);
+ bytes.push((glyph.id >> 8) as u8);
+ bytes.push((glyph.id & 0xff) as u8);
}
bytes
}
diff --git a/src/layout/incremental.rs b/src/layout/incremental.rs
index e7d12d10..32353d6f 100644
--- a/src/layout/incremental.rs
+++ b/src/layout/incremental.rs
@@ -8,7 +8,7 @@ use super::*;
///
/// _This is only available when the `layout-cache` feature is enabled._
#[cfg(feature = "layout-cache")]
-#[derive(Debug, Default, Clone)]
+#[derive(Default, Clone)]
pub struct LayoutCache {
/// Maps from node hashes to the resulting frames and regions in which the
/// frames are valid. The right hand side of the hash map is a vector of
@@ -70,9 +70,9 @@ impl LayoutCache {
) {
let entry = FramesEntry::new(frames, level);
match self.frames.entry(hash) {
- Entry::Occupied(o) => o.into_mut().push(entry),
- Entry::Vacant(v) => {
- v.insert(vec![entry]);
+ Entry::Occupied(occupied) => occupied.into_mut().push(entry),
+ Entry::Vacant(vacant) => {
+ vacant.insert(vec![entry]);
}
}
}
diff --git a/src/lib.rs b/src/lib.rs
index be99fb58..0f556989 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -44,6 +44,7 @@ pub mod loading;
pub mod paper;
pub mod parse;
pub mod pretty;
+pub mod source;
pub mod syntax;
pub mod util;
@@ -57,18 +58,21 @@ use crate::image::ImageCache;
use crate::layout::Frame;
#[cfg(feature = "layout-cache")]
use crate::layout::LayoutCache;
-use crate::loading::{FileId, Loader};
+use crate::loading::Loader;
+use crate::source::{SourceFile, SourceMap};
/// The core context which holds the loader, configuration and cached artifacts.
pub struct Context {
/// The loader the context was created with.
pub loader: Rc<dyn Loader>,
+ /// Stores loaded source files.
+ pub sources: SourceMap,
+ /// Caches evaluated modules.
+ pub modules: ModuleCache,
/// Caches parsed font faces.
pub fonts: FontCache,
/// Caches decoded images.
pub images: ImageCache,
- /// Caches evaluated modules.
- pub modules: ModuleCache,
/// Caches layouting artifacts.
#[cfg(feature = "layout-cache")]
pub layouts: LayoutCache,
@@ -97,15 +101,12 @@ impl Context {
/// Typeset a source file into a collection of layouted frames.
///
- /// The `file` identifies the source file and is used to resolve relative
- /// paths (for importing and image loading).
- ///
/// Returns either a vector of frames representing individual pages or
/// diagnostics in the form of a vector of error message with file and span
/// information.
- pub fn typeset(&mut self, file: FileId, src: &str) -> TypResult<Vec<Rc<Frame>>> {
- let ast = parse::parse(file, src)?;
- let module = eval::eval(self, file, Rc::new(ast))?;
+ pub fn typeset(&mut self, source: &SourceFile) -> TypResult<Vec<Rc<Frame>>> {
+ let ast = parse::parse(source)?;
+ let module = eval::eval(self, source.file(), Rc::new(ast))?;
let tree = exec::exec(self, &module.template);
let frames = layout::layout(self, &tree);
Ok(frames)
@@ -140,6 +141,7 @@ impl ContextBuilder {
pub fn build(self, loader: Rc<dyn Loader>) -> Context {
Context {
loader: Rc::clone(&loader),
+ sources: SourceMap::new(),
fonts: FontCache::new(Rc::clone(&loader)),
images: ImageCache::new(loader),
modules: ModuleCache::new(),
diff --git a/src/loading/fs.rs b/src/loading/fs.rs
index d41a7dc1..c3ca332e 100644
--- a/src/loading/fs.rs
+++ b/src/loading/fs.rs
@@ -1,4 +1,4 @@
-use std::cell::RefCell;
+use std::cell::{Ref, RefCell};
use std::collections::HashMap;
use std::fs::{self, File};
use std::io;
@@ -92,6 +92,11 @@ impl FsLoader {
}
}
+ /// Return the path of a resolved file.
+ pub fn path(&self, id: FileId) -> Ref<Path> {
+ Ref::map(self.paths.borrow(), |paths| paths[&id].as_path())
+ }
+
#[cfg(all(unix, not(target_os = "macos")))]
fn search_system_impl(&mut self) {
self.search_path("/usr/share/fonts");
diff --git a/src/loading/mod.rs b/src/loading/mod.rs
index ab52439d..65eb25c6 100644
--- a/src/loading/mod.rs
+++ b/src/loading/mod.rs
@@ -40,12 +40,12 @@ pub struct FileId(u64);
impl FileId {
/// Create a file id from a raw value.
- pub fn from_raw(v: u64) -> Self {
+ pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
- pub fn into_raw(self) -> u64 {
+ pub const fn into_raw(self) -> u64 {
self.0
}
}
diff --git a/src/main.rs b/src/main.rs
index 7891082d..51a6d833 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,14 +1,33 @@
use std::fs;
+use std::io::{self, Write};
+use std::ops::Range;
use std::path::{Path, PathBuf};
+use std::process;
use anyhow::{anyhow, bail, Context};
+use codespan_reporting::diagnostic::{Diagnostic, Label};
+use codespan_reporting::files::{self, Files};
+use codespan_reporting::term::{self, termcolor, Config, Styles};
use same_file::is_same_file;
+use termcolor::{ColorChoice, StandardStream, WriteColor};
-fn main() -> anyhow::Result<()> {
+use typst::diag::{Error, Tracepoint};
+use typst::loading::{FileId, FsLoader};
+use typst::source::{SourceFile, SourceMap};
+
+fn main() {
+ if let Err(error) = try_main() {
+ print_error(error).unwrap();
+ process::exit(1);
+ }
+}
+
+/// The main compiler logic.
+fn try_main() -> anyhow::Result<()> {
let args: Vec<_> = std::env::args().collect();
if args.len() < 2 || args.len() > 3 {
- println!("usage: typst src.typ [out.pdf]");
- return Ok(());
+ print_usage().unwrap();
+ process::exit(2);
}
// Determine source and destination path.
@@ -36,12 +55,12 @@ fn main() -> anyhow::Result<()> {
// Resolve the file id of the source file and read the file.
let file = loader.resolve(src_path).context("source file not found")?;
- let src = fs::read_to_string(&src_path)
- .map_err(|_| anyhow!("failed to read source file"))?;
+ let string = fs::read_to_string(&src_path).context("failed to read source file")?;
+ let source = SourceFile::new(file, string);
// Typeset.
- let mut ctx = typst::Context::new(loader);
- match ctx.typeset(file, &src) {
+ let mut ctx = typst::Context::new(loader.clone());
+ match ctx.typeset(&source) {
// Export the PDF.
Ok(document) => {
let buffer = typst::export::pdf(&ctx, &document);
@@ -50,20 +69,132 @@ fn main() -> anyhow::Result<()> {
// Print diagnostics.
Err(errors) => {
- let map = typst::parse::LineMap::new(&src);
- for error in errors.iter() {
- let start = map.location(error.span.start).unwrap();
- let end = map.location(error.span.end).unwrap();
- println!(
- "Error: {}:{}-{}: {}",
- src_path.display(),
- start,
- end,
- error.message,
- );
- }
+ ctx.sources.insert(source);
+ print_diagnostics(&loader, &ctx.sources, *errors)
+ .context("failed to print diagnostics")?;
+ }
+ }
+
+ Ok(())
+}
+
+/// Print a usage message.
+fn print_usage() -> io::Result<()> {
+ let mut writer = StandardStream::stderr(ColorChoice::Always);
+ let styles = Styles::default();
+
+ writer.set_color(&styles.header_help)?;
+ write!(writer, "usage")?;
+
+ writer.set_color(&styles.header_message)?;
+ writeln!(writer, ": typst document.typ [output.pdf]")?;
+
+ writer.reset()
+}
+
+/// Print an error outside of a source file.
+fn print_error(error: anyhow::Error) -> io::Result<()> {
+ let mut writer = StandardStream::stderr(ColorChoice::Always);
+ let styles = Styles::default();
+
+ for (i, cause) in error.chain().enumerate() {
+ writer.set_color(&styles.header_error)?;
+ write!(writer, "{}", if i == 0 { "error" } else { "cause" })?;
+
+ writer.set_color(&styles.header_message)?;
+ writeln!(writer, ": {}", cause)?;
+ }
+
+ writer.reset()
+}
+
+/// Print diagnostics messages to the terminal.
+fn print_diagnostics(
+ loader: &FsLoader,
+ sources: &SourceMap,
+ errors: Vec<Error>,
+) -> Result<(), files::Error> {
+ let mut writer = StandardStream::stderr(ColorChoice::Always);
+ let config = Config { tab_width: 2, ..Default::default() };
+ let files = FilesImpl(loader, sources);
+
+ for error in errors {
+ // The main diagnostic.
+ let main = Diagnostic::error()
+ .with_message(error.message)
+ .with_labels(vec![Label::primary(error.file, error.span.to_range())]);
+
+ term::emit(&mut writer, &config, &files, &main)?;
+
+ // Stacktrace-like helper diagnostics.
+ for (file, span, point) in error.trace {
+ let message = match point {
+ Tracepoint::Call(Some(name)) => {
+ format!("error occured in this call of function `{}`", name)
+ }
+ Tracepoint::Call(None) => "error occured in this function call".into(),
+ Tracepoint::Import => "error occured while importing this module".into(),
+ };
+
+ let help = Diagnostic::help()
+ .with_message(message)
+ .with_labels(vec![Label::primary(file, span.to_range())]);
+
+ term::emit(&mut writer, &config, &files, &help)?;
}
}
Ok(())
}
+
+/// Required for error message formatting with codespan-reporting.
+struct FilesImpl<'a>(&'a FsLoader, &'a SourceMap);
+
+impl FilesImpl<'_> {
+ fn source(&self, id: FileId) -> Result<&SourceFile, files::Error> {
+ self.1.get(id).ok_or(files::Error::FileMissing)
+ }
+}
+
+impl<'a> Files<'a> for FilesImpl<'a> {
+ type FileId = FileId;
+ type Name = String;
+ type Source = &'a str;
+
+ fn name(&'a self, id: FileId) -> Result<Self::Name, files::Error> {
+ Ok(self.0.path(id).display().to_string())
+ }
+
+ fn source(&'a self, id: FileId) -> Result<Self::Source, files::Error> {
+ Ok(self.source(id)?.src())
+ }
+
+ fn line_index(
+ &'a self,
+ id: FileId,
+ byte_index: usize,
+ ) -> Result<usize, files::Error> {
+ let source = self.source(id)?;
+ source.pos_to_line(byte_index.into()).ok_or_else(|| {
+ let (given, max) = (byte_index, source.len_bytes());
+ if given <= max {
+ files::Error::InvalidCharBoundary { given }
+ } else {
+ files::Error::IndexTooLarge { given, max }
+ }
+ })
+ }
+
+ fn line_range(
+ &'a self,
+ id: FileId,
+ line_index: usize,
+ ) -> Result<Range<usize>, files::Error> {
+ let source = self.source(id)?;
+ let span = source.line_to_span(line_index).ok_or(files::Error::LineTooLarge {
+ given: line_index,
+ max: source.len_lines(),
+ })?;
+ Ok(span.to_range())
+ }
+}
diff --git a/src/parse/lines.rs b/src/parse/lines.rs
deleted file mode 100644
index 2d97a25c..00000000
--- a/src/parse/lines.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-// FIXME:
-// Both `LineMap::location` and `search_column` can lead to quadratic compile
-// times for very long lines. We probably need some smart acceleration structure
-// to determine columns.
-
-use super::Scanner;
-use crate::syntax::{Location, Pos};
-
-/// Enables conversion of byte position to locations.
-pub struct LineMap<'s> {
- src: &'s str,
- line_starts: Vec<Pos>,
-}
-
-impl<'s> LineMap<'s> {
- /// Create a new line map for a source string.
- pub fn new(src: &'s str) -> Self {
- let mut line_starts = vec![Pos::ZERO];
- let mut s = Scanner::new(src);
-
- while let Some(c) = s.eat_merging_crlf() {
- if is_newline(c) {
- line_starts.push(s.index().into());
- }
- }
-
- Self { src, line_starts }
- }
-
- /// Convert a byte position to a location.
- pub fn location(&self, pos: Pos) -> Option<Location> {
- // Find the line which contains the position.
- let line_index = match self.line_starts.binary_search(&pos) {
- Ok(i) => i,
- Err(i) => i - 1,
- };
-
- let start = self.line_starts.get(line_index)?;
- let head = self.src.get(start.to_usize() .. pos.to_usize())?;
-
- // TODO: What about tabs?
- let column_index = head.chars().count();
-
- Some(Location {
- line: 1 + line_index as u32,
- column: 1 + column_index as u32,
- })
- }
-
- /// Convert a location to a byte position.
- pub fn pos(&self, location: Location) -> Option<Pos> {
- // Determine the boundaries of the line.
- let line_idx = location.line.checked_sub(1)? as usize;
- let line_start = *self.line_starts.get(line_idx)?;
- let line_end = self
- .line_starts
- .get(location.line as usize)
- .map_or(self.src.len(), |pos| pos.to_usize());
-
- let line = self.src.get(line_start.to_usize() .. line_end)?;
-
- // Find the index in the line. For the first column, the index is always
- // zero. For other columns, we have to look at which byte the char
- // directly before the column in question ends. We can't do
- // `nth(column_idx)` directly since the column may be behind the last
- // char.
- let column_idx = location.column.checked_sub(1)? as usize;
- let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
- // TODO: What about tabs?
- let (idx, prev) = line.char_indices().nth(prev_idx)?;
- idx + prev.len_utf8()
- } else {
- 0
- };
-
- Some(line_start + line_offset)
- }
-}
-
-/// Count how many column the string would fill.
-pub fn count_columns(src: &str) -> usize {
- let mut column = 0;
- for c in src.chars().rev() {
- if is_newline(c) {
- break;
- } else if c == '\t' {
- // TODO: How many columns per tab?
- column += 2;
- } else {
- column += 1;
- }
- }
- column
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
- matches!(
- character,
- // Line Feed, Vertical Tab, Form Feed, Carriage Return.
- '\n' | '\x0B' | '\x0C' | '\r' |
- // Next Line, Line Separator, Paragraph Separator.
- '\u{0085}' | '\u{2028}' | '\u{2029}'
- )
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
-
- #[test]
- fn test_line_map_new() {
- let map = LineMap::new(TEST);
- assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
- }
-
- #[test]
- fn test_line_map_location() {
- let map = LineMap::new(TEST);
- assert_eq!(map.location(Pos(0)), Some(Location::new(1, 1)));
- assert_eq!(map.location(Pos(2)), Some(Location::new(1, 2)));
- assert_eq!(map.location(Pos(6)), Some(Location::new(1, 6)));
- assert_eq!(map.location(Pos(7)), Some(Location::new(2, 1)));
- assert_eq!(map.location(Pos(8)), Some(Location::new(2, 2)));
- assert_eq!(map.location(Pos(12)), Some(Location::new(2, 3)));
- assert_eq!(map.location(Pos(21)), Some(Location::new(4, 4)));
- assert_eq!(map.location(Pos(22)), None);
- }
-
- #[test]
- fn test_line_map_pos() {
- fn assert_round_trip(map: &LineMap, pos: Pos) {
- assert_eq!(map.location(pos).and_then(|loc| map.pos(loc)), Some(pos));
- }
-
- let map = LineMap::new(TEST);
- assert_round_trip(&map, Pos(0));
- assert_round_trip(&map, Pos(7));
- assert_round_trip(&map, Pos(12));
- assert_round_trip(&map, Pos(21));
- }
-}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index c103c342..f033e01f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,12 +1,10 @@
//! Parsing and tokenization.
-mod lines;
mod parser;
mod resolve;
mod scanner;
mod tokens;
-pub use lines::*;
pub use parser::*;
pub use resolve::*;
pub use scanner::*;
@@ -15,13 +13,13 @@ pub use tokens::*;
use std::rc::Rc;
use crate::diag::TypResult;
-use crate::loading::FileId;
+use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a string of source code.
-pub fn parse(file: FileId, src: &str) -> TypResult<SyntaxTree> {
- let mut p = Parser::new(file, src);
+pub fn parse(source: &SourceFile) -> TypResult<SyntaxTree> {
+ let mut p = Parser::new(source);
let tree = tree(&mut p);
let errors = p.finish();
if errors.is_empty() {
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 0238c8be..6b478780 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,15 @@
use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
-use super::{count_columns, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
use crate::diag::Error;
-use crate::loading::FileId;
+use crate::source::SourceFile;
use crate::syntax::{Pos, Span, Token};
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The id of the parsed file.
- file: FileId,
+ source: &'s SourceFile,
/// Parsing errors.
errors: Vec<Error>,
/// An iterator over the source tokens.
@@ -60,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(file: FileId, src: &'s str) -> Self {
- let mut tokens = Tokens::new(src, TokenMode::Markup);
+ pub fn new(source: &'s SourceFile) -> Self {
+ let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
let next = tokens.next();
Self {
- file,
+ source,
errors: vec![],
tokens,
groups: vec![],
@@ -82,11 +82,7 @@ impl<'s> Parser<'s> {
/// Add an error with location and message.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
- self.errors.push(Error {
- file: self.file,
- span: span.into(),
- message: message.into(),
- });
+ self.errors.push(Error::new(self.source.file(), span, message));
}
/// Eat the next token and add an error that it is not the expected `thing`.
@@ -324,7 +320,7 @@ impl<'s> Parser<'s> {
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
- count_columns(self.tokens.scanner().get(.. index))
+ self.source.pos_to_column(index.into()).unwrap()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index f97d5383..7bd160f9 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -109,8 +109,11 @@ fn split_lines(text: &str) -> Vec<String> {
let mut line = String::new();
let mut lines = Vec::new();
- while let Some(c) = s.eat_merging_crlf() {
+ while let Some(c) = s.eat() {
if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
lines.push(std::mem::take(&mut line));
} else {
line.push(c);
@@ -173,14 +176,10 @@ mod tests {
text: &str,
block: bool,
) {
- Span::without_cmp(|| {
- assert_eq!(resolve_raw(Span::ZERO, raw, backticks), RawNode {
- span: Span::ZERO,
- lang: lang.and_then(|id| Ident::new(id, 0)),
- text: text.into(),
- block,
- });
- });
+ let node = resolve_raw(Span::ZERO, raw, backticks);
+ assert_eq!(node.lang.as_deref(), lang);
+ assert_eq!(node.text, text);
+ assert_eq!(node.block, block);
}
// Just one backtick.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 9ee7641c..bb827255 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -47,17 +47,6 @@ impl<'s> Scanner<'s> {
debug_assert_eq!(next, Some(c));
}
- /// Consume the next char, coalescing `\r\n` to just `\n`.
- #[inline]
- pub fn eat_merging_crlf(&mut self) -> Option<char> {
- if self.rest().starts_with("\r\n") {
- self.index += 2;
- Some('\n')
- } else {
- self.eat()
- }
- }
-
/// Eat chars while the condition is true.
#[inline]
pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
@@ -168,3 +157,15 @@ impl Debug for Scanner<'_> {
write!(f, "Scanner({}|{})", self.eaten(), self.rest())
}
}
+
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+ matches!(
+ character,
+ // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+ '\n' | '\x0B' | '\x0C' | '\r' |
+ // Next Line, Line Separator, Paragraph Separator.
+ '\u{0085}' | '\u{2028}' | '\u{2029}'
+ )
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 356a2f96..9fd13ecc 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -198,13 +198,16 @@ impl<'s> Tokens<'s> {
// Count the number of newlines.
let mut newlines = 0;
- while let Some(c) = self.s.eat_merging_crlf() {
+ while let Some(c) = self.s.eat() {
if !c.is_whitespace() {
self.s.uneat();
break;
}
if is_newline(c) {
+ if c == '\r' {
+ self.s.eat_if('\n');
+ }
newlines += 1;
}
}
@@ -484,8 +487,8 @@ impl Debug for Tokens<'_> {
}
}
-fn keyword(id: &str) -> Option<Token<'static>> {
- Some(match id {
+fn keyword(ident: &str) -> Option<Token<'static>> {
+ Some(match ident {
"not" => Token::Not,
"and" => Token::And,
"or" => Token::Or,
diff --git a/src/pretty.rs b/src/pretty.rs
index a4e9b334..2f3a6ef9 100644
--- a/src/pretty.rs
+++ b/src/pretty.rs
@@ -610,6 +610,7 @@ mod tests {
use super::*;
use crate::loading::FileId;
use crate::parse::parse;
+ use crate::source::SourceFile;
#[track_caller]
fn roundtrip(src: &str) {
@@ -618,7 +619,8 @@ mod tests {
#[track_caller]
fn test_parse(src: &str, exp: &str) {
- let ast = parse(FileId::from_raw(0), src).unwrap();
+ let source = SourceFile::new(FileId::from_raw(0), src.into());
+ let ast = parse(&source).unwrap();
let found = pretty(&ast);
if exp != found {
println!("tree: {:#?}", ast);
diff --git a/src/source.rs b/src/source.rs
new file mode 100644
index 00000000..abd3c246
--- /dev/null
+++ b/src/source.rs
@@ -0,0 +1,195 @@
+//! Source files.
+
+use std::collections::{hash_map::Entry, HashMap};
+
+use crate::loading::FileId;
+use crate::parse::{is_newline, Scanner};
+use crate::syntax::{Pos, Span};
+
+/// A store for loaded source files.
+#[derive(Default)]
+pub struct SourceMap {
+ sources: HashMap<FileId, SourceFile>,
+}
+
+impl SourceMap {
+ /// Create a new, empty source map.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Get a source file by id.
+ pub fn get(&self, file: FileId) -> Option<&SourceFile> {
+ self.sources.get(&file)
+ }
+
+ /// Insert a source file, replacing any stored file with the same id.
+ pub fn insert(&mut self, source: SourceFile) -> &SourceFile {
+ match self.sources.entry(source.file) {
+ Entry::Occupied(mut entry) => {
+ entry.insert(source);
+ entry.into_mut()
+ }
+ Entry::Vacant(entry) => entry.insert(source),
+ }
+ }
+
+ /// Remove all sources.
+ pub fn clear(&mut self) {
+ self.sources.clear();
+ }
+}
+
+/// A single source file.
+pub struct SourceFile {
+ file: FileId,
+ src: String,
+ line_starts: Vec<Pos>,
+}
+
+impl SourceFile {
+ /// Create a new source file from string.
+ pub fn new(file: FileId, src: String) -> Self {
+ let mut line_starts = vec![Pos::ZERO];
+ let mut s = Scanner::new(&src);
+
+ while let Some(c) = s.eat() {
+ if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
+ line_starts.push(s.index().into());
+ }
+ }
+
+ Self { file, src, line_starts }
+ }
+
+ /// The file id.
+ pub fn file(&self) -> FileId {
+ self.file
+ }
+
+ /// The whole source as a string slice.
+ pub fn src(&self) -> &str {
+ &self.src
+ }
+
+ /// Get the length of the file in bytes.
+ pub fn len_bytes(&self) -> usize {
+ self.src.len()
+ }
+
+ /// Get the length of the file in lines.
+ pub fn len_lines(&self) -> usize {
+ self.line_starts.len()
+ }
+
+ /// Slice out the part of the source code enclosed by the span.
+ pub fn get(&self, span: Span) -> Option<&str> {
+ self.src.get(span.to_range())
+ }
+
+ /// Return the index of the line that contains the given byte position.
+ pub fn pos_to_line(&self, byte_pos: Pos) -> Option<usize> {
+ (byte_pos.to_usize() <= self.src.len()).then(|| {
+ match self.line_starts.binary_search(&byte_pos) {
+ Ok(i) => i,
+ Err(i) => i - 1,
+ }
+ })
+ }
+
+ /// Return the column of the byte index.
+ ///
+ /// Tabs are counted as occupying two columns.
+ pub fn pos_to_column(&self, byte_pos: Pos) -> Option<usize> {
+ let line = self.pos_to_line(byte_pos)?;
+ let start = self.line_to_pos(line)?;
+ let head = self.get(Span::new(start, byte_pos))?;
+ Some(head.chars().map(width).sum())
+ }
+
+ /// Return the byte position at which the given line starts.
+ pub fn line_to_pos(&self, line_idx: usize) -> Option<Pos> {
+ self.line_starts.get(line_idx).copied()
+ }
+
+ /// Return the span which encloses the given line.
+ pub fn line_to_span(&self, line_idx: usize) -> Option<Span> {
+ let start = self.line_to_pos(line_idx)?;
+ let end = self.line_to_pos(line_idx + 1).unwrap_or(self.src.len().into());
+ Some(Span::new(start, end))
+ }
+
+ /// Return the byte position of the given (line, column) pair.
+ ///
+ /// Tabs are counted as occupying two columns.
+ pub fn line_column_to_pos(&self, line_idx: usize, column_idx: usize) -> Option<Pos> {
+ let span = self.line_to_span(line_idx)?;
+ let line = self.get(span)?;
+
+ if column_idx == 0 {
+ return Some(span.start);
+ }
+
+ let mut column = 0;
+ for (i, c) in line.char_indices() {
+ column += width(c);
+ if column >= column_idx {
+ return Some(span.start + Pos::from(i + c.len_utf8()));
+ }
+ }
+
+ None
+ }
+}
+
+/// The display width of the character.
+fn width(c: char) -> usize {
+ if c == '\t' { 2 } else { 1 }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const ID: FileId = FileId::from_raw(0);
+ const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
+
+ #[test]
+ fn test_source_file_new() {
+ let source = SourceFile::new(ID, TEST.into());
+ assert_eq!(source.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
+ }
+
+ #[test]
+ fn test_source_file_pos_to_line() {
+ let source = SourceFile::new(ID, TEST.into());
+ assert_eq!(source.pos_to_line(Pos(0)), Some(0));
+ assert_eq!(source.pos_to_line(Pos(2)), Some(0));
+ assert_eq!(source.pos_to_line(Pos(6)), Some(0));
+ assert_eq!(source.pos_to_line(Pos(7)), Some(1));
+ assert_eq!(source.pos_to_line(Pos(8)), Some(1));
+ assert_eq!(source.pos_to_line(Pos(12)), Some(1));
+ assert_eq!(source.pos_to_line(Pos(21)), Some(3));
+ assert_eq!(source.pos_to_line(Pos(22)), None);
+ }
+
+ #[test]
+ fn test_source_file_roundtrip() {
+ #[track_caller]
+ fn roundtrip(source: &SourceFile, byte_pos: Pos) {
+ let line = source.pos_to_line(byte_pos).unwrap();
+ let column = source.pos_to_column(byte_pos).unwrap();
+ let result = source.line_column_to_pos(line, column).unwrap();
+ assert_eq!(result, byte_pos);
+ }
+
+ let source = SourceFile::new(ID, TEST.into());
+ roundtrip(&source, Pos(0));
+ roundtrip(&source, Pos(7));
+ roundtrip(&source, Pos(12));
+ roundtrip(&source, Pos(21));
+ }
+}
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 8a630faa..800cca19 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -1,13 +1,8 @@
-use std::cell::Cell;
-use std::fmt::{self, Debug, Display, Formatter};
+use std::fmt::{self, Debug, Formatter};
use std::ops::{Add, Range};
use serde::{Deserialize, Serialize};
-thread_local! {
- static CMP_SPANS: Cell<bool> = Cell::new(true);
-}
-
/// A value with the span it corresponds to in the source code.
#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
#[derive(Serialize, Deserialize)]
@@ -47,15 +42,17 @@ impl<T: Debug> Debug for Spanned<T> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
self.v.fmt(f)?;
if f.alternate() {
- f.write_str(" ")?;
+ f.write_str(" <")?;
self.span.fmt(f)?;
+ f.write_str(">")?;
}
Ok(())
}
}
/// Bounds of a slice of source code.
-#[derive(Copy, Clone, Ord, PartialOrd, Serialize, Deserialize)]
+#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
+#[derive(Serialize, Deserialize)]
pub struct Span {
/// The inclusive start position.
pub start: Pos,
@@ -90,34 +87,15 @@ impl Span {
*self = self.join(other)
}
+    /// Test whether this span completely contains the other span.
+    pub fn contains(self, other: Self) -> bool {
+        other.start >= self.start && other.end <= self.end
+    }
+
/// Convert to a `Range<usize>` for indexing.
pub fn to_range(self) -> Range<usize> {
self.start.to_usize() .. self.end.to_usize()
}
-
- /// Run some code with span comparisons disabled.
- pub fn without_cmp<F, T>(f: F) -> T
- where
- F: FnOnce() -> T,
- {
- let prev = Self::cmp();
- Self::set_cmp(false);
- let val = f();
- Self::set_cmp(prev);
- val
- }
-
- /// Whether spans will currently be compared.
- fn cmp() -> bool {
- CMP_SPANS.with(Cell::get)
- }
-
- /// Whether spans should be compared.
- ///
- /// When set to `false` comparisons with `PartialEq` ignore spans.
- fn set_cmp(cmp: bool) {
- CMP_SPANS.with(|cell| cell.set(cmp));
- }
}
impl<T> From<T> for Span
@@ -138,28 +116,15 @@ where
}
}
-impl Default for Span {
- fn default() -> Self {
- Span::ZERO
- }
-}
-
impl Debug for Span {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "<{:?}-{:?}>", self.start, self.end)
- }
-}
-
-impl Eq for Span {}
-
-impl PartialEq for Span {
- fn eq(&self, other: &Self) -> bool {
- !Self::cmp() || (self.start == other.start && self.end == other.end)
+ write!(f, "{:?}-{:?}", self.start, self.end)
}
}
/// A byte position in source code.
-#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
+#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
+#[derive(Serialize, Deserialize)]
pub struct Pos(pub u32);
impl Pos {
@@ -178,12 +143,6 @@ impl From<u32> for Pos {
}
}
-impl From<i32> for Pos {
- fn from(index: i32) -> Self {
- Self(index as u32)
- }
-}
-
impl From<usize> for Pos {
fn from(index: usize) -> Self {
Self(index as u32)
@@ -206,31 +165,3 @@ where
Pos(self.0 + rhs.into().0)
}
}
-
-/// A one-indexed line-column position in source code.
-#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
-pub struct Location {
- /// The one-indexed line.
- pub line: u32,
- /// The one-indexed column.
- pub column: u32,
-}
-
-impl Location {
- /// Create a new location from line and column.
- pub fn new(line: u32, column: u32) -> Self {
- Self { line, column }
- }
-}
-
-impl Display for Location {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{}:{}", self.line, self.column)
- }
-}
-
-impl Debug for Location {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- Display::fmt(self, f)
- }
-}
diff --git a/src/util/eco.rs b/src/util/eco.rs
index 7fe1ac9c..00f87872 100644
--- a/src/util/eco.rs
+++ b/src/util/eco.rs
@@ -193,6 +193,24 @@ impl From<&String> for EcoString {
}
}
+impl From<EcoString> for String {
+    fn from(s: EcoString) -> Self {
+        match s.0 {
+            // The small representation is copied into a fresh `String`.
+            Repr::Small { .. } => String::from(s.as_str()),
+            // Take over the shared string if this was the only reference to
+            // it; otherwise clone it and leave the original to its owners.
+            Repr::Large(rc) => Rc::try_unwrap(rc).unwrap_or_else(|rc| (*rc).clone()),
+        }
+    }
+}
+
+impl From<&EcoString> for String {
+    fn from(s: &EcoString) -> Self {
+        String::from(s.as_str()) // always a fresh copy; `s` is only borrowed
+    }
+}
+
impl Deref for EcoString {
type Target = str;