summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2023-09-19 15:03:43 +0200
committer Laurenz <laurmaedje@gmail.com> 2023-09-19 15:19:12 +0200
commit bb59f0e2b21c42a796b5eb8d8882a1d2b2a0c35f (patch)
tree e55db10e9ec146cda10b5f3ff91caff55002d674
parent 13758b9c9795d7d6d2fa9551a8936e0f0ff27136 (diff)
Incremental parsing in CLI
Reparses files in the CLI incrementally and also uses the file modification timestamp to completely skip reparsing if possible.
-rw-r--r-- Cargo.lock 1
-rw-r--r-- crates/typst-cli/Cargo.toml 1
-rw-r--r-- crates/typst-cli/src/compile.rs 3
-rw-r--r-- crates/typst-cli/src/watch.rs 3
-rw-r--r-- crates/typst-cli/src/world.rs 151
-rw-r--r-- crates/typst-syntax/src/source.rs 68
6 files changed, 176 insertions, 51 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8f629dc5..861370f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2760,6 +2760,7 @@ dependencies = [
"dirs",
"ecow",
"env_proxy",
+ "filetime",
"flate2",
"inferno",
"memmap2",
diff --git a/crates/typst-cli/Cargo.toml b/crates/typst-cli/Cargo.toml
index 0da03afc..5fcb2797 100644
--- a/crates/typst-cli/Cargo.toml
+++ b/crates/typst-cli/Cargo.toml
@@ -29,6 +29,7 @@ comemo = "0.3"
ecow = "0.1.1"
dirs = "5"
flate2 = "1"
+filetime = "0.2"
inferno = "0.11.15"
memmap2 = "0.7"
notify = "6"
diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs
index 49a6b6eb..2962355e 100644
--- a/crates/typst-cli/src/compile.rs
+++ b/crates/typst-cli/src/compile.rs
@@ -75,8 +75,7 @@ pub fn compile_once(
Status::Compiling.print(command).unwrap();
}
- // Reset everything and ensure that the main file is present.
- world.reset();
+ // Ensure that the main file is present.
world.source(world.main()).map_err(|err| err.to_string())?;
let mut tracer = Tracer::new();
diff --git a/crates/typst-cli/src/watch.rs b/crates/typst-cli/src/watch.rs
index f53cb5fd..412c2062 100644
--- a/crates/typst-cli/src/watch.rs
+++ b/crates/typst-cli/src/watch.rs
@@ -70,6 +70,9 @@ pub fn watch(mut command: CompileCommand) -> StrResult<()> {
.map(ToOwned::to_owned)
.collect();
+ // Reset all dependencies.
+ world.reset();
+
// Recompile.
compile_once(&mut world, &mut command, true)?;
comemo::evict(10);
diff --git a/crates/typst-cli/src/world.rs b/crates/typst-cli/src/world.rs
index cfbe3791..bd9ef414 100644
--- a/crates/typst-cli/src/world.rs
+++ b/crates/typst-cli/src/world.rs
@@ -1,4 +1,4 @@
-use std::cell::{OnceCell, RefCell, RefMut};
+use std::cell::{Cell, OnceCell, RefCell, RefMut};
use std::collections::HashMap;
use std::fs;
use std::hash::Hash;
@@ -6,6 +6,7 @@ use std::path::{Path, PathBuf};
use chrono::{DateTime, Datelike, Local};
use comemo::Prehashed;
+use filetime::FileTime;
use same_file::Handle;
use siphasher::sip128::{Hasher128, SipHasher13};
use typst::diag::{FileError, FileResult, StrResult};
@@ -37,7 +38,7 @@ pub struct SystemWorld {
/// be used in conjunction with `paths`.
hashes: RefCell<HashMap<FileId, FileResult<PathHash>>>,
/// Maps canonical path hashes to source files and buffers.
- paths: RefCell<HashMap<PathHash, PathSlot>>,
+ slots: RefCell<HashMap<PathHash, PathSlot>>,
/// The current datetime if requested. This is stored here to ensure it is
/// always the same within one compilation. Reset between compilations.
now: OnceCell<DateTime<Local>>,
@@ -78,7 +79,7 @@ impl SystemWorld {
book: Prehashed::new(searcher.book),
fonts: searcher.fonts,
hashes: RefCell::default(),
- paths: RefCell::default(),
+ slots: RefCell::default(),
now: OnceCell::new(),
})
}
@@ -100,13 +101,19 @@ impl SystemWorld {
/// Return all paths the last compilation depended on.
+ ///
+ /// Slots are no longer dropped on `reset`, so only the slots that were
+ /// accessed since the last reset are reported here.
pub fn dependencies(&mut self) -> impl Iterator<Item = &Path> {
- self.paths.get_mut().values().map(|slot| slot.system_path.as_path())
+ self.slots
+ .get_mut()
+ .values()
+ // Skip slots that merely linger from an earlier compilation.
+ .filter(|slot| slot.accessed())
+ .map(|slot| slot.path.as_path())
}
/// Reset the compilation state in preparation of a new compilation.
pub fn reset(&mut self) {
self.hashes.borrow_mut().clear();
- self.paths.borrow_mut().clear();
+ // Keep the slots and whatever they have cached; only clear their
+ // "accessed" markers so that the next compilation starts fresh.
+ for slot in self.slots.borrow_mut().values_mut() {
+ slot.reset();
+ }
// Forget the stored datetime so that it is determined anew on next request.
self.now.take();
}
@@ -185,15 +192,8 @@ impl SystemWorld {
})
.clone()?;
- Ok(RefMut::map(self.paths.borrow_mut(), |paths| {
- paths.entry(hash).or_insert_with(|| PathSlot {
- id,
- // This will only trigger if the `or_insert_with` above also
- // triggered.
- system_path,
- source: OnceCell::new(),
- buffer: OnceCell::new(),
- })
+ Ok(RefMut::map(self.slots.borrow_mut(), |paths| {
+ paths.entry(hash).or_insert_with(|| PathSlot::new(id, system_path))
}))
}
}
@@ -205,28 +205,110 @@ struct PathSlot {
/// The slot's canonical file id.
id: FileId,
/// The slot's path on the system.
- system_path: PathBuf,
- /// The lazily loaded source file for a path hash.
- source: OnceCell<FileResult<Source>>,
- /// The lazily loaded buffer for a path hash.
- buffer: OnceCell<FileResult<Bytes>>,
+ path: PathBuf,
+ /// The lazily loaded and incrementally updated source file.
+ source: SlotCell<Source>,
+ /// The lazily loaded raw byte buffer.
+ file: SlotCell<Bytes>,
}
impl PathSlot {
+ /// Create a new path slot.
+ ///
+ /// Both cells start out empty; nothing is read from disk here.
+ fn new(id: FileId, path: PathBuf) -> Self {
+ Self {
+ id,
+ path,
+ file: SlotCell::new(),
+ source: SlotCell::new(),
+ }
+ }
+
+ /// Whether the file was accessed in the ongoing compilation.
+ ///
+ /// This is the case if either the source or the raw bytes were requested.
+ fn accessed(&self) -> bool {
+ self.source.accessed() || self.file.accessed()
+ }
+
+ /// Marks the file as not yet accessed in preparation of the next
+ /// compilation.
+ fn reset(&self) {
+ self.source.reset();
+ self.file.reset();
+ }
+
+ /// Retrieve the source for this file.
+ ///
+ /// If an earlier, successfully loaded source is still cached, it is
+ /// updated through `Source::replace` instead of being built from scratch.
fn source(&self) -> FileResult<Source> {
- self.source
- .get_or_init(|| {
- let buf = read(&self.system_path)?;
- let text = decode_utf8(buf)?;
- Ok(Source::new(self.id, text))
- })
- .clone()
+ self.source.get_or_init(&self.path, |data, prev| {
+ // `data` holds the freshly read bytes, `prev` the previously cached
+ // source (if there was one and it had loaded without error).
+ let text = decode_utf8(&data)?;
+ if let Some(mut prev) = prev {
+ prev.replace(text);
+ Ok(prev)
+ } else {
+ Ok(Source::new(self.id, text.into()))
+ }
+ })
}
+ /// Retrieve the file's bytes.
+ ///
+ /// The previously cached value is ignored; the bytes are simply converted.
fn file(&self) -> FileResult<Bytes> {
- self.buffer
- .get_or_init(|| read(&self.system_path).map(Bytes::from))
- .clone()
+ self.file.get_or_init(&self.path, |data, _| Ok(data.into()))
+ }
+}
+
+/// Lazily processes data for a file.
+struct SlotCell<T> {
+ /// The cached outcome of the last load (value or error), or `None` if the
+ /// file was never loaded through this cell.
+ data: RefCell<Option<FileResult<T>>>,
+ /// The point in time at which the last (re)load was started. Starts out
+ /// as `FileTime::zero()`.
+ refreshed: Cell<FileTime>,
+ /// Whether the cell was accessed in the ongoing compilation.
+ accessed: Cell<bool>,
+}
+
+impl<T: Clone> SlotCell<T> {
+ /// Creates a new, empty cell.
+ fn new() -> Self {
+ Self {
+ data: RefCell::new(None),
+ refreshed: Cell::new(FileTime::zero()),
+ accessed: Cell::new(false),
+ }
+ }
+
+ /// Whether the cell was accessed in the ongoing compilation.
+ fn accessed(&self) -> bool {
+ self.accessed.get()
+ }
+
+ /// Marks the cell as not yet accessed in preparation of the next
+ /// compilation.
+ ///
+ /// The cached data and the refresh timestamp are left untouched.
+ fn reset(&self) {
+ self.accessed.set(false);
+ }
+
+ /// Gets the contents of the cell or initializes them.
+ ///
+ /// `f` receives the bytes read from `path` and, if available, the
+ /// previously cached value so that it can be updated rather than rebuilt.
+ /// The outcome of `f` (or the read error) is cached and returned.
+ fn get_or_init(
+ &self,
+ path: &Path,
+ f: impl FnOnce(Vec<u8>, Option<T>) -> FileResult<T>,
+ ) -> FileResult<T> {
+ let mut borrow = self.data.borrow_mut();
+ if let Some(data) = &*borrow {
+ // `replace` marks the cell as accessed and yields the old flag. If
+ // the cell was already accessed in this compilation, the cached data
+ // is returned without consulting the file system; otherwise it is
+ // only reused if the file has not been modified since the last load.
+ if self.accessed.replace(true) || self.current(path) {
+ return data.clone();
+ }
+ }
+
+ self.accessed.set(true);
+ // NOTE(review): the timestamp is taken before the file is read,
+ // presumably so that a write racing with the read still counts as
+ // newer than this refresh -- confirm.
+ self.refreshed.set(FileTime::now());
+ // Only a previously successful value is handed to `f`; an old error is
+ // discarded.
+ let prev = borrow.take().and_then(Result::ok);
+ let value = read(path).and_then(|data| f(data, prev));
+ *borrow = Some(value.clone());
+ value
+ }
+
+ /// Whether the cell contents are still up to date with the file system.
+ ///
+ /// The contents count as current only if the file's modification time is
+ /// strictly older than the last refresh. If the metadata cannot be read,
+ /// the contents are treated as outdated.
+ fn current(&self, path: &Path) -> bool {
+ fs::metadata(path).map_or(false, |meta| {
+ let modified = FileTime::from_last_modification_time(&meta);
+ modified < self.refreshed.get()
+ })
}
}
@@ -255,12 +337,7 @@ fn read(path: &Path) -> FileResult<Vec<u8>> {
}
/// Decode UTF-8 with an optional BOM.
+///
+/// The returned string slice borrows from `buf`; a leading BOM, if present,
+/// is not part of it. Fails if the remaining bytes are not valid UTF-8.
-fn decode_utf8(buf: Vec<u8>) -> FileResult<String> {
- Ok(if buf.starts_with(b"\xef\xbb\xbf") {
- // Remove UTF-8 BOM.
- std::str::from_utf8(&buf[3..])?.into()
- } else {
- // Assume UTF-8.
- String::from_utf8(buf)?
- })
+fn decode_utf8(buf: &[u8]) -> FileResult<&str> {
+ // Remove UTF-8 BOM.
+ Ok(std::str::from_utf8(buf.strip_prefix(b"\xef\xbb\xbf").unwrap_or(buf))?)
}
diff --git a/crates/typst-syntax/src/source.rs b/crates/typst-syntax/src/source.rs
index 56b27195..f3392889 100644
--- a/crates/typst-syntax/src/source.rs
+++ b/crates/typst-syntax/src/source.rs
@@ -69,13 +69,47 @@ impl Source {
}
/// Fully replace the source text.
- pub fn replace(&mut self, text: String) {
- let inner = Arc::make_mut(&mut self.0);
- inner.text = Prehashed::new(text);
- inner.lines = lines(&inner.text);
- let mut root = parse(&inner.text);
- root.numberize(inner.id, Span::FULL).unwrap();
- inner.root = Prehashed::new(root);
+ ///
+ /// This performs a naive (suffix/prefix-based) diff of the old and new text
+ /// to produce the smallest single edit that transforms old into new and
+ /// then calls [`edit`](Self::edit) with it.
+ ///
+ /// Returns the range in the new source that was ultimately reparsed. If the
+ /// text is unchanged, no edit is performed and `0..0` is returned.
+ pub fn replace(&mut self, new: &str) -> Range<usize> {
+ let old = self.text();
+
+ // Length in bytes of the common prefix of both texts.
+ let mut prefix = old
+ .as_bytes()
+ .iter()
+ .zip(new.as_bytes())
+ .take_while(|(x, y)| x == y)
+ .count();
+
+ if prefix == old.len() && prefix == new.len() {
+ return 0..0;
+ }
+
+ // Shrink the prefix until it ends on a char boundary in both texts.
+ while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
+ prefix -= 1;
+ }
+
+ // Length in bytes of the common suffix of the two remainders. Each side
+ // is reversed *before* zipping: reversing a zip of iterators of unequal
+ // length still pairs bytes by their distance from the start, which would
+ // compare the wrong bytes and could report a suffix that does not match.
+ let mut suffix = old[prefix..]
+ .as_bytes()
+ .iter()
+ .rev()
+ .zip(new[prefix..].as_bytes().iter().rev())
+ .take_while(|(x, y)| x == y)
+ .count();
+
+ // Shrink the suffix until it starts on a char boundary in both texts.
+ // Growing it instead would move differing bytes into the "unchanged"
+ // tail. This terminates because a suffix of zero is always on a boundary.
+ while !old.is_char_boundary(old.len() - suffix)
+ || !new.is_char_boundary(new.len() - suffix)
+ {
+ suffix -= 1;
+ }
+
+ let replace = prefix..old.len() - suffix;
+ let with = &new[prefix..new.len() - suffix];
+ self.edit(replace, with)
}
/// Edit the source file by replacing the given range.
@@ -382,11 +416,21 @@ mod tests {
// tested separately.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
- let mut source = Source::detached(prev);
- let result = Source::detached(after);
- source.edit(range, with);
- assert_eq!(source.text(), result.text());
- assert_eq!(source.0.lines, result.0.lines);
+ // A source built directly from the expected text serves as the reference.
+ let reference = Source::detached(after);
+
+ // Applying the edit directly must match the reference.
+ let mut edited = Source::detached(prev);
+ edited.edit(range.clone(), with);
+ assert_eq!(edited.text(), reference.text());
+ assert_eq!(edited.0.lines, reference.0.lines);
+
+ // Expressing the same change as a full-text replacement must match the
+ // reference as well.
+ let mut replaced = Source::detached(prev);
+ replaced.replace(&{
+ let mut s = prev.to_string();
+ s.replace_range(range, with);
+ s
+ });
+ assert_eq!(replaced.text(), reference.text());
+ assert_eq!(replaced.0.lines, reference.0.lines);
}
// Test inserting at the beginning.