diff options
Diffstat (limited to 'src/syntax')
| -rw-r--r-- | src/syntax/lexer.rs | 8 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 4 | ||||
| -rw-r--r-- | src/syntax/node.rs | 43 | ||||
| -rw-r--r-- | src/syntax/parser.rs | 10 | ||||
| -rw-r--r-- | src/syntax/reparser.rs | 2 | ||||
| -rw-r--r-- | src/syntax/source.rs | 179 | ||||
| -rw-r--r-- | src/syntax/span.rs | 52 |
7 files changed, 138 insertions, 160 deletions
diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs index ae4462d9..d95b5b7b 100644 --- a/src/syntax/lexer.rs +++ b/src/syntax/lexer.rs @@ -3,7 +3,7 @@ use unicode_ident::{is_xid_continue, is_xid_start}; use unicode_segmentation::UnicodeSegmentation; use unscanny::Scanner; -use super::{ErrorPos, SyntaxKind}; +use super::SyntaxKind; /// Splits up a string of source code into tokens. #[derive(Clone)] @@ -16,7 +16,7 @@ pub(super) struct Lexer<'s> { /// Whether the last token contained a newline. newline: bool, /// An error for the last token. - error: Option<(EcoString, ErrorPos)>, + error: Option<EcoString>, } /// What kind of tokens to emit. @@ -69,7 +69,7 @@ impl<'s> Lexer<'s> { } /// Take out the last error, if any. - pub fn take_error(&mut self) -> Option<(EcoString, ErrorPos)> { + pub fn take_error(&mut self) -> Option<EcoString> { self.error.take() } } @@ -77,7 +77,7 @@ impl<'s> Lexer<'s> { impl Lexer<'_> { /// Construct a full-positioned syntax error. fn error(&mut self, message: impl Into<EcoString>) -> SyntaxKind { - self.error = Some((message.into(), ErrorPos::Full)); + self.error = Some(message.into()); SyntaxKind::Error } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d4aee9d3..1ce1e4c0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -12,9 +12,9 @@ mod span; pub use self::kind::SyntaxKind; pub use self::lexer::{is_ident, is_newline}; -pub use self::node::{ErrorPos, LinkedChildren, LinkedNode, SyntaxNode}; +pub use self::node::{LinkedChildren, LinkedNode, SyntaxNode}; pub use self::parser::{parse, parse_code}; -pub use self::source::{Source, SourceId}; +pub use self::source::Source; pub use self::span::{Span, Spanned}; pub(crate) use self::lexer::{is_id_continue, is_id_start}; diff --git a/src/syntax/node.rs b/src/syntax/node.rs index d2adc13a..6a66416d 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -6,8 +6,9 @@ use std::sync::Arc; use ecow::EcoString; use super::ast::AstNode; -use super::{SourceId, Span, SyntaxKind}; +use super::{Span, SyntaxKind}; use crate::diag::SourceError; +use crate::file::FileId; /// A node in the untyped syntax tree. #[derive(Clone, Eq, PartialEq, Hash)] @@ -36,12 +37,8 @@ impl SyntaxNode { } /// Create a new error node. - pub fn error( - message: impl Into<EcoString>, - text: impl Into<EcoString>, - pos: ErrorPos, - ) -> Self { - Self(Repr::Error(Arc::new(ErrorNode::new(message, text, pos)))) + pub fn error(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self { + Self(Repr::Error(Arc::new(ErrorNode::new(message, text)))) } /// The type of the node. @@ -145,7 +142,7 @@ impl SyntaxNode { } if let Repr::Error(error) = &self.0 { - vec![SourceError::new(error.span, error.message.clone()).with_pos(error.pos)] + vec![SourceError::new(error.span, error.message.clone())] } else { self.children() .filter(|node| node.erroneous()) @@ -186,14 +183,14 @@ impl SyntaxNode { /// Convert the child to an error. pub(super) fn convert_to_error(&mut self, message: impl Into<EcoString>) { let text = std::mem::take(self).into_text(); - *self = SyntaxNode::error(message, text, ErrorPos::Full); + *self = SyntaxNode::error(message, text); } /// Assign spans to each node. #[tracing::instrument(skip_all)] pub(super) fn numberize( &mut self, - id: SourceId, + id: FileId, within: Range<u64>, ) -> NumberingResult { if within.start >= within.end { @@ -285,7 +282,7 @@ impl Debug for SyntaxNode { impl Default for SyntaxNode { fn default() -> Self { - Self::error("", "", ErrorPos::Full) + Self::error("", "") } } @@ -381,7 +378,7 @@ impl InnerNode { /// a `range` of its children. fn numberize( &mut self, - id: SourceId, + id: FileId, range: Option<Range<usize>>, within: Range<u64>, ) -> NumberingResult { @@ -492,7 +489,7 @@ impl InnerNode { // Try to renumber. let within = start_number..end_number; - let id = self.span.source(); + let id = self.span.id(); if self.numberize(id, Some(renumber), within).is_ok() { return Ok(()); } @@ -540,23 +537,16 @@ struct ErrorNode { message: EcoString, /// The source text of the node. text: EcoString, - /// Where in the node an error should be annotated. - pos: ErrorPos, /// The node's span. span: Span, } impl ErrorNode { /// Create new error node. - fn new( - message: impl Into<EcoString>, - text: impl Into<EcoString>, - pos: ErrorPos, - ) -> Self { + fn new(message: impl Into<EcoString>, text: impl Into<EcoString>) -> Self { Self { message: message.into(), text: text.into(), - pos, span: Span::detached(), } } @@ -573,17 +563,6 @@ impl Debug for ErrorNode { } } -/// Where in a node an error should be annotated, -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum ErrorPos { - /// Over the full width of the node. - Full, - /// At the start of the node. - Start, - /// At the end of the node. - End, -} - /// A syntax node in a context. /// /// Knows its exact offset in the file and provides access to its diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 7d057ab9..54670df5 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -4,7 +4,7 @@ use std::ops::Range; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; -use super::{ast, is_newline, ErrorPos, LexMode, Lexer, SyntaxKind, SyntaxNode}; +use super::{ast, is_newline, LexMode, Lexer, SyntaxKind, SyntaxNode}; /// Parse a source file. pub fn parse(text: &str) -> SyntaxNode { @@ -1560,8 +1560,8 @@ impl<'s> Parser<'s> { fn save(&mut self) { let text = self.current_text(); if self.at(SyntaxKind::Error) { - let (message, pos) = self.lexer.take_error().unwrap(); - self.nodes.push(SyntaxNode::error(message, text, pos)); + let message = self.lexer.take_error().unwrap(); + self.nodes.push(SyntaxNode::error(message, text)); } else { self.nodes.push(SyntaxNode::leaf(self.current, text)); } @@ -1608,14 +1608,14 @@ impl<'s> Parser<'s> { .map_or(true, |child| child.kind() != SyntaxKind::Error) { let message = eco_format!("expected {}", thing); - self.nodes.push(SyntaxNode::error(message, "", ErrorPos::Full)); + self.nodes.push(SyntaxNode::error(message, "")); } self.skip(); } fn expected_at(&mut self, m: Marker, thing: &str) { let message = eco_format!("expected {}", thing); - let error = SyntaxNode::error(message, "", ErrorPos::Full); + let error = SyntaxNode::error(message, ""); self.nodes.insert(m.0, error); } diff --git a/src/syntax/reparser.rs b/src/syntax/reparser.rs index 9e2b0a1b..a4186fa7 100644 --- a/src/syntax/reparser.rs +++ b/src/syntax/reparser.rs @@ -19,7 +19,7 @@ pub fn reparse( replacement_len: usize, ) -> Range<usize> { try_reparse(text, replaced, replacement_len, None, root, 0).unwrap_or_else(|| { - let id = root.span().source(); + let id = root.span().id(); *root = parse(text); root.numberize(id, Span::FULL).unwrap(); 0..text.len() diff --git a/src/syntax/source.rs b/src/syntax/source.rs index 277271db..6eb6fd5d 100644 --- a/src/syntax/source.rs +++ b/src/syntax/source.rs @@ -3,105 +3,107 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::Range; -use std::path::{Path, PathBuf}; +use std::sync::Arc; use comemo::Prehashed; -use unscanny::Scanner; use super::ast::Markup; use super::reparser::reparse; use super::{is_newline, parse, LinkedNode, Span, SyntaxNode}; use crate::diag::SourceResult; -use crate::util::{PathExt, StrExt}; +use crate::file::FileId; +use crate::util::StrExt; /// A source file. /// /// All line and column indices start at zero, just like byte indices. Only for /// user-facing display, you should add 1 to them. +/// +/// Values of this type are cheap to clone and hash. #[derive(Clone)] -pub struct Source { - id: SourceId, - path: PathBuf, - lines: Vec<Line>, +pub struct Source(Arc<Repr>); + +/// The internal representation. +#[derive(Clone)] +struct Repr { + id: FileId, text: Prehashed<String>, root: Prehashed<SyntaxNode>, + lines: Vec<Line>, } impl Source { /// Create a new source file. + /// + /// The path must be canonical, so that the same source file has the same + /// id even if accessed through different paths. #[tracing::instrument(skip_all)] - pub fn new(id: SourceId, path: &Path, text: String) -> Self { + pub fn new(id: FileId, text: String) -> Self { let mut root = parse(&text); root.numberize(id, Span::FULL).unwrap(); - Self { + Self(Arc::new(Repr { id, - path: path.normalize(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// Create a source file without a real id and path, usually for testing. pub fn detached(text: impl Into<String>) -> Self { - Self::new(SourceId::detached(), Path::new(""), text.into()) + Self::new(FileId::detached(), text.into()) } /// Create a source file with the same synthetic span for all nodes. pub fn synthesized(text: String, span: Span) -> Self { let mut root = parse(&text); root.synthesize(span); - Self { - id: SourceId::detached(), - path: PathBuf::new(), + Self(Arc::new(Repr { + id: FileId::detached(), lines: lines(&text), text: Prehashed::new(text), root: Prehashed::new(root), - } + })) } /// The root node of the file's untyped syntax tree. pub fn root(&self) -> &SyntaxNode { - &self.root + &self.0.root } /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> SourceResult<Markup> { - let errors = self.root.errors(); + let errors = self.root().errors(); if errors.is_empty() { - Ok(self.root.cast().expect("root node must be markup")) + Ok(self.root().cast().expect("root node must be markup")) } else { Err(Box::new(errors)) } } /// The id of the source file. - pub fn id(&self) -> SourceId { - self.id - } - - /// The normalized path to the source file. - pub fn path(&self) -> &Path { - &self.path + pub fn id(&self) -> FileId { + self.0.id } /// The whole source as a string slice. pub fn text(&self) -> &str { - &self.text + &self.0.text } /// Slice out the part of the source code enclosed by the range. pub fn get(&self, range: Range<usize>) -> Option<&str> { - self.text.get(range) + self.text().get(range) } /// Fully replace the source text. pub fn replace(&mut self, text: String) { - self.text = Prehashed::new(text); - self.lines = lines(&self.text); - let mut root = parse(&self.text); - root.numberize(self.id, Span::FULL).unwrap(); - self.root = Prehashed::new(root); + let inner = Arc::make_mut(&mut self.0); + inner.text = Prehashed::new(text); + inner.lines = lines(&inner.text); + let mut root = parse(&inner.text); + root.numberize(inner.id, Span::FULL).unwrap(); + inner.root = Prehashed::new(root); } /// Edit the source file by replacing the given range. @@ -112,72 +114,70 @@ impl Source { #[track_caller] pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> { let start_byte = replace.start; - let start_utf16 = self.byte_to_utf16(replace.start).unwrap(); - self.text.update(|text| text.replace_range(replace.clone(), with)); + let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); + let line = self.byte_to_line(start_byte).unwrap(); + + let inner = Arc::make_mut(&mut self.0); + + // Update the text itself. + inner.text.update(|text| text.replace_range(replace.clone(), with)); // Remove invalidated line starts. - let line = self.byte_to_line(start_byte).unwrap(); - self.lines.truncate(line + 1); + inner.lines.truncate(line + 1); // Handle adjoining of \r and \n. - if self.text[..start_byte].ends_with('\r') && with.starts_with('\n') { - self.lines.pop(); + if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') { + inner.lines.pop(); } // Recalculate the line starts after the edit. - self.lines - .extend(lines_from(start_byte, start_utf16, &self.text[start_byte..])); + inner.lines.extend(lines_from( + start_byte, + start_utf16, + &inner.text[start_byte..], + )); // Incrementally reparse the replaced range. - self.root - .update(|root| reparse(root, &self.text, replace, with.len())) + inner + .root + .update(|root| reparse(root, &inner.text, replace, with.len())) } /// Get the length of the file in UTF-8 encoded bytes. pub fn len_bytes(&self) -> usize { - self.text.len() + self.text().len() } /// Get the length of the file in UTF-16 code units. pub fn len_utf16(&self) -> usize { - let last = self.lines.last().unwrap(); - last.utf16_idx + self.text[last.byte_idx..].len_utf16() + let last = self.0.lines.last().unwrap(); + last.utf16_idx + self.0.text[last.byte_idx..].len_utf16() } /// Get the length of the file in lines. pub fn len_lines(&self) -> usize { - self.lines.len() + self.0.lines.len() } /// Find the node with the given span. /// /// Returns `None` if the span does not point into this source file. pub fn find(&self, span: Span) -> Option<LinkedNode<'_>> { - LinkedNode::new(&self.root).find(span) - } - - /// Map a span that points into this source file to a byte range. - /// - /// Panics if the span does not point into this source file. - #[track_caller] - pub fn range(&self, span: Span) -> Range<usize> { - self.find(span) - .expect("span does not point into this source file") - .range() + LinkedNode::new(self.root()).find(span) } /// Return the index of the UTF-16 code unit at the byte index. pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> { let line_idx = self.byte_to_line(byte_idx)?; - let line = self.lines.get(line_idx)?; - let head = self.text.get(line.byte_idx..byte_idx)?; + let line = self.0.lines.get(line_idx)?; + let head = self.0.text.get(line.byte_idx..byte_idx)?; Some(line.utf16_idx + head.len_utf16()) } /// Return the index of the line that contains the given byte index. pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> { - (byte_idx <= self.text.len()).then(|| { - match self.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { + (byte_idx <= self.0.text.len()).then(|| { + match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { Ok(i) => i, Err(i) => i - 1, } @@ -197,33 +197,33 @@ impl Source { /// Return the byte index at the UTF-16 code unit. pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> { - let line = self.lines.get( - match self.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { + let line = self.0.lines.get( + match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { Ok(i) => i, Err(i) => i - 1, }, )?; let mut k = line.utf16_idx; - for (i, c) in self.text[line.byte_idx..].char_indices() { + for (i, c) in self.0.text[line.byte_idx..].char_indices() { if k >= utf16_idx { return Some(line.byte_idx + i); } k += c.len_utf16(); } - (k == utf16_idx).then_some(self.text.len()) + (k == utf16_idx).then_some(self.0.text.len()) } /// Return the byte position at which the given line starts. pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> { - self.lines.get(line_idx).map(|line| line.byte_idx) + self.0.lines.get(line_idx).map(|line| line.byte_idx) } /// Return the range which encloses the given line. pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> { let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text.len()); + let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len()); Some(start..end) } @@ -248,42 +248,21 @@ impl Source { impl Debug for Source { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Source({})", self.path.display()) + write!(f, "Source({})", self.id().path().display()) } } impl Hash for Source { fn hash<H: Hasher>(&self, state: &mut H) { - self.id.hash(state); - self.path.hash(state); - self.text.hash(state); - self.root.hash(state); + self.0.id.hash(state); + self.0.text.hash(state); + self.0.root.hash(state); } } -/// A unique identifier for a loaded source file. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct SourceId(u16); - -impl SourceId { - /// Create a new source id for a file that is not part of the world. - pub const fn detached() -> Self { - Self(u16::MAX) - } - - /// Whether the source id is the detached. - pub const fn is_detached(self) -> bool { - self.0 == Self::detached().0 - } - - /// Create a source id from a number. - pub const fn from_u16(v: u16) -> Self { - Self(v) - } - - /// Extract the underlying number. - pub const fn as_u16(self) -> u16 { - self.0 +impl AsRef<str> for Source { + fn as_ref(&self) -> &str { + self.text() } } @@ -309,7 +288,7 @@ fn lines_from( utf16_offset: usize, text: &str, ) -> impl Iterator<Item = Line> + '_ { - let mut s = Scanner::new(text); + let mut s = unscanny::Scanner::new(text); let mut utf16_idx = utf16_offset; std::iter::from_fn(move || { @@ -340,7 +319,7 @@ mod tests { fn test_source_file_new() { let source = Source::detached(TEST); assert_eq!( - source.lines, + source.0.lines, [ Line { byte_idx: 0, utf16_idx: 0 }, Line { byte_idx: 7, utf16_idx: 6 }, @@ -421,8 +400,8 @@ mod tests { let mut source = Source::detached(prev); let result = Source::detached(after); source.edit(range, with); - assert_eq!(source.text, result.text); - assert_eq!(source.lines, result.lines); + assert_eq!(source.text(), result.text()); + assert_eq!(source.0.lines, result.0.lines); } // Test inserting at the beginning. diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 91e0a3cf..5c220252 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -2,13 +2,15 @@ use std::fmt::{self, Debug, Formatter}; use std::num::NonZeroU64; use std::ops::Range; -use super::SourceId; +use super::Source; +use crate::file::FileId; +use crate::World; /// A unique identifier for a syntax node. /// /// This is used throughout the compiler to track which source section an error -/// or element stems from. Can be [mapped back](super::Source::range) to a byte -/// range for user facing display. +/// or element stems from. Can be [mapped back](Self::range) to a byte range for +/// user facing display. /// /// During editing, the span values stay mostly stable, even for nodes behind an /// insertion. This is not true for simple ranges as they would shift. Spans can @@ -39,7 +41,7 @@ impl Span { /// /// Panics if the `number` is not contained in `FULL`. #[track_caller] - pub const fn new(id: SourceId, number: u64) -> Self { + pub const fn new(id: FileId, number: u64) -> Self { assert!( Self::FULL.start <= number && number < Self::FULL.end, "span number outside valid range" @@ -50,12 +52,12 @@ impl Span { /// A span that does not point into any source file. pub const fn detached() -> Self { - Self::pack(SourceId::detached(), Self::DETACHED) + Self::pack(FileId::detached(), Self::DETACHED) } /// Pack the components into a span. #[track_caller] - const fn pack(id: SourceId, number: u64) -> Span { + const fn pack(id: FileId, number: u64) -> Span { let bits = ((id.as_u16() as u64) << Self::BITS) | number; match NonZeroU64::new(bits) { Some(v) => Self(v), @@ -63,20 +65,38 @@ impl Span { } } - /// Whether the span is detached. - pub const fn is_detached(self) -> bool { - self.source().is_detached() - } - /// The id of the source file the span points into. - pub const fn source(self) -> SourceId { - SourceId::from_u16((self.0.get() >> Self::BITS) as u16) + pub const fn id(self) -> FileId { + FileId::from_u16((self.0.get() >> Self::BITS) as u16) } /// The unique number of the span within its source file. pub const fn number(self) -> u64 { self.0.get() & ((1 << Self::BITS) - 1) } + + /// Whether the span is detached. + pub const fn is_detached(self) -> bool { + self.id().is_detached() + } + + /// Get the byte range for this span. + #[track_caller] + pub fn range(self, world: &dyn World) -> Range<usize> { + let source = world + .source(self.id()) + .expect("span does not point into any source file"); + self.range_in(&source) + } + + /// Get the byte range for this span in the given source file. + #[track_caller] + pub fn range_in(self, source: &Source) -> Range<usize> { + source + .find(self) + .expect("span does not point into this source file") + .range() + } } /// A value with a span locating it in the source code. @@ -116,13 +136,13 @@ impl<T: Debug> Debug for Spanned<T> { #[cfg(test)] mod tests { - use super::{SourceId, Span}; + use super::{FileId, Span}; #[test] fn test_span_encoding() { - let id = SourceId::from_u16(5); + let id = FileId::from_u16(5); let span = Span::new(id, 10); - assert_eq!(span.source(), id); + assert_eq!(span.id(), id); assert_eq!(span.number(), 10); } } |
