summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2021-07-31 22:59:14 +0200
committer Laurenz <laurmaedje@gmail.com> 2021-08-01 00:00:36 +0200
commit 3c92bad9a7cd6b880de197806443ffcce2cac9d8 (patch)
tree 1faf79c66e23bc37711af16ad690a9878e28d348 /src/parse
parent fbd3d191137aac8188ab8c6503d257d65d873972 (diff)
Pretty-printed diagnostics with traceback
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/lines.rs 145
-rw-r--r-- src/parse/mod.rs 8
-rw-r--r-- src/parse/parser.rs 20
-rw-r--r-- src/parse/resolve.rs 17
-rw-r--r-- src/parse/scanner.rs 23
-rw-r--r-- src/parse/tokens.rs 9
6 files changed, 37 insertions, 185 deletions
diff --git a/src/parse/lines.rs b/src/parse/lines.rs
deleted file mode 100644
index 2d97a25c..00000000
--- a/src/parse/lines.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-// FIXME:
-// Both `LineMap::location` and `search_column` can lead to quadratic compile
-// times for very long lines. We probably need some smart acceleration structure
-// to determine columns.
-
-use super::Scanner;
-use crate::syntax::{Location, Pos};
-
-/// Enables conversion of byte position to locations.
-pub struct LineMap<'s> {
- src: &'s str,
- line_starts: Vec<Pos>,
-}
-
-impl<'s> LineMap<'s> {
- /// Create a new line map for a source string.
- pub fn new(src: &'s str) -> Self {
- let mut line_starts = vec![Pos::ZERO];
- let mut s = Scanner::new(src);
-
- while let Some(c) = s.eat_merging_crlf() {
- if is_newline(c) {
- line_starts.push(s.index().into());
- }
- }
-
- Self { src, line_starts }
- }
-
- /// Convert a byte position to a location.
- pub fn location(&self, pos: Pos) -> Option<Location> {
- // Find the line which contains the position.
- let line_index = match self.line_starts.binary_search(&pos) {
- Ok(i) => i,
- Err(i) => i - 1,
- };
-
- let start = self.line_starts.get(line_index)?;
- let head = self.src.get(start.to_usize() .. pos.to_usize())?;
-
- // TODO: What about tabs?
- let column_index = head.chars().count();
-
- Some(Location {
- line: 1 + line_index as u32,
- column: 1 + column_index as u32,
- })
- }
-
- /// Convert a location to a byte position.
- pub fn pos(&self, location: Location) -> Option<Pos> {
- // Determine the boundaries of the line.
- let line_idx = location.line.checked_sub(1)? as usize;
- let line_start = *self.line_starts.get(line_idx)?;
- let line_end = self
- .line_starts
- .get(location.line as usize)
- .map_or(self.src.len(), |pos| pos.to_usize());
-
- let line = self.src.get(line_start.to_usize() .. line_end)?;
-
- // Find the index in the line. For the first column, the index is always
- // zero. For other columns, we have to look at which byte the char
- // directly before the column in question ends. We can't do
- // `nth(column_idx)` directly since the column may be behind the last
- // char.
- let column_idx = location.column.checked_sub(1)? as usize;
- let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
- // TODO: What about tabs?
- let (idx, prev) = line.char_indices().nth(prev_idx)?;
- idx + prev.len_utf8()
- } else {
- 0
- };
-
- Some(line_start + line_offset)
- }
-}
-
-/// Count how many column the string would fill.
-pub fn count_columns(src: &str) -> usize {
- let mut column = 0;
- for c in src.chars().rev() {
- if is_newline(c) {
- break;
- } else if c == '\t' {
- // TODO: How many columns per tab?
- column += 2;
- } else {
- column += 1;
- }
- }
- column
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
- matches!(
- character,
- // Line Feed, Vertical Tab, Form Feed, Carriage Return.
- '\n' | '\x0B' | '\x0C' | '\r' |
- // Next Line, Line Separator, Paragraph Separator.
- '\u{0085}' | '\u{2028}' | '\u{2029}'
- )
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
-
- #[test]
- fn test_line_map_new() {
- let map = LineMap::new(TEST);
- assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
- }
-
- #[test]
- fn test_line_map_location() {
- let map = LineMap::new(TEST);
- assert_eq!(map.location(Pos(0)), Some(Location::new(1, 1)));
- assert_eq!(map.location(Pos(2)), Some(Location::new(1, 2)));
- assert_eq!(map.location(Pos(6)), Some(Location::new(1, 6)));
- assert_eq!(map.location(Pos(7)), Some(Location::new(2, 1)));
- assert_eq!(map.location(Pos(8)), Some(Location::new(2, 2)));
- assert_eq!(map.location(Pos(12)), Some(Location::new(2, 3)));
- assert_eq!(map.location(Pos(21)), Some(Location::new(4, 4)));
- assert_eq!(map.location(Pos(22)), None);
- }
-
- #[test]
- fn test_line_map_pos() {
- fn assert_round_trip(map: &LineMap, pos: Pos) {
- assert_eq!(map.location(pos).and_then(|loc| map.pos(loc)), Some(pos));
- }
-
- let map = LineMap::new(TEST);
- assert_round_trip(&map, Pos(0));
- assert_round_trip(&map, Pos(7));
- assert_round_trip(&map, Pos(12));
- assert_round_trip(&map, Pos(21));
- }
-}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index c103c342..f033e01f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,12 +1,10 @@
//! Parsing and tokenization.
-mod lines;
mod parser;
mod resolve;
mod scanner;
mod tokens;
-pub use lines::*;
pub use parser::*;
pub use resolve::*;
pub use scanner::*;
@@ -15,13 +13,13 @@ pub use tokens::*;
use std::rc::Rc;
use crate::diag::TypResult;
-use crate::loading::FileId;
+use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a string of source code.
-pub fn parse(file: FileId, src: &str) -> TypResult<SyntaxTree> {
- let mut p = Parser::new(file, src);
+pub fn parse(source: &SourceFile) -> TypResult<SyntaxTree> {
+ let mut p = Parser::new(source);
let tree = tree(&mut p);
let errors = p.finish();
if errors.is_empty() {
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 0238c8be..6b478780 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,15 @@
use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
-use super::{count_columns, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
use crate::diag::Error;
-use crate::loading::FileId;
+use crate::source::SourceFile;
use crate::syntax::{Pos, Span, Token};
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The id of the parsed file.
- file: FileId,
+ source: &'s SourceFile,
/// Parsing errors.
errors: Vec<Error>,
/// An iterator over the source tokens.
@@ -60,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
- pub fn new(file: FileId, src: &'s str) -> Self {
- let mut tokens = Tokens::new(src, TokenMode::Markup);
+ pub fn new(source: &'s SourceFile) -> Self {
+ let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
let next = tokens.next();
Self {
- file,
+ source,
errors: vec![],
tokens,
groups: vec![],
@@ -82,11 +82,7 @@ impl<'s> Parser<'s> {
/// Add an error with location and message.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
- self.errors.push(Error {
- file: self.file,
- span: span.into(),
- message: message.into(),
- });
+ self.errors.push(Error::new(self.source.file(), span, message));
}
/// Eat the next token and add an error that it is not the expected `thing`.
@@ -324,7 +320,7 @@ impl<'s> Parser<'s> {
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
- count_columns(self.tokens.scanner().get(.. index))
+ self.source.pos_to_column(index.into()).unwrap()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index f97d5383..7bd160f9 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -109,8 +109,11 @@ fn split_lines(text: &str) -> Vec<String> {
let mut line = String::new();
let mut lines = Vec::new();
- while let Some(c) = s.eat_merging_crlf() {
+ while let Some(c) = s.eat() {
if is_newline(c) {
+ if c == '\r' {
+ s.eat_if('\n');
+ }
lines.push(std::mem::take(&mut line));
} else {
line.push(c);
@@ -173,14 +176,10 @@ mod tests {
text: &str,
block: bool,
) {
- Span::without_cmp(|| {
- assert_eq!(resolve_raw(Span::ZERO, raw, backticks), RawNode {
- span: Span::ZERO,
- lang: lang.and_then(|id| Ident::new(id, 0)),
- text: text.into(),
- block,
- });
- });
+ let node = resolve_raw(Span::ZERO, raw, backticks);
+ assert_eq!(node.lang.as_deref(), lang);
+ assert_eq!(node.text, text);
+ assert_eq!(node.block, block);
}
// Just one backtick.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 9ee7641c..bb827255 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -47,17 +47,6 @@ impl<'s> Scanner<'s> {
debug_assert_eq!(next, Some(c));
}
- /// Consume the next char, coalescing `\r\n` to just `\n`.
- #[inline]
- pub fn eat_merging_crlf(&mut self) -> Option<char> {
- if self.rest().starts_with("\r\n") {
- self.index += 2;
- Some('\n')
- } else {
- self.eat()
- }
- }
-
/// Eat chars while the condition is true.
#[inline]
pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
@@ -168,3 +157,15 @@ impl Debug for Scanner<'_> {
write!(f, "Scanner({}|{})", self.eaten(), self.rest())
}
}
+
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+ matches!(
+ character,
+ // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+ '\n' | '\x0B' | '\x0C' | '\r' |
+ // Next Line, Line Separator, Paragraph Separator.
+ '\u{0085}' | '\u{2028}' | '\u{2029}'
+ )
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 356a2f96..9fd13ecc 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -198,13 +198,16 @@ impl<'s> Tokens<'s> {
// Count the number of newlines.
let mut newlines = 0;
- while let Some(c) = self.s.eat_merging_crlf() {
+ while let Some(c) = self.s.eat() {
if !c.is_whitespace() {
self.s.uneat();
break;
}
if is_newline(c) {
+ if c == '\r' {
+ self.s.eat_if('\n');
+ }
newlines += 1;
}
}
@@ -484,8 +487,8 @@ impl Debug for Tokens<'_> {
}
}
-fn keyword(id: &str) -> Option<Token<'static>> {
- Some(match id {
+fn keyword(ident: &str) -> Option<Token<'static>> {
+ Some(match ident {
"not" => Token::Not,
"and" => Token::And,
"or" => Token::Or,