Pretty-printed diagnostics with traceback

author: Laurenz <laurmaedje@gmail.com> 2021-07-31 22:59:14 +0200
committer: Laurenz <laurmaedje@gmail.com> 2021-08-01 00:00:36 +0200
commit: 3c92bad9a7cd6b880de197806443ffcce2cac9d8 (patch)
tree: 1faf79c66e23bc37711af16ad690a9878e28d348 /src/parse
parent: fbd3d191137aac8188ab8c6503d257d65d873972 (diff)
6 files changed, 37 insertions, 185 deletions
diff --git a/src/parse/lines.rs b/src/parse/lines.rs
deleted file mode 100644
index 2d97a25c..00000000
--- a/src/parse/lines.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-// FIXME:
-// Both `LineMap::location` and `search_column` can lead to quadratic compile
-// times for very long lines. We probably need some smart acceleration structure
-// to determine columns.
-
-use super::Scanner;
-use crate::syntax::{Location, Pos};
-
-/// Enables conversion of byte position to locations.
-pub struct LineMap<'s> {
-    src: &'s str,
-    line_starts: Vec<Pos>,
-}
-
-impl<'s> LineMap<'s> {
-    /// Create a new line map for a source string.
-    pub fn new(src: &'s str) -> Self {
-        let mut line_starts = vec![Pos::ZERO];
-        let mut s = Scanner::new(src);
-
-        while let Some(c) = s.eat_merging_crlf() {
-            if is_newline(c) {
-                line_starts.push(s.index().into());
-            }
-        }
-
-        Self { src, line_starts }
-    }
-
-    /// Convert a byte position to a location.
-    pub fn location(&self, pos: Pos) -> Option<Location> {
-        // Find the line which contains the position.
-        let line_index = match self.line_starts.binary_search(&pos) {
-            Ok(i) => i,
-            Err(i) => i - 1,
-        };
-
-        let start = self.line_starts.get(line_index)?;
-        let head = self.src.get(start.to_usize() .. pos.to_usize())?;
-
-        // TODO: What about tabs?
-        let column_index = head.chars().count();
-
-        Some(Location {
-            line: 1 + line_index as u32,
-            column: 1 + column_index as u32,
-        })
-    }
-
-    /// Convert a location to a byte position.
-    pub fn pos(&self, location: Location) -> Option<Pos> {
-        // Determine the boundaries of the line.
-        let line_idx = location.line.checked_sub(1)? as usize;
-        let line_start = *self.line_starts.get(line_idx)?;
-        let line_end = self
-            .line_starts
-            .get(location.line as usize)
-            .map_or(self.src.len(), |pos| pos.to_usize());
-
-        let line = self.src.get(line_start.to_usize() .. line_end)?;
-
-        // Find the index in the line. For the first column, the index is always
-        // zero. For other columns, we have to look at which byte the char
-        // directly before the column in question ends. We can't do
-        // `nth(column_idx)` directly since the column may be behind the last
-        // char.
-        let column_idx = location.column.checked_sub(1)? as usize;
-        let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
-            // TODO: What about tabs?
-            let (idx, prev) = line.char_indices().nth(prev_idx)?;
-            idx + prev.len_utf8()
-        } else {
-            0
-        };
-
-        Some(line_start + line_offset)
-    }
-}
-
-/// Count how many column the string would fill.
-pub fn count_columns(src: &str) -> usize {
-    let mut column = 0;
-    for c in src.chars().rev() {
-        if is_newline(c) {
-            break;
-        } else if c == '\t' {
-            // TODO: How many columns per tab?
-            column += 2;
-        } else {
-            column += 1;
-        }
-    }
-    column
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
-    matches!(
-        character,
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\n' | '\x0B' | '\x0C' | '\r' |
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}'
-    )
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
-
-    #[test]
-    fn test_line_map_new() {
-        let map = LineMap::new(TEST);
-        assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
-    }
-
-    #[test]
-    fn test_line_map_location() {
-        let map = LineMap::new(TEST);
-        assert_eq!(map.location(Pos(0)), Some(Location::new(1, 1)));
-        assert_eq!(map.location(Pos(2)), Some(Location::new(1, 2)));
-        assert_eq!(map.location(Pos(6)), Some(Location::new(1, 6)));
-        assert_eq!(map.location(Pos(7)), Some(Location::new(2, 1)));
-        assert_eq!(map.location(Pos(8)), Some(Location::new(2, 2)));
-        assert_eq!(map.location(Pos(12)), Some(Location::new(2, 3)));
-        assert_eq!(map.location(Pos(21)), Some(Location::new(4, 4)));
-        assert_eq!(map.location(Pos(22)), None);
-    }
-
-    #[test]
-    fn test_line_map_pos() {
-        fn assert_round_trip(map: &LineMap, pos: Pos) {
-            assert_eq!(map.location(pos).and_then(|loc| map.pos(loc)), Some(pos));
-        }
-
-        let map = LineMap::new(TEST);
-        assert_round_trip(&map, Pos(0));
-        assert_round_trip(&map, Pos(7));
-        assert_round_trip(&map, Pos(12));
-        assert_round_trip(&map, Pos(21));
-    }
-}
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index c103c342..f033e01f 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -1,12 +1,10 @@
 //! Parsing and tokenization.
 
-mod lines;
 mod parser;
 mod resolve;
 mod scanner;
 mod tokens;
 
-pub use lines::*;
 pub use parser::*;
 pub use resolve::*;
 pub use scanner::*;
@@ -15,13 +13,13 @@ pub use tokens::*;
 use std::rc::Rc;
 
 use crate::diag::TypResult;
-use crate::loading::FileId;
+use crate::source::SourceFile;
 use crate::syntax::*;
 use crate::util::EcoString;
 
 /// Parse a string of source code.
-pub fn parse(file: FileId, src: &str) -> TypResult<SyntaxTree> {
-    let mut p = Parser::new(file, src);
+pub fn parse(source: &SourceFile) -> TypResult<SyntaxTree> {
+    let mut p = Parser::new(source);
     let tree = tree(&mut p);
     let errors = p.finish();
     if errors.is_empty() {
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 0238c8be..6b478780 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -1,15 +1,15 @@
 use std::fmt::{self, Debug, Formatter};
 use std::ops::Range;
 
-use super::{count_columns, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
 use crate::diag::Error;
-use crate::loading::FileId;
+use crate::source::SourceFile;
 use crate::syntax::{Pos, Span, Token};
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
     /// The id of the parsed file.
-    file: FileId,
+    source: &'s SourceFile,
     /// Parsing errors.
     errors: Vec<Error>,
     /// An iterator over the source tokens.
@@ -60,11 +60,11 @@ pub enum Group {
 
 impl<'s> Parser<'s> {
     /// Create a new parser for the source string.
-    pub fn new(file: FileId, src: &'s str) -> Self {
-        let mut tokens = Tokens::new(src, TokenMode::Markup);
+    pub fn new(source: &'s SourceFile) -> Self {
+        let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
         let next = tokens.next();
         Self {
-            file,
+            source,
             errors: vec![],
             tokens,
             groups: vec![],
@@ -82,11 +82,7 @@ impl<'s> Parser<'s> {
 
     /// Add an error with location and message.
     pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
-        self.errors.push(Error {
-            file: self.file,
-            span: span.into(),
-            message: message.into(),
-        });
+        self.errors.push(Error::new(self.source.file(), span, message));
     }
 
     /// Eat the next token and add an error that it is not the expected `thing`.
@@ -324,7 +320,7 @@ impl<'s> Parser<'s> {
 
     /// Determine the column for the given index in the source.
     pub fn column(&self, index: usize) -> usize {
-        count_columns(self.tokens.scanner().get(.. index))
+        self.source.pos_to_column(index.into()).unwrap()
     }
 
     /// The span from `start` to [`self.prev_end()`](Self::prev_end).
diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs
index f97d5383..7bd160f9 100644
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@@ -109,8 +109,11 @@ fn split_lines(text: &str) -> Vec<String> {
     let mut line = String::new();
     let mut lines = Vec::new();
 
-    while let Some(c) = s.eat_merging_crlf() {
+    while let Some(c) = s.eat() {
         if is_newline(c) {
+            if c == '\r' {
+                s.eat_if('\n');
+            }
             lines.push(std::mem::take(&mut line));
         } else {
             line.push(c);
@@ -173,14 +176,10 @@ mod tests {
             text: &str,
             block: bool,
         ) {
-            Span::without_cmp(|| {
-                assert_eq!(resolve_raw(Span::ZERO, raw, backticks), RawNode {
-                    span: Span::ZERO,
-                    lang: lang.and_then(|id| Ident::new(id, 0)),
-                    text: text.into(),
-                    block,
-                });
-            });
+            let node = resolve_raw(Span::ZERO, raw, backticks);
+            assert_eq!(node.lang.as_deref(), lang);
+            assert_eq!(node.text, text);
+            assert_eq!(node.block, block);
         }
 
         // Just one backtick.
diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs
index 9ee7641c..bb827255 100644
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@@ -47,17 +47,6 @@ impl<'s> Scanner<'s> {
         debug_assert_eq!(next, Some(c));
     }
 
-    /// Consume the next char, coalescing `\r\n` to just `\n`.
-    #[inline]
-    pub fn eat_merging_crlf(&mut self) -> Option<char> {
-        if self.rest().starts_with("\r\n") {
-            self.index += 2;
-            Some('\n')
-        } else {
-            self.eat()
-        }
-    }
-
     /// Eat chars while the condition is true.
     #[inline]
     pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
@@ -168,3 +157,15 @@ impl Debug for Scanner<'_> {
         write!(f, "Scanner({}|{})", self.eaten(), self.rest())
     }
 }
+
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+    matches!(
+        character,
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\n' | '\x0B' | '\x0C' | '\r' |
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}'
+    )
+}
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index 356a2f96..9fd13ecc 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -198,13 +198,16 @@ impl<'s> Tokens<'s> {
 
         // Count the number of newlines.
         let mut newlines = 0;
-        while let Some(c) = self.s.eat_merging_crlf() {
+        while let Some(c) = self.s.eat() {
             if !c.is_whitespace() {
                 self.s.uneat();
                 break;
             }
 
             if is_newline(c) {
+                if c == '\r' {
+                    self.s.eat_if('\n');
+                }
                 newlines += 1;
             }
         }
@@ -484,8 +487,8 @@ impl Debug for Tokens<'_> {
     }
 }
 
-fn keyword(id: &str) -> Option<Token<'static>> {
-    Some(match id {
+fn keyword(ident: &str) -> Option<Token<'static>> {
+    Some(match ident {
         "not" => Token::Not,
         "and" => Token::And,
         "or" => Token::Or,
author	Laurenz <laurmaedje@gmail.com>	2021-07-31 22:59:14 +0200
committer	Laurenz <laurmaedje@gmail.com>	2021-08-01 00:00:36 +0200
commit	3c92bad9a7cd6b880de197806443ffcce2cac9d8 (patch)
tree	1faf79c66e23bc37711af16ad690a9878e28d348 /src/parse
parent	fbd3d191137aac8188ab8c6503d257d65d873972 (diff)