diff options
| author | Laurenz <laurmaedje@gmail.com> | 2020-10-01 11:32:48 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2020-10-01 11:32:48 +0200 |
| commit | 885bfec5d7524845b41e180fadc9cf5626157eec (patch) | |
| tree | f798e03d101d568a110a5c56f4a9bfa2be892928 /src/parse/lines.rs | |
| parent | 16f0bd430e0864a3bbd0139803e476be413cb3cb (diff) | |
Make syntax not depend on parse 📩
This would make it possible to split them into two separate crates.
Diffstat (limited to 'src/parse/lines.rs')
| -rw-r--r-- | src/parse/lines.rs | 88 |
1 file changed, 88 insertions, 0 deletions
diff --git a/src/parse/lines.rs b/src/parse/lines.rs new file mode 100644 index 00000000..ce5a1fe5 --- /dev/null +++ b/src/parse/lines.rs @@ -0,0 +1,88 @@ +//! Conversion of byte positions to line/column locations. + +use super::Scanner; +use crate::syntax::{Location, Pos}; + +/// Enables conversion of byte position to locations. +pub struct LineMap<'s> { + src: &'s str, + line_starts: Vec<Pos>, +} + +impl<'s> LineMap<'s> { + /// Create a new line map for a source string. + pub fn new(src: &'s str) -> Self { + let mut line_starts = vec![Pos::ZERO]; + let mut s = Scanner::new(src); + + while let Some(c) = s.eat_merging_crlf() { + if is_newline(c) { + line_starts.push(s.index().into()); + } + } + + Self { src, line_starts } + } + + /// Convert a byte position to a location. + /// + /// # Panics + /// This panics if the position is out of bounds. + pub fn location(&self, pos: Pos) -> Location { + let line_index = match self.line_starts.binary_search(&pos) { + Ok(i) => i, + Err(i) => i - 1, + }; + + let line_start = self.line_starts[line_index]; + let head = &self.src[line_start.to_usize() .. pos.to_usize()]; + let column_index = head.chars().count(); + + Location { + line: 1 + line_index as u32, + column: 1 + column_index as u32, + } + } +} + +/// Whether this character denotes a newline. +pub fn is_newline(character: char) -> bool { + match character { + // Line Feed, Vertical Tab, Form Feed, Carriage Return. + '\n' | '\x0B' | '\x0C' | '\r' | + // Next Line, Line Separator, Paragraph Separator. 
+ '\u{0085}' | '\u{2028}' | '\u{2029}' => true, + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl"; + + #[test] + fn test_line_map_new() { + let map = LineMap::new(TEST); + assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]); + } + + #[test] + fn test_line_map_location() { + let map = LineMap::new(TEST); + assert_eq!(map.location(Pos(0)), Location::new(1, 1)); + assert_eq!(map.location(Pos(2)), Location::new(1, 2)); + assert_eq!(map.location(Pos(6)), Location::new(1, 6)); + assert_eq!(map.location(Pos(7)), Location::new(2, 1)); + assert_eq!(map.location(Pos(8)), Location::new(2, 2)); + assert_eq!(map.location(Pos(12)), Location::new(2, 3)); + assert_eq!(map.location(Pos(21)), Location::new(4, 4)); + } + + #[test] + #[should_panic] + fn test_line_map_panics_out_of_bounds() { + LineMap::new(TEST).location(Pos(22)); + } +} |
