diff options
| author | Laurenz <laurmaedje@gmail.com> | 2020-09-30 17:25:09 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2020-09-30 17:25:09 +0200 |
| commit | 7cc279f7ae122f4c40592004dde89792c636b3c8 (patch) | |
| tree | a71d3567950c147d41bfa649ca6cd76edb47cc4f /src/syntax | |
| parent | 3c3730425f0a9a4241c4f57cb7f4d00b71db201e (diff) | |
Replace line/column with byte positions 🔢
Diffstat (limited to 'src/syntax')
| -rw-r--r-- | src/syntax/lines.rs | 114 | ||||
| -rw-r--r-- | src/syntax/mod.rs | 2 | ||||
| -rw-r--r-- | src/syntax/span.rs | 84 |
3 files changed, 140 insertions, 60 deletions
diff --git a/src/syntax/lines.rs b/src/syntax/lines.rs new file mode 100644 index 00000000..86fc461b --- /dev/null +++ b/src/syntax/lines.rs @@ -0,0 +1,114 @@ +//! Conversion of byte positions to line/column locations. + +use std::fmt::{self, Debug, Display, Formatter}; + +use super::Pos; +use crate::parse::is_newline_char; + +/// Enables conversion of byte position to locations. +pub struct LineMap<'s> { + src: &'s str, + line_starts: Vec<Pos>, +} + +impl<'s> LineMap<'s> { + /// Create a new line map for a source string. + pub fn new(src: &'s str) -> Self { + let mut line_starts = vec![Pos::ZERO]; + let mut iter = src.char_indices().peekable(); + + while let Some((mut i, c)) = iter.next() { + if is_newline_char(c) { + i += c.len_utf8(); + if c == '\r' && matches!(iter.peek(), Some((_, '\n'))) { + i += '\n'.len_utf8(); + iter.next(); + } + + line_starts.push(Pos(i as u32)); + } + } + + Self { src, line_starts } + } + + /// Convert a byte position to a location. + /// + /// # Panics + /// This panics if the position is out of bounds. + pub fn location(&self, pos: Pos) -> Location { + let line_index = match self.line_starts.binary_search(&pos) { + Ok(i) => i, + Err(i) => i - 1, + }; + + let line_start = self.line_starts[line_index]; + let head = &self.src[line_start.to_usize() .. pos.to_usize()]; + let column_index = head.chars().count(); + + Location { + line: 1 + line_index as u32, + column: 1 + column_index as u32, + } + } +} + +/// One-indexed line-column position in source code. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +pub struct Location { + /// The one-indexed line. + pub line: u32, + /// The one-indexed column. + pub column: u32, +} + +impl Location { + /// Create a new location from line and column. + pub fn new(line: u32, column: u32) -> Self { + Self { line, column } + } +} + +impl Debug for Location { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Display::fmt(self, f) + } +} + +impl Display for Location { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}:{}", self.line, self.column) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl"; + + #[test] + fn test_line_map_new() { + let map = LineMap::new(TEST); + assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]); + } + + #[test] + fn test_line_map_location() { + let map = LineMap::new(TEST); + assert_eq!(map.location(Pos(0)), Location::new(1, 1)); + assert_eq!(map.location(Pos(2)), Location::new(1, 2)); + assert_eq!(map.location(Pos(6)), Location::new(1, 6)); + assert_eq!(map.location(Pos(7)), Location::new(2, 1)); + assert_eq!(map.location(Pos(8)), Location::new(2, 2)); + assert_eq!(map.location(Pos(12)), Location::new(2, 3)); + assert_eq!(map.location(Pos(21)), Location::new(4, 4)); + } + + #[test] + #[should_panic] + fn test_line_map_panics_out_of_bounds() { + LineMap::new(TEST).location(Pos(22)); + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index fe887c2f..f442ba9e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,9 +1,11 @@ //! Syntax types. +mod lines; mod span; mod token; mod tree; +pub use lines::*; pub use span::*; pub use token::*; pub use tree::*; diff --git a/src/syntax/span.rs b/src/syntax/span.rs index cda35ec0..1bd14c65 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,7 +1,6 @@ //! Mapping of values to the locations they originate from in source code. use std::fmt::{self, Debug, Formatter}; -use std::ops::{Add, Sub}; #[cfg(test)] use std::cell::Cell; @@ -11,12 +10,6 @@ thread_local! { static CMP_SPANS: Cell<bool> = Cell::new(true); } -/// Span offsetting. -pub trait Offset { - /// Offset all spans contained in `Self` by the given position. - fn offset(self, by: Pos) -> Self; -} - /// Annotate a value with a span. pub trait SpanWith: Sized { /// Wraps `self` in a `Spanned` with the given span. @@ -27,6 +20,12 @@ pub trait SpanWith: Sized { impl<T> SpanWith for T {} +/// Span offsetting. +pub trait Offset { + /// Offset all spans contained in `Self` by the given position. + fn offset(self, by: Pos) -> Self; +} + /// A vector of spanned values of type `T`. pub type SpanVec<T> = Vec<Spanned<T>>; @@ -112,13 +111,13 @@ impl Span { pub const ZERO: Self = Self { start: Pos::ZERO, end: Pos::ZERO }; /// Create a new span from start and end positions. - pub fn new(start: Pos, end: Pos) -> Self { - Self { start, end } + pub fn new(start: impl Into<Pos>, end: impl Into<Pos>) -> Self { + Self { start: start.into(), end: end.into() } } /// Create a span including just a single position. - pub fn at(pos: Pos) -> Self { - Self { start: pos, end: pos } + pub fn at(pos: impl Into<Pos> + Copy) -> Self { + Self::new(pos, pos) } /// Create a new span with the earlier start and later end position. @@ -169,70 +168,35 @@ impl Debug for Span { } } -/// Zero-indexed line-column position in source code. +/// A byte position. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serialize", derive(serde::Serialize))] -pub struct Pos { - /// The zero-indexed line. - pub line: usize, - /// The zero-indexed column. - pub column: usize, -} +pub struct Pos(pub u32); impl Pos { - /// The line 0, column 0 position. - pub const ZERO: Self = Self { line: 0, column: 0 }; - - /// Create a new position from line and column. - pub fn new(line: usize, column: usize) -> Self { - Self { line, column } - } -} + /// The zero position. + pub const ZERO: Self = Self(0); -impl Offset for Pos { - fn offset(self, by: Self) -> Self { - by + self + /// Convert to a usize for indexing. + pub fn to_usize(self) -> usize { + self.0 as usize } } -impl Add for Pos { - type Output = Self; - - fn add(self, rhs: Self) -> Self { - if rhs.line == 0 { - Self { - line: self.line, - column: self.column + rhs.column, - } - } else { - Self { - line: self.line + rhs.line, - column: rhs.column, - } - } +impl From<u32> for Pos { + fn from(index: u32) -> Self { + Self(index) } } -impl Sub for Pos { - type Output = Self; - - fn sub(self, rhs: Self) -> Self { - if self.line == rhs.line { - Self { - line: 0, - column: self.column - rhs.column, - } - } else { - Self { - line: self.line - rhs.line, - column: self.column, - } - } +impl Offset for Pos { + fn offset(self, by: Self) -> Self { + Pos(self.0 + by.0) } } impl Debug for Pos { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{}:{}", self.line, self.column) + self.0.fmt(f) } } |
