summary | refs | log | tree | commit | diff
path: root/src/syntax
diff options
context:
space:
mode:
author Laurenz <laurmaedje@gmail.com> 2020-09-30 17:25:09 +0200
committer Laurenz <laurmaedje@gmail.com> 2020-09-30 17:25:09 +0200
commit 7cc279f7ae122f4c40592004dde89792c636b3c8 (patch)
tree a71d3567950c147d41bfa649ca6cd76edb47cc4f /src/syntax
parent 3c3730425f0a9a4241c4f57cb7f4d00b71db201e (diff)
Replace line/column with byte positions 🔢
Diffstat (limited to 'src/syntax')
-rw-r--r-- src/syntax/lines.rs 114
-rw-r--r-- src/syntax/mod.rs 2
-rw-r--r-- src/syntax/span.rs 84
3 files changed, 140 insertions, 60 deletions
diff --git a/src/syntax/lines.rs b/src/syntax/lines.rs
new file mode 100644
index 00000000..86fc461b
--- /dev/null
+++ b/src/syntax/lines.rs
@@ -0,0 +1,114 @@
+//! Conversion of byte positions to line/column locations.
+
+use std::fmt::{self, Debug, Display, Formatter};
+
+use super::Pos;
+use crate::parse::is_newline_char;
+
+/// Enables conversion of byte positions to line/column locations.
+///
+/// Borrows the source string for the lifetime `'s`.
+pub struct LineMap<'s> {
+ /// The source string that byte positions are resolved against.
+ src: &'s str,
+ /// Byte positions at which each line starts, in ascending order.
+ /// The first entry is always `Pos::ZERO`.
+ line_starts: Vec<Pos>,
+}
+
+impl<'s> LineMap<'s> {
+    /// Create a new line map for a source string.
+    ///
+    /// Scans `src` once and records the byte position at which every line
+    /// starts. A line ends at any character for which `is_newline_char`
+    /// returns true; a `\r` directly followed by `\n` counts as a single
+    /// line break.
+    ///
+    /// # Panics
+    /// This panics if `src` is longer than `u32::MAX` bytes, because larger
+    /// byte offsets cannot be represented by a `Pos`.
+    pub fn new(src: &'s str) -> Self {
+        // Checked once up front so that the `as u32` conversion below can
+        // never silently truncate and leave `line_starts` unsorted.
+        assert!(
+            src.len() <= u32::MAX as usize,
+            "source is too large to be indexed by 32-bit byte positions",
+        );
+
+        let mut line_starts = vec![Pos::ZERO];
+        let mut iter = src.char_indices().peekable();
+
+        while let Some((index, c)) = iter.next() {
+            if !is_newline_char(c) {
+                continue;
+            }
+
+            // The next line starts directly behind the newline character ...
+            let mut next_start = index + c.len_utf8();
+
+            // ... or behind the complete `\r\n` pair, which is one break.
+            if c == '\r' && matches!(iter.peek(), Some((_, '\n'))) {
+                next_start += '\n'.len_utf8();
+                iter.next();
+            }
+
+            line_starts.push(Pos(next_start as u32));
+        }
+
+        Self { src, line_starts }
+    }
+
+    /// Convert a byte position to a location.
+    ///
+    /// The column counts the `char`s between the start of the line and
+    /// `pos`, not the bytes.
+    ///
+    /// # Panics
+    /// This panics if the position is out of bounds or does not lie on a
+    /// character boundary of the source string.
+    pub fn location(&self, pos: Pos) -> Location {
+        // `line_starts` is sorted and always begins with `Pos::ZERO`, so a
+        // failed search reports an insertion index of at least 1 and the
+        // line containing `pos` is the one just before that index.
+        let line_index = match self.line_starts.binary_search(&pos) {
+            Ok(i) => i,
+            Err(i) => i - 1,
+        };
+
+        let line_start = self.line_starts[line_index];
+        let head = self
+            .src
+            .get(line_start.to_usize() .. pos.to_usize())
+            .unwrap_or_else(|| {
+                panic!(
+                    "byte position {} is out of bounds or not on a char boundary",
+                    pos.to_usize(),
+                )
+            });
+        let column_index = head.chars().count();
+
+        Location {
+            line: 1 + line_index as u32,
+            column: 1 + column_index as u32,
+        }
+    }
+}
+
+/// One-indexed line-column position in source code.
+///
+/// The derived ordering compares the line first and the column second,
+/// following the field order.
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
+pub struct Location {
+ /// The one-indexed line.
+ pub line: u32,
+ /// The one-indexed column.
+ pub column: u32,
+}
+
+impl Location {
+    /// Build a location out of the given line and column.
+    ///
+    /// The values are stored as passed; no validation is performed.
+    pub fn new(line: u32, column: u32) -> Self {
+        Location { line, column }
+    }
+}
+
+impl Debug for Location {
+    /// Debug output is the same as the `Display` output.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        <Self as Display>::fmt(self, f)
+    }
+}
+
+impl Display for Location {
+    /// Writes the location as `line:column`.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let Self { line, column } = *self;
+        write!(f, "{}:{}", line, column)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Mixes multi-byte characters with all three line break styles
+    // (`\n`, `\r\n` and a lone `\r`).
+    const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
+
+    #[test]
+    fn test_line_map_new() {
+        let expected: Vec<Pos> = [0, 7, 15, 18].iter().copied().map(Pos).collect();
+        assert_eq!(LineMap::new(TEST).line_starts, expected);
+    }
+
+    #[test]
+    fn test_line_map_location() {
+        let map = LineMap::new(TEST);
+
+        // (byte position, expected line, expected column)
+        let cases = [
+            (0, 1, 1),
+            (2, 1, 2),
+            (6, 1, 6),
+            (7, 2, 1),
+            (8, 2, 2),
+            (12, 2, 3),
+            (21, 4, 4),
+        ];
+
+        for &(pos, line, column) in cases.iter() {
+            assert_eq!(map.location(Pos(pos)), Location::new(line, column));
+        }
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_line_map_panics_out_of_bounds() {
+        // One byte past the end of the 21-byte test string.
+        let map = LineMap::new(TEST);
+        map.location(Pos(22));
+    }
+}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index fe887c2f..f442ba9e 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,9 +1,11 @@
//! Syntax types.
+mod lines;
mod span;
mod token;
mod tree;
+pub use lines::*;
pub use span::*;
pub use token::*;
pub use tree::*;
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index cda35ec0..1bd14c65 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -1,7 +1,6 @@
//! Mapping of values to the locations they originate from in source code.
use std::fmt::{self, Debug, Formatter};
-use std::ops::{Add, Sub};
#[cfg(test)]
use std::cell::Cell;
@@ -11,12 +10,6 @@ thread_local! {
static CMP_SPANS: Cell<bool> = Cell::new(true);
}
-/// Span offsetting.
-pub trait Offset {
- /// Offset all spans contained in `Self` by the given position.
- fn offset(self, by: Pos) -> Self;
-}
-
/// Annotate a value with a span.
pub trait SpanWith: Sized {
/// Wraps `self` in a `Spanned` with the given span.
@@ -27,6 +20,12 @@ pub trait SpanWith: Sized {
impl<T> SpanWith for T {}
+/// Span offsetting.
+pub trait Offset {
+ /// Offset all spans contained in `Self` by the given position.
+ fn offset(self, by: Pos) -> Self;
+}
+
/// A vector of spanned values of type `T`.
pub type SpanVec<T> = Vec<Spanned<T>>;
@@ -112,13 +111,13 @@ impl Span {
pub const ZERO: Self = Self { start: Pos::ZERO, end: Pos::ZERO };
/// Create a new span from start and end positions.
- pub fn new(start: Pos, end: Pos) -> Self {
- Self { start, end }
+ pub fn new(start: impl Into<Pos>, end: impl Into<Pos>) -> Self {
+ Self { start: start.into(), end: end.into() }
}
/// Create a span including just a single position.
- pub fn at(pos: Pos) -> Self {
- Self { start: pos, end: pos }
+ pub fn at(pos: impl Into<Pos> + Copy) -> Self {
+ Self::new(pos, pos)
}
/// Create a new span with the earlier start and later end position.
@@ -169,70 +168,35 @@ impl Debug for Span {
}
}
-/// Zero-indexed line-column position in source code.
+/// A byte position.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
-pub struct Pos {
- /// The zero-indexed line.
- pub line: usize,
- /// The zero-indexed column.
- pub column: usize,
-}
+pub struct Pos(pub u32);
impl Pos {
- /// The line 0, column 0 position.
- pub const ZERO: Self = Self { line: 0, column: 0 };
-
- /// Create a new position from line and column.
- pub fn new(line: usize, column: usize) -> Self {
- Self { line, column }
- }
-}
+ /// The zero position.
+ pub const ZERO: Self = Self(0);
-impl Offset for Pos {
- fn offset(self, by: Self) -> Self {
- by + self
+ /// Convert to a usize for indexing.
+ pub fn to_usize(self) -> usize {
+ self.0 as usize
}
}
-impl Add for Pos {
- type Output = Self;
-
- fn add(self, rhs: Self) -> Self {
- if rhs.line == 0 {
- Self {
- line: self.line,
- column: self.column + rhs.column,
- }
- } else {
- Self {
- line: self.line + rhs.line,
- column: rhs.column,
- }
- }
+impl From<u32> for Pos {
+ fn from(index: u32) -> Self {
+ Self(index)
}
}
-impl Sub for Pos {
- type Output = Self;
-
- fn sub(self, rhs: Self) -> Self {
- if self.line == rhs.line {
- Self {
- line: 0,
- column: self.column - rhs.column,
- }
- } else {
- Self {
- line: self.line - rhs.line,
- column: self.column,
- }
- }
+impl Offset for Pos {
+ fn offset(self, by: Self) -> Self {
+ Pos(self.0 + by.0)
}
}
impl Debug for Pos {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- write!(f, "{}:{}", self.line, self.column)
+ self.0.fmt(f)
}
}