diff options
| author | Laurenz <laurmaedje@gmail.com> | 2022-07-26 23:24:50 +0200 |
|---|---|---|
| committer | Laurenz <laurmaedje@gmail.com> | 2022-07-26 23:27:44 +0200 |
| commit | fc574b39454aec77cf2c33270566225917c7c823 (patch) | |
| tree | ccebc217ce9f869bb0078753a7749789d77db551 /src/eval/str.rs | |
| parent | 1e9a5eda48c65096b482b396d550d139a4c2e61d (diff) | |
New `Str` type with methods
Diffstat (limited to 'src/eval/str.rs')
| -rw-r--r-- | src/eval/str.rs | 445 |
1 files changed, 421 insertions, 24 deletions
diff --git a/src/eval/str.rs b/src/eval/str.rs index a0345312..9d2375d3 100644 --- a/src/eval/str.rs +++ b/src/eval/str.rs @@ -1,42 +1,403 @@ -use std::fmt::{self, Debug, Formatter}; +use std::borrow::{Borrow, Cow}; +use std::fmt::{self, Debug, Formatter, Write}; use std::hash::{Hash, Hasher}; -use std::ops::Deref; +use std::ops::{Add, AddAssign, Deref}; -use super::{Array, Value}; +use unicode_segmentation::UnicodeSegmentation; + +use super::{Array, Dict, RawAlign, Value}; use crate::diag::StrResult; use crate::util::EcoString; -/// Extra methods on strings. -pub trait StrExt { - /// Repeat a string a number of times. - fn repeat(&self, n: i64) -> StrResult<EcoString>; +/// Create a new [`Str`] from a format string. +#[allow(unused_macros)] +macro_rules! format_str { + ($($tts:tt)*) => {{ + $crate::eval::Str::from(format_eco!($($tts)*)) + }}; +} + +/// An immutable reference counted string. +#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Str(EcoString); + +impl Str { + /// Create a new, empty string. + pub fn new() -> Self { + Self(EcoString::new()) + } + + /// The length of the string in bytes. + pub fn len(&self) -> i64 { + self.0.len() as i64 + } + + /// A string slice containing the entire string. + pub fn as_str(&self) -> &str { + self + } + + /// The codepoints the string consists of. + pub fn codepoints(&self) -> Array { + self.as_str().chars().map(|c| Value::Str(c.into())).collect() + } + + /// The grapheme clusters the string consists of. + pub fn graphemes(&self) -> Array { + self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect() + } + + /// Extract a contigous substring. + pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> { + let len = self.len(); + let start = self + .locate(start) + .filter(|&start| start <= self.0.len()) + .ok_or_else(|| out_of_bounds(start, len))?; + + let end = end.unwrap_or(self.len()); + let end = self + .locate(end) + .filter(|&end| end <= self.0.len()) + .ok_or_else(|| out_of_bounds(end, len))? + .max(start); + + Ok(self.0[start .. end].into()) + } + + /// Resolve an index. + fn locate(&self, index: i64) -> Option<usize> { + usize::try_from(if index >= 0 { + index + } else { + self.len().checked_add(index)? + }) + .ok() + } + + /// Whether the given pattern exists in this string. + pub fn contains(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.contains(pat.as_str()), + TextPattern::Regex(re) => re.is_match(self), + } + } + + /// Whether this string begins with the given pattern. + pub fn starts_with(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.starts_with(pat.as_str()), + TextPattern::Regex(re) => re.find(self).map_or(false, |m| m.start() == 0), + } + } + + /// Whether this string ends with the given pattern. + pub fn ends_with(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.ends_with(pat.as_str()), + TextPattern::Regex(re) => { + re.find_iter(self).last().map_or(false, |m| m.end() == self.0.len()) + } + } + } + + /// The text of the pattern's first match in this string. + pub fn find(&self, pattern: TextPattern) -> Option<Self> { + match pattern { + TextPattern::Str(pat) => self.0.contains(pat.as_str()).then(|| pat), + TextPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()), + } + } + + /// The position of the pattern's first match in this string. + pub fn position(&self, pattern: TextPattern) -> Option<i64> { + match pattern { + TextPattern::Str(pat) => self.0.find(pat.as_str()).map(|i| i as i64), + TextPattern::Regex(re) => re.find(self).map(|m| m.start() as i64), + } + } + + /// The start and, text and capture groups (if any) of the first match of + /// the pattern in this string. + pub fn match_(&self, pattern: TextPattern) -> Option<Dict> { + match pattern { + TextPattern::Str(pat) => { + self.0.match_indices(pat.as_str()).next().map(match_to_dict) + } + TextPattern::Regex(re) => re.captures(self).map(captures_to_dict), + } + } + + /// The start, end, text and capture groups (if any) of all matches of the + /// pattern in this string. + pub fn matches(&self, pattern: TextPattern) -> Array { + match pattern { + TextPattern::Str(pat) => self + .0 + .match_indices(pat.as_str()) + .map(match_to_dict) + .map(Value::Dict) + .collect(), + TextPattern::Regex(re) => re + .captures_iter(self) + .map(captures_to_dict) + .map(Value::Dict) + .collect(), + } + } /// Split this string at whitespace or a specific pattern. - fn split(&self, at: Option<EcoString>) -> Array; -} + pub fn split(&self, pattern: Option<TextPattern>) -> Array { + let s = self.as_str(); + match pattern { + None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(), + Some(TextPattern::Str(pat)) => { + s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect() + } + Some(TextPattern::Regex(re)) => { + re.split(s).map(|v| Value::Str(v.into())).collect() + } + } + } -impl StrExt for EcoString { - fn repeat(&self, n: i64) -> StrResult<EcoString> { + /// Trim either whitespace or the given pattern at both or just one side of + /// the string. If `repeat` is true, the pattern is trimmed repeatedly + /// instead of just once. Repeat must only be given in combination with a + /// pattern. + pub fn trim( + &self, + pattern: Option<TextPattern>, + at: Option<TextSide>, + repeat: bool, + ) -> Self { + let mut start = matches!(at, Some(TextSide::Start) | None); + let end = matches!(at, Some(TextSide::End) | None); + + let trimmed = match pattern { + None => match at { + None => self.0.trim(), + Some(TextSide::Start) => self.0.trim_start(), + Some(TextSide::End) => self.0.trim_end(), + }, + Some(TextPattern::Str(pat)) => { + let pat = pat.as_str(); + let mut s = self.as_str(); + if repeat { + if start { + s = s.trim_start_matches(pat); + } + if end { + s = s.trim_end_matches(pat); + } + } else { + if start { + s = s.strip_prefix(pat).unwrap_or(s); + } + if end { + s = s.strip_suffix(pat).unwrap_or(s); + } + } + s + } + Some(TextPattern::Regex(re)) => { + let s = self.as_str(); + let mut last = 0; + let mut range = 0 .. s.len(); + + for m in re.find_iter(s) { + // Does this match follow directly after the last one? + let consecutive = last == m.start(); + + // As long as we're consecutive and still trimming at the + // start, trim. + start &= consecutive; + if start { + range.start = m.end(); + start &= repeat; + } + + // Reset end trim if we aren't consecutive anymore or aren't + // repeating. + if end && (!consecutive || !repeat) { + range.end = m.start(); + } + + last = m.end(); + } + + // Is the last match directly at the end? + if last < s.len() { + range.end = s.len(); + } + + &s[range.start .. range.start.max(range.end)] + } + }; + + trimmed.into() + } + + /// Replace at most `count` occurances of the given pattern with a + /// replacement string (beginning from the start). + pub fn replace( + &self, + pattern: TextPattern, + with: Self, + count: Option<usize>, + ) -> Self { + match pattern { + TextPattern::Str(pat) => match count { + Some(n) => self.0.replacen(pat.as_str(), &with, n).into(), + None => self.0.replace(pat.as_str(), &with).into(), + }, + TextPattern::Regex(re) => match count { + Some(n) => re.replacen(self, n, with.as_str()).into(), + None => re.replace(self, with.as_str()).into(), + }, + } + } + + /// Repeat the string a number of times. + pub fn repeat(&self, n: i64) -> StrResult<Self> { let n = usize::try_from(n) .ok() - .and_then(|n| self.len().checked_mul(n).map(|_| n)) + .and_then(|n| self.0.len().checked_mul(n).map(|_| n)) .ok_or_else(|| format!("cannot repeat this string {} times", n))?; - Ok(self.repeat(n)) + Ok(Self(self.0.repeat(n))) } +} - fn split(&self, at: Option<EcoString>) -> Array { - if let Some(pat) = at { - self.as_str() - .split(pat.as_str()) - .map(|s| Value::Str(s.into())) - .collect() - } else { - self.as_str() - .split_whitespace() - .map(|s| Value::Str(s.into())) - .collect() +/// The out of bounds access error message. +#[cold] +fn out_of_bounds(index: i64, len: i64) -> String { + format!( + "string index out of bounds (index: {}, len: {})", + index, len + ) +} + +/// Convert an item of std's `match_indices` to a dictionary. +fn match_to_dict((start, text): (usize, &str)) -> Dict { + dict! { + "start" => Value::Int(start as i64), + "end" => Value::Int((start + text.len()) as i64), + "text" => Value::Str(text.into()), + "captures" => Value::Array(Array::new()), + } +} + +/// Convert regex captures to a dictionary. +fn captures_to_dict(cap: regex::Captures) -> Dict { + let m = cap.get(0).expect("missing first match"); + dict! { + "start" => Value::Int(m.start() as i64), + "end" => Value::Int(m.end() as i64), + "text" => Value::Str(m.as_str().into()), + "captures" => Value::Array( + cap.iter() + .skip(1) + .map(|opt| opt.map_or(Value::None, |m| m.as_str().into())) + .collect(), + ), + } +} + +impl Deref for Str { + type Target = str; + + fn deref(&self) -> &str { + &self.0 + } +} + +impl Debug for Str { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_char('"')?; + for c in self.chars() { + match c { + '\\' => f.write_str(r"\\")?, + '"' => f.write_str(r#"\""#)?, + '\n' => f.write_str(r"\n")?, + '\r' => f.write_str(r"\r")?, + '\t' => f.write_str(r"\t")?, + _ => f.write_char(c)?, + } } + f.write_char('"') + } +} + +impl Add for Str { + type Output = Self; + + fn add(mut self, rhs: Self) -> Self::Output { + self += rhs; + self + } +} + +impl AddAssign for Str { + fn add_assign(&mut self, rhs: Self) { + self.0.push_str(rhs.as_str()); + } +} + +impl AsRef<str> for Str { + fn as_ref(&self) -> &str { + self + } +} + +impl Borrow<str> for Str { + fn borrow(&self) -> &str { + self + } +} + +impl From<char> for Str { + fn from(c: char) -> Self { + Self(c.into()) + } +} + +impl From<&str> for Str { + fn from(s: &str) -> Self { + Self(s.into()) + } +} + +impl From<EcoString> for Str { + fn from(s: EcoString) -> Self { + Self(s) + } +} + +impl From<String> for Str { + fn from(s: String) -> Self { + Self(s.into()) + } +} +impl From<Cow<'_, str>> for Str { + fn from(s: Cow<str>) -> Self { + Self(s.into()) + } +} +impl FromIterator<char> for Str { + fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl From<Str> for EcoString { + fn from(str: Str) -> Self { + str.0 + } +} + +impl From<Str> for String { + fn from(s: Str) -> Self { + s.0.into() } } @@ -76,3 +437,39 @@ impl Hash for Regex { self.0.as_str().hash(state); } } + +/// A pattern which can be searched for in a string. +#[derive(Debug, Clone)] +pub enum TextPattern { + /// Just a string. + Str(Str), + /// A regular expression. + Regex(Regex), +} + +castable! { + TextPattern, + Expected: "string or regular expression", + Value::Str(text) => Self::Str(text), + @regex: Regex => Self::Regex(regex.clone()), +} + +/// A side of a string. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] +pub enum TextSide { + /// The logical start of the string, may be left or right depending on the + /// language. + Start, + /// The logical end of the string. + End, +} + +castable! { + TextSide, + Expected: "start or end", + @align: RawAlign => match align { + RawAlign::Start => Self::Start, + RawAlign::End => Self::End, + _ => Err("expected either `start` or `end`")?, + }, +} |
