diff options
Diffstat (limited to 'src/model/str.rs')
| -rw-r--r-- | src/model/str.rs | 514 |
1 files changed, 0 insertions, 514 deletions
diff --git a/src/model/str.rs b/src/model/str.rs deleted file mode 100644 index 5fcc1d05..00000000 --- a/src/model/str.rs +++ /dev/null @@ -1,514 +0,0 @@ -use std::borrow::{Borrow, Cow}; -use std::fmt::{self, Debug, Display, Formatter, Write}; -use std::hash::{Hash, Hasher}; -use std::ops::{Add, AddAssign, Deref}; - -use ecow::EcoString; -use unicode_segmentation::UnicodeSegmentation; - -use super::{castable, dict, Array, Dict, Value}; -use crate::diag::StrResult; -use crate::geom::GenAlign; - -/// Create a new [`Str`] from a format string. -#[macro_export] -#[doc(hidden)] -macro_rules! __format_str { - ($($tts:tt)*) => {{ - $crate::model::Str::from($crate::model::eco_format!($($tts)*)) - }}; -} - -#[doc(inline)] -pub use crate::__format_str as format_str; -#[doc(hidden)] -pub use ecow::eco_format; - -/// An immutable reference counted string. -#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct Str(EcoString); - -impl Str { - /// Create a new, empty string. - pub fn new() -> Self { - Self(EcoString::new()) - } - - /// The length of the string in bytes. - pub fn len(&self) -> i64 { - self.0.len() as i64 - } - - /// A string slice containing the entire string. - pub fn as_str(&self) -> &str { - self - } - - /// Extract the first grapheme cluster. - pub fn first(&self) -> StrResult<Self> { - self.0 - .graphemes(true) - .next() - .map(Into::into) - .ok_or_else(string_is_empty) - } - - /// Extract the last grapheme cluster. - pub fn last(&self) -> StrResult<Self> { - self.0 - .graphemes(true) - .next_back() - .map(Into::into) - .ok_or_else(string_is_empty) - } - - /// Extract the grapheme cluster at the given index. - pub fn at(&self, index: i64) -> StrResult<Self> { - let len = self.len(); - let grapheme = self.0[self.locate(index)?..] - .graphemes(true) - .next() - .ok_or_else(|| out_of_bounds(index, len))?; - Ok(grapheme.into()) - } - - /// Extract a contigous substring. - pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> { - let start = self.locate(start)?; - let end = self.locate(end.unwrap_or(self.len()))?.max(start); - Ok(self.0[start..end].into()) - } - - /// The grapheme clusters the string consists of. - pub fn clusters(&self) -> Array { - self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect() - } - - /// The codepoints the string consists of. - pub fn codepoints(&self) -> Array { - self.chars().map(|c| Value::Str(c.into())).collect() - } - - /// Whether the given pattern exists in this string. - pub fn contains(&self, pattern: StrPattern) -> bool { - match pattern { - StrPattern::Str(pat) => self.0.contains(pat.as_str()), - StrPattern::Regex(re) => re.is_match(self), - } - } - - /// Whether this string begins with the given pattern. - pub fn starts_with(&self, pattern: StrPattern) -> bool { - match pattern { - StrPattern::Str(pat) => self.0.starts_with(pat.as_str()), - StrPattern::Regex(re) => re.find(self).map_or(false, |m| m.start() == 0), - } - } - - /// Whether this string ends with the given pattern. - pub fn ends_with(&self, pattern: StrPattern) -> bool { - match pattern { - StrPattern::Str(pat) => self.0.ends_with(pat.as_str()), - StrPattern::Regex(re) => { - re.find_iter(self).last().map_or(false, |m| m.end() == self.0.len()) - } - } - } - - /// The text of the pattern's first match in this string. - pub fn find(&self, pattern: StrPattern) -> Option<Self> { - match pattern { - StrPattern::Str(pat) => self.0.contains(pat.as_str()).then(|| pat), - StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()), - } - } - - /// The position of the pattern's first match in this string. - pub fn position(&self, pattern: StrPattern) -> Option<i64> { - match pattern { - StrPattern::Str(pat) => self.0.find(pat.as_str()).map(|i| i as i64), - StrPattern::Regex(re) => re.find(self).map(|m| m.start() as i64), - } - } - - /// The start and, text and capture groups (if any) of the first match of - /// the pattern in this string. - pub fn match_(&self, pattern: StrPattern) -> Option<Dict> { - match pattern { - StrPattern::Str(pat) => { - self.0.match_indices(pat.as_str()).next().map(match_to_dict) - } - StrPattern::Regex(re) => re.captures(self).map(captures_to_dict), - } - } - - /// The start, end, text and capture groups (if any) of all matches of the - /// pattern in this string. - pub fn matches(&self, pattern: StrPattern) -> Array { - match pattern { - StrPattern::Str(pat) => self - .0 - .match_indices(pat.as_str()) - .map(match_to_dict) - .map(Value::Dict) - .collect(), - StrPattern::Regex(re) => re - .captures_iter(self) - .map(captures_to_dict) - .map(Value::Dict) - .collect(), - } - } - - /// Split this string at whitespace or a specific pattern. - pub fn split(&self, pattern: Option<StrPattern>) -> Array { - let s = self.as_str(); - match pattern { - None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(), - Some(StrPattern::Str(pat)) => { - s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect() - } - Some(StrPattern::Regex(re)) => { - re.split(s).map(|v| Value::Str(v.into())).collect() - } - } - } - - /// Trim either whitespace or the given pattern at both or just one side of - /// the string. If `repeat` is true, the pattern is trimmed repeatedly - /// instead of just once. Repeat must only be given in combination with a - /// pattern. - pub fn trim( - &self, - pattern: Option<StrPattern>, - at: Option<StrSide>, - repeat: bool, - ) -> Self { - let mut start = matches!(at, Some(StrSide::Start) | None); - let end = matches!(at, Some(StrSide::End) | None); - - let trimmed = match pattern { - None => match at { - None => self.0.trim(), - Some(StrSide::Start) => self.0.trim_start(), - Some(StrSide::End) => self.0.trim_end(), - }, - Some(StrPattern::Str(pat)) => { - let pat = pat.as_str(); - let mut s = self.as_str(); - if repeat { - if start { - s = s.trim_start_matches(pat); - } - if end { - s = s.trim_end_matches(pat); - } - } else { - if start { - s = s.strip_prefix(pat).unwrap_or(s); - } - if end { - s = s.strip_suffix(pat).unwrap_or(s); - } - } - s - } - Some(StrPattern::Regex(re)) => { - let s = self.as_str(); - let mut last = 0; - let mut range = 0..s.len(); - - for m in re.find_iter(s) { - // Does this match follow directly after the last one? - let consecutive = last == m.start(); - - // As long as we're consecutive and still trimming at the - // start, trim. - start &= consecutive; - if start { - range.start = m.end(); - start &= repeat; - } - - // Reset end trim if we aren't consecutive anymore or aren't - // repeating. - if end && (!consecutive || !repeat) { - range.end = m.start(); - } - - last = m.end(); - } - - // Is the last match directly at the end? - if last < s.len() { - range.end = s.len(); - } - - &s[range.start..range.start.max(range.end)] - } - }; - - trimmed.into() - } - - /// Replace at most `count` occurances of the given pattern with a - /// replacement string (beginning from the start). - pub fn replace(&self, pattern: StrPattern, with: Self, count: Option<usize>) -> Self { - match pattern { - StrPattern::Str(pat) => match count { - Some(n) => self.0.replacen(pat.as_str(), &with, n).into(), - None => self.0.replace(pat.as_str(), &with).into(), - }, - StrPattern::Regex(re) => match count { - Some(n) => re.replacen(self, n, with.as_str()).into(), - None => re.replace(self, with.as_str()).into(), - }, - } - } - - /// Repeat the string a number of times. - pub fn repeat(&self, n: i64) -> StrResult<Self> { - let n = usize::try_from(n) - .ok() - .and_then(|n| self.0.len().checked_mul(n).map(|_| n)) - .ok_or_else(|| format!("cannot repeat this string {} times", n))?; - - Ok(Self(self.0.repeat(n))) - } - - /// Resolve an index. - fn locate(&self, index: i64) -> StrResult<usize> { - let wrapped = - if index >= 0 { Some(index) } else { self.len().checked_add(index) }; - - let resolved = wrapped - .and_then(|v| usize::try_from(v).ok()) - .filter(|&v| v <= self.0.len()) - .ok_or_else(|| out_of_bounds(index, self.len()))?; - - if !self.0.is_char_boundary(resolved) { - return Err(not_a_char_boundary(index)); - } - - Ok(resolved) - } -} - -/// The out of bounds access error message. -#[cold] -fn out_of_bounds(index: i64, len: i64) -> EcoString { - eco_format!("string index out of bounds (index: {}, len: {})", index, len) -} - -/// The char boundary access error message. -#[cold] -fn not_a_char_boundary(index: i64) -> EcoString { - eco_format!("string index {} is not a character boundary", index) -} - -/// The error message when the string is empty. -#[cold] -fn string_is_empty() -> EcoString { - "string is empty".into() -} - -/// Convert an item of std's `match_indices` to a dictionary. -fn match_to_dict((start, text): (usize, &str)) -> Dict { - dict! { - "start" => Value::Int(start as i64), - "end" => Value::Int((start + text.len()) as i64), - "text" => Value::Str(text.into()), - "captures" => Value::Array(Array::new()), - } -} - -/// Convert regex captures to a dictionary. -fn captures_to_dict(cap: regex::Captures) -> Dict { - let m = cap.get(0).expect("missing first match"); - dict! { - "start" => Value::Int(m.start() as i64), - "end" => Value::Int(m.end() as i64), - "text" => Value::Str(m.as_str().into()), - "captures" => Value::Array( - cap.iter() - .skip(1) - .map(|opt| opt.map_or(Value::None, |m| m.as_str().into())) - .collect(), - ), - } -} - -impl Deref for Str { - type Target = str; - - fn deref(&self) -> &str { - &self.0 - } -} - -impl Display for Str { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.pad(self) - } -} - -impl Debug for Str { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.write_char('"')?; - for c in self.chars() { - match c { - '\0' => f.write_str("\\u{0}")?, - '\'' => f.write_str("'")?, - '"' => f.write_str(r#"\""#)?, - _ => Display::fmt(&c.escape_debug(), f)?, - } - } - f.write_char('"') - } -} - -impl Add for Str { - type Output = Self; - - fn add(mut self, rhs: Self) -> Self::Output { - self += rhs; - self - } -} - -impl AddAssign for Str { - fn add_assign(&mut self, rhs: Self) { - self.0.push_str(rhs.as_str()); - } -} - -impl AsRef<str> for Str { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow<str> for Str { - fn borrow(&self) -> &str { - self - } -} - -impl From<char> for Str { - fn from(c: char) -> Self { - Self(c.into()) - } -} - -impl From<&str> for Str { - fn from(s: &str) -> Self { - Self(s.into()) - } -} - -impl From<EcoString> for Str { - fn from(s: EcoString) -> Self { - Self(s) - } -} - -impl From<String> for Str { - fn from(s: String) -> Self { - Self(s.into()) - } -} - -impl From<Cow<'_, str>> for Str { - fn from(s: Cow<str>) -> Self { - Self(s.into()) - } -} - -impl FromIterator<char> for Str { - fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self { - Self(iter.into_iter().collect()) - } -} - -impl From<Str> for EcoString { - fn from(str: Str) -> Self { - str.0 - } -} - -impl From<Str> for String { - fn from(s: Str) -> Self { - s.0.into() - } -} - -/// A regular expression. -#[derive(Clone)] -pub struct Regex(regex::Regex); - -impl Regex { - /// Create a new regular expression. - pub fn new(re: &str) -> StrResult<Self> { - regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}")) - } -} - -impl Deref for Regex { - type Target = regex::Regex; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Debug for Regex { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "regex({:?})", self.0.as_str()) - } -} - -impl PartialEq for Regex { - fn eq(&self, other: &Self) -> bool { - self.0.as_str() == other.0.as_str() - } -} - -impl Hash for Regex { - fn hash<H: Hasher>(&self, state: &mut H) { - self.0.as_str().hash(state); - } -} - -/// A pattern which can be searched for in a string. -#[derive(Debug, Clone)] -pub enum StrPattern { - /// Just a string. - Str(Str), - /// A regular expression. - Regex(Regex), -} - -castable! { - StrPattern, - text: Str => Self::Str(text), - regex: Regex => Self::Regex(regex), -} - -/// A side of a string. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] -pub enum StrSide { - /// The logical start of the string, may be left or right depending on the - /// language. - Start, - /// The logical end of the string. - End, -} - -castable! { - StrSide, - align: GenAlign => match align { - GenAlign::Start => Self::Start, - GenAlign::End => Self::End, - _ => Err("expected either `start` or `end`")?, - }, -} |
