summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLaurenz <laurmaedje@gmail.com>2023-02-17 10:29:55 +0100
committerLaurenz <laurmaedje@gmail.com>2023-02-17 10:53:47 +0100
commitdd5f07eb9110cc5e19dcb4441743a323128426fc (patch)
tree010384b8c8ab7a129df65bc0c802a1ac039beebb
parent585f6564874d16a8f81a6c29e73091a008ccd484 (diff)
Add `clusters` and `codepoints` methods
-rw-r--r--docs/src/reference/types.md30
-rw-r--r--src/model/methods.rs4
-rw-r--r--src/model/str.rs23
-rw-r--r--tests/typ/compiler/string.typ7
4 files changed, 44 insertions, 20 deletions
diff --git a/docs/src/reference/types.md b/docs/src/reference/types.md
index 11580c8b..47f8d9e1 100644
--- a/docs/src/reference/types.md
+++ b/docs/src/reference/types.md
@@ -197,18 +197,18 @@ $arrow.t.quad$
# String
A sequence of Unicode codepoints.
-You can iterate over the characters (or rather, grapheme clusters) of the string
-using a [for loop]($scripting/#loops). Strings can be added with
-the `+` operator, [joined together]($scripting/#blocks) and
-multiplied with integers.
+You can iterate over the grapheme clusters of the string using a
+[for loop]($scripting/#loops). A grapheme cluster is what a reader perceives as
+a single character; it can consist of several codepoints, e.g. the codepoints
+that together form a flag emoji. Strings can be added with the `+` operator,
+[joined together]($scripting/#blocks) and multiplied with integers.
Typst provides utility methods for string manipulation. Many of these methods
(e.g., `split`, `trim` and `replace`) operate on _patterns:_ A pattern can be
either a string or a [regular expression]($func/regex). This makes the methods
quite versatile.
-_Note:_ Currently all lengths and indices are expressed in terms of UTF-8 bytes.
-This _might_ change to grapheme clusters in the future.
+All lengths and indices are expressed in terms of UTF-8 bytes.
### Example
```example
@@ -236,20 +236,20 @@ The length of the string in UTF-8 encoded bytes.
- returns: integer
### first()
-Extract the first character (or rather, grapheme cluster) of the string.
+Extract the first grapheme cluster of the string.
Fails with an error if the string is empty.
- returns: any
### last()
-Extract the last character (or rather, grapheme cluster) of the string.
+Extract the last grapheme cluster of the string.
Fails with an error if the string is empty.
- returns: any
### at()
-Extract the first character (or rather, grapheme cluster) after the specified
-index. Fails with an error if the index is out of bounds.
+Extract the first grapheme cluster after the specified index. Fails with an
+error if the index is out of bounds.
- index: integer (positional, required)
The byte index.
@@ -269,6 +269,16 @@ Fails with an error if the start or end index is out of bounds.
as the `end` position. Mutually exclusive with `end`.
- returns: string
+### clusters()
+Returns the grapheme clusters of the string as an array of substrings.
+
+- returns: array
+
+### codepoints()
+Returns the Unicode codepoints of the string as an array of substrings.
+
+- returns: array
+
### contains()
Whether the string contains the specified pattern.
diff --git a/src/model/methods.rs b/src/model/methods.rs
index c0b63669..38ebebda 100644
--- a/src/model/methods.rs
+++ b/src/model/methods.rs
@@ -37,6 +37,8 @@ pub fn call(
}
Value::Str(string.slice(start, end).at(span)?)
}
+ "clusters" => Value::Array(string.clusters()),
+ "codepoints" => Value::Array(string.codepoints()),
"contains" => Value::Bool(string.contains(args.expect("pattern")?)),
"starts-with" => Value::Bool(string.starts_with(args.expect("pattern")?)),
"ends-with" => Value::Bool(string.ends_with(args.expect("pattern")?)),
@@ -218,6 +220,8 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] {
"string" => &[
("len", false),
("at", true),
+ ("clusters", false),
+ ("codepoints", false),
("contains", true),
("ends-with", true),
("find", true),
diff --git a/src/model/str.rs b/src/model/str.rs
index ae0ef899..8da5b50c 100644
--- a/src/model/str.rs
+++ b/src/model/str.rs
@@ -42,11 +42,6 @@ impl Str {
self
}
- /// The grapheme clusters the string consists of.
- pub fn graphemes(&self) -> Array {
- self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
- }
-
/// Extract the first grapheme cluster.
pub fn first(&self) -> StrResult<Self> {
self.0
@@ -82,6 +77,16 @@ impl Str {
Ok(self.0[start..end].into())
}
+ /// Returns an array holding one string value per grapheme cluster of this string, in order.
+ pub fn clusters(&self) -> Array {
+ self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
+ }
+
+ /// Returns an array holding one single-`char` string value per codepoint of this string, in order.
+ pub fn codepoints(&self) -> Array {
+ self.chars().map(|c| Value::Str(c.into())).collect()
+ }
+
/// Whether the given pattern exists in this string.
pub fn contains(&self, pattern: StrPattern) -> bool {
match pattern {
@@ -350,12 +355,10 @@ impl Debug for Str {
f.write_char('"')?;
for c in self.chars() {
match c {
- '\\' => f.write_str(r"\\")?,
+ '\0' => f.write_str("\\u{0}")?,
+ '\'' => f.write_str("'")?,
'"' => f.write_str(r#"\""#)?,
- '\n' => f.write_str(r"\n")?,
- '\r' => f.write_str(r"\r")?,
- '\t' => f.write_str(r"\t")?,
- _ => f.write_char(c)?,
+ _ => Display::fmt(&c.escape_debug(), f)?,
}
}
f.write_char('"')
diff --git a/tests/typ/compiler/string.typ b/tests/typ/compiler/string.typ
index 017e1cdd..7692b41f 100644
--- a/tests/typ/compiler/string.typ
+++ b/tests/typ/compiler/string.typ
@@ -46,6 +46,13 @@
#"🏳️‍🌈".slice(0, -1)
---
+// Test the `clusters` and `codepoints` methods.
+#test("abc".clusters(), ("a", "b", "c"))
+#test("abc".codepoints(), ("a", "b", "c"))
+#test("🏳️‍🌈!".clusters(), ("🏳️‍🌈", "!"))
+#test("🏳️‍🌈!".codepoints(), ("🏳", "\u{fe0f}", "\u{200d}", "🌈", "!"))
+
+---
// Test the `contains` method.
#test("abc".contains("b"), true)
#test("b" in "abc", true)