summary | refs | log | tree | commit | diff
path: root/script/unicodify.sed
diff options
context:
space:
mode:
author: Charlotte Koch <charlotte@magentastripe.com> 2025-04-06 13:04:30 -0700
committer: Charlotte Koch <charlotte@magentastripe.com> 2025-04-06 17:36:31 -0700
commit: 31a6527fe830734191a7fc9ff78b6c0a5140688f (patch)
tree: f13b03a3915980a14d0249ef1a4d76f7af724e0a /script/unicodify.sed
parent: 4cbf1771db9a4703d8b9644dca3128f5acc9f485 (diff)
More nuanced way of handling ellipses
Diffstat (limited to 'script/unicodify.sed')
-rw-r--r-- script/unicodify.sed 19
1 file changed, 13 insertions, 6 deletions
diff --git a/script/unicodify.sed b/script/unicodify.sed
index 4668732..138003e 100644
--- a/script/unicodify.sed
+++ b/script/unicodify.sed
@@ -1,6 +1,12 @@
-# Remove spaces on either end of an em-dash or ellipsis.
-s,[[:space:]]--[[:space:]],\&\#8212;,g
-s,[[:space:]]\.\.\.[[:space:]],\&\#8230;,g
+# Remove Asciidoc comments now, for the sake of getting more accurate
+# wordcounts.
+s,//.*,,g
+
+# Remove spaces on either end of an em-dash.
+s,[[:space:]]*--[[:space:]]*,\&\#8212;,g
+
+# Remove spaces before an ellipsis, while ensuring one space after.
+s,[[:space:]]*\.\.\.[[:space:]]*,\&\#8230;\&nbsp;,g
# Explicitly handle curly double quotes before curly single quotes.
s,"`,\&\#8220;,g
@@ -14,6 +20,7 @@ s,\&egrave;,\&\#232;,g
s,\&eacute;,\&\#233;,g
s,\&iuml;,\&\#239;,g
-# Remove Asciidoc comments now, for the sake of getting more accurate
-# wordcounts.
-s,//.*,,g
+# Remove spaces before a close-quote, which might have accidentally been
+# introduced while converting ellipses earlier.
+s,[[:space:]]*\&\#8221;,\&\#8221;,g
+s,\&nbsp;\&\#8221;,\&\#8221;,g