summary | refs | log | tree | commit | diff
path: root/script/unicodify.sed
diff options
context:
space:
mode:
Diffstat (limited to 'script/unicodify.sed')
-rw-r--r-- script/unicodify.sed 19
1 files changed, 13 insertions, 6 deletions
diff --git a/script/unicodify.sed b/script/unicodify.sed
index 4668732..138003e 100644
--- a/script/unicodify.sed
+++ b/script/unicodify.sed
@@ -1,6 +1,12 @@
-# Remove spaces on either end of an em-dash or ellipsis.
-s,[[:space:]]--[[:space:]],\&\#8212;,g
-s,[[:space:]]\.\.\.[[:space:]],\&\#8230;,g
+# Remove Asciidoc comments now, for the sake of getting more accurate
+# wordcounts.
+s,//.*,,g
+
+# Remove spaces on either end of an em-dash.
+s,[[:space:]]*--[[:space:]]*,\&\#8212;,g
+
+# Remove spaces before an ellipsis, while ensuring one space after.
+s,[[:space:]]*\.\.\.[[:space:]]*,\&\#8230;\ ,g
# Explicitly handle curly double quotes before curly single quotes.
s,"`,\&\#8220;,g
@@ -14,6 +20,7 @@ s,\è,\&\#232;,g
s,\é,\&\#233;,g
s,\ï,\&\#239;,g
-# Remove Asciidoc comments now, for the sake of getting more accurate
-# wordcounts.
-s,//.*,,g
+# Remove spaces before a close-quote, which might have accidentally been
+# introduced while converting ellipses earlier.
+s,[[:space:]]*\&\#8221;,\&\#8221;,g
+s,\ \&\#8221;,\&\#8221;,g