diff options
Diffstat (limited to 'script/unicodify.sed')
| -rw-r--r-- | script/unicodify.sed | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/script/unicodify.sed b/script/unicodify.sed
index 4668732..138003e 100644
--- a/script/unicodify.sed
+++ b/script/unicodify.sed
@@ -1,6 +1,12 @@
-# Remove spaces on either end of an em-dash or ellipsis.
-s,[[:space:]]--[[:space:]],\&\#8212;,g
-s,[[:space:]]\.\.\.[[:space:]],\&\#8230;,g
+# Remove Asciidoc comments now, for the sake of getting more accurate
+# wordcounts.
+s,//.*,,g
+
+# Remove spaces on either end of an em-dash.
+s,[[:space:]]*--[[:space:]]*,\&\#8212;,g
+
+# Remove spaces before an ellipsis, while ensuring one space after.
+s,[[:space:]]*\.\.\.[[:space:]]*,\&\#8230;\ ,g
 
 # Explicitly handle curly double quotes before curly single quotes.
 s,"`,\&\#8220;,g
@@ -14,6 +20,7 @@ s,\è,\&\#232;,g
 s,\é,\&\#233;,g
 s,\ï,\&\#239;,g
 
-# Remove Asciidoc comments now, for the sake of getting more accurate
-# wordcounts.
-s,//.*,,g
+# Remove spaces before a close-quote, which might have accidentally been
+# introduced while converting ellipses earlier.
+s,[[:space:]]*\&\#8221;,\&\#8221;,g
+s,\ \&\#8221;,\&\#8221;,g
