diff options
| | | |
|---|---|---|
| author | Charlotte Koch <charlotte@magentastripe.com> | 2025-04-06 13:04:30 -0700 |
| committer | Charlotte Koch <charlotte@magentastripe.com> | 2025-04-06 17:36:31 -0700 |
| commit | 31a6527fe830734191a7fc9ff78b6c0a5140688f (patch) | |
| tree | f13b03a3915980a14d0249ef1a4d76f7af724e0a /script/unicodify.sed | |
| parent | 4cbf1771db9a4703d8b9644dca3128f5acc9f485 (diff) | |
More nuanced way of handling ellipses
Diffstat (limited to 'script/unicodify.sed')
| -rw-r--r-- | script/unicodify.sed | 19 |
1 file changed, 13 insertions, 6 deletions
```diff
diff --git a/script/unicodify.sed b/script/unicodify.sed
index 4668732..138003e 100644
--- a/script/unicodify.sed
+++ b/script/unicodify.sed
@@ -1,6 +1,12 @@
-# Remove spaces on either end of an em-dash or ellipsis.
-s,[[:space:]]--[[:space:]],\&\#8212;,g
-s,[[:space:]]\.\.\.[[:space:]],\&\#8230;,g
+# Remove Asciidoc comments now, for the sake of getting more accurate
+# wordcounts.
+s,//.*,,g
+
+# Remove spaces on either end of an em-dash.
+s,[[:space:]]*--[[:space:]]*,\&\#8212;,g
+
+# Remove spaces before an ellipsis, while ensuring one space after.
+s,[[:space:]]*\.\.\.[[:space:]]*,\&\#8230;\ ,g
 
 # Explicitly handle curly double quotes before curly single quotes.
 s,"`,\&\#8220;,g
@@ -14,6 +20,7 @@ s,\è,\&\#232;,g
 s,\é,\&\#233;,g
 s,\ï,\&\#239;,g
 
-# Remove Asciidoc comments now, for the sake of getting more accurate
-# wordcounts.
-s,//.*,,g
+# Remove spaces before a close-quote, which might have accidentally been
+# introduced while converting ellipses earlier.
+s,[[:space:]]*\&\#8221;,\&\#8221;,g
+s,\ \&\#8221;,\&\#8221;,g
```
