diff options
| author | Charlotte Koch <charlotte@magentastripe.com> | 2025-05-13 15:57:41 -0700 |
|---|---|---|
| committer | Charlotte Koch <charlotte@magentastripe.com> | 2025-05-13 15:57:41 -0700 |
| commit | 09749920569e7426e028cc284404d076526b4bb1 (patch) | |
| tree | 9bc679e6c041a4ae49722548e7d4b2daf3971ee8 /script/unicodify.rb | |
| parent | f5c65e7566b9ee4fb15e86d673ee684a3bee407c (diff) | |
WIL-6 WIP better unicode substitutions (branch: better_unicode)
Diffstat (limited to 'script/unicodify.rb')
| -rw-r--r-- | script/unicodify.rb | 23 |
1 files changed, 23 insertions, 0 deletions
# frozen_string_literal: true

#
# unicodify.rb
# Charlotte Koch <charlotte@magentastripe.com>
#
# This file is part of Willora.
#
# This script translates HTML entities on the standard input to numerical
# Unicode codepoints on the standard output. This script uses a whole bunch
# of memory in order to keep it fast.
#

require 'json'

# Maps every entity name to its numeric character reference(s).
#
# @param entities [Hash{String => Hash}] entity name (e.g. "&amp;") mapped to
#   a record whose "codepoints" key holds an Array of Integers
# @return [Hash{String => String}] entity name mapped to the concatenated
#   "&#N;" references, one per codepoint
# @raise [KeyError] if a record has no "codepoints" key
def numeric_references(entities)
  entities.transform_values do |value|
    value.fetch('codepoints').map { |n| format('&#%d;', n) }.join
  end
end

# Replaces every known HTML entity in +text+ with numeric character
# references, in a single pass over the input.
#
# Substituting one entity at a time (one gsub! per table entry) lets a short
# name clobber a longer one that shares its prefix. If the table contains
# both "&amp" and "&amp;" (the WHATWG entity table does -- presumably that is
# what entities.min.json holds; verify), "&amp;" would turn into "&#38;;".
# Trying the longest names first in one alternation avoids this, and also
# scans the input once instead of once per entity.
#
# NOTE(review): matching with a Regexp requires +text+ to be validly encoded;
# confirm that callers never pipe in broken byte sequences.
#
# @param text [String] input that may contain HTML entities
# @param entities [Hash{String => Hash}] see #numeric_references
# @return [String] a new string with the entities substituted
def unicodify(text, entities)
  replacements = numeric_references(entities)

  # Alternation picks the first branch that matches at a position, so the
  # names are ordered longest-first to get longest-match semantics.
  longest_first = replacements.keys.sort_by { |name| -name.length }
  pattern = Regexp.union(longest_first)

  text.gsub(pattern, replacements)
end

entities = JSON.parse(File.read('./private/entities.min.json'))

$stdout.write(unicodify($stdin.read, entities))
