summaryrefslogtreecommitdiff
path: root/script/unicodify.rb
diff options
context:
space:
mode:
authorCharlotte Koch <charlotte@magentastripe.com>2025-05-13 15:57:41 -0700
committerCharlotte Koch <charlotte@magentastripe.com>2025-05-13 15:57:41 -0700
commit09749920569e7426e028cc284404d076526b4bb1 (patch)
tree9bc679e6c041a4ae49722548e7d4b2daf3971ee8 /script/unicodify.rb
parentf5c65e7566b9ee4fb15e86d673ee684a3bee407c (diff)
WIL-6 WIP better unicode substitutionsbetter_unicode
Diffstat (limited to 'script/unicodify.rb')
-rw-r--r--script/unicodify.rb23
1 files changed, 23 insertions, 0 deletions
diff --git a/script/unicodify.rb b/script/unicodify.rb
new file mode 100644
index 0000000..aac8cbd
--- /dev/null
+++ b/script/unicodify.rb
@@ -0,0 +1,23 @@
+#
+# unicodify.rb
+# Charlotte Koch <charlotte@magentastripe.com>
+#
+# This file is part of Willora.
+#
+# This script translates named HTML entities on the standard input to decimal
+# numeric character references (e.g. "&#169;") on the standard output. It
+# reads all of its input into memory at once in order to keep it fast.
+#
+
+require 'json'
+
+entities = JSON.load(File.read("./private/entities.min.json"))
+
+out = $stdin.read
+
+entities.each do |entity, value|
+ result = value["codepoints"].map { |n| sprintf('&#%d;', n) }.join("")
+ out.gsub!(entity, result)
+end
+
+$stdout.write(out)