blob: 8367b6ac1bbd1a61186c145d756f93bcccafbab7 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
#
# The entities.json file from WHATWG contains a bunch of duplicates. In
# particular, I don't care about entities that aren't terminated with a
# semicolon -- i.e., Willora users MUST terminate HTML codes with a
# semicolon.
#
# This script massages entities.json into a smaller and (in my opinion)
# equivalent file.
#
# References:
#
# - WHATWG's table of named chars
# https://html.spec.whatwg.org/multipage/named-characters.html
#
# - URL for the entities.json itself
# https://html.spec.whatwg.org/entities.json
#
require 'json'
# Returns true when the last character of +str+ is a semicolon, and false
# otherwise (including for the empty string).
def semicoloned?(str)
  str.end_with?(";")
end
# Entry point.
#
# Reads the JSON file named by the first command-line argument, keeps only
# the entries whose key ends in a semicolon, removes the "characters" field
# from each kept entry, and prints the resulting object as JSON on stdout.
arg = ARGV.shift
# Fail with a readable message instead of a TypeError from File.read(nil).
abort "usage: #{File.basename($PROGRAM_NAME)} ENTITIES_JSON" if arg.nil?

# JSON.parse treats the file strictly as data; JSON.load is the more
# permissive loader and is not needed for a plain JSON document.
entities = JSON.parse(File.read(arg))

# Build fresh hashes rather than deleting keys from the parsed input, so the
# filtering step has no side effects on +entities+.
new_entities = entities.each_with_object({}) do |(name, info), kept|
  next unless semicoloned?(name)

  kept[name] = info.reject { |field, _value| field == "characters" }
end

puts JSON.generate(new_entities)
|