1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# frozen_string_literal: true
module Asciidoctor
module PDF
module FormattedText
grammar Markup
# First rule declared in the grammar; it simply delegates to the complex rule.
# NOTE(review): Treetop treats the first declared rule as the default root -- confirm against the parser setup.
rule text
complex
end
# A possibly empty run of text, elements, and character references.
# For each position the alternatives are tried in the order cdata, element, charref.
rule complex
  (cdata / element / charref)* {
    # Returns an Array holding the content of every matched child node,
    # in the order the children were matched.
    def content
      elements.map(&:content)
    end
  }
end
# An element is either a void element or a start tag, nested content, and an end tag.
# The end tag name is not compared with the start tag name; the commented-out
# predicate below shows the strict variant.
rule element
  # strict tag matching (costs a minor toll)
  # void_element / start_tag complex end_tag &{|seq| seq[0].name == seq[2].name } {
  void_element / start_tag complex end_tag {
    # NOTE: content only applies to non-void elements (second part of rule)
    # Returns a Hash describing the element: its name as a Symbol, the
    # attributes reported by the start tag, and the content of the nested
    # complex node under :pcdata.
    def content
      opening_tag = elements[0]
      {
        type: :element,
        name: opening_tag.name.to_sym,
        attributes: opening_tag.attributes,
        pcdata: elements[1].content,
      }
    end
  }
end
# A void element: '<', a void tag name, its attributes, an optional '/'
# (optionally preceded by spaces), and '>'. There is no nested content and no end tag.
rule void_element
  '<' void_tag_name attributes (spaces? '/')? '>' {
    # Returns a Hash describing the element: its name as a Symbol and the
    # content of the attributes node. No :pcdata key is present.
    def content
      name_text = elements[1].text_value
      attribute_map = elements[2].content
      { type: :element, name: name_text.to_sym, attributes: attribute_map }
    end
  }
end
# Opening tag of a non-void element: '<', a tag name, its attributes, and '>'.
rule start_tag
'<' tag_name attributes '>' {
# Returns the tag name as the text matched by the tag_name node (a String).
def name
elements[1].text_value
end
# Returns the content of the attributes node (second-to-last item of the sequence).
def attributes
elements[2].content
end
}
end
# Names accepted for elements that have a start tag and an end tag.
# The alternatives are literal strings tried in the order listed.
rule tag_name
# QUESTION: faster to do regex?
# QUESTION: what about supporting hr?
'a' / 'strong' / 'em' / 'code' / 'font' / 'span' / 'button' / 'kbd' / 'sup' / 'sub' / 'mark' / 'menu' / 'del'
end
# Names accepted for void elements (used by the void_element rule).
rule void_tag_name
'br' / 'img'
end
# Zero or more attributes following a tag name.
rule attributes
  attribute* {
    # Returns a Hash mapping each attribute name (as a Symbol) to its value.
    # When a name occurs more than once, the last occurrence wins.
    def content
      elements.each_with_object({}) do |attr_node, collected|
        key, val = attr_node.content
        collected[key.to_sym] = val
      end
    end
  }
end
# A single attribute: leading spaces, a name made of lowercase letters and
# underscores, '=', and a double-quoted value that contains no double quote.
rule attribute
  spaces [a-z_]+ '=' '"' [^"]* '"' {
    # Returns a two-item Array of Strings: the attribute name (sequence
    # position 1) and the text between the quotes (sequence position 4).
    def content
      elements.values_at(1, 4).map(&:text_value)
    end
  }
end
# Closing tag of a non-void element: '</', a tag name, and '>'.
rule end_tag
'</' tag_name '>' {
# Returns the tag name as the text matched by the tag_name node (a String).
def name
elements[1].text_value
end
}
end
# A run of one or more characters that are neither '<' nor '&'.
rule cdata
[^<&]+ {
# Returns a Hash tagging the matched text as a :text node; the text is kept verbatim.
def content
{ type: :text, value: text_value }
end
}
end
# A character reference: '&', then a decimal ('#' digits), hexadecimal
# ('#x' digits), or named form, then ';'.
rule charref
  '&' ('#' character_decimal / '#x' character_hex / character_name) ';' {
    # Returns a Hash with :type set to :charref plus the reference type and value:
    # * :name    -- value is the matched name as a Symbol
    # * :decimal -- value is the digits converted to an Integer
    # * :hex     -- value is the hex digits left as a String
    # A terminal node in the middle position is taken to be the named form;
    # otherwise the leading marker text tells decimal from hex.
    def content
      ref_data = elements[1]
      reference_type, value =
        if ref_data.terminal?
          [:name, ref_data.text_value.to_sym]
        else
          marker, digits = ref_data.elements
          if marker.text_value == '#'
            [:decimal, digits.text_value.to_i]
          else
            [:hex, digits.text_value]
          end
        end
      { type: :charref, reference_type: reference_type, value: value }
    end
  }
end
# Digits of a decimal character reference: between 2 and 6 characters from 0-9.
rule character_decimal
[0-9] 2..6
end
# Digits of a hexadecimal character reference: between 2 and 5 characters
# from 0-9, a-f, or A-F.
rule character_hex
[0-9a-fA-F] 2..5
end
# Names accepted for a named character reference.
rule character_name
'amp' / 'apos' / 'gt' / 'lt' / 'nbsp' / 'quot'
end
# One or more space characters (only ' '; tabs and newlines are not matched).
rule spaces
' '+
end
end
end
end
end
|