summaryrefslogtreecommitdiff
path: root/lib/asciidoctor/pdf/formatted_text/parser.treetop
blob: 561445f74155e177e828d789e20391db91705d4c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# frozen_string_literal: true
module Asciidoctor
module PDF
module FormattedText
grammar Markup
  rule text
    # First rule of the grammar (Treetop's default root unless the caller selects another one).
    # It simply delegates to complex.
    complex
  end

  rule complex
    # Zero or more text runs, elements and character references, in the order they occur.
    (cdata / element / charref)* {
      # Returns an Array holding the content of each matched child node.
      def content
        elements.map(&:content)
      end
    }
  end

  rule element
    # strict tag matching (costs a minor toll)
    # void_element / start_tag complex end_tag &{|seq| seq[0].name == seq[2].name } {

    # The active form below does not compare the end tag name with the start tag name.
    void_element / start_tag complex end_tag {
      # NOTE: this method belongs to the second alternative only; a void element provides its own content
      def content
        opening, body = elements
        {
          type: :element,
          name: opening.name.to_sym,
          attributes: opening.attributes,
          pcdata: body.content
        }
      end
    }
  end

  rule void_element
    # A void tag with optional attributes; the closing slash (optionally preceded by spaces) may be omitted.
    '<' void_tag_name attributes (spaces? '/')? '>' {
      # Returns an element Hash with the tag name as a Symbol and the attributes; there is no pcdata entry.
      def content
        tag, attrs = elements.values_at(1, 2)
        { type: :element, name: tag.text_value.to_sym, attributes: attrs.content }
      end
    }
  end

  rule start_tag
    # Opening tag of a non-void element: '<', a tag name, zero or more attributes, then '>'.
    '<' tag_name attributes '>' {
      # Returns the matched tag name as a String (second element of the sequence).
      def name
        elements[1].text_value
      end

      # Returns the content of the attributes node (third element of the sequence).
      def attributes
        elements[2].content
      end
    }
  end

  rule tag_name
    # Closed list of tag names accepted for non-void elements.
    # This is a PEG ordered choice, so alternatives are tried left to right and the first match wins.
    # QUESTION: faster to do regex?
    # QUESTION: what about supporting hr?
    'a' / 'strong' / 'em' / 'code' / 'font' / 'span' / 'button' / 'kbd' / 'sup' / 'sub' / 'mark' / 'menu' / 'del'
  end

  rule void_tag_name
    # Tag names accepted by void_element, i.e. tags parsed without content or an end tag.
    'br' / 'img'
  end

  rule attributes
    # Zero or more attribute nodes.
    attribute* {
      # Returns a Hash mapping each attribute name (as a Symbol) to its value.
      # When a name is repeated, the last occurrence overwrites the earlier ones.
      def content
        elements.each_with_object({}) do |attr, acc|
          key, val = attr.content
          acc[key.to_sym] = val
        end
      end
    }
  end

  rule attribute
    # Leading spaces, a name made of lowercase letters and underscores, then a double-quoted value.
    spaces [a-z_]+ '=' '"' [^"]* '"' {
      # Returns a two-item Array of Strings: the name (element 1) and the unquoted value (element 4).
      def content
        elements.values_at(1, 4).map(&:text_value)
      end
    }
  end

  rule end_tag
    # Closing tag of a non-void element: '</', a tag name, then '>'.
    '</' tag_name '>' {
      # Returns the matched tag name as a String (second element of the sequence).
      def name
        elements[1].text_value
      end
    }
  end

  rule cdata
    # A run of one or more characters other than '<' and '&'.
    [^<&]+ {
      # Returns a text Hash whose value is the matched string.
      def content
        { type: :text, value: text_value }
      end
    }
  end

  rule charref
    # A character reference in decimal, hexadecimal or named form, delimited by '&' and ';'.
    '&' ('#' character_decimal / '#x' character_hex / character_name) ';' {
      # Returns a charref Hash. The value is a Symbol for a named reference, an Integer for a
      # decimal reference and the unconverted digit String for a hexadecimal reference.
      def content
        ref_data = elements[1]
        # a named reference matches as a terminal node; the numeric forms are prefix + digits sequences
        return { type: :charref, reference_type: :name, value: ref_data.text_value.to_sym } if ref_data.terminal?

        prefix, digits = ref_data.elements
        if prefix.text_value == '#'
          { type: :charref, reference_type: :decimal, value: digits.text_value.to_i }
        else
          { type: :charref, reference_type: :hex, value: digits.text_value }
        end
      end
    }
  end

  rule character_decimal
    # Between 2 and 6 decimal digits (Treetop occurrence range).
    [0-9] 2..6
  end

  rule character_hex
    # Between 2 and 5 hexadecimal digits, in either letter case (Treetop occurrence range).
    [0-9a-fA-F] 2..5
  end

  rule character_name
    # Names accepted in a named character reference; any other name fails to match.
    'amp' / 'apos' / 'gt' / 'lt' / 'nbsp' / 'quot'
  end

  rule spaces
    # One or more literal space characters; tabs and newlines are not matched.
    ' '+
  end
end
end
end
end