# path: lib/asciidoctor/pdf/formatted_text/transform.rb
# blob: 223c1335f40dca6e23b20df0a31a6344dcfcd22e
# frozen_string_literal: true

module Asciidoctor
  module PDF
    module FormattedText
      class Transform
        include TextTransformer

        # Null character used as placeholder text for fragments that have no visible text.
        DummyText = "\u0000"
        ZeroWidthSpace = "\u200b"
        LF = "\n"
        # NOTE: without trailing character, use of fallback font can change line height
        InnerLF = LF + ZeroWidthSpace
        DoubleSpace = '  '
        # Replacement text for each supported named character reference.
        CharEntityTable = {
          amp: '&',
          apos: "'",
          gt: '>',
          lt: '<',
          nbsp: "\u00a0",
          quot: '"',
        }
        # Matches a named ($1), decimal ($2), or hexadecimal ($3) character reference.
        CharRefRx = %r/&(?:(#{CharEntityTable.keys.join('|')})|#(?:(\d\d\d{0,4})|x(\h\h\h{0,3})));/
        # Matches a # followed by three to six hexadecimal digits.
        HexColorRx = /^#\h\h\h\h{0,3}$/
        # Maps a text decoration name to the corresponding fragment style.
        TextDecorationTable = {
          'underline' => :underline,
          'line-through' => :strikethrough,
        }
        # Maps the property portion of a role theme key to the fragment property it sets.
        ThemeKeyToFragmentProperty = {
          'background_color' => :background_color,
          'border_color' => :border_color,
          'border_offset' => :border_offset,
          'border_radius' => :border_radius,
          'border_width' => :border_width,
          'font_color' => :color,
          'font_family' => :font,
          'font_size' => :size,
          'text_decoration_color' => :text_decoration_color,
          'text_decoration_width' => :text_decoration_width,
          'text_transform' => :text_transform,
        }

        # Builds the table of fragment properties applied to each inline element and role.
        #
        # options - a Hash of options (default: {}):
        #           :merge_adjacent_text_nodes - when truthy, apply folds text that follows a text
        #                                        fragment into that fragment
        #           :theme - the theme from which the inline formatting is read; when absent, a
        #                    fixed set of built-in settings is used instead
        #
        # The resulting @theme_settings Hash is keyed by Symbol for built-in elements (:button, :code,
        # :kbd, :link, :mark, :menu) and by String for roles (e.g., 'big', 'underline').
        def initialize options = {}
          @merge_adjacent_text_nodes = options[:merge_adjacent_text_nodes]
          # TODO: add support for character spacing
          if (theme = options[:theme])
            # NOTE: several entries assign a local inside the Hash literal (e.g., button_bg_or_border) that
            # later entries in the same literal read, so the order of the entries is significant;
            # compact then drops every entry that resolved to nil
            @theme_settings = {
              button: {
                color: theme.button_font_color,
                font: theme.button_font_family,
                size: theme.button_font_size,
                styles: (to_styles theme.button_font_style),
                background_color: (button_bg_color = theme.button_background_color),
                border_width: (button_border_width = theme.button_border_width),
                border_color: button_border_width && (theme.button_border_color || theme.base_border_color),
                border_offset: (button_border_offset = (button_bg_or_border = button_bg_color || button_border_width) && theme.button_border_offset),
                border_radius: button_bg_or_border && theme.button_border_radius,
                align: button_border_offset && :center,
                callback: button_bg_or_border && [TextBackgroundAndBorderRenderer],
              }.compact,
              code: {
                color: theme.codespan_font_color,
                font: theme.codespan_font_family,
                size: theme.codespan_font_size,
                styles: (to_styles theme.codespan_font_style),
                background_color: (mono_bg_color = theme.codespan_background_color),
                border_width: (mono_border_width = theme.codespan_border_width),
                border_color: mono_border_width && (theme.codespan_border_color || theme.base_border_color),
                border_offset: (mono_border_offset = (mono_bg_or_border = mono_bg_color || mono_border_width) && theme.codespan_border_offset),
                border_radius: mono_bg_or_border && theme.codespan_border_radius,
                align: mono_border_offset && :center,
                callback: mono_bg_or_border && [TextBackgroundAndBorderRenderer],
              }.compact,
              kbd: {
                color: theme.kbd_font_color,
                # the kbd font family falls back to the codespan font family
                font: theme.kbd_font_family || theme.codespan_font_family,
                size: theme.kbd_font_size,
                styles: (to_styles theme.kbd_font_style),
                background_color: (kbd_bg_color = theme.kbd_background_color),
                border_width: (kbd_border_width = theme.kbd_border_width),
                border_color: kbd_border_width && (theme.kbd_border_color || theme.base_border_color),
                border_offset: (kbd_border_offset = (kbd_bg_or_border = kbd_bg_color || kbd_border_width) && theme.kbd_border_offset),
                border_radius: kbd_bg_or_border && theme.kbd_border_radius,
                align: kbd_border_offset && :center,
                callback: kbd_bg_or_border && [TextBackgroundAndBorderRenderer],
              }.compact,
              link: {
                color: theme.link_font_color,
                font: theme.link_font_family,
                size: theme.link_font_size,
                styles: (to_styles theme.link_font_style, theme.link_text_decoration),
                text_decoration_color: theme.link_text_decoration_color,
                text_decoration_width: theme.link_text_decoration_width,
                background_color: (link_bg_color = theme.link_background_color),
                border_offset: (link_border_offset = link_bg_color && theme.link_border_offset),
                align: link_border_offset && :center,
                callback: link_bg_color && [TextBackgroundAndBorderRenderer],
              }.compact,
              mark: {
                color: theme.mark_font_color,
                styles: (to_styles theme.mark_font_style),
                background_color: (mark_bg_color = theme.mark_background_color),
                border_offset: (mark_border_offset = mark_bg_color && theme.mark_border_offset),
                align: mark_border_offset && :center,
                callback: mark_bg_color && [TextBackgroundAndBorderRenderer],
              }.compact,
              menu: {
                color: theme.menu_font_color,
                font: theme.menu_font_family,
                size: theme.menu_font_size,
                styles: (to_styles theme.menu_font_style),
              }.compact,
            }
            # collect the settings for custom roles from theme keys of the form role_<name>_<property>;
            # the role name is the text between the role_ prefix and the next underscore
            @theme_settings.tap do |accum|
              roles_with_styles = [].to_set
              theme.each_pair do |key, val|
                next unless (key = key.to_s).start_with? 'role_'
                role, key = (key.slice 5, key.length).split '_', 2
                if (prop = ThemeKeyToFragmentProperty[key])
                  (accum[role] ||= {})[prop] = val
                  # a border width without a border color uses the base border color
                  if key == 'border_width' && val && !(theme[%(role_#{role}_border_color)])
                    accum[role][:border_color] = theme.base_border_color
                  end
                #elsif key == 'font_kerning'
                #  unless (resolved_val = val == 'none' ? false : (val == 'normal' ? true : nil)).nil?
                #    (accum[role] ||= {})[:kerning] = resolved_val
                #  end
                elsif key == 'font_style' || key == 'text_decoration'
                  # NOTE: font style and text decoration are combined into a single styles entry, so
                  # resolve them together once all keys have been visited
                  roles_with_styles << role
                end
              end
              roles_with_styles.each do |role|
                (accum[role] ||= {})[:styles] = to_styles theme[%(role_#{role}_font_style)], theme[%(role_#{role}_text_decoration)]
              end
            end
            # provide defaults for the built-in roles that the theme does not define
            @theme_settings['line-through'] = { styles: [:strikethrough].to_set } unless @theme_settings.key? 'line-through'
            @theme_settings['underline'] = { styles: [:underline].to_set } unless @theme_settings.key? 'underline'
            unless @theme_settings.key? 'big'
              if (base_font_size_large = theme.base_font_size_large)
                # express the large font size relative to the base font size
                @theme_settings['big'] = { size: %(#{(base_font_size_large / theme.base_font_size.to_f).round 5}em) }
              else
                @theme_settings['big'] = { size: '1.1667em' }
              end
            end
            unless @theme_settings.key? 'small'
              if (base_font_size_small = theme.base_font_size_small)
                # express the small font size relative to the base font size
                @theme_settings['small'] = { size: %(#{(base_font_size_small / theme.base_font_size.to_f).round 5}em) }
              else
                @theme_settings['small'] = { size: '0.8333em' }
              end
            end
          else
            # built-in settings used when no theme is supplied
            @theme_settings = {
              button: { font: 'Courier', styles: [:bold].to_set },
              code: { font: 'Courier' },
              kbd: { font: 'Courier', styles: [:italic].to_set },
              link: { color: '0000FF' },
              mark: { background_color: 'FFFF00', callback: [TextBackgroundAndBorderRenderer] },
              menu: { styles: [:bold].to_set },
              'line-through' => { styles: [:strikethrough].to_set },
              'underline' => { styles: [:underline].to_set },
              # NOTE: use the same ratios as the fallbacks in the themed branch above
              'big' => { size: '1.1667em' },
              'small' => { size: '0.8333em' },
            }
          end
        end

        # Converts an array of parsed nodes into a flat array of fragment Hashes.
        #
        # Each node is a Hash with a :type entry. A node of type :element is either non-void (it has a
        # :pcdata entry holding its child nodes, which are processed recursively) or void (handled as
        # an img element if so named, otherwise as a line break). A node of type :charref is resolved
        # to a single character. Any other node is handled as text.
        #
        # parsed          - the Array of node Hashes to convert
        # fragments       - the Array to which the generated fragments are appended (default: [])
        # inherited       - the fragment Hash of properties contributed by the enclosing element, if any
        # normalize_space - when truthy, leading whitespace of a text node is stripped if the previous
        #                   fragment ended with a space or line break, and runs of spaces are squeezed to
        #                   a single space, unless the inherited fragment sets :preserve_space
        #
        # Returns the fragments Array.
        def apply parsed, fragments = [], inherited = nil, normalize_space: nil
          # state carried from one sibling node to the next
          previous_fragment_is_text = false
          previous_fragment_end_with_space = false
          last_node = parsed[-1]
          # NOTE: we use each since using inject is slower than a manual loop
          parsed.each do |node|
            case node[:type]
            when :element
              # case 1: non-void element
              if node.key? :pcdata
                # NOTE: skip element if it has no children
                unless (pcdata = node[:pcdata]).empty?
                  tag_name = node[:name]
                  attributes = node[:attributes]
                  fragment = build_fragment (clone_fragment inherited), tag_name, attributes
                  # an a element whose only child is the placeholder text is emitted as a single fragment
                  if tag_name == :a && pcdata[0][:value] == DummyText && pcdata.length == 1
                    fragment[:text] = DummyText
                    fragments << fragment
                  else
                    if (text_transform = fragment.delete :text_transform)
                      # transform the combined text of all descendant text nodes at once, then hand the
                      # result back to those nodes in chunks that match their original lengths
                      # NOTE(review): presumes transform_text returns at least as many characters as it
                      # was given; chars.next raises StopIteration once the characters run out - confirm
                      text = (text_chunks = extract_text pcdata).join
                      chars = (StringIO.new transform_text text, text_transform).each_char
                      restore_text pcdata, (text_chunks.each_with_object [] do |chunk, accum|
                        accum << chunk.length.times.map { chars.next }.join
                      end)
                    end
                    # NOTE: decorate child fragments with inherited properties from this element
                    apply pcdata, fragments, fragment, normalize_space: normalize_space
                    previous_fragment_end_with_space = false
                  end
                  previous_fragment_is_text = false
                end
              # case 2: void element
              else
                case node[:name]
                when :img
                  attributes = node[:attributes]
                  fragment = {
                    image_path: attributes[:src],
                    image_format: attributes[:format],
                    # a zero-width space in the text will cause the image to be duplicated
                    # NOTE: add enclosing square brackets here to avoid errors in parsing
                    text: %([#{attributes[:alt].delete ZeroWidthSpace}]),
                    object_id: node.object_id, # used to deduplicate if fragment gets split up
                  }
                  # carry over the background and border of the enclosing element, if it has one
                  if inherited && (callback = inherited[:callback]) && (callback.include? TextBackgroundAndBorderRenderer)
                    # NOTE: if we keep InlineTextAligner, it needs to skip draw_text! for image fragment
                    fragment[:callback] = [TextBackgroundAndBorderRenderer, InlineImageRenderer]
                    fragment.update inherited.slice :border_color, :border_offset, :border_radius, :border_width, :background_color
                  else
                    fragment[:callback] = [InlineImageRenderer]
                  end
                  # apply the settings of each role on the image for which settings are defined
                  attributes[:class].split.each do |class_name|
                    next unless @theme_settings.key? class_name
                    update_fragment fragment, @theme_settings[class_name]
                    if fragment[:background_color] || (fragment[:border_color] && fragment[:border_width])
                      fragment[:callback] = [TextBackgroundAndBorderRenderer] | fragment[:callback]
                    end
                  end if attributes.key? :class
                  if inherited && (link = inherited[:link])
                    fragment[:link] = link
                  end
                  if (img_w = attributes[:width])
                    fragment[:image_width] = img_w
                  end
                  if (img_fit = attributes[:fit])
                    fragment[:image_fit] = img_fit
                  end
                  fragments << fragment
                  previous_fragment_is_text = previous_fragment_end_with_space = false
                else # :br
                  # a line break that is not the last node is followed by a zero-width space (see InnerLF)
                  lf = node == last_node ? LF : InnerLF
                  text = @merge_adjacent_text_nodes && previous_fragment_is_text ? %(#{fragments.pop[:text]}#{lf}) : lf
                  fragments << (clone_fragment inherited, text: text)
                  previous_fragment_is_text = previous_fragment_end_with_space = true
                end
              end
            when :charref
              if (ref_type = node[:reference_type]) == :name
                text = CharEntityTable[node[:value]]
              elsif ref_type == :decimal
                # FIXME: AFM fonts do not include a thin space glyph; set fallback_fonts to allow glyph to be resolved
                text = [node[:value]].pack 'U1'
              else
                # FIXME: AFM fonts do not include a thin space glyph; set fallback_fonts to allow glyph to be resolved
                text = [(node[:value].to_i 16)].pack 'U1'
              end
              # fold the character into the previous text fragment when merging is enabled
              text = %(#{fragments.pop[:text]}#{text}) if @merge_adjacent_text_nodes && previous_fragment_is_text
              fragments << (clone_fragment inherited, text: text)
              previous_fragment_is_text = true
              previous_fragment_end_with_space = false
            else # :text
              preserve_space = inherited && inherited[:preserve_space]
              # NOTE: a text node that is empty (after any stripping) produces no fragment and leaves the state untouched
              unless (text = previous_fragment_end_with_space && normalize_space && !preserve_space ? node[:value].lstrip : node[:value]).empty?
                text = %(#{fragments.pop[:text]}#{text}) if @merge_adjacent_text_nodes && previous_fragment_is_text
                if normalize_space && !preserve_space && (text.include? DoubleSpace)
                  text = text.tr_s ' ', ' '
                end
                fragments << (clone_fragment inherited, text: text)
                previous_fragment_is_text = true
                previous_fragment_end_with_space = text.end_with? ' '
              end
            end
          end
          fragments
        end

        private

        # Applies the formatting implied by an element and its attributes to a fragment.
        #
        # fragment - the fragment Hash to decorate; it is modified in place
        # tag_name - the Symbol name of the element
        # attrs    - the Hash of attributes of the element; the :class and :style entries are optional
        #
        # Returns the decorated fragment Hash, or a new destination marker Hash if the element is an a
        # element that has an id attribute but neither an anchor nor an href attribute.
        def build_fragment fragment, tag_name, attrs
          styles = (fragment[:styles] ||= ::Set.new)
          case tag_name
          when :strong
            styles << :bold
          when :em
            styles << :italic
          when :button, :code, :kbd, :mark, :menu
            update_fragment fragment, @theme_settings[tag_name]
          when :font
            if (value = attrs[:name])
              fragment[:font] = value
            end
            if (value = attrs[:size])
              # a size in em units is kept as a String (1em is a no-op); any other size is converted to a Float
              if value.end_with? 'em'
                fragment[:size] = value unless value == '1em'
              else
                fragment[:size] = value.to_f
              end
            end
            # NOTE: width is used for font-based icons
            if (value = attrs[:width])
              fragment[:width] = value
              fragment[:align] = :center
            end
            if (value = attrs[:color])
              case value.chr
              when '#' # hex string (e.g., #FF0000)
                # a 7-character value supplies all six digits; otherwise the first three digits are doubled
                fragment[:color] = value.length == 7 ? (value.slice 1, 6) : (value.slice 1, 3).each_char.map {|c| c * 2 }.join if HexColorRx.match? value
              when '[' # CMYK array (e.g., [50, 100, 0, 0])
                # components that are not specified remain 0; whole numbers are stored as Integer
                fragment[:color] = [0, 0, 0, 0].tap do |accum|
                  (((value.slice 1, value.length).chomp ']').split ', ', 4).each_with_index do |it, idx|
                    accum[idx] = (ival = it.to_i) == (fval = it.to_f) ? ival : fval
                  end
                end
              else # assume a 6-character hex color (internal only)
                fragment[:color] = value
              end
            end
            #if (value = attrs[:character_spacing])
            #  fragment[:character_spacing] = value.to_f
            #end
          when :a
            visible = true
            # a element can have no attributes, so short-circuit if that's the case
            unless attrs.empty?
              # NOTE: href, anchor, and name are mutually exclusive; nesting is not supported
              if (value = attrs[:anchor])
                fragment[:anchor] = value
              elsif (value = attrs[:href])
                # resolve character references in the href only if it could contain one
                fragment[:link] = (value.include? ';') ? (value.gsub CharRefRx do
                  $1 ? CharEntityTable[$1.to_sym] : [$2 ? $2.to_i : ($3.to_i 16)].pack('U1')
                end) : value
              elsif (value = attrs[:id])
                # NOTE: text is null character, which is used as placeholder text so Prawn doesn't drop fragment
                # NOTE: the marker replaces the fragment, so only the wj flag is carried over from it
                new_fragment = { name: value, callback: [InlineDestinationMarker] }
                new_fragment[:wj] = true if fragment[:wj]
                if (type = attrs[:type])
                  new_fragment[:type] = type.to_sym
                end
                fragment = new_fragment
                visible = nil
              end
            end
            update_fragment fragment, @theme_settings[:link] if visible
          when :sub
            styles << :subscript
          when :sup
            styles << :superscript
          when :del
            styles << :strikethrough
          else # :span
            # NOTE: spaces in style value are superfluous for our purpose; split drops record after trailing ;
            attrs[:style].tr(' ', '').split(';').each do |style|
              pname, pvalue = style.split ':', 2
              case pname
              when 'color' # color needed to support syntax highlighters
                fragment[:color] = pvalue.length == 7 ? (pvalue.slice 1, 6) : (pvalue.slice 1, 3).each_char.map {|c| c * 2 }.join if (pvalue.start_with? '#') && (HexColorRx.match? pvalue)
              when 'font-weight'
                styles << :bold if pvalue == 'bold'
              when 'font-style'
                styles << :italic if pvalue == 'italic'
              when 'align', 'text-align'
                fragment[:align] = pvalue.to_sym
              when 'width'
                # NOTE: implicitly activates inline-block behavior
                fragment[:width] = pvalue
              when 'background-color' # background-color needed to support syntax highlighters
                if (pvalue.start_with? '#') && (HexColorRx.match? pvalue)
                  fragment[:background_color] = pvalue.length == 7 ? (pvalue.slice 1, 6) : (pvalue.slice 1, 3).each_char.map {|c| c * 2 }.join
                  # Q: is it possible that callback would already be set?
                  #fragment[:callback] = [TextBackgroundAndBorderRenderer] | (fragment[:callback] || [])
                  fragment[:callback] = [TextBackgroundAndBorderRenderer]
                end
              end
            end if attrs.key? :style
          end
          # TODO: we could limit to select tags, but doesn't seem to really affect performance
          attrs[:class].split.each do |class_name|
            # these class names set flags on the fragment whether or not settings are defined for them
            fragment[:single_token] = true if class_name == 'nobreak' || class_name == 'nowrap'
            fragment[:wj] = true if class_name == 'wj'
            fragment[:preserve_space] = true if class_name == 'pre-wrap'
            next unless @theme_settings.key? class_name
            update_fragment fragment, @theme_settings[class_name]
            # NOTE: defer assignment of callback since we must look at combined styles of element and role
            if fragment[:background_color] || (fragment[:border_color] && fragment[:border_width])
              fragment[:callback] = [TextBackgroundAndBorderRenderer] | (fragment[:callback] || [])
              fragment[:align] = :center if fragment[:border_offset]
            end
          end if attrs.key? :class
          # NOTE: styles is the Set captured at the top of the method, even if fragment was replaced above
          fragment.delete :styles if styles.empty?
          # an aligned fragment needs the InlineTextAligner callback, appended after any existing callbacks
          fragment[:callback] = (fragment[:callback] || []) | [InlineTextAligner] if fragment.key? :align
          fragment
        end

        # Makes a copy of a fragment that can be modified independently of the original.
        #
        # fragment - the fragment Hash to copy, or nil
        # append   - a Hash of entries to add to the copy (default: nil)
        #
        # Returns the copy with the append entries applied. If fragment is nil, returns append itself
        # or, if append is also nil, a new empty Hash.
        def clone_fragment fragment, append = nil
          return append || {} unless fragment
          copy = fragment.merge
          # NOTE: styles and callback are collections that get modified, so each copy needs its own
          if copy.key? :styles
            copy[:styles] = copy[:styles].dup
          end
          if copy.key? :callback
            copy[:callback] = copy[:callback].drop 0
          end
          return copy unless append
          copy.update append
        end

        # Resolves a font style name and a text decoration name to a Set of fragment styles.
        #
        # font_style      - the font style name ('bold', 'italic', 'bold_italic', or 'normal_italic');
        #                   any other value contributes no styles
        # text_decoration - the text decoration name, looked up in TextDecorationTable (default: nil)
        #
        # Returns a Set of style Symbols, or nil if neither argument resolves to a style.
        def to_styles font_style, text_decoration = nil
          styles =
            case font_style
            when 'bold' then ::Set.new [:bold]
            when 'italic' then ::Set.new [:italic]
            when 'bold_italic' then ::Set.new [:bold, :italic]
            when 'normal_italic' then ::Set.new [:normal, :italic]
            end
          decoration = TextDecorationTable[text_decoration]
          return styles unless decoration
          return ::Set.new [decoration] unless styles
          styles << decoration
        end

        # Merges a set of properties into a fragment.
        #
        # Entries in props replace the entries in fragment that have the same key, except for :styles.
        # When both have a :styles entry, the new styles are added to the styles of the fragment. If
        # the new styles include :normal, the :bold and :italic styles are first removed from the
        # fragment and :normal itself is not added. If the new styles entry is nil, the styles of the
        # fragment are cleared.
        #
        # fragment - the fragment Hash to update in place
        # props    - the Hash of properties to apply; it is never modified
        #
        # Returns the updated fragment Hash.
        def update_fragment fragment, props
          fragment.update props do |k, oval, nval|
            case k
            when :styles
              if nval
                # NOTE: props is typically an entry from the theme settings that is reused for every
                # fragment, so :normal must not be removed from nval itself; otherwise, the reset of
                # bold and italic would only happen the first time the settings are applied
                if nval.include? :normal
                  oval.subtract [:bold, :italic]
                  oval.merge(nval - [:normal])
                else
                  oval.merge nval
                end
              else
                oval.clear
              end
            #when :callback
            #  oval | nval
            else
              nval
            end
          end
        end

        # Collects the values of all text nodes in a node tree, in document order.
        #
        # pcdata - the Array of node Hashes to search; element nodes that have a :pcdata entry are
        #          searched recursively and all other non-text nodes are ignored
        #
        # Returns a new Array containing the value of each text node.
        def extract_text pcdata
          pcdata.flat_map do |node|
            case node[:type]
            when :text
              [node[:value]]
            when :element
              (node.key? :pcdata) ? (extract_text node[:pcdata]) : []
            else
              []
            end
          end
        end

        # Assigns new values to the text nodes in a node tree, in document order.
        #
        # pcdata      - the Array of node Hashes to update in place; element nodes that have a :pcdata
        #               entry are visited recursively and all other non-text nodes are left alone
        # text_chunks - the Array of replacement values; one value is removed from the front of this
        #               Array for each text node visited
        #
        # Returns the pcdata Array.
        def restore_text pcdata, text_chunks
          pcdata.each do |node|
            type = node[:type]
            if type == :text
              node[:value] = text_chunks.shift
            elsif type == :element && (node.key? :pcdata)
              restore_text node[:pcdata], text_chunks
            end
          end
        end
      end
    end
  end
end