diff options
| -rw-r--r-- | examples/chronicles.adoc | 4 | ||||
| -rw-r--r-- | examples/chronicles.pdf | bin | 191355 -> 193662 bytes | |||
| -rw-r--r-- | lib/asciidoctor-pdf/converter.rb | 46 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/core_ext/array.rb | 2 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/core_ext/ostruct.rb | 4 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/pdf_core_ext.rb | 2 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb | 25 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/pdfmarks.rb | 14 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/prawn_ext/extensions.rb | 2 | ||||
| -rw-r--r-- | lib/asciidoctor-pdf/sanitizer.rb | 28 |
10 files changed, 88 insertions, 39 deletions
diff --git a/examples/chronicles.adoc b/examples/chronicles.adoc index d2cc82cb..b417a388 100644 --- a/examples/chronicles.adoc +++ b/examples/chronicles.adoc @@ -1,5 +1,5 @@ -= The Dangerous and Thrilling Documentation Chronicles -Kismet Chameleon; Lazarus het_Draeke += The Dangerous & _Thrilling_ Documentation Chronicles +Kismet Caméléon; Lazarus het_Draeke v1.0, 2014-01-01: The first incarnation of {doctitle} :description: This story chronicles the inexplicable hazards and vicious beasts a + team must surmount and vanquish on the journey to finding their open source + diff --git a/examples/chronicles.pdf b/examples/chronicles.pdf Binary files differindex d523e739..7564442e 100644 --- a/examples/chronicles.pdf +++ b/examples/chronicles.pdf diff --git a/lib/asciidoctor-pdf/converter.rb b/lib/asciidoctor-pdf/converter.rb index 1605ed67..1e134219 100644 --- a/lib/asciidoctor-pdf/converter.rb +++ b/lib/asciidoctor-pdf/converter.rb @@ -6,6 +6,8 @@ require 'prawn-svg' require 'prawn/table' require 'prawn/templates' require 'prawn/icon' +require_relative 'pdf_core_ext' +require_relative 'sanitizer' require_relative 'prawn_ext' require_relative 'pdfmarks' require_relative 'asciidoctor_ext' @@ -48,13 +50,6 @@ class Converter < ::Prawn::Document circle: (unicode_char 0x25e6), square: (unicode_char 0x25aa) } - BuiltInEntityChars = { - '<' => '<', - '>' => '>', - '&' => '&' - } - BuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'})/ - XmlOrBuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/ ImageAttributeValueRx = /^image:{1,2}(.*?)\[(.*?)\]$/ def initialize backend, opts @@ -77,7 +72,7 @@ class Converter < ::Prawn::Document warn %(asciidoctor: WARNING: conversion missing in backend #{@backend} for #{name}) end # NOTE inline nodes generate pseudo-HTML strings; the remainder write directly to PDF object - (node.is_a? ::Asciidoctor::Inline) ? result : self + ::Asciidoctor::Inline === node ? result : self end def convert_content_for_block node, opts = {} @@ -225,23 +220,25 @@ class Converter < ::Prawn::Document pdf_opts end + # TODO create helper method for creating literal PDF string + # FIXME PdfMarks should use the PDF info result def build_pdf_info doc info = {} - # TODO create helper method for creating literal PDF string - info[:Title] = ::PDF::Core::LiteralString.new(doc.doctitle sanitize: true, use_fallback: true) + # FIXME use sanitize: :plain_text once available + info[:Title] = str2pdfval sanitize(doc.doctitle use_fallback: true) if doc.attr? 'authors' - info[:Author] = ::PDF::Core::LiteralString.new(doc.attr 'authors') + info[:Author] = str2pdfval(doc.attr 'authors') end if doc.attr? 'subject' - info[:Subject] = ::PDF::Core::LiteralString.new(doc.attr 'subject') + info[:Subject] = str2pdfval(doc.attr 'subject') end if doc.attr? 'keywords' - info[:Keywords] = ::PDF::Core::LiteralString.new(doc.attr 'keywords') + info[:Keywords] = str2pdfval(doc.attr 'keywords') end if (doc.attr? 'publisher') - info[:Producer] = ::PDF::Core::LiteralString.new(doc.attr 'publisher') + info[:Producer] = str2pdfval(doc.attr 'publisher') end - info[:Creator] = ::PDF::Core::LiteralString.new %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION}) + info[:Creator] = str2pdfval %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION}) info[:Producer] ||= (info[:Author] || info[:Creator]) # FIXME use docdate attribute info[:ModDate] = info[:CreationDate] = ::Time.now @@ -670,7 +667,7 @@ class Converter < ::Prawn::Document ((bounds.width - width) / 2.0).floor end keep_together do - svg IO.read(image_path), at: [left, cursor], width: width + svg ::IO.read(image_path), at: [left, cursor], width: width layout_caption node, position: :bottom if node.title? end else @@ -739,7 +736,7 @@ class Converter < ::Prawn::Document conum_mapping ? (restore_conums fragments, conum_mapping) : fragments else # NOTE only format if we detect a need - (source_string =~ XmlOrBuiltInEntityCharsRx) ? (text_formatter.format source_string) : [{ text: source_string }] + (source_string =~ BuiltInEntityCharOrTagRx) ? (text_formatter.format source_string) : [{ text: source_string }] end #move_down @theme.block_margin_top unless at_page_top? @@ -1393,7 +1390,8 @@ class Converter < ::Prawn::Document numbering_offset = front_matter_counter.to_i - 1 outline.define do - if (doctitle = (doc.doctitle sanitize: true, use_fallback: true)) + # FIXME use sanitize: :plain_text once available + if (doctitle = document.sanitize(doc.doctitle use_fallback: true)) page title: doctitle, destination: (document.dest_top 1) end if doc.attr? 'toc' @@ -1412,7 +1410,7 @@ class Converter < ::Prawn::Document # TODO only nest inside root node if doctype=article def add_outline_level outline, sections, num_levels, page_num_labels, numbering_offset, num_front_matter_pages sections.each do |sect| - sect_title = sanitize(sect.numbered_title formal: true) + sect_title = sanitize sect.numbered_title formal: true sect_destination = sect.attr 'destination' sect_page_num = (sect.attr 'page_start') - num_front_matter_pages page_num_labels[sect_page_num + numbering_offset] = { P: ::PDF::Core::LiteralString.new(sect_page_num.to_s) } @@ -1543,16 +1541,6 @@ class Converter < ::Prawn::Document string.gsub(IndentationRx) { NoBreakSpace * $&.length } end - # Remove all HTML tags and resolve all entities in a string - # FIXME add option to control escaping entities, or a filter mechanism in general - def sanitize string - string.gsub(/<[^>]+>/, '') - .gsub(/&#(\d{2,4});/) { [$1.to_i].pack('U*') } - .gsub('<', '<').gsub('>', '>').gsub('&', '&') - .tr_s(' ', ' ') - .strip - end - # QUESTION is this method still necessary? def resolve_imagesdir doc @imagesdir ||= begin diff --git a/lib/asciidoctor-pdf/core_ext/array.rb b/lib/asciidoctor-pdf/core_ext/array.rb index 3f716dfb..e1ac36d7 100644 --- a/lib/asciidoctor-pdf/core_ext/array.rb +++ b/lib/asciidoctor-pdf/core_ext/array.rb @@ -2,4 +2,4 @@ class Array def to_h Hash[to_a] end unless respond_to? :to_h -end +end if RUBY_VERSION < '2.1.0' diff --git a/lib/asciidoctor-pdf/core_ext/ostruct.rb b/lib/asciidoctor-pdf/core_ext/ostruct.rb index 597a3e6f..7b6fc4fe 100644 --- a/lib/asciidoctor-pdf/core_ext/ostruct.rb +++ b/lib/asciidoctor-pdf/core_ext/ostruct.rb @@ -1,9 +1,9 @@ class OpenStruct def [] key send key - end + end unless respond_to? :[] def []= key, val send %(#{key}=), val - end + end unless respond_to? :[]= end if RUBY_VERSION < '2.0.0' diff --git a/lib/asciidoctor-pdf/pdf_core_ext.rb b/lib/asciidoctor-pdf/pdf_core_ext.rb new file mode 100644 index 00000000..de453c70 --- /dev/null +++ b/lib/asciidoctor-pdf/pdf_core_ext.rb @@ -0,0 +1,2 @@ +# the following modules / classes are organized under the Asciidoctor::PdfCore namespace +require_relative 'pdf_core_ext/pdf_object' diff --git a/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb new file mode 100644 index 00000000..91f76114 --- /dev/null +++ b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb @@ -0,0 +1,25 @@ +module Asciidoctor +module PdfCore +module PdfObject + # Convert the string to a PDF literal string if it can be encoded as ASCII-8BIT. + # Otherwise, return the specified string. + #-- + # QUESTION mixin to String and NilClass as to_pdf_value? + def str2pdfval string + begin + ::PDF::Core::LiteralString.new(string.encode ::Encoding::ASCII_8BIT) + rescue + string + end + end + + # Convert the string to a PDF object, first attempting to + # convert it to a PDF literal string. + #-- + # QUESTION mixin to String and NilClass as to_pdf_object? + def str2pdfobj string + ::PDF::Core::PdfObject(str2pdfval string) + end +end +end +end diff --git a/lib/asciidoctor-pdf/pdfmarks.rb b/lib/asciidoctor-pdf/pdfmarks.rb index e6eb1e91..b15a73cb 100644 --- a/lib/asciidoctor-pdf/pdfmarks.rb +++ b/lib/asciidoctor-pdf/pdfmarks.rb @@ -1,6 +1,9 @@ module Asciidoctor module Pdf class Pdfmarks + include ::Asciidoctor::Pdf::Sanitizer + include ::Asciidoctor::PdfCore::PdfObject + def initialize doc @doc = doc end @@ -8,15 +11,16 @@ class Pdfmarks def generate current_datetime = ::DateTime.now.strftime '%Y%m%d%H%M%S' doc = @doc + # FIXME use sanitize: :plain_text once available content = <<-EOS -[ /Title (#{doc.doctitle sanitize: true, use_fallback: true}) - /Author (#{doc.attr 'authors'}) - /Subject (#{doc.attr 'subject'}) - /Keywords (#{doc.attr 'keywords'}) +[ /Title #{str2pdfobj sanitize(doc.doctitle use_fallback: true)} + /Author #{str2pdfobj(doc.attr 'authors')} + /Subject #{str2pdfobj(doc.attr 'subject')} + /Keywords #{str2pdfobj(doc.attr 'keywords')} /ModDate (D:#{current_datetime}) /CreationDate (D:#{current_datetime}) /Creator (Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION}) - /Producer (#{doc.attr 'publisher'}) + /Producer #{str2pdfobj(doc.attr 'publisher')} /DOCINFO pdfmark EOS content diff --git a/lib/asciidoctor-pdf/prawn_ext/extensions.rb b/lib/asciidoctor-pdf/prawn_ext/extensions.rb index ab6d4fc1..cd1daf04 100644 --- a/lib/asciidoctor-pdf/prawn_ext/extensions.rb +++ b/lib/asciidoctor-pdf/prawn_ext/extensions.rb @@ -2,6 +2,8 @@ module Asciidoctor module Prawn module Extensions include ::Prawn::Measurements + include ::Asciidoctor::Pdf::Sanitizer + include ::Asciidoctor::PdfCore::PdfObject # - :height is the height of a line # - :leading is spacing between adjacent lines diff --git a/lib/asciidoctor-pdf/sanitizer.rb b/lib/asciidoctor-pdf/sanitizer.rb new file mode 100644 index 00000000..5b5d4ce1 --- /dev/null +++ b/lib/asciidoctor-pdf/sanitizer.rb @@ -0,0 +1,28 @@ +module Asciidoctor +module Pdf +module Sanitizer + BuiltInEntityChars = { + '<' => '<', + '>' => '>', + '&' => '&' + } + BuiltInEntityCharRx = /(?:#{BuiltInEntityChars.keys * '|'})/ + BuiltInEntityCharOrTagRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/ + NumericCharRefRx = /&#(\d{2,4});/ + XmlSanitizeRx = /<[^>]+>/ + + # Strip leading, trailing and repeating whitespace, remove XML tags and + # resolve all entities in the specified string. + # + # FIXME move to a module so we can mix it in elsewhere + # FIXME add option to control escaping entities, or a filter mechanism in general + def sanitize string + string.strip + .gsub(XmlSanitizeRx, '') + .tr_s(' ', ' ') + .gsub(NumericCharRefRx) { [$1.to_i].pack('U*') } + .gsub(BuiltInEntityCharRx, BuiltInEntityChars) + end +end +end +end |
