10 files changed, 88 insertions, 39 deletions
diff --git a/examples/chronicles.adoc b/examples/chronicles.adoc
index d2cc82cb..b417a388 100644
--- a/examples/chronicles.adoc
+++ b/examples/chronicles.adoc
@@ -1,5 +1,5 @@
-= The Dangerous and Thrilling Documentation Chronicles
-Kismet Chameleon; Lazarus het_Draeke
+= The Dangerous & _Thrilling_ Documentation Chronicles
+Kismet Caméléon; Lazarus het_Draeke
 v1.0, 2014-01-01: The first incarnation of {doctitle}
 :description: This story chronicles the inexplicable hazards and vicious beasts a +
 team must surmount and vanquish on the journey to finding their open source +
diff --git a/examples/chronicles.pdf b/examples/chronicles.pdf
index d523e739..7564442e 100644
--- a/examples/chronicles.pdf
+++ b/examples/chronicles.pdf
diff --git a/lib/asciidoctor-pdf/converter.rb b/lib/asciidoctor-pdf/converter.rb
index 1605ed67..1e134219 100644
--- a/lib/asciidoctor-pdf/converter.rb
+++ b/lib/asciidoctor-pdf/converter.rb
@@ -6,6 +6,8 @@ require 'prawn-svg'
 require 'prawn/table'
 require 'prawn/templates'
 require 'prawn/icon'
+require_relative 'pdf_core_ext'
+require_relative 'sanitizer'
 require_relative 'prawn_ext'
 require_relative 'pdfmarks'
 require_relative 'asciidoctor_ext'
@@ -48,13 +50,6 @@ class Converter < ::Prawn::Document
     circle: (unicode_char 0x25e6),
     square: (unicode_char 0x25aa)
   }
-  BuiltInEntityChars = {
-    '&lt;' => '<',
-    '&gt;' => '>',
-    '&amp;' => '&'
-  }
-  BuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'})/
-  XmlOrBuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/
   ImageAttributeValueRx = /^image:{1,2}(.*?)\[(.*?)\]$/
 
   def initialize backend, opts
@@ -77,7 +72,7 @@ class Converter < ::Prawn::Document
       warn %(asciidoctor: WARNING: conversion missing in backend #{@backend} for #{name})
     end
     # NOTE inline nodes generate pseudo-HTML strings; the remainder write directly to PDF object
-    (node.is_a? ::Asciidoctor::Inline) ? result : self
+    ::Asciidoctor::Inline === node ? result : self
   end
 
   def convert_content_for_block node, opts = {}
@@ -225,23 +220,25 @@ class Converter < ::Prawn::Document
     pdf_opts
   end
 
+  # TODO create helper method for creating literal PDF string
+  # FIXME PdfMarks should use the PDF info result
   def build_pdf_info doc
     info = {}
-    # TODO create helper method for creating literal PDF string
-    info[:Title] = ::PDF::Core::LiteralString.new(doc.doctitle sanitize: true, use_fallback: true)
+    # FIXME use sanitize: :plain_text once available
+    info[:Title] = str2pdfval sanitize(doc.doctitle use_fallback: true)
     if doc.attr? 'authors'
-      info[:Author] = ::PDF::Core::LiteralString.new(doc.attr 'authors')
+      info[:Author] = str2pdfval(doc.attr 'authors')
     end
     if doc.attr? 'subject'
-      info[:Subject] = ::PDF::Core::LiteralString.new(doc.attr 'subject')
+      info[:Subject] = str2pdfval(doc.attr 'subject')
     end
     if doc.attr? 'keywords'
-      info[:Keywords] = ::PDF::Core::LiteralString.new(doc.attr 'keywords')
+      info[:Keywords] = str2pdfval(doc.attr 'keywords')
     end
     if (doc.attr? 'publisher')
-      info[:Producer] = ::PDF::Core::LiteralString.new(doc.attr 'publisher')
+      info[:Producer] = str2pdfval(doc.attr 'publisher')
     end
-    info[:Creator] = ::PDF::Core::LiteralString.new %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
+    info[:Creator] = str2pdfval %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
     info[:Producer] ||= (info[:Author] || info[:Creator])
     # FIXME use docdate attribute
     info[:ModDate] = info[:CreationDate] = ::Time.now
@@ -670,7 +667,7 @@ class Converter < ::Prawn::Document
         ((bounds.width - width) / 2.0).floor
       end
       keep_together do
-        svg IO.read(image_path), at: [left, cursor], width: width
+        svg ::IO.read(image_path), at: [left, cursor], width: width
         layout_caption node, position: :bottom if node.title?
       end
     else
@@ -739,7 +736,7 @@ class Converter < ::Prawn::Document
       conum_mapping ? (restore_conums fragments, conum_mapping) : fragments
     else
       # NOTE only format if we detect a need
-      (source_string =~ XmlOrBuiltInEntityCharsRx) ? (text_formatter.format source_string) : [{ text: source_string }]
+      (source_string =~ BuiltInEntityCharOrTagRx) ? (text_formatter.format source_string) : [{ text: source_string }]
     end
 
     #move_down @theme.block_margin_top unless at_page_top?
@@ -1393,7 +1390,8 @@ class Converter < ::Prawn::Document
     numbering_offset = front_matter_counter.to_i - 1
 
     outline.define do
-      if (doctitle = (doc.doctitle sanitize: true, use_fallback: true))
+      # FIXME use sanitize: :plain_text once available
+      if (doctitle = document.sanitize(doc.doctitle use_fallback: true))
         page title: doctitle, destination: (document.dest_top 1)
       end
       if doc.attr? 'toc'
@@ -1412,7 +1410,7 @@ class Converter < ::Prawn::Document
   # TODO only nest inside root node if doctype=article
   def add_outline_level outline, sections, num_levels, page_num_labels, numbering_offset, num_front_matter_pages
     sections.each do |sect|
-      sect_title = sanitize(sect.numbered_title formal: true)
+      sect_title = sanitize sect.numbered_title formal: true
       sect_destination = sect.attr 'destination'
       sect_page_num = (sect.attr 'page_start') - num_front_matter_pages
       page_num_labels[sect_page_num + numbering_offset] = { P: ::PDF::Core::LiteralString.new(sect_page_num.to_s) }
@@ -1543,16 +1541,6 @@ class Converter < ::Prawn::Document
     string.gsub(IndentationRx) { NoBreakSpace * $&.length }
   end
 
-  # Remove all HTML tags and resolve all entities in a string
-  # FIXME add option to control escaping entities, or a filter mechanism in general
-  def sanitize string
-    string.gsub(/<[^>]+>/, '')
-        .gsub(/&#(\d{2,4});/) { [$1.to_i].pack('U*') }
-        .gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
-        .tr_s(' ', ' ')
-        .strip
-  end
-
   # QUESTION is this method still necessary?
   def resolve_imagesdir doc
     @imagesdir ||= begin
diff --git a/lib/asciidoctor-pdf/core_ext/array.rb b/lib/asciidoctor-pdf/core_ext/array.rb
index 3f716dfb..e1ac36d7 100644
--- a/lib/asciidoctor-pdf/core_ext/array.rb
+++ b/lib/asciidoctor-pdf/core_ext/array.rb
@@ -2,4 +2,4 @@ class Array
   def to_h
     Hash[to_a]
-  end unless respond_to? :to_h
-end
+  end unless method_defined? :to_h # self is the class here, so query its instance methods
+end if RUBY_VERSION < '2.1.0'
diff --git a/lib/asciidoctor-pdf/core_ext/ostruct.rb b/lib/asciidoctor-pdf/core_ext/ostruct.rb
index 597a3e6f..7b6fc4fe 100644
--- a/lib/asciidoctor-pdf/core_ext/ostruct.rb
+++ b/lib/asciidoctor-pdf/core_ext/ostruct.rb
@@ -1,9 +1,9 @@
 class OpenStruct
   def [] key
     send key
-  end
+  end unless method_defined? :[] # self is the class here, so query its instance methods
 
   def []= key, val
     send %(#{key}=), val
-  end
+  end unless method_defined? :[]= # same guard for the element writer
 end if RUBY_VERSION < '2.0.0'
diff --git a/lib/asciidoctor-pdf/pdf_core_ext.rb b/lib/asciidoctor-pdf/pdf_core_ext.rb
new file mode 100644
index 00000000..de453c70
--- /dev/null
+++ b/lib/asciidoctor-pdf/pdf_core_ext.rb
@@ -0,0 +1,2 @@
+# the following modules / classes are organized under the Asciidoctor::PdfCore namespace
+require_relative 'pdf_core_ext/pdf_object'
diff --git a/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb
new file mode 100644
index 00000000..91f76114
--- /dev/null
+++ b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb
@@ -0,0 +1,25 @@
+module Asciidoctor
+module PdfCore
+module PdfObject
+  # Wrap the string in a PDF literal string if it transcodes to ASCII-8BIT; if that
+  # attempt raises (as it also does for nil), hand the argument back unchanged.
+  #--
+  # QUESTION mixin to String and NilClass as to_pdf_value?
+  def str2pdfval string
+    binary = string.encode ::Encoding::ASCII_8BIT
+    ::PDF::Core::LiteralString.new(binary)
+  rescue
+    # not representable as ASCII-8BIT (or not a String); return the input as given
+    string
+  end
+
+  # Serialize the string as a PDF object, passing it through
+  # str2pdfval first so a literal string is used where possible.
+  #--
+  # QUESTION mixin to String and NilClass as to_pdf_object?
+  def str2pdfobj string
+    ::PDF::Core::PdfObject(str2pdfval(string))
+  end
+end
+end
+end
diff --git a/lib/asciidoctor-pdf/pdfmarks.rb b/lib/asciidoctor-pdf/pdfmarks.rb
index e6eb1e91..b15a73cb 100644
--- a/lib/asciidoctor-pdf/pdfmarks.rb
+++ b/lib/asciidoctor-pdf/pdfmarks.rb
@@ -1,6 +1,9 @@
 module Asciidoctor
 module Pdf
 class Pdfmarks
+  include ::Asciidoctor::Pdf::Sanitizer
+  include ::Asciidoctor::PdfCore::PdfObject
+
   def initialize doc
     @doc = doc
   end
@@ -8,15 +11,16 @@ class Pdfmarks
   def generate
     current_datetime = ::DateTime.now.strftime '%Y%m%d%H%M%S'
     doc = @doc
+    # FIXME use sanitize: :plain_text once available
     content = <<-EOS
-[ /Title (#{doc.doctitle sanitize: true, use_fallback: true})
-  /Author (#{doc.attr 'authors'})
-  /Subject (#{doc.attr 'subject'})
-  /Keywords (#{doc.attr 'keywords'})
+[ /Title #{str2pdfobj sanitize(doc.doctitle use_fallback: true)}
+  /Author #{str2pdfobj(doc.attr 'authors')}
+  /Subject #{str2pdfobj(doc.attr 'subject')}
+  /Keywords #{str2pdfobj(doc.attr 'keywords')}
   /ModDate (D:#{current_datetime})
   /CreationDate (D:#{current_datetime})
   /Creator (Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
-  /Producer (#{doc.attr 'publisher'})
+  /Producer #{str2pdfobj(doc.attr 'publisher')}
   /DOCINFO pdfmark
     EOS
     content
diff --git a/lib/asciidoctor-pdf/prawn_ext/extensions.rb b/lib/asciidoctor-pdf/prawn_ext/extensions.rb
index ab6d4fc1..cd1daf04 100644
--- a/lib/asciidoctor-pdf/prawn_ext/extensions.rb
+++ b/lib/asciidoctor-pdf/prawn_ext/extensions.rb
@@ -2,6 +2,8 @@ module Asciidoctor
 module Prawn
 module Extensions
   include ::Prawn::Measurements
+  include ::Asciidoctor::Pdf::Sanitizer
+  include ::Asciidoctor::PdfCore::PdfObject
 
   # - :height is the height of a line
   # - :leading is spacing between adjacent lines
diff --git a/lib/asciidoctor-pdf/sanitizer.rb b/lib/asciidoctor-pdf/sanitizer.rb
new file mode 100644
index 00000000..5b5d4ce1
--- /dev/null
+++ b/lib/asciidoctor-pdf/sanitizer.rb
@@ -0,0 +1,28 @@
+module Asciidoctor
+module Pdf
+module Sanitizer
+  BuiltInEntityChars = {
+    '&lt;' => '<',
+    '&gt;' => '>',
+    '&amp;' => '&'
+  }
+  BuiltInEntityCharRx = /(?:#{BuiltInEntityChars.keys * '|'})/
+  BuiltInEntityCharOrTagRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/
+  NumericCharRefRx = /&#(\d{2,4});/
+  XmlSanitizeRx = /<[^>]+>/
+
+  # Remove XML tags, strip leading and trailing whitespace, squeeze repeating
+  # spaces, then resolve numeric character references and built-in entities.
+  #
+  # NOTE strip must follow tag removal so whitespace just inside a tag is trimmed too
+  # FIXME add option to control escaping entities, or a filter mechanism in general
+  def sanitize string
+    string.gsub(XmlSanitizeRx, '')
+        .strip
+        .tr_s(' ', ' ')
+        .gsub(NumericCharRefRx) { [$1.to_i].pack('U*') }
+        .gsub(BuiltInEntityCharRx, BuiltInEntityChars)
+  end
+end
+end
+end