summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- examples/chronicles.adoc 4
-rw-r--r-- examples/chronicles.pdf bin 191355 -> 193662 bytes
-rw-r--r-- lib/asciidoctor-pdf/converter.rb 46
-rw-r--r-- lib/asciidoctor-pdf/core_ext/array.rb 2
-rw-r--r-- lib/asciidoctor-pdf/core_ext/ostruct.rb 4
-rw-r--r-- lib/asciidoctor-pdf/pdf_core_ext.rb 2
-rw-r--r-- lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb 25
-rw-r--r-- lib/asciidoctor-pdf/pdfmarks.rb 14
-rw-r--r-- lib/asciidoctor-pdf/prawn_ext/extensions.rb 2
-rw-r--r-- lib/asciidoctor-pdf/sanitizer.rb 28
10 files changed, 88 insertions, 39 deletions
diff --git a/examples/chronicles.adoc b/examples/chronicles.adoc
index d2cc82cb..b417a388 100644
--- a/examples/chronicles.adoc
+++ b/examples/chronicles.adoc
@@ -1,5 +1,5 @@
-= The Dangerous and Thrilling Documentation Chronicles
-Kismet Chameleon; Lazarus het_Draeke
+= The Dangerous & _Thrilling_ Documentation Chronicles
+Kismet Caméléon; Lazarus het_Draeke
v1.0, 2014-01-01: The first incarnation of {doctitle}
:description: This story chronicles the inexplicable hazards and vicious beasts a +
team must surmount and vanquish on the journey to finding their open source +
diff --git a/examples/chronicles.pdf b/examples/chronicles.pdf
index d523e739..7564442e 100644
--- a/examples/chronicles.pdf
+++ b/examples/chronicles.pdf
Binary files differ
diff --git a/lib/asciidoctor-pdf/converter.rb b/lib/asciidoctor-pdf/converter.rb
index 1605ed67..1e134219 100644
--- a/lib/asciidoctor-pdf/converter.rb
+++ b/lib/asciidoctor-pdf/converter.rb
@@ -6,6 +6,8 @@ require 'prawn-svg'
require 'prawn/table'
require 'prawn/templates'
require 'prawn/icon'
+require_relative 'pdf_core_ext'
+require_relative 'sanitizer'
require_relative 'prawn_ext'
require_relative 'pdfmarks'
require_relative 'asciidoctor_ext'
@@ -48,13 +50,6 @@ class Converter < ::Prawn::Document
circle: (unicode_char 0x25e6),
square: (unicode_char 0x25aa)
}
- BuiltInEntityChars = {
- '&lt;' => '<',
- '&gt;' => '>',
- '&amp;' => '&'
- }
- BuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'})/
- XmlOrBuiltInEntityCharsRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/
ImageAttributeValueRx = /^image:{1,2}(.*?)\[(.*?)\]$/
def initialize backend, opts
@@ -77,7 +72,7 @@ class Converter < ::Prawn::Document
warn %(asciidoctor: WARNING: conversion missing in backend #{@backend} for #{name})
end
# NOTE inline nodes generate pseudo-HTML strings; the remainder write directly to PDF object
- (node.is_a? ::Asciidoctor::Inline) ? result : self
+ ::Asciidoctor::Inline === node ? result : self
end
def convert_content_for_block node, opts = {}
@@ -225,23 +220,25 @@ class Converter < ::Prawn::Document
pdf_opts
end
+ # TODO create helper method for creating literal PDF string
+ # FIXME PdfMarks should use the PDF info result
def build_pdf_info doc
info = {}
- # TODO create helper method for creating literal PDF string
- info[:Title] = ::PDF::Core::LiteralString.new(doc.doctitle sanitize: true, use_fallback: true)
+ # FIXME use sanitize: :plain_text once available
+ info[:Title] = str2pdfval sanitize(doc.doctitle use_fallback: true)
if doc.attr? 'authors'
- info[:Author] = ::PDF::Core::LiteralString.new(doc.attr 'authors')
+ info[:Author] = str2pdfval(doc.attr 'authors')
end
if doc.attr? 'subject'
- info[:Subject] = ::PDF::Core::LiteralString.new(doc.attr 'subject')
+ info[:Subject] = str2pdfval(doc.attr 'subject')
end
if doc.attr? 'keywords'
- info[:Keywords] = ::PDF::Core::LiteralString.new(doc.attr 'keywords')
+ info[:Keywords] = str2pdfval(doc.attr 'keywords')
end
if (doc.attr? 'publisher')
- info[:Producer] = ::PDF::Core::LiteralString.new(doc.attr 'publisher')
+ info[:Producer] = str2pdfval(doc.attr 'publisher')
end
- info[:Creator] = ::PDF::Core::LiteralString.new %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
+ info[:Creator] = str2pdfval %(Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
info[:Producer] ||= (info[:Author] || info[:Creator])
# FIXME use docdate attribute
info[:ModDate] = info[:CreationDate] = ::Time.now
@@ -670,7 +667,7 @@ class Converter < ::Prawn::Document
((bounds.width - width) / 2.0).floor
end
keep_together do
- svg IO.read(image_path), at: [left, cursor], width: width
+ svg ::IO.read(image_path), at: [left, cursor], width: width
layout_caption node, position: :bottom if node.title?
end
else
@@ -739,7 +736,7 @@ class Converter < ::Prawn::Document
conum_mapping ? (restore_conums fragments, conum_mapping) : fragments
else
# NOTE only format if we detect a need
- (source_string =~ XmlOrBuiltInEntityCharsRx) ? (text_formatter.format source_string) : [{ text: source_string }]
+ (source_string =~ BuiltInEntityCharOrTagRx) ? (text_formatter.format source_string) : [{ text: source_string }]
end
#move_down @theme.block_margin_top unless at_page_top?
@@ -1393,7 +1390,8 @@ class Converter < ::Prawn::Document
numbering_offset = front_matter_counter.to_i - 1
outline.define do
- if (doctitle = (doc.doctitle sanitize: true, use_fallback: true))
+ # FIXME use sanitize: :plain_text once available
+ if (doctitle = document.sanitize(doc.doctitle use_fallback: true))
page title: doctitle, destination: (document.dest_top 1)
end
if doc.attr? 'toc'
@@ -1412,7 +1410,7 @@ class Converter < ::Prawn::Document
# TODO only nest inside root node if doctype=article
def add_outline_level outline, sections, num_levels, page_num_labels, numbering_offset, num_front_matter_pages
sections.each do |sect|
- sect_title = sanitize(sect.numbered_title formal: true)
+ sect_title = sanitize sect.numbered_title formal: true
sect_destination = sect.attr 'destination'
sect_page_num = (sect.attr 'page_start') - num_front_matter_pages
page_num_labels[sect_page_num + numbering_offset] = { P: ::PDF::Core::LiteralString.new(sect_page_num.to_s) }
@@ -1543,16 +1541,6 @@ class Converter < ::Prawn::Document
string.gsub(IndentationRx) { NoBreakSpace * $&.length }
end
- # Remove all HTML tags and resolve all entities in a string
- # FIXME add option to control escaping entities, or a filter mechanism in general
- def sanitize string
- string.gsub(/<[^>]+>/, '')
- .gsub(/&#(\d{2,4});/) { [$1.to_i].pack('U*') }
- .gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
- .tr_s(' ', ' ')
- .strip
- end
-
# QUESTION is this method still necessary?
def resolve_imagesdir doc
@imagesdir ||= begin
diff --git a/lib/asciidoctor-pdf/core_ext/array.rb b/lib/asciidoctor-pdf/core_ext/array.rb
index 3f716dfb..e1ac36d7 100644
--- a/lib/asciidoctor-pdf/core_ext/array.rb
+++ b/lib/asciidoctor-pdf/core_ext/array.rb
@@ -2,4 +2,4 @@ class Array
def to_h
Hash[to_a]
end unless respond_to? :to_h
-end
+end if RUBY_VERSION < '2.1.0'
diff --git a/lib/asciidoctor-pdf/core_ext/ostruct.rb b/lib/asciidoctor-pdf/core_ext/ostruct.rb
index 597a3e6f..7b6fc4fe 100644
--- a/lib/asciidoctor-pdf/core_ext/ostruct.rb
+++ b/lib/asciidoctor-pdf/core_ext/ostruct.rb
@@ -1,9 +1,9 @@
class OpenStruct
def [] key
send key
- end
+ end unless respond_to? :[]
def []= key, val
send %(#{key}=), val
- end
+ end unless respond_to? :[]=
end if RUBY_VERSION < '2.0.0'
diff --git a/lib/asciidoctor-pdf/pdf_core_ext.rb b/lib/asciidoctor-pdf/pdf_core_ext.rb
new file mode 100644
index 00000000..de453c70
--- /dev/null
+++ b/lib/asciidoctor-pdf/pdf_core_ext.rb
@@ -0,0 +1,2 @@
+# the following modules / classes are organized under the Asciidoctor::PdfCore namespace
+require_relative 'pdf_core_ext/pdf_object'
diff --git a/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb
new file mode 100644
index 00000000..91f76114
--- /dev/null
+++ b/lib/asciidoctor-pdf/pdf_core_ext/pdf_object.rb
@@ -0,0 +1,25 @@
+module Asciidoctor
+module PdfCore
+module PdfObject
+  # Wrap the string in a ::PDF::Core::LiteralString when it transcodes to ASCII-8BIT.
+  # If the encode call raises any StandardError, return the argument unchanged.
+  #--
+  # QUESTION mixin to String and NilClass as to_pdf_value?
+  def str2pdfval string
+    begin
+      ::PDF::Core::LiteralString.new(string.encode ::Encoding::ASCII_8BIT)
+    rescue # bare rescue: also covers an argument that lacks #encode (e.g. nil)
+      string
+    end
+  end
+
+  # Run the string through str2pdfval, then hand that result to
+  # ::PDF::Core::PdfObject and return whatever it produces.
+  #--
+  # QUESTION mixin to String and NilClass as to_pdf_object?
+  def str2pdfobj string
+    ::PDF::Core::PdfObject(str2pdfval string)
+  end
+end
+end
+end
diff --git a/lib/asciidoctor-pdf/pdfmarks.rb b/lib/asciidoctor-pdf/pdfmarks.rb
index e6eb1e91..b15a73cb 100644
--- a/lib/asciidoctor-pdf/pdfmarks.rb
+++ b/lib/asciidoctor-pdf/pdfmarks.rb
@@ -1,6 +1,9 @@
module Asciidoctor
module Pdf
class Pdfmarks
+ include ::Asciidoctor::Pdf::Sanitizer
+ include ::Asciidoctor::PdfCore::PdfObject
+
def initialize doc
@doc = doc
end
@@ -8,15 +11,16 @@ class Pdfmarks
def generate
current_datetime = ::DateTime.now.strftime '%Y%m%d%H%M%S'
doc = @doc
+ # FIXME use sanitize: :plain_text once available
content = <<-EOS
-[ /Title (#{doc.doctitle sanitize: true, use_fallback: true})
- /Author (#{doc.attr 'authors'})
- /Subject (#{doc.attr 'subject'})
- /Keywords (#{doc.attr 'keywords'})
+[ /Title #{str2pdfobj sanitize(doc.doctitle use_fallback: true)}
+ /Author #{str2pdfobj(doc.attr 'authors')}
+ /Subject #{str2pdfobj(doc.attr 'subject')}
+ /Keywords #{str2pdfobj(doc.attr 'keywords')}
/ModDate (D:#{current_datetime})
/CreationDate (D:#{current_datetime})
/Creator (Asciidoctor PDF #{::Asciidoctor::Pdf::VERSION}, based on Prawn #{::Prawn::VERSION})
- /Producer (#{doc.attr 'publisher'})
+ /Producer #{str2pdfobj(doc.attr 'publisher')}
/DOCINFO pdfmark
EOS
content
diff --git a/lib/asciidoctor-pdf/prawn_ext/extensions.rb b/lib/asciidoctor-pdf/prawn_ext/extensions.rb
index ab6d4fc1..cd1daf04 100644
--- a/lib/asciidoctor-pdf/prawn_ext/extensions.rb
+++ b/lib/asciidoctor-pdf/prawn_ext/extensions.rb
@@ -2,6 +2,8 @@ module Asciidoctor
module Prawn
module Extensions
include ::Prawn::Measurements
+ include ::Asciidoctor::Pdf::Sanitizer
+ include ::Asciidoctor::PdfCore::PdfObject
# - :height is the height of a line
# - :leading is spacing between adjacent lines
diff --git a/lib/asciidoctor-pdf/sanitizer.rb b/lib/asciidoctor-pdf/sanitizer.rb
new file mode 100644
index 00000000..5b5d4ce1
--- /dev/null
+++ b/lib/asciidoctor-pdf/sanitizer.rb
@@ -0,0 +1,28 @@
+module Asciidoctor
+module Pdf
+module Sanitizer
+ BuiltInEntityChars = {
+ '&lt;' => '<',
+ '&gt;' => '>',
+ '&amp;' => '&'
+ }
+ BuiltInEntityCharRx = /(?:#{BuiltInEntityChars.keys * '|'})/
+ BuiltInEntityCharOrTagRx = /(?:#{BuiltInEntityChars.keys * '|'}|<)/
+ NumericCharRefRx = /&#(\d{2,4});/
+ XmlSanitizeRx = /<[^>]+>/
+
+ # Strip leading, trailing and repeating whitespace, remove XML tags and
+ # resolve all entities in the specified string.
+ #
+ # FIXME move to a module so we can mix it in elsewhere
+ # FIXME add option to control escaping entities, or a filter mechanism in general
+ def sanitize string
+ string.strip
+ .gsub(XmlSanitizeRx, '')
+ .tr_s(' ', ' ')
+ .gsub(NumericCharRefRx) { [$1.to_i].pack('U*') }
+ .gsub(BuiltInEntityCharRx, BuiltInEntityChars)
+ end
+end
+end
+end