summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Allen <dan.j.allen@gmail.com>2019-10-29 15:51:35 -0600
committerGitHub <noreply@github.com>2019-10-29 15:51:35 -0600
commitfefd4f05d85d7f0af08b6199e60baedecaa53d69 (patch)
treeaacbbca5e60c8f9b22c2d2da0c55ee7242694aa0
parent1839e9dc5d181a44ad139651113890a8c82552b9 (diff)
resolves #1300 allow ranges of pages from PDF file to be imported using image macro (PR #1354)
-rw-r--r--CHANGELOG.adoc1
-rw-r--r--README.adoc25
-rw-r--r--lib/asciidoctor/pdf/converter.rb22
-rw-r--r--spec/image_spec.rb13
4 files changed, 53 insertions, 8 deletions
diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc
index bc669c10..8f38512a 100644
--- a/CHANGELOG.adoc
+++ b/CHANGELOG.adoc
@@ -15,6 +15,7 @@ For a detailed view of what has changed, refer to the {uri-repo}/commits/master[
* allow elements on title page to be disabled from theme using display: none (#1346)
* set chapter-title attribute to value of toc-title attribute on toc pages in book (#1338)
* set section-title attribute to value of toc-title attribute on toc pages in article if page has no other sections (#1338)
+* allow ranges of pages from PDF file to be imported using image macro as specified by pages attribute (#1300)
* set default footer content in base theme; remove logic to process `footer_<side>_content: none` key (#1320)
* include doctitle in outline for article when article is only a single page (#1322)
* allow custom (inline) role to control text decoration property (#1326)
diff --git a/README.adoc b/README.adoc
index ed3980a9..cbbfbcdc 100644
--- a/README.adoc
+++ b/README.adoc
@@ -647,25 +647,36 @@ To import the first page from a PDF file, use the block image macro with the PDF
image::custom-page.pdf[]
----
+The converter will insert the page from the PDF as a dedicated page that matches the size and layout of the page being imported (no matter where the block image occurs).
+Therefore, there's no need to put a manual page break (i.e., `<<<`) around the image macro.
+
By default, this macro will import the first page of the PDF.
To import a different page, specify it as a 1-based index using the `page` attribute.
[source,asciidoc]
----
-image::custom-page.pdf[page=2]
+image::custom-pages.pdf[page=2]
----
-To import multiple pages, you'll need to use multiple image macros, each specifying the page number to import.
+You can import multiple pages either using multiple image macros or using the `pages` attribute.
+The `pages` attribute accepts individual page numbers or page number ranges (two page numbers separated by `..`).
+The values can be separated either by commas or semi-colons.
+(The syntax is similar to the syntax used for the `lines` attribute of the AsciiDoc include directive.)
-CAUTION: An image macro that imports a PDF page should never be nested inside a delimited block or table.
-It should be a direct descendant of the document or a section.
-Otherwise, the behavior is unspecified.
+[source,asciidoc]
+----
+image::custom-pages.pdf[pages=3;1..2]
+----
-The converter will insert the page from the PDF as a dedicated page that matches the size and layout of the page being imported (no matter where the block image occurs).
-Therefore, there's no need to put a manual page break (i.e., `<<<`) around the image macro.
+Pages are imported in the order listed.
To see a practical example of how to use this feature, refer to the blog post https://fromplantoprototype.com/blog/2019/08/07/importing-pdf-pages-in-asciidoctor-pdf/[Importing PDF Pages in asciidoctor-pdf].
+CAUTION: An image macro used to import PDF pages should never be nested inside a delimited block or table cell.
+It should be a direct descendant of the document or a section.
+That's because what it imports are entire pages.
+If it's used inside a delimited block or table cell, the behavior is unspecified.
+
== Crafting Interdocument Xrefs
This converter produces a single PDF file, which means content from multiple source documents gets colocated into the same output file.
diff --git a/lib/asciidoctor/pdf/converter.rb b/lib/asciidoctor/pdf/converter.rb
index d87d758b..277467d0 100644
--- a/lib/asciidoctor/pdf/converter.rb
+++ b/lib/asciidoctor/pdf/converter.rb
@@ -1439,7 +1439,13 @@ class Converter < ::Prawn::Document
if ::File.readable? image_path
# NOTE import_page automatically advances to next page afterwards
# QUESTION should we add destination to top of imported page?
- import_page image_path, page: [(node.attr 'page').to_i, 1].max, replace: page.empty?
+ if (pgnums = node.attr 'pages', nil, false)
+ (resolve_pagenums pgnums).each_with_index do |pgnum, idx|
+ import_page image_path, page: pgnum, replace: (idx == 0 ? page.empty? : true)
+ end
+ else
+ import_page image_path, page: [(node.attr 'page', nil, 1).to_i, 1].max, replace: page.empty?
+ end
else
# QUESTION should we use alt text in this case?
logger.warn %(pdf to insert not found or not readable: #{image_path})
@@ -4209,6 +4215,20 @@ class Converter < ::Prawn::Document
end
end
+ def resolve_pagenums val
+ pgnums = []
+ ((val.include? ',') ? (val.split ',') : (val.split ';')).each do |entry|
+ if entry.include? '..'
+ from, _, to = entry.partition '..'
+ pgnums += ([from.to_i, 1].max..[to.to_i, 1].max).to_a
+ else
+ pgnums << entry.to_i
+ end
+ end
+
+ pgnums
+ end
+
def get_char code
(code.start_with? '\u') ? ([((code.slice 2, code.length).to_i 16)].pack 'U1') : code
end
diff --git a/spec/image_spec.rb b/spec/image_spec.rb
index ce3d3240..69c49aca 100644
--- a/spec/image_spec.rb
+++ b/spec/image_spec.rb
@@ -403,6 +403,19 @@ describe 'Asciidoctor::PDF::Converter - Image' do
p2_contents = pdf.objects[(pdf.page 2).page_object[:Contents][0]].data
(expect (p2_contents.split ?\n).slice 0, 3).to eql ['q', '/DeviceRGB cs', '0.0 1.0 0.0 scn']
end
+
+ it 'should insert all pages specified by pages attribute without leaving blank pages in between' do
+ pdf = to_pdf <<~'EOS'
+ image::red-green-blue.pdf[pages=3;1..2]
+ EOS
+ (expect pdf.pages).to have_size 3
+ p1_contents = pdf.objects[(pdf.page 1).page_object[:Contents][0]].data
+ (expect (p1_contents.split ?\n).slice 0, 3).to eql ['q', '/DeviceRGB cs', '0.0 0.0 1.0 scn']
+ p2_contents = pdf.objects[(pdf.page 2).page_object[:Contents][0]].data
+ (expect (p2_contents.split ?\n).slice 0, 3).to eql ['q', '/DeviceRGB cs', '1.0 0.0 0.0 scn']
+ p3_contents = pdf.objects[(pdf.page 3).page_object[:Contents][0]].data
+ (expect (p3_contents.split ?\n).slice 0, 3).to eql ['q', '/DeviceRGB cs', '0.0 1.0 0.0 scn']
+ end
end
context 'Data URI' do