summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dan Allen <dan.j.allen@gmail.com> 2023-06-22 23:54:31 -0600
committer: GitHub <noreply@github.com> 2023-06-22 23:54:31 -0600
commit: 46393dc39203ef0579d03c5cc965fc03270d6a80 (patch)
tree: 61edbe44a82290c59c3d15718884f38e988241ed
parent: 0c2f648cbdf1c4ea522b920e8dd6759adfc1d0e5 (diff)
resolves #2430 remove null character enclosed by XML tag when sanitizing text (PR #2431)
-rw-r--r-- CHANGELOG.adoc 1
-rw-r--r-- lib/asciidoctor/pdf/sanitizer.rb 4
-rw-r--r-- spec/outline_spec.rb 8
3 files changed, 9 insertions, 4 deletions
diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc
index 4cda737d..86f0eb3d 100644
--- a/CHANGELOG.adoc
+++ b/CHANGELOG.adoc
@@ -40,6 +40,7 @@ Improvements::
Bug Fixes::
+* remove null character enclosed in XML tag when sanitizing text; fixes invisible text in outline when heading contains index term (#2430)
* correctly map all icons from FontAwesome 4 (#2373)
* resolve remote image in document title or section title with autogenerated ID
* keep caret between items in menu macro with previous item if items wrap
diff --git a/lib/asciidoctor/pdf/sanitizer.rb b/lib/asciidoctor/pdf/sanitizer.rb
index 8c009ca9..e5574414 100644
--- a/lib/asciidoctor/pdf/sanitizer.rb
+++ b/lib/asciidoctor/pdf/sanitizer.rb
@@ -19,11 +19,11 @@ module Asciidoctor
'nbsp' => ' ',
'quot' => '"',
}).default = '?'
- SanitizeXMLRx = /<[^>]+>/
+ SanitizeXMLRx = /<[^>]+>\0?/
CharRefRx = /&(?:amp;)?(?:([a-z][a-z]+\d{0,2})|#(?:(\d\d\d{0,4})|x(\h\h\h{0,3})));/
UnescapedAmpersandRx = /&(?!(?:[a-z][a-z]+\d{0,2}|#(?:\d\d\d{0,4}|x\h\h\h{0,3}));)/
- # Strip leading, trailing and repeating whitespace, remove XML tags and
+ # Strip leading, trailing and repeating whitespace, remove XML tags along with an enclosed null character, and
# resolve all entities in the specified string.
#
# FIXME: move to a module so we can mix it in elsewhere
diff --git a/spec/outline_spec.rb b/spec/outline_spec.rb
index ace1107c..5b8e85a9 100644
--- a/spec/outline_spec.rb
+++ b/spec/outline_spec.rb
@@ -768,14 +768,18 @@ describe 'Asciidoctor::PDF::Converter - Outline' do
pdf = to_pdf <<~'END'
= _Document_ *Title*
:doctype: book
+ :sectnums:
== _First_ *Chapter*
+
+ == ((Wetland Birds))
END
outline = extract_outline pdf
- (expect outline).to have_size 2
+ (expect outline).to have_size 3
(expect outline[0][:title]).to eql 'Document Title'
- (expect outline[1][:title]).to eql 'First Chapter'
+ (expect outline[1][:title]).to eql 'Chapter 1. First Chapter'
+ (expect outline[2][:title]).to eql 'Chapter 2. Wetland Birds'
end
it 'should decode character references in entries' do