summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dan Allen <dan.j.allen@gmail.com> 2018-04-17 00:41:39 -0600
committer Dan Allen <dan.j.allen@gmail.com> 2018-04-17 01:34:53 -0600
commit d55e2b936cdd4e1792828fb3c5c5d8cf0c2c63ca (patch)
tree 2e66098ed2c04e4e7697ca0fb7d882ce344c3fb0
parent ce927bdf045f6d176e0c173e67654786e7abbce9 (diff)
resolves #794 drop XML tags, character refs, and non-word chars when generating ID for section
- drop character refs and non-word chars (except for hyphen and space) instead of replacing with ID separator
- drop XML tags (but not the contents of the tag)
- replace spaces and hyphens with ID separator
- optimize logic in Section.generate_id method
-rw-r--r-- CHANGELOG.adoc 1
-rw-r--r-- features/xref.feature 4
-rw-r--r-- lib/asciidoctor.rb 6
-rw-r--r-- lib/asciidoctor/section.rb 34
-rw-r--r-- test/sections_test.rb 42
5 files changed, 59 insertions, 28 deletions
diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc
index d8e1ca20..15cd4022 100644
--- a/CHANGELOG.adoc
+++ b/CHANGELOG.adoc
@@ -17,6 +17,7 @@ For a detailed view of what has changed, refer to the {uri-repo}/commits/master[
Enhancements::
+ * BREAKING: drop XML tags, character refs, and non-word characters (except hyphen and space) when generating ID for section (#794)
* route messages through a logger instead of using Kernel#warn (#44, PR #2660)
* add MemoryLogger for capturing messages sent to logger into memory (#44, PR #2660)
* add NullLogger to prevent messages from being logged (#44, PR #2660)
diff --git a/features/xref.feature b/features/xref.feature
index 1809668a..9a5974ee 100644
--- a/features/xref.feature
+++ b/features/xref.feature
@@ -669,14 +669,14 @@ Feature: Cross References
Then the result should match the HTML structure
"""
.sect1
- h2#_section_strong_one_strong
+ h2#_section_one
|Section <strong>One</strong>
.sectionbody: .paragraph: p content
.sect1
h2#_section_two Section Two
.sectionbody: .paragraph: p
|refer to
- a< href='#_section_strong_one_strong' Section <strong>One</strong>
+ a< href='#_section_one' Section <strong>One</strong>
"""
Scenario: Does not process a natural cross reference in compat mode
diff --git a/lib/asciidoctor.rb b/lib/asciidoctor.rb
index 5f92723d..3d7a3608 100644
--- a/lib/asciidoctor.rb
+++ b/lib/asciidoctor.rb
@@ -661,10 +661,10 @@ module Asciidoctor
#
InlineSectionAnchorRx = / (\\)?\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+))?\]\]$/
- # Matches invalid characters in a section id.
+ # Matches invalid ID characters in a section title.
#
- # NOTE uppercase chars are not included since the expression is used on a lowercased string
- InvalidSectionIdCharsRx = /&(?:[a-z][a-z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-f][\da-f][\da-f]{0,3});|[^#{CC_WORD}]+?/
+ # NOTE uppercase chars not included since expression is only run on a lowercase string
+ InvalidSectionIdCharsRx = /<[^>]+>|&(?:[a-z][a-z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-f][\da-f][\da-f]{0,3});|[^ #{CC_WORD}\-]+?/
# Matches the block style used to designate a discrete (aka free-floating) heading.
#
diff --git a/lib/asciidoctor/section.rb b/lib/asciidoctor/section.rb
index 766deb40..7ce38209 100644
--- a/lib/asciidoctor/section.rb
+++ b/lib/asciidoctor/section.rb
@@ -169,31 +169,41 @@ class Section < AbstractBlock
# Public: Generate a String ID from the given section title.
#
# The generated ID is prefixed with value of the 'idprefix' attribute, which
- # is an underscore by default. Invalid characters are replaced with the
- # value of the 'idseparator' attribute, which is an underscore by default.
+ # is an underscore (_) by default. Invalid characters are then removed and
+ # spaces and hyphens are replaced with the value of the 'idseparator'
+ # attribute, which is an underscore (_) by default.
#
- # If the generated ID is already in use in the document, a count is appended
- # until a unique id is found.
+ # If the generated ID is already in use in the document, a count is appended,
+ # offset by the separator, until a unique ID is found.
#
- # Section ID generation can be disabled by undefining the 'sectids' attribute.
+ # Section ID generation can be disabled by unsetting the 'sectids' document attribute.
#
# Examples
#
# Section.generate_id 'Foo', document
# => "_foo"
#
+ # Returns the generated [String] ID.
def self.generate_id title, document
attrs = document.attributes
+ pre = attrs['idprefix'] || '_'
if (sep = attrs['idseparator'])
- sep, sep_len = (attrs['idseparator'] = sep.chr), sep.length > 0 ? 1 : 0
+ if sep.length == 1
+ sep_sub = sep == '-' ? ' -' : %( #{sep}-)
+ elsif sep.empty?
+ no_sep = true
+ else
+ sep_sub = (sep = attrs['idseparator'] = sep.chr) == '-' ? ' -' : %( #{sep}-)
+ end
else
- sep, sep_len = '_', 1
+ sep, sep_sub = '_', ' _-'
end
- pre = attrs['idprefix'] || '_'
- gen_id = %(#{pre}#{title.downcase.gsub InvalidSectionIdCharsRx, sep})
- if sep_len > 0
- # remove repeat and trailing separator characters
- gen_id = gen_id.tr_s sep, sep
+ gen_id = %(#{pre}#{title.downcase.gsub InvalidSectionIdCharsRx, ''})
+ if no_sep
+ gen_id = gen_id.delete ' '
+ else
+ # replace space with separator and remove repeating and trailing separator characters
+ gen_id = gen_id.tr_s sep_sub, sep
gen_id = gen_id.chop if gen_id.end_with? sep
# ensure id doesn't begin with idseparator if idprefix is empty (assuming idseparator is not empty)
gen_id = gen_id.slice 1, gen_id.length if pre.empty? && (gen_id.start_with? sep)
diff --git a/test/sections_test.rb b/test/sections_test.rb
index 05120d69..3e60f5d9 100644
--- a/test/sections_test.rb
+++ b/test/sections_test.rb
@@ -11,12 +11,12 @@ context 'Sections' do
assert_equal '_section_one', sec.id
end
- test 'synthetic id replaces non-word characters with underscores' do
+ test 'synthetic id removes non-word characters' do
sec = block_from_string("== We're back!")
- assert_equal '_we_re_back', sec.id
+ assert_equal '_were_back', sec.id
end
- test 'synthetic id removes repeating underscores' do
+ test 'synthetic id removes repeating separators' do
sec = block_from_string('== Section $ One')
assert_equal '_section_one', sec.id
end
@@ -31,6 +31,21 @@ context 'Sections' do
assert_equal '_a_b', sec.id
end
+ test 'synthetic id removes XML tags' do
+ sec = block_from_string('== Use the `run` command to make it icon:gear[]')
+ assert_equal '_use_the_run_command_to_make_it_gear', sec.id
+ end
+
+ test 'synthetic id collapses repeating spaces' do
+ sec = block_from_string('== Go Far')
+ assert_equal '_go_far', sec.id
+ end
+
+ test 'synthetic id replaces hyphens with separator' do
+ sec = block_from_string('== State-of-the-art design')
+ assert_equal '_state_of_the_art_design', sec.id
+ end
+
test 'synthetic id prefix can be customized' do
sec = block_from_string(":idprefix: id_\n\n== Section One")
assert_equal 'id_section_one', sec.id
@@ -51,6 +66,11 @@ context 'Sections' do
assert_equal '_section-one', sec.id
end
+ test 'synthetic id separator can be hyphen and hyphens are preserved' do
+ sec = block_from_string(":idseparator: -\n\n== State-of-the-art design")
+ assert_equal '_state-of-the-art-design', sec.id
+ end
+
test 'synthetic id separator can only be one character' do
input = <<-EOS
:idseparator: -=-
@@ -481,11 +501,11 @@ endif::[]
end
test "with XML entity" do
- assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where#{decode_char 8217}s the love?\"]", render_string("== Where's the love?")
+ assert_xpath "//h2[@id='_whats_new'][text() = \"What#{decode_char 8217}s new?\"]", render_string("== What's new?")
end
test "with non-word character" do
- assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where’s the love?\"]", render_string("== Where’s the love?")
+ assert_xpath "//h2[@id='_whats_new'][text() = \"What’s new?\"]", render_string("== What’s new?")
end
test "with sequential non-word characters" do
@@ -2773,9 +2793,9 @@ content
assert_xpath '/*[@id="toc"]', output, 1
toc_links = xmlnodes_at_xpath '/*[@id="toc"]//li', output
assert_equal 3, toc_links.size
- toc_links.each do |toc_link|
- assert_equal 1, toc_link.inner_html.scan('<a').size
- end
+ assert_equal '<a href="#_section_one">Section One</a>', toc_links[0].inner_html
+ assert_equal '<a href="#_section_two">Section Two</a>', toc_links[1].inner_html
+ assert_equal '<a href="#_plant_trees_by_searching">Plant Trees by Searching</a>', toc_links[2].inner_html
end
test 'should not remove non-anchor tags from contents of entries in table of contents' do
@@ -2801,9 +2821,9 @@ content
assert_xpath '/*[@id="toc"]', output, 1
toc_links = xmlnodes_at_xpath '/*[@id="toc"]//li', output
assert_equal 3, toc_links.size
- assert_match(/^<a[^>]+><code>run<\/code> command<\/a>$/, toc_links[0].inner_html)
- assert_match(/^<a[^>]+><span class="icon"><i class="fa fa-bug"><\/i><\/span> Issues<\/a>$/, toc_links[1].inner_html)
- assert_match(/^<a[^>]+><em>Sustainable<\/em> Searches<\/a>/, toc_links[2].inner_html)
+ assert_equal '<a href="#_run_command"><code>run</code> command</a>', toc_links[0].inner_html
+ assert_equal '<a href="#_issues"><span class="icon"><i class="fa fa-bug"></i></span> Issues</a>', toc_links[1].inner_html
+ assert_equal '<a href="#_sustainable_searches"><em>Sustainable</em> Searches</a>', toc_links[2].inner_html
end
end