diff options
| author | Dan Allen <dan.j.allen@gmail.com> | 2018-04-17 00:41:39 -0600 |
|---|---|---|
| committer | Dan Allen <dan.j.allen@gmail.com> | 2018-04-17 01:34:53 -0600 |
| commit | d55e2b936cdd4e1792828fb3c5c5d8cf0c2c63ca (patch) | |
| tree | 2e66098ed2c04e4e7697ca0fb7d882ce344c3fb0 | |
| parent | ce927bdf045f6d176e0c173e67654786e7abbce9 (diff) | |
resolves #794 drop XML tags, character refs, and non-word chars when generating ID for section
- drop character refs and non-word chars (except for hyphen and space) instead of replacing with ID separator
- drop XML tags (but not the contents of the tag)
- replace spaces and hyphens with ID separator
- optimize logic in Section.generate_id method
| -rw-r--r-- | CHANGELOG.adoc | 1 |
| -rw-r--r-- | features/xref.feature | 4 |
| -rw-r--r-- | lib/asciidoctor.rb | 6 |
| -rw-r--r-- | lib/asciidoctor/section.rb | 34 |
| -rw-r--r-- | test/sections_test.rb | 42 |
5 files changed, 59 insertions, 28 deletions
diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index d8e1ca20..15cd4022 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -17,6 +17,7 @@ For a detailed view of what has changed, refer to the {uri-repo}/commits/master[ Enhancements:: + * BREAKING: drop XML tags, character refs, and non-word characters (except hyphen and space) when generating ID for section (#794) * route messages through a logger instead of using Kernel#warn (#44, PR #2660) * add MemoryLogger for capturing messages sent to logger into memory (#44, PR #2660) * add NullLogger to prevent messages from being logged (#44, PR #2660) diff --git a/features/xref.feature b/features/xref.feature index 1809668a..9a5974ee 100644 --- a/features/xref.feature +++ b/features/xref.feature @@ -669,14 +669,14 @@ Feature: Cross References Then the result should match the HTML structure """ .sect1 - h2#_section_strong_one_strong + h2#_section_one |Section <strong>One</strong> .sectionbody: .paragraph: p content .sect1 h2#_section_two Section Two .sectionbody: .paragraph: p |refer to - a< href='#_section_strong_one_strong' Section <strong>One</strong> + a< href='#_section_one' Section <strong>One</strong> """ Scenario: Does not process a natural cross reference in compat mode diff --git a/lib/asciidoctor.rb b/lib/asciidoctor.rb index 5f92723d..3d7a3608 100644 --- a/lib/asciidoctor.rb +++ b/lib/asciidoctor.rb @@ -661,10 +661,10 @@ module Asciidoctor # InlineSectionAnchorRx = / (\\)?\[\[([#{CC_ALPHA}_:][#{CC_WORD}:.-]*)(?:, *(.+))?\]\]$/ - # Matches invalid characters in a section id. + # Matches invalid ID characters in a section title. 
# - # NOTE uppercase chars are not included since the expression is used on a lowercased string - InvalidSectionIdCharsRx = /&(?:[a-z][a-z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-f][\da-f][\da-f]{0,3});|[^#{CC_WORD}]+?/ + # NOTE uppercase chars not included since expression is only run on a lowercase string + InvalidSectionIdCharsRx = /<[^>]+>|&(?:[a-z][a-z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-f][\da-f][\da-f]{0,3});|[^ #{CC_WORD}\-]+?/ # Matches the block style used to designate a discrete (aka free-floating) heading. # diff --git a/lib/asciidoctor/section.rb b/lib/asciidoctor/section.rb index 766deb40..7ce38209 100644 --- a/lib/asciidoctor/section.rb +++ b/lib/asciidoctor/section.rb @@ -169,31 +169,41 @@ class Section < AbstractBlock # Public: Generate a String ID from the given section title. # # The generated ID is prefixed with value of the 'idprefix' attribute, which - # is an underscore by default. Invalid characters are replaced with the - # value of the 'idseparator' attribute, which is an underscore by default. + # is an underscore (_) by default. Invalid characters are then removed and + # spaces are replaced with the value of the 'idseparator' attribute, which is + # an underscore (_) by default. # - # If the generated ID is already in use in the document, a count is appended - # until a unique id is found. + # If the generated ID is already in use in the document, a count is appended, + # offset by the separator, until a unique ID is found. # - # Section ID generation can be disabled by undefining the 'sectids' attribute. + # Section ID generation can be disabled by unsetting the 'sectids' document attribute. # # Examples # # Section.generate_id 'Foo', document # => "_foo" # + # Returns the generated [String] ID. def self.generate_id title, document attrs = document.attributes + pre = attrs['idprefix'] || '_' if (sep = attrs['idseparator']) - sep, sep_len = (attrs['idseparator'] = sep.chr), sep.length > 0 ? 1 : 0 + if sep.length == 1 + sep_sub = sep == '-' ? 
' -' : %( #{sep}-) + elsif sep.empty? + no_sep = true + else + sep_sub = (sep = attrs['idseparator'] = sep.chr) == '-' ? ' -' : %( #{sep}-) + end else - sep, sep_len = '_', 1 + sep, sep_sub = '_', ' _-' end - pre = attrs['idprefix'] || '_' - gen_id = %(#{pre}#{title.downcase.gsub InvalidSectionIdCharsRx, sep}) - if sep_len > 0 - # remove repeat and trailing separator characters - gen_id = gen_id.tr_s sep, sep + gen_id = %(#{pre}#{title.downcase.gsub InvalidSectionIdCharsRx, ''}) + if no_sep + gen_id = gen_id.delete ' ' + else + # replace space with separator and remove repeating and trailing separator characters + gen_id = gen_id.tr_s sep_sub, sep gen_id = gen_id.chop if gen_id.end_with? sep # ensure id doesn't begin with idseparator if idprefix is empty (assuming idseparator is not empty) gen_id = gen_id.slice 1, gen_id.length if pre.empty? && (gen_id.start_with? sep) diff --git a/test/sections_test.rb b/test/sections_test.rb index 05120d69..3e60f5d9 100644 --- a/test/sections_test.rb +++ b/test/sections_test.rb @@ -11,12 +11,12 @@ context 'Sections' do assert_equal '_section_one', sec.id end - test 'synthetic id replaces non-word characters with underscores' do + test 'synthetic id removes non-word characters' do sec = block_from_string("== We're back!") - assert_equal '_we_re_back', sec.id + assert_equal '_were_back', sec.id end - test 'synthetic id removes repeating underscores' do + test 'synthetic id removes repeating separators' do sec = block_from_string('== Section $ One') assert_equal '_section_one', sec.id end @@ -31,6 +31,21 @@ context 'Sections' do assert_equal '_a_b', sec.id end + test 'synthetic id removes XML tags' do + sec = block_from_string('== Use the `run` command to make it icon:gear[]') + assert_equal '_use_the_run_command_to_make_it_gear', sec.id + end + + test 'synthetic id collapses repeating spaces' do + sec = block_from_string('== Go Far') + assert_equal '_go_far', sec.id + end + + test 'synthetic id replaces hyphens with separator' do + 
sec = block_from_string('== State-of-the-art design') + assert_equal '_state_of_the_art_design', sec.id + end + test 'synthetic id prefix can be customized' do sec = block_from_string(":idprefix: id_\n\n== Section One") assert_equal 'id_section_one', sec.id @@ -51,6 +66,11 @@ context 'Sections' do assert_equal '_section-one', sec.id end + test 'synthetic id separator can be hyphen and hyphens are preserved' do + sec = block_from_string(":idseparator: -\n\n== State-of-the-art design") + assert_equal '_state-of-the-art-design', sec.id + end + test 'synthetic id separator can only be one character' do input = <<-EOS :idseparator: -=- @@ -481,11 +501,11 @@ endif::[] end test "with XML entity" do - assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where#{decode_char 8217}s the love?\"]", render_string("== Where's the love?") + assert_xpath "//h2[@id='_whats_new'][text() = \"What#{decode_char 8217}s new?\"]", render_string("== What's new?") end test "with non-word character" do - assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where’s the love?\"]", render_string("== Where’s the love?") + assert_xpath "//h2[@id='_whats_new'][text() = \"What’s new?\"]", render_string("== What’s new?") end test "with sequential non-word characters" do @@ -2773,9 +2793,9 @@ content assert_xpath '/*[@id="toc"]', output, 1 toc_links = xmlnodes_at_xpath '/*[@id="toc"]//li', output assert_equal 3, toc_links.size - toc_links.each do |toc_link| - assert_equal 1, toc_link.inner_html.scan('<a').size - end + assert_equal '<a href="#_section_one">Section One</a>', toc_links[0].inner_html + assert_equal '<a href="#_section_two">Section Two</a>', toc_links[1].inner_html + assert_equal '<a href="#_plant_trees_by_searching">Plant Trees by Searching</a>', toc_links[2].inner_html end test 'should not remove non-anchor tags from contents of entries in table of contents' do @@ -2801,9 +2821,9 @@ content assert_xpath '/*[@id="toc"]', output, 1 toc_links = xmlnodes_at_xpath '/*[@id="toc"]//li', 
output assert_equal 3, toc_links.size - assert_match(/^<a[^>]+><code>run<\/code> command<\/a>$/, toc_links[0].inner_html) - assert_match(/^<a[^>]+><span class="icon"><i class="fa fa-bug"><\/i><\/span> Issues<\/a>$/, toc_links[1].inner_html) - assert_match(/^<a[^>]+><em>Sustainable<\/em> Searches<\/a>/, toc_links[2].inner_html) + assert_equal '<a href="#_run_command"><code>run</code> command</a>', toc_links[0].inner_html + assert_equal '<a href="#_issues"><span class="icon"><i class="fa fa-bug"></i></span> Issues</a>', toc_links[1].inner_html + assert_equal '<a href="#_sustainable_searches"><em>Sustainable</em> Searches</a>', toc_links[2].inner_html end end |
