Merge pull request #951 from mojavelinux/issue-892

resolves #892 - match word characters defined by Unicode
author: Dan Allen <dan.j.allen@gmail.com> 2014-05-16 03:32:05 -0600
committer: Dan Allen <dan.j.allen@gmail.com> 2014-05-16 03:32:05 -0600
commit: cddfc7b0ae26db4b355bae276247fa04766fdc5f (patch)
tree: 395466ec8d9d4fd1d4087c60841441a6999e84d3
parent: 686e4237a14f1923818ac956a95b4396bb47156b (diff)
parent: a1b17518d6f699ed6cb55f244dc7385a1a4bb845 (diff)
6 files changed, 212 insertions, 104 deletions
diff --git a/README.adoc b/README.adoc
index d61be259..388cf0ee 100644
--- a/README.adoc
+++ b/README.adoc
@@ -567,9 +567,9 @@ To run all the tests, simply execute +rake+:
 
  $ rake
 
-If you want to run a single test file, you can use +testrb+:
+If you want to run a single test file, you can use +ruby+:
 
- $ testrb test/blocks_test.rb
+ $ ruby test/blocks_test.rb
 
 To test a single test case, first add the string "wip" to the beginning of the description.
 For example:
@@ -579,9 +579,9 @@ test 'wip should render ...' do
   ...
 end
 
-Then, run +testrb+ again, but this time pass a selector argument so it finds matching tests:
+Then, run +ruby+ again, but this time pass a selector argument so it finds matching tests:
 
- $ testrb test/blocks_test.rb -n /wip/
+ $ ruby test/blocks_test.rb -n /wip/
 
 Once you are done with your test, make sure to remove "wip" from the description and run all the tests again using +rake+.
 
diff --git a/lib/asciidoctor.rb b/lib/asciidoctor.rb
index 633c3090..1ea10e0d 100644
--- a/lib/asciidoctor.rb
+++ b/lib/asciidoctor.rb
@@ -2,6 +2,7 @@ RUBY_ENGINE = 'unknown' unless defined? RUBY_ENGINE
 RUBY_ENGINE_OPAL = (RUBY_ENGINE == 'opal')
 RUBY_ENGINE_JRUBY = (RUBY_ENGINE == 'jruby')
 RUBY_MIN_VERSION_1_9 = (RUBY_VERSION >= '1.9')
+RUBY_MIN_VERSION_2 = (RUBY_VERSION >= '2')
 
 require 'set'
 
@@ -51,6 +52,9 @@ $:.unshift File.dirname __FILE__
 #
 module Asciidoctor
 
+  # alias the RUBY_ENGINE constant inside the Asciidoctor namespace
+  RUBY_ENGINE = ::RUBY_ENGINE
+
   module SafeMode
 
     # A safe mode level that disables any of the security features enforced
@@ -334,22 +338,46 @@ module Asciidoctor
   #(pseudo)module Rx
 
     ## Regular expression character classes (to ensure regexp compatibility between Ruby and JavaScript)
+    ## CC stands for "character class" (bare class contents, for use inside [...]), CG stands for "character class group" (a complete class, usable on its own)
+
+    # NOTE \w matches only the ASCII word characters, whereas [[:word:]] or \p{Word} matches any character in the Unicode word category.
 
-    # character classes for JavaScript Regexp engine
-    # NOTE use of double quotes are intentional to work around Opal issue
+    # character classes for the Regexp engine(s) in JavaScript
     if RUBY_ENGINE == 'opal'
       CC_ALPHA = 'a-zA-Z'
+      CG_ALPHA = '[a-zA-Z]'
       CC_ALNUM = 'a-zA-Z0-9'
-      CC_BLANK = "[ \\t]"
-      CC_GRAPH = '[\x21-\x7E]' # non-blank character; broken in Opal!
-      CC_EOL   = "(?=\\n|$)"
-    # character classes for Ruby Regexp engine
+      CG_ALNUM = '[a-zA-Z0-9]'
+      CG_BLANK = '[ \\t]'
+      CC_EOL   = '(?=\\n|$)'
+      CG_GRAPH = '[\\x21-\\x7E]' # non-blank character
+      CC_WORD  = 'a-zA-Z0-9_'
+      CG_WORD  = '[a-zA-Z0-9_]'
+    # character classes for the Regexp engine in Ruby >= 2 (Ruby 1.9 supports \p{} but has problems w/ encoding)
+    elsif ::RUBY_MIN_VERSION_2
+      CC_ALPHA = CG_ALPHA = '\p{Alpha}'
+      CC_ALNUM = CG_ALNUM = '\p{Alnum}'
+      CG_BLANK = '\p{Blank}'
+      CC_EOL   = '$'
+      CG_GRAPH = '\p{Graph}'
+      CC_WORD  = CG_WORD = '\p{Word}'
+    # character classes for the Regexp engine in Ruby < 2
     else
       CC_ALPHA = '[:alpha:]'
+      CG_ALPHA = '[[:alpha:]]'
       CC_ALNUM = '[:alnum:]'
-      CC_BLANK = '[[:blank:]]'
-      CC_GRAPH = '[[:graph:]]' # non-blank character
+      CG_ALNUM = '[[:alnum:]]'
+      CG_BLANK = '[[:blank:]]'
       CC_EOL   = '$'
+      CG_GRAPH = '[[:graph:]]' # non-blank character
+      if ::RUBY_MIN_VERSION_1_9
+        CC_WORD = '[:word:]'
+        CG_WORD = '[[:word:]]'
+      else
+        # NOTE Ruby 1.8 cannot match word characters beyond the ASCII range; if you need this feature, upgrade!
+        CC_WORD = '[:alnum:]_'
+        CG_WORD = '[[:alnum:]_]'
+      end
     end
 
     ## Document header
@@ -359,8 +387,9 @@ module Asciidoctor
     # Examples
     #
     #   Doc Writer <doc@example.com>
+    #   Mary_Sue Brontë
     #
-    AuthorInfoLineRx = /^(\w[\w\-'.]*)(?: +(\w[\w\-'.]*))?(?: +(\w[\w\-'.]*))?(?: +<([^>]+)>)?$/
+    AuthorInfoLineRx = /^(#{CG_WORD}[#{CC_WORD}\-'.]*)(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +(#{CG_WORD}[#{CC_WORD}\-'.]*))?(?: +<([^>]+)>)?$/
 
     # Matches the revision info line, which appears immediately following
     # the author info line beneath the document title.
@@ -385,7 +414,7 @@ module Asciidoctor
     #
     #   asciidoctor - converts AsciiDoc source files to HTML, DocBook and other formats
     #
-    ManpageNamePurposeRx = /^(.*?)#{CC_BLANK}+-#{CC_BLANK}+(.*)$/
+    ManpageNamePurposeRx = /^(.*?)#{CG_BLANK}+-#{CG_BLANK}+(.*)$/
 
     ## Preprocessor directives
 
@@ -409,9 +438,7 @@ module Asciidoctor
     #
     #   "{asciidoctor-version}" >= "0.1.0"
     #
-    EvalExpressionRx = /^(\S.*?)#{CC_BLANK}*(==|!=|<=|>=|<|>)#{CC_BLANK}*(\S.*)$/
-    # ...or if we want to be more strict up front about what's on each side
-    # EvalExpressionRx = /^(true|false|("|'|)\{\w+(?:\-\w+)*\}\2|("|')[^\3]*\3|\-?\d+(?:\.\d+)*)#{CC_BLANK}*(==|!=|<=|>=|<|>)#{CC_BLANK}*(true|false|("|'|)\{\w+(?:\-\w+)*\}\6|("|')[^\7]*\7|\-?\d+(?:\.\d+)*)$/
+    EvalExpressionRx = /^(\S.*?)#{CG_BLANK}*(==|!=|<=|>=|<|>)#{CG_BLANK}*(\S.*)$/
 
     # Matches an include preprocessor directive.
     #
@@ -437,7 +464,7 @@ module Asciidoctor
     #                collapsing the line breaks and indentation to
     #                a single space.
     #
-    AttributeEntryRx = /^:(!?\w.*?):(?:#{CC_BLANK}+(.*))?$/
+    AttributeEntryRx = /^:(!?\w.*?):(?:#{CG_BLANK}+(.*))?$/
 
     # Matches invalid characters in an attribute name.
     InvalidAttributeNameCharsRx = /[^\w\-]/
@@ -470,7 +497,7 @@ module Asciidoctor
     #   [[idname]]
     #   [[idname,Reference Text]]
     #
-    BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}:_][\w:.-]*)(?:,#{CC_BLANK}*(\S.*))?)\]\]$/
+    BlockAnchorRx = /^\[\[(?:|([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*))?)\]\]$/
 
     # Matches an attribute list above a block element.
     #
@@ -485,12 +512,12 @@ module Asciidoctor
     #   # as attribute reference
     #   [{lead}]
     #
-    BlockAttributeListRx = /^\[(|#{CC_BLANK}*[\w\{,.#"'%].*)\]$/
+    BlockAttributeListRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*)\]$/
 
     # A combined pattern that matches either a block anchor or a block attribute list.
     #
     # TODO this one gets hit a lot, should be optimized as much as possible
-    BlockAttributeLineRx = /^\[(|#{CC_BLANK}*[\w\{,.#"'%].*|\[(?:|[#{CC_ALPHA}:_][\w:.-]*(?:,#{CC_BLANK}*\S.*)?)\])\]$/
+    BlockAttributeLineRx = /^\[(|#{CG_BLANK}*[#{CC_WORD}\{,.#"'%].*|\[(?:|[#{CC_ALPHA}:_][#{CC_WORD}:.-]*(?:,#{CG_BLANK}*\S.*)?)\])\]$/
 
     # Matches a title above a block.
     #
@@ -507,7 +534,7 @@ module Asciidoctor
     #   NOTE: Just a little note.
     #   TIP: Don't forget!
     #
-    AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a * '|'}):#{CC_BLANK}/
+    AdmonitionParagraphRx = /^(#{ADMONITION_STYLES.to_a * '|'}):#{CG_BLANK}/
 
     # Matches a literal paragraph, which is a line of text preceded by at least one space.
     #
@@ -515,7 +542,7 @@ module Asciidoctor
     #
     #   <SPACE>Foo
     #   <TAB>Foo
-    LiteralParagraphRx = /^(#{CC_BLANK}+.*)$/
+    LiteralParagraphRx = /^(#{CG_BLANK}+.*)$/
 
     # Matches a comment block.
     #
@@ -550,11 +577,11 @@ module Asciidoctor
     # match[1] is the delimiter, whose length determines the level
     # match[2] is the title itself
     # match[3] is an inline anchor, which becomes the section id
-    AtxSectionRx = /^((?:=|#){1,6})#{CC_BLANK}+(\S.*?)(?:#{CC_BLANK}+\1)?$/
+    AtxSectionRx = /^((?:=|#){1,6})#{CG_BLANK}+(\S.*?)(?:#{CG_BLANK}+\1)?$/
 
     # Matches the restricted section name for a two-line (Setext-style) section title.
     # The name cannot begin with a dot and has at least one alphanumeric character.
-    SetextSectionTitleRx = /^((?=.*\w+.*)[^.].*?)$/
+    SetextSectionTitleRx = /^((?=.*#{CG_WORD}+.*)[^.].*?)$/
 
     # Matches the underline in a two-line (Setext-style) section title.
     #
@@ -571,10 +598,10 @@ module Asciidoctor
     #   Section Title [[idname]]
     #   Section Title [[idname,Reference Text]]
     #
-    InlineSectionAnchorRx = /^(.*?)#{CC_BLANK}+(\\)?\[\[([#{CC_ALPHA}:_][\w:.-]*)(?:,#{CC_BLANK}*(\S.*?))?\]\]$/
+    InlineSectionAnchorRx = /^(.*?)#{CG_BLANK}+(\\)?\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]$/
 
     # Matches invalid characters in a section id.
-    InvalidSectionIdCharsRx = /&(?:[a-zA-Z]{2,}|#\d{2,5}|#x[a-fA-F0-9]{2,4});|\W+?/
+    InvalidSectionIdCharsRx = /&(?:[a-zA-Z]{2,}|#\d{2,5}|#x[a-fA-F0-9]{2,4});|[^#{CC_WORD}]+?/
 
     # Matches the block style used to designate a section title as a floating title.
     #
@@ -588,7 +615,7 @@ module Asciidoctor
     ## Lists
 
     # Detects the start of any list item.
-    AnyListRx = /^(?:<?\d+>#{CC_BLANK}+#{CC_GRAPH}|#{CC_BLANK}*(?:-|(?:\*|\.){1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CC_BLANK}+#{CC_GRAPH}|#{CC_BLANK}*.*?(?::{2,4}|;;)(?:#{CC_BLANK}+#{CC_GRAPH}|$))/
+    AnyListRx = /^(?:<?\d+>#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*(?:-|(?:\*|\.){1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+#{CG_GRAPH}|#{CG_BLANK}*.*?(?::{2,4}|;;)(?:#{CG_BLANK}+#{CG_GRAPH}|$))/
 
     # Matches an unordered list item (one level for hyphens, up to 5 levels for asterisks).
     #
@@ -597,7 +624,7 @@ module Asciidoctor
     #   * Foo
     #   - Foo
     #
-    UnorderedListRx = /^#{CC_BLANK}*(-|\*{1,5})#{CC_BLANK}+(.*)$/
+    UnorderedListRx = /^#{CG_BLANK}*(-|\*{1,5})#{CG_BLANK}+(.*)$/
 
     # Matches an ordered list item (explicit numbering or up to 5 consecutive dots).
     #
@@ -612,7 +639,7 @@ module Asciidoctor
     #   I. Foo (upperroman)
     #
     # NOTE leading space match is not always necessary, but is used for list reader
-    OrderedListRx = /^#{CC_BLANK}*(\.{1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CC_BLANK}+(.*)$/
+    OrderedListRx = /^#{CG_BLANK}*(\.{1,5}|\d+\.|[a-zA-Z]\.|[IVXivx]+\))#{CG_BLANK}+(.*)$/
 
     # Matches the ordinals for each type of ordered list.
     OrderedListMarkerRxMap = {
@@ -649,15 +676,15 @@ module Asciidoctor
     # NOTE negative match for comment line is intentional since that isn't handled when looking for next list item
     # QUESTION should we check for line comment in regex or when scanning the lines?
     # 
-    DefinitionListRx = /^(?!\/\/)#{CC_BLANK}*(.*?)(:{2,4}|;;)(?:#{CC_BLANK}+(.*))?$/
+    DefinitionListRx = /^(?!\/\/)#{CG_BLANK}*(.*?)(:{2,4}|;;)(?:#{CG_BLANK}+(.*))?$/
 
     # Matches a sibling definition list item (which does not include the keyed type).
     DefinitionListSiblingRx = {
       # (?:.*?[^:])? - a non-capturing group which grabs longest sequence of characters that doesn't end w/ colon
-      '::' => /^(?!\/\/)#{CC_BLANK}*((?:.*[^:])?)(::)(?:#{CC_BLANK}+(.*))?$/,
-      ':::' => /^(?!\/\/)#{CC_BLANK}*((?:.*[^:])?)(:::)(?:#{CC_BLANK}+(.*))?$/,
-      '::::' => /^(?!\/\/)#{CC_BLANK}*((?:.*[^:])?)(::::)(?:#{CC_BLANK}+(.*))?$/,
-      ';;' => /^(?!\/\/)#{CC_BLANK}*(.*)(;;)(?:#{CC_BLANK}+(.*))?$/
+      '::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::)(?:#{CG_BLANK}+(.*))?$/,
+      ':::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(:::)(?:#{CG_BLANK}+(.*))?$/,
+      '::::' => /^(?!\/\/)#{CG_BLANK}*((?:.*[^:])?)(::::)(?:#{CG_BLANK}+(.*))?$/,
+      ';;' => /^(?!\/\/)#{CG_BLANK}*(.*)(;;)(?:#{CG_BLANK}+(.*))?$/
     }
 
     # Matches a callout list item.
@@ -666,7 +693,7 @@ module Asciidoctor
     #
     #   <1> Foo
     #
-    CalloutListRx = /^<?(\d+)>#{CC_BLANK}+(.*)/
+    CalloutListRx = /^<?(\d+)>#{CG_BLANK}+(.*)/
 
     # Matches a callout reference inside literal text.
     # 
@@ -706,8 +733,8 @@ module Asciidoctor
     #   2.3+<.>m
     #
     # FIXME use step-wise scan (or treetop) rather than this mega-regexp
-    CellSpecStartRx = /^#{CC_BLANK}*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?\|/
-    CellSpecEndRx = /#{CC_BLANK}+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
+    CellSpecStartRx = /^#{CG_BLANK}*(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?\|/
+    CellSpecEndRx = /#{CG_BLANK}+(?:(\d+(?:\.\d*)?|(?:\d*\.)?\d+)([*+]))?([<^>](?:\.[<^>]?)?|(?:[<^>]?\.)?[<^>])?([a-z])?$/
 
     # Block macros
 
@@ -719,7 +746,7 @@ module Asciidoctor
     #
     #--
     # NOTE we've relaxed the match for target to accomodate the short format (e.g., name::[attrlist])
-    GenericBlockMacroRx = /^(\w[\w\-]*)::(\S*?)\[((?:\\\]|[^\]])*?)\]$/
+    GenericBlockMacroRx = /^(#{CG_WORD}+)::(\S*?)\[((?:\\\]|[^\]])*?)\]$/
 
     # Matches an image, video or audio block macro.
     #
@@ -750,7 +777,7 @@ module Asciidoctor
     #   anchor:idname[]
     #   anchor:idname[Reference Text]
     #
-    InlineAnchorRx = /\\?(?:\[\[([#{CC_ALPHA}:_][\w:.-]*)(?:,#{CC_BLANK}*(\S.*?))?\]\]|anchor:(\S+)\[(.*?[^\\])?\])/
+    InlineAnchorRx = /\\?(?:\[\[([#{CC_ALPHA}:_][#{CC_WORD}:.-]*)(?:,#{CG_BLANK}*(\S.*?))?\]\]|anchor:(\S+)\[(.*?[^\\])?\])/
 
     # Matches a bibliography anchor anywhere inline.
     #
@@ -758,13 +785,13 @@ module Asciidoctor
     #
     #   [[[Foo]]]
     #
-    InlineBiblioAnchorRx = /\\?\[\[\[([\w:][\w:.-]*?)\]\]\]/
+    InlineBiblioAnchorRx = /\\?\[\[\[([#{CC_WORD}:][#{CC_WORD}:.-]*?)\]\]\]/
 
     # Matches an inline e-mail address.
     #
     #   doc.writer@example.com
     #
-    EmailInlineMacroRx = /([\\>:\/])?\w[\w.%+-]*@[#{CC_ALNUM}][#{CC_ALNUM}.-]*\.[#{CC_ALPHA}]{2,4}\b/
+    EmailInlineMacroRx = /([\\>:\/])?#{CG_WORD}[#{CC_WORD}.%+-]*@#{CG_ALNUM}[#{CC_ALNUM}.-]*\.#{CG_ALPHA}{2,4}\b/
 
     # Matches an inline footnote macro, which is allowed to span multiple lines.
     #
@@ -816,7 +843,7 @@ module Asciidoctor
     #   Ctrl + Alt+T
     #   Ctrl,T
     #
-    KbdDelimiterRx = /(?:\+|,)(?=#{CC_BLANK}*[^\1])/
+    KbdDelimiterRx = /(?:\+|,)(?=#{CG_BLANK}*[^\1])/
 
     # Matches an implicit link and some of the link inline macro.
     #
@@ -855,7 +882,7 @@ module Asciidoctor
     #   menu:View[Page Style > No Style]
     #   menu:View[Page Style, No Style]
     #
-    MenuInlineMacroRx = /\\?menu:(\w|\w.*?\S)\[#{CC_BLANK}*(.+?)?\]/
+    MenuInlineMacroRx = /\\?menu:(#{CG_WORD}|#{CG_WORD}.*?\S)\[#{CG_BLANK}*(.+?)?\]/
 
     # Matches an implicit menu inline macro.
     #
@@ -863,7 +890,7 @@ module Asciidoctor
     #
     #   "File > New..."
     #
-    MenuInlineRx = /\\?"(\w[^"]*?#{CC_BLANK}*&gt;#{CC_BLANK}*[^" \t][^"]*)"/
+    MenuInlineRx = /\\?"(#{CG_WORD}[^"]*?#{CG_BLANK}*&gt;#{CG_BLANK}*[^" \t][^"]*)"/
 
     # Matches a passthrough literal value, which may span multiple lines.
     #
@@ -871,7 +898,7 @@ module Asciidoctor
     #
     #   `text`
     #
-    PassInlineLiteralRx = /(^|[^`\w])(?:\[([^\]]+?)\])?(\\?`([^`\s]|[^`\s].*?\S)`)(?![`\w])/m
+    PassInlineLiteralRx = /(^|[^`#{CC_WORD}])(?:\[([^\]]+?)\])?(\\?`([^`\s]|[^`\s].*?\S)`)(?![`#{CC_WORD}])/m
 
     # Matches several variants of the passthrough inline macro, which may span multiple lines.
     #
@@ -891,7 +918,7 @@ module Asciidoctor
     #   xref:id[reftext]
     #
     # NOTE special characters have already been escaped, hence the entity references
-    XrefInlineMacroRx = /\\?(?:&lt;&lt;([\w":].*?)&gt;&gt;|xref:([\w":].*?)\[(.*?)\])/m
+    XrefInlineMacroRx = /\\?(?:&lt;&lt;([#{CC_WORD}":].*?)&gt;&gt;|xref:([#{CC_WORD}":].*?)\[(.*?)\])/m
 
     ## Layout
 
@@ -904,8 +931,11 @@ module Asciidoctor
     #   Foo +
     #
     # NOTE: JavaScript only treats ^ and $ as line boundaries in multiline regexp
-    #LineBreakRx = /^(.*)[[:blank:]]\+$/
-    LineBreakRx = ::RUBY_ENGINE_OPAL ? %x(/^(.*?)[ \\t]\\+$/m) : %r{^(.*)[[:blank:]]\+$}
+    LineBreakRx = if RUBY_ENGINE == 'opal'
+      /^(.*)[ \t]\+$/m
+    else
+      /^(.*)[[:blank:]]\+$/
+    end
 
     # Matches an AsciiDoc horizontal rule or AsciiDoc page break.
     #
@@ -932,7 +962,7 @@ module Asciidoctor
     # Matches a blank line.
     #
     # NOTE allows for empty space in line as it could be left by the template engine
-    BlankLineRx = /^#{CC_BLANK}*\n/
+    BlankLineRx = /^#{CG_BLANK}*\n/
 
     # Matches a comma or semi-colon delimiter.
     #
@@ -984,7 +1014,7 @@ module Asciidoctor
     # 
     #   one\ two\ three
     #
-    EscapedSpaceRx = /\\(#{CC_BLANK})/
+    EscapedSpaceRx = /\\(#{CG_BLANK})/
 
     # Matches a space delimiter that's not escaped.
     #
@@ -992,13 +1022,15 @@ module Asciidoctor
     #
     #   one two	three	four
     #
-    SpaceDelimiterRx = /([^\\])#{CC_BLANK}+/
+    SpaceDelimiterRx = /([^\\])#{CG_BLANK}+/
 
     # Matches any character with multibyte support explicitly enabled (length of multibyte char = 1)
     #
-    # NOTE It's necessary to hide the use of the language modifier (u) from JavaScript
+    # NOTE If it becomes necessary to hide the use of the language modifier (u) from JavaScript, use (Regexp.new '.', false, 'u')
     #
-    UnicodeCharScanRx = FORCE_UNICODE_LINE_LENGTH ? (Regexp.new '.', false, 'u') : nil
+    UnicodeCharScanRx = unless RUBY_ENGINE == 'opal'
+      FORCE_UNICODE_LINE_LENGTH ? /./u : nil
+    end
 
     # Detects strings that resemble URIs.
     #
@@ -1007,7 +1039,7 @@ module Asciidoctor
     #   https://domain
     #   data:info
     #
-    UriSniffRx = %r{^[#{CC_ALPHA}][#{CC_ALNUM}.+-]*:/{0,2}}
+    UriSniffRx = %r{^#{CG_ALPHA}[#{CC_ALNUM}.+-]*:/{0,2}}
 
     # Detects the end of an implicit URI in the text
     #
@@ -1036,12 +1068,12 @@ module Asciidoctor
     #
     #   Here\'s Johnny!
     #
-    #EscapedSingleQuoteRx = /(\w)\\'(\w)/
+    #EscapedSingleQuoteRx = /(#{CG_WORD})\\'(#{CG_WORD})/
     # an alternative if our backend generates single-quoted html/xml attributes
-    #EscapedSingleQuoteRx = /(\w|=)\\'(\w)/
+    #EscapedSingleQuoteRx = /(#{CG_WORD}|=)\\'(#{CG_WORD})/
 
     # Matches whitespace at the beginning of the line
-    #LeadingSpacesRx = /^(#{CC_BLANK}*)/
+    #LeadingSpacesRx = /^(#{CG_BLANK}*)/
 
     # Matches parent directory references at the beginning of a path
     #LeadingParentDirsRx = /^(?:\.\.\/)*/
@@ -1091,34 +1123,34 @@ module Asciidoctor
     [:strong, :unconstrained, /\\?(?:\[([^\]]+?)\])?\*\*(.+?)\*\*/m],
 
     # *strong*
-    [:strong, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\*(\S|\S.*?\S)\*(?=\W|$)/m],
+    [:strong, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\*(\S|\S.*?\S)\*(?!#{CG_WORD})/m],
 
     # ``double-quoted''
-    [:double, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?``(\S|\S.*?\S)''(?=\W|$)/m],
+    [:double, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?``(\S|\S.*?\S)''(?!#{CG_WORD})/m],
 
     # 'emphasis'
-    [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?'(\S|\S.*?\S)'(?=\W|$)/m],
+    [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?'(\S|\S.*?\S)'(?!#{CG_WORD})/m],
 
     # `single-quoted'
-    [:single, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?`(\S|\S.*?\S)'(?=\W|$)/m],
+    [:single, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?`(\S|\S.*?\S)'(?!#{CG_WORD})/m],
 
     # ++monospaced++
     [:monospaced, :unconstrained, /\\?(?:\[([^\]]+?)\])?\+\+(.+?)\+\+/m],
 
     # +monospaced+
-    [:monospaced, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?\+(\S|\S.*?\S)\+(?=\W|$)/m],
+    [:monospaced, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?\+(\S|\S.*?\S)\+(?!#{CG_WORD})/m],
 
     # __emphasis__
     [:emphasis, :unconstrained, /\\?(?:\[([^\]]+?)\])?__(.+?)__/m],
 
     # _emphasis_
-    [:emphasis, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?_(\S|\S.*?\S)_(?=\W|$)/m],
+    [:emphasis, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?_(\S|\S.*?\S)_(?!#{CG_WORD})/m],
 
     # ##unquoted##
     [:none, :unconstrained, /\\?(?:\[([^\]]+?)\])?##(.+?)##/m],
 
     # #unquoted#
-    [:none, :constrained, /(^|[^\w;:}])(?:\[([^\]]+?)\])?#(\S|\S.*?\S)#(?=\W|$)/m],
+    [:none, :constrained, /(^|[^#{CC_WORD};:}])(?:\[([^\]]+?)\])?#(\S|\S.*?\S)#(?!#{CG_WORD})/m],
 
     # ^superscript^
     [:superscript, :unconstrained, /\\?(?:\[([^\]]+?)\])?\^(.+?)\^/m],
@@ -1140,13 +1172,13 @@ module Asciidoctor
     # foo -- bar
     [/(^|\n| |\\)--( |\n|$)/, '&#8201;&#8212;&#8201;', :none],
     # foo--bar
-    [/(\w)\\?--(?=\w)/, '&#8212;', :leading],
+    [/(#{CG_WORD})\\?--(?=#{CG_WORD})/, '&#8212;', :leading],
     # ellipsis
     [/\\?\.\.\./, '&#8230;', :leading],
     # apostrophe or a closing single quote (planned)
-    [/([#{CC_ALPHA}])\\?'(?!')/, '&#8217;', :leading],
+    [/(#{CG_ALPHA})\\?'(?!')/, '&#8217;', :leading],
     # an opening single quote (planned)
-    #[/\B\\?'(?=[#{CC_ALPHA}])/, '&#8216;', :none],
+    #[/\B\\?'(?=#{CG_ALPHA})/, '&#8216;', :none],
     # right arrow ->
     [/\\?-&gt;/, '&#8594;', :none],
     # right double arrow =>
diff --git a/lib/asciidoctor/substitutors.rb b/lib/asciidoctor/substitutors.rb
index 677506dd..1685de28 100644
--- a/lib/asciidoctor/substitutors.rb
+++ b/lib/asciidoctor/substitutors.rb
@@ -189,7 +189,7 @@ module Substitutors
     text = text.gsub(PassInlineLiteralRx) {
       # alias match for Ruby 1.8.7 compat
       m = $~
-      # fix nil results in Opal
+      # fix non-matching group results in Opal under Firefox
       if ::RUBY_ENGINE_OPAL
         m[2] = nil if m[2] == ''
       end
@@ -197,8 +197,7 @@ module Substitutors
       unescaped_attrs = nil
       # honor the escape
       if m[3].start_with? '\\'
-        # NOTE Opal may not like %() as an enclosure around this string
-        next m[2] ? "#{m[1]}[#{m[2]}]#{m[3][1..-1]}" : "#{m[1]}#{m[3][1..-1]}"
+        next m[2] ? %(#{m[1]}[#{m[2]}]#{m[3][1..-1]}) : %(#{m[1]}#{m[3][1..-1]})
       elsif m[1] == '\\' && m[2]
         unescaped_attrs = "[#{m[2]}]"
       end
@@ -600,7 +599,7 @@ module Substitutors
           next m[0][1..-1]
         end
 
-        # fix nil results in Opal
+        # fix non-matching group results in Opal under Firefox
         if ::RUBY_ENGINE_OPAL
           m[1] = nil if m[1] == ''
         end
@@ -653,7 +652,7 @@ module Substitutors
           # NOTE Opal doesn't like %() as an enclosure around this string
           next "#{m[1]}#{m[2][1..-1]}#{m[3]}"
         end
-        # fix nil results in Opal
+        # fix non-matching group results in Opal under Firefox
         if ::RUBY_ENGINE_OPAL
           m[3] = nil if m[3] == ''
         end
@@ -873,7 +872,7 @@ module Substitutors
         if m[0].start_with? '\\'
           next m[0][1..-1]
         end
-        # fix nil results in Opal
+        # fix non-matching group results in Opal under Firefox
         if ::RUBY_ENGINE_OPAL
           m[1] = nil if m[1] == ''
           m[2] = nil if m[2] == ''
@@ -913,18 +912,18 @@ module Substitutors
         if m[0].start_with? '\\'
           next m[0][1..-1]
         end
-        # fix nil results in Opal
+        # fix non-matching group results in Opal under Firefox
         if ::RUBY_ENGINE_OPAL
           m[1] = nil if m[1] == ''
         end
         if m[1]
           id, reftext = m[1].split(',', 2).map {|it| it.strip }
-          id = id.sub(DoubleQuotedRx, ::RUBY_ENGINE_OPAL ? '$2' : '\2')
+          id = id.sub(DoubleQuotedRx, '\2')
           # NOTE In Opal, reftext is set to empty string if comma is missing
           reftext = if reftext.nil_or_empty?
             nil
           else
-            reftext.sub(DoubleQuotedMultiRx, ::RUBY_ENGINE_OPAL ? '$2' : '\2')
+            reftext.sub(DoubleQuotedMultiRx, '\2')
           end
         else
           id = m[2]
diff --git a/test/parser_test.rb b/test/parser_test.rb
index e5965d82..2045f380 100644
--- a/test/parser_test.rb
+++ b/test/parser_test.rb
@@ -286,7 +286,7 @@ context "Parser" do
   end
 
   test "parse author first" do
-    metadata, = parse_header_metadata 'Stuart'
+    metadata, _ = parse_header_metadata 'Stuart'
     assert_equal 5, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal metadata['author'], metadata['authors']
@@ -295,7 +295,7 @@ context "Parser" do
   end
 
   test "parse author first last" do
-    metadata, = parse_header_metadata 'Yukihiro Matsumoto'
+    metadata, _ = parse_header_metadata 'Yukihiro Matsumoto'
     assert_equal 6, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Yukihiro Matsumoto', metadata['author']
@@ -306,7 +306,7 @@ context "Parser" do
   end
 
   test "parse author first middle last" do
-    metadata, = parse_header_metadata 'David Heinemeier Hansson'
+    metadata, _ = parse_header_metadata 'David Heinemeier Hansson'
     assert_equal 7, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'David Heinemeier Hansson', metadata['author']
@@ -318,7 +318,7 @@ context "Parser" do
   end
 
   test "parse author first middle last email" do
-    metadata, = parse_header_metadata 'David Heinemeier Hansson <rails@ruby-lang.org>'
+    metadata, _ = parse_header_metadata 'David Heinemeier Hansson <rails@ruby-lang.org>'
     assert_equal 8, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'David Heinemeier Hansson', metadata['author']
@@ -331,7 +331,7 @@ context "Parser" do
   end
 
   test "parse author first email" do
-    metadata, = parse_header_metadata 'Stuart <founder@asciidoc.org>'
+    metadata, _ = parse_header_metadata 'Stuart <founder@asciidoc.org>'
     assert_equal 6, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Stuart', metadata['author']
@@ -342,7 +342,7 @@ context "Parser" do
   end
 
   test "parse author first last email" do
-    metadata, = parse_header_metadata 'Stuart Rackham <founder@asciidoc.org>'
+    metadata, _ = parse_header_metadata 'Stuart Rackham <founder@asciidoc.org>'
     assert_equal 7, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Stuart Rackham', metadata['author']
@@ -354,7 +354,7 @@ context "Parser" do
   end
 
   test "parse author with hyphen" do
-    metadata, = parse_header_metadata 'Tim Berners-Lee <founder@www.org>'
+    metadata, _ = parse_header_metadata 'Tim Berners-Lee <founder@www.org>'
     assert_equal 7, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Tim Berners-Lee', metadata['author']
@@ -366,7 +366,7 @@ context "Parser" do
   end
 
   test "parse author with single quote" do
-    metadata, = parse_header_metadata 'Stephen O\'Grady <founder@redmonk.com>'
+    metadata, _ = parse_header_metadata 'Stephen O\'Grady <founder@redmonk.com>'
     assert_equal 7, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Stephen O\'Grady', metadata['author']
@@ -378,7 +378,7 @@ context "Parser" do
   end
 
   test "parse author with dotted initial" do
-    metadata, = parse_header_metadata 'Heiko W. Rupp <hwr@example.de>'
+    metadata, _ = parse_header_metadata 'Heiko W. Rupp <hwr@example.de>'
     assert_equal 8, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Heiko W. Rupp', metadata['author']
@@ -391,7 +391,7 @@ context "Parser" do
   end
 
   test "parse author with underscore" do
-    metadata, = parse_header_metadata 'Tim_E Fella'
+    metadata, _ = parse_header_metadata 'Tim_E Fella'
     assert_equal 6, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Tim E Fella', metadata['author']
@@ -401,8 +401,31 @@ context "Parser" do
     assert_equal 'TF', metadata['authorinitials']
   end
 
+  test 'parse author name with letters outside basic latin' do
+    metadata, _ = parse_header_metadata 'Stéphane Brontë'
+    assert_equal 6, metadata.size
+    assert_equal 1, metadata['authorcount']
+    assert_equal 'Stéphane Brontë', metadata['author']
+    assert_equal metadata['author'], metadata['authors']
+    assert_equal 'Stéphane', metadata['firstname']
+    assert_equal 'Brontë', metadata['lastname']
+    assert_equal 'SB', metadata['authorinitials']
+  end if ::RUBY_MIN_VERSION_1_9
+
+  test 'parse ideographic author names' do
+    metadata, _ = parse_header_metadata '李 四 <si.li@example.com>'
+    assert_equal 7, metadata.size
+    assert_equal 1, metadata['authorcount']
+    assert_equal '李 四', metadata['author']
+    assert_equal metadata['author'], metadata['authors']
+    assert_equal '李', metadata['firstname']
+    assert_equal '四', metadata['lastname']
+    assert_equal 'si.li@example.com', metadata['email']
+    assert_equal '李四', metadata['authorinitials']
+  end if ::RUBY_MIN_VERSION_1_9
+
   test "parse author condenses whitespace" do
-    metadata, = parse_header_metadata '   Stuart       Rackham     <founder@asciidoc.org>'
+    metadata, _ = parse_header_metadata '   Stuart       Rackham     <founder@asciidoc.org>'
     assert_equal 7, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Stuart Rackham', metadata['author']
@@ -414,7 +437,7 @@ context "Parser" do
   end
 
   test "parse invalid author line becomes author" do
-    metadata, = parse_header_metadata '   Stuart       Rackham, founder of AsciiDoc   <founder@asciidoc.org>'
+    metadata, _ = parse_header_metadata '   Stuart       Rackham, founder of AsciiDoc   <founder@asciidoc.org>'
     assert_equal 5, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Stuart Rackham, founder of AsciiDoc <founder@asciidoc.org>', metadata['author']
@@ -424,7 +447,7 @@ context "Parser" do
   end
 
   test 'parse multiple authors' do
-    metadata, = parse_header_metadata 'Doc Writer <doc.writer@asciidoc.org>; John Smith <john.smith@asciidoc.org>'
+    metadata, _ = parse_header_metadata 'Doc Writer <doc.writer@asciidoc.org>; John Smith <john.smith@asciidoc.org>'
     assert_equal 2, metadata['authorcount']
     assert_equal 'Doc Writer, John Smith', metadata['authors']
     assert_equal 'Doc Writer', metadata['author']
@@ -437,7 +460,7 @@ context "Parser" do
 Ryan Waldron
 v0.0.7, 2013-12-18: The first release you can stand on
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 9, metadata.size
     assert_equal '0.0.7', metadata['revnumber']
     assert_equal '2013-12-18', metadata['revdate']
@@ -449,7 +472,7 @@ v0.0.7, 2013-12-18: The first release you can stand on
 Ryan Waldron
 2013-12-18
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 7, metadata.size
     assert_equal '2013-12-18', metadata['revdate']
   end
@@ -460,7 +483,7 @@ Ryan Waldron
 Ryan Waldron
 foobar
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 7, metadata.size
     assert_equal 'foobar', metadata['revdate']
   end
@@ -470,7 +493,7 @@ foobar
 Ryan Waldron
 2013-12-18:  The first release you can stand on
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 8, metadata.size
     assert_equal '2013-12-18', metadata['revdate']
     assert_equal 'The first release you can stand on', metadata['revremark']
@@ -481,7 +504,7 @@ Ryan Waldron
 Joe Cool
 :page-layout: post
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     refute_equal 'page-layout: post', metadata['revremark']
     assert !metadata.has_key?('revdate')
   end
@@ -491,7 +514,7 @@ Joe Cool
 Joe Cool
  :Must start revremark-only line with space
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 'Must start revremark-only line with space', metadata['revremark']
     assert_equal '', metadata['revdate']
   end
@@ -502,7 +525,7 @@ Joe Cool
 // release artist
 Ryan Waldron
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 6, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Ryan Waldron', metadata['author']
@@ -519,7 +542,7 @@ release artist
 ////
 Ryan Waldron
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 6, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Ryan Waldron', metadata['author']
@@ -537,7 +560,7 @@ release info
 ////
 v0.0.7, 2013-12-18
     EOS
-    metadata, = parse_header_metadata input
+    metadata, _ = parse_header_metadata input
     assert_equal 8, metadata.size
     assert_equal 1, metadata['authorcount']
     assert_equal 'Ryan Waldron', metadata['author']
diff --git a/test/sections_test.rb b/test/sections_test.rb
index 82e4c188..6f4adb7e 100644
--- a/test/sections_test.rb
+++ b/test/sections_test.rb
@@ -231,9 +231,13 @@ preamble
       assert_xpath "//h2[@id='_my_title'][text() = 'My Title ===']", render_string("== My Title ===")
     end
 
-    test "with non-word character" do
+    test "with XML entity" do
       assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where#{[8217].pack('U*')}s the love?\"]", render_string("== Where's the love?")
     end
+    
+    test "with non-word character" do
+      assert_xpath "//h2[@id='_where_s_the_love'][text() = \"Where’s the love?\"]", render_string("== Where’s the love?")
+    end
 
     test "with sequential non-word characters" do
       assert_xpath "//h2[@id='_what_the_is_this'][text() = 'What the \#@$ is this?']", render_string('== What the #@$ is this?')
@@ -256,8 +260,37 @@ preamble
 == Asciidoctor in 中文
       EOS
       output = render_string input
-      assert_xpath '//h2[@id="_asciidoctor_in"][text()="Asciidoctor in 中文"]', output
+      if ::RUBY_MIN_VERSION_1_9
+        assert_xpath '//h2[@id="_asciidoctor_in_中文"][text()="Asciidoctor in 中文"]', output
+      else
+        assert_xpath '//h2[@id="_asciidoctor_in"][text()="Asciidoctor in 中文"]', output
+      end
     end
+
+    test 'with only multibyte characters' do
+      input = <<-EOS
+== 视图
+      EOS
+      output = render_embedded_string input
+      assert_xpath '//h2[@id="_视图"][text()="视图"]', output
+    end if ::RUBY_MIN_VERSION_1_9
+
+    test 'multiline syntax with only multibyte characters' do
+      input = <<-EOS
+视图
+--
+
+content
+
+连接器
+---
+
+content
+      EOS
+      output = render_embedded_string input
+      assert_xpath '//h2[@id="_视图"][text()="视图"]', output
+      assert_xpath '//h2[@id="_连接器"][text()="连接器"]', output
+    end if ::RUBY_MIN_VERSION_1_9
   end
 
   context "level 2" do 
diff --git a/test/substitutions_test.rb b/test/substitutions_test.rb
index 8df8de5b..73907245 100644
--- a/test/substitutions_test.rb
+++ b/test/substitutions_test.rb
@@ -118,6 +118,11 @@ context 'Substitutions' do
       assert_equal '<strong>bl*ck</strong>-eye', para.sub_quotes(para.source)
     end
 
+    test 'constrained strong string containing an asterisk and multibyte word chars' do
+      para = block_from_string(%q{*黑*眼圈*})
+      assert_equal '<strong>黑*眼圈</strong>', para.sub_quotes(para.source)
+    end if ::RUBY_MIN_VERSION_1_9
+
     test 'single-line constrained quote variation emphasized string' do
       para = block_from_string(%q{'a few emphasized words'})
       assert_equal '<em>a few emphasized words</em>', para.sub_quotes(para.source)
@@ -985,6 +990,16 @@ EOS
         para = block_from_string('<span class="xmltag">&lt;node&gt;</span><span class="classname">r</span>', :attributes => {'experimental' => ''})
         assert_equal %q{<span class="xmltag">&lt;node&gt;</span><span class="classname">r</span>}, para.sub_macros(para.source)
       end
+
+      test 'should process menu macro with items containing multibyte characters' do
+        para = block_from_string('menu:视图[放大, 重置]', :attributes => {'experimental' => ''})
+        assert_equal %q{<span class="menuseq"><span class="menu">视图</span>&#160;&#9656; <span class="submenu">放大</span>&#160;&#9656; <span class="menuitem">重置</span></span>}, para.sub_macros(para.source)
+      end if ::RUBY_MIN_VERSION_1_9
+
+      test 'should process inline menu with items containing multibyte characters' do
+        para = block_from_string('"视图 &gt; 放大 &gt; 重置"', :attributes => {'experimental' => ''})
+        assert_equal %q{<span class="menuseq"><span class="menu">视图</span>&#160;&#9656; <span class="submenu">放大</span>&#160;&#9656; <span class="menuitem">重置</span></span>}, para.sub_macros(para.source)
+      end if ::RUBY_MIN_VERSION_1_9
     end
   end
 
@@ -1193,21 +1208,27 @@ EOS
     end
 
     test 'replaces dashes' do
-      para = block_from_string %(-- foo foo--bar foo\\--bar foo -- bar foo \\-- bar
+      para = block_from_string %(-- foo foo--bar foo\\--bar foo -- bar foo \\-- bar 
 stuff in between
 -- foo
 stuff in between
 foo --
 stuff in between
 foo --)
-      expected = %(&#8201;&#8212;&#8201;foo foo&#8212;bar foo--bar foo&#8201;&#8212;&#8201;bar foo -- bar
+      expected = '&#8201;&#8212;&#8201;foo foo&#8212;bar foo--bar foo&#8201;&#8212;&#8201;bar foo -- bar
 stuff in between&#8201;&#8212;&#8201;foo
 stuff in between
 foo&#8201;&#8212;&#8201;stuff in between
-foo&#8201;&#8212;&#8201;)
+foo&#8201;&#8212;&#8201;'
       assert_equal expected, para.sub_replacements(para.source)
     end
 
+    test 'replaces dashes between multibyte word characters' do
+      para = block_from_string %(富--巴)
+      expected = '富&#8212;巴'
+      assert_equal expected, para.sub_replacements(para.source)
+    end if ::RUBY_MIN_VERSION_1_9
+
     test 'replaces marks' do
       para = block_from_string '(C) (R) (TM) \(C) \(R) \(TM)' 
       assert_equal '&#169; &#174; &#8482; (C) (R) (TM)', para.sub_replacements(para.source)
author	Dan Allen <dan.j.allen@gmail.com>	2014-05-16 03:32:05 -0600
committer	Dan Allen <dan.j.allen@gmail.com>	2014-05-16 03:32:05 -0600
commit	cddfc7b0ae26db4b355bae276247fa04766fdc5f (patch)
tree	395466ec8d9d4fd1d4087c60841441a6999e84d3
parent	686e4237a14f1923818ac956a95b4396bb47156b (diff)
parent	a1b17518d6f699ed6cb55f244dc7385a1a4bb845 (diff)