resolves #187 complete masquerade functionality for blocks & paragraphs

- rework block parsing - move block creation to a separate method - add method to assign or generate block caption - add set_attr method to block - use sets where appropriate to improve speed - cleanup two-line section matching - optimize reader operations - add flags to control compliance w/ AsciiDoc - tests for masquerading, captions and other compliance
author: Dan Allen <dallen@redhat.com> 2013-04-20 12:49:51 -0600
committer: Dan Allen <dallen@redhat.com> 2013-04-22 12:26:48 -0600
commit: efeda152d248735bc2ccfcb580010fa7a5dfac92 (patch)
tree: 4510aa744f70541ee09f52e939c90278aeb0fedb /lib
parent: 7705123f8dc9e89468ac5b86ce4862809d46d5a3 (diff)
9 files changed, 508 insertions, 359 deletions
diff --git a/lib/asciidoctor.rb b/lib/asciidoctor.rb
index 3efbcb73..eb90eabb 100755
--- a/lib/asciidoctor.rb
+++ b/lib/asciidoctor.rb
@@ -1,4 +1,5 @@
 require 'strscan'
+require 'set'
 
 $:.unshift(File.dirname(__FILE__))
 #$:.unshift(File.join(File.dirname(__FILE__), '..', 'vendor'))
@@ -112,19 +113,35 @@ module Asciidoctor
     'markdown' => '.md'
   }
 
+  SECTION_LEVELS = {
+    '=' => 0,
+    '-' => 1,
+    '~' => 2,
+    '^' => 3,
+    '+' => 4
+  }
+
+  ADMONITION_STYLES = ['NOTE', 'TIP', 'IMPORTANT', 'WARNING', 'CAUTION'].to_set
+
+  # NOTE: AsciiDoc doesn't support pass style for paragraph
+  PARAGRAPH_STYLES = ['comment', 'example', 'literal', 'listing', 'normal', 'pass', 'quote', 'sidebar', 'source', 'verse'].to_set
+
+  VERBATIM_STYLES = ['literal', 'listing', 'source', 'verse'].to_set
+
   DELIMITED_BLOCKS = {
-    '--'   => :open,
-    '----' => :listing,
-    '....' => :literal,
-    '====' => :example,
-    '****' => :sidebar,
-    '____' => :quote,
-    '++++' => :pass,
-    '|===' => :table,
-    '!===' => :table,
-    '////' => :comment,
-    '```'  => :fenced_code,
-    '~~~'  => :fenced_code
+    # NOTE: AsciiDoc doesn't support pass style for open block
+    '--'   => [:open, ['comment', 'example', 'literal', 'listing', 'pass', 'quote', 'sidebar', 'source', 'verse', 'admonition'].to_set],
+    '----' => [:listing, ['literal', 'source'].to_set],
+    '....' => [:literal, ['listing', 'source'].to_set],
+    '====' => [:example, ['admonition'].to_set],
+    '****' => [:sidebar, Set.new],
+    '____' => [:quote, ['verse'].to_set],
+    '++++' => [:pass, Set.new],
+    '|===' => [:table, Set.new],
+    '!===' => [:table, Set.new],
+    '////' => [:comment, Set.new],
+    '```'  => [:fenced_code, Set.new],
+    '~~~'  => [:fenced_code, Set.new]
   }
 
   BREAK_LINES = {
@@ -155,6 +172,27 @@ module Asciidoctor
 
   LINE_FEED_ENTITY = '&#10;' # or &#x0A;
 
+  # Flags to control compliance with the behavior of AsciiDoc
+  COMPLIANCE = {
+    # AsciiDoc terminates paragraphs adjacent to
+    # block content (delimiter or block attribute list)
+    # Compliance value: true
+    # TODO what about literal paragraph?
+    :block_terminates_paragraph => true,
+
+    # AsciiDoc does not treat paragraphs labeled with a
+    # verbatim style (literal, listing, source, verse)
+    # as verbatim; override this behavior
+    # Compliance value: false
+    :strict_verbatim_paragraphs => true,
+
+    # AsciiDoc allows start and end delimiters around
+    # a block to be different lengths
+    # this option requires that they be the same
+    # Compliance value: false
+    :congruent_block_delimiters => true
+  }
+
   # The following pattern, which appears frequently, captures the contents between square brackets,
   # ignoring escaped closing brackets (closing brackets prefixed with a backslash '\' character)
   #
@@ -163,8 +201,11 @@ module Asciidoctor
   # Matches:
   # [enclosed text here] or [enclosed [text\] here]
   REGEXP = {
+    # NOTE: this is a inline admonition note
+    :admonition_inline => /^(#{ADMONITION_STYLES.to_a * '|'}):\s/,
+
     # [[Foo]]
-    :anchor           => /^\[\[([^\[\]]+)\]\]$/,
+    :anchor           => /^\[\[([^\s\[\]]+)\]\]$/,
 
     # Foowhatevs [[Bar]]
     :anchor_embedded  => /^(.*?)\s*\[\[([^\[\]]+)\]\]$/,
@@ -203,10 +244,10 @@ module Asciidoctor
     # [NOTE, caption="Good to know"]
     # Can be defined by an attribute
     # [{lead}]
-    :blk_attr_list    => /^\[(|[\w\{].*)\]$/,
+    :blk_attr_list    => /^\[(|[[:blank:]]*[\w\{,].*)\]$/,
 
     # block attribute list or block id (bulk query)
-    :attr_line        => /^\[(|[\w\{].*|\[[^\[\]]*\])\]$/,
+    :attr_line        => /^\[(|[[:blank:]]*[\w\{,].*|\[[^\[\]]*\])\]$/,
 
     # attribute reference
     # {foo}
@@ -425,7 +466,8 @@ module Asciidoctor
     :section_name      => /^((?=.*\w+.*)[^.].*?)$/,
 
     # ======  || ------ || ~~~~~~ || ^^^^^^ || ++++++
-    :section_underline => /^([=\-~^\+])+$/,
+    # TODO build from SECTION_LEVELS keys
+    :section_underline => /^(?:=|-|~|\^|\+)+$/,
 
     # * Foo (up to 5 consecutive asterisks)
     # - Foo
@@ -462,8 +504,6 @@ module Asciidoctor
     :uri_encode_chars => /[^\w\-.!~*';:@=+$,()\[\]]/
   }
 
-  ADMONITION_STYLES = ['NOTE', 'TIP', 'IMPORTANT', 'WARNING', 'CAUTION']
-
   INTRINSICS = Hash.new{|h,k| STDERR.puts "Missing intrinsic: #{k.inspect}"; "{#{k}}"}.merge(
     {
     'startsb'    => '[',
diff --git a/lib/asciidoctor/abstract_block.rb b/lib/asciidoctor/abstract_block.rb
index 73b6086e..fef6e8e0 100644
--- a/lib/asciidoctor/abstract_block.rb
+++ b/lib/asciidoctor/abstract_block.rb
@@ -189,6 +189,45 @@ class AbstractBlock < AbstractNode
     }
   end
 
+  # Public: Generate a caption and assign it to this block if one
+  # is not already assigned.
+  #
+  # If the block has a title and a caption prefix is available
+  # for this block, then build a caption from this information,
+  # assign it a number and store it to the caption attribute on
+  # the block.
+  #
+  # If an explicit caption has been specified on this block, then
+  # do nothing.
+  #
+  # key         - The prefix of the caption and counter attribute names.
+  #               If not provided, the name of the context for this block
+  #               is used. (default: nil).
+  #
+  # returns nothing
+  def assign_caption(caption = nil, key = nil)
+    unless title? || @caption.nil?
+      return nil
+    end
+
+    if caption.nil?
+      if @document.attr?('caption')
+        @caption = @document.attr('caption')
+      else
+        key ||= @context.to_s
+        caption_key = "#{key}-caption"
+        if @document.attributes.has_key?(caption_key)
+          caption_title = @document.attributes["#{key}-caption"]
+          caption_num = @document.counter_increment("#{key}-number", self)
+          @caption = @attributes['caption'] = "#{caption_title} #{caption_num}. "
+        end
+      end
+    else
+      @caption = caption
+    end
+    nil
+  end
+
   # Internal: Assign the next index (0-based) to this section
   #
   # Assign the next index of this section within the parent
diff --git a/lib/asciidoctor/abstract_node.rb b/lib/asciidoctor/abstract_node.rb
index f765af98..ba2dffa4 100644
--- a/lib/asciidoctor/abstract_node.rb
+++ b/lib/asciidoctor/abstract_node.rb
@@ -94,6 +94,27 @@ class AbstractNode
     end
   end
 
+  # Public: Assign the value to the specified key in this
+  # block's attributes hash.
+  #
+  # key - The attribute key (or name)
+  # val - The value to assign to the key
+  #
+  # returns a flag indicating whether the assignment was performed
+  def set_attr(key, val, overwrite = nil)
+    if overwrite.nil?
+      @attributes[key] = val
+      true
+    else
+      if overwrite || @attributes.has_key?(key)
+        @attributes[key] = val
+        true
+      else
+        false
+      end
+    end
+  end
+
   # Public: Get the execution context of this object (via Kernel#binding).
   #
   # This method is used to set the 'self' reference as well as local variables
diff --git a/lib/asciidoctor/attribute_list.rb b/lib/asciidoctor/attribute_list.rb
index b5f64e45..51120675 100644
--- a/lib/asciidoctor/attribute_list.rb
+++ b/lib/asciidoctor/attribute_list.rb
@@ -84,6 +84,7 @@ class AttributeList
 
   def self.rekey(attributes, pos_attrs)
     pos_attrs.each_with_index do |key, index|
+      next if key.nil?
       pos = index + 1
       unless (val = attributes[pos]).nil?
         attributes[key] = val
diff --git a/lib/asciidoctor/backends/html5.rb b/lib/asciidoctor/backends/html5.rb
index addec254..17c268ab 100644
--- a/lib/asciidoctor/backends/html5.rb
+++ b/lib/asciidoctor/backends/html5.rb
@@ -344,8 +344,8 @@ end
 
 class BlockParagraphTemplate < BaseTemplate
   def paragraph(id, role, title, content)
-    %(<div#{id && " id=\"#{id}\""} class=\"paragraph#{role && " #{role}"}\">
-  #{title && "<div class=\"title\">#{title}</div>"}  
+    %(<div#{id && " id=\"#{id}\""} class=\"paragraph#{role && " #{role}"}\">#{title && "
+  <div class=\"title\">#{title}</div>"}  
   <p>#{content}</p>
 </div>)
   end
diff --git a/lib/asciidoctor/block.rb b/lib/asciidoctor/block.rb
index a43e9485..9c44d02f 100644
--- a/lib/asciidoctor/block.rb
+++ b/lib/asciidoctor/block.rb
@@ -26,6 +26,7 @@ class Block < AbstractBlock
   def initialize(parent, context, buffer = nil)
     super(parent, context)
     @buffer = buffer
+    @caption = nil
   end
 
   # Public: Get the rendered String content for this Block.  If the block
@@ -95,7 +96,7 @@ class Block < AbstractBlock
   #   => ["<em>This</em> is what happens when you &lt;meet&gt; a stranger in the &lt;alps&gt;!"]
   def content
     case @context
-    when :preamble, :open, :example, :sidebar
+    when :preamble, :open
       @blocks.map {|b| b.render }.join
     # lists get iterated in the template (for now)
     # list items recurse into this block when their text and content methods are called
@@ -105,7 +106,7 @@ class Block < AbstractBlock
       apply_literal_subs(@buffer)
     when :pass
       apply_passthrough_subs(@buffer)
-    when :quote, :verse, :admonition
+    when :admonition, :example, :sidebar, :quote, :verse
       if !@buffer.nil?
         apply_para_subs(@buffer)
       else
diff --git a/lib/asciidoctor/document.rb b/lib/asciidoctor/document.rb
index 43e8a719..9b224878 100644
--- a/lib/asciidoctor/document.rb
+++ b/lib/asciidoctor/document.rb
@@ -269,6 +269,18 @@ class Document < AbstractBlock
     (@attributes[name] = @counters[name])
   end
 
+  # Public: Increment the specified counter and store it in the block's attributes
+  #
+  # counter_name - the String name of the counter attribute
+  # block        - the Block on which to save the counter
+  #
+  # returns the next number in the sequence for the specified counter
+  def counter_increment(counter_name, block)
+    val = counter(counter_name)
+    AttributeEntry.new(counter_name, val).save_to(block.attributes)
+    val
+  end
+
   # Internal: Get the next value in the sequence.
   #
   # Handles both integer and character sequences.
diff --git a/lib/asciidoctor/lexer.rb b/lib/asciidoctor/lexer.rb
index f7dde9ec..e875e4b5 100644
--- a/lib/asciidoctor/lexer.rb
+++ b/lib/asciidoctor/lexer.rb
@@ -23,7 +23,7 @@ module Asciidoctor
 #   # => Asciidoctor::Block
 class Lexer
 
-  BlockMatchData = Struct.new(:name, :tip, :terminator)
+  BlockMatchData = Struct.new(:context, :masq, :tip, :terminator)
 
   # Public: Make sure the Lexer object doesn't get initialized.
   #
@@ -137,6 +137,8 @@ class Lexer
   def self.next_section(reader, parent, attributes = {})
     preamble = false
 
+    # FIXME if attributes[1] is a verbatim style, then don't check for section
+
     # check if we are at the start of processing the document
     # NOTE we could drop a hint in the attributes to indicate
     # that we are at a section title (so we don't have to check)
@@ -249,351 +251,316 @@ class Lexer
     # bail if we've reached the end of the parent block or document
     return nil unless reader.has_more_lines?
 
+    # check for option to find list item text only
+    # if skipped a line, assume a list continuation was
+    # used and block content is acceptable
     if options[:text] && skipped > 0
       options.delete(:text)
     end
-
-    Debug.debug {
-      msg = []
-      msg << '/' * 64
-      msg << 'next_block() - First two lines are:'
-      msg.concat reader.peek_lines(2)
-      msg << '/' * 64
-      msg * "\n"
-    }
     
-    parse_metadata = options[:parse_metadata] || true
-    parse_sections = options[:parse_sections] || false
+    parse_metadata = options.fetch(:parse_metadata, true)
+    #parse_sections = options.fetch(:parse_sections, false)
 
     document = parent.document
-    context = parent.is_a?(Block) ? parent.context : nil
+    parent_context = parent.is_a?(Block) ? parent.context : nil
     block = nil
+    style = nil
 
     while reader.has_more_lines? && block.nil?
+      # if parsing metadata, read until there is no more to read
       if parse_metadata && parse_block_metadata_line(reader, document, attributes, options)
         reader.advance
         next
-      elsif parse_sections && context.nil? && is_next_line_section?(reader, attributes)
-        block, attributes = next_section(reader, parent, attributes)
-        break
+      #elsif parse_sections && parent_context.nil? && is_next_line_section?(reader, attributes)
+      #  block, attributes = next_section(reader, parent, attributes)
+      #  break
       end
 
+      # QUESTION introduce parsing context object?
       this_line = reader.get_line
-
+      delimited_block = false
       block_context = nil
       terminator = nil
+      # QUESTION put this inside call to rekey attributes?
+      if attributes.has_key? 1
+        style = attributes['style'] = attributes[1]
+      end
+
       if delimited_blk_match = is_delimited_block?(this_line, true)
-        block_context = delimited_blk_match.name
+        delimited_block = true
+        block_context = delimited_blk_match.context
         terminator = delimited_blk_match.terminator
+        if !style
+          style = attributes['style'] = block_context.to_s
+        elsif style != block_context.to_s
+          if delimited_blk_match.masq.include? style
+            block_context = style.to_sym
+          elsif delimited_blk_match.masq.include?('admonition') && ADMONITION_STYLES.include?(style)
+            block_context = :admonition
+          else
+            puts "asciidoctor: WARNING: line #{reader.lineno}: invalid style for #{block_context} block: #{style}"
+            style = block_context.to_s
+          end
+        end
       end
 
-      # NOTE we're letting break lines (ruler, page_break, etc) have attributes
-      if !options[:text] && block_context.nil? && (match = this_line.match(REGEXP[:break_line]))
-        block = Block.new(parent, BREAK_LINES[match[0][0..2]])
-        reader.skip_blank_lines
-
-      elsif !options[:text] && block_context.nil? && (match = this_line.match(REGEXP[:image_blk]))
-        block = Block.new(parent, :image)
-
-        block.parse_attributes(match[2], ['alt', 'width', 'height'], :sub_input => true, :sub_result => false, :into => attributes)
-        target = block.sub_attributes(match[1])
-        if !target.to_s.empty?
-          attributes['target'] = target
-          document.register(:images, target)
-          attributes['alt'] ||= File.basename(target, File.extname(target))
-          block.title = attributes['title']
-          if block.title? && !attributes.has_key?('caption') && !block.attr?('caption')
-            number = document.counter('figure-number')
-            attributes['caption'] = "#{document.attributes['figure-caption']} #{number}. "
-            Document::AttributeEntry.new('figure-number', number).save_to(attributes)
+      if !delimited_block
+
+        # use begin block for flow control (yes, could be done better)
+        begin
+
+          # process lines verbatim
+          if !style.nil? && COMPLIANCE[:strict_verbatim_paragraphs] && VERBATIM_STYLES.include?(style)
+            block_context = style.to_sym
+            reader.unshift_line this_line
+            # advance to block parsing =>
+            break
           end
-        else
-          # drop the line if target resolves to nothing
-          block = nil
-        end
-        reader.skip_blank_lines
 
-      elsif block_context == :open
-        # an open block is surrounded by '--' lines and has zero or more blocks inside
-        buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
+          # process lines normally
+          if !options[:text]
+            # NOTE we're letting break lines (ruler, page_break, etc) have attributes
+            if (match = this_line.match(REGEXP[:break_line]))
+              block = Block.new(parent, BREAK_LINES[match[0][0..2]])
+              reader.skip_blank_lines
+              break
+
+            elsif (match = this_line.match(REGEXP[:image_blk]))
+              block = Block.new(parent, :image)
+              unless style.nil?
+                attributes['alt'] = style
+                attributes.delete('style')
+                style = nil
+              end
+              block.parse_attributes(match[2], ['alt', 'width', 'height'], :sub_input => true, :sub_result => false, :into => attributes)
+              target = block.sub_attributes(match[1])
+              if !target.to_s.empty?
+                attributes['target'] = target
+                document.register(:images, target)
+                attributes['alt'] ||= File.basename(target, File.extname(target))
+                block.title = attributes.delete('title') if attributes.has_key?('title')
+                block.assign_caption attributes.delete('caption'), 'figure'
+                break
+              else
+                # drop the line if target resolves to nothing
+                block = nil
+                reader.skip_blank_lines
+                return nil
+              end
+            end
+          end
 
-        # Strip lines off end of block - not implemented yet
-        # while buffer.has_more_lines? && buffer.last.strip.empty?
-        #   buffer.pop
-        # end
+          # haven't found anything yet, continue
+          if (match = this_line.match(REGEXP[:colist]))
+            block = Block.new(parent, :colist)
+            attributes['style'] = 'arabic'
+            items = []
+            block.buffer = items
+            reader.unshift_line this_line
+            expected_index = 1
+            begin
+              # might want to move this check to a validate method
+              if match[1].to_i != expected_index
+                puts "asciidoctor: WARNING: line #{reader.lineno + 1}: callout list item index: expected #{expected_index} got #{match[1]}"
+              end
+              list_item = next_list_item(reader, block, match)
+              expected_index += 1
+              if !list_item.nil?
+                items << list_item
+                coids = document.callouts.callout_ids(items.size)
+                if !coids.empty?
+                  list_item.attributes['coids'] = coids
+                else
+                  puts "asciidoctor: WARNING: line #{reader.lineno}: no callouts refer to list item #{items.size}"
+                end
+              end
+            end while reader.has_more_lines? && match = reader.peek_line.match(REGEXP[:colist])
 
-        block = Block.new(parent, block_context)
-        while buffer.has_more_lines?
-          new_block = next_block(buffer, block)
-          block.blocks << new_block unless new_block.nil?
-        end
+            document.callouts.next_list
+            break
 
-      # needs to come before list detection
-      elsif block_context == :sidebar
-        # sidebar is surrounded by '****' (4 or more '*' chars) lines
-        # FIXME violates DRY because it's a duplication of quote parsing
-        block = Block.new(parent, block_context)
-        buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
+          elsif (match = this_line.match(REGEXP[:ulist]))
+            reader.unshift_line this_line
+            block = next_outline_list(reader, :ulist, parent)
+            break
 
-        while buffer.has_more_lines?
-          new_block = next_block(buffer, block)
-          block.blocks << new_block unless new_block.nil?
-        end
+          elsif (match = this_line.match(REGEXP[:olist]))
+            reader.unshift_line this_line
+            block = next_outline_list(reader, :olist, parent)
+            # QUESTION move this logic to next_outline_list?
+            if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style')
+              marker = block.buffer.first.marker
+              if marker.start_with? '.'
+                # first one makes more sense, but second on is AsciiDoc-compliant
+                #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
+                attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
+              else
+                style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
+                attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
+              end
+            end
+            break
 
-      elsif block_context.nil? && (match = this_line.match(REGEXP[:colist]))
-        block = Block.new(parent, :colist)
-        attributes['style'] = 'arabic'
-        items = []
-        block.buffer = items
-        reader.unshift_line this_line
-        expected_index = 1
-        begin
-          # might want to move this check to a validate method
-          if match[1].to_i != expected_index
-            puts "asciidoctor: WARNING: line #{reader.lineno + 1}: callout list item index: expected #{expected_index} got #{match[1]}"
-          end
-          list_item = next_list_item(reader, block, match)
-          expected_index += 1
-          if !list_item.nil?
-            items << list_item
-            coids = document.callouts.callout_ids(items.size)
-            if !coids.empty?
-              list_item.attributes['coids'] = coids
+          elsif (match = this_line.match(REGEXP[:dlist]))
+            reader.unshift_line this_line
+            block = next_labeled_list(reader, match, parent)
+            break
+
+          elsif (style == 'float' || style == 'discrete') && is_section_title?(this_line, reader.peek_line)
+            reader.unshift_line this_line
+            float_id, float_title, float_level, _ = parse_section_title(reader, document)
+            float_id ||= attributes['id'] if attributes.has_key?('id')
+            block = Block.new(parent, :floating_title)
+            if float_id.nil? || float_id.empty?
+              # FIXME remove hack of creating throwaway Section to get at the generate_id method
+              tmp_sect = Section.new(parent)
+              tmp_sect.title = float_title
+              block.id = tmp_sect.generate_id
             else
-              puts "asciidoctor: WARNING: line #{reader.lineno}: no callouts refer to list item #{items.size}"
+              block.id = float_id
+              document.register(:ids, [float_id, float_title])
             end
-          end
-        end while reader.has_more_lines? && match = reader.peek_line.match(REGEXP[:colist])
-
-        document.callouts.next_list
-
-      elsif block_context.nil? && (match = this_line.match(REGEXP[:ulist]))
-        AttributeList.rekey(attributes, ['style'])
-        reader.unshift_line this_line
-        block = next_outline_list(reader, :ulist, parent)
-
-      elsif block_context.nil? && (match = this_line.match(REGEXP[:olist]))
-        AttributeList.rekey(attributes, ['style'])
-        reader.unshift_line this_line
-        block = next_outline_list(reader, :olist, parent)
-        # QUESTION move this logic to next_outline_list?
-        if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style')
-          marker = block.buffer.first.marker
-          if marker.start_with? '.'
-            # first one makes more sense, but second on is AsciiDoc-compliant
-            #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES.first).to_s
-            attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES.first).to_s
-          else
-            style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) }
-            attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s
-          end
-        end
+            block.level = float_level
+            block.title = float_title
+            break
 
-      elsif block_context.nil? && (match = this_line.match(REGEXP[:dlist]))
-        reader.unshift_line this_line
-        block = next_labeled_list(reader, match, parent)
-        AttributeList.rekey(attributes, ['style'])
-
-      elsif block_context == :table
-        # table is surrounded by lines starting with a | followed by 3 or more '=' chars
-        AttributeList.rekey(attributes, ['style'])
-        table_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true)
-        block = next_table(table_reader, parent, attributes)
-        block.title = attributes['title']
-        if block.title? && !attributes.has_key?('caption') && !block.attr?('caption')
-          number = document.counter('table-number')
-          attributes['caption'] = "#{document.attributes['table-caption']} #{number}. "
-          Document::AttributeEntry.new('table-number', number).save_to(attributes)
-        end
-    
-      # FIXME violates DRY because it's a duplication of other block parsing
-      elsif block_context == :example
-        # example is surrounded by lines with 4 or more '=' chars
-        AttributeList.rekey(attributes, ['style'])
-        if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s}
-          block = Block.new(parent, :admonition)
-          attributes['name'] = admonition_name = admonition_style.downcase
-          attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
-        else
-          block = Block.new(parent, block_context)
-          block.title = attributes['title']
-          if block.title? && !attributes.has_key?('caption') && !block.attr?('caption')
-            number = document.counter('example-number')
-            attributes['caption'] = "#{document.attributes['example-caption']} #{number}. "
-            Document::AttributeEntry.new('example-number', number).save_to(attributes)
+          elsif !style.nil? && style != 'normal'
+            if PARAGRAPH_STYLES.include?(style)
+              block_context = style.to_sym
+              reader.unshift_line this_line
+              # advance to block parsing =>
+              break
+            elsif ADMONITION_STYLES.include?(style)
+              block_context = :admonition
+              reader.unshift_line this_line
+              # advance to block parsing =>
+              break
+            else
+              puts "asciidoctor: WARNING: line #{reader.lineno}: invalid style for paragraph: #{style}"
+              style = nil
+            end
           end
-        end
-        buffer = Reader.new reader.grab_lines_until(:terminator => terminator)
 
-        while buffer.has_more_lines?
-          new_block = next_block(buffer, block)
-          block.blocks << new_block unless new_block.nil?
-        end
+          # a literal paragraph is contiguous lines starting at least one space
+          if style != 'normal' && this_line.match(REGEXP[:lit_par])
+            # So we need to actually include this one in the grab_lines group
+            reader.unshift_line this_line
+            buffer = reader.grab_lines_until(
+                :break_on_blank_lines => true,
+                :break_on_list_continuation => true,
+                :preserve_last_line => true) {|line|
+              # labeled list terms can be indented, but a preceding blank line indicates
+              # we are in a list continuation and therefore literals should be strictly literal
+              (skipped == 0 && parent_context == :dlist && line.match(REGEXP[:dlist])) ||
+              (COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
+            }
 
-      # FIXME violates DRY w/ non-delimited block listing
-      elsif block_context == :listing || block_context == :fenced_code
-        if block_context == :fenced_code
-          attributes['style'] = 'source'
-          lang = this_line[3..-1].strip
-          attributes['language'] = lang unless lang.empty?
-          terminator = terminator[0..2] if terminator.length > 3
-        else
-          AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
-        end
-        buffer = reader.grab_lines_until(:terminator => terminator)
-        buffer.last.chomp! unless buffer.empty?
-        block = Block.new(parent, :listing, buffer)
-        block.title = attributes['title']
-        if document.attributes.has_key?('listing-caption') &&
-            block.title? && !attributes.has_key?('caption') && !block.attr?('caption')
-          number = document.counter('listing-number')
-          attributes['caption'] = "#{document.attributes['listing-caption']} #{number}. "
-          Document::AttributeEntry.new('listing-number', number).save_to(attributes)
-        end
+            # trim off the indentation equivalent to the size of the least indented line
+            if !buffer.empty?
+              offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min
+              if offset > 0
+                buffer = buffer.map {|l| l.sub(/^\s{1,#{offset}}/, '') }
+              end
+            end
 
-      elsif block_context == :quote
-        # multi-line verse or quote is surrounded by a block delimiter
-        AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
-        quote_context = (attributes['style'] == 'verse' ? :verse : :quote)
-        block_reader = Reader.new reader.grab_lines_until(:terminator => terminator)
+            block = Block.new(parent, :literal, buffer)
+            # a literal gets special meaning inside of a definition list
+            if LIST_CONTEXTS.include?(parent_context)
+              attributes['options'] ||= []
+              # TODO this feels hacky, better way to distinguish from explicit literal block?
+              attributes['options'] << 'listparagraph'
+            end
+            break
 
-        # only quote can have other section elements (as section block)
-        section_body = (quote_context == :quote)
+          # a paragraph is contiguous nonblank/noncontinuation lines
+          else
+            reader.unshift_line this_line
+            buffer = reader.grab_lines_until(
+                :break_on_blank_lines => true,
+                :break_on_list_continuation => true,
+                :preserve_last_line => true,
+                :skip_line_comments => true) {|line|
+              # a preceding blank line (skipped > 0) indicates we are in a list continuation
+              # and therefore we should not break at a definition list term
+              # however, this won't stop breaking on item of same level since we've already parsed them out
+              (skipped == 0 && parent_context == :dlist && line.match(REGEXP[:dlist])) ||
+              (COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line])))
+            }
 
-        if section_body
-          block = Block.new(parent, quote_context)
-          while block_reader.has_more_lines?
-            new_block = next_block(block_reader, block)
-            block.blocks << new_block unless new_block.nil?
-          end
-        else
-          block_reader.chomp_last!
-          block = Block.new(parent, quote_context, block_reader.lines)
-        end
+            # NOTE we need this logic because we've asked the reader to skip
+            # line comments, which may leave us w/ an empty buffer if those
+            # were the only lines found
+            if buffer.empty?
+              reader.get_line
+              break
+            end
 
-      elsif block_context == :literal || block_context == :pass
-        # literal is surrounded by '....' (4 or more '.' chars) lines
-        # pass is surrounded by '++++' (4 or more '+' chars) lines
-        buffer = reader.grab_lines_until(:terminator => terminator)
-        buffer.last.chomp! unless buffer.empty?
-        # a literal can masquerade as a listing
-        if attributes[1] == 'listing'
-          block_context = :listing
-        end
-        block = Block.new(parent, block_context, buffer)
-
-      elsif this_line.match(REGEXP[:lit_par])
-        # literal paragraph is contiguous lines starting with
-        # one or more space or tab characters
-
-        # So we need to actually include this one in the grab_lines group
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:preserve_last_line => true, :break_on_blank_lines => true) {|line|
-          # labeled list terms can be indented, but a preceding blank indicates
-          # we are in a list continuation and therefore literals should be strictly literal
-          (context == :dlist && skipped == 0 && line.match(REGEXP[:dlist])) ||
-          is_delimited_block?(line)
-        }
+            catalog_inline_anchors(buffer.join, document)
 
-        # trim off the indentation equivalent to the size of the least indented line
-        if !buffer.empty?
-          offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min
-          if offset > 0
-            buffer = buffer.map {|l| l.sub(/^\s{1,#{offset}}/, '') }
+            if !options[:text] && (admonition_match = buffer.first.match(REGEXP[:admonition_inline]))
+              buffer[0] = admonition_match.post_match.lstrip
+              block = Block.new(parent, :admonition, buffer)
+              attributes['style'] = admonition_match[1]
+              attributes['name'] = admonition_name = admonition_match[1].downcase
+              attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
+            else
+              # QUESTION is this necessary?
+              #if style == 'normal' && [' ', "\t"].include?(buffer.first[0..0])
+              #  # QUESTION should we only trim leading blanks?
+              #  buffer.map! &:lstrip
+              #end
+
+              block = Block.new(parent, :paragraph, buffer)
+            end
+            break
           end
-          buffer.last.chomp!
-        end
+        end while false
+      end
 
-        block = Block.new(parent, :literal, buffer)
-        # a literal gets special meaning inside of a definition list
-        if LIST_CONTEXTS.include?(context)
-          attributes['options'] ||= []
-          # TODO this feels hacky, better way to distinguish from explicit literal block?
-          attributes['options'] << 'listparagraph'
-        end
+      # either delimited block or styled paragraph
+      if block.nil? && !block_context.nil?
+        case block_context
+        when :admonition
+          attributes['name'] = admonition_name = style.downcase
+          attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
+          block = build_block(block_context, :complex, terminator, parent, reader, attributes)
 
-      ## these switches based on style need to come immediately before the else ##
+        when :comment
+          reader.grab_lines_until(:break_on_blank_lines => true, :chomp_last_line => false)
+          # QUESTION should we skip blank lines here?
+          break
 
-      elsif attributes[1] == 'source' || attributes[1] == 'listing'
-        if attributes[1] == 'source'
-          AttributeList.rekey(attributes, ['style', 'language', 'linenums'])
-        end
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
-        buffer.last.chomp! unless buffer.empty?
-        block = Block.new(parent, :listing, buffer)
-
-      elsif attributes[1] == 'literal'
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
-        buffer.last.chomp! unless buffer.empty?
-        block = Block.new(parent, :literal, buffer)
-
-      elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[1] == s}
-        # an admonition preceded by [<TYPE>] and lasts until a blank line
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
-        buffer.last.chomp! unless buffer.empty?
-        block = Block.new(parent, :admonition, buffer)
-        attributes['style'] = admonition_style
-        attributes['name'] = admonition_name = admonition_style.downcase
-        attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
-
-      elsif quote_context = [:quote, :verse].detect{|s| attributes[1] == s.to_s}
-        # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line
-        AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle'])
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
-        buffer.last.chomp! unless buffer.empty?
-        block = Block.new(parent, quote_context, buffer)
-
-      # a floating (i.e., discrete) title
-      elsif ['float', 'discrete'].include?(attributes[1]) && is_section_title?(this_line, reader.peek_line)
-        attributes['style'] = attributes[1]
-        reader.unshift_line this_line
-        float_id, float_title, float_level, _ = parse_section_title(reader, document)
-        block = Block.new(parent, :floating_title)
-        if float_id.nil? || float_id.empty?
-          # FIXME remove hack of creating throwaway Section to get at the generate_id method
-          tmp_sect = Section.new(parent)
-          tmp_sect.title = float_title
-          block.id = tmp_sect.generate_id
-        else
-          block.id = float_id
-          @document.register(:ids, [float_id, float_title])
-        end
-        block.level = float_level
-        block.title = float_title
+        when :example
+          block = build_block(block_context, :complex, terminator, parent, reader, attributes, true)
+
+        when :listing, :fenced_code, :source
+          if block_context == :fenced_code
+            style = attributes['style'] = 'source'
+            lang = this_line[3..-1].strip
+            attributes['language'] = lang unless lang.empty?
+            terminator = terminator[0..2] if terminator.length > 3
+          elsif block_context == :source
+            AttributeList.rekey(attributes, [nil, 'language', 'linenums'])
+          end
+          block = build_block(:listing, :verbatim, terminator, parent, reader, attributes, true)
 
-      # a paragraph - contiguous nonblank/noncontinuation lines
-      else
-        reader.unshift_line this_line
-        buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true, :skip_line_comments => true) {|line|
-          is_delimited_block?(line) || line.match(REGEXP[:attr_line]) ||
-          # next list item can be directly adjacent to paragraph of previous list item
-          context == :dlist && line.match(REGEXP[:dlist])
-          # not sure if there are any cases when we need this check for other list types
-          #LIST_CONTEXTS.include?(context) && line.match(REGEXP[context])
-        }
+        when :literal
+          block = build_block(block_context, :verbatim, terminator, parent, reader, attributes)
+        
+        when :pass
+          block = build_block(block_context, :simple, terminator, parent, reader, attributes)
 
-        # NOTE we need this logic because the reader is processing line
-        # comments and that might leave us w/ an empty buffer
-        if buffer.empty?
-          reader.get_line
-          break
-        end
+        when :open, :sidebar
+          block = build_block(block_context, :complex, terminator, parent, reader, attributes)
 
-        catalog_inline_anchors(buffer.join, document)
+        when :table
+          block_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true)
+          block = next_table(block_reader, parent, attributes)
+
+        when :quote, :verse
+          AttributeList.rekey(attributes, [nil, 'attribution', 'citetitle'])
+          block = build_block(block_context, (block_context == :verse ? :verbatim : :complex), terminator, parent, reader, attributes)
 
-        if !options[:text] && (admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+')))
-          buffer[0] = admonition.post_match
-          block = Block.new(parent, :admonition, buffer)
-          attributes['style'] = admonition[1]
-          attributes['name'] = admonition_name = admonition[1].downcase
-          attributes['caption'] ||= document.attributes["#{admonition_name}-caption"]
-        else
-          buffer.last.chomp!
-          block = Block.new(parent, :paragraph, buffer)
         end
       end
     end
@@ -602,7 +569,7 @@ class Lexer
     # blocks or trailing attribute lists could leave us without a block,
     # so handle accordingly
     if !block.nil?
-      block.id        = attributes['id'] if attributes.has_key?('id')
+      block.id      ||= attributes['id'] if attributes.has_key?('id')
       block.title     = attributes['title'] unless block.title?
       block.caption ||= attributes['caption'] unless block.is_a?(Section)
       # AsciiDoc always use [id] as the reftext in HTML output,
@@ -645,9 +612,21 @@ class Lexer
       if DELIMITED_BLOCKS.has_key? tip
         # if tip is the full line
         if tl == line_len - 1
-          return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, tip) : true
+          #return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, tip) : true
+          if return_match_data
+            context, masq = *DELIMITED_BLOCKS[tip]
+            BlockMatchData.new(context, masq, tip, tip)
+          else
+            true
+          end
         elsif match = line.match(REGEXP[:any_blk])
-          return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, match[0]) : true
+          #return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, match[0]) : true
+          if return_match_data
+            context, masq = *DELIMITED_BLOCKS[tip]
+            BlockMatchData.new(context, masq, tip, match[0])
+          else
+            true
+          end
         else
           nil
         end
@@ -659,6 +638,44 @@ class Lexer
     end
   end
 
+  # whether a block supports complex content should be a config setting
+  def self.build_block(block_context, content_type, terminator, parent, reader, attributes, supports_caption = false)
+    if terminator.nil?
+      if content_type == :verbatim
+        buffer = reader.grab_lines_until(:break_on_blank_lines => true, :break_on_list_continuation => true)
+      else
+        buffer = reader.grab_lines_until(
+            :break_on_blank_lines => true,
+            :break_on_list_continuation => true,
+            :preserve_last_line => true,
+            :skip_line_comments => true) {|line|
+          COMPLIANCE[:block_terminates_paragraph] && (is_delimited_block?(line) || line.match(REGEXP[:attr_line]))
+        }
+        # QUESTION check for empty buffer?
+      end
+    elsif content_type != :complex
+      buffer = reader.grab_lines_until(:terminator => terminator, :chomp_last_line => true)
+    else
+      buffer = nil
+      block_reader = Reader.new reader.grab_lines_until(:terminator => terminator)
+    end
+
+    block = Block.new(parent, block_context, buffer)
+    # should supports_caption be necessary?
+    if supports_caption
+      block.title = attributes.delete('title') if attributes.has_key?('title')
+      block.assign_caption attributes.delete('caption')
+    end
+
+    if buffer.nil?
+      while block_reader.has_more_lines?
+        parsed_block = next_block(block_reader, block)
+        block.blocks << parsed_block unless parsed_block.nil?
+      end
+    end
+    block
+  end
+
   # Internal: Parse and construct an outline list Block from the current position of the Reader
   #
   # reader    - The Reader from which to retrieve the outline list
@@ -939,12 +956,12 @@ class Lexer
           if this_line.match(REGEXP[:lit_par])
             reader.unshift_line this_line
             buffer.concat reader.grab_lines_until(
-              :preserve_last_line => true,
-              :break_on_blank_lines => true,
-              :break_on_list_continuation => true) {|line|
-                # we may be in an indented list disguised as a literal paragraph
-                # so we need to make sure we don't slurp up a legitimate sibling
-                list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
+                :preserve_last_line => true,
+                :break_on_blank_lines => true,
+                :break_on_list_continuation => true) {|line|
+              # we may be in an indented list disguised as a literal paragraph
+              # so we need to make sure we don't slurp up a legitimate sibling
+              list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
             }
             continuation = :inactive
           # let block metadata play out until we find the block
@@ -982,13 +999,13 @@ class Lexer
               if this_line.match(REGEXP[:lit_par])
                 reader.unshift_line this_line
                 buffer.concat reader.grab_lines_until(
-                  :preserve_last_line => true,
-                  :break_on_blank_lines => true,
-                  :break_on_list_continuation => true) {|line|
-                    # we may be in an indented list disguised as a literal paragraph
-                    # so we need to make sure we don't slurp up a legitimate sibling
-                    list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
-                  }
+                    :preserve_last_line => true,
+                    :break_on_blank_lines => true,
+                    :break_on_list_continuation => true) {|line|
+                  # we may be in an indented list disguised as a literal paragraph
+                  # so we need to make sure we don't slurp up a legitimate sibling
+                  list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait)
+                }
               # TODO any way to combine this with the check after skipping blank lines?
               elsif is_sibling_list_item?(this_line, list_type, sibling_trait)
                 break
@@ -1068,6 +1085,7 @@ class Lexer
       section.sectname = attributes[1]
       section.special = true
       document = parent.document
+      # FIXME refactor to use assign_caption (also check requirements)
       if section.sectname == 'appendix' &&
           !attributes.has_key?('caption') &&
           !document.attributes.has_key?('caption')
@@ -1089,14 +1107,7 @@ class Lexer
   #
   # line - the String line from under the section title.
   def self.section_level(line)
-    char = line.chomp.chars.to_a.uniq
-    case char
-    when ['=']; 0
-    when ['-']; 1
-    when ['~']; 2
-    when ['^']; 3
-    when ['+']; 4
-    end
+    SECTION_LEVELS[line[0..0]]
   end
 
   #--
@@ -1619,6 +1630,8 @@ class Lexer
   # returns an instance of Asciidoctor::Table parsed from the provided reader
   def self.next_table(table_reader, parent, attributes)
     table = Table.new(parent, attributes)
+    table.title = attributes.delete('title') if attributes.has_key?('title')
+    table.assign_caption attributes.delete('caption')
 
     if attributes.has_key? 'cols'
       table.create_columns(parse_col_specs(attributes['cols']))
diff --git a/lib/asciidoctor/reader.rb b/lib/asciidoctor/reader.rb
index 31e353bf..c0a7a5ee 100644
--- a/lib/asciidoctor/reader.rb
+++ b/lib/asciidoctor/reader.rb
@@ -592,32 +592,54 @@ class Reader
   def grab_lines_until(options = {}, &block)
     buffer = []
 
-    finis = false
     advance if options[:skip_first_line]
-    # save options to locals for minor optimization
-    terminator = options[:terminator]
-    terminator.chomp! if terminator
-    break_on_blank_lines = options[:break_on_blank_lines]
-    break_on_list_continuation = options[:break_on_list_continuation]
+    # very hot code
+    # save options to locals for minor optimizations
+    if options.has_key? :terminator
+      terminator = options[:terminator]
+      break_on_blank_lines = false
+      break_on_list_continuation = false
+      chomp_last_line = options[:chomp_last_line] || false
+    else
+      terminator = nil
+      break_on_blank_lines = options[:break_on_blank_lines]
+      break_on_list_continuation = options[:break_on_list_continuation]
+      chomp_last_line = break_on_blank_lines
+    end
     skip_line_comments = options[:skip_line_comments]
     preprocess = options.fetch(:preprocess, true)
+    buffer_empty = true
     while !(this_line = get_line(preprocess)).nil?
-      Debug.debug { "Reader processing line: '#{this_line}'" }
-      finis = true if terminator && this_line.chomp == terminator
-      finis = true if !finis && break_on_blank_lines && this_line.strip.empty?
-      finis = true if !finis && break_on_list_continuation && this_line.chomp == LIST_CONTINUATION
-      finis = true if !finis && block && yield(this_line)
-      if finis
-        buffer << this_line if options[:grab_last_line]
-        unshift_line(this_line) if options[:preserve_last_line]
+      # effectively a no-args lamba, but much faster
+      finish = while true
+        break true if terminator && this_line.chomp == terminator
+        break true if break_on_blank_lines && this_line.strip.empty?
+        if break_on_list_continuation && !buffer_empty && this_line.chomp == LIST_CONTINUATION
+          options[:preserve_last_line] = true
+          break true
+        end
+        break true if block && yield(this_line)
+        break false
+      end
+
+      if finish
+        if options[:grab_last_line]
+          buffer << this_line
+          buffer_empty = false
+        end
+        # QUESTION should we dup this_line when restoring??
+        unshift_line this_line if options[:preserve_last_line]
         break
       end
 
       unless skip_line_comments && this_line.match(REGEXP[:comment])
         buffer << this_line
+        buffer_empty = false
       end
     end
 
+    # should we dup the line before chopping?
+    buffer.last.chomp! if chomp_last_line && !buffer_empty
     buffer
   end
author	Dan Allen <dallen@redhat.com>	2013-04-20 12:49:51 -0600
committer	Dan Allen <dallen@redhat.com>	2013-04-22 12:26:48 -0600
commit	efeda152d248735bc2ccfcb580010fa7a5dfac92 (patch)
tree	4510aa744f70541ee09f52e939c90278aeb0fedb /lib
parent	7705123f8dc9e89468ac5b86ce4862809d46d5a3 (diff)