From 6a235ba60693596af3f13b093b83defa37501e09 Mon Sep 17 00:00:00 2001
From: Alexander Kondratskiy <kholdstare0.0@gmail.com>
Date: Sat, 13 Jul 2013 02:23:27 -0400
Subject: Checking options before applying syntax highlighting for HTML output

---
 src/Text/Pandoc/Writers/HTML.hs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 57bf2a349..cfc187e02 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -422,7 +422,10 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
       adjCode  = if tolhs
                     then unlines . map ("> " ++) . lines $ rawCode
                     else rawCode
-  case highlight formatHtmlBlock (id',classes',keyvals) adjCode of
+      hlCode   = if writerHighlight opts -- check highlighting options
+                    then highlight formatHtmlBlock (id',classes',keyvals) adjCode
+                    else Nothing
+  case hlCode of
          Nothing -> return $ addAttrs opts (id',classes,keyvals)
                            $ H.pre $ H.code $ toHtml adjCode
          Just  h -> modify (\st -> st{ stHighlighting = True }) >>
@@ -589,14 +592,17 @@ inlineToHtml opts inline =
     (LineBreak)      -> return $ if writerHtml5 opts then H5.br else H.br
     (Emph lst)       -> inlineListToHtml opts lst >>= return . H.em
     (Strong lst)     -> inlineListToHtml opts lst >>= return . H.strong
-    (Code attr str)  -> case highlight formatHtmlInline attr str of
+    (Code attr str)  -> case hlCode of
                              Nothing -> return
                                         $ addAttrs opts attr
                                         $ H.code $ strToHtml str
                              Just  h -> do
                                modify $ \st -> st{ stHighlighting = True }
                                return $ addAttrs opts (id',[],keyvals) h
-                                where (id',_,keyvals) = attr
+                        where (id',_,keyvals) = attr
+                              hlCode = if writerHighlight opts
+                                          then highlight formatHtmlInline attr str
+                                          else Nothing
     (Strikeout lst)  -> inlineListToHtml opts lst >>=
                         return . H.del
     (SmallCaps lst)   -> inlineListToHtml opts lst >>=
-- 
cgit v1.2.3


From f42095b7b72fc3419a661c65d17f46ba3cbc8d62 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 13 Jul 2013 13:48:50 -0700
Subject: Docx writer:  Make `--no-highlight` work properly.

---
 src/Text/Pandoc/Writers/Docx.hs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index e899200f6..d579d4fa6 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -214,7 +214,8 @@ writeDocx opts doc@(Pandoc meta _) = do
   let newstyles = styleToOpenXml $ writerHighlightStyle opts
   let stylepath = "word/styles.xml"
   styledoc <- parseXml refArchive stylepath
-  let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
+  let styledoc' = styledoc{ elContent = elContent styledoc ++
+                  [Elem x | x <- newstyles, writerHighlight opts] }
   let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
 
   -- construct word/numbering.xml
@@ -665,13 +666,16 @@ inlineToOpenXML opts (Math mathType str) = do
         Right r -> return [r]
         Left  _ -> inlinesToOpenXML opts (readTeXMath str)
 inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
-inlineToOpenXML _ (Code attrs str) =
+inlineToOpenXML opts (Code attrs str) =
   withTextProp (rStyle "VerbatimChar")
-  $ case highlight formatOpenXML attrs str of
-         Nothing  -> intercalate [br]
-                     `fmap` (mapM formattedString $ lines str)
-         Just h   -> return h
-     where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
+  $ if writerHighlight opts
+       then case highlight formatOpenXML attrs str of
+             Nothing  -> unhighlighted
+             Just h   -> return h
+       else unhighlighted
+     where unhighlighted = intercalate [br] `fmap`
+                             (mapM formattedString $ lines str)
+           formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
            toHlTok (toktype,tok) = mknode "w:r" []
                                      [ mknode "w:rPr" []
                                        [ rStyle $ show toktype ]
-- 
cgit v1.2.3


From 0b49f810f401b9154b50727d2179d1ec39cd8d3e Mon Sep 17 00:00:00 2001
From: Alexander Kondratskiy <kholdstare0.0@gmail.com>
Date: Sun, 14 Jul 2013 14:33:58 -0400
Subject: Fixing wrong numbered-list indentation in open document format

---
 src/Text/Pandoc/Writers/OpenDocument.hs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 30f99c3e4..0efbf7580 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -489,14 +489,16 @@ paraStyle parent attrs = do
       tight     = if t then [ ("fo:margin-top"          , "0in"    )
                             , ("fo:margin-bottom"       , "0in"    )]
                        else []
-      indent    = when (i /= 0 || b || t) $
-                  selfClosingTag "style:paragraph-properties" $
-                           [ ("fo:margin-left"         , indentVal)
+      indent    = if (i /= 0 || b) 
+                      then [ ("fo:margin-left"         , indentVal)
                            , ("fo:margin-right"        , "0in"    )
                            , ("fo:text-indent"         , "0in"    )
                            , ("style:auto-text-indent" , "false"  )]
-                         ++ tight
-  addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent
+                      else []
+      attributes = indent ++ tight
+      paraProps = when (not $ null attributes) $
+                    selfClosingTag "style:paragraph-properties" attributes
+  addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps
   return pn
 
 paraListStyle :: Int -> State WriterState Int
-- 
cgit v1.2.3


From 0bd5830ad4cbf056d18595208532082fe674c6d2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Jul 2013 15:37:15 -0700
Subject: HTML reader:  Generalized table parser.

This commit doesn't change the present behavior at all, but
it will make it easier to support non-simple tables in the future.
---
 src/Text/Pandoc/Readers/HTML.hs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f6657a4d1..56d35160c 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -88,7 +88,7 @@ block = choice
             , pCodeBlock
             , pList
             , pHrule
-            , pSimpleTable
+            , pTable
             , pHead
             , pBody
             , pPlain
@@ -212,8 +212,8 @@ pHrule = do
   pSelfClosing (=="hr") (const True)
   return [HorizontalRule]
 
-pSimpleTable :: TagParser [Block]
-pSimpleTable = try $ do
+pTable :: TagParser [Block]
+pTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
   caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
@@ -225,6 +225,11 @@ pSimpleTable = try $ do
           $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
   skipMany pBlank
   TagClose _ <- pSatisfy (~== TagClose "table")
+  let isSinglePlain []        = True
+      isSinglePlain [Plain _] = True
+      isSinglePlain _         = False
+  let isSimple = all isSinglePlain $ concat (head':rows)
+  guard isSimple
   let cols = maximum $ map length rows
   let aligns = replicate cols AlignLeft
   let widths = replicate cols 0
@@ -233,7 +238,7 @@ pSimpleTable = try $ do
 pCell :: String -> TagParser [TableCell]
 pCell celltype = try $ do
   skipMany pBlank
-  res <- pInTags celltype pPlain
+  res <- pInTags celltype block
   skipMany pBlank
   return [res]
 
-- 
cgit v1.2.3


From 8483b5756fbf45270a84fa3e9174081041ff5558 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Jul 2013 15:49:53 -0700
Subject: HTML reader:  Handle non-simple tables (#893).

Column widths are divided equally.

TODO:  Get column widths from col tags if present.
---
 src/Text/Pandoc/Readers/HTML.hs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 56d35160c..35b667fb0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -217,6 +217,7 @@ pTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
   caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
+  -- TODO actually read these and take width information from them
   skipMany $ (pInTags "col" block >> skipMany pBlank) <|>
              (pInTags "colgroup" block >> skipMany pBlank)
   head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
@@ -229,10 +230,15 @@ pTable = try $ do
       isSinglePlain [Plain _] = True
       isSinglePlain _         = False
   let isSimple = all isSinglePlain $ concat (head':rows)
-  guard isSimple
-  let cols = maximum $ map length rows
+  let cols = length $ if null head'
+                         then head rows
+                         else head'
+  -- fail if there are colspans or rowspans
+  guard $ all (\r -> length r == cols) rows
   let aligns = replicate cols AlignLeft
-  let widths = replicate cols 0
+  let widths = if isSimple
+                  then replicate cols 0
+                  else replicate cols (1.0 / fromIntegral cols)
   return [Table caption aligns widths head' rows]
 
 pCell :: String -> TagParser [TableCell]
-- 
cgit v1.2.3


From 94c9825468692a343af7ef1686b1c92e1ec71adf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Jul 2013 17:03:28 -0700
Subject: HTML reader: read widths from col tags if present.

Closes #893.
---
 src/Text/Pandoc/Readers/HTML.hs | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 35b667fb0..0068ab5c1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -47,7 +47,7 @@ import Data.Maybe ( fromMaybe, isJust )
 import Data.List ( intercalate )
 import Data.Char ( isDigit )
 import Control.Monad ( liftM, guard, when, mzero )
-import Control.Applicative ( (<$>), (<$) )
+import Control.Applicative ( (<$>), (<$), (<*) )
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -218,8 +218,7 @@ pTable = try $ do
   skipMany pBlank
   caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
   -- TODO actually read these and take width information from them
-  skipMany $ (pInTags "col" block >> skipMany pBlank) <|>
-             (pInTags "colgroup" block >> skipMany pBlank)
+  widths' <- pColgroup <|> many pCol
   head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
   skipMany pBlank
   rows <- pOptInTag "tbody"
@@ -236,11 +235,29 @@ pTable = try $ do
   -- fail if there are colspans or rowspans
   guard $ all (\r -> length r == cols) rows
   let aligns = replicate cols AlignLeft
-  let widths = if isSimple
-                  then replicate cols 0
-                  else replicate cols (1.0 / fromIntegral cols)
+  let widths = if null widths'
+                  then if isSimple
+                       then replicate cols 0
+                       else replicate cols (1.0 / fromIntegral cols)
+                  else widths'
   return [Table caption aligns widths head' rows]
 
+pCol :: TagParser Double
+pCol = try $ do
+  TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
+  optional $ pSatisfy (~== TagClose "col")
+  skipMany pBlank
+  return $ case lookup "width" attribs of
+           Just x | not (null x) && last x == '%' ->
+             maybe 0.0 id $ safeRead ('0':'.':init x)
+           _ -> 0.0
+
+pColgroup :: TagParser [Double]
+pColgroup = try $ do
+  pSatisfy (~== TagOpen "colgroup" [])
+  skipMany pBlank
+  manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
+
 pCell :: String -> TagParser [TableCell]
 pCell celltype = try $ do
   skipMany pBlank
-- 
cgit v1.2.3


From b2385d0e9bf13f2fc152a3983893c47f2ab5d4c0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Jul 2013 22:04:59 -0700
Subject: Text.Pandoc.ImageSize:  Handle EPS.

Closes #903.  This change will make EPS images properly sized
on conversion to Word.
---
 src/Text/Pandoc/ImageSize.hs    | 24 +++++++++++++++++++++++-
 src/Text/Pandoc/Writers/Docx.hs |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 273a1a428..9b0850efb 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -34,11 +34,12 @@ import Data.ByteString (ByteString, unpack)
 import qualified Data.ByteString.Char8 as B
 import Control.Monad
 import Data.Bits
+import Text.Pandoc.Shared (safeRead)
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
 
-data ImageType = Png | Gif | Jpeg | Pdf deriving Show
+data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show
 
 data ImageSize = ImageSize{
                      pxX   :: Integer
@@ -54,6 +55,9 @@ imageType img = case B.take 4 img of
                      "\x47\x49\x46\x38" -> return Gif
                      "\xff\xd8\xff\xe0" -> return Jpeg
                      "%PDF"             -> return Pdf
+                     "%!PS"
+                       | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
+                                        -> return Eps
                      _                  -> fail "Unknown image type"
 
 imageSize :: ByteString -> Maybe ImageSize
@@ -63,6 +67,7 @@ imageSize img = do
        Png  -> pngSize img
        Gif  -> gifSize img
        Jpeg -> jpegSize img
+       Eps  -> epsSize img
        Pdf  -> Nothing  -- TODO
 
 sizeInPixels :: ImageSize -> (Integer, Integer)
@@ -71,6 +76,23 @@ sizeInPixels s = (pxX s, pxY s)
 sizeInPoints :: ImageSize -> (Integer, Integer)
 sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s)
 
+epsSize :: ByteString -> Maybe ImageSize
+epsSize img = do
+  let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img
+  let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls
+  case ls' of
+       []    -> mzero
+       (x:_) -> case B.words x of
+                     (_:_:_:ux:uy:[]) -> do
+                        ux' <- safeRead $ B.unpack ux
+                        uy' <- safeRead $ B.unpack uy
+                        return ImageSize{
+                            pxX  = ux'
+                          , pxY  = uy'
+                          , dpiX = 72
+                          , dpiY = 72 }
+                     _ -> mzero
+
 pngSize :: ByteString -> Maybe ImageSize
 pngSize img = do
   let (h, rest) = B.splitAt 8 img
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index d579d4fa6..1ed8c2fa5 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -776,6 +776,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
                              Just Jpeg -> ".jpeg"
                              Just Gif  -> ".gif"
                              Just Pdf  -> ".pdf"
+                             Just Eps  -> ".eps"
                              Nothing   -> takeExtension src
           if null imgext
              then -- without an extension there is no rule for content type
-- 
cgit v1.2.3


From 6c2e76ac617e5972db5d118525e7f6f59f43caac Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Jul 2013 15:38:56 -0700
Subject: Added `ignore_line_breaks` markdown extension.

This causes intra-paragraph line breaks to be ignored,
rather than being treated as hard line breaks or spaces.
This is useful for some East Asian languages, where spaces
aren't used between words, but text is separated into lines
for readability.
---
 src/Text/Pandoc/Options.hs          | 1 +
 src/Text/Pandoc/Readers/Markdown.hs | 1 +
 2 files changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c9a5e27da..61a85cf6e 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -92,6 +92,7 @@ data Extension =
     | Ext_superscript         -- ^ Superscript using ^this^ syntax
     | Ext_subscript           -- ^ Subscript using ~this~ syntax
     | Ext_hard_line_breaks    -- ^ All newlines become hard line breaks
+    | Ext_ignore_line_breaks  -- ^ Newlines in paragraphs are ignored
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_abbreviations       -- ^ PHP markdown extra abbreviation definitions
     | Ext_auto_identifiers    -- ^ Automatic identifiers for headers
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a3500fbcf..1aa392162 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1566,6 +1566,7 @@ endline = try $ do
      notFollowedBy' bulletListStart
      notFollowedBy' anyOrderedListStart
   (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+    <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
     <|> (return $ return B.space)
 
 --
-- 
cgit v1.2.3


From 7c980f39bf1cff941d3e78056fd69e0b371833e3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Jul 2013 20:58:14 -0700
Subject: Improved fetching of external resources.

* In Shared, openURL and fetchItem now return an Either, for
  better error handling. (API change.)
* Better error message when fetching a URL fails with
  `--self-contained`.
* EPUB writer: If resource not found, skip it, as in Docx writer.
* Closes #916.
---
 src/Text/Pandoc/SelfContained.hs |  5 +++--
 src/Text/Pandoc/Shared.hs        | 16 +++++++++-------
 src/Text/Pandoc/Writers/Docx.hs  |  2 +-
 src/Text/Pandoc/Writers/EPUB.hs  | 13 +++++++++----
 src/Text/Pandoc/Writers/ODT.hs   |  5 ++---
 5 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index c4613992a..0547bc065 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -40,7 +40,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
 import Text.Pandoc.UTF8 (toString,  fromString)
 import Text.Pandoc.MIME (getMimeType)
 import System.Directory (doesFileExist)
@@ -98,7 +98,7 @@ cssURLs userdata d orig =
 getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
 getItem userdata f =
   if isAbsoluteURI f
-     then openURL f
+     then openURL f >>= either handleErr return
      else do
        -- strip off trailing query or fragment part, if relative URL.
        -- this is needed for things like cmunrm.eot?#iefix,
@@ -110,6 +110,7 @@ getItem userdata f =
        exists <- doesFileExist f'
        cont <- if exists then B.readFile f' else readDataFile userdata f'
        return (cont, mime)
+  where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
 
 getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
 getRaw userdata mimetype src = do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 09086da1f..0f2e16d2e 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -95,6 +95,7 @@ import Text.Pandoc.MIME (getMimeType)
 import System.FilePath ( (</>), takeExtension, dropExtension )
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State as S
+import qualified Control.Exception as E
 import Control.Monad (msum, unless)
 import Text.Pandoc.Pretty (charWidth)
 import System.Locale (defaultTimeLocale)
@@ -586,12 +587,13 @@ readDataFileUTF8 userDir fname =
 
 -- | Fetch an image or other item from the local filesystem or the net.
 -- Returns raw content and maybe mime type.
-fetchItem :: String -> String -> IO (BS.ByteString, Maybe String)
+fetchItem :: String -> String
+          -> IO (Either E.SomeException (BS.ByteString, Maybe String))
 fetchItem sourceDir s =
   case s of
     _ | isAbsoluteURI s         -> openURL s
       | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s
-      | otherwise               -> do
+      | otherwise               -> E.try $ do
           let mime = case takeExtension s of
                         ".gz" -> getMimeType $ dropExtension s
                         x     -> getMimeType x
@@ -600,21 +602,21 @@ fetchItem sourceDir s =
           return (cont, mime)
 
 -- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (BS.ByteString, Maybe String)
+openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
 openURL u
   | "data:" `isPrefixOf` u =
     let mime     = takeWhile (/=',') $ drop 5 u
         contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
-    in  return (contents, Just mime)
+    in  return $ Right (contents, Just mime)
 #ifdef HTTP_CONDUIT
-  | otherwise = do
+  | otherwise = E.try $ do
      req <- parseUrl u
      resp <- withManager $ httpLbs req
      return (BS.concat $ toChunks $ responseBody resp,
              UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
 #else
-  | otherwise = getBodyAndMimeType `fmap` browse
-              (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
+  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
+              (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                   setOutHandler $ const (return ())
                   setAllowRedirects True
                   request (getRequest' u'))
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 1ed8c2fa5..611cddc65 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -726,7 +726,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
     Just (_,_,_,elt,_) -> return [elt]
     Nothing -> do
       let sourceDir = writerSourceDirectory opts
-      res <- liftIO $ E.try $ fetchItem sourceDir src
+      res <- liftIO $ fetchItem sourceDir src
       case res of
         Left (_ :: E.SomeException) -> do
           liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index f171a2560..42863ef86 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -123,10 +123,15 @@ writeEPUB opts doc@(Pandoc meta _) = do
   Pandoc _ blocks <- bottomUpM
        (transformInline opts' sourceDir picsRef) doc
   pics <- readIORef picsRef
-  let readPicEntry (oldsrc, newsrc) = do
-        (img,_) <- fetchItem sourceDir oldsrc
-        return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img
-  picEntries <- mapM readPicEntry pics
+  let readPicEntry entries (oldsrc, newsrc) = do
+        res <- fetchItem sourceDir oldsrc
+        case res of
+             Left e        -> do
+              warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
+              return entries
+             Right (img,_) -> return $
+              (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
+  picEntries <- foldM readPicEntry [] pics
 
   -- handle fonts
   let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index db27286e8..589010bb9 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
 import Control.Monad (liftM)
-import Control.Monad.Trans (liftIO)
 import Text.Pandoc.XML
 import Text.Pandoc.Pretty
 import qualified Control.Exception as E
@@ -114,10 +113,10 @@ writeODT opts doc@(Pandoc meta _) = do
 
 transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
 transformPic sourceDir entriesRef (Image lab (src,_)) = do
-  res <- liftIO $ E.try $ fetchItem sourceDir src
+  res <- fetchItem sourceDir src
   case res of
      Left (_ :: E.SomeException) -> do
-       liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
+       warn $ "Could not find image `" ++ src ++ "', skipping..."
        return $ Emph lab
      Right (img, _) -> do
        let size = imageSize img
-- 
cgit v1.2.3


From 93e096fe1d23bf60a7ca7fa39fa6e730336338eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Jul 2013 21:51:11 -0700
Subject: Fixed warning.

---
 src/Text/Pandoc/Writers/EPUB.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 42863ef86..e625931fc 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -126,7 +126,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let readPicEntry entries (oldsrc, newsrc) = do
         res <- fetchItem sourceDir oldsrc
         case res of
-             Left e        -> do
+             Left _        -> do
               warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
               return entries
              Right (img,_) -> return $
-- 
cgit v1.2.3


From fd0f8c1a8a03cedf868ea2b26e40bfe00852d0b2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Jul 2013 21:51:23 -0700
Subject: Text.Pandoc.PDF:  put temporary output directory in TEXINPUTS.

This will help later when we try to download external resources.
We can put them in the temp directory.  See #917.
---
 src/Text/Pandoc/PDF.hs | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 3227fd0bd..b36f2a0af 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -38,6 +38,7 @@ import System.Exit (ExitCode (..))
 import System.FilePath
 import System.Directory
 import System.Process
+import System.Environment
 import Control.Exception (evaluate)
 import System.IO (hClose)
 import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
@@ -102,7 +103,12 @@ runTeXProgram program runsLeft tmpDir source = do
     unless exists $ UTF8.writeFile file source
     let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
          "-output-directory", tmpDir, file]
-    (exit, out, err) <- readCommand program programArgs
+    env' <- getEnvironment
+    let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++)
+          $ lookup "TEXINPUTS" env'
+    let env'' = ("TEXINPUTS", texinputs) :
+                  [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
+    (exit, out, err) <- readCommand (Just env'') program programArgs
     if runsLeft > 1
        then runTeXProgram program (runsLeft - 1) tmpDir source
        else do
@@ -118,12 +124,14 @@ runTeXProgram program runsLeft tmpDir source = do
 -- Run a command and return exitcode, contents of stdout, and
 -- contents of stderr. (Based on
 -- 'readProcessWithExitCode' from 'System.Process'.)
-readCommand :: FilePath                            -- ^ command to run
+readCommand :: Maybe [(String, String)]            -- ^ environment variables
+            -> FilePath                            -- ^ command to run
             -> [String]                            -- ^ any arguments
             -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr
-readCommand cmd args = do
+readCommand mbenv cmd args = do
     (Just inh, Just outh, Just errh, pid) <-
-        createProcess (proc cmd args){ std_in  = CreatePipe,
+        createProcess (proc cmd args){ env     = mbenv,
+                                       std_in  = CreatePipe,
                                        std_out = CreatePipe,
                                        std_err = CreatePipe }
     outMVar <- newEmptyMVar
-- 
cgit v1.2.3


From 7102254e244b37c91d6b35b4940511a8656edc49 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 20 Jul 2013 12:14:43 -0700
Subject: PDF generation improvements.

* `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`.
  (API change.)
* `makePDF` walks the pandoc AST and checks for the existence of
  images in the local directory.  If they are not found, it attempts
  to find them, either in the directory containing the first source
  file, or at an absolute URL, or at a URL relative to the base URL
  of the first command line argument.
* Closes #917.
---
 src/Text/Pandoc/MIME.hs | 11 +++++++++--
 src/Text/Pandoc/PDF.hs  | 50 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index eb54bd48d..d9cb94a33 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Mime type lookup for ODT writer.
 -}
-module Text.Pandoc.MIME ( getMimeType )
+module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
 where
 import System.FilePath
 import Data.Char ( toLower )
@@ -37,7 +37,14 @@ import qualified Data.Map as M
 getMimeType :: FilePath -> Maybe String
 getMimeType "layout-cache" = Just "application/binary"  -- in ODT
 getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
-  where mimeTypes = M.fromList -- List borrowed from happstack-server.
+  where mimeTypes = M.fromList mimeTypesList
+
+extensionFromMimeType :: String -> Maybe String
+extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes
+  where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
+
+mimeTypesList :: [(String, String)]
+mimeTypesList = -- List borrowed from happstack-server.
            [("gz","application/x-gzip")
            ,("cabal","application/x-cabal")
            ,("%","application/x-trash")
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index b36f2a0af..49b455285 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -28,12 +28,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of LaTeX documents to PDF.
 -}
-module Text.Pandoc.PDF ( tex2pdf ) where
+module Text.Pandoc.PDF ( makePDF ) where
 
 import System.IO.Temp
 import Data.ByteString.Lazy (ByteString)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as BC
+import qualified Data.ByteString as BS
 import System.Exit (ExitCode (..))
 import System.FilePath
 import System.Directory
@@ -42,9 +43,15 @@ import System.Environment
 import Control.Exception (evaluate)
 import System.IO (hClose)
 import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
-import Text.Pandoc.UTF8 as UTF8
 import Control.Monad (unless)
 import Data.List (isInfixOf)
+import qualified Data.ByteString.Base64 as B64
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic (bottomUpM)
+import Text.Pandoc.Shared (fetchItem, warn)
+import Text.Pandoc.Options (WriterOptions(..))
+import Text.Pandoc.MIME (extensionFromMimeType)
 
 withTempDir :: String -> (FilePath -> IO a) -> IO a
 withTempDir =
@@ -54,12 +61,45 @@ withTempDir =
   withSystemTempDirectory
 #endif
 
-tex2pdf :: String       -- ^ tex program (pdflatex, lualatex, xelatex)
-        -> String       -- ^ latex source
+makePDF :: String              -- ^ pdf creator (pdflatex, lualatex, xelatex)
+        -> (WriterOptions -> Pandoc -> String)  -- ^ writer
+        -> WriterOptions       -- ^ options
+        -> Pandoc              -- ^ document
         -> IO (Either ByteString ByteString)
-tex2pdf program source = withTempDir "tex2pdf." $ \tmpdir ->
+makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
+  doc' <- handleImages (writerSourceDirectory opts) tmpdir doc
+  let source = writer opts doc'
   tex2pdf' tmpdir program source
 
+handleImages :: String        -- ^ source directory/base URL
+             -> FilePath      -- ^ temp dir to store images
+             -> Pandoc        -- ^ document
+             -> IO Pandoc
+handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir)
+
+handleImage' :: String
+             -> FilePath
+             -> Inline
+             -> IO Inline
+handleImage' baseURL tmpdir (Image ils (src,tit)) = do
+    exists <- doesFileExist src
+    if exists
+       then return $ Image ils (src,tit)
+       else do
+         res <- fetchItem baseURL src
+         case res of
+              Right (contents, Just mime) -> do
+                let ext = maybe (takeExtension src) id $
+                          extensionFromMimeType mime
+                let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
+                let fname = tmpdir </> basename <.> ext
+                BS.writeFile fname contents
+                return $ Image ils (fname,tit)
+              _ -> do
+                warn $ "Could not find image `" ++ src ++ "', skipping..."
+                return $ Image ils (src,tit)
+handleImage' _ _ x = return x
+
 tex2pdf' :: FilePath                        -- ^ temp directory for output
          -> String                          -- ^ tex program
          -> String                          -- ^ tex source
-- 
cgit v1.2.3


From bd1979f1b74fb18baa70c4b77cc58931e980087a Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 20 Jul 2013 21:14:38 -0700
Subject: Markdown reader: Improved strong/emph parsing.

Using technique from github.com/jgm/Markdown.

The new parsing algorithm requires no backtracking, and no keeping
track of nesting levels.

It will give different results in some edge cases but should not
affect most people.
---
 src/Text/Pandoc/Readers/Markdown.hs | 88 +++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 34 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1aa392162..28f69eae4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1340,17 +1340,15 @@ inline = choice [ whitespace
                 , str
                 , endline
                 , code
-                , fours
-                , strong
-                , emph
+                , strongOrEmph
                 , note
                 , cite
                 , link
                 , image
                 , math
                 , strikeout
-                , superscript
                 , subscript
+                , superscript
                 , inlineNote  -- after superscript because of ^[link](/foo)^
                 , autoLink
                 , rawHtmlInline
@@ -1455,14 +1453,58 @@ mathInlineWith op cl = try $ do
   notFollowedBy digit  -- to prevent capture of $5
   return $ concat words'
 
--- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row
--- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub
-fours :: Parser [Char] st (F Inlines)
-fours = try $ do
-  x <- char '*' <|> char '_' <|> char '~' <|> char '^'
-  count 2 $ satisfy (==x)
-  rest <- many1 (satisfy (==x))
-  return $ return $ B.str (x:x:x:rest)
+-- Parses material enclosed in *s, **s, _s, or __s.
+-- Designed to avoid backtracking.
+enclosure :: Char
+          -> MarkdownParser (F Inlines)
+enclosure c = do
+  cs <- many1 (char c)
+  (return (B.str cs) <>) <$> whitespace
+    <|> case length cs of
+             3  -> three c
+             2  -> two   c mempty
+             1  -> one   c mempty
+             _  -> return (return $ B.str cs)
+
+-- Parse inlines til you hit one c or a sequence of two cs.
+-- If one c, emit emph and then parse two.
+-- If two cs, emit strong and then parse one.
+three :: Char -> MarkdownParser (F Inlines)
+three c = do
+  contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
+  (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents))
+    <|> (try (string [c,c]) >> one c (B.strong <$> contents))
+    <|> (char c >> two c (B.emph <$> contents))
+    <|> return (return (B.str [c,c,c]) <> contents)
+
+-- Parse inlines til you hit two c's, and emit strong.
+-- If you never do hit two cs, emit ** plus inlines parsed.
+two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two c prefix' = do
+  let ender = try $ string [c,c]
+  contents <- mconcat <$> many (try $ notFollowedBy ender >> inline)
+  (ender >> return (B.strong <$> (prefix' <> contents)))
+    <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+
+-- Parse inlines til you hit a c, and emit emph.
+-- If you never hit a c, emit * plus inlines parsed.
+one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one c prefix' = do
+  contents <- mconcat <$> many (  (notFollowedBy (char c) >> inline)
+                           <|> try (string [c,c] >>
+                                    notFollowedBy (char c) >>
+                                    two c prefix') )
+  (char c >> return (B.emph <$> (prefix' <> contents)))
+    <|> return (return (B.str [c]) <> (prefix' <> contents))
+
+strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
+  where  checkIntraword = do
+           exts <- getOption readerExtensions
+           when (Ext_intraword_underscores `Set.member` exts) $ do
+             pos <- getPosition
+             lastStrPos <- stateLastStrPos <$> getState
+             guard $ lastStrPos /= Just pos
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1474,28 +1516,6 @@ inlinesBetween start end =
     where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
           innerSpace = try $ whitespace >>~ notFollowedBy' end
 
-emph :: MarkdownParser (F Inlines)
-emph = fmap B.emph <$> nested
-  (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
-    where starStart = char '*' >> lookAhead nonspaceChar
-          starEnd   = notFollowedBy' (() <$ strong) >> char '*'
-          ulStart   = checkIntraword >> char '_' >> lookAhead nonspaceChar
-          ulEnd     = notFollowedBy' (() <$ strong) >> char '_'
-          checkIntraword = do
-            exts <- getOption readerExtensions
-            when (Ext_intraword_underscores `Set.member` exts) $ do
-              pos <- getPosition
-              lastStrPos <- stateLastStrPos <$> getState
-              guard $ lastStrPos /= Just pos
-
-strong :: MarkdownParser (F Inlines)
-strong = fmap B.strong <$> nested
-  (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
-    where starStart = string "**" >> lookAhead nonspaceChar
-          starEnd   = try $ string "**"
-          ulStart   = string "__" >> lookAhead nonspaceChar
-          ulEnd     = try $ string "__"
-
 strikeout :: MarkdownParser (F Inlines)
 strikeout = fmap B.strikeout <$>
  (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
-- 
cgit v1.2.3


From 800c5490ec080520268c9c3348f2b4199a21e6db Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 21 Jul 2013 11:44:49 -0700
Subject: LaTeX reader:  Don't add spurious ", " to citation suffixes.

This is added when needed in Text.Pandoc.Biblio anyway.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 1a22f2ad2..7c7ae9fef 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -44,7 +44,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
 import Text.Pandoc.Builder
-import Data.Char (isLetter, isPunctuation, isSpace)
+import Data.Char (isLetter)
 import Control.Applicative
 import Data.Monoid
 import System.Environment (getEnv)
@@ -986,12 +986,8 @@ addPrefix _ _ = []
 
 addSuffix :: [Inline] -> [Citation] -> [Citation]
 addSuffix s ks@(_:_) =
-  let k  = last ks
-      s' = case s of
-                (Str (c:_):_)
-                  | not (isPunctuation c || isSpace c) -> Str "," : Space : s
-                _                                      -> s
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s'}]
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
 addSuffix _ _ = []
 
 simpleCiteArgs :: LP [Citation]
-- 
cgit v1.2.3


From 6f99ad80135c06d30d92ad275d482e841ef1e872 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 21 Jul 2013 12:16:52 -0700
Subject: Biblio:  Tweaks to improve default behavior.

* A suffix beginning with a digit gets 'p' inserted before it
  before passing to citeproc-hs, so that bare numbers are treated
  as page numbers by default.
* A suffix not beginning with punctuation has a space added at
  the beginning (rather than a comma and space, as was done before).
* This adding occurs not just in author-in-text citations, but in
  all citations.

The result of these changes (and the last commit) is that
`\citep[23]{item1}` in LaTeX will be interpreted properly,
with '23' treated as a locator of type 'page'.
---
 src/Text/Pandoc/Biblio.hs | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index 4dd82dd08..ae371a46d 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -119,12 +119,12 @@ toCslCite :: Citation -> CSL.Cite
 toCslCite c
     = let (l, s)  = locatorWords $ citationSuffix c
           (la,lo) = parseLocator l
-          s'      = case (l,s,citationMode c) of
-                         -- treat a bare locator as if it begins with comma
+          s'      = case (l,s) of
+                         -- treat a bare locator as if it begins with space
                          -- so @item1 [blah] is like [@item1, blah]
-                         ("",(x:_),AuthorInText) | not (isPunct x)
-                                                 -> [Str ",",Space] ++ s
-                         _                       -> s
+                         ("",(x:_))
+                           | not (isPunct x) -> [Space] ++ s
+                         _                   -> s
           isPunct (Str (x:_)) = isPunctuation x
           isPunct _           = False
           citMode = case citationMode c of
@@ -173,13 +173,21 @@ pLocator :: Parsec [Inline] st String
 pLocator = try $ do
   optional $ pMatch (== Str ",")
   optional pSpace
-  f  <- many1 (notFollowedBy pSpace >> anyToken)
+  f  <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default
+     <|> many1 (notFollowedBy pSpace >> anyToken)
   gs <- many1 pWordWithDigits
   return $ stringify f ++ (' ' : unwords gs)
 
+guardFollowingDigit :: Parsec [Inline] st ()
+guardFollowingDigit = do
+  t <- lookAhead anyToken
+  case t of
+       Str (d:_) | isDigit d -> return ()
+       _                     -> mzero
+
 pWordWithDigits :: Parsec [Inline] st String
 pWordWithDigits = try $ do
-  pSpace
+  optional pSpace
   r <- many1 (notFollowedBy pSpace >> anyToken)
   let s = stringify r
   guard $ any isDigit s
-- 
cgit v1.2.3


From 5592666ca44aa0d027cd95bf11cff09825896584 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 23 Jul 2013 22:31:50 -0700
Subject: Text.Pandoc:  Added readJSON, writeJSON to the API.

Closes #817.
---
 src/Text/Pandoc.hs | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 86e78ce53..b5b698e09 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -72,9 +72,11 @@ module Text.Pandoc
                , readOPML
                , readHaddock
                , readNative
+               , readJSON
                -- * Writers: converting /from/ Pandoc format
                , Writer (..)
                , writeNative
+               , writeJSON
                , writeMarkdown
                , writePlain
                , writeRST
@@ -190,9 +192,8 @@ markdown o s = do
 
 -- | Association list of formats and readers.
 readers :: [(String, ReaderOptions -> String -> IO Pandoc)]
-readers = [("native"       , \_ s -> return $ readNative s)
-          ,("json"         , \_ s -> return $ checkJSON
-                                            $ decode $ UTF8.fromStringLazy s)
+readers = [ ("native"       , \_ s -> return $ readNative s)
+           ,("json"         , \o s -> return $ readJSON o s)
            ,("markdown"     , markdown)
            ,("markdown_strict" , markdown)
            ,("markdown_phpextra" , markdown)
@@ -205,8 +206,8 @@ readers = [("native"       , \_ s -> return $ readNative s)
            ,("textile"      , \o s -> return $ readTextile o s) -- TODO : textile+lhs
            ,("html"         , \o s -> return $ readHtml o s)
            ,("latex"        , \o s -> return $ readLaTeX o s)
-          ,("haddock"      , \o s -> return $ readHaddock o s)
-          ]
+           ,("haddock"      , \o s -> return $ readHaddock o s)
+           ]
 
 data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
             | IOStringWriter     (WriterOptions -> Pandoc -> IO String)
@@ -216,12 +217,12 @@ data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
 writers :: [ ( String, Writer ) ]
 writers = [
    ("native"       , PureStringWriter writeNative)
-  ,("json"         , PureStringWriter $ \_ -> UTF8.toStringLazy . encode)
+  ,("json"         , PureStringWriter writeJSON)
   ,("docx"         , IOByteStringWriter writeDocx)
- ,("odt"          , IOByteStringWriter writeODT)
- ,("epub"         , IOByteStringWriter $ \o ->
+  ,("odt"          , IOByteStringWriter writeODT)
+  ,("epub"         , IOByteStringWriter $ \o ->
                       writeEPUB o{ writerEpubVersion = Just EPUB2 })
- ,("epub3"        , IOByteStringWriter $ \o ->
+  ,("epub3"        , IOByteStringWriter $ \o ->
                        writeEPUB o{ writerEpubVersion = Just EPUB3 })
   ,("fb2"          , IOStringWriter writeFB2)
   ,("html"         , PureStringWriter writeHtmlString)
@@ -359,3 +360,10 @@ instance (Data a) => ToJsonFilter (a -> IO [a]) where
 checkJSON :: Maybe a -> a
 checkJSON Nothing  = error "Error parsing JSON"
 checkJSON (Just r) = r
+
+readJSON :: ReaderOptions -> String -> Pandoc
+readJSON _ = checkJSON . decode . UTF8.fromStringLazy
+
+writeJSON :: WriterOptions -> Pandoc -> String
+writeJSON _ = UTF8.toStringLazy . encode
+
-- 
cgit v1.2.3


From 85cc140744b01148da944a58948d9e4a87cb64c4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 25 Jul 2013 09:45:23 -0700
Subject: Textile reader: Improved handling of `<pre>` blocks.

* Closed #927 (a bug in which `<pre>` in certain contexts was
  not recognized as a code block).
* Remove internal HTML tags in code blocks, rather than printing
  them verbatim.
* Parse attributes on `<pre>` tag for code blocks.
---
 src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a1687a691..9191f6908 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
+import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
 import Data.Char ( digitToInt, isUpper )
@@ -152,8 +153,10 @@ codeBlockBc = try $ do
 -- | Code Blocks in Textile are between <pre> and </pre>
 codeBlockPre :: Parser [Char] ParserState Block
 codeBlockPre = try $ do
-  htmlTag (tagOpen (=="pre") null)
-  result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
+  (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
+  result' <- (innerText . parseTags) `fmap` -- remove internal tags
+               manyTill anyChar (htmlTag (tagClose (=="pre")))
+  optional blanklines
   -- drop leading newline if any
   let result'' = case result' of
                       '\n':xs -> xs
@@ -162,7 +165,10 @@ codeBlockPre = try $ do
   let result''' = case reverse result'' of
                        '\n':_ -> init result''
                        _      -> result''
-  return $ CodeBlock ("",[],[]) result'''
+  let classes = words $ fromAttrib "class" t
+  let ident = fromAttrib "id" t
+  let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+  return $ CodeBlock (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
 header :: Parser [Char] ParserState Block
@@ -275,7 +281,7 @@ definitionListItem = try $ do
 -- blocks support, we have to lookAhead for a rawHtmlBlock.
 blockBreak :: Parser [Char] ParserState ()
 blockBreak = try (newline >> blanklines >> return ()) <|>
-              (lookAhead rawHtmlBlock >> return ())
+             try (optional spaces >> lookAhead rawHtmlBlock >> return ())
 
 -- raw content
 
-- 
cgit v1.2.3


From fb9f2e4bd5f71c7b515566921c5c5a7bff73c52c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 25 Jul 2013 10:00:11 -0700
Subject: LaTeX reader:  Support `\v{}` for hacek.

Closes #926.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7c7ae9fef..6b5035d93 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -415,6 +415,7 @@ inlineCommands = M.fromList $
   , (".", option (str ".") $ try $ tok >>= accent dot)
   , ("=", option (str "=") $ try $ tok >>= accent macron)
   , ("c", option (str "c") $ try $ tok >>= accent cedilla)
+  , ("v", option (str "v") $ try $ tok >>= accent hacek)
   , ("i", lit "i")
   , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
   , (",", pure mempty)
@@ -671,6 +672,42 @@ cedilla 's' = 'ş'
 cedilla 'S' = 'Ş'
 cedilla c = c
 
+hacek :: Char -> Char
+hacek 'A' = 'Ǎ'
+hacek 'a' = 'ǎ'
+hacek 'C' = 'Č'
+hacek 'c' = 'č'
+hacek 'D' = 'Ď'
+hacek 'd' = 'ď'
+hacek 'E' = 'Ě'
+hacek 'e' = 'ě'
+hacek 'G' = 'Ǧ'
+hacek 'g' = 'ǧ'
+hacek 'H' = 'Ȟ'
+hacek 'h' = 'ȟ'
+hacek 'I' = 'Ǐ'
+hacek 'i' = 'ǐ'
+hacek 'j' = 'ǰ'
+hacek 'K' = 'Ǩ'
+hacek 'k' = 'ǩ'
+hacek 'L' = 'Ľ'
+hacek 'l' = 'ľ'
+hacek 'N' = 'Ň'
+hacek 'n' = 'ň'
+hacek 'O' = 'Ǒ'
+hacek 'o' = 'ǒ'
+hacek 'R' = 'Ř'
+hacek 'r' = 'ř'
+hacek 'S' = 'Š'
+hacek 's' = 'š'
+hacek 'T' = 'Ť'
+hacek 't' = 'ť'
+hacek 'U' = 'Ǔ'
+hacek 'u' = 'ǔ'
+hacek 'Z' = 'Ž'
+hacek 'z' = 'ž'
+hacek c   = c
+
 tok :: LP Inlines
 tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
 
-- 
cgit v1.2.3


From d5fad2306a27b3fcf2c85782dd13bc8e516a5df9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 25 Jul 2013 20:29:42 -0700
Subject: LaTeX writer:  Change `\` to `/` in paths.

`/` works even on Windows in LaTeX.  `\` will cause major problems
if unescaped.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2b4a608a7..06a04ade2 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -202,7 +202,8 @@ stringToLaTeX  ctx (x:xs) = do
                    _       -> '-' : rest
        '~' | not isUrl -> "\\textasciitilde{}" ++ rest
        '^' -> "\\^{}" ++ rest
-       '\\' -> "\\textbackslash{}" ++ rest
+       '\\'| isUrl     -> '/' : rest  -- NB. / works as path sep even on Windows
+           | otherwise -> "\\textbackslash{}" ++ rest
        '|' -> "\\textbar{}" ++ rest
        '<' -> "\\textless{}" ++ rest
        '>' -> "\\textgreater{}" ++ rest
@@ -648,7 +649,8 @@ inlineToLaTeX (Image _ (source, _)) = do
   let source' = if isAbsoluteURI source
                    then source
                    else unEscapeString source
-  return $ "\\includegraphics" <> braces (text source')
+  source'' <- stringToLaTeX URLString source'
+  return $ "\\includegraphics" <> braces (text source'')
 inlineToLaTeX (Note contents) = do
   modify (\s -> s{stInNote = True})
   contents' <- blockListToLaTeX contents
-- 
cgit v1.2.3


From a97f39c12e7b47a272575b69ad4cdd38966c043e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 26 Jul 2013 12:40:56 -0700
Subject: Beamer:  add allowframebreaks to slide if set in header classes.

It's recommended that your bibliography slide have this
attribute:

    # References {.allowframebreaks}

This causes multiple slides to be created if necessary, depending
on the length of the bibliography.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 06a04ade2..aa5bfa623 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -232,7 +232,7 @@ toSlides bs = do
 
 elementToBeamer :: Int -> Element -> State WriterState [Block]
 elementToBeamer _slideLevel (Blk b) = return [b]
-elementToBeamer slideLevel  (Sec lvl _num (ident,classes,_) tit elts)
+elementToBeamer slideLevel  (Sec lvl _num (ident,classes,kvs) tit elts)
   | lvl >  slideLevel = do
       bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
       return $ Para ( RawInline "latex" "\\begin{block}{"
@@ -240,7 +240,7 @@ elementToBeamer slideLevel  (Sec lvl _num (ident,classes,_) tit elts)
              : bs ++ [RawBlock "latex" "\\end{block}"]
   | lvl <  slideLevel = do
       bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
-      return $ (Header lvl (ident,classes,[]) tit) : bs
+      return $ (Header lvl (ident,classes,kvs) tit) : bs
   | otherwise = do -- lvl == slideLevel
       -- note: [fragile] is required or verbatim breaks
       let hasCodeBlock (CodeBlock _ _) = [True]
@@ -248,17 +248,20 @@ elementToBeamer slideLevel  (Sec lvl _num (ident,classes,_) tit elts)
       let hasCode (Code _ _) = [True]
           hasCode _          = []
       opts <- gets stOptions
-      let fragile = if not $ null $ queryWith hasCodeBlock elts ++
+      let fragile = not $ null $ queryWith hasCodeBlock elts ++
                                      if writerListings opts
                                         then queryWith hasCode elts
                                         else []
-                       then "[fragile]"
-                       else ""
-      let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile) :
+      let allowframebreaks = "allowframebreaks" `elem` classes
+      let optionslist = ["fragile" | fragile] ++
+                        ["allowframebreaks" | allowframebreaks]
+      let options = if null optionslist
+                       then ""
+                       else "[" ++ intercalate "," optionslist ++ "]"
+      let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) :
                 if tit == [Str "\0"]  -- marker for hrule
                    then []
-                   else (RawInline "latex" "\\frametitle{") : tit ++
-                        [RawInline "latex" "}"]
+                   else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"]
       let slideEnd = RawBlock "latex" "\\end{frame}"
       -- now carve up slide into blocks if there are sections inside
       bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
-- 
cgit v1.2.3


From 3c06e2692a8fd7307658498b44401868e1059d61 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 29 Jul 2013 08:38:46 -0700
Subject: Markdown atx headers:  Allow `.` or `)` after `#` if no
 `fancy_lists`.

---
 src/Text/Pandoc/Readers/Markdown.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 28f69eae4..076706b4e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -494,7 +494,8 @@ addToHeaderList (ident,classes,kvs) text = do
 atxHeader :: MarkdownParser (F Blocks)
 atxHeader = try $ do
   level <- many1 (char '#') >>= return . length
-  notFollowedBy (char '.' <|> char ')') -- this would be a list
+  notFollowedBy $ guardEnabled Ext_fancy_lists >>
+                  (char '.' <|> char ')') -- this would be a list
   skipSpaces
   text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
   attr <- atxClosing
-- 
cgit v1.2.3


From 7024664ddae00fc459953bb5d4bbc91d5877be1b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 30 Jul 2013 08:38:13 -0700
Subject: Fixed compilation with http-conduit flag False.

---
 src/Text/Pandoc/Shared.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0f2e16d2e..09874299d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -104,7 +104,6 @@ import System.IO (stderr)
 import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
          renderOptions)
 import qualified Data.ByteString as BS
-import Data.ByteString.Lazy (toChunks)
 import qualified Data.ByteString.Char8 as B8
 
 #ifdef EMBED_DATA_FILES
@@ -114,6 +113,7 @@ import System.FilePath ( joinPath, splitDirectories )
 import Paths_pandoc (getDataFileName)
 #endif
 #ifdef HTTP_CONDUIT
+import Data.ByteString.Lazy (toChunks)
 import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
                              responseBody, responseHeaders)
 import Network.HTTP.Types.Header ( hContentType)
@@ -616,7 +616,7 @@ openURL u
              UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
 #else
   | otherwise = E.try $ getBodyAndMimeType `fmap` browse
-              (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
+              (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                   setOutHandler $ const (return ())
                   setAllowRedirects True
                   request (getRequest' u'))
-- 
cgit v1.2.3


From dceffeb04370e8661dd0534a6e97fd15caaeddcf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 Aug 2013 14:20:44 -0700
Subject: Biblio:  Override citeproc-hs's endWithPunct.

The new version correctly sees a sentence ending in '.)' as ending
with punctuation.  This fixes a bug which led such sentences to receive
an extra period at the end: '.).'.  Thanks to Steve Petersen for
reporting.
---
 src/Text/Pandoc/Biblio.hs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index ae371a46d..31c55472e 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -32,7 +32,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where
 import Data.List
 import Data.Char ( isDigit, isPunctuation )
 import qualified Data.Map as M
-import Text.CSL hiding ( Cite(..), Citation(..) )
+import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct )
 import qualified Text.CSL as CSL ( Cite(..) )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
@@ -88,6 +88,19 @@ sanitize :: [Inline] -> [Inline]
 sanitize xs | endWithPunct xs = toCapital xs
             | otherwise       = toCapital (xs ++ [Str "."])
 
+
+-- A replacement for citeproc-hs's endWithPunct, which wrongly treats
+-- a sentence ending in '.)' as not ending with punctuation, leading
+-- to an extra period.
+endWithPunct :: [Inline] -> Bool
+endWithPunct [] = True
+endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of
+                              []                       -> True
+                              (')':c:_) | isEndPunct c -> True
+                              (c:_) | isEndPunct c     -> True
+                                    | otherwise        -> False
+  where isEndPunct c = c `elem` ".,;:!?"
+
 deNote :: Pandoc -> Pandoc
 deNote = topDown go
   where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs]
-- 
cgit v1.2.3


From a32417378e8023b5dd8af4d8a9ea66eddb99a0eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 Aug 2013 15:37:09 -0700
Subject: Biblio:  Don't interfere with Notes that aren't citation notes.

Closes #898: notes not generated from citations were being
adjusted (first letter capitalized, for example, against author's
intentions).
---
 src/Text/Pandoc/Biblio.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index 31c55472e..d0db35ae7 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -53,7 +53,7 @@ processBiblio (Just style) r p =
                       map (map toCslCite) grps)
       cits_map   = M.fromList $ zip grps (citations result)
       biblioList = map (renderPandoc' style) (bibliography result)
-      Pandoc m b = bottomUp mvPunct . deNote . bottomUp (processCite style cits_map) $ p'
+      Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p'
   in  Pandoc m $ b ++ biblioList
 
 -- | Substitute 'Cite' elements with formatted citations.
@@ -103,7 +103,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of
 
 deNote :: Pandoc -> Pandoc
 deNote = topDown go
-  where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs]
+  where go (Cite cs [Note [Para xs]]) =
+            Cite cs [Note $ bottomUp go' [Para $ sanitize xs]]
         go (Note xs) = Note $ bottomUp go' xs
         go x = x
         go' (Note [Para xs]:ys) =
-- 
cgit v1.2.3


From 1567d291a3aed0e55ddaaa65492ab19741e515b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 Aug 2013 16:39:43 -0700
Subject: Text.Pandoc.JSON: Use To/FromJSON instances from pandoc-types.

* These use GHC generics rather than syb, and are faster.
* toJsonFilter is now a deprecated synonym of toJSONFilter from
  Text.Pandoc.JSON.
* The deprecated jsonFilter function has been removed.
---
 src/Text/Pandoc.hs | 74 +++++++++---------------------------------------------
 1 file changed, 12 insertions(+), 62 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index b5b698e09..db0f0e5fe 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -106,12 +106,13 @@ module Text.Pandoc
                -- * Miscellaneous
                , getReader
                , getWriter
-               , jsonFilter
                , ToJsonFilter(..)
+               , ToJSONFilter(..)
              ) where
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
+import Text.Pandoc.JSON
 import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.RST
@@ -146,13 +147,11 @@ import Text.Pandoc.Writers.Custom
 import Text.Pandoc.Templates
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (safeRead, warn)
-import Data.ByteString.Lazy (ByteString)
+import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.List (intercalate, isSuffixOf)
 import Data.Version (showVersion)
-import Data.Aeson.Generic
 import Data.Set (Set)
-import Data.Data
 import qualified Data.Set as Set
 import Text.Parsec
 import Text.Parsec.Error
@@ -211,7 +210,7 @@ readers = [ ("native"       , \_ s -> return $ readNative s)
 
 data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
             | IOStringWriter     (WriterOptions -> Pandoc -> IO String)
-            | IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString)
+            | IOByteStringWriter (WriterOptions -> Pandoc -> IO BL.ByteString)
 
 -- | Association list of formats and writers.
 writers :: [ ( String, Writer ) ]
@@ -304,66 +303,17 @@ getWriter s =
                            \o -> r o{ writerExtensions = setExts $
                                             getDefaultExtensions writerName }
 
-{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-}
--- | Converts a transformation on the Pandoc AST into a function
--- that reads and writes a JSON-encoded string.  This is useful
--- for writing small scripts.
-jsonFilter :: (Pandoc -> Pandoc) -> String -> String
-jsonFilter f = UTF8.toStringLazy . encode . f . checkJSON . decode . UTF8.fromStringLazy
-
--- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output
--- from stdin, transforms it by walking the AST and applying the specified
--- function, and writes the result as json to stdout.  Usage example:
---
--- > -- capitalize.hs
--- > -- compile with:  ghc --make capitalize
--- > -- run with:      pandoc -t json | ./capitalize | pandoc -f json
--- >
--- > import Text.Pandoc
--- > import Data.Char (toUpper)
--- >
--- > main :: IO ()
--- > main = toJsonFilter capitalizeStrings
--- >
--- > capitalizeStrings :: Inline -> Inline
--- > capitalizeStrings (Str s) = Str $ map toUpper s
--- > capitalizeStrings x       = x
---
--- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@,
--- or @(a -> IO [a])@, where @a@ is an instance of 'Data'.
--- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'],
--- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim',
--- 'QuoteType', etc. See 'Text.Pandoc.Definition'.
-class ToJsonFilter a where
-  toJsonFilter :: a -> IO ()
-
-instance (Data a) => ToJsonFilter (a -> a) where
-  toJsonFilter f = BL.getContents >>=
-    BL.putStr . encode . (bottomUp f :: Pandoc -> Pandoc) . checkJSON . decode
-
-instance (Data a) => ToJsonFilter (a -> IO a) where
-  toJsonFilter f = BL.getContents >>=
-     (bottomUpM f :: Pandoc -> IO Pandoc) . checkJSON . decode >>=
-     BL.putStr . encode
-
-instance (Data a) => ToJsonFilter (a -> [a]) where
-  toJsonFilter f = BL.getContents >>=
-    BL.putStr . encode . (bottomUp (concatMap f) :: Pandoc -> Pandoc) .
-      checkJSON . decode
-
-instance (Data a) => ToJsonFilter (a -> IO [a]) where
-  toJsonFilter f = BL.getContents >>=
-    (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc)
-      . checkJSON . decode >>=
-    BL.putStr . encode
-
-checkJSON :: Maybe a -> a
-checkJSON Nothing  = error "Error parsing JSON"
-checkJSON (Just r) = r
+{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-}
+class ToJSONFilter a => ToJsonFilter a
+  where toJsonFilter :: a -> IO ()
+        toJsonFilter = toJSONFilter
 
 readJSON :: ReaderOptions -> String -> Pandoc
-readJSON _ = checkJSON . decode . UTF8.fromStringLazy
+readJSON _ = checkJSON . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
 
+checkJSON :: Either String a -> a
+checkJSON (Right x) = x
+checkJSON (Left e)  = error e
-- 
cgit v1.2.3


From 97b2be599e11bbe7aed73a30d8c7900f4276a3df Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 Aug 2013 17:02:35 -0700
Subject: Text.Pandoc: Don't reexport ToJSONFilter.

It's better just to import this from Text.Pandoc.JSON.
That way, compiled filters will be smaller in size.
---
 src/Text/Pandoc.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index db0f0e5fe..27aa02a75 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -107,7 +107,6 @@ module Text.Pandoc
                , getReader
                , getWriter
                , ToJsonFilter(..)
-               , ToJSONFilter(..)
              ) where
 
 import Text.Pandoc.Definition
@@ -303,7 +302,8 @@ getWriter s =
                            \o -> r o{ writerExtensions = setExts $
                                             getDefaultExtensions writerName }
 
-{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-}
+{-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-}
+-- | Deprecated.  Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead.
 class ToJSONFilter a => ToJsonFilter a
   where toJsonFilter :: a -> IO ()
         toJsonFilter = toJSONFilter
-- 
cgit v1.2.3


From 4a84b78100f2cfa0f7f7d13a24693a37af60003d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 Aug 2013 23:05:14 -0700
Subject: MediaWiki writer:  Use native mediawiki tables instead of HTML.

Closes #720.
---
 src/Text/Pandoc/Writers/MediaWiki.hs | 83 +++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 44 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index b3b319c2a..e1bfd18b2 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -36,7 +36,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intersect, intercalate )
+import Data.List ( intersect, intercalate, intersperse )
 import Network.URI ( isURI )
 import Control.Monad.State
 
@@ -135,25 +135,17 @@ blockToMediaWiki opts (BlockQuote blocks) = do
   return $ "<blockquote>" ++ contents ++ "</blockquote>"
 
 blockToMediaWiki opts (Table capt aligns widths headers rows') = do
-  let alignStrings = map alignmentToString aligns
-  captionDoc <- if null capt
-                   then return ""
-                   else do
-                      c <- inlineListToMediaWiki opts capt
-                      return $ "<caption>" ++ c ++ "</caption>\n"
-  let percent w = show (truncate (100*w) :: Integer) ++ "%"
-  let coltags = if all (== 0.0) widths
-                   then ""
-                   else unlines $ map
-                         (\w -> "<col width=\"" ++ percent w ++ "\" />") widths
-  head' <- if all null headers
-              then return ""
-              else do
-                 hs <- tableRowToMediaWiki opts alignStrings 0 headers
-                 return $ "<thead>\n" ++ hs ++ "\n</thead>\n"
-  body' <- zipWithM (tableRowToMediaWiki opts alignStrings) [1..] rows'
-  return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++
-            "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n"
+  caption <- if null capt
+                then return ""
+                else do
+                   c <- inlineListToMediaWiki opts capt
+                   return $ "|+ " ++ trimr c ++ "\n"
+  let headless = all null headers
+  let allrows = if headless then rows' else headers:rows'
+  tableBody <- (concat . intersperse "|-\n") `fmap`
+                mapM (tableRowToMediaWiki opts headless aligns widths)
+                     (zip [1..] allrows)
+  return $ "{|\n" ++ caption ++ tableBody ++ "|}\n"
 
 blockToMediaWiki opts x@(BulletList items) = do
   oldUseTags <- get >>= return . stUseTags
@@ -285,20 +277,34 @@ vcat = intercalate "\n"
 -- Auxiliary functions for tables:
 
 tableRowToMediaWiki :: WriterOptions
-                    -> [String]
-                    -> Int
-                    -> [[Block]]
+                    -> Bool
+                    -> [Alignment]
+                    -> [Double]
+                    -> (Int, [[Block]])
                     -> State WriterState String
-tableRowToMediaWiki opts alignStrings rownum cols' = do
-  let celltype = if rownum == 0 then "th" else "td"
-  let rowclass = case rownum of
-                      0                  -> "header"
-                      x | x `rem` 2 == 1 -> "odd"
-                      _                  -> "even"
-  cols'' <- sequence $ zipWith
-            (\alignment item -> tableItemToMediaWiki opts celltype alignment item)
-            alignStrings cols'
-  return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
+tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do
+  cells' <- mapM (\cellData ->
+          tableCellToMediaWiki opts headless rownum cellData)
+          $ zip3 alignments widths cells
+  return $ unlines cells'
+
+tableCellToMediaWiki :: WriterOptions
+                     -> Bool
+                     -> Int
+                     -> (Alignment, Double, [Block])
+                     -> State WriterState String
+tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do
+  contents <- blockListToMediaWiki opts bs
+  let marker = if rownum == 1 && not headless then "!" else "|"
+  let percent w = show (truncate (100*w) :: Integer) ++ "%"
+  let attrs = ["align=" ++ show (alignmentToString alignment) |
+                 alignment /= AlignDefault && alignment /= AlignLeft] ++
+              ["width=\"" ++ percent width ++ "\"" |
+                 width /= 0.0 && rownum == 1]
+  let attr = if null attrs
+                then ""
+                else unwords attrs ++ "|"
+  return $ marker ++ attr ++ trimr contents
 
 alignmentToString :: Alignment -> [Char]
 alignmentToString alignment = case alignment of
@@ -307,17 +313,6 @@ alignmentToString alignment = case alignment of
                                  AlignCenter  -> "center"
                                  AlignDefault -> "left"
 
-tableItemToMediaWiki :: WriterOptions
-                     -> String
-                     -> String
-                     -> [Block]
-                     -> State WriterState String
-tableItemToMediaWiki opts celltype align' item = do
-  let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++
-                    x ++ "</" ++ celltype ++ ">"
-  contents <- blockListToMediaWiki opts item
-  return $ mkcell contents
-
 -- | Convert list of Pandoc block elements to MediaWiki.
 blockListToMediaWiki :: WriterOptions -- ^ Options
                     -> [Block]       -- ^ List of block elements
-- 
cgit v1.2.3


From 5050cff37cbe2dffd7f7f09db11da40d7c1e48d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 Aug 2013 23:16:54 -0700
Subject: Removed comment that chokes recent cpp.

Closes #933.
---
 src/Text/Pandoc/Parsing.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0913d8c6c..4ade6def8 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -421,7 +421,6 @@ uri :: Parser [Char] st (String, String)
 uri = try $ do
   scheme <- uriScheme
   char ':'
-  -- /^[\/\w\u0080-\uffff]+|%[A-Fa-f0-9]+|&#?\w+;|(?:[,]+|[\S])[%&~\w\u0080-\uffff]/
   -- We allow punctuation except at the end, since
   -- we don't want the trailing '.' in 'http://google.com.' We want to allow
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
-- 
cgit v1.2.3


From 2d6e0b1530e61fa2d6a22d8b61042734b20f0af5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 4 Aug 2013 14:12:13 -0700
Subject: Remove CPP from default-extensions; add pragmas to modules as needed.

---
 src/Text/Pandoc/Pretty.hs       | 2 +-
 src/Text/Pandoc/UTF8.hs         | 1 +
 src/Text/Pandoc/Writers/EPUB.hs | 6 +-----
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 21121a506..faf2a6797 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
 {-
 Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
 
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 9fa743cd9..229442543 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
 {-
 Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
 
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index e625931fc..fb756f196 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE PatternGuards, CPP #-}
 {-
 Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
 
@@ -62,11 +62,7 @@ import Text.Pandoc.MIME (getMimeType)
 import Prelude hiding (catch)
 #endif
 import Control.Exception (catch, SomeException)
-#if MIN_VERSION_blaze_html(0,5,0)
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-#else
-import Text.Blaze.Renderer.Utf8 (renderHtml)
-#endif
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
-- 
cgit v1.2.3


From 52c5cdb04e6c574f897c948e45084bf9343bf57c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Aug 2013 16:19:34 -0700
Subject: Biblio:  Capitalize citation note only if it has a prefix.

So, author names or titles that aren't capitalized will stay
uncapitalized.
---
 src/Text/Pandoc/Biblio.hs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index d0db35ae7..755c779ea 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -84,11 +84,6 @@ mvPunct (Space : x : ys) | isNote x, startWithPunct ys =
 mvPunct (Space : x : ys) | isNote x = x : ys
 mvPunct xs = xs
 
-sanitize :: [Inline] -> [Inline]
-sanitize xs | endWithPunct xs = toCapital xs
-            | otherwise       = toCapital (xs ++ [Str "."])
-
-
 -- A replacement for citeproc-hs's endWithPunct, which wrongly treats
 -- a sentence ending in '.)' as not ending with punctuation, leading
 -- to an extra period.
@@ -103,8 +98,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of
 
 deNote :: Pandoc -> Pandoc
 deNote = topDown go
-  where go (Cite cs [Note [Para xs]]) =
-            Cite cs [Note $ bottomUp go' [Para $ sanitize xs]]
+  where go (Cite (c:cs) [Note xs]) =
+            Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs]
         go (Note xs) = Note $ bottomUp go' xs
         go x = x
         go' (Note [Para xs]:ys) =
@@ -112,6 +107,14 @@ deNote = topDown go
                 then initInline xs ++ ys
                 else xs ++ ys
         go' xs = xs
+        sanitize :: Citation -> [Block] -> [Block]
+        sanitize Citation{citationPrefix = pref} [Para xs] =
+           case (null pref, endWithPunct xs) of
+                (True, False)  -> [Para $ xs ++ [Str "."]]
+                (True, True)   -> [Para xs]
+                (False, False) -> [Para $ toCapital $ xs ++ [Str "."]]
+                (False, True)  -> [Para $ toCapital xs]
+        sanitize _ bs = bs
 
 isTextualCitation :: [Citation] -> Bool
 isTextualCitation (c:_) = citationMode c == AuthorInText
-- 
cgit v1.2.3


From 7d18770b008c12e13c324223304c6703e06f3a4a Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 6 Aug 2013 23:31:01 -0700
Subject: Added support for MetaBool.

---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 src/Text/Pandoc/Writers/Custom.hs   | 2 ++
 src/Text/Pandoc/Writers/Shared.hs   | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 076706b4e..a880c09de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -278,7 +278,7 @@ toMetaValue opts x =
 yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
 yamlToMeta opts (Yaml.String t) = toMetaValue opts t
 yamlToMeta _    (Yaml.Number n) = MetaString $ show n
-yamlToMeta _    (Yaml.Bool b) = MetaString $ map toLower $ show b
+yamlToMeta _    (Yaml.Bool b) = MetaBool b
 yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
                                                 $ V.toList xs
 yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 732497616..5c82fe0e1 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -110,12 +110,14 @@ instance StackValue [Block] where
 instance StackValue MetaValue where
   push l (MetaMap m) = Lua.push l m
   push l (MetaList xs) = Lua.push l xs
+  push l (MetaBool x) = Lua.push l x
   push l (MetaString s) = Lua.push l s
   push l (MetaInlines ils) = Lua.push l ils
   push l (MetaBlocks bs) = Lua.push l bs
   peek _ _ = undefined
   valuetype (MetaMap _) = Lua.TTABLE
   valuetype (MetaList _) = Lua.TTABLE
+  valuetype (MetaBool _) = Lua.TBOOLEAN
   valuetype (MetaString _) = Lua.TSTRING
   valuetype (MetaInlines _) = Lua.TSTRING
   valuetype (MetaBlocks _) = Lua.TSTRING
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index c6c30d070..e6ec853f8 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -74,6 +74,7 @@ metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $
   Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap
 metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $
   Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs
+metaValueToJSON _ _ (MetaBool b) = return $ toJSON b
 metaValueToJSON _ _ (MetaString s) = return $ toJSON s
 metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs
 metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs
-- 
cgit v1.2.3


From d44d1664312f0d05ada61eb49a678ef8a04d90d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 7 Aug 2013 08:43:42 -0700
Subject: Allow YAML title blocks to contain only comments.

---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a880c09de..251554de1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -231,7 +231,9 @@ yamlTitleBlock = try $ do
   pos <- getPosition
   string "---"
   blankline
-  rawYaml <- unlines <$> manyTill anyLine stopLine
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
   opts <- stateOptions <$> getState
   case Yaml.decodeEither' $ UTF8.fromString rawYaml of
@@ -241,6 +243,7 @@ yamlTitleBlock = try $ do
                         then f
                         else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
                   id hashmap
+       Right Yaml.Null -> return $ return id
        Right _ -> do
                    addWarning (Just pos) "YAML header is not an object"
                    return $ return id
-- 
cgit v1.2.3


From bb61624bb2bba416e1992ecdf101f9660a3edcae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 7 Aug 2013 14:30:47 -0700
Subject: Textile reader: Removed raw LaTeX parsing.

This isn't part of Textile.
---
 src/Text/Pandoc/Readers/Textile.hs | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 9191f6908..d4f092d07 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,7 +56,6 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
@@ -126,7 +125,6 @@ blockParsers = [ codeBlock
                , commentBlock
                , anyList
                , rawHtmlBlock
-               , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
                ]
@@ -292,13 +290,6 @@ rawHtmlBlock = try $ do
   optional blanklines
   return $ RawBlock "html" b
 
--- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Block
-rawLaTeXBlock' = do
-  guardEnabled Ext_raw_tex
-  RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-
-
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Block
 para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
@@ -373,7 +364,6 @@ inlineParsers = [ str
                 , escapedInline
                 , htmlSpan
                 , rawHtmlInline
-                , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
                 , inlineMarkup
@@ -489,12 +479,6 @@ endline = try $ do
 rawHtmlInline :: Parser [Char] ParserState Inline
 rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
 
--- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inline
-rawLaTeXInline' = try $ do
-  guardEnabled Ext_raw_tex
-  rawLaTeXInline
-
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
 link :: Parser [Char] ParserState Inline
-- 
cgit v1.2.3


From 802dc9a8b9f206eb3be592ab19067f637eb2a3ee Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 10:41:39 -0700
Subject: Added Text.Pandoc.Compat.Monoid.

This allows pandoc to compile with base < 4.5, where Data.Monoid
doesn't export `<>`.  Thanks to Dirk Ullirch for the patch.
---
 src/Text/Pandoc/Compat/Monoid.hs | 16 ++++++++++++++++
 src/Text/Pandoc/Templates.hs     |  2 +-
 src/Text/Pandoc/Writers/Docx.hs  |  2 +-
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 src/Text/Pandoc/Compat/Monoid.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs
new file mode 100644
index 000000000..80ffcbbd6
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Monoid.hs
@@ -0,0 +1,16 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Monoid ( Monoid(..)
+                                 , (<>)
+                          ) where
+
+#if MIN_VERSION_base(4,5,0)
+import Data.Monoid ((<>), Monoid(..))
+#else
+import Data.Monoid (mappend, Monoid(..))
+#endif
+
+#if MIN_VERSION_base(4,5,0)
+#else
+(<>) :: Monoid m => m -> m -> m
+(<>) = mappend
+#endif
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index c95c84ca8..22a44e735 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -102,7 +102,7 @@ import Control.Applicative
 import qualified Data.Text as T
 import Data.Text (Text)
 import Data.Text.Encoding (encodeUtf8)
-import Data.Monoid ((<>), Monoid(..))
+import Text.Pandoc.Compat.Monoid ((<>), Monoid(..))
 import Data.List (intersperse, nub)
 import System.FilePath ((</>), (<.>))
 import qualified Data.Map as M
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 611cddc65..6bb4d5569 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -35,7 +35,7 @@ import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BL8
 import qualified Data.Map as M
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Monoid ((<>))
+import Text.Pandoc.Compat.Monoid ((<>))
 import Codec.Archive.Zip
 import Data.Time.Clock.POSIX
 import Text.Pandoc.Definition
-- 
cgit v1.2.3


From 12e7ec40707bfb716bb9add82e4320558e065492 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 10:42:52 -0700
Subject: Added Text.Pandoc.Compat.TagSoupEntity.

This allows pandoc to compile with tagsoup 0.13.x.
Thanks to Dirk Ullrich for the patch.
---
 src/Text/Pandoc/Compat/TagSoupEntity.hs | 15 +++++++++++++++
 src/Text/Pandoc/Parsing.hs              |  2 +-
 src/Text/Pandoc/Readers/DocBook.hs      |  2 +-
 src/Text/Pandoc/Readers/OPML.hs         |  2 +-
 src/Text/Pandoc/XML.hs                  |  2 +-
 5 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 src/Text/Pandoc/Compat/TagSoupEntity.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Compat/TagSoupEntity.hs b/src/Text/Pandoc/Compat/TagSoupEntity.hs
new file mode 100644
index 000000000..80985aef9
--- /dev/null
+++ b/src/Text/Pandoc/Compat/TagSoupEntity.hs
@@ -0,0 +1,15 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.TagSoupEntity (lookupEntity
+                          ) where
+
+import qualified Text.HTML.TagSoup.Entity as TE
+
+lookupEntity :: String -> Maybe Char
+#if MIN_VERSION_tagsoup(0,13,0)
+lookupEntity = str2chr . TE.lookupEntity
+  where str2chr :: Maybe String -> Maybe Char
+        str2chr (Just [c]) = Just c
+        str2chr _ = Nothing
+#else
+lookupEntity = TE.lookupEntity
+#endif
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4ade6def8..2f42aba41 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -161,7 +161,7 @@ import Data.List ( intercalate, transpose )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
-import Text.HTML.TagSoup.Entity ( lookupEntity )
+import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity )
 import Data.Default
 import qualified Data.Set as Set
 import Control.Monad.Reader
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 0058e889c..6a799e270 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -4,7 +4,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder
 import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 import Data.Generics
 import Data.Monoid
 import Data.Char (isSpace)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index c9726d195..35d01e877 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -6,7 +6,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 import Data.Generics
 import Data.Monoid
 import Control.Monad.State
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 89ae81a10..c11af9a19 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.XML ( escapeCharForXML,
 
 import Text.Pandoc.Pretty
 import Data.Char (ord, isAscii, isSpace)
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Compat.TagSoupEntity (lookupEntity)
 
 -- | Escape one character as needed for XML.
 escapeCharForXML :: Char -> String
-- 
cgit v1.2.3


From 9aa9d5cf68386acd127427cc62f6004b2a17057a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 10:52:53 -0700
Subject: Revert "Textile reader: Removed raw LaTeX parsing."

This reverts commit bb61624bb2bba416e1992ecdf101f9660a3edcae.
Apparently someone put this there for a reason, since it's in
the test suite.
---
 src/Text/Pandoc/Readers/Textile.hs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index d4f092d07..9191f6908 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,6 +56,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
@@ -125,6 +126,7 @@ blockParsers = [ codeBlock
                , commentBlock
                , anyList
                , rawHtmlBlock
+               , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
                ]
@@ -290,6 +292,13 @@ rawHtmlBlock = try $ do
   optional blanklines
   return $ RawBlock "html" b
 
+-- | Raw block of LaTeX content
+rawLaTeXBlock' :: Parser [Char] ParserState Block
+rawLaTeXBlock' = do
+  guardEnabled Ext_raw_tex
+  RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
+
+
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Block
 para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
@@ -364,6 +373,7 @@ inlineParsers = [ str
                 , escapedInline
                 , htmlSpan
                 , rawHtmlInline
+                , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
                 , inlineMarkup
@@ -479,6 +489,12 @@ endline = try $ do
 rawHtmlInline :: Parser [Char] ParserState Inline
 rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
 
+-- | Raw LaTeX Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' = try $ do
+  guardEnabled Ext_raw_tex
+  rawLaTeXInline
+
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
 link :: Parser [Char] ParserState Inline
-- 
cgit v1.2.3


From 7d694e15697a4b1cc974b6316a08117afe663a74 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 15:13:28 -0700
Subject: Added Text.Pandoc.Process (pipeProcess).

A souped up version of readProcessWithErrorCode that uses lazy bytestrings
and allows setting environment.
---
 src/Text/Pandoc/Process.hs | 105 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 src/Text/Pandoc/Process.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
new file mode 100644
index 000000000..112c5b974
--- /dev/null
+++ b/src/Text/Pandoc/Process.hs
@@ -0,0 +1,105 @@
+{-
+Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Process
+   Copyright   : Copyright (C) 2013 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+ByteString variant of 'readProcessWithExitCode'.
+-}
+module Text.Pandoc.Process (pipeProcess)
+where
+import System.Process
+import System.Exit (ExitCode (..))
+import Control.Exception
+import System.IO (hClose, hFlush)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Control.Monad (unless)
+import qualified Data.ByteString.Lazy as BL
+
+{- |
+Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings
+instead of strings and allows setting environment variables.
+
+@readProcessWithExitCode@ creates an external process, reads its
+standard output and standard error strictly, waits until the process
+terminates, and then returns the 'ExitCode' of the process,
+the standard output, and the standard error.
+
+If an asynchronous exception is thrown to the thread executing
+@readProcessWithExitCode@. The forked process will be terminated and
+@readProcessWithExitCode@ will wait (block) until the process has been
+terminated.
+-}
+
+pipeProcess
+    :: Maybe [(String, String)] -- ^ environment variables
+    -> FilePath                 -- ^ Filename of the executable (see 'proc' for details)
+    -> [String]                 -- ^ any arguments
+    -> BL.ByteString            -- ^ standard input
+    -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr
+pipeProcess mbenv cmd args input =
+    mask $ \restore -> do
+      (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args)
+                                                   { env     = mbenv,
+                                                     std_in  = CreatePipe,
+                                                     std_out = CreatePipe,
+                                                     std_err = CreatePipe }
+      flip onException
+        (do hClose inh; hClose outh; hClose errh;
+            terminateProcess pid; waitForProcess pid) $ restore $ do
+        -- fork off a thread to start consuming stdout
+        out <- BL.hGetContents outh
+        waitOut <- forkWait $ evaluate $ BL.length out
+
+        -- fork off a thread to start consuming stderr
+        err <- BL.hGetContents errh
+        waitErr <- forkWait $ evaluate $ BL.length err
+
+        -- now write and flush any input
+        let writeInput = do
+              unless (BL.null input) $ do
+                BL.hPutStr inh input
+                hFlush inh
+              hClose inh
+
+        writeInput
+
+        -- wait on the output
+        waitOut
+        waitErr
+
+        hClose outh
+        hClose errh
+
+        -- wait on the process
+        ex <- waitForProcess pid
+
+        return (ex, out, err)
+
+forkWait :: IO a -> IO (IO a)
+forkWait a = do
+  res <- newEmptyMVar
+  _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res
+  return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return)
+
-- 
cgit v1.2.3


From 83f263110f364e87d8c0908b4a52be801aa77802 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 15:15:20 -0700
Subject: Use pipeProcess in Text.Pandoc.PDF.

---
 src/Text/Pandoc/PDF.hs | 39 +++------------------------------------
 1 file changed, 3 insertions(+), 36 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 49b455285..b030e2ca7 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -38,11 +38,7 @@ import qualified Data.ByteString as BS
 import System.Exit (ExitCode (..))
 import System.FilePath
 import System.Directory
-import System.Process
 import System.Environment
-import Control.Exception (evaluate)
-import System.IO (hClose)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
 import Control.Monad (unless)
 import Data.List (isInfixOf)
 import qualified Data.ByteString.Base64 as B64
@@ -52,6 +48,8 @@ import Text.Pandoc.Generic (bottomUpM)
 import Text.Pandoc.Shared (fetchItem, warn)
 import Text.Pandoc.Options (WriterOptions(..))
 import Text.Pandoc.MIME (extensionFromMimeType)
+import Text.Pandoc.Process (pipeProcess)
+import qualified Data.ByteString.Lazy as BL
 
 withTempDir :: String -> (FilePath -> IO a) -> IO a
 withTempDir =
@@ -148,7 +146,7 @@ runTeXProgram program runsLeft tmpDir source = do
           $ lookup "TEXINPUTS" env'
     let env'' = ("TEXINPUTS", texinputs) :
                   [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
-    (exit, out, err) <- readCommand (Just env'') program programArgs
+    (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
     if runsLeft > 1
        then runTeXProgram program (runsLeft - 1) tmpDir source
        else do
@@ -159,34 +157,3 @@ runTeXProgram program runsLeft tmpDir source = do
                    else return Nothing
          return (exit, out <> err, pdf)
 
--- utility functions
-
--- Run a command and return exitcode, contents of stdout, and
--- contents of stderr. (Based on
--- 'readProcessWithExitCode' from 'System.Process'.)
-readCommand :: Maybe [(String, String)]            -- ^ environment variables
-            -> FilePath                            -- ^ command to run
-            -> [String]                            -- ^ any arguments
-            -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr
-readCommand mbenv cmd args = do
-    (Just inh, Just outh, Just errh, pid) <-
-        createProcess (proc cmd args){ env     = mbenv,
-                                       std_in  = CreatePipe,
-                                       std_out = CreatePipe,
-                                       std_err = CreatePipe }
-    outMVar <- newEmptyMVar
-    -- fork off a thread to start consuming stdout
-    out  <- B.hGetContents outh
-    _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar ()
-    -- fork off a thread to start consuming stderr
-    err  <- B.hGetContents errh
-    _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar ()
-    -- now write and flush any input
-    hClose inh -- done with stdin
-    -- wait on the output
-    takeMVar outMVar
-    takeMVar outMVar
-    hClose outh
-    -- wait on the process
-    ex <- waitForProcess pid
-    return (ex, out, err)
-- 
cgit v1.2.3


From 99bb066bb925134b506d39c8d6694fe81337d9c1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 15:15:58 -0700
Subject: Pass writename as argument to filters.

This way filters can figure out what the target format is
and react appropriately.

Example:

    #!/usr/bin/env runghc
    import Text.Pandoc.JSON
    import Data.Char

    main = toJSONFilter cap
      where cap (Just "html") (Str xs) = Str $ map toUpper xs
            cap _ x = x

This capitalizes text only for html output.
---
 src/Text/Pandoc.hs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 27aa02a75..703bb876a 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -309,11 +309,8 @@ class ToJSONFilter a => ToJsonFilter a
         toJsonFilter = toJSONFilter
 
 readJSON :: ReaderOptions -> String -> Pandoc
-readJSON _ = checkJSON . eitherDecode' . UTF8.fromStringLazy
+readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
 
-checkJSON :: Either String a -> a
-checkJSON (Right x) = x
-checkJSON (Left e)  = error e
-- 
cgit v1.2.3


From e9de0f0e22b9b64b5684efe81d03539c3f57a71c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 8 Aug 2013 23:14:12 -0700
Subject: Preliminary support for new Div and Span elements in writers.

Currently these are "transparent" containers, except in HTML,
where they produce div and span elements with attributes.
---
 src/Text/Pandoc/Writers/AsciiDoc.hs     | 2 ++
 src/Text/Pandoc/Writers/ConTeXt.hs      | 2 ++
 src/Text/Pandoc/Writers/Custom.hs       | 5 +++++
 src/Text/Pandoc/Writers/Docbook.hs      | 3 +++
 src/Text/Pandoc/Writers/Docx.hs         | 2 ++
 src/Text/Pandoc/Writers/FB2.hs          | 3 +++
 src/Text/Pandoc/Writers/HTML.hs         | 5 +++++
 src/Text/Pandoc/Writers/LaTeX.hs        | 2 ++
 src/Text/Pandoc/Writers/Man.hs          | 2 ++
 src/Text/Pandoc/Writers/Markdown.hs     | 3 +++
 src/Text/Pandoc/Writers/MediaWiki.hs    | 6 ++++++
 src/Text/Pandoc/Writers/OpenDocument.hs | 2 ++
 src/Text/Pandoc/Writers/Org.hs          | 3 +++
 src/Text/Pandoc/Writers/RST.hs          | 2 ++
 src/Text/Pandoc/Writers/RTF.hs          | 3 +++
 src/Text/Pandoc/Writers/Texinfo.hs      | 5 +++++
 src/Text/Pandoc/Writers/Textile.hs      | 6 ++++++
 17 files changed, 56 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 6c3c6955e..00cea27e5 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -246,6 +246,7 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
 blockToAsciiDoc opts (DefinitionList items) = do
   contents <- mapM (definitionListItemToAsciiDoc opts) items
   return $ cat contents <> blankline
+blockToAsciiDoc opts (Div _ bs) = blockListToAsciiDoc opts bs
 
 -- | Convert bullet list item (list of blocks) to asciidoc.
 bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
@@ -383,3 +384,4 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do
   return $ text "footnote:[" <> contents <> "]"
 -- asciidoc can't handle blank lines in notes
 inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
+inlineToAsciiDoc opts (Span _ ils) = inlineListToAsciiDoc opts ils
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 32588dc8f..40dc1deb5 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -143,6 +143,7 @@ blockToConTeXt (CodeBlock _ str) =
   -- blankline because \stoptyping can't have anything after it, inc. '}'
 blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
 blockToConTeXt (RawBlock _ _ ) = return empty
+blockToConTeXt (Div _ bs) = blockListToConTeXt bs
 blockToConTeXt (BulletList lst) = do
   contents <- mapM listItemToConTeXt lst
   return $ ("\\startitemize" <> if isTightList lst
@@ -330,6 +331,7 @@ inlineToConTeXt (Note contents) = do
               then text "\\footnote{" <> nest 2 contents' <> char '}'
               else text "\\startbuffer " <> nest 2 contents' <>
                    text "\\stopbuffer\\footnote{\\getbuffer}"
+inlineToConTeXt (Span _ ils) = inlineListToConTeXt ils
 
 -- | Craft the section header, inserting the secton reference, if supplied.
 sectionHeader :: Attr
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 5c82fe0e1..c250a240e 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -178,6 +178,9 @@ blockToCustom lua (OrderedList (num,sty,delim) items) =
 blockToCustom lua (DefinitionList items) =
   callfunc lua "DefinitionList" items
 
+blockToCustom lua (Div attr items) =
+  callfunc lua "Div" items (attrToMap attr)
+
 -- | Convert list of Pandoc block elements to Custom.
 blockListToCustom :: LuaState -- ^ Options
                   -> [Block]       -- ^ List of block elements
@@ -240,3 +243,5 @@ inlineToCustom lua (Image alt (src,tit)) =
 
 inlineToCustom lua (Note contents) = callfunc lua "Note" contents
 
+inlineToCustom lua (Span attr items) =
+  callfunc lua "Span" items (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 6f4b61a79..2f415f3ee 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -148,6 +148,7 @@ listItemToDocbook opts item =
 -- | Convert a Pandoc block element to Docbook.
 blockToDocbook :: WriterOptions -> Block -> Doc
 blockToDocbook _ Null = empty
+blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs
 blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize
 blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
 -- title beginning with fig: indicates that the image is a figure
@@ -267,6 +268,8 @@ inlineToDocbook opts (Quoted _ lst) =
   inTagsSimple "quote" $ inlinesToDocbook opts lst
 inlineToDocbook opts (Cite _ lst) =
   inlinesToDocbook opts lst
+inlineToDocbook opts (Span _ ils) =
+  inlinesToDocbook opts ils
 inlineToDocbook _ (Code _ str) =
   inTagsSimple "literal" $ text (escapeStringForXML str)
 inlineToDocbook opts (Math t str)
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 6bb4d5569..d93254971 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -428,6 +428,7 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique
 -- | Convert a Pandoc block element to OpenXML.
 blockToOpenXML :: WriterOptions -> Block -> WS [Element]
 blockToOpenXML _ Null = return []
+blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
 blockToOpenXML opts (Header lev (ident,_,_) lst) = do
   contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
                blockToOpenXML opts (Para lst)
@@ -633,6 +634,7 @@ formattedString str = do
 inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
 inlineToOpenXML _ (Str str) = formattedString str
 inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
+inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils
 inlineToOpenXML opts (Strong lst) =
   withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
 inlineToOpenXML opts (Emph lst) =
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 27f0c8305..2576b2dc2 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -324,6 +324,7 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
                              map (el "p" . el "code") . lines $ s
 blockToXml (RawBlock _ s) = return . spaceBeforeAfter .
                             map (el "p" . el "code") . lines $ s
+blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
 blockToXml (OrderedList a bss) = do
     state <- get
@@ -425,6 +426,7 @@ indent = indentBlock
 -- | Convert a Pandoc's Inline element to FictionBook XML representation.
 toXml :: Inline -> FBM [Content]
 toXml (Str s) = return [txt s]
+toXml (Span _ ils) = cMapM toXml ils
 toXml (Emph ss) = list `liftM` wrap "emphasis" ss
 toXml (Strong ss) = list `liftM` wrap "strong" ss
 toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
@@ -560,6 +562,7 @@ list = (:[])
 plain :: Inline -> String
 plain (Str s) = s
 plain (Emph ss) = concat (map plain ss)
+plain (Span _ ss) = concat (map plain ss)
 plain (Strong ss) = concat (map plain ss)
 plain (Strikeout ss) = concat (map plain ss)
 plain (Superscript ss) = concat (map plain ss)
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index cfc187e02..560c26c76 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -407,6 +407,9 @@ blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."])
 blockToHtml opts (Para lst) = do
   contents <- inlineListToHtml opts lst
   return $ H.p contents
+blockToHtml opts (Div attr bs) = do
+  contents <- blockListToHtml opts bs
+  return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts
 blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str
 blockToHtml _ (RawBlock _ _) = return mempty
 blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
@@ -590,6 +593,8 @@ inlineToHtml opts inline =
     (Str str)        -> return $ strToHtml str
     (Space)          -> return $ strToHtml " "
     (LineBreak)      -> return $ if writerHtml5 opts then H5.br else H.br
+    (Span attr ils)  -> inlineListToHtml opts ils >>=
+                           return . addAttrs opts attr . H.span
     (Emph lst)       -> inlineListToHtml opts lst >>= return . H.em
     (Strong lst)     -> inlineListToHtml opts lst >>= return . H.strong
     (Code attr str)  -> case hlCode of
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index aa5bfa623..37de03e0f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -282,6 +282,7 @@ isLineBreakOrSpace _ = False
 blockToLaTeX :: Block     -- ^ Block to convert
              -> State WriterState Doc
 blockToLaTeX Null = return empty
+blockToLaTeX (Div _ bs) = blockListToLaTeX bs
 blockToLaTeX (Plain lst) =
   inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
 -- title beginning with fig: indicates that the image is a figure
@@ -560,6 +561,7 @@ isQuoted _ = False
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: Inline    -- ^ Inline to convert
               -> State WriterState Doc
+inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces
 inlineToLaTeX (Emph lst) =
   inlineListToLaTeX lst >>= return . inCmd "emph"
 inlineToLaTeX (Strong lst) =
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 0508b6c27..ed66c7c2b 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -160,6 +160,7 @@ blockToMan :: WriterOptions -- ^ Options
                 -> Block         -- ^ Block element
                 -> State WriterState Doc
 blockToMan _ Null = return empty
+blockToMan opts (Div _ bs) = blockListToMan opts bs
 blockToMan opts (Plain inlines) =
   liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
 blockToMan opts (Para inlines) = do
@@ -300,6 +301,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
 
 -- | Convert Pandoc inline element to man.
 inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMan opts (Span _ ils) = inlineListToMan opts ils
 inlineToMan opts (Emph lst) = do
   contents <- inlineListToMan opts lst
   return $ text "\\f[I]" <> contents <> text "\\f[]"
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 80402a757..d195d8445 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -301,6 +301,7 @@ blockToMarkdown :: WriterOptions -- ^ Options
                 -> Block         -- ^ Block element
                 -> State WriterState Doc
 blockToMarkdown _ Null = return empty
+blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs
 blockToMarkdown opts (Plain inlines) = do
   contents <- inlineListToMarkdown opts inlines
   return $ contents <> cr
@@ -628,6 +629,8 @@ escapeSpaces x = x
 
 -- | Convert Pandoc inline element to markdown.
 inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMarkdown opts (Span _ ils) =
+  inlineListToMarkdown opts ils
 inlineToMarkdown opts (Emph lst) = do
   contents <- inlineListToMarkdown opts lst
   return $ "*" <> contents <> "*"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index e1bfd18b2..fccf25753 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -83,6 +83,9 @@ blockToMediaWiki :: WriterOptions -- ^ Options
 
 blockToMediaWiki _ Null = return ""
 
+blockToMediaWiki opts (Div _ bs) =
+  blockListToMediaWiki opts bs
+
 blockToMediaWiki opts (Plain inlines) =
   inlineListToMediaWiki opts inlines
 
@@ -328,6 +331,9 @@ inlineListToMediaWiki opts lst =
 -- | Convert Pandoc inline element to MediaWiki.
 inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String
 
+inlineToMediaWiki opts (Span _ ils) =
+  inlineListToMediaWiki opts ils
+
 inlineToMediaWiki opts (Emph lst) = do
   contents <- inlineListToMediaWiki opts lst
   return $ "''" ++ contents ++ "''"
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 0efbf7580..d76d0f6ad 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -285,6 +285,7 @@ blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
 blockToOpenDocument o bs
     | Plain          b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
     | Para           b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
+    | Div _ xs         <- bs = blocksToOpenDocument o xs
     | Header     i _ b <- bs = setFirstPara >>
                                (inHeaderTags  i =<< inlinesToOpenDocument o b)
     | BlockQuote     b <- bs = setFirstPara >> mkBlockQuote b
@@ -360,6 +361,7 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l
 inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
 inlineToOpenDocument o ils
     | Space         <- ils = inTextStyle space
+    | Span _ xs     <- ils = inlinesToOpenDocument o xs
     | LineBreak     <- ils = return $ selfClosingTag "text:line-break" []
     | Str         s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
     | Emph        l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 40e8abf7e..34ae532b0 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -106,6 +106,7 @@ escapeString = escapeStringUsing $
 blockToOrg :: Block         -- ^ Block element
            -> State WriterState Doc
 blockToOrg Null = return empty
+blockToOrg (Div _ bs) = blockListToOrg bs
 blockToOrg (Plain inlines) = inlineListToOrg inlines
 -- title beginning with fig: indicates that the image is a figure
 blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
@@ -229,6 +230,8 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
 
 -- | Convert Pandoc inline element to Org.
 inlineToOrg :: Inline -> State WriterState Doc
+inlineToOrg (Span _ lst) =
+  inlineListToOrg lst
 inlineToOrg (Emph lst) = do
   contents <- inlineListToOrg lst
   return $ "/" <> contents <> "/"
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 606793842..4d8daa15b 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -161,6 +161,7 @@ bordered contents c =
 blockToRST :: Block         -- ^ Block element
            -> State WriterState Doc
 blockToRST Null = return empty
+blockToRST (Div _ bs) = blockListToRST bs
 blockToRST (Plain inlines) = inlineListToRST inlines
 -- title beginning with fig: indicates that the image is a figure
 blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
@@ -338,6 +339,7 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat
 
 -- | Convert Pandoc inline element to RST.
 inlineToRST :: Inline -> State WriterState Doc
+inlineToRST (Span _ ils) = inlineListToRST ils
 inlineToRST (Emph lst) = do
   contents <- inlineListToRST lst
   return $ "*" <> contents <> "*"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 0db1c52c4..7e5d33c50 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -208,6 +208,8 @@ blockToRTF :: Int       -- ^ indent level
            -> Block     -- ^ block to convert
            -> String
 blockToRTF _ _ Null = ""
+blockToRTF indent alignment (Div _ bs) =
+  concatMap (blockToRTF indent alignment) bs
 blockToRTF indent alignment (Plain lst) =
   rtfCompact indent 0 alignment $ inlineListToRTF lst
 blockToRTF indent alignment (Para lst) =
@@ -308,6 +310,7 @@ inlineListToRTF lst = concatMap inlineToRTF lst
 -- | Convert inline item to RTF.
 inlineToRTF :: Inline         -- ^ inline to convert
             -> String
+inlineToRTF (Span _ lst) = inlineListToRTF lst
 inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}"
 inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}"
 inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}"
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 0f57d14b2..f8b460001 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -123,6 +123,8 @@ blockToTexinfo :: Block     -- ^ Block to convert
 
 blockToTexinfo Null = return empty
 
+blockToTexinfo (Div _ bs) = blockListToTexinfo bs
+
 blockToTexinfo (Plain lst) =
   inlineListToTexinfo lst
 
@@ -374,6 +376,9 @@ disallowedInNode c = c `elem` ".,:()"
 inlineToTexinfo :: Inline    -- ^ Inline to convert
                 -> State WriterState Doc
 
+inlineToTexinfo (Span _ lst) =
+  inlineListToTexinfo lst
+
 inlineToTexinfo (Emph lst) =
   inlineListToTexinfo lst >>= return . inCmd "emph"
 
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 3288ce222..3fb554dca 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -101,6 +101,9 @@ blockToTextile :: WriterOptions -- ^ Options
 
 blockToTextile _ Null = return ""
 
+blockToTextile opts (Div _ bs) =
+  blockListToTextile opts bs
+
 blockToTextile opts (Plain inlines) =
   inlineListToTextile opts inlines
 
@@ -343,6 +346,9 @@ inlineListToTextile opts lst =
 -- | Convert Pandoc inline element to Textile.
 inlineToTextile :: WriterOptions -> Inline -> State WriterState String
 
+inlineToTextile opts (Span _ lst) =
+  inlineListToTextile opts lst
+
 inlineToTextile opts (Emph lst) = do
   contents <- inlineListToTextile opts lst
   return $ if '_' `elem` contents
-- 
cgit v1.2.3


From cbfa9321066212b912583481015224f3c944ae21 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 10 Aug 2013 17:23:51 -0700
Subject: Adjustments for new Format newtype.

---
 src/Text/Pandoc/Readers/HTML.hs         |  4 ++--
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/RST.hs          |  1 +
 src/Text/Pandoc/Readers/Textile.hs      |  6 +++---
 src/Text/Pandoc/Writers/AsciiDoc.hs     |  8 ++++++--
 src/Text/Pandoc/Writers/Custom.hs       |  6 ++++++
 src/Text/Pandoc/Writers/Docbook.hs      |  9 +++++----
 src/Text/Pandoc/Writers/Docx.hs         | 10 +++++-----
 src/Text/Pandoc/Writers/EPUB.hs         |  6 +++---
 src/Text/Pandoc/Writers/HTML.hs         | 13 ++++++++-----
 src/Text/Pandoc/Writers/LaTeX.hs        | 13 ++++++++-----
 src/Text/Pandoc/Writers/Man.hs          | 10 ++++++----
 src/Text/Pandoc/Writers/MediaWiki.hs    | 14 ++++++++------
 src/Text/Pandoc/Writers/OpenDocument.hs | 12 +++++++-----
 src/Text/Pandoc/Writers/RST.hs          | 15 +++++++++------
 src/Text/Pandoc/Writers/RTF.hs          | 12 +++++++-----
 src/Text/Pandoc/Writers/Texinfo.hs      | 19 +++++++++++--------
 src/Text/Pandoc/Writers/Textile.hs      | 14 ++++++--------
 18 files changed, 102 insertions(+), 72 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0068ab5c1..7ca554fa3 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -182,7 +182,7 @@ pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
   parseRaw <- getOption readerParseRaw
   if parseRaw && not (null raw)
-     then return [RawBlock "html" raw]
+     then return [RawBlock (Format "html") raw]
      else return []
 
 pHtmlBlock :: String -> TagParser String
@@ -408,7 +408,7 @@ pRawHtmlInline = do
   result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
   parseRaw <- getOption readerParseRaw
   if parseRaw
-     then return [RawInline "html" $ renderTags' [result]]
+     then return [RawInline (Format "html") $ renderTags' [result]]
      else return []
 
 pInlinesInTags :: String -> ([Inline] -> Inline)
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6b5035d93..eb0baedda 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
 {-
 Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
 
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 34962b553..df0a8294d 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
 
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 9191f6908..8ccd1e227 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -290,13 +290,13 @@ rawHtmlBlock :: Parser [Char] ParserState Block
 rawHtmlBlock = try $ do
   (_,b) <- htmlTag isBlockTag
   optional blanklines
-  return $ RawBlock "html" b
+  return $ RawBlock (Format "html") b
 
 -- | Raw block of LaTeX content
 rawLaTeXBlock' :: Parser [Char] ParserState Block
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
-  RawBlock "latex" <$> (rawLaTeXBlock <* spaces)
+  RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
@@ -487,7 +487,7 @@ endline = try $ do
   return LineBreak
 
 rawHtmlInline :: Parser [Char] ParserState Inline
-rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag
+rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
 rawLaTeXInline' :: Parser [Char] ParserState Inline
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 00cea27e5..68b525742 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -132,7 +132,9 @@ blockToAsciiDoc opts (Para inlines) = do
                then text "\\"
                else empty
   return $ esc <> contents <> blankline
-blockToAsciiDoc _ (RawBlock _ _) = return empty
+blockToAsciiDoc _ (RawBlock f s)
+  | f == "asciidoc" = return $ text s
+  | otherwise       = return empty
 blockToAsciiDoc _ HorizontalRule =
   return $ blankline <> text "'''''" <> blankline
 blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
@@ -347,7 +349,9 @@ inlineToAsciiDoc _ (Math InlineMath str) =
   return $ "latexmath:[$" <> text str <> "$]"
 inlineToAsciiDoc _ (Math DisplayMath str) =
   return $ "latexmath:[\\[" <> text str <> "\\]]"
-inlineToAsciiDoc _ (RawInline _ _) = return empty
+inlineToAsciiDoc _ (RawInline f s)
+  | f == "asciidoc" = return $ text s
+  | otherwise       = return empty
 inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
 inlineToAsciiDoc _ Space = return space
 inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index c250a240e..0234e1e35 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Custom ( writeCustom ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Data.List ( intersperse )
+import Data.Char ( toLower )
 import Scripting.Lua (LuaState, StackValue, callfunc)
 import qualified Scripting.Lua as Lua
 import Text.Pandoc.UTF8 (fromString, toString)
@@ -78,6 +79,11 @@ instance StackValue a => StackValue [a] where
     return (Just lst)
   valuetype _ = Lua.TTABLE
 
+instance StackValue Format where
+  push lua (Format f) = Lua.push lua (map toLower f)
+  peek l n = fmap Format `fmap` Lua.peek l n
+  valuetype _ = Lua.TSTRING
+
 instance (StackValue a, StackValue b) => StackValue (M.Map a b) where
   push lua m = do
     let xs = M.toList m
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 2f415f3ee..3d150d19b 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
 
@@ -199,10 +200,10 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
   in  inTags True "orderedlist" attribs items
 blockToDocbook opts (DefinitionList lst) =
   inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
-blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block
--- we allow html for compatibility with earlier versions of pandoc
-blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block
-blockToDocbook _ (RawBlock _ _) = empty
+blockToDocbook _ (RawBlock f str)
+  | f == "docbook" = text str -- raw XML block
+  | f == "html"    = text str -- allow html for backwards compatibility
+  | otherwise      = empty
 blockToDocbook _ HorizontalRule = empty -- not semantic
 blockToDocbook opts (Table caption aligns widths headers rows) =
   let captionDoc   = if null caption
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index d93254971..2483e243f 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -460,8 +460,8 @@ blockToOpenXML opts (Para lst) = do
     contents <- inlinesToOpenXML opts lst
     return [mknode "w:p" [] (paraProps ++ contents)]
 blockToOpenXML _ (RawBlock format str)
-  | format == "openxml" = return [ x | Elem x <- parseXML str ]
-  | otherwise           = return []
+  | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | otherwise                  = return []
 blockToOpenXML opts (BlockQuote blocks) =
   withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
 blockToOpenXML opts (CodeBlock attrs str) =
@@ -653,8 +653,8 @@ inlineToOpenXML opts (Strikeout lst) =
   $ inlinesToOpenXML opts lst
 inlineToOpenXML _ LineBreak = return [br]
 inlineToOpenXML _ (RawInline f str)
-  | f == "openxml" = return [ x | Elem x <- parseXML str ]
-  | otherwise      = return []
+  | f == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | otherwise            = return []
 inlineToOpenXML opts (Quoted quoteType lst) =
   inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
     where (open, close) = case quoteType of
@@ -688,7 +688,7 @@ inlineToOpenXML opts (Note bs) = do
   let notemarker = mknode "w:r" []
                    [ mknode "w:rPr" [] (rStyle "FootnoteRef")
                    , mknode "w:footnoteRef" [] () ]
-  let notemarkerXml = RawInline "openxml" $ ppElement notemarker
+  let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
   let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
       insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : ils) : xs
       insertNoteRef xs               = Para [notemarkerXml] : xs
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index fb756f196..ab14ff8a0 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -103,7 +103,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                      Just img  -> do
                        let coverImage = "cover-image" ++ takeExtension img
                        let cpContent = renderHtml $ writeHtml opts'
-                               (Pandoc meta [RawBlock "html" $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+                               (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
                        imgContent <- B.readFile img
                        return ( [mkEntry "cover.xhtml" cpContent]
                               , [mkEntry coverImage imgContent] )
@@ -422,7 +422,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit))
   | isAbsoluteURI src = do
     raw <- makeSelfContained Nothing
              $ writeHtmlInline opts (Image lab (src,tit))
-    return $ RawInline "html" raw
+    return $ RawInline (Format "html") raw
   | otherwise = do
     let src' = unEscapeString src
     pics <- readIORef picsRef
@@ -438,7 +438,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit))
 transformInline opts _ _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
     raw <- makeSelfContained Nothing $ writeHtmlInline opts x
-    return $ RawInline "html" raw
+    return $ RawInline (Format "html") raw
 transformInline _ _ _ x = return x
 
 writeHtmlInline :: WriterOptions -> Inline -> String
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 560c26c76..25079574e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -410,8 +410,9 @@ blockToHtml opts (Para lst) = do
 blockToHtml opts (Div attr bs) = do
   contents <- blockListToHtml opts bs
   return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts
-blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str
-blockToHtml _ (RawBlock _ _) = return mempty
+blockToHtml _ (RawBlock f str)
+  | f == Format "html" = return $ preEscapedString str
+  | otherwise          = return mempty
 blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
 blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
   let tolhs = isEnabled Ext_literate_haskell opts &&
@@ -678,12 +679,14 @@ inlineToHtml opts inline =
                                   return  $ case t of
                                              InlineMath  -> m
                                              DisplayMath -> brtag >> m >> brtag )
-    (RawInline "latex" str) -> case writerHTMLMathMethod opts of
+    (RawInline f str)
+      | f == Format "latex" ->
+                          case writerHTMLMathMethod opts of
                                LaTeXMathML _ -> do modify (\st -> st {stMath = True})
                                                    return $ toHtml str
                                _             -> return mempty
-    (RawInline "html" str) -> return $ preEscapedString str
-    (RawInline _ _) -> return mempty
+      | f == Format "html" -> return $ preEscapedString str
+      | otherwise          -> return mempty
     (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s &&
                              s == escapeURI ("mailto" ++ str) ->
                         -- autolink
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 37de03e0f..d09ccc3b8 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -356,8 +356,10 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
                   Nothing -> rawCodeBlock
                   Just  h -> modify (\st -> st{ stHighlighting = True }) >>
                              return (flush $ text h)
-blockToLaTeX (RawBlock "latex" x) = return $ text x
-blockToLaTeX (RawBlock _ _) = return empty
+blockToLaTeX (RawBlock f x)
+  | f == Format "latex" || f == Format "tex"
+                        = return $ text x
+  | otherwise           = return empty
 blockToLaTeX (BulletList []) = return empty  -- otherwise latex error
 blockToLaTeX (BulletList lst) = do
   incremental <- gets stIncremental
@@ -630,9 +632,10 @@ inlineToLaTeX (Math InlineMath str) =
   return $ char '$' <> text str <> char '$'
 inlineToLaTeX (Math DisplayMath str) =
   return $ "\\[" <> text str <> "\\]"
-inlineToLaTeX (RawInline "latex" str) = return $ text str
-inlineToLaTeX (RawInline "tex" str) = return $ text str
-inlineToLaTeX (RawInline _ _) = return empty
+inlineToLaTeX (RawInline f str)
+  | f == Format "latex" || f == Format "tex"
+                        = return $ text str
+  | otherwise           = return empty
 inlineToLaTeX (LineBreak) = return "\\\\"
 inlineToLaTeX Space = return space
 inlineToLaTeX (Link txt ('#':ident, _)) = do
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index ed66c7c2b..642a002d6 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -167,8 +167,9 @@ blockToMan opts (Para inlines) = do
   contents <- liftM vcat $ mapM (inlineListToMan opts) $
     splitSentences inlines
   return $ text ".PP" $$ contents
-blockToMan _ (RawBlock "man" str) = return $ text str
-blockToMan _ (RawBlock _ _) = return empty
+blockToMan _ (RawBlock f str)
+  | f == Format "man" = return $ text str
+  | otherwise         = return empty
 blockToMan _ HorizontalRule = return $ text ".PP" $$ text "   *   *   *   *   *"
 blockToMan opts (Header level _ inlines) = do
   contents <- inlineListToMan opts inlines
@@ -333,8 +334,9 @@ inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str
 inlineToMan opts (Math DisplayMath str) = do
   contents <- inlineListToMan opts $ readTeXMath str
   return $ cr <> text ".RS" $$ contents $$ text ".RE"
-inlineToMan _ (RawInline "man" str) = return $ text str
-inlineToMan _ (RawInline _ _) = return empty
+inlineToMan _ (RawInline f str)
+  | f == Format "man" = return $ text str
+  | otherwise         = return empty
 inlineToMan _ (LineBreak) = return $
   cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
 inlineToMan _ Space = return space
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index fccf25753..4ffba1100 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -107,9 +107,10 @@ blockToMediaWiki opts (Para inlines) = do
               then  "<p>" ++ contents ++ "</p>"
               else contents ++ if null listLevel then "\n" else ""
 
-blockToMediaWiki _ (RawBlock "mediawiki" str) = return str
-blockToMediaWiki _ (RawBlock "html" str) = return str
-blockToMediaWiki _ (RawBlock _ _) = return ""
+blockToMediaWiki _ (RawBlock f str)
+  | f == Format "mediawiki" = return str
+  | f == Format "html"      = return str
+  | otherwise               = return ""
 
 blockToMediaWiki _ HorizontalRule = return "\n-----\n"
 
@@ -374,9 +375,10 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str
 inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
                                  -- note:  str should NOT be escaped
 
-inlineToMediaWiki _ (RawInline "mediawiki" str) = return str
-inlineToMediaWiki _ (RawInline "html" str) = return str
-inlineToMediaWiki _ (RawInline _ _) = return ""
+inlineToMediaWiki _ (RawInline f str)
+  | f == Format "mediawiki" = return str
+  | f == Format "html"      = return str
+  | otherwise               = return ""
 
 inlineToMediaWiki _ (LineBreak) = return "<br />"
 
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index d76d0f6ad..05c576c20 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE PatternGuards, OverloadedStrings #-}
 {-
 Copyright (C) 2008-2010 Andrea Rossato <andrea.rossato@ing.unitn.it>
 and John MacFarlane.
@@ -296,7 +296,9 @@ blockToOpenDocument o bs
     | Table  c a w h r <- bs = setFirstPara >> table c a w h r
     | HorizontalRule   <- bs = setFirstPara >> return (selfClosingTag "text:p"
                                 [ ("text:style-name", "Horizontal_20_Line") ])
-    | RawBlock _     _ <- bs = return empty
+    | RawBlock f     s <- bs = if f == "opendocument"
+                                  then preformatted s
+                                  else return empty
     | Null             <- bs = return empty
     | otherwise              = return empty
     where
@@ -374,9 +376,9 @@ inlineToOpenDocument o ils
     | Code      _ s <- ils = preformatted s
     | Math      _ s <- ils = inlinesToOpenDocument o (readTeXMath s)
     | Cite      _ l <- ils = inlinesToOpenDocument o l
-    | RawInline "opendocument" s <- ils = preformatted s
-    | RawInline "html" s <- ils = preformatted s  -- for backwards compat.
-    | RawInline _ _ <- ils = return empty
+    | RawInline f s <- ils = if f == "opendocument" || f == "html"
+                                then preformatted s
+                                else return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
     | Image _ (s,t) <- ils = return $ mkImg  s t
     | Note        l <- ils = mkNote l
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 4d8daa15b..5fbbb6afc 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -42,7 +42,7 @@ import Network.URI (isAbsoluteURI)
 import Text.Pandoc.Pretty
 import Control.Monad.State
 import Control.Applicative ( (<$>) )
-import Data.Char (isSpace)
+import Data.Char (isSpace, toLower)
 
 type Refs = [([Inline], Target)]
 
@@ -176,9 +176,11 @@ blockToRST (Para inlines)
   | otherwise = do
       contents <- inlineListToRST inlines
       return $ contents <> blankline
-blockToRST (RawBlock f str) =
-  return $ blankline <> ".. raw:: " <> text f $+$
-           (nest 3 $ text str) $$ blankline
+blockToRST (RawBlock f str)
+  | f == "rst" = return $ text str
+  | otherwise  = return $ blankline <> ".. raw:: " <>
+                    text (map toLower $ unFormat f) $+$
+                    (nest 3 $ text str) $$ blankline
 blockToRST HorizontalRule =
   return $ blankline $$ "--------------" $$ blankline
 blockToRST (Header level _ inlines) = do
@@ -374,8 +376,9 @@ inlineToRST (Math t str) = do
                    then blankline $$ ".. math::" $$
                         blankline $$ nest 3 (text str) $$ blankline
                    else blankline $$ (".. math:: " <> text str) $$ blankline
-inlineToRST (RawInline "rst" x) = return $ text x
-inlineToRST (RawInline _ _) = return empty
+inlineToRST (RawInline f x)
+  | f == "rst" = return $ text x
+  | otherwise  = return empty
 inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
 inlineToRST Space = return space
 -- autolink
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 7e5d33c50..6d2b1229d 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -62,7 +62,7 @@ rtfEmbedImage x@(Image _ (src,_)) = do
        let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
        return $ if B.null imgdata
                    then x
-                   else RawInline "rtf" raw
+                   else RawInline (Format "rtf") raw
      else return x
 rtfEmbedImage x = return x
 
@@ -218,8 +218,9 @@ blockToRTF indent alignment (BlockQuote lst) =
   concatMap (blockToRTF (indent + indentIncrement) alignment) lst
 blockToRTF indent _ (CodeBlock _ str) =
   rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF _ _ (RawBlock "rtf" str) = str
-blockToRTF _ _ (RawBlock _ _) = ""
+blockToRTF _ _ (RawBlock f str)
+  | f == Format "rtf" = str
+  | otherwise         = ""
 blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
   concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
 blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
@@ -325,8 +326,9 @@ inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
 inlineToRTF (Str str) = stringToRTF str
 inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
 inlineToRTF (Cite _ lst) = inlineListToRTF lst
-inlineToRTF (RawInline "rtf" str) = str
-inlineToRTF (RawInline _ _) = ""
+inlineToRTF (RawInline f str)
+  | f == Format "rtf" = str
+  | otherwise         = ""
 inlineToRTF (LineBreak) = "\\line "
 inlineToRTF Space = " "
 inlineToRTF (Link text (src, _)) =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index f8b460001..b1fd3d6af 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2008-2010 John MacFarlane and Peter Wang
 
@@ -152,10 +153,11 @@ blockToTexinfo (CodeBlock _ str) = do
            flush (text str) $$
            text "@end verbatim" <> blankline
 
-blockToTexinfo (RawBlock "texinfo" str) = return $ text str
-blockToTexinfo (RawBlock "latex" str) =
-  return $ text "@tex" $$ text str $$ text "@end tex"
-blockToTexinfo (RawBlock _ _) = return empty
+blockToTexinfo (RawBlock f str)
+  | f == "texinfo" = return $ text str
+  | f == "latex" || f == "tex" =
+                      return $ text "@tex" $$ text str $$ text "@end tex"
+  | otherwise      = return empty
 
 blockToTexinfo (BulletList lst) = do
   items <- mapM listItemToTexinfo lst
@@ -418,10 +420,11 @@ inlineToTexinfo (Cite _ lst) =
   inlineListToTexinfo lst
 inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
 inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
-inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" =
-  return $ text "@tex" $$ text str $$ text "@end tex"
-inlineToTexinfo (RawInline "texinfo" str) = return $ text str
-inlineToTexinfo (RawInline _ _) = return empty
+inlineToTexinfo (RawInline f str)
+  | f == "latex" || f == "tex" =
+                      return $ text "@tex" $$ text str $$ text "@end tex"
+  | f == "texinfo" =  return $ text str
+  | otherwise      =  return empty
 inlineToTexinfo (LineBreak) = return $ text "@*"
 inlineToTexinfo Space = return $ char ' '
 
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 3fb554dca..27e8b60ec 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -121,10 +121,9 @@ blockToTextile opts (Para inlines) = do
               then "<p>" ++ contents ++ "</p>"
               else contents ++ if null listLevel then "\n" else ""
 
-blockToTextile _ (RawBlock f str) =
-  if f == "html" || f == "textile"
-     then return str
-     else return ""
+blockToTextile _ (RawBlock f str)
+  | f == Format "html" || f == Format "textile" = return str
+  | otherwise                                   = return ""
 
 blockToTextile _ HorizontalRule = return "<hr />\n"
 
@@ -401,10 +400,9 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str
 inlineToTextile _ (Math _ str) =
   return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>"
 
-inlineToTextile _ (RawInline f str) =
-  if f == "html" || f == "textile"
-     then return str
-     else return ""
+inlineToTextile _ (RawInline f str)
+  | f == Format "html" || f == Format "textile" = return str
+  | otherwise                                   = return ""
 
 inlineToTextile _ (LineBreak) = return "\n"
 
-- 
cgit v1.2.3


From 9152fa1a95346e26bc290b3f5018b2eeb5d4e077 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 10 Aug 2013 18:13:38 -0700
Subject: Use query instead of queryWith.

---
 src/Text/Pandoc/Biblio.hs          |  5 +++--
 src/Text/Pandoc/Shared.hs          | 29 +++++++++++++++++++++++++++--
 src/Text/Pandoc/Writers/ConTeXt.hs |  4 ++--
 src/Text/Pandoc/Writers/LaTeX.hs   |  7 ++++---
 4 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index 755c779ea..206b38530 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -36,6 +36,7 @@ import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct )
 import qualified Text.CSL as CSL ( Cite(..) )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
+import Text.Pandoc.Walk
 import Text.Pandoc.Shared (stringify)
 import Text.Parsec hiding (State)
 import Control.Monad
@@ -48,7 +49,7 @@ processBiblio Nothing _ p = p
 processBiblio _      [] p = p
 processBiblio (Just style) r p =
   let p'         = evalState (bottomUpM setHash p) 1
-      grps       = queryWith getCitation p'
+      grps       = query getCitation p'
       result     = citeproc procOpts style r (setNearNote style $
                       map (map toCslCite) grps)
       cits_map   = M.fromList $ zip grps (citations result)
@@ -121,7 +122,7 @@ isTextualCitation (c:_) = citationMode c == AuthorInText
 isTextualCitation _     = False
 
 -- | Retrieve all citations from a 'Pandoc' docuument. To be used with
--- 'queryWith'.
+-- 'query'.
 getCitation :: Inline -> [[Citation]]
 getCitation i | Cite t _ <- i = [t]
               | otherwise     = []
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 09874299d..2b692dc3c 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE DeriveDataTypeable, CPP #-}
+{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-}
 {-
 Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
 
@@ -79,6 +79,7 @@ module Text.Pandoc.Shared (
                     ) where
 
 import Text.Pandoc.Definition
+import Text.Pandoc.Walk
 import Text.Pandoc.Generic
 import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
 import qualified Text.Pandoc.Builder as B
@@ -105,6 +106,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
          renderOptions)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as B8
+import Text.Pandoc.Compat.Monoid
 
 #ifdef EMBED_DATA_FILES
 import Text.Pandoc.Data (dataFiles)
@@ -383,7 +385,7 @@ consolidateInlines [] = []
 
 -- | Convert list of inlines to a string with formatting removed.
 stringify :: [Inline] -> String
-stringify = queryWith go
+stringify = query go
   where go :: Inline -> [Char]
         go Space = " "
         go (Str x) = x
@@ -433,6 +435,29 @@ data Element = Blk Block
              --    lvl  num attributes label    contents
              deriving (Eq, Read, Show, Typeable, Data)
 
+instance Walkable Inline Element where
+  walk f (Blk x) = Blk (walk f x)
+  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+  walkM f (Blk x) = Blk `fmap` walkM f x
+  walkM f (Sec lev nums attr ils elts) = do
+    ils' <- walkM f ils
+    elts' <- walkM f elts
+    return $ Sec lev nums attr ils' elts'
+  query f (Blk x) = query f x
+  query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+instance Walkable Block Element where
+  walk f (Blk x) = Blk (walk f x)
+  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+  walkM f (Blk x) = Blk `fmap` walkM f x
+  walkM f (Sec lev nums attr ils elts) = do
+    ils' <- walkM f ils
+    elts' <- walkM f elts
+    return $ Sec lev nums attr ils' elts'
+  query f (Blk x) = query f x
+  query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+
 -- | Convert Pandoc inline list to plain text identifier.  HTML
 -- identifiers must start with a letter, and may contain only
 -- letters, digits, and the characters _-.
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 40dc1deb5..0379f8b0a 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -33,7 +33,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Generic (queryWith)
+import Text.Pandoc.Walk (query)
 import Text.Printf ( printf )
 import Data.List ( intercalate, isPrefixOf )
 import Control.Monad.State
@@ -326,7 +326,7 @@ inlineToConTeXt (Note contents) = do
   contents' <- blockListToConTeXt contents
   let codeBlock x@(CodeBlock _ _) = [x]
       codeBlock _ = []
-  let codeBlocks = queryWith codeBlock contents
+  let codeBlocks = query codeBlock contents
   return $ if null codeBlocks
               then text "\\footnote{" <> nest 2 contents' <> char '}'
               else text "\\startbuffer " <> nest 2 contents' <>
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d09ccc3b8..860ca8349 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -30,6 +30,7 @@ Conversion of 'Pandoc' format into LaTeX.
 -}
 module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
 import Text.Pandoc.Definition
+import Text.Pandoc.Walk
 import Text.Pandoc.Generic
 import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
@@ -86,7 +87,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
   -- see if there are internal links
   let isInternalLink (Link _ ('#':xs,_))  = [xs]
       isInternalLink _                    = []
-  modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks }
+  modify $ \s -> s{ stInternalLinks = query isInternalLink blocks }
   let template = writerTemplate options
   -- set stBook depending on documentclass
   let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
@@ -248,9 +249,9 @@ elementToBeamer slideLevel  (Sec lvl _num (ident,classes,kvs) tit elts)
       let hasCode (Code _ _) = [True]
           hasCode _          = []
       opts <- gets stOptions
-      let fragile = not $ null $ queryWith hasCodeBlock elts ++
+      let fragile = not $ null $ query hasCodeBlock elts ++
                                      if writerListings opts
-                                        then queryWith hasCode elts
+                                        then query hasCode elts
                                         else []
       let allowframebreaks = "allowframebreaks" `elem` classes
       let optionslist = ["fragile" | fragile] ++
-- 
cgit v1.2.3


From 02a125d0aa8becd258c99b27c5e30116f0cbacb4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 10 Aug 2013 18:45:00 -0700
Subject: Use walk, walkM in place of bottomUp, bottomUpM when possible.

They are significantly faster.
---
 src/Text/Pandoc/PDF.hs               |  4 ++--
 src/Text/Pandoc/Readers/LaTeX.hs     |  4 ++--
 src/Text/Pandoc/Readers/MediaWiki.hs |  4 ++--
 src/Text/Pandoc/Shared.hs            |  2 +-
 src/Text/Pandoc/Writers/Docx.hs      | 11 ++++++-----
 src/Text/Pandoc/Writers/EPUB.hs      |  6 +++---
 src/Text/Pandoc/Writers/FB2.hs       |  8 ++++++--
 src/Text/Pandoc/Writers/LaTeX.hs     |  3 +--
 src/Text/Pandoc/Writers/Markdown.hs  |  8 ++++----
 src/Text/Pandoc/Writers/ODT.hs       |  4 ++--
 src/Text/Pandoc/Writers/RTF.hs       |  4 ++--
 11 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index b030e2ca7..ce20ac1b4 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -44,7 +44,7 @@ import Data.List (isInfixOf)
 import qualified Data.ByteString.Base64 as B64
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Definition
-import Text.Pandoc.Generic (bottomUpM)
+import Text.Pandoc.Walk (walkM)
 import Text.Pandoc.Shared (fetchItem, warn)
 import Text.Pandoc.Options (WriterOptions(..))
 import Text.Pandoc.MIME (extensionFromMimeType)
@@ -73,7 +73,7 @@ handleImages :: String        -- ^ source directory/base URL
              -> FilePath      -- ^ temp dir to store images
              -> Pandoc        -- ^ document
              -> IO Pandoc
-handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir)
+handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir)
 
 handleImage' :: String
              -> FilePath
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index eb0baedda..71e1e0ac2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                  ) where
 
 import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Walk
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Biblio (processBiblio)
@@ -815,7 +815,7 @@ keyvals :: LP [(String, String)]
 keyvals = try $ char '[' *> manyTill keyval (char ']')
 
 alltt :: String -> LP Blocks
-alltt t = bottomUp strToCode <$> parseFromString blocks
+alltt t = walk strToCode <$> parseFromString blocks
   (substitute " " "\\ " $ substitute "%" "\\%" $
    concat $ intersperse "\\\\\n" $ lines t)
   where strToCode (Str s) = Code nullAttr s
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 56049e035..8f1ff2776 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
 import Text.Pandoc.XML ( fromEntities )
 import Text.Pandoc.Parsing hiding ( nested )
-import Text.Pandoc.Generic ( bottomUp )
+import Text.Pandoc.Walk ( walk )
 import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
@@ -342,7 +342,7 @@ preformatted = try $ do
       spacesStr _        = False
   if F.all spacesStr contents
      then return mempty
-     else return $ B.para $ bottomUp strToCode contents
+     else return $ B.para $ walk strToCode contents
 
 header :: MWParser Blocks
 header = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 2b692dc3c..6fd78b188 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -518,7 +518,7 @@ isHeaderBlock _ = False
 
 -- | Shift header levels up or down.
 headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = bottomUp shift
+headerShift n = walk shift
   where shift :: Block -> Block
         shift (Header level attr inner) = Header (level + n) attr inner
         shift x                         = x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 2483e243f..aa618b2cc 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Shared hiding (Element)
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Highlighting ( highlight )
+import Text.Pandoc.Walk
 import Text.Highlighting.Kate.Types ()
 import Text.XML.Light
 import Text.TeXMath
@@ -108,7 +109,7 @@ writeDocx :: WriterOptions  -- ^ Writer options
           -> IO BL.ByteString
 writeDocx opts doc@(Pandoc meta _) = do
   let datadir = writerUserDataDir opts
-  let doc' = bottomUp (concatMap fixDisplayMath) doc
+  let doc' = walk fixDisplayMath doc
   refArchive <- liftM (toArchive . toLazy) $
        case writerReferenceDocx opts of
              Just f  -> B.readFile f
@@ -810,17 +811,17 @@ stripLeadingTrailingSpace = go . reverse . go . reverse
   where go (Space:xs) = xs
         go xs         = xs
 
-fixDisplayMath :: Block -> [Block]
+fixDisplayMath :: Block -> Block
 fixDisplayMath (Plain lst)
   | any isDisplayMath lst && not (all isDisplayMath lst) =
     -- chop into several paragraphs so each displaymath is its own
-    map (Plain . stripLeadingTrailingSpace) $
+    Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
        groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
                          not (isDisplayMath x || isDisplayMath y)) lst
 fixDisplayMath (Para lst)
   | any isDisplayMath lst && not (all isDisplayMath lst) =
     -- chop into several paragraphs so each displaymath is its own
-    map (Para . stripLeadingTrailingSpace) $
+    Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
        groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
                          not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath x = [x]
+fixDisplayMath x = x
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ab14ff8a0..fa2b45036 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -48,7 +48,7 @@ import qualified Text.Pandoc.Shared as Shared
 import Text.Pandoc.Builder (fromList, setMeta)
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Walk
 import Control.Monad.State
 import Text.XML.Light hiding (ppTopElement)
 import Text.Pandoc.UUID
@@ -116,7 +116,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   -- handle pictures
   picsRef <- newIORef []
-  Pandoc _ blocks <- bottomUpM
+  Pandoc _ blocks <- walkM
        (transformInline opts' sourceDir picsRef) doc
   pics <- readIORef picsRef
   let readPicEntry entries (oldsrc, newsrc) = do
@@ -520,7 +520,7 @@ correlateRefs chapterHeaderLevel bs =
 -- Replace internal link references using the table produced
 -- by correlateRefs.
 replaceRefs :: [(String,String)] -> [Block] -> [Block]
-replaceRefs refTable = bottomUp replaceOneRef
+replaceRefs refTable = walk replaceOneRef
   where replaceOneRef x@(Link lab ('#':xs,tit)) =
           case lookup xs refTable of
                 Just url -> Link lab (url,tit)
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 2576b2dc2..adbe948be 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -45,7 +45,7 @@ import qualified Text.XML.Light.Cursor as XC
 import Text.Pandoc.Definition
 import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
 import Text.Pandoc.Shared (orderedListMarkers)
-import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.Walk
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -423,6 +423,10 @@ indent = indentBlock
   indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
                     in  intercalate [LineBreak] $ map ((Str spacer):) lns
 
+capitalize :: Inline -> Inline
+capitalize (Str xs) = Str $ map toUpper xs
+capitalize x = x
+
 -- | Convert a Pandoc's Inline element to FictionBook XML representation.
 toXml :: Inline -> FBM [Content]
 toXml (Str s) = return [txt s]
@@ -432,7 +436,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
 toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
 toXml (Superscript ss) = list `liftM` wrap "sup" ss
 toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss
+toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
 toXml (Quoted SingleQuote ss) = do  -- FIXME: should be language-specific
   inner <- cMapM toXml ss
   return $ [txt "‘"] ++ inner ++ [txt "’"]
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 860ca8349..7f9a99801 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -31,7 +31,6 @@ Conversion of 'Pandoc' format into LaTeX.
 module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
-import Text.Pandoc.Generic
 import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
@@ -498,7 +497,7 @@ sectionHeader unnumbered ref level lst = do
   txt <- inlineListToLaTeX lst
   let noNote (Note _) = Str ""
       noNote x        = x
-  let lstNoNotes = bottomUp noNote lst
+  let lstNoNotes = walk noNote lst
   let star = if unnumbered then text "*" else empty
   -- footnotes in sections don't work unless you specify an optional
   -- argument:  \section[mysec]{mysec\footnote{blah}}
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d195d8445..3d0ed8702 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -32,7 +32,7 @@ Markdown:  <http://daringfireball.net/projects/markdown/>
 -}
 module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
 import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Walk
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
@@ -82,7 +82,7 @@ writePlain opts document =
     where document' = plainify document
 
 plainify :: Pandoc -> Pandoc
-plainify = bottomUp go
+plainify = walk go
   where go :: Inline -> Inline
         go (Emph xs) = SmallCaps xs
         go (Strong xs) = SmallCaps xs
@@ -643,13 +643,13 @@ inlineToMarkdown opts (Strikeout lst) = do
               then "~~" <> contents <> "~~"
               else "<s>" <> contents <> "</s>"
 inlineToMarkdown opts (Superscript lst) = do
-  let lst' = bottomUp escapeSpaces lst
+  let lst' = walk escapeSpaces lst
   contents <- inlineListToMarkdown opts lst'
   return $ if isEnabled Ext_superscript opts
               then "^" <> contents <> "^"
               else "<sup>" <> contents <> "</sup>"
 inlineToMarkdown opts (Subscript lst) = do
-  let lst' = bottomUp escapeSpaces lst
+  let lst' = walk escapeSpaces lst
   contents <- inlineListToMarkdown opts lst'
   return $ if isEnabled Ext_subscript opts
               then "~" <> contents <> "~"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 589010bb9..fb94d9ffb 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn )
 import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
 import Text.Pandoc.MIME ( getMimeType )
 import Text.Pandoc.Definition
-import Text.Pandoc.Generic
+import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
 import Control.Monad (liftM)
 import Text.Pandoc.XML
@@ -63,7 +63,7 @@ writeODT opts doc@(Pandoc meta _) = do
   -- handle pictures
   picEntriesRef <- newIORef ([] :: [Entry])
   let sourceDir = writerSourceDirectory opts
-  doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
+  doc' <- walkM (transformPic sourceDir picEntriesRef) doc
   let newContents = writeOpenDocument opts{writerWrapText = False} doc'
   epochtime <- floor `fmap` getPOSIXTime
   let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 6d2b1229d..0e8ce2ece 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Generic (bottomUpM)
+import Text.Pandoc.Walk
 import Data.List ( isSuffixOf, intercalate )
 import Data.Char ( ord, chr, isDigit, toLower )
 import System.FilePath ( takeExtension )
@@ -70,7 +70,7 @@ rtfEmbedImage x = return x
 -- images embedded as encoded binary data.
 writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String
 writeRTFWithEmbeddedImages options doc =
-  writeRTF options `fmap` bottomUpM rtfEmbedImage doc
+  writeRTF options `fmap` walkM rtfEmbedImage doc
 
 -- | Convert Pandoc to a string in rich text format.
 writeRTF :: WriterOptions -> Pandoc -> String
-- 
cgit v1.2.3


From e279175ea517e2df65fe5d716bc02e383b04fc36 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 11 Aug 2013 15:58:09 -0700
Subject: Options: Changed `writerSourceDir` to `writerSourceURL` (now a
 Maybe).

Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.

This has been confusing to many users.  It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.

writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL.  (So, relative links will be resolved
in relation to the first page.)  Otherwise, 'Nothing'.

The ODT, EPUB, Docx, and PDF writers have been modified accordingly.

Note that this change may break some existing workflows.  If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.

Closes #942.
---
 src/Text/Pandoc/Options.hs      |  4 ++--
 src/Text/Pandoc/PDF.hs          |  6 +++---
 src/Text/Pandoc/Shared.hs       | 20 ++++++++++----------
 src/Text/Pandoc/Writers/Docx.hs |  3 +--
 src/Text/Pandoc/Writers/EPUB.hs | 21 +++++++--------------
 src/Text/Pandoc/Writers/ODT.hs  |  9 ++++-----
 6 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 61a85cf6e..c7c37d6b8 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -286,7 +286,7 @@ data WriterOptions = WriterOptions
   , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
   , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
                                      -- and for footnote marks in markdown
-  , writerSourceDirectory  :: FilePath -- ^ Directory path of 1st source file
+  , writerSourceURL        :: Maybe String  -- ^ Absolute URL + directory of 1st source file
   , writerUserDataDir      :: Maybe FilePath -- ^ Path of user data directory
   , writerCiteMethod       :: CiteMethod -- ^ How to print cites
   , writerBiblioFiles      :: [FilePath] -- ^ Biblio files to use for citations
@@ -329,7 +329,7 @@ instance Default WriterOptions where
                       , writerColumns          = 72
                       , writerEmailObfuscation = JavascriptObfuscation
                       , writerIdentifierPrefix = ""
-                      , writerSourceDirectory  = "."
+                      , writerSourceURL        = Nothing
                       , writerUserDataDir      = Nothing
                       , writerCiteMethod       = Citeproc
                       , writerBiblioFiles      = []
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index ce20ac1b4..ae611bc37 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -65,17 +65,17 @@ makePDF :: String              -- ^ pdf creator (pdflatex, lualatex, xelatex)
         -> Pandoc              -- ^ document
         -> IO (Either ByteString ByteString)
 makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
-  doc' <- handleImages (writerSourceDirectory opts) tmpdir doc
+  doc' <- handleImages (writerSourceURL opts) tmpdir doc
   let source = writer opts doc'
   tex2pdf' tmpdir program source
 
-handleImages :: String        -- ^ source directory/base URL
+handleImages :: Maybe String  -- ^ source base URL
              -> FilePath      -- ^ temp dir to store images
              -> Pandoc        -- ^ document
              -> IO Pandoc
 handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir)
 
-handleImage' :: String
+handleImage' :: Maybe String
              -> FilePath
              -> Inline
              -> IO Inline
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6fd78b188..d670a35bc 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -612,18 +612,18 @@ readDataFileUTF8 userDir fname =
 
 -- | Fetch an image or other item from the local filesystem or the net.
 -- Returns raw content and maybe mime type.
-fetchItem :: String -> String
+fetchItem :: Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceDir s =
-  case s of
-    _ | isAbsoluteURI s         -> openURL s
-      | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s
-      | otherwise               -> E.try $ do
+fetchItem sourceURL s
+  | isAbsoluteURI s = openURL s
+  | otherwise       = case sourceURL of
+                           Just u  -> openURL (u ++ "/" ++ s)
+                           Nothing -> E.try readLocalFile
+  where readLocalFile = do
           let mime = case takeExtension s of
-                        ".gz" -> getMimeType $ dropExtension s
-                        x     -> getMimeType x
-          let f = sourceDir </> s
-          cont <- BS.readFile f
+                          ".gz" -> getMimeType $ dropExtension s
+                          x     -> getMimeType x
+          cont <- BS.readFile s
           return (cont, mime)
 
 -- | Read from a URL and return raw data and maybe mime type.
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index aa618b2cc..c8673ae48 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -728,8 +728,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
   case M.lookup src imgs of
     Just (_,_,_,elt,_) -> return [elt]
     Nothing -> do
-      let sourceDir = writerSourceDirectory opts
-      res <- liftIO $ fetchItem sourceDir src
+      res <- liftIO $ fetchItem (writerSourceURL opts) src
       case res of
         Left (_ :: E.SomeException) -> do
           liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index fa2b45036..ac0e7610c 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.UUID
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.Markdown ( writePlain )
 import Data.Char ( toLower )
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( unEscapeString )
 import Text.Pandoc.MIME (getMimeType)
 #if MIN_VERSION_base(4,6,0)
 #else
@@ -93,7 +93,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
                           then MathML Nothing
                           else writerHTMLMathMethod opts
                   , writerWrapText = False }
-  let sourceDir = writerSourceDirectory opts'
   let mbCoverImage = lookup "epub-cover-image" vars
 
   -- cover page
@@ -117,10 +116,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
   -- handle pictures
   picsRef <- newIORef []
   Pandoc _ blocks <- walkM
-       (transformInline opts' sourceDir picsRef) doc
+       (transformInline opts' picsRef) doc
   pics <- readIORef picsRef
   let readPicEntry entries (oldsrc, newsrc) = do
-        res <- fetchItem sourceDir oldsrc
+        res <- fetchItem (writerSourceURL opts') oldsrc
         case res of
              Left _        -> do
               warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
@@ -414,19 +413,13 @@ showDateTimeISO8601 :: UTCTime -> String
 showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
 
 transformInline  :: WriterOptions
-                 -> FilePath
                  -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
                  -> Inline
                  -> IO Inline
-transformInline opts sourceDir picsRef (Image lab (src,tit))
-  | isAbsoluteURI src = do
-    raw <- makeSelfContained Nothing
-             $ writeHtmlInline opts (Image lab (src,tit))
-    return $ RawInline (Format "html") raw
-  | otherwise = do
+transformInline opts picsRef (Image lab (src,tit)) = do
     let src' = unEscapeString src
     pics <- readIORef picsRef
-    let oldsrc = sourceDir </> src'
+    let oldsrc = maybe src' (</> src) $ writerSourceURL opts
     let ext = takeExtension src'
     newsrc <- case lookup oldsrc pics of
                     Just n  -> return n
@@ -435,11 +428,11 @@ transformInline opts sourceDir picsRef (Image lab (src,tit))
                           modifyIORef picsRef ( (oldsrc, new): )
                           return new
     return $ Image lab (newsrc, tit)
-transformInline opts _ _ (x@(Math _ _))
+transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
     raw <- makeSelfContained Nothing $ writeHtmlInline opts x
     return $ RawInline (Format "html") raw
-transformInline _ _ _ x = return x
+transformInline _ _ x = return x
 
 writeHtmlInline :: WriterOptions -> Inline -> String
 writeHtmlInline opts z = trimr $
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index fb94d9ffb..751a323f5 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -62,8 +62,7 @@ writeODT opts doc@(Pandoc meta _) = do
                            readDataFile datadir "reference.odt"
   -- handle pictures
   picEntriesRef <- newIORef ([] :: [Entry])
-  let sourceDir = writerSourceDirectory opts
-  doc' <- walkM (transformPic sourceDir picEntriesRef) doc
+  doc' <- walkM (transformPic opts picEntriesRef) doc
   let newContents = writeOpenDocument opts{writerWrapText = False} doc'
   epochtime <- floor `fmap` getPOSIXTime
   let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents
@@ -111,9 +110,9 @@ writeODT opts doc@(Pandoc meta _) = do
   let archive'' = addEntryToArchive metaEntry archive'
   return $ fromArchive archive''
 
-transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
-transformPic sourceDir entriesRef (Image lab (src,_)) = do
-  res <- fetchItem sourceDir src
+transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
+transformPic opts entriesRef (Image lab (src,_)) = do
+  res <- fetchItem (writerSourceURL opts) src
   case res of
      Left (_ :: E.SomeException) -> do
        warn $ "Could not find image `" ++ src ++ "', skipping..."
-- 
cgit v1.2.3


From 7b975c2bcc32e5ddd96338afdb32a1ceacdc0980 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 11 Aug 2013 16:16:24 -0700
Subject: PDF:  Add suggestion to use --latex-engine=xelatex on encoding error.

---
 src/Text/Pandoc/PDF.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index ae611bc37..a445e2991 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -109,8 +109,14 @@ tex2pdf' tmpDir program source = do
   (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
   let msg = "Error producing PDF from TeX source."
   case (exit, mbPdf) of
-       (ExitFailure _, _)      -> return $ Left $
-                                     msg <> "\n" <> extractMsg log'
+       (ExitFailure _, _)      -> do
+          let logmsg = extractMsg log'
+          let extramsg =
+                case logmsg of
+                     x | "! Package inputenc Error" `BC.isPrefixOf` x ->
+                           "\nTry running pandoc with --latex-engine=xelatex."
+                     _ -> ""
+          return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg
        (ExitSuccess, Nothing)  -> return $ Left msg
        (ExitSuccess, Just pdf) -> return $ Right pdf
 
-- 
cgit v1.2.3


From eb0c0b86ed518982eb5d3336e73ff5cb1d59d87c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 11 Aug 2013 17:13:46 -0700
Subject: ODT/OpenDocument writer:  Minor changes for ODF 1.2 conformance.

See #939.  We leave the nonconforming contextual-spacing attribute,
which is provided by LibreOffice itself and seems to be supported.
---
 src/Text/Pandoc/Writers/ODT.hs          | 20 ++++++++++++++------
 src/Text/Pandoc/Writers/OpenDocument.hs |  3 ++-
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 751a323f5..cc0a06243 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -65,26 +65,30 @@ writeODT opts doc@(Pandoc meta _) = do
   doc' <- walkM (transformPic opts picEntriesRef) doc
   let newContents = writeOpenDocument opts{writerWrapText = False} doc'
   epochtime <- floor `fmap` getPOSIXTime
-  let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents
+  let contentEntry = toEntry "content.xml" epochtime
+                     $ fromStringLazy newContents
   picEntries <- readIORef picEntriesRef
-  let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
+  let archive = foldr addEntryToArchive refArchive
+                $ contentEntry : picEntries
   -- construct META-INF/manifest.xml based on archive
   let toFileEntry fp = case getMimeType fp of
                         Nothing  -> empty
                         Just m   -> selfClosingTag "manifest:file-entry"
                                      [("manifest:media-type", m)
                                      ,("manifest:full-path", fp)
+                                     ,("manifest:version", "1.2")
                                      ]
-  let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ]
+  let files = [ ent | ent <- filesInArchive archive,
+                             not ("META-INF" `isPrefixOf` ent) ]
   let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
         $ fromStringLazy $ render Nothing
         $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
         $$
          ( inTags True "manifest:manifest"
-            [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")]
+            [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")
+            ,("manifest:version","1.2")]
             $ ( selfClosingTag "manifest:file-entry"
                  [("manifest:media-type","application/vnd.oasis.opendocument.text")
-                 ,("manifest:version","1.2")
                  ,("manifest:full-path","/")]
                 $$ vcat ( map toFileEntry $ files )
               )
@@ -107,7 +111,11 @@ writeODT opts doc@(Pandoc meta _) = do
                   )
              )
         )
-  let archive'' = addEntryToArchive metaEntry archive'
+  -- make sure mimetype is first
+  let mimetypeEntry = toEntry "mimetype" epochtime
+                      $ fromStringLazy "application/vnd.oasis.opendocument.text"
+  let archive'' = addEntryToArchive mimetypeEntry
+                  $ addEntryToArchive metaEntry archive'
   return $ fromArchive archive''
 
 transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 05c576c20..3ec5c2073 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -461,7 +461,8 @@ tableStyle :: Int -> [(Char,Double)] -> Doc
 tableStyle num wcs =
     let tableId        = "Table" ++ show (num + 1)
         table          = inTags True "style:style"
-                         [("style:name", tableId)] $
+                         [("style:name", tableId)
+                         ,("style:family", "table")] $
                          selfClosingTag "style:table-properties"
                          [("table:align"    , "center")]
         colStyle (c,0) = selfClosingTag "style:style"
-- 
cgit v1.2.3


From 3ebdc5b5f0f5bc88f727a36268d55921672899c0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 12 Aug 2013 16:21:24 -0700
Subject: Text.Pandoc.Compat.Monoid: Small improvements to the (<>) definition.

---
 src/Text/Pandoc/Compat/Monoid.hs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs
index 80ffcbbd6..cb7ea2527 100644
--- a/src/Text/Pandoc/Compat/Monoid.hs
+++ b/src/Text/Pandoc/Compat/Monoid.hs
@@ -11,6 +11,10 @@ import Data.Monoid (mappend, Monoid(..))
 
 #if MIN_VERSION_base(4,5,0)
 #else
+infixr 6 <>
+
+-- | An infix synonym for 'mappend'.
 (<>) :: Monoid m => m -> m -> m
 (<>) = mappend
+{-# INLINE (<>) #-}
 #endif
-- 
cgit v1.2.3


From 3e8bd8aa15a57c3dc87772049aabedeb1e0c7582 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 14 Aug 2013 23:24:45 -0700
Subject: Updated for removed unMeta, unFormat in pandoc-types.

---
 src/Text/Pandoc/Writers/RST.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 5fbbb6afc..557658bc8 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -176,10 +176,10 @@ blockToRST (Para inlines)
   | otherwise = do
       contents <- inlineListToRST inlines
       return $ contents <> blankline
-blockToRST (RawBlock f str)
+blockToRST (RawBlock f@(Format f') str)
   | f == "rst" = return $ text str
   | otherwise  = return $ blankline <> ".. raw:: " <>
-                    text (map toLower $ unFormat f) $+$
+                    text (map toLower f') $+$
                     (nest 3 $ text str) $$ blankline
 blockToRST HorizontalRule =
   return $ blankline $$ "--------------" $$ blankline
-- 
cgit v1.2.3


From c45bd6d468b272a2737dcc2a3c9f4afaebf37494 Mon Sep 17 00:00:00 2001
From: Scott Morrison <scott@tqft.net>
Date: Fri, 16 Aug 2013 10:03:54 +1000
Subject: adding support for breve accents via \u{} while reading LaTeX

---
 src/Text/Pandoc/Readers/LaTeX.hs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 71e1e0ac2..7c370dd47 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -416,6 +416,7 @@ inlineCommands = M.fromList $
   , ("=", option (str "=") $ try $ tok >>= accent macron)
   , ("c", option (str "c") $ try $ tok >>= accent cedilla)
   , ("v", option (str "v") $ try $ tok >>= accent hacek)
+  , ("u", option (str "u") $ try $ tok >>= accent breve)
   , ("i", lit "i")
   , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
   , (",", pure mempty)
@@ -708,6 +709,21 @@ hacek 'Z' = 'Ž'
 hacek 'z' = 'ž'
 hacek c   = c
 
+breve :: Char -> Char
+breve 'A' = 'Ă'
+breve 'a' = 'ă'
+breve 'E' = 'Ĕ'
+breve 'e' = 'ĕ'
+breve 'G' = 'Ğ'
+breve 'g' = 'ğ'
+breve 'I' = 'Ĭ'
+breve 'i' = 'ĭ'
+breve 'O' = 'Ŏ'
+breve 'o' = 'ŏ'
+breve 'U' = 'Ŭ'
+breve 'u' = 'ŭ'
+breve c   = c
+
 tok :: LP Inlines
 tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
 
-- 
cgit v1.2.3


From 172f020bc5b59950afd29411b7d80200d0b38e83 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 15 Aug 2013 17:21:56 -0700
Subject: Shared:  Better error message when default data file not found.

Listing the full path can confuse people who are using
`--self-contained`:  they might have intended the file to be
found locally.  So now we just list the data file name.
---
 src/Text/Pandoc/Shared.hs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d670a35bc..72b467da5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -583,8 +583,7 @@ readDefaultDataFile :: FilePath -> IO BS.ByteString
 readDefaultDataFile fname =
 #ifdef EMBED_DATA_FILES
   case lookup (makeCanonical fname) dataFiles of
-    Nothing       -> ioError $ userError
-                             $ "Data file `" ++ fname ++ "' does not exist"
+    Nothing       -> err 97 $ "Could not find data file " ++ fname
     Just contents -> return contents
   where makeCanonical = joinPath . transformPathParts . splitDirectories
         transformPathParts = reverse . foldl go []
@@ -592,7 +591,12 @@ readDefaultDataFile fname =
         go (_:as) ".." = as
         go as     x    = x : as
 #else
-  getDataFileName ("data" </> fname) >>= BS.readFile
+  getDataFileName ("data" </> fname) >>= checkExistence >>= BS.readFile
+   where checkExistence fn = do
+           exists <- doesFileExist fn
+           if exists
+              then return fn
+              else err 97 ("Could not find data file " ++ fname)
 #endif
 
 -- | Read file from specified user data directory or, if not found there, from
-- 
cgit v1.2.3


From d3ebca6f553efa37cb6795dbd72e84051edea356 Mon Sep 17 00:00:00 2001
From: Scott Morrison <scott@tqft.net>
Date: Fri, 16 Aug 2013 14:48:24 +1000
Subject: LaTeX reader missing \oe and \OE characters

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7c370dd47..414e50fc8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -402,6 +402,8 @@ inlineCommands = M.fromList $
   , ("l", lit "ł")
   , ("ae", lit "æ")
   , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
   , ("pounds", lit "£")
   , ("euro", lit "€")
   , ("copyright", lit "©")
-- 
cgit v1.2.3


From ab8c0dcd410282baaa9429f755ad55e6d01a2466 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 16 Aug 2013 12:40:38 -0700
Subject: LaTeX reader:  parse label after section command and set id.

Closes #951.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 414e50fc8..50a95c361 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -315,12 +315,14 @@ authors = try $ do
   addMeta "authors" (map trimInlines auths)
 
 section :: Attr -> Int -> LP Blocks
-section attr lvl = do
+section (ident, classes, kvs) lvl = do
   hasChapters <- stateHasChapters `fmap` getState
   let lvl' = if hasChapters then lvl + 1 else lvl
   skipopts
   contents <- grouped inline
-  return $ headerWith attr lvl' contents
+  lab <- option ident $ try $ spaces >> controlSeq "label" >>
+           spaces >> braced
+  return $ headerWith (lab, classes, kvs) lvl' contents
 
 inlineCommand :: LP Inlines
 inlineCommand = try $ do
-- 
cgit v1.2.3


From 441a7aebf8c141612203d1cab0032f8c55e536ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 16 Aug 2013 13:02:55 -0700
Subject: LaTeX writer:  Avoid problem with footnotes in unnumbered headers.

Closes #940.
Added test case.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 7f9a99801..98553c421 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -498,14 +498,15 @@ sectionHeader unnumbered ref level lst = do
   let noNote (Note _) = Str ""
       noNote x        = x
   let lstNoNotes = walk noNote lst
+  txtNoNotes <- inlineListToLaTeX lstNoNotes
   let star = if unnumbered then text "*" else empty
-  -- footnotes in sections don't work unless you specify an optional
-  -- argument:  \section[mysec]{mysec\footnote{blah}}
-  optional <- if lstNoNotes == lst
+  -- footnotes in sections don't work (except for starred variants)
+  -- unless you specify an optional argument:
+  -- \section[mysec]{mysec\footnote{blah}}
+  optional <- if unnumbered || lstNoNotes == lst
                  then return empty
                  else do
-                   res <- inlineListToLaTeX lstNoNotes
-                   return $ char '[' <> res <> char ']'
+                   return $ brackets txtNoNotes
   let stuffing = star <> optional <> braces txt
   book <- gets stBook
   opts <- gets stOptions
@@ -536,7 +537,7 @@ sectionHeader unnumbered ref level lst = do
                    $$ if unnumbered
                          then "\\addcontentsline{toc}" <>
                                 braces (text sectionType) <>
-                                braces txt
+                                braces txtNoNotes
                          else empty
 
 -- | Convert list of inline elements to LaTeX.
-- 
cgit v1.2.3


From 19591df739a6c50a3d0a9af55ba90b883264b21d Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 16 Aug 2013 13:05:06 -0700
Subject: Shared:  stringify now skips over footnotes.

That is usually the right thing to do for section labels, etc.
---
 src/Text/Pandoc/Shared.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 72b467da5..bf92601ef 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -391,6 +391,7 @@ stringify = query go
         go (Str x) = x
         go (Code _ x) = x
         go (Math _ x) = x
+        go (Note _)  = ""
         go LineBreak = " "
         go _ = ""
 
-- 
cgit v1.2.3


From 399c75da448dc0f90855b43ee44e9d7cf8009f1c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 16 Aug 2013 13:08:11 -0700
Subject: Revert "Shared:  stringify now skips over footnotes."

This reverts commit 19591df739a6c50a3d0a9af55ba90b883264b21d.

This change didn't work; query has already written the contents
of the note by the time it gets to Note.
---
 src/Text/Pandoc/Shared.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index bf92601ef..72b467da5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -391,7 +391,6 @@ stringify = query go
         go (Str x) = x
         go (Code _ x) = x
         go (Math _ x) = x
-        go (Note _)  = ""
         go LineBreak = " "
         go _ = ""
 
-- 
cgit v1.2.3


From 89a7703260703599a033be16e1581a0494326c2b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 16 Aug 2013 13:22:27 -0700
Subject: Shared:  Changed stringify so it ignores notes.

Also documented this in README.
---
 src/Text/Pandoc/Shared.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 72b467da5..eef150351 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -384,8 +384,10 @@ consolidateInlines (x : xs) = x : consolidateInlines xs
 consolidateInlines [] = []
 
 -- | Convert list of inlines to a string with formatting removed.
+-- Footnotes are skipped (since we don't want their contents in link
+-- labels).
 stringify :: [Inline] -> String
-stringify = query go
+stringify = query go . walk deNote
   where go :: Inline -> [Char]
         go Space = " "
         go (Str x) = x
@@ -393,6 +395,8 @@ stringify = query go
         go (Math _ x) = x
         go LineBreak = " "
         go _ = ""
+        deNote (Note _) = Str ""
+        deNote x = x
 
 -- | Change final list item from @Para@ to @Plain@ if the list contains
 -- no other @Para@ blocks.
-- 
cgit v1.2.3


From 5a5a2522163d73c3b91db2cb2b73e697a5dcfb23 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 17 Aug 2013 10:29:12 -0700
Subject: Markdown reader:  Don't generate blank title, author, date elements.

---
 src/Text/Pandoc/Readers/Markdown.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 251554de1..906dd10f2 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -221,9 +221,9 @@ pandocTitleBlock = try $ do
     title' <- title
     author' <- author
     date' <- date
-    return $ B.setMeta "title" title'
-           . B.setMeta "author" author'
-           . B.setMeta "date" date'
+    return $ if B.isNull title' then id else B.setMeta "title" title'
+           . if null author' then id else B.setMeta "author" author'
+           . if B.isNull date' then id else B.setMeta "date" date'
 
 yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
 yamlTitleBlock = try $ do
-- 
cgit v1.2.3


From 3117c668a7d245689bfc291d5d9a64cb3178b52c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 15 Aug 2013 22:39:14 -0700
Subject: Markdown reader: Parse span, div tags as Span, Div elements.

Assuming markdown_in_html extension is set.
---
 src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 906dd10f2..535fc02c6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -446,6 +446,7 @@ block = choice [ mempty <$ blanklines
                , header
                , lhsCodeBlock
                , rawTeXBlock
+               , divHtml
                , htmlBlock
                , table
                , lineBlock
@@ -1355,6 +1356,7 @@ inline = choice [ whitespace
                 , superscript
                 , inlineNote  -- after superscript because of ^[link](/foo)^
                 , autoLink
+                , spanHtml
                 , rawHtmlInline
                 , escapedChar
                 , rawLaTeXInline'
@@ -1755,6 +1757,26 @@ inBrackets parser = do
   char ']'
   return $ "[" ++ contents ++ "]"
 
+spanHtml :: MarkdownParser (F Inlines)
+spanHtml = try $ do
+  guardEnabled Ext_markdown_in_html_blocks
+  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
+  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
+  let ident = maybe "" id $ lookup "id" attrs
+  let classes = maybe [] words $ lookup "class" attrs
+  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+  return $ B.spanWith (ident, classes, keyvals) <$> contents
+
+divHtml :: MarkdownParser (F Blocks)
+divHtml = try $ do
+  guardEnabled Ext_markdown_in_html_blocks
+  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
+  contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
+  let ident = maybe "" id $ lookup "id" attrs
+  let classes = maybe [] words $ lookup "class" attrs
+  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+  return $ B.divWith (ident, classes, keyvals) <$> contents
+
 rawHtmlInline :: MarkdownParser (F Inlines)
 rawHtmlInline = do
   guardEnabled Ext_raw_html
-- 
cgit v1.2.3


From 8d441af3da4709fd48a44e860d5a0cd4d35792af Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 18 Aug 2013 14:36:40 -0700
Subject: Adjusted writers and tests for change in parsing of div/span.

Textile, MediaWiki, Markdown, Org, RST will emit raw HTML div tags for divs.
Otherwise Div and Span are "transparent" block containers.
---
 src/Text/Pandoc/Writers/Docbook.hs   |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs  | 17 ++++++++++++-----
 src/Text/Pandoc/Writers/MediaWiki.hs | 12 ++++++++----
 src/Text/Pandoc/Writers/Org.hs       |  9 ++++++++-
 src/Text/Pandoc/Writers/RST.hs       |  6 +++++-
 src/Text/Pandoc/Writers/Shared.hs    | 18 ++++++++++++++++++
 src/Text/Pandoc/Writers/Textile.hs   |  8 ++++++--
 7 files changed, 58 insertions(+), 14 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 3d150d19b..7c03c07dc 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -149,7 +149,7 @@ listItemToDocbook opts item =
 -- | Convert a Pandoc block element to Docbook.
 blockToDocbook :: WriterOptions -> Block -> Doc
 blockToDocbook _ Null = empty
-blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs
+blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs
 blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize
 blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
 -- title beginning with fig: indicates that the image is a figure
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 3d0ed8702..623c445df 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2013 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -301,7 +301,13 @@ blockToMarkdown :: WriterOptions -- ^ Options
                 -> Block         -- ^ Block element
                 -> State WriterState Doc
 blockToMarkdown _ Null = return empty
-blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs
+blockToMarkdown opts (Div attrs ils) = do
+  isPlain <- gets stPlain
+  contents <- blockListToMarkdown opts ils
+  return $ if isPlain
+              then contents <> blankline
+              else tagWithAttrs "div" attrs <> blankline <>
+                     contents <> blankline <> "</div>" <> blankline
 blockToMarkdown opts (Plain inlines) = do
   contents <- inlineListToMarkdown opts inlines
   return $ contents <> cr
@@ -629,8 +635,9 @@ escapeSpaces x = x
 
 -- | Convert Pandoc inline element to markdown.
 inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
-inlineToMarkdown opts (Span _ ils) =
-  inlineListToMarkdown opts ils
+inlineToMarkdown opts (Span attrs ils) = do
+  contents <- inlineListToMarkdown opts ils
+  return $ tagWithAttrs "span" attrs <> contents <> text "</span>"
 inlineToMarkdown opts (Emph lst) = do
   contents <- inlineListToMarkdown opts lst
   return $ "*" <> contents <> "*"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 4ffba1100..61741a61e 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -34,6 +34,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Shared
 import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Pretty (render)
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.XML ( escapeStringForXML )
 import Data.List ( intersect, intercalate, intersperse )
@@ -83,8 +84,10 @@ blockToMediaWiki :: WriterOptions -- ^ Options
 
 blockToMediaWiki _ Null = return ""
 
-blockToMediaWiki opts (Div _ bs) =
-  blockListToMediaWiki opts bs
+blockToMediaWiki opts (Div attrs bs) = do
+  contents <- blockListToMediaWiki opts bs
+  return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++
+                     contents ++ "\n\n" ++ "</div>"
 
 blockToMediaWiki opts (Plain inlines) =
   inlineListToMediaWiki opts inlines
@@ -332,8 +335,9 @@ inlineListToMediaWiki opts lst =
 -- | Convert Pandoc inline element to MediaWiki.
 inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String
 
-inlineToMediaWiki opts (Span _ ils) =
-  inlineListToMediaWiki opts ils
+inlineToMediaWiki opts (Span attrs ils) = do
+  contents <- inlineListToMediaWiki opts ils
+  return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>"
 
 inlineToMediaWiki opts (Emph lst) = do
   contents <- inlineListToMediaWiki opts lst
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 34ae532b0..51083f52b 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -106,7 +106,14 @@ escapeString = escapeStringUsing $
 blockToOrg :: Block         -- ^ Block element
            -> State WriterState Doc
 blockToOrg Null = return empty
-blockToOrg (Div _ bs) = blockListToOrg bs
+blockToOrg (Div attrs bs) = do
+  contents <- blockListToOrg bs
+  let startTag = tagWithAttrs "div" attrs
+  let endTag = text "</div>"
+  return $ blankline $$ "#+BEGIN_HTML" $$
+           nest 2 startTag $$ "#+END_HTML" $$ blankline $$
+           contents $$ blankline $$ "#+BEGIN_HTML" $$
+           nest 2 endTag $$ "#+END_HTML" $$ blankline
 blockToOrg (Plain inlines) = inlineListToOrg inlines
 -- title beginning with fig: indicates that the image is a figure
 blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 557658bc8..70c6b4421 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -161,7 +161,11 @@ bordered contents c =
 blockToRST :: Block         -- ^ Block element
            -> State WriterState Doc
 blockToRST Null = return empty
-blockToRST (Div _ bs) = blockListToRST bs
+blockToRST (Div attr bs) = do
+  contents <- blockListToRST bs
+  let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr)
+  let endTag = ".. raw:: html" $+$ nest 3 "</div>"
+  return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline
 blockToRST (Plain inlines) = inlineListToRST inlines
 -- title beginning with fig: indicates that the image is a figure
 blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index e6ec853f8..89923822c 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
 
@@ -32,9 +33,12 @@ module Text.Pandoc.Writers.Shared (
                      , getField
                      , setField
                      , defField
+                     , tagWithAttrs
                      )
 where
 import Text.Pandoc.Definition
+import Text.Pandoc.Pretty
+import Text.Pandoc.XML (escapeStringForXML)
 import Control.Monad (liftM)
 import Text.Pandoc.Options (WriterOptions(..))
 import qualified Data.HashMap.Strict as H
@@ -120,3 +124,17 @@ defField field val (Object hashmap) =
     where f _newval oldval = oldval
 defField _ _  x = x
 
+-- Produce an HTML tag with the given pandoc attributes.
+tagWithAttrs :: String -> Attr -> Doc
+tagWithAttrs tag (ident,classes,kvs) = hsep
+  ["<" <> text tag
+  ,if null ident
+      then empty
+      else "id=" <> doubleQuotes (text ident)
+  ,if null classes
+      then empty
+      else "class=" <> doubleQuotes (text (unwords classes))
+  ]
+  <> hsep (map (\(k,v) -> text k <> "=" <>
+                doubleQuotes (text (escapeStringForXML v))) kvs)
+  <> ">"
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 27e8b60ec..7c102cc86 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (render)
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.XML ( escapeStringForXML )
@@ -101,8 +102,11 @@ blockToTextile :: WriterOptions -- ^ Options
 
 blockToTextile _ Null = return ""
 
-blockToTextile opts (Div _ bs) =
-  blockListToTextile opts bs
+blockToTextile opts (Div attr bs) = do
+  let startTag = render Nothing $ tagWithAttrs "div" attr
+  let endTag = "</div>"
+  contents <- blockListToTextile opts bs
+  return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n"
 
 blockToTextile opts (Plain inlines) =
   inlineListToTextile opts inlines
-- 
cgit v1.2.3


From af786829a0d64e373218f4c84c105796e9663b6f Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 18 Aug 2013 16:22:56 -0700
Subject: Parsing: Added stateMeta' to ParserState.

---
 src/Text/Pandoc/Parsing.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 2f42aba41..c16d5bb1d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -801,6 +801,7 @@ data ParserState = ParserState
       stateNotes           :: NoteTable,     -- ^ List of notes (raw bodies)
       stateNotes'          :: NoteTable',    -- ^ List of notes (parsed bodies)
       stateMeta            :: Meta,          -- ^ Document metadata
+      stateMeta'           :: F Meta,        -- ^ Document metadata
       stateHeaderTable     :: [HeaderType],  -- ^ Ordered list of header types used
       stateHeaders         :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
       stateIdentifiers     :: [String],      -- ^ List of header identifiers used
@@ -834,6 +835,7 @@ defaultParserState =
                   stateNotes           = [],
                   stateNotes'          = [],
                   stateMeta            = nullMeta,
+                  stateMeta'           = return nullMeta,
                   stateHeaderTable     = [],
                   stateHeaders         = M.empty,
                   stateIdentifiers     = [],
-- 
cgit v1.2.3


From 0e2605ffdf69b7a6a7c942a986dec4283a886e82 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 18 Aug 2013 18:39:04 -0700
Subject: Allow multiple YAML metadata blocks in document.

---
 src/Text/Pandoc/Readers/Markdown.hs | 107 +++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 51 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 535fc02c6..a653c2e98 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -203,13 +203,10 @@ dateLine = try $ do
   skipSpaces
   trimInlinesF . mconcat <$> manyTill inline newline
 
-titleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
-titleBlock = pandocTitleBlock
-         <|> yamlTitleBlock
-         <|> mmdTitleBlock
-         <|> return (return id)
+titleBlock :: MarkdownParser ()
+titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
-pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+pandocTitleBlock :: MarkdownParser ()
 pandocTitleBlock = try $ do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
@@ -217,16 +214,18 @@ pandocTitleBlock = try $ do
   author <- option (return []) authorsLine
   date <- option mempty dateLine
   optional blanklines
-  return $ do
-    title' <- title
-    author' <- author
-    date' <- date
-    return $ if B.isNull title' then id else B.setMeta "title" title'
-           . if null author' then id else B.setMeta "author" author'
-           . if B.isNull date' then id else B.setMeta "date" date'
-
-yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
-yamlTitleBlock = try $ do
+  let meta' = do title' <- title
+                 author' <- author
+                 date' <- date
+                 return $
+                   ( if B.isNull title' then id else B.setMeta "title" title'
+                   . if null author' then id else B.setMeta "author" author'
+                   . if B.isNull date' then id else B.setMeta "date" date' )
+                   nullMeta
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+yamlMetaBlock :: MarkdownParser (F Blocks)
+yamlMetaBlock = try $ do
   guardEnabled Ext_yaml_metadata_block
   pos <- getPosition
   string "---"
@@ -236,33 +235,39 @@ yamlTitleBlock = try $ do
   let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
   opts <- stateOptions <$> getState
-  case Yaml.decodeEither' $ UTF8.fromString rawYaml of
-       Right (Yaml.Object hashmap) -> return $ return $
-                H.foldrWithKey (\k v f ->
-                     if ignorable k
-                        then f
-                        else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
-                  id hashmap
-       Right Yaml.Null -> return $ return id
-       Right _ -> do
-                   addWarning (Just pos) "YAML header is not an object"
-                   return $ return id
-       Left err' -> do
-                case err' of
-                   InvalidYaml (Just YamlParseException{
-                               yamlProblem = problem
-                             , yamlContext = _ctxt
-                             , yamlProblemMark = Yaml.YamlMark {
-                                   yamlLine = yline
-                                 , yamlColumn = ycol
-                             }}) ->
-                        addWarning (Just $ setSourceLine
-                           (setSourceColumn pos (sourceColumn pos + ycol))
-                           (sourceLine pos + 1 + yline))
-                           $ "Could not parse YAML header: " ++ problem
-                   _ -> addWarning (Just pos)
-                           $ "Could not parse YAML header: " ++ show err'
-                return $ return id
+  meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
+                Right (Yaml.Object hashmap) -> return $ return $
+                         H.foldrWithKey (\k v m ->
+                              if ignorable k
+                                 then m
+                                 else B.setMeta (T.unpack k)
+                                            (yamlToMeta opts v) m)
+                           nullMeta hashmap
+                Right Yaml.Null -> return $ return nullMeta
+                Right _ -> do
+                            addWarning (Just pos) "YAML header is not an object"
+                            return $ return nullMeta
+                Left err' -> do
+                         case err' of
+                            InvalidYaml (Just YamlParseException{
+                                        yamlProblem = problem
+                                      , yamlContext = _ctxt
+                                      , yamlProblemMark = Yaml.YamlMark {
+                                            yamlLine = yline
+                                          , yamlColumn = ycol
+                                      }}) ->
+                                 addWarning (Just $ setSourceLine
+                                    (setSourceColumn pos
+                                       (sourceColumn pos + ycol))
+                                    (sourceLine pos + 1 + yline))
+                                    $ "Could not parse YAML header: " ++
+                                        problem
+                            _ -> addWarning (Just pos)
+                                    $ "Could not parse YAML header: " ++
+                                        show err'
+                         return $ return nullMeta
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+  return mempty
 
 -- ignore fields ending with _
 ignorable :: Text -> Bool
@@ -295,13 +300,13 @@ yamlToMeta _ _ = MetaString ""
 stopLine :: MarkdownParser ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
-mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+mmdTitleBlock :: MarkdownParser ()
 mmdTitleBlock = try $ do
   guardEnabled Ext_mmd_title_block
   kvPairs <- many1 kvPair
   blanklines
-  return $ return $ \(Pandoc m bs) ->
-             Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                             return (Meta $ M.fromList kvPairs) }
 
 kvPair :: MarkdownParser (String, MetaValue)
 kvPair = try $ do
@@ -318,15 +323,14 @@ parseMarkdown = do
   updateState $ \state -> state { stateOptions =
                 let oldOpts = stateOptions state in
                     oldOpts{ readerParseRaw = True } }
-  titleTrans <- option (return id) titleBlock
+  optional titleBlock
   blocks <- parseBlocks
   st <- getState
+  let meta = runF (stateMeta' st) st
+  let Pandoc _ bs = B.doc $ runF blocks st
   mbsty <- getOption readerCitationStyle
   refs <- getOption readerReferences
-  return $ processBiblio mbsty refs
-         $ runF titleTrans st
-         $ B.doc
-         $ runF blocks st
+  return $ processBiblio mbsty refs $ Pandoc meta bs
 
 addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
 addWarning mbpos msg =
@@ -442,6 +446,7 @@ parseBlocks = mconcat <$> manyTill block eof
 block :: MarkdownParser (F Blocks)
 block = choice [ mempty <$ blanklines
                , codeBlockFenced
+               , yamlMetaBlock
                , guardEnabled Ext_latex_macros *> (macro >>= return . return)
                , header
                , lhsCodeBlock
-- 
cgit v1.2.3


From e8ddcfd997bd1733b715a4321f0e57c7860071d2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 19 Aug 2013 16:03:22 -0700
Subject: Scale LaTeX tables so they don't exceed columnwidth.

---
 src/Text/Pandoc/Writers/LaTeX.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 98553c421..ab579a326 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -470,9 +470,13 @@ tableRowToLaTeX header aligns widths cols = do
                   AlignRight   -> "\\raggedleft"
                   AlignCenter  -> "\\centering"
                   AlignDefault -> "\\raggedright"
+  -- scale factor compensates for extra space between columns
+  -- so the whole table isn't larger than columnwidth
+  let scaleFactor = 0.97 ** fromIntegral (length aligns)
   let toCell 0 _ c = c
       toCell w a c = "\\begin{minipage}" <> valign <>
-                     braces (text (printf "%.2f\\columnwidth" w)) <>
+                     braces (text (printf "%.2f\\columnwidth"
+                                    (w * scaleFactor))) <>
                      (halign a <> cr <> c <> cr) <> "\\end{minipage}"
   let cells = zipWith3 toCell widths aligns renderedCells
   return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}"
-- 
cgit v1.2.3


From 7048c130ec9d128dd1c9d1ddf8e7ce3c15eaf435 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 18 Aug 2013 23:01:23 -0700
Subject: Create Cite element even if no matching reference in the biblio.

* Add ??? as fallback text for non-resolved citations.
* Biblio:  Put references (including a header at the end of
  the document, if one exists) inside a Div with class "references".
  This gives some control over styling of references, and allows
  scripts to manipulate them.
* Markdown writer:  Print markdown citation codes, and disable
  printing of references, if `citations` extension is enabled.

NOTE:  It would be good to improve what citeproc-hs does for
a nonexistent key.
---
 src/Text/Pandoc/Biblio.hs           |  5 ++++-
 src/Text/Pandoc/Readers/Markdown.hs | 21 ++++++++++++---------
 src/Text/Pandoc/Writers/Markdown.hs | 35 +++++++++++++++++++++--------------
 3 files changed, 37 insertions(+), 24 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index 206b38530..1c0975f11 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -55,7 +55,10 @@ processBiblio (Just style) r p =
       cits_map   = M.fromList $ zip grps (citations result)
       biblioList = map (renderPandoc' style) (bibliography result)
       Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p'
-  in  Pandoc m $ b ++ biblioList
+      (bs, lastb) = case reverse b of
+                         x@(Header _ _ _) : xs -> (reverse xs, [x])
+                         _                     -> (b,  [])
+  in  Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)]
 
 -- | Substitute 'Cite' elements with formatted citations.
 processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a653c2e98..05662d9b5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -55,7 +55,6 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
 import Text.Pandoc.Biblio (processBiblio)
-import qualified Text.CSL as CSL
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -1797,11 +1796,13 @@ rawHtmlInline = do
 cite :: MarkdownParser (F Inlines)
 cite = do
   guardEnabled Ext_citations
-  getOption readerReferences >>= guard . not . null
-  citations <- textualCite <|> normalCite
-  return $ flip B.cite mempty <$> citations
+  citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite)
+  return citations
+
+unknownC :: Inlines
+unknownC = B.str "???"
 
-textualCite :: MarkdownParser (F [Citation])
+textualCite :: MarkdownParser (F Inlines)
 textualCite = try $ do
   (_, key) <- citeKey
   let first = Citation{ citationId      = key
@@ -1813,8 +1814,12 @@ textualCite = try $ do
                       }
   mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
   case mbrest of
-       Just rest  -> return $ (first:) <$> rest
-       Nothing    -> option (return [first]) $ bareloc first
+       Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest
+       Nothing   -> (fmap (flip B.cite unknownC) <$> bareloc first) <|>
+                    return (do st <- askF
+                               return $ case M.lookup key (stateExamples st) of
+                                              Just n -> B.str (show n)
+                                              _      -> B.cite [first] unknownC)
 
 bareloc :: Citation -> MarkdownParser (F [Citation])
 bareloc c = try $ do
@@ -1846,8 +1851,6 @@ citeKey = try $ do
   let internal p = try $ p >>~ lookAhead (letter <|> digit)
   rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
   let key = first:rest
-  citations' <- map CSL.refId <$> getOption readerReferences
-  guard $ key `elem` citations'
   return (suppress_author, key)
 
 suffix :: MarkdownParser (F Inlines)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 623c445df..d617954dd 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -186,7 +186,12 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
   let toc = if writerTableOfContents opts
                then tableOfContents opts headerBlocks
                else empty
-  body <- blockListToMarkdown opts blocks
+  -- Strip off final 'references' header if markdown citations enabled
+  let blocks' = case reverse blocks of
+                     (Div (_,["references"],_) _):xs
+                       | isEnabled Ext_citations opts -> reverse xs
+                     _ -> blocks
+  body <- blockListToMarkdown opts blocks'
   st <- get
   notes' <- notesToMarkdown opts (reverse $ stNotes st)
   st' <- get  -- note that the notes may contain refs
@@ -304,10 +309,10 @@ blockToMarkdown _ Null = return empty
 blockToMarkdown opts (Div attrs ils) = do
   isPlain <- gets stPlain
   contents <- blockListToMarkdown opts ils
-  return $ if isPlain
+  return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts)
               then contents <> blankline
               else tagWithAttrs "div" attrs <> blankline <>
-                     contents <> blankline <> "</div>" <> blankline
+                      contents <> blankline <> "</div>" <> blankline
 blockToMarkdown opts (Plain inlines) = do
   contents <- inlineListToMarkdown opts inlines
   return $ contents <> cr
@@ -711,17 +716,20 @@ inlineToMarkdown opts (LineBreak)
   | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr
   | otherwise                              = return $ "  " <> cr
 inlineToMarkdown _ Space = return space
-inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _])
+inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (Cite (c:cs) lst)
   | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
-  | citationMode c == AuthorInText = do
-    suffs <- inlineListToMarkdown opts $ citationSuffix c
-    rest <- mapM convertOne cs
-    let inbr = suffs <+> joincits rest
-        br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
-    return $ text ("@" ++ citationId c) <+> br
-  | otherwise = do
-    cits <- mapM convertOne (c:cs)
-    return $ text "[" <> joincits cits <> text "]"
+  | otherwise =
+      if citationMode c == AuthorInText
+         then do
+           suffs <- inlineListToMarkdown opts $ citationSuffix c
+           rest <- mapM convertOne cs
+           let inbr = suffs <+> joincits rest
+               br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
+           return $ text ("@" ++ citationId c) <+> br
+         else do
+           cits <- mapM convertOne (c:cs)
+           return $ text "[" <> joincits cits <> text "]"
   where
         joincits = hcat . intersperse (text "; ") . filter (not . isEmpty)
         convertOne Citation { citationId      = k
@@ -738,7 +746,6 @@ inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _])
            return $ pdoc <+> r
         modekey SuppressAuthor = "-"
         modekey _              = ""
-inlineToMarkdown opts (Cite _ lst) = inlineListToMarkdown opts lst
 inlineToMarkdown opts (Link txt (src, tit)) = do
   linktext <- inlineListToMarkdown opts txt
   let linktitle = if null tit
-- 
cgit v1.2.3


From 0b5156cc7e6e77bb072e6f4e09f6468f6d8a8f60 Mon Sep 17 00:00:00 2001
From: Scott Morrison <scott@tqft.net>
Date: Wed, 21 Aug 2013 16:04:06 +1000
Subject: adding some cedilla characters to the LaTeX reader

---
 src/Text/Pandoc/Readers/LaTeX.hs | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 414e50fc8..37cec697b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -673,6 +673,14 @@ cedilla 'c' = 'ç'
 cedilla 'C' = 'Ç'
 cedilla 's' = 'ş'
 cedilla 'S' = 'Ş'
+cedilla 't' = 'ţ'
+cedilla 'T' = 'Ţ'
+cedilla 'e' = 'ȩ'
+cedilla 'E' = 'Ȩ'
+cedilla 'h' = 'ḩ'
+cedilla 'H' = 'Ḩ'
+cedilla 'o' = 'o̧'
+cedilla 'O' = ''O̧
 cedilla c = c
 
 hacek :: Char -> Char
-- 
cgit v1.2.3


From 5b97b150cc63a5cab48a544e2b15881409702781 Mon Sep 17 00:00:00 2001
From: Scott Morrison <scott@tqft.net>
Date: Wed, 21 Aug 2013 16:10:42 +1000
Subject: cedilla-o breaks the compile, removing again

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 37cec697b..20ed88717 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -679,8 +679,6 @@ cedilla 'e' = 'ȩ'
 cedilla 'E' = 'Ȩ'
 cedilla 'h' = 'ḩ'
 cedilla 'H' = 'Ḩ'
-cedilla 'o' = 'o̧'
-cedilla 'O' = ''O̧
 cedilla c = c
 
 hacek :: Char -> Char
-- 
cgit v1.2.3


From 1d91e2cdb380a22b8d988291d726dd1612318b80 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 21 Aug 2013 20:07:36 -0700
Subject: LaTeX reader: Added o-cedilla.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ded57df5a..028d83e24 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -681,6 +681,8 @@ cedilla 'e' = 'ȩ'
 cedilla 'E' = 'Ȩ'
 cedilla 'h' = 'ḩ'
 cedilla 'H' = 'Ḩ'
+cedilla 'o' = 'o̧'
+cedilla 'O' = 'O̧'
 cedilla c = c
 
 hacek :: Char -> Char
-- 
cgit v1.2.3


From 5f09cf7ff033ae11c5094fe39f8cd2ac11657229 Mon Sep 17 00:00:00 2001
From: Florian Eitel <florian.eitel@wooga.net>
Date: Thu, 22 Aug 2013 20:15:36 +0200
Subject: Write id for code block to label attr in latex when listing is used

The code:

    ~~~{#test}
    asdf
    ~~~

gets compiled to html:

    <pre id="test">
    asdf
    </pre>

So it is possible to link to the identifier `test`

But this doesn't happen on latex

When using the listings package (`--listings`) it is possible to set the
identifier using the `label=test` property:

    \begin{lstlisting}[label=id]
    hi
    \end{lstlisting}

And this is exactly what this patch is doing.

Modified LaTeX Reader/Writer and added tests for this.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 3 ++-
 src/Text/Pandoc/Writers/LaTeX.hs | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ded57df5a..b785a9852 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,6 +47,7 @@ import Text.Pandoc.Builder
 import Data.Char (isLetter)
 import Control.Applicative
 import Data.Monoid
+import Data.Maybe (fromMaybe)
 import System.Environment (getEnv)
 import System.FilePath (replaceExtension, (</>))
 import Data.List (intercalate, intersperse)
@@ -901,7 +902,7 @@ environments = M.fromList
                                       lookup "numbers" options == Just "left" ]
                                  ++ maybe [] (:[]) (lookup "language" options
                                          >>= fromListingsLanguage)
-                      let attr = ("",classes,kvs)
+                      let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
                       codeBlockWith attr <$> (verbEnv "lstlisting"))
   , ("minted",     do options <- option [] keyvals
                       lang <- grouped (many1 $ satisfy (/='}'))
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index ab579a326..bf056001f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -313,7 +313,7 @@ blockToLaTeX (BlockQuote lst) = do
        _ -> do
          contents <- blockListToLaTeX lst
          return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
-blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
+blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
   opts <- gets stOptions
   case () of
      _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
@@ -344,7 +344,11 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
                              [ (if key == "startFrom"
                                    then "firstnumber"
                                    else key) ++ "=" ++ attr |
-                                   (key,attr) <- keyvalAttr ]
+                                   (key,attr) <- keyvalAttr ] ++
+                             (if identifier == ""
+                                   then []
+                                   else [ "label=" ++ identifier ])
+
                         else []
                printParams
                    | null params = empty
-- 
cgit v1.2.3


From 74250b6c351180cb350150b8069824111193b913 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 24 Aug 2013 16:10:13 -0700
Subject: Moved most of Text.Pandoc.Readers.TeXMath to texmath 0.6.4.

---
 src/Text/Pandoc/Readers/TeXMath.hs | 84 +-------------------------------------
 1 file changed, 2 insertions(+), 82 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index fe49a992e..1f7088f72 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -30,93 +30,13 @@ Conversion of TeX math to a list of 'Pandoc' inline elements.
 module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where
 
 import Text.Pandoc.Definition
-import Text.TeXMath.Types
-import Text.TeXMath.Parser
+import Text.TeXMath
 
 -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
 -- Defaults to raw formula between @$@ characters if entire formula
 -- can't be converted.
 readTeXMath :: String    -- ^ String to parse (assumes @'\n'@ line endings)
             -> [Inline]
-readTeXMath inp = case texMathToPandoc inp of
+readTeXMath inp = case texMathToPandoc DisplayInline inp of
                         Left _    -> [Str ("$" ++ inp ++ "$")]
                         Right res -> res
-
-texMathToPandoc :: String -> Either String [Inline]
-texMathToPandoc inp = inp `seq`
-  case parseFormula inp of
-         Left err    -> Left err
-         Right exps  -> case expsToInlines exps of
-                             Nothing  -> Left "Formula too complex for [Inline]"
-                             Just r   -> Right r
-
-expsToInlines :: [Exp] -> Maybe [Inline]
-expsToInlines xs = do
-  res <- mapM expToInlines xs
-  return (concat res)
-
-expToInlines :: Exp -> Maybe [Inline]
-expToInlines (ENumber s) = Just [Str s]
-expToInlines (EIdentifier s) = Just [Emph [Str s]]
-expToInlines (EMathOperator s) = Just [Str s]
-expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
-  where addSpace Op x = [x, thinspace]
-        addSpace Bin x = [medspace, x, medspace]
-        addSpace Rel x = [widespace, x, widespace]
-        addSpace Pun x = [x, thinspace]
-        addSpace _ x = [x]
-        thinspace = Str "\x2006"
-        medspace  = Str "\x2005"
-        widespace = Str "\x2004"
-expToInlines (EStretchy x) = expToInlines x
-expToInlines (EDelimited start end xs) = do
-  xs' <- mapM expToInlines xs
-  return $ [Str start] ++ concat xs' ++ [Str end]
-expToInlines (EGrouped xs) = expsToInlines xs
-expToInlines (ESpace "0.167em") = Just [Str "\x2009"]
-expToInlines (ESpace "0.222em") = Just [Str "\x2005"]
-expToInlines (ESpace "0.278em") = Just [Str "\x2004"]
-expToInlines (ESpace "0.333em") = Just [Str "\x2004"]
-expToInlines (ESpace "1em")     = Just [Str "\x2001"]
-expToInlines (ESpace "2em")     = Just [Str "\x2001\x2001"]
-expToInlines (ESpace _)         = Just [Str " "]
-expToInlines (EBinary _ _ _) = Nothing
-expToInlines (ESub x y) = do
-  x' <- expToInlines x
-  y' <- expToInlines y
-  return $ x' ++ [Subscript y']
-expToInlines (ESuper x y) = do
-  x' <- expToInlines x
-  y' <- expToInlines y
-  return $ x' ++ [Superscript y']
-expToInlines (ESubsup x y z) = do
-  x' <- expToInlines x
-  y' <- expToInlines y
-  z' <- expToInlines z
-  return $ x' ++ [Subscript y'] ++ [Superscript z']
-expToInlines (EDown x y) = expToInlines (ESub x y)
-expToInlines (EUp x y) = expToInlines (ESuper x y)
-expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
-expToInlines (EText TextNormal x) = Just [Str x]
-expToInlines (EText TextBold x) = Just [Strong [Str x]]
-expToInlines (EText TextMonospace x) = Just [Code nullAttr x]
-expToInlines (EText TextItalic x) = Just [Emph [Str x]]
-expToInlines (EText _ x) = Just [Str x]
-expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
-    case accent of
-         '\x203E' -> Just [Emph [Str [c,'\x0304']]]  -- bar
-         '\x00B4' -> Just [Emph [Str [c,'\x0301']]]  -- acute
-         '\x0060' -> Just [Emph [Str [c,'\x0300']]]  -- grave
-         '\x02D8' -> Just [Emph [Str [c,'\x0306']]]  -- breve
-         '\x02C7' -> Just [Emph [Str [c,'\x030C']]]  -- check
-         '.'      -> Just [Emph [Str [c,'\x0307']]]  -- dot
-         '\x00B0' -> Just [Emph [Str [c,'\x030A']]]  -- ring
-         '\x20D7' -> Just [Emph [Str [c,'\x20D7']]]  -- arrow right
-         '\x20D6' -> Just [Emph [Str [c,'\x20D6']]]  -- arrow left
-         '\x005E' -> Just [Emph [Str [c,'\x0302']]]  -- hat
-         '\x0302' -> Just [Emph [Str [c,'\x0302']]]  -- hat
-         '~'      -> Just [Emph [Str [c,'\x0303']]]  -- tilde
-         _        -> Nothing
-expToInlines _ = Nothing
-
-
-- 
cgit v1.2.3


From deb59b62354e38df9c85ce6985e5c28dd2301ee7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 24 Aug 2013 22:27:08 -0700
Subject: Removed dependency on citeproc-hs.

Going forward we'll use pandoc-citeproc, as an external filter.

The `--bibliography`, `--csl`, and `--citation-abbreviation` fields
have been removed.  Instead one must include `bibliography`, `csl`,
or `csl-abbrevs` fields in the document's YAML metadata.  The filter
can then be used as follows:

    pandoc --filter pandoc-citeproc

The `Text.Pandoc.Biblio` module has been removed.  Henceforth,
`Text.CSL.Pandoc` from pandoc-citations can be used by library users.

The Markdown and LaTeX readers now longer format bibliographies and
citations.  That must be done using `processCites` or `processCites'`
from Text.CSL.Pandoc.

All bibliography-related fields have been removed from `ReaderOptions`
and `WriterOptions`: `writerBiblioFiles`, `readerReferences`,
`readerCitationStyle`.

API change.
---
 src/Text/Pandoc/Biblio.hs           | 216 ------------------------------------
 src/Text/Pandoc/Options.hs          |   7 --
 src/Text/Pandoc/Readers/LaTeX.hs    |   5 +-
 src/Text/Pandoc/Readers/Markdown.hs |   5 +-
 src/Text/Pandoc/Writers/LaTeX.hs    |   8 +-
 5 files changed, 4 insertions(+), 237 deletions(-)
 delete mode 100644 src/Text/Pandoc/Biblio.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
deleted file mode 100644
index 1c0975f11..000000000
--- a/src/Text/Pandoc/Biblio.hs
+++ /dev/null
@@ -1,216 +0,0 @@
-{-# LANGUAGE PatternGuards #-}
-{-
-Copyright (C) 2008 Andrea Rossato <andrea.rossato@ing.unitn.it>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Biblio
-   Copyright   : Copyright (C) 2008-2010 Andrea Rossato
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : Andrea Rossato <andrea.rossato@unitn.it>
-   Stability   : alpha
-   Portability : portable
--}
-
-module Text.Pandoc.Biblio ( processBiblio ) where
-
-import Data.List
-import Data.Char ( isDigit, isPunctuation )
-import qualified Data.Map as M
-import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct )
-import qualified Text.CSL as CSL ( Cite(..) )
-import Text.Pandoc.Definition
-import Text.Pandoc.Generic
-import Text.Pandoc.Walk
-import Text.Pandoc.Shared (stringify)
-import Text.Parsec hiding (State)
-import Control.Monad
-import Control.Monad.State
-
--- | Process a 'Pandoc' document by adding citations formatted
--- according to a CSL style, using 'citeproc' from citeproc-hs.
-processBiblio :: Maybe Style -> [Reference] -> Pandoc -> Pandoc
-processBiblio Nothing _ p = p
-processBiblio _      [] p = p
-processBiblio (Just style) r p =
-  let p'         = evalState (bottomUpM setHash p) 1
-      grps       = query getCitation p'
-      result     = citeproc procOpts style r (setNearNote style $
-                      map (map toCslCite) grps)
-      cits_map   = M.fromList $ zip grps (citations result)
-      biblioList = map (renderPandoc' style) (bibliography result)
-      Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p'
-      (bs, lastb) = case reverse b of
-                         x@(Header _ _ _) : xs -> (reverse xs, [x])
-                         _                     -> (b,  [])
-  in  Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)]
-
--- | Substitute 'Cite' elements with formatted citations.
-processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline
-processCite s cs (Cite t _) =
-   case M.lookup t cs of
-        Just (x:xs)
-          | isTextualCitation t && not (null xs) ->
-             let xs' = renderPandoc s xs
-             in  if styleClass s == "note"
-                    then Cite t (renderPandoc s [x] ++ [Note [Para xs']])
-                    else Cite t (renderPandoc s [x] ++ [Space | not (startWithPunct xs')] ++ xs')
-          | otherwise -> if styleClass s == "note"
-                            then Cite t [Note [Para $ renderPandoc s (x:xs)]]
-                            else Cite t (renderPandoc s (x:xs))
-        _             -> Strong [Str "???"]  -- TODO raise error instead?
-processCite _ _ x = x
-
-isNote :: Inline -> Bool
-isNote (Note _) = True
-isNote (Cite _ [Note _]) = True
-isNote _ = False
-
-mvPunct :: [Inline] -> [Inline]
-mvPunct (Space : Space : xs) = Space : xs
-mvPunct (Space : x : ys) | isNote x, startWithPunct ys =
-   Str (headInline ys) : x : tailFirstInlineStr ys
-mvPunct (Space : x : ys) | isNote x = x : ys
-mvPunct xs = xs
-
--- A replacement for citeproc-hs's endWithPunct, which wrongly treats
--- a sentence ending in '.)' as not ending with punctuation, leading
--- to an extra period.
-endWithPunct :: [Inline] -> Bool
-endWithPunct [] = True
-endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of
-                              []                       -> True
-                              (')':c:_) | isEndPunct c -> True
-                              (c:_) | isEndPunct c     -> True
-                                    | otherwise        -> False
-  where isEndPunct c = c `elem` ".,;:!?"
-
-deNote :: Pandoc -> Pandoc
-deNote = topDown go
-  where go (Cite (c:cs) [Note xs]) =
-            Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs]
-        go (Note xs) = Note $ bottomUp go' xs
-        go x = x
-        go' (Note [Para xs]:ys) =
-             if startWithPunct ys && endWithPunct xs
-                then initInline xs ++ ys
-                else xs ++ ys
-        go' xs = xs
-        sanitize :: Citation -> [Block] -> [Block]
-        sanitize Citation{citationPrefix = pref} [Para xs] =
-           case (null pref, endWithPunct xs) of
-                (True, False)  -> [Para $ xs ++ [Str "."]]
-                (True, True)   -> [Para xs]
-                (False, False) -> [Para $ toCapital $ xs ++ [Str "."]]
-                (False, True)  -> [Para $ toCapital xs]
-        sanitize _ bs = bs
-
-isTextualCitation :: [Citation] -> Bool
-isTextualCitation (c:_) = citationMode c == AuthorInText
-isTextualCitation _     = False
-
--- | Retrieve all citations from a 'Pandoc' docuument. To be used with
--- 'query'.
-getCitation :: Inline -> [[Citation]]
-getCitation i | Cite t _ <- i = [t]
-              | otherwise     = []
-
-setHash :: Citation -> State Int Citation
-setHash c = do
-  ident <- get
-  put $ ident + 1
-  return c{ citationHash = ident }
-
-toCslCite :: Citation -> CSL.Cite
-toCslCite c
-    = let (l, s)  = locatorWords $ citationSuffix c
-          (la,lo) = parseLocator l
-          s'      = case (l,s) of
-                         -- treat a bare locator as if it begins with space
-                         -- so @item1 [blah] is like [@item1, blah]
-                         ("",(x:_))
-                           | not (isPunct x) -> [Space] ++ s
-                         _                   -> s
-          isPunct (Str (x:_)) = isPunctuation x
-          isPunct _           = False
-          citMode = case citationMode c of
-                      AuthorInText   -> (True, False)
-                      SuppressAuthor -> (False,True )
-                      NormalCitation -> (False,False)
-      in   emptyCite { CSL.citeId         = citationId c
-                     , CSL.citePrefix     = PandocText $ citationPrefix c
-                     , CSL.citeSuffix     = PandocText s'
-                     , CSL.citeLabel      = la
-                     , CSL.citeLocator    = lo
-                     , CSL.citeNoteNumber = show $ citationNoteNum c
-                     , CSL.authorInText   = fst citMode
-                     , CSL.suppressAuthor = snd citMode
-                     , CSL.citeHash       = citationHash c
-                     }
-
-locatorWords :: [Inline] -> (String, [Inline])
-locatorWords inp =
-  case parse pLocatorWords "suffix" $ breakup inp of
-       Right r   -> r
-       Left _    -> ("",inp)
-   where breakup [] = []
-         breakup (Str x : xs) = map Str (splitup x) ++ breakup xs
-         breakup (x : xs) = x : breakup xs
-         splitup = groupBy (\x y -> x /= '\160' && y /= '\160')
-
-pLocatorWords :: Parsec [Inline] st (String, [Inline])
-pLocatorWords = do
-  l <- pLocator
-  s <- getInput -- rest is suffix
-  if length l > 0 && last l == ','
-     then return (init l, Str "," : s)
-     else return (l, s)
-
-pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline
-pMatch condition = try $ do
-  t <- anyToken
-  guard $ condition t
-  return t
-
-pSpace :: Parsec [Inline] st Inline
-pSpace = pMatch (\t -> t == Space || t == Str "\160")
-
-pLocator :: Parsec [Inline] st String
-pLocator = try $ do
-  optional $ pMatch (== Str ",")
-  optional pSpace
-  f  <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default
-     <|> many1 (notFollowedBy pSpace >> anyToken)
-  gs <- many1 pWordWithDigits
-  return $ stringify f ++ (' ' : unwords gs)
-
-guardFollowingDigit :: Parsec [Inline] st ()
-guardFollowingDigit = do
-  t <- lookAhead anyToken
-  case t of
-       Str (d:_) | isDigit d -> return ()
-       _                     -> mzero
-
-pWordWithDigits :: Parsec [Inline] st String
-pWordWithDigits = try $ do
-  optional pSpace
-  r <- many1 (notFollowedBy pSpace >> anyToken)
-  let s = stringify r
-  guard $ any isDigit s
-  return s
-
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c7c37d6b8..48e418ab2 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -48,7 +48,6 @@ import Data.Set (Set)
 import qualified Data.Set as Set
 import Data.Default
 import Text.Pandoc.Highlighting (Style, pygments)
-import qualified Text.CSL as CSL
 
 -- | Individually selectable syntax extensions.
 data Extension =
@@ -205,8 +204,6 @@ data ReaderOptions = ReaderOptions{
        , readerOldDashes       :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior
                                        --   in parsing dashes; -- is em-dash;
                                        --   - before numerial is en-dash
-       , readerReferences      :: [CSL.Reference]  -- ^ Bibliographic references
-       , readerCitationStyle   :: Maybe CSL.Style -- ^ Citation style
        , readerApplyMacros     :: Bool -- ^ Apply macros to TeX math
        , readerIndentedCodeClasses :: [String] -- ^ Default classes for
                                        -- indented code blocks
@@ -223,8 +220,6 @@ instance Default ReaderOptions
                , readerColumns               = 80
                , readerTabStop               = 4
                , readerOldDashes             = False
-               , readerReferences            = []
-               , readerCitationStyle         = Nothing
                , readerApplyMacros           = True
                , readerIndentedCodeClasses   = []
                , readerDefaultImageExtension = ""
@@ -289,7 +284,6 @@ data WriterOptions = WriterOptions
   , writerSourceURL        :: Maybe String  -- ^ Absolute URL + directory of 1st source file
   , writerUserDataDir      :: Maybe FilePath -- ^ Path of user data directory
   , writerCiteMethod       :: CiteMethod -- ^ How to print cites
-  , writerBiblioFiles      :: [FilePath] -- ^ Biblio files to use for citations
   , writerHtml5            :: Bool       -- ^ Produce HTML5
   , writerHtmlQTags        :: Bool       -- ^ Use @<q>@ tags for quotes in HTML
   , writerBeamer           :: Bool       -- ^ Produce beamer LaTeX slide show
@@ -332,7 +326,6 @@ instance Default WriterOptions where
                       , writerSourceURL        = Nothing
                       , writerUserDataDir      = Nothing
                       , writerCiteMethod       = Citeproc
-                      , writerBiblioFiles      = []
                       , writerHtml5            = False
                       , writerHtmlQTags        = False
                       , writerBeamer           = False
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ded57df5a..e558ed1b9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,7 +38,6 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Walk
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Biblio (processBiblio)
 import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
@@ -67,9 +66,7 @@ parseLaTeX = do
   eof
   st <- getState
   let meta = stateMeta st
-  refs <- getOption readerReferences
-  mbsty <- getOption readerCitationStyle
-  let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs
+  let (Pandoc _ bs') = doc bs
   return $ Pandoc meta bs'
 
 type LP = Parser [Char] ParserState
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 05662d9b5..658335202 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -54,7 +54,6 @@ import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
-import Text.Pandoc.Biblio (processBiblio)
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -327,9 +326,7 @@ parseMarkdown = do
   st <- getState
   let meta = runF (stateMeta' st) st
   let Pandoc _ bs = B.doc $ runF blocks st
-  mbsty <- getOption readerCitationStyle
-  refs <- getOption readerReferences
-  return $ processBiblio mbsty refs $ Pandoc meta bs
+  return $ Pandoc meta bs
 
 addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
 addWarning mbpos msg =
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index ab579a326..6a781ddec 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -43,7 +43,6 @@ import Data.Char ( toLower, isPunctuation )
 import Control.Applicative ((<|>))
 import Control.Monad.State
 import Text.Pandoc.Pretty
-import System.FilePath (dropExtension)
 import Text.Pandoc.Slides
 import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
                                  formatLaTeXInline, formatLaTeXBlock,
@@ -120,7 +119,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
   (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
   let main = render colwidth $ vsep body
   st <- get
-  let biblioFiles = intercalate "," $ map dropExtension $  writerBiblioFiles options
   let context  =  defField "toc" (writerTableOfContents options) $
                   defField "toc-depth" (show (writerTOCDepth options -
                                               if writerChapters options
@@ -152,11 +150,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                                 $ writerHighlightStyle options )
                       else id) $
                   (case writerCiteMethod options of
-                         Natbib   -> defField "biblio-files" biblioFiles .
-                                     defField "biblio-title" biblioTitle .
+                         Natbib   -> defField "biblio-title" biblioTitle .
                                      defField "natbib" True
-                         Biblatex -> defField "biblio-files" biblioFiles .
-                                     defField "biblio-title" biblioTitle .
+                         Biblatex -> defField "biblio-title" biblioTitle .
                                      defField "biblatex" True
                          _        -> id) $
                   metadata
-- 
cgit v1.2.3


From 80148095781b44c7f5af132b48605adaa93a0558 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 27 Aug 2013 20:12:21 -0700
Subject: LaTeX reader: Allow accents with combining characters.

accent now returns [Char], not Char.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 370 +++++++++++++++++++--------------------
 1 file changed, 185 insertions(+), 185 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 5d73134cd..b9ca986fb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -543,196 +543,196 @@ doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '
 lit :: String -> LP Inlines
 lit = pure . str
 
-accent :: (Char -> Char) -> Inlines -> LP Inlines
+accent :: (Char -> String) -> Inlines -> LP Inlines
 accent f ils =
   case toList ils of
-       (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys)
+       (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys)
        []                -> mzero
        _                 -> return ils
 
-grave :: Char -> Char
-grave 'A' = 'À'
-grave 'E' = 'È'
-grave 'I' = 'Ì'
-grave 'O' = 'Ò'
-grave 'U' = 'Ù'
-grave 'a' = 'à'
-grave 'e' = 'è'
-grave 'i' = 'ì'
-grave 'o' = 'ò'
-grave 'u' = 'ù'
-grave c   = c
-
-acute :: Char -> Char
-acute 'A' = 'Á'
-acute 'E' = 'É'
-acute 'I' = 'Í'
-acute 'O' = 'Ó'
-acute 'U' = 'Ú'
-acute 'Y' = 'Ý'
-acute 'a' = 'á'
-acute 'e' = 'é'
-acute 'i' = 'í'
-acute 'o' = 'ó'
-acute 'u' = 'ú'
-acute 'y' = 'ý'
-acute 'C' = 'Ć'
-acute 'c' = 'ć'
-acute 'L' = 'Ĺ'
-acute 'l' = 'ĺ'
-acute 'N' = 'Ń'
-acute 'n' = 'ń'
-acute 'R' = 'Ŕ'
-acute 'r' = 'ŕ'
-acute 'S' = 'Ś'
-acute 's' = 'ś'
-acute 'Z' = 'Ź'
-acute 'z' = 'ź'
-acute c = c
-
-circ :: Char -> Char
-circ 'A' = 'Â'
-circ 'E' = 'Ê'
-circ 'I' = 'Î'
-circ 'O' = 'Ô'
-circ 'U' = 'Û'
-circ 'a' = 'â'
-circ 'e' = 'ê'
-circ 'i' = 'î'
-circ 'o' = 'ô'
-circ 'u' = 'û'
-circ 'C' = 'Ĉ'
-circ 'c' = 'ĉ'
-circ 'G' = 'Ĝ'
-circ 'g' = 'ĝ'
-circ 'H' = 'Ĥ'
-circ 'h' = 'ĥ'
-circ 'J' = 'Ĵ'
-circ 'j' = 'ĵ'
-circ 'S' = 'Ŝ'
-circ 's' = 'ŝ'
-circ 'W' = 'Ŵ'
-circ 'w' = 'ŵ'
-circ 'Y' = 'Ŷ'
-circ 'y' = 'ŷ'
-circ c = c
-
-tilde :: Char -> Char
-tilde 'A' = 'Ã'
-tilde 'a' = 'ã'
-tilde 'O' = 'Õ'
-tilde 'o' = 'õ'
-tilde 'I' = 'Ĩ'
-tilde 'i' = 'ĩ'
-tilde 'U' = 'Ũ'
-tilde 'u' = 'ũ'
-tilde 'N' = 'Ñ'
-tilde 'n' = 'ñ'
-tilde c   = c
-
-umlaut :: Char -> Char
-umlaut 'A' = 'Ä'
-umlaut 'E' = 'Ë'
-umlaut 'I' = 'Ï'
-umlaut 'O' = 'Ö'
-umlaut 'U' = 'Ü'
-umlaut 'a' = 'ä'
-umlaut 'e' = 'ë'
-umlaut 'i' = 'ï'
-umlaut 'o' = 'ö'
-umlaut 'u' = 'ü'
-umlaut c = c
-
-dot :: Char -> Char
-dot 'C' = 'Ċ'
-dot 'c' = 'ċ'
-dot 'E' = 'Ė'
-dot 'e' = 'ė'
-dot 'G' = 'Ġ'
-dot 'g' = 'ġ'
-dot 'I' = 'İ'
-dot 'Z' = 'Ż'
-dot 'z' = 'ż'
-dot c = c
-
-macron :: Char -> Char
-macron 'A' = 'Ā'
-macron 'E' = 'Ē'
-macron 'I' = 'Ī'
-macron 'O' = 'Ō'
-macron 'U' = 'Ū'
-macron 'a' = 'ā'
-macron 'e' = 'ē'
-macron 'i' = 'ī'
-macron 'o' = 'ō'
-macron 'u' = 'ū'
-macron c = c
-
-cedilla :: Char -> Char
-cedilla 'c' = 'ç'
-cedilla 'C' = 'Ç'
-cedilla 's' = 'ş'
-cedilla 'S' = 'Ş'
-cedilla 't' = 'ţ'
-cedilla 'T' = 'Ţ'
-cedilla 'e' = 'ȩ'
-cedilla 'E' = 'Ȩ'
-cedilla 'h' = 'ḩ'
-cedilla 'H' = 'Ḩ'
-cedilla 'o' = 'o̧'
-cedilla 'O' = 'O̧'
-cedilla c = c
-
-hacek :: Char -> Char
-hacek 'A' = 'Ǎ'
-hacek 'a' = 'ǎ'
-hacek 'C' = 'Č'
-hacek 'c' = 'č'
-hacek 'D' = 'Ď'
-hacek 'd' = 'ď'
-hacek 'E' = 'Ě'
-hacek 'e' = 'ě'
-hacek 'G' = 'Ǧ'
-hacek 'g' = 'ǧ'
-hacek 'H' = 'Ȟ'
-hacek 'h' = 'ȟ'
-hacek 'I' = 'Ǐ'
-hacek 'i' = 'ǐ'
-hacek 'j' = 'ǰ'
-hacek 'K' = 'Ǩ'
-hacek 'k' = 'ǩ'
-hacek 'L' = 'Ľ'
-hacek 'l' = 'ľ'
-hacek 'N' = 'Ň'
-hacek 'n' = 'ň'
-hacek 'O' = 'Ǒ'
-hacek 'o' = 'ǒ'
-hacek 'R' = 'Ř'
-hacek 'r' = 'ř'
-hacek 'S' = 'Š'
-hacek 's' = 'š'
-hacek 'T' = 'Ť'
-hacek 't' = 'ť'
-hacek 'U' = 'Ǔ'
-hacek 'u' = 'ǔ'
-hacek 'Z' = 'Ž'
-hacek 'z' = 'ž'
-hacek c   = c
-
-breve :: Char -> Char
-breve 'A' = 'Ă'
-breve 'a' = 'ă'
-breve 'E' = 'Ĕ'
-breve 'e' = 'ĕ'
-breve 'G' = 'Ğ'
-breve 'g' = 'ğ'
-breve 'I' = 'Ĭ'
-breve 'i' = 'ĭ'
-breve 'O' = 'Ŏ'
-breve 'o' = 'ŏ'
-breve 'U' = 'Ŭ'
-breve 'u' = 'ŭ'
-breve c   = c
+grave :: Char -> String
+grave 'A' = "À"
+grave 'E' = "È"
+grave 'I' = "Ì"
+grave 'O' = "Ò"
+grave 'U' = "Ù"
+grave 'a' = "à"
+grave 'e' = "è"
+grave 'i' = "ì"
+grave 'o' = "ò"
+grave 'u' = "ù"
+grave c   = [c]
+
+acute :: Char -> String
+acute 'A' = "Á"
+acute 'E' = "É"
+acute 'I' = "Í"
+acute 'O' = "Ó"
+acute 'U' = "Ú"
+acute 'Y' = "Ý"
+acute 'a' = "á"
+acute 'e' = "é"
+acute 'i' = "í"
+acute 'o' = "ó"
+acute 'u' = "ú"
+acute 'y' = "ý"
+acute 'C' = "Ć"
+acute 'c' = "ć"
+acute 'L' = "Ĺ"
+acute 'l' = "ĺ"
+acute 'N' = "Ń"
+acute 'n' = "ń"
+acute 'R' = "Ŕ"
+acute 'r' = "ŕ"
+acute 'S' = "Ś"
+acute 's' = "ś"
+acute 'Z' = "Ź"
+acute 'z' = "ź"
+acute c   = [c]
+
+circ :: Char -> String
+circ 'A' = "Â"
+circ 'E' = "Ê"
+circ 'I' = "Î"
+circ 'O' = "Ô"
+circ 'U' = "Û"
+circ 'a' = "â"
+circ 'e' = "ê"
+circ 'i' = "î"
+circ 'o' = "ô"
+circ 'u' = "û"
+circ 'C' = "Ĉ"
+circ 'c' = "ĉ"
+circ 'G' = "Ĝ"
+circ 'g' = "ĝ"
+circ 'H' = "Ĥ"
+circ 'h' = "ĥ"
+circ 'J' = "Ĵ"
+circ 'j' = "ĵ"
+circ 'S' = "Ŝ"
+circ 's' = "ŝ"
+circ 'W' = "Ŵ"
+circ 'w' = "ŵ"
+circ 'Y' = "Ŷ"
+circ 'y' = "ŷ"
+circ c   = [c]
+
+tilde :: Char -> String
+tilde 'A' = "Ã"
+tilde 'a' = "ã"
+tilde 'O' = "Õ"
+tilde 'o' = "õ"
+tilde 'I' = "Ĩ"
+tilde 'i' = "ĩ"
+tilde 'U' = "Ũ"
+tilde 'u' = "ũ"
+tilde 'N' = "Ñ"
+tilde 'n' = "ñ"
+tilde c   = [c]
+
+umlaut :: Char -> String
+umlaut 'A' = "Ä"
+umlaut 'E' = "Ë"
+umlaut 'I' = "Ï"
+umlaut 'O' = "Ö"
+umlaut 'U' = "Ü"
+umlaut 'a' = "ä"
+umlaut 'e' = "ë"
+umlaut 'i' = "ï"
+umlaut 'o' = "ö"
+umlaut 'u' = "ü"
+umlaut c   = [c]
+
+dot :: Char -> String
+dot 'C' = "Ċ"
+dot 'c' = "ċ"
+dot 'E' = "Ė"
+dot 'e' = "ė"
+dot 'G' = "Ġ"
+dot 'g' = "ġ"
+dot 'I' = "İ"
+dot 'Z' = "Ż"
+dot 'z' = "ż"
+dot c   = [c]
+
+macron :: Char -> String
+macron 'A' = "Ā"
+macron 'E' = "Ē"
+macron 'I' = "Ī"
+macron 'O' = "Ō"
+macron 'U' = "Ū"
+macron 'a' = "ā"
+macron 'e' = "ē"
+macron 'i' = "ī"
+macron 'o' = "ō"
+macron 'u' = "ū"
+macron c   = [c]
+
+cedilla :: Char -> String
+cedilla 'c' = "ç"
+cedilla 'C' = "Ç"
+cedilla 's' = "ş"
+cedilla 'S' = "Ş"
+cedilla 't' = "ţ"
+cedilla 'T' = "Ţ"
+cedilla 'e' = "ȩ"
+cedilla 'E' = "Ȩ"
+cedilla 'h' = "ḩ"
+cedilla 'H' = "Ḩ"
+cedilla 'o' = "o̧"
+cedilla 'O' = "O̧"
+cedilla c   = [c]
+
+hacek :: Char -> String
+hacek 'A' = "Ǎ"
+hacek 'a' = "ǎ"
+hacek 'C' = "Č"
+hacek 'c' = "č"
+hacek 'D' = "Ď"
+hacek 'd' = "ď"
+hacek 'E' = "Ě"
+hacek 'e' = "ě"
+hacek 'G' = "Ǧ"
+hacek 'g' = "ǧ"
+hacek 'H' = "Ȟ"
+hacek 'h' = "ȟ"
+hacek 'I' = "Ǐ"
+hacek 'i' = "ǐ"
+hacek 'j' = "ǰ"
+hacek 'K' = "Ǩ"
+hacek 'k' = "ǩ"
+hacek 'L' = "Ľ"
+hacek 'l' = "ľ"
+hacek 'N' = "Ň"
+hacek 'n' = "ň"
+hacek 'O' = "Ǒ"
+hacek 'o' = "ǒ"
+hacek 'R' = "Ř"
+hacek 'r' = "ř"
+hacek 'S' = "Š"
+hacek 's' = "š"
+hacek 'T' = "Ť"
+hacek 't' = "ť"
+hacek 'U' = "Ǔ"
+hacek 'u' = "ǔ"
+hacek 'Z' = "Ž"
+hacek 'z' = "ž"
+hacek c   = [c]
+
+breve :: Char -> String
+breve 'A' = "Ă"
+breve 'a' = "ă"
+breve 'E' = "Ĕ"
+breve 'e' = "ĕ"
+breve 'G' = "Ğ"
+breve 'g' = "ğ"
+breve 'I' = "Ĭ"
+breve 'i' = "ĭ"
+breve 'O' = "Ŏ"
+breve 'o' = "ŏ"
+breve 'U' = "Ŭ"
+breve 'u' = "ŭ"
+breve c   = [c]
 
 tok :: LP Inlines
 tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
-- 
cgit v1.2.3


From dd5cb82348dfb2b8febb01db8bdc98ddeac394dc Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 28 Aug 2013 08:43:51 -0700
Subject: Generalized type of stringify.

---
 src/Text/Pandoc/Shared.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index eef150351..9a9a092fc 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-}
+{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
+    FlexibleContexts #-}
 {-
 Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
 
@@ -383,10 +384,10 @@ consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
 consolidateInlines (x : xs) = x : consolidateInlines xs
 consolidateInlines [] = []
 
--- | Convert list of inlines to a string with formatting removed.
+-- | Convert pandoc structure to a string with formatting removed.
 -- Footnotes are skipped (since we don't want their contents in link
 -- labels).
-stringify :: [Inline] -> String
+stringify :: Walkable Inline a => a -> String
 stringify = query go . walk deNote
   where go :: Inline -> [Char]
         go Space = " "
-- 
cgit v1.2.3


From 940515a00ba49b9feb3d736dc071059400f83015 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 28 Aug 2013 16:54:37 -0700
Subject: LaTeX reader: allow spaces in alignment spec in tables.

E.g. `{ l r c }`.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index b9ca986fb..e91ea1e82 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1116,12 +1116,13 @@ complexNatbibCitation mode = try $ do
 parseAligns :: LP [Alignment]
 parseAligns = try $ do
   char '{'
-  optional $ char '|'
+  let maybeBar = try $ spaces >> optional (char '|')
+  maybeBar
   let cAlign = AlignCenter <$ char 'c'
   let lAlign = AlignLeft <$ char 'l'
   let rAlign = AlignRight <$ char 'r'
   let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
-  aligns' <- sepEndBy alignChar (optional $ char '|')
+  aligns' <- sepEndBy alignChar maybeBar
   spaces
   char '}'
   spaces
-- 
cgit v1.2.3


From 6ed41fdfcc3b57e88cf98b875a75ab5e1629dca6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Sep 2013 08:54:10 -0700
Subject: Factored out registerHeader from markdown reader, added to Parsing.

Text.Pandoc.Parsing now exports registerHeader, which can be
used in other readers.
---
 src/Text/Pandoc/Parsing.hs          | 32 ++++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/Markdown.hs | 30 ++----------------------------
 2 files changed, 34 insertions(+), 28 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index c16d5bb1d..701b2ef84 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -75,6 +75,7 @@ module Text.Pandoc.Parsing ( (>>~),
                              SubstTable,
                              Key (..),
                              toKey,
+                             registerHeader,
                              smartPunctuation,
                              withQuoteContext,
                              singleQuoteStart,
@@ -151,6 +152,7 @@ where
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
+import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.XML (fromEntities)
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Parsec
@@ -162,11 +164,13 @@ import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
 import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity )
+import Text.Pandoc.Asciify (toAsciiChar)
 import Data.Default
 import qualified Data.Set as Set
 import Control.Monad.Reader
 import Control.Applicative ((*>), (<*), (<$), liftA2)
 import Data.Monoid
+import Data.Maybe (catMaybes)
 
 type Parser t s = Parsec t s
 
@@ -886,6 +890,34 @@ type KeyTable = M.Map Key Target
 
 type SubstTable = M.Map Key Inlines
 
+--  | Add header to the list of headers in state, together
+--  with its associated identifier.  If the identifier is null
+--  and the auto_identifers extension is set, generate a new
+--  unique identifier, and update the list of identifiers
+--  in state.
+registerHeader :: Attr -> Inlines -> Parser s ParserState Attr
+registerHeader (ident,classes,kvs) header' = do
+  ids <- stateIdentifiers `fmap` getState
+  exts <- getOption readerExtensions
+  let insert' = M.insertWith (\_new old -> old)
+  if null ident && Ext_auto_identifiers `Set.member` exts
+     then do
+       let id' = uniqueIdent (B.toList header') ids
+       let id'' = if Ext_ascii_identifiers `Set.member` exts
+                     then catMaybes $ map toAsciiChar id'
+                     else id'
+       updateState $ \st -> st{
+              stateIdentifiers = if id' == id''
+                                    then id' : ids
+                                    else id' : id'' : ids,
+              stateHeaders = insert' header' id' $ stateHeaders st }
+       return (id'',classes,kvs)
+     else do
+        unless (null ident) $
+          updateState $ \st -> st{
+               stateHeaders = insert' header' ident $ stateHeaders st }
+        return (ident,classes,kvs)
+
 -- | Fail unless we're in "smart typography" mode.
 failUnlessSmart :: Parser [tok] ParserState ()
 failUnlessSmart = getOption readerSmart >>= guard
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 658335202..267b30032 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -49,7 +49,6 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
 import Text.Pandoc.Options
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Asciify (toAsciiChar)
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
@@ -471,31 +470,6 @@ block = choice [ mempty <$ blanklines
 header :: MarkdownParser (F Blocks)
 header = setextHeader <|> atxHeader <?> "header"
 
---  returns unique identifier
-addToHeaderList :: Attr -> F Inlines -> MarkdownParser Attr
-addToHeaderList (ident,classes,kvs) text = do
-  let header' = runF text defaultParserState
-  exts <- getOption readerExtensions
-  let insert' = M.insertWith (\_new old -> old)
-  if null ident && Ext_auto_identifiers `Set.member` exts
-     then do
-       ids <- stateIdentifiers `fmap` getState
-       let id' = uniqueIdent (B.toList header') ids
-       let id'' = if Ext_ascii_identifiers `Set.member` exts
-                     then catMaybes $ map toAsciiChar id'
-                     else id'
-       updateState $ \st -> st{
-              stateIdentifiers = if id' == id''
-                                    then id' : ids
-                                    else id' : id'' : ids,
-              stateHeaders = insert' header' id' $ stateHeaders st }
-       return (id'',classes,kvs)
-     else do
-        unless (null ident) $
-          updateState $ \st -> st{
-               stateHeaders = insert' header' ident $ stateHeaders st }
-        return (ident,classes,kvs)
-
 atxHeader :: MarkdownParser (F Blocks)
 atxHeader = try $ do
   level <- many1 (char '#') >>= return . length
@@ -504,7 +478,7 @@ atxHeader = try $ do
   skipSpaces
   text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
   attr <- atxClosing
-  attr' <- addToHeaderList attr text
+  attr' <- registerHeader attr (runF text defaultParserState)
   return $ B.headerWith attr' level <$> text
 
 atxClosing :: MarkdownParser Attr
@@ -543,7 +517,7 @@ setextHeader = try $ do
   many (char underlineChar)
   blanklines
   let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
-  attr' <- addToHeaderList attr text
+  attr' <- registerHeader attr (runF text defaultParserState)
   return $ B.headerWith attr' level <$> text
 
 --
-- 
cgit v1.2.3


From 9282f632786e85c7a31f974f20162214c5387c00 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Sep 2013 09:13:31 -0700
Subject: Use registerHeader in RST and LaTeX readers.

This will give automatic unique identifiers, unless
`-auto_identifiers` is specified.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 6 +++---
 src/Text/Pandoc/Readers/RST.hs   | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index e91ea1e82..ff5b73348 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -318,9 +318,9 @@ section (ident, classes, kvs) lvl = do
   let lvl' = if hasChapters then lvl + 1 else lvl
   skipopts
   contents <- grouped inline
-  lab <- option ident $ try $ spaces >> controlSeq "label" >>
-           spaces >> braced
-  return $ headerWith (lab, classes, kvs) lvl' contents
+  lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced)
+  attr' <- registerHeader (lab, classes, kvs) contents
+  return $ headerWith attr' lvl' contents
 
 inlineCommand :: LP Inlines
 inlineCommand = try $ do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index df0a8294d..32893128a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -275,7 +275,8 @@ doubleHeader = try $ do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return $ B.header level txt
+  attr <- registerHeader nullAttr txt
+  return $ B.headerWith attr level txt
 
 -- a header with line on the bottom only
 singleHeader :: RSTParser Blocks
@@ -295,7 +296,8 @@ singleHeader = try $ do
         Just ind -> (headerTable, ind + 1)
         Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
   setState (state { stateHeaderTable = headerTable' })
-  return $ B.header level txt
+  attr <- registerHeader nullAttr txt
+  return $ B.headerWith attr level txt
 
 --
 -- hrule block
-- 
cgit v1.2.3


From 90c49b0aaed34ef1efb8e342d80f93cb477512a7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Sep 2013 09:22:55 -0700
Subject: Use registerHeader in Textile reader.

This produces automatic header identifiers, unless `auto_identifiers`
extension is disabled.

Closes #967.
---
 src/Text/Pandoc/Readers/Textile.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8ccd1e227..23e07f621 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -52,6 +52,7 @@ TODO : refactor common patterns across readers :
 module Text.Pandoc.Readers.Textile ( readTextile) where
 
 import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
@@ -179,7 +180,8 @@ header = try $ do
   char '.'
   whitespace
   name <- normalizeSpaces <$> manyTill inline blockBreak
-  return $ Header level attr name
+  attr' <- registerHeader attr (B.fromList name)
+  return $ Header level attr' name
 
 -- | Blockquote of the form "bq. content"
 blockQuote :: Parser [Char] ParserState Block
-- 
cgit v1.2.3


From 8b0052ba5b0578814a5aca14a0e02874a10cf947 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Sep 2013 15:05:51 -0700
Subject: Mathjax in HTML slide shows:  include explicit "Typeset" instruction.

This seems to be needed for some formats (e.g. slideous) and won't
hurt in others.

Closes #966.
---
 src/Text/Pandoc/Writers/HTML.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 25079574e..63b466af3 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -143,7 +143,8 @@ pandocToHtml opts (Pandoc meta blocks) = do
                            MathJax url ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-                                       $ mempty
+                                       $ preEscapedString
+                                         "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
                            JsMath (Just url) ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-- 
cgit v1.2.3


From 9b0b9b6e03c05ca81ff3cf52787a30ea00cb3a76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Sep 2013 15:18:56 -0700
Subject: Markdown reader: Don't autolink a bare URI that is followed by
 `</a>`.

Closes #937.
---
 src/Text/Pandoc/Readers/Markdown.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 267b30032..9b98cbc3e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1662,6 +1662,7 @@ bareURL :: MarkdownParser (F Inlines)
 bareURL = try $ do
   guardEnabled Ext_autolink_bare_uris
   (orig, src) <- uri <|> emailAddress
+  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
   return $ return $ B.link src "" (B.str orig)
 
 autoLink :: MarkdownParser (F Inlines)
-- 
cgit v1.2.3


From 728e47ae15252619444a9ee91f2ceeecd4f3cf98 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 6 Sep 2013 15:40:08 -0700
Subject: MediaWiki reader:  Allow Image: for images.

Closes #971.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 8f1ff2776..2b938cd82 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -523,7 +523,7 @@ endline = () <$ try (newline <*
 image :: MWParser Inlines
 image = try $ do
   sym "[["
-  sym "File:"
+  sym "File:" <|> sym "Image:"
   fname <- many1 (noneOf "|]")
   _ <- many (try $ char '|' *> imageOption)
   caption <-   (B.str fname <$ sym "]]")
-- 
cgit v1.2.3


From 8d43e08ce7be8673cc399b948d29386f525e9e1f Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 6 Sep 2013 22:26:18 -0700
Subject: Markdown writer: Fixed bugs in YAML header output.

---
 src/Text/Pandoc/Writers/Markdown.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d617954dd..23e730bf0 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, char, space)
 import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
-import Data.Char ( isSpace )
+import Data.Char ( isSpace, isPunctuation )
 import Data.Ord ( comparing )
 import Text.Pandoc.Pretty
 import Control.Monad.State
@@ -143,7 +143,7 @@ jsonToYaml (Object hashmap) =
                  | otherwise      -> (k' <> ":") $$ x
                (k', Object _, x)  -> (k' <> ":") $$ nest 2 x
                (_, String "", _)  -> empty
-               (k', _, x)         -> k' <> ":" <> space <> x)
+               (k', _, x)         -> k' <> ":" <> space <> hang 2 "" x)
        $ sortBy (comparing fst) $ H.toList hashmap
 jsonToYaml (Array vec) =
   vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec
@@ -151,7 +151,7 @@ jsonToYaml (String "") = empty
 jsonToYaml (String s) =
   case T.unpack s of
      x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x
-       | not (any (`elem` x) "\"'#:[]{}?-") -> text x
+       | not (any isPunctuation x) -> text x
        | otherwise     -> text $ "'" ++ substitute "'" "''" x ++ "'"
 jsonToYaml (Bool b) = text $ show b
 jsonToYaml (Number n) = text $ show n
-- 
cgit v1.2.3


From 5afd373ae45f525ff1eff5e54c1850fe2c614b4b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 7 Sep 2013 09:36:37 -0700
Subject: Added `lists_without_preceding_blankline` extension.

* Added `Ext_lists_without_preceding_blankline` to
  `Extension` in `Options`.  Added this option to
  `githubMarkdownExtensions`.
* Made markdown reader sensitive to this.
* Closes #972.
---
 src/Text/Pandoc/Options.hs          | 2 ++
 src/Text/Pandoc/Readers/Markdown.hs | 1 +
 2 files changed, 3 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 48e418ab2..5f65abdde 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -80,6 +80,7 @@ data Extension =
     | Ext_link_attributes     -- ^ MMD style reference link attributes
     | Ext_autolink_bare_uris  -- ^ Make all absolute URIs into links
     | Ext_fancy_lists         -- ^ Enable fancy list numbers and delimiters
+    | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
     | Ext_startnum            -- ^ Make start number of ordered list significant
     | Ext_definition_lists    -- ^ Definition lists as in pandoc, mmd, php
     | Ext_example_lists       -- ^ Markdown-style numbered examples
@@ -169,6 +170,7 @@ githubMarkdownExtensions = Set.fromList
   , Ext_intraword_underscores
   , Ext_strikeout
   , Ext_hard_line_breaks
+  , Ext_lists_without_preceding_blankline
   ]
 
 multimarkdownExtensions :: Set Extension
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9b98cbc3e..2ca0d312a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1559,6 +1559,7 @@ endline :: MarkdownParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
+  guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
   -- parse potential list-starts differently if in a list:
-- 
cgit v1.2.3


From 56f56e5e1594ef5d18326d1eb6de3176db307c6a Mon Sep 17 00:00:00 2001
From: Merijn Verstraaten <merijn@inconsistent.nl>
Date: Sat, 7 Sep 2013 18:58:16 +0100
Subject: Added support for LaTeX style literate Haskell code blocks in rST.

---
 src/Text/Pandoc/Readers/RST.hs | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 32893128a..c12a1493a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -347,14 +347,25 @@ lhsCodeBlock = try $ do
   getPosition >>= guard . (==1) . sourceColumn
   guardEnabled Ext_literate_haskell
   optional codeBlockStart
-  lns <- many1 birdTrackLine
-  -- if (as is normal) there is always a space after >, drop it
-  let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
-                then map (drop 1) lns
-                else lns
+  lns <- latexCodeBlock <|> birdCodeBlock
   blanklines
   return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
-         $ intercalate "\n" lns'
+         $ intercalate "\n" lns
+
+latexCodeBlock :: Parser [Char] st [[Char]]
+latexCodeBlock = try $ do
+  try (latexBlockLine "\\begin{code}")
+  many1Till anyLine (try $ latexBlockLine "\\end{code}")
+ where
+  latexBlockLine s = skipMany spaceChar >> string s >> blankline
+
+birdCodeBlock :: Parser [Char] st [[Char]]
+birdCodeBlock = filterSpace <$> many1 birdTrackLine
+  where filterSpace lns =
+            -- if (as is normal) there is always a space after >, drop it
+            if all (\ln -> null ln || take 1 ln == " ") lns
+               then map (drop 1) lns
+               else lns
 
 birdTrackLine :: Parser [Char] st [Char]
 birdTrackLine = char '>' >> anyLine
-- 
cgit v1.2.3


From 2c13b6f6dc4f55b76861991dea318e3566cec9a2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 7 Sep 2013 22:43:56 -0700
Subject: MedaWiki reader:  Implement some mathjax extensions.

* `:<math>` for display math
* `\(..\)` for inline math
* `\[..\]` for display math

We omit the `$` forms as the heuristics are harder.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2b938cd82..0432915bc 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -91,7 +91,7 @@ nested p = do
   return res
 
 specialChars :: [Char]
-specialChars = "'[]<=&*{}|\""
+specialChars = "'[]<=&*{}|\":\\"
 
 spaceChars :: [Char]
 spaceChars = " \n\t"
@@ -380,8 +380,9 @@ defListItem = try $ do
   terms <- mconcat . intersperse B.linebreak <$> many defListTerm
   -- we allow dd with no dt, or dt with no dd
   defs  <- if B.isNull terms
-              then many1 $ listItem ':'
-              else many $ listItem ':'
+              then notFollowedBy (try $ string ":<math>") *>
+                       many1 (listItem ':')
+              else many (listItem ':')
   return (terms, defs)
 
 defListTerm  :: MWParser Inlines
@@ -462,6 +463,7 @@ inline =  whitespace
       <|> image
       <|> internalLink
       <|> externalLink
+      <|> math
       <|> inlineTag
       <|> B.singleton <$> charRef
       <|> inlineHtml
@@ -472,6 +474,16 @@ inline =  whitespace
 str :: MWParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
 
+math :: MWParser Inlines
+math = (B.displayMath <$> try (char ':' >> charsInTags "math"))
+   <|> (B.math <$> charsInTags "math")
+   <|> (B.displayMath <$> try (dmStart *> manyTill anyChar dmEnd))
+   <|> (B.math <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+ where dmStart = string "\\["
+       dmEnd   = try (string "\\]")
+       mStart  = string "\\("
+       mEnd    = try (string "\\)")
+
 variable :: MWParser String
 variable = try $ do
   string "{{{"
@@ -495,7 +507,6 @@ inlineTag = do
        TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
        TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
        TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
-       TagOpen "math" _ -> B.math <$> charsInTags "math"
        TagOpen "code" _ -> B.code <$> charsInTags "code"
        TagOpen "tt" _ -> B.code <$> charsInTags "tt"
        TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
-- 
cgit v1.2.3


From cf2506acdc721ec27ed310cd7bdad8affb28d1e5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 8 Sep 2013 11:43:46 -0700
Subject: Markdown: Allow backtick code blocks not to be preceded by blank
 line.

Closes #975.
---
 src/Text/Pandoc/Readers/Markdown.hs | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2ca0d312a..4a7789e17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -872,6 +872,7 @@ para = try $ do
             newline
             (blanklines >> return mempty)
               <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
             return $ do
               result' <- result
@@ -1562,6 +1563,8 @@ endline = try $ do
   guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+  guardEnabled Ext_backtick_code_blocks >>
+     notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
   -- parse potential list-starts differently if in a list:
   st <- getState
   when (stateParserContext st == ListItemState) $ do
-- 
cgit v1.2.3


From 777226296b04fa37094ecb07eb33f8d3e05af036 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 8 Sep 2013 11:49:13 -0700
Subject: markdown+list_without_preceding_blankline:+Interpret text before list
 as paragraph.

---
 src/Text/Pandoc/Readers/Markdown.hs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4a7789e17..122db17de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -871,9 +871,11 @@ para = try $ do
     $ try $ do
             newline
             (blanklines >> return mempty)
-              <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
-              <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced)
-              <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+              <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+              <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+              <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+                       () <$ lookAhead listStart)
             return $ do
               result' <- result
               case B.toList result' of
-- 
cgit v1.2.3


From c78557f3ca333d9ae925fdcb8a7c03199f5e47fd Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 8 Sep 2013 12:04:47 -0700
Subject: Templates:  more consistent behavior of `$for$`.

When `foo` is not a list, `$for(foo)$...$endfor$` should behave like
$if(foo)$...$endif$.  So if `foo` resolves to "", no output should
be produced.

See pandoc-templates#39.
---
 src/Text/Pandoc/Templates.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 22a44e735..7f744c7e1 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -212,7 +212,7 @@ iter var' template sep = Template $ \val -> unTemplate
            Just (Array vec) -> mconcat $ intersperse sep
                                        $ map (setVar template var')
                                        $ toList vec
-           Just x           -> setVar template var' x
+           Just x           -> cond var' (setVar template var' x) mempty
            Nothing          -> mempty) val
 
 setVar :: Template -> Variable -> Value -> Template
-- 
cgit v1.2.3


From 81e2df32c92ee95771f2613b9ad30aeaf11423e5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 8 Sep 2013 15:47:50 -0700
Subject: Made . . . for pause work in all slide show formats except slideous.

---
 src/Text/Pandoc/Writers/HTML.hs | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 63b466af3..78a3edce8 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -268,11 +268,24 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
                 else blockToHtml opts (Header level' (id',classes,keyvals) title')
   let isSec (Sec _ _ _ _ _) = True
       isSec (Blk _)         = False
+  let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
+      isPause _       = False
+  let fragmentClass = case writerSlideVariant opts of
+                           RevealJsSlides  -> "fragment"
+                           _               -> "incremental"
+  let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\""
+                       ++ fragmentClass ++ "\">")) :
+                   (xs ++ [Blk (RawBlock (Format "html") "</div>")])
   innerContents <- mapM (elementToHtml slideLevel opts)
                    $ if titleSlide
                         -- title slides have no content of their own
                         then filter isSec elements
-                        else elements
+                        else if slide
+                                then case splitBy isPause elements of
+                                          []   -> []
+                                          [x]  -> x
+                                          xs   -> concatMap inDiv xs
+                                else elements
   let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
   let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
                   ["section" | (slide || writerSectionDivs opts) &&
@@ -401,10 +414,6 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do
                     [nl opts, img, capt, nl opts]
               else H.div ! A.class_ "figure" $ mconcat
                     [nl opts, img, capt, nl opts]
--- . . . indicates a pause in a slideshow
-blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."])
-  | writerSlideVariant opts == RevealJsSlides =
-  blockToHtml opts (RawBlock "html" "<div class=\"fragment\" />")
 blockToHtml opts (Para lst) = do
   contents <- inlineListToHtml opts lst
   return $ H.p contents
@@ -580,8 +589,7 @@ toListItem opts item = nl opts >> H.li item
 
 blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
 blockListToHtml opts lst =
-  mapM (blockToHtml opts) lst >>=
-  return . mconcat . intersperse (nl opts)
+  fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst
 
 -- | Convert list of Pandoc inline elements to HTML.
 inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
-- 
cgit v1.2.3


From a9f3abc653bc7c0cb320056e31bb569652e03321 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 9 Sep 2013 11:19:37 -0700
Subject: Markdown:  don't parse citation right after alphanumeric.

An `@` after an alphanumeric is probably an email address.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 122db17de..9f2bc4447 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1823,6 +1823,11 @@ normalCite = try $ do
 
 citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
+  -- make sure we're not right after an alphanumeric,
+  -- since foo@bar.baz is probably an email address
+  lastStrPos <- stateLastStrPos <$> getState
+  pos <- getPosition
+  guard $ lastStrPos /= Just pos
   suppress_author <- option False (char '-' >> return True)
   char '@'
   first <- letter
-- 
cgit v1.2.3


From 71841de0f3d02fc2c88d61ab5d29b7022090f5f1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 11 Sep 2013 09:31:41 -0700
Subject: Mediawiki: Parse an image + caption in a para by itself as a figure.

---
 src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 0432915bc..8b436c89f 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -43,7 +43,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
 import Text.Pandoc.XML ( fromEntities )
 import Text.Pandoc.Parsing hiding ( nested )
 import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify )
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -539,7 +539,7 @@ image = try $ do
   _ <- many (try $ char '|' *> imageOption)
   caption <-   (B.str fname <$ sym "]]")
            <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
-  return $ B.image fname "image" caption
+  return $ B.image fname ("fig:" ++ stringify caption) caption
 
 imageOption :: MWParser String
 imageOption =
-- 
cgit v1.2.3


From ca6842349e23b3f60cb2665d1c20de9951bea268 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 12 Sep 2013 09:24:25 -0700
Subject: HTML writer:  Ensure proper escaping in header metadata.

---
 src/Text/Pandoc/Writers/HTML.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 78a3edce8..902c8bc53 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Slides
 import Text.Pandoc.Highlighting ( highlight, styleToCss,
                                   formatHtmlInline, formatHtmlBlock )
-import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.XML (fromEntities, escapeStringForXML)
 import Network.HTTP ( urlEncode )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
@@ -115,8 +115,9 @@ pandocToHtml opts (Pandoc meta blocks) = do
               (fmap renderHtml . blockListToHtml opts)
               (fmap renderHtml . inlineListToHtml opts)
               meta
-  let authsMeta = map stringify $ docAuthors meta
-  let dateMeta  = stringify $ docDate meta
+  let stringifyHTML = escapeStringForXML . stringify
+  let authsMeta = map stringifyHTML $ docAuthors meta
+  let dateMeta  = stringifyHTML $ docDate meta
   let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts
   let sects = hierarchicalize $
               if writerSlideVariant opts == NoSlides
@@ -168,7 +169,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
                   maybe id (defField "toc" . renderHtml) toc $
                   defField "author-meta" authsMeta $
                   maybe id (defField "date-meta") (normalizeDate dateMeta) $
-                  defField "pagetitle" (stringify $ docTitle meta) $
+                  defField "pagetitle" (stringifyHTML $ docTitle meta) $
                   defField "idprefix" (writerIdentifierPrefix opts) $
                   -- these should maybe be set in pandoc.hs
                   defField "slidy-url"
-- 
cgit v1.2.3


From 37471041788f079632ec369a970a184864799c3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 12 Sep 2013 11:23:34 -0700
Subject: Markdown writer: Print references if output is 'plain'.

---
 src/Text/Pandoc/Writers/Markdown.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 23e730bf0..a36bb8e14 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -189,7 +189,8 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
   -- Strip off final 'references' header if markdown citations enabled
   let blocks' = case reverse blocks of
                      (Div (_,["references"],_) _):xs
-                       | isEnabled Ext_citations opts -> reverse xs
+                       | not isPlain && isEnabled Ext_citations opts
+                       -> reverse xs
                      _ -> blocks
   body <- blockListToMarkdown opts blocks'
   st <- get
-- 
cgit v1.2.3


From 21f1bcb2805aad0c8c3a201cf59ce068bab6ec51 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 14 Sep 2013 22:27:25 -0700
Subject: Markdown reader:  unresolved citations fall back to original text.

Not ???.

Reason:  Less surprising, especially for people using @ as
in twitter.
---
 src/Text/Pandoc/Readers/Markdown.hs | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9f2bc4447..5456f25b5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1774,12 +1774,11 @@ rawHtmlInline = do
 cite :: MarkdownParser (F Inlines)
 cite = do
   guardEnabled Ext_citations
-  citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite)
+  citations <- textualCite
+            <|> do (cs, raw) <- withRaw normalCite
+                   return $ (flip B.cite (B.text raw)) <$> cs
   return citations
 
-unknownC :: Inlines
-unknownC = B.str "???"
-
 textualCite :: MarkdownParser (F Inlines)
 textualCite = try $ do
   (_, key) <- citeKey
@@ -1790,14 +1789,18 @@ textualCite = try $ do
                       , citationNoteNum = 0
                       , citationHash    = 0
                       }
-  mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+  mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
   case mbrest of
-       Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest
-       Nothing   -> (fmap (flip B.cite unknownC) <$> bareloc first) <|>
-                    return (do st <- askF
-                               return $ case M.lookup key (stateExamples st) of
-                                              Just n -> B.str (show n)
-                                              _      -> B.cite [first] unknownC)
+       Just (rest, raw) ->
+         return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+               <$> rest
+       Nothing   ->
+         (do (cs, raw) <- withRaw $ bareloc first
+             return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
+         <|> return (do st <- askF
+                        return $ case M.lookup key (stateExamples st) of
+                                 Just n -> B.str (show n)
+                                 _      -> B.cite [first] $ B.str $ '@':key)
 
 bareloc :: Citation -> MarkdownParser (F [Citation])
 bareloc c = try $ do
-- 
cgit v1.2.3


From 464b174d0f17c5a9f38d724523dcaf4e8a6b07fe Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 18 Sep 2013 09:13:37 -0700
Subject: Fixed reference slides.

The Div container around references messed up the procedure
for carving a document into slides.  So we now remove the surrounding
Div in prepSlides.
---
 src/Text/Pandoc/Slides.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 2bbdb120f..db4aa2509 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -46,13 +46,18 @@ getSlideLevel = go 6
 
 -- | Prepare a block list to be passed to hierarchicalize.
 prepSlides :: Int -> [Block] -> [Block]
-prepSlides slideLevel = ensureStartWithH . splitHrule
+prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader
   where splitHrule (HorizontalRule : Header n attr xs : ys)
                        | n == slideLevel = Header slideLevel attr xs : splitHrule ys
         splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] :
                                            splitHrule xs
         splitHrule (x : xs)              = x : splitHrule xs
         splitHrule []                    = []
+        extractRefsHeader bs             =
+          case reverse bs of
+               (Div (_,["references"],_) (Header n attrs xs : ys) : zs)
+                 -> reverse zs ++ (Header n attrs xs : ys)
+               _ -> bs
         ensureStartWithH bs@(Header n _ _:_)
                        | n <= slideLevel = bs
         ensureStartWithH bs              = Header slideLevel nullAttr [Str "\0"] : bs
-- 
cgit v1.2.3


From d27e5a6ff002d575004bdb7abaebdc9c50e02b50 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 19 Sep 2013 09:48:02 -0700
Subject: DOCX writer: Add  missing settings.xml to the zip container.

Closes #990.
---
 src/Text/Pandoc/Writers/Docx.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index c8673ae48..1214e7f8b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -257,6 +257,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   docPropsAppEntry <- entryFromArchive "docProps/app.xml"
   themeEntry <- entryFromArchive "word/theme/theme1.xml"
   fontTableEntry <- entryFromArchive "word/fontTable.xml"
+  settingsEntry <- entryFromArchive "word/settings.xml"
   webSettingsEntry <- entryFromArchive "word/webSettings.xml"
 
   -- Create archive
@@ -264,7 +265,8 @@ writeDocx opts doc@(Pandoc meta _) = do
                   contentTypesEntry : relsEntry : contentEntry : relEntry :
                   footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
                   docPropsEntry : docPropsAppEntry : themeEntry :
-                  fontTableEntry : webSettingsEntry : imageEntries
+                  fontTableEntry : settingsEntry : webSettingsEntry :
+                  imageEntries
   return $ fromArchive archive
 
 styleToOpenXml :: Style -> [Element]
-- 
cgit v1.2.3


From e135955b1e37b4bee72ffc6d7f4dc60e99dcecae Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 19 Sep 2013 10:08:49 -0700
Subject: LaTeX writer: Don't print biblio if --natbib or --biblatex option
 used.

---
 src/Text/Pandoc/Writers/LaTeX.hs | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 37ca60ce3..8b05cfb43 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -82,10 +82,17 @@ writeLaTeX options document =
 
 pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
 pandocToLaTeX options (Pandoc meta blocks) = do
+  -- Strip off final 'references' header if --natbib or --biblatex
+  let method = writerCiteMethod options
+  let blocks' = if method == Biblatex || method == Natbib
+                   then case reverse blocks of
+                             (Div (_,["references"],_) _):xs -> reverse xs
+                             _ -> blocks
+                   else blocks
   -- see if there are internal links
   let isInternalLink (Link _ ('#':xs,_))  = [xs]
       isInternalLink _                    = []
-  modify $ \s -> s{ stInternalLinks = query isInternalLink blocks }
+  modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
   let template = writerTemplate options
   -- set stBook depending on documentclass
   let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
@@ -107,15 +114,15 @@ pandocToLaTeX options (Pandoc meta blocks) = do
               (fmap (render colwidth) . blockListToLaTeX)
               (fmap (render colwidth) . inlineListToLaTeX)
               meta
-  let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then
-                                (blocks, [])
-                              else case last blocks of
-                                Header 1 _ il -> (init blocks, il)
-                                _             -> (blocks, [])
-  blocks'' <- if writerBeamer options
-                 then toSlides blocks'
-                 else return blocks'
-  body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''
+  let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
+                                 (blocks', [])
+                               else case last blocks' of
+                                 Header 1 _ il -> (init blocks', il)
+                                 _             -> (blocks', [])
+  blocks''' <- if writerBeamer options
+                  then toSlides blocks''
+                  else return blocks''
+  body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'''
   (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
   let main = render colwidth $ vsep body
   st <- get
-- 
cgit v1.2.3


From 255037a0912c5cc819985f4224057659f7af50fa Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 19 Sep 2013 10:09:32 -0700
Subject: Markdown reader: small code improvement.

---
 src/Text/Pandoc/Writers/Markdown.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index a36bb8e14..69ca05216 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -187,11 +187,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
                then tableOfContents opts headerBlocks
                else empty
   -- Strip off final 'references' header if markdown citations enabled
-  let blocks' = case reverse blocks of
-                     (Div (_,["references"],_) _):xs
-                       | not isPlain && isEnabled Ext_citations opts
-                       -> reverse xs
-                     _ -> blocks
+  let blocks' = if not isPlain && isEnabled Ext_citations opts
+                   then case reverse blocks of
+                             (Div (_,["references"],_) _):xs -> reverse xs
+                             _ -> blocks
+                   else blocks
   body <- blockListToMarkdown opts blocks'
   st <- get
   notes' <- notesToMarkdown opts (reverse $ stNotes st)
-- 
cgit v1.2.3


From e149d4e138fdf42df07ff8400a4748b6f7bde150 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=A1clav=20Zeman?= <vhaisman@gmail.com>
Date: Wed, 25 Sep 2013 01:18:39 +0200
Subject: src/Text/Pandoc/Writers/OpenDocument.hs: Fix formatting of strikeout
 code.

---
 src/Text/Pandoc/Writers/OpenDocument.hs | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 3ec5c2073..0f9044601 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -192,8 +192,15 @@ writeOpenDocument opts (Pandoc meta blocks) =
       listStyles  = map listStyle (stListStyles s)
       automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $
                           reverse $ styles ++ listStyles
+      fontFaceDecls = inTagsIndented "office:font-face-decls" $ vcat $
+                      [selfClosingTag "style:font-face" [
+                          ("style:name", "Courier New")
+                          , ("style:font-family-generic", "modern")
+                          , ("style:font-pitch", "fixed")
+                          , ("svg:font-family", "'Courier New'")]]
       context = defField "body" body
               $ defField "automatic-styles" (render' automaticStyles)
+              $ defField "font-face-decls" (render' fontFaceDecls)
               $ metadata
   in  if writerStandalone opts
          then renderTemplate' (writerTemplate opts) context
@@ -373,18 +380,18 @@ inlineToOpenDocument o ils
     | Subscript   l <- ils = withTextStyle Sub    $ inlinesToOpenDocument o l
     | SmallCaps   l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l
     | Quoted    t l <- ils = inQuotes t <$> inlinesToOpenDocument o l
-    | Code      _ s <- ils = preformatted s
+    | Code      _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
     | Math      _ s <- ils = inlinesToOpenDocument o (readTeXMath s)
     | Cite      _ l <- ils = inlinesToOpenDocument o l
     | RawInline f s <- ils = if f == "opendocument" || f == "html"
-                                then preformatted s
+                                then withTextStyle Pre $ inTextStyle $ preformatted s
                                 else return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
     | Image _ (s,t) <- ils = return $ mkImg  s t
     | Note        l <- ils = mkNote l
     | otherwise            = return empty
     where
-      preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML
+      preformatted s = handleSpaces $ escapeStringForXML s
       mkLink   s t = inTags False "text:a" [ ("xlink:type" , "simple")
                                            , ("xlink:href" , s       )
                                            , ("office:name", t       )
@@ -524,7 +531,8 @@ paraTableStyles t s (a:xs)
                      [ ("fo:text-align", x)
                      , ("style:justify-single-word", "false")]
 
-data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq,Ord )
+data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre 
+               deriving ( Eq,Ord )
 
 textStyleAttr :: TextStyle -> [(String,String)]
 textStyleAttr s
@@ -538,5 +546,8 @@ textStyleAttr s
     | Sub    <- s = [("style:text-position"          ,"sub 58%"   )]
     | Sup    <- s = [("style:text-position"          ,"super 58%" )]
     | SmallC <- s = [("fo:font-variant"              ,"small-caps")]
+    | Pre    <- s = [("style:font-name"              ,"Courier New")
+                    ,("style:font-name-asian"        ,"Courier New")
+                    ,("style:font-name-complex"      ,"Courier New")]
     | otherwise   = []
 
-- 
cgit v1.2.3


From d76a6e23720f4acb292d3384ee020dfb072a120c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 24 Sep 2013 18:41:19 -0700
Subject: OpenDocument writer:  don't use font-face-decls variable.

---
 src/Text/Pandoc/Writers/OpenDocument.hs | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 0f9044601..206be7133 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -192,15 +192,8 @@ writeOpenDocument opts (Pandoc meta blocks) =
       listStyles  = map listStyle (stListStyles s)
       automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $
                           reverse $ styles ++ listStyles
-      fontFaceDecls = inTagsIndented "office:font-face-decls" $ vcat $
-                      [selfClosingTag "style:font-face" [
-                          ("style:name", "Courier New")
-                          , ("style:font-family-generic", "modern")
-                          , ("style:font-pitch", "fixed")
-                          , ("svg:font-family", "'Courier New'")]]
       context = defField "body" body
               $ defField "automatic-styles" (render' automaticStyles)
-              $ defField "font-face-decls" (render' fontFaceDecls)
               $ metadata
   in  if writerStandalone opts
          then renderTemplate' (writerTemplate opts) context
-- 
cgit v1.2.3


From 9e7072cf1bf9d2e786a9e49ae144b0e625f66c87 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 28 Sep 2013 11:53:19 -0700
Subject: LaTeX reader:  Parse {groups} as Span.

This is needed for accurate conversion of bibtex titles,
since we need to know what was protected from titlecase conversions.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ff5b73348..cf5119345 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -176,7 +176,7 @@ inline = (mempty <$ comment)
      <|> (space  <$ sp)
      <|> inlineText
      <|> inlineCommand
-     <|> grouped inline
+     <|> inlineGroup
      <|> (char '-' *> option (str "-")
            ((char '-') *> option (str "–") (str "—" <$ char '-')))
      <|> double_quote
@@ -199,6 +199,15 @@ inline = (mempty <$ comment)
 inlines :: LP Inlines
 inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
 
+inlineGroup :: LP Inlines
+inlineGroup = do
+  ils <- grouped inline
+  if isNull ils
+     then return mempty
+     else return $ spanWith nullAttr ils
+          -- we need the span so we can detitlecase bibtex entries;
+          -- we need to know when something is {C}apitalized
+
 block :: LP Blocks
 block = (mempty <$ comment)
     <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
-- 
cgit v1.2.3


From dbd4aee7305ed82c9daf33a59fd0c29d3e3461d6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 6 Oct 2013 17:21:33 -0700
Subject: Removed code that forces MathJax to typeset.

Closes #1012.  Reopens #966.  A better solution for #966 will just
affect slideous, not the other slide writers.
---
 src/Text/Pandoc/Writers/HTML.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 902c8bc53..f6775b13a 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -144,8 +144,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
                            MathJax url ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-                                       $ preEscapedString
-                                         "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
+                                       $ mempty
                            JsMath (Just url) ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-- 
cgit v1.2.3


From 25e43d1c8944f793b7c22fad207a94a11d93365d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 11 Oct 2013 10:43:07 -0700
Subject: LaTeX reader: Fixed character escaping in \url{}.

Previously `\~` wasn't handled properly, among others.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index cf5119345..d22430eb9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -527,9 +527,7 @@ inNote ils =
 
 unescapeURL :: String -> String
 unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
-  where isEscapable '%' = True
-        isEscapable '#' = True
-        isEscapable _   = False
+  where isEscapable c = c `elem` "#$%&~_^\\{}"
 unescapeURL (x:xs) = x:unescapeURL xs
 unescapeURL [] = ""
 
-- 
cgit v1.2.3


From de10b1653e0624d91bc9b0b96d2f84c4673c6d98 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 11 Oct 2013 22:01:58 -0700
Subject: RST writer:  Skip spaces after display math.

Otherwise we get indentation problems, and part of the next
paragraph may be rendered as part of the math.
---
 src/Text/Pandoc/Writers/RST.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 70c6b4421..dd2c3186c 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -296,8 +296,14 @@ blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
 
 -- | Convert list of Pandoc inline elements to RST.
 inlineListToRST :: [Inline] -> State WriterState Doc
-inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat
-  where insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
+inlineListToRST lst =
+  mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat
+  where -- remove spaces after displaymath, as they screw up indentation:
+        removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
+              Math DisplayMath x : dropWhile (==Space) zs
+        removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs
+        removeSpaceAfterDisplayMath [] = []
+        insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
         insertBS (x:y:z:zs)
           | isComplex y && surroundComplex x z =
              x : y : RawInline "rst" "\\ " : insertBS (z:zs)
-- 
cgit v1.2.3


From 1a55c8f5de357f36885e0ac5c50cdc8b6cafc211 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 11 Oct 2013 22:43:47 -0700
Subject: LaTeX reader:  Ensure that preamble doesn't contribute to text of
 doc.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index d22430eb9..762150dba 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1037,14 +1037,14 @@ paragraph = do
 preamble :: LP Blocks
 preamble = mempty <$> manyTill preambleBlock beginDoc
   where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
-        preambleBlock =  (mempty <$ comment)
-                     <|> (mempty <$ sp)
-                     <|> (mempty <$ blanklines)
-                     <|> (mempty <$ macro)
-                     <|> blockCommand
-                     <|> (mempty <$ anyControlSeq)
-                     <|> (mempty <$ braced)
-                     <|> (mempty <$ anyChar)
+        preambleBlock =  (void comment)
+                     <|> (void sp)
+                     <|> (void blanklines)
+                     <|> (void macro)
+                     <|> (void blockCommand)
+                     <|> (void anyControlSeq)
+                     <|> (void braced)
+                     <|> (void anyChar)
 
 -------
 
-- 
cgit v1.2.3


From 2ae7f5e2a0a741fa4822448ad378280f77ab0dd5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 13 Oct 2013 11:31:33 -0700
Subject: HTML writer:  Insert command to typeset mathjax for slideous output.

Closes #966.
---
 src/Text/Pandoc/Writers/HTML.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index f6775b13a..cee07cff5 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -144,7 +144,11 @@ pandocToHtml opts (Pandoc meta blocks) = do
                            MathJax url ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-                                       $ mempty
+                                       $ case writerSlideVariant opts of
+                                              SlideousSlides ->
+                                                 preEscapedString
+                                                 "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
+                                              _ -> mempty
                            JsMath (Just url) ->
                               H.script ! A.src (toValue url)
                                        ! A.type_ "text/javascript"
-- 
cgit v1.2.3


From 0df7cce37da162c656aa88ecb67788109749668c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 13 Oct 2013 15:36:19 -0700
Subject: Treat div with class "notes" as speaker notes in slide formats.

Currently beamer goes to `\note{}`, revealjs to `<aside class="notes">`,
and the notes are simply suppressed in other formats.

Closes #925.
---
 src/Text/Pandoc/Writers/HTML.hs  | 11 +++++++++--
 src/Text/Pandoc/Writers/LaTeX.hs |  7 ++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index cee07cff5..22f5b8074 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -421,9 +421,16 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do
 blockToHtml opts (Para lst) = do
   contents <- inlineListToHtml opts lst
   return $ H.p contents
-blockToHtml opts (Div attr bs) = do
+blockToHtml opts (Div attr@(_,classes,_) bs) = do
   contents <- blockListToHtml opts bs
-  return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts
+  let contents' = nl opts >> contents >> nl opts
+  return $
+     if "notes" `elem` classes
+        then case writerSlideVariant opts of
+                  RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents'
+                  NoSlides       -> addAttrs opts attr $ H.div $ contents'
+                  _              -> mempty
+        else addAttrs opts attr $ H.div $ contents'
 blockToHtml _ (RawBlock f str)
   | f == Format "html" = return $ preEscapedString str
   | otherwise          = return mempty
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 8b05cfb43..d31e33a3a 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -285,7 +285,12 @@ isLineBreakOrSpace _ = False
 blockToLaTeX :: Block     -- ^ Block to convert
              -> State WriterState Doc
 blockToLaTeX Null = return empty
-blockToLaTeX (Div _ bs) = blockListToLaTeX bs
+blockToLaTeX (Div (_,classes,_) bs) = do
+  beamer <- writerBeamer `fmap` gets stOptions
+  contents <- blockListToLaTeX bs
+  if beamer && "notes" `elem` classes  -- speaker notes
+     then return $ "\\note" <> braces contents
+     else return contents
 blockToLaTeX (Plain lst) =
   inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
 -- title beginning with fig: indicates that the image is a figure
-- 
cgit v1.2.3


From 386e933432f5badf3149da1e6edaeb2e693bbb9b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 16 Oct 2013 09:48:11 -0700
Subject: Use isURI instead of isAbsoluteURI.

It allows fragments identifiers.
---
 src/Text/Pandoc/SelfContained.hs    | 6 +++---
 src/Text/Pandoc/Shared.hs           | 4 ++--
 src/Text/Pandoc/Writers/LaTeX.hs    | 4 ++--
 src/Text/Pandoc/Writers/Markdown.hs | 4 ++--
 src/Text/Pandoc/Writers/RST.hs      | 4 ++--
 src/Text/Pandoc/Writers/RTF.hs      | 4 ++--
 src/Text/Pandoc/Writers/Texinfo.hs  | 4 ++--
 7 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 0547bc065..6112e764f 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -32,7 +32,7 @@ the HTML using data URIs.
 -}
 module Text.Pandoc.SelfContained ( makeSelfContained ) where
 import Text.HTML.TagSoup
-import Network.URI (isAbsoluteURI, escapeURIString)
+import Network.URI (isURI, escapeURIString)
 import Data.ByteString.Base64
 import qualified Data.ByteString.Char8 as B
 import Data.ByteString (ByteString)
@@ -86,7 +86,7 @@ cssURLs userdata d orig =
                                  "\"" -> B.takeWhile (/='"') $ B.drop 1 u
                                  "'"  -> B.takeWhile (/='\'') $ B.drop 1 u
                                  _    -> u
-                  let url' = if isAbsoluteURI url
+                  let url' = if isURI url
                                 then url
                                 else d </> url
                   (raw, mime) <- getRaw userdata "" url'
@@ -97,7 +97,7 @@ cssURLs userdata d orig =
 
 getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
 getItem userdata f =
-  if isAbsoluteURI f
+  if isURI f
      then openURL f >>= either handleErr return
      else do
        -- strip off trailing query or fragment part, if relative URL.
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 9a9a092fc..d6ccdae66 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -91,7 +91,7 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
                    isLetter, isDigit, isSpace )
 import Data.List ( find, isPrefixOf, intercalate )
 import qualified Data.Map as M
-import Network.URI ( escapeURIString, isAbsoluteURI, unEscapeString )
+import Network.URI ( escapeURIString, isURI, unEscapeString )
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
 import System.FilePath ( (</>), takeExtension, dropExtension )
@@ -624,7 +624,7 @@ readDataFileUTF8 userDir fname =
 fetchItem :: Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe String))
 fetchItem sourceURL s
-  | isAbsoluteURI s = openURL s
+  | isURI s         = openURL s
   | otherwise       = case sourceURL of
                            Just u  -> openURL (u ++ "/" ++ s)
                            Nothing -> E.try readLocalFile
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d31e33a3a..dbfa57137 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -36,7 +36,7 @@ import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Templates
 import Text.Printf ( printf )
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( isURI, unEscapeString )
 import Data.List ( (\\), isSuffixOf, isInfixOf,
                    isPrefixOf, intercalate, intersperse )
 import Data.Char ( toLower, isPunctuation )
@@ -671,7 +671,7 @@ inlineToLaTeX (Link txt (src, _)) =
                          contents <> char '}'
 inlineToLaTeX (Image _ (source, _)) = do
   modify $ \s -> s{ stGraphics = True }
-  let source' = if isAbsoluteURI source
+  let source' = if isURI source
                    then source
                    else unEscapeString source
   source'' <- stringToLaTeX URLString source'
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 69ca05216..33cb110b5 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -47,7 +47,7 @@ import qualified Data.Set as Set
 import Text.Pandoc.Writers.HTML (writeHtmlString)
 import Text.Pandoc.Readers.TeXMath (readTeXMath)
 import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..))
-import Network.URI (isAbsoluteURI)
+import Network.URI (isURI)
 import Data.Default
 import Data.Yaml (Value(Object,String,Array,Bool,Number))
 import qualified Data.HashMap.Strict as H
@@ -753,7 +753,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
                      then empty
                      else text $ " \"" ++ tit ++ "\""
   let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
-  let useAuto = isAbsoluteURI src &&
+  let useAuto = isURI src &&
                 case txt of
                       [Str s] | escapeURI s == srcSuffix -> True
                       _                                  -> False
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index dd2c3186c..1a62f7250 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -38,7 +38,7 @@ import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.Builder (deleteMeta)
 import Data.List ( isPrefixOf, intersperse, transpose )
-import Network.URI (isAbsoluteURI)
+import Network.URI (isURI)
 import Text.Pandoc.Pretty
 import Control.Monad.State
 import Control.Applicative ( (<$>) )
@@ -393,7 +393,7 @@ inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
 inlineToRST Space = return space
 -- autolink
 inlineToRST (Link [Str str] (src, _))
-  | isAbsoluteURI src &&
+  | isURI src &&
     if "mailto:" `isPrefixOf` src
        then src == escapeURI ("mailto:" ++ str)
        else src == escapeURI str = do
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 0e8ce2ece..cb5fb3232 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -40,7 +40,7 @@ import Data.Char ( ord, chr, isDigit, toLower )
 import System.FilePath ( takeExtension )
 import qualified Data.ByteString as B
 import Text.Printf ( printf )
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( isURI, unEscapeString )
 import qualified Control.Exception as E
 
 -- | Convert Image inlines into a raw RTF embedded image, read from a file.
@@ -48,7 +48,7 @@ import qualified Control.Exception as E
 rtfEmbedImage :: Inline -> IO Inline
 rtfEmbedImage x@(Image _ (src,_)) = do
   let ext = map toLower (takeExtension src)
-  if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src)
+  if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src)
      then do
        let src' = unEscapeString src
        imgdata <- E.catch (B.readFile src')
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index b1fd3d6af..d62e50880 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -40,7 +40,7 @@ import Data.Ord ( comparing )
 import Data.Char ( chr, ord )
 import Control.Monad.State
 import Text.Pandoc.Pretty
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( isURI, unEscapeString )
 import System.FilePath
 
 data WriterState =
@@ -448,7 +448,7 @@ inlineToTexinfo (Image alternate (source, _)) = do
   where
     ext     = drop 1 $ takeExtension source'
     base    = dropExtension source'
-    source' = if isAbsoluteURI source
+    source' = if isURI source
                  then source
                  else unEscapeString source
 
-- 
cgit v1.2.3


From 6e1c24da8e9f4715e03fd1ff9ee64528936d8bf3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 17 Oct 2013 13:23:38 -0700
Subject: LaTeX writer:  Add link anchors for code blocks with identifiers.

Closes #1025.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index dbfa57137..8f52b11ca 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -329,17 +329,23 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
        | writerListings opts                            -> listingsCodeBlock
        | writerHighlight opts && not (null classes)     -> highlightedCodeBlock
        | otherwise                                      -> rawCodeBlock
-   where lhsCodeBlock = do
+   where ref = text identifier
+         linkAnchor = if null identifier
+                         then empty
+                         else "\\hyperdef{}" <> braces ref <>
+                                   braces ("\\label" <> braces ref)
+         lhsCodeBlock = do
            modify $ \s -> s{ stLHS = True }
-           return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr
+           return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
+                               "\\end{code}") $$ cr
          rawCodeBlock = do
            st <- get
            env <- if stInNote st
                      then modify (\s -> s{ stVerbInNote = True }) >>
                           return "Verbatim"
                      else return "verbatim"
-           return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$
-                    text ("\\end{" ++ env ++ "}")) <> cr
+           return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$
+                    text str $$ text ("\\end{" ++ env ++ "}")) <> cr
          listingsCodeBlock = do
            st <- get
            let params = if writerListings (stOptions st)
@@ -367,7 +373,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
            case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
                   Nothing -> rawCodeBlock
                   Just  h -> modify (\st -> st{ stHighlighting = True }) >>
-                             return (flush $ text h)
+                             return (flush $ linkAnchor $$ text h)
 blockToLaTeX (RawBlock f x)
   | f == Format "latex" || f == Format "tex"
                         = return $ text x
-- 
cgit v1.2.3


From 80c1967e751c9b58f7c1f28211d47fbd57316d32 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 17 Oct 2013 13:36:43 -0700
Subject: PDF: Minor code cleanup.

---
 src/Text/Pandoc/PDF.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index a445e2991..e8683b98f 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -107,7 +107,7 @@ tex2pdf' tmpDir program source = do
                    then 3  -- to get page numbers
                    else 2  -- 1 run won't give you PDF bookmarks
   (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
-  let msg = "Error producing PDF from TeX source."
+  let msg = "Error producing PDF from TeX source.\n"
   case (exit, mbPdf) of
        (ExitFailure _, _)      -> do
           let logmsg = extractMsg log'
@@ -116,7 +116,7 @@ tex2pdf' tmpDir program source = do
                      x | "! Package inputenc Error" `BC.isPrefixOf` x ->
                            "\nTry running pandoc with --latex-engine=xelatex."
                      _ -> ""
-          return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg
+          return $ Left $ msg <> logmsg <> extramsg
        (ExitSuccess, Nothing)  -> return $ Left msg
        (ExitSuccess, Just pdf) -> return $ Right pdf
 
-- 
cgit v1.2.3


From 1f29f4678e0a1b1272704e5ddafa0bee45e5e426 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 17 Oct 2013 22:06:39 -0700
Subject: LaTeX writer:  Specially escape non-ascii characters in labels.

Otherwise we can get compile errors and other bugs when
compiled with pdflatex.  Closes #1007.

Thanks to begemotv2718 for the fix.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 8f52b11ca..20a6ac9a9 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -39,7 +39,7 @@ import Text.Printf ( printf )
 import Network.URI ( isURI, unEscapeString )
 import Data.List ( (\\), isSuffixOf, isInfixOf,
                    isPrefixOf, intercalate, intersperse )
-import Data.Char ( toLower, isPunctuation )
+import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
 import Control.Applicative ((<|>))
 import Control.Monad.State
 import Text.Pandoc.Pretty
@@ -222,6 +222,13 @@ stringToLaTeX  ctx (x:xs) = do
        '\x2013' | ligatures -> "--" ++ rest
        _        -> x : rest
 
+toLabel :: String -> String
+toLabel [] = ""
+toLabel (x:xs)
+  | (isLetter x || isDigit x) && isAscii x = x:toLabel xs
+  | elem x "-+=:;." = x:toLabel xs
+  | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs
+
 -- | Puts contents into LaTeX command.
 inCmd :: String -> Doc -> Doc
 inCmd cmd contents = char '\\' <> text cmd <> braces contents
@@ -329,7 +336,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
        | writerListings opts                            -> listingsCodeBlock
        | writerHighlight opts && not (null classes)     -> highlightedCodeBlock
        | otherwise                                      -> rawCodeBlock
-   where ref = text identifier
+   where ref = text $ toLabel identifier
          linkAnchor = if null identifier
                          then empty
                          else "\\hyperdef{}" <> braces ref <>
@@ -361,7 +368,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
                                    (key,attr) <- keyvalAttr ] ++
                              (if identifier == ""
                                    then []
-                                   else [ "label=" ++ identifier ])
+                                   else [ "label=" ++ toLabel identifier ])
 
                         else []
                printParams
@@ -537,13 +544,13 @@ sectionHeader unnumbered ref level lst = do
   let refLabel x = (if ref `elem` internalLinks
                        then text "\\hyperdef"
                                 <> braces empty
-                                <> braces (text ref)
+                                <> braces (text $ toLabel ref)
                                 <> braces x
                        else x)
   let headerWith x y r = refLabel $ text x <> y <>
                              if null r
                                 then empty
-                                else text "\\label" <> braces (text r)
+                                else text "\\label" <> braces (text $ toLabel r)
   let sectionType = case level' of
                           0  | writerBeamer opts -> "part"
                              | otherwise -> "chapter"
@@ -664,7 +671,8 @@ inlineToLaTeX Space = return space
 inlineToLaTeX (Link txt ('#':ident, _)) = do
   contents <- inlineListToLaTeX txt
   ident' <- stringToLaTeX URLString ident
-  return $ text "\\hyperref" <> brackets (text ident') <> braces contents
+  return $ text "\\hyperref" <> brackets (text $ toLabel ident') <>
+              braces contents
 inlineToLaTeX (Link txt (src, _)) =
   case txt of
         [Str x] | x == src ->  -- autolink
-- 
cgit v1.2.3


From e5feed00f7edb0d79f717ec9815c57df696a992e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 18 Oct 2013 17:50:43 -0700
Subject: MediaWiki reader: Trim contents of `<math>` tags.

Otherwise we get problems when converting to markdown.

Closes #1027.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 8b436c89f..136701bd0 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -43,7 +43,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
 import Text.Pandoc.XML ( fromEntities )
 import Text.Pandoc.Parsing hiding ( nested )
 import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -475,10 +475,10 @@ str :: MWParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
 
 math :: MWParser Inlines
-math = (B.displayMath <$> try (char ':' >> charsInTags "math"))
-   <|> (B.math <$> charsInTags "math")
-   <|> (B.displayMath <$> try (dmStart *> manyTill anyChar dmEnd))
-   <|> (B.math <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+math = (B.displayMath . trim <$> try (char ':' >> charsInTags "math"))
+   <|> (B.math . trim <$> charsInTags "math")
+   <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
+   <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
  where dmStart = string "\\["
        dmEnd   = try (string "\\]")
        mStart  = string "\\("
-- 
cgit v1.2.3


From 1ce875a010e1471e06338a20c82781036d476776 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 20 Oct 2013 09:56:50 -0700
Subject: Fixed '. . .' (pause) on HTML slide formats.  Closes #1029.

The old version caused a pause to be inserted before the first
material on a slide. This has been fixed.
---
 src/Text/Pandoc/Writers/HTML.hs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 22f5b8074..8a71c3a2e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -286,9 +286,8 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
                         then filter isSec elements
                         else if slide
                                 then case splitBy isPause elements of
-                                          []   -> []
-                                          [x]  -> x
-                                          xs   -> concatMap inDiv xs
+                                          []     -> []
+                                          (x:xs) -> x ++ concatMap inDiv xs
                                 else elements
   let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
   let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
-- 
cgit v1.2.3


From 75ea0c4d0dd9133db8754ff640b9d3b316170e64 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 21 Oct 2013 09:33:10 -0700
Subject: LaTeX reader:  Improved citation parsing.

This fixes a run-time error that occured with `\citet{}` (empty
list of keys).  It also ensures that empty keys don't get produced.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 762150dba..681dcb077 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -43,7 +43,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
 import Text.Pandoc.Builder
-import Data.Char (isLetter)
+import Data.Char (isLetter, isAlphaNum)
 import Control.Applicative
 import Data.Monoid
 import Data.Maybe (fromMaybe)
@@ -1065,6 +1065,7 @@ simpleCiteArgs = try $ do
   first  <- optionMaybe $ toList <$> opt
   second <- optionMaybe $ toList <$> opt
   char '{'
+  optional sp
   keys <- manyTill citationLabel (char '}')
   let (pre, suf) = case (first  , second ) of
         (Just s , Nothing) -> (mempty, s )
@@ -1080,18 +1081,24 @@ simpleCiteArgs = try $ do
   return $ addPrefix pre $ addSuffix suf $ map conv keys
 
 citationLabel :: LP String
-citationLabel  = trim <$>
-  (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
+citationLabel  = optional sp *>
+  (many1 (satisfy isBibtexKeyChar)
+          <* optional sp
+          <* optional (char ',')
+          <* optional sp)
+  where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
 
 cites :: CitationMode -> Bool -> LP [Citation]
 cites mode multi = try $ do
   cits <- if multi
              then many1 simpleCiteArgs
              else count 1 simpleCiteArgs
-  let (c:cs) = concat cits
+  let cs = concat cits
   return $ case mode of
-        AuthorInText   -> c {citationMode = mode} : cs
-        _              -> map (\a -> a {citationMode = mode}) (c:cs)
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
 
 citation :: String -> CitationMode -> Bool -> LP Inlines
 citation name mode multi = do
-- 
cgit v1.2.3


From 0b16b08543e87503968649e63154a629005a406f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Oct 2013 12:31:07 -0700
Subject: Templates:  Changed how array variables are resolved.

Previously if `foo` is an array (which might be because multiple
values were set on the command line), `$foo$` would resolve to
the concatenation of the elements of foo.  This is rarely useful
behavior.  It has been changed so that the first value is rendered.
Of course, you can still iterate over the values using
`$for(foo)$`.

This has the result that you can override earlier settings using
-V by putting new values later on the command line.  That's useful
for many purposes.
---
 src/Text/Pandoc/Templates.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 7f744c7e1..ad8838f72 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -117,6 +117,7 @@ import Text.Blaze (preEscapedText, Html)
 #endif
 import Data.ByteString.Lazy (ByteString, fromChunks)
 import Text.Pandoc.Shared (readDataFileUTF8)
+import Data.Vector ((!?))
 
 -- | Get default template for the specified writer.
 getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
@@ -185,7 +186,7 @@ var = Template . resolveVar
 resolveVar :: Variable -> Value -> Text
 resolveVar var' val =
   case multiLookup var' val of
-       Just (Array vec) -> mconcat $ map (resolveVar []) $ toList vec
+       Just (Array vec) -> maybe mempty (resolveVar []) $ vec !? 0
        Just (String t)  -> T.stripEnd t
        Just (Number n)  -> T.pack $ show n
        Just (Bool True) -> "true"
-- 
cgit v1.2.3


From e63aafd62004e3424da46de75c78ba4dc7562af4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Oct 2013 17:33:42 -0700
Subject: Fix definition lists with internal links in terms (closes #1032).

This fix puts braces around a term that contains an internal
link, to avoid problems with square brackets.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 20a6ac9a9..72b0bde6d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -513,8 +513,15 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return .  (text "\\item" $$) .
 defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc
 defListItemToLaTeX (term, defs) = do
     term' <- inlineListToLaTeX term
+    -- put braces around term if it contains an internal link,
+    -- since otherwise we get bad bracket interactions: \item[\hyperref[..]
+    let isInternalLink (Link _ ('#':_,_)) = True
+        isInternalLink _                  = False
+    let term'' = if any isInternalLink term
+                    then braces term'
+                    else term'
     def'  <- liftM vsep $ mapM blockListToLaTeX defs
-    return $ "\\item" <> brackets term' $$ def'
+    return $ "\\item" <> brackets term'' $$ def'
 
 -- | Craft the section header, inserting the secton reference, if supplied.
 sectionHeader :: Bool    -- True for unnumbered
-- 
cgit v1.2.3


From 416dad86dca4087f4eab21f0ac6296178a1479cf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 22 Oct 2013 12:34:18 -0700
Subject: DocBook reader:  Handle numerical attributes starting with decimal.

Also use safeRead instead of read.
---
 src/Text/Pandoc/Readers/DocBook.hs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6a799e270..fc29988d5 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,5 +1,6 @@
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper, isDigit)
+import Data.Char (toUpper)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder
@@ -682,10 +683,9 @@ parseBlock (Elem e) =
                                "lowerroman" -> LowerRoman
                                "upperroman" -> UpperRoman
                                _            -> Decimal
-          let start = case attrValue "override" <$>
-                            filterElement (named "listitem") e of
-                              Just x@(_:_) | all isDigit x -> read x
-                              _                            -> 1
+          let start = maybe 1 id $
+                      (attrValue "override" <$> filterElement (named "listitem") e)
+                       >>= safeRead
           orderedListWith (start,listStyle,DefaultDelim)
             <$> listitems
         "variablelist" -> definitionList <$> deflistitems
@@ -801,7 +801,8 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = case findAttr (unqual "colwidth") c of
-                                                Just w -> read $ filter (\x ->
+                                                Just w -> maybe 0 id
+                                                   $ safeRead $ '0': filter (\x ->
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
                                                 Nothing -> 0 :: Double
-- 
cgit v1.2.3


From ac7714ca398eaf25dd512754c1cb9aee297ff3d1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 26 Oct 2013 18:22:59 -0700
Subject: Text.Pandoc.Writer.Shared: fixed bug in tagWithAttrs.

A space was omitted before key-value attributes, leading
to invalid HTML.
---
 src/Text/Pandoc/Writers/Shared.hs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 89923822c..9cb08803c 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -134,7 +134,6 @@ tagWithAttrs tag (ident,classes,kvs) = hsep
   ,if null classes
       then empty
       else "class=" <> doubleQuotes (text (unwords classes))
-  ]
-  <> hsep (map (\(k,v) -> text k <> "=" <>
+  ,hsep (map (\(k,v) -> text k <> "=" <>
                 doubleQuotes (text (escapeStringForXML v))) kvs)
-  <> ">"
+  ] <> ">"
-- 
cgit v1.2.3


From a6aaff102ef75bf02c9a3256a2a750abae78d0b6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Oct 2013 10:40:40 -0700
Subject: Slides:  Preserve `<div class="references">` in references slide.

---
 src/Text/Pandoc/Slides.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index db4aa2509..50c46d17f 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -55,8 +55,8 @@ prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader
         splitHrule []                    = []
         extractRefsHeader bs             =
           case reverse bs of
-               (Div (_,["references"],_) (Header n attrs xs : ys) : zs)
-                 -> reverse zs ++ (Header n attrs xs : ys)
+               (Div ("",["references"],[]) (Header n attrs xs : ys) : zs)
+                 -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys])
                _ -> bs
         ensureStartWithH bs@(Header n _ _:_)
                        | n <= slideLevel = bs
-- 
cgit v1.2.3


From ab0ffe6549261313410207a0b5beba9284135962 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Oct 2013 10:57:48 -0700
Subject: Markdown reader:  Yaml block must start immediately after `---`.

If there's a blank line after `---`, we interpreted it as
a horizontal rule.
---
 src/Text/Pandoc/Readers/Markdown.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5456f25b5..8f804f863 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -227,6 +227,7 @@ yamlMetaBlock = try $ do
   pos <- getPosition
   string "---"
   blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
   rawYamlLines <- manyTill anyLine stopLine
   -- by including --- and ..., we allow yaml blocks with just comments:
   let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
-- 
cgit v1.2.3


From 0d95c15e8316eb28128bdd4c9c2f98e29f13f564 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 1 Nov 2013 14:27:22 -0700
Subject: TexMath: Export readTeXMath', which attends to display/inline.

Deprecate readTeXMath, and use readTeXMath' in all the writers.
Require texmath >= 0.6.5.
---
 src/Text/Pandoc/Readers/TeXMath.hs      | 24 +++++++++++++++++++-----
 src/Text/Pandoc/Writers/Docbook.hs      |  4 ++--
 src/Text/Pandoc/Writers/Docx.hs         |  2 +-
 src/Text/Pandoc/Writers/HTML.hs         |  4 ++--
 src/Text/Pandoc/Writers/Man.hs          |  5 +++--
 src/Text/Pandoc/Writers/Markdown.hs     |  6 +++---
 src/Text/Pandoc/Writers/OpenDocument.hs |  2 +-
 src/Text/Pandoc/Writers/RTF.hs          |  2 +-
 8 files changed, 32 insertions(+), 17 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 1f7088f72..6bd617f7e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,16 +27,30 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of TeX math to a list of 'Pandoc' inline elements.
 -}
-module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where
+module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where
 
 import Text.Pandoc.Definition
 import Text.TeXMath
 
 -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ characters if entire formula
+-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
 -- can't be converted.
+readTeXMath' :: MathType
+             -> String    -- ^ String to parse (assumes @'\n'@ line endings)
+             -> [Inline]
+readTeXMath' mt inp = case texMathToPandoc dt inp of
+                           Left _    -> [Str (delim ++ inp ++ delim)]
+                           Right res -> res
+    where (dt, delim) = case mt of
+                             DisplayMath -> (DisplayBlock, "$$")
+                             InlineMath  -> (DisplayInline, "$")
+
+{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-}
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted.  (This is provided for backwards compatibility;
+-- it is better to use @readTeXMath'@, which properly distinguishes
+-- between display and inline math.)
 readTeXMath :: String    -- ^ String to parse (assumes @'\n'@ line endings)
             -> [Inline]
-readTeXMath inp = case texMathToPandoc DisplayInline inp of
-                        Left _    -> [Str ("$" ++ inp ++ "$")]
-                        Right res -> res
+readTeXMath = readTeXMath' InlineMath
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 7c03c07dc..dad83d7bb 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -281,8 +281,8 @@ inlineToDocbook opts (Math t str)
                  $ fixNS
                  $ removeAttr r
       Left  _ -> inlinesToDocbook opts
-                 $ readTeXMath str
-  | otherwise = inlinesToDocbook opts $ readTeXMath str
+                 $ readTeXMath' t str
+  | otherwise = inlinesToDocbook opts $ readTeXMath' t str
      where (dt, tagtype) = case t of
                             InlineMath  -> (DisplayInline,"inlineequation")
                             DisplayMath -> (DisplayBlock,"informalequation")
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 1214e7f8b..0fdea0a7a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -669,7 +669,7 @@ inlineToOpenXML opts (Math mathType str) = do
                        else DisplayInline
   case texMathToOMML displayType str of
         Right r -> return [r]
-        Left  _ -> inlinesToOpenXML opts (readTeXMath str)
+        Left  _ -> inlinesToOpenXML opts (readTeXMath' mathType str)
 inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML opts (Code attrs str) =
   withTextProp (rStyle "VerbatimChar")
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 8a71c3a2e..c1cca291b 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -685,14 +685,14 @@ inlineToHtml opts inline =
                                         Right r -> return $ preEscapedString $
                                                     ppcElement conf r
                                         Left  _ -> inlineListToHtml opts
-                                                   (readTeXMath str) >>= return .
+                                                   (readTeXMath' t str) >>= return .
                                                      (H.span ! A.class_ "math")
                                MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $
                                   case t of
                                     InlineMath  -> "\\(" ++ str ++ "\\)"
                                     DisplayMath -> "\\[" ++ str ++ "\\]"
                                PlainMath -> do
-                                  x <- inlineListToHtml opts (readTeXMath str)
+                                  x <- inlineListToHtml opts (readTeXMath' t str)
                                   let m = H.span ! A.class_ "math" $ x
                                   let brtag = if writerHtml5 opts then H5.br else H.br
                                   return  $ case t of
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 642a002d6..b31cc2b70 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -330,9 +330,10 @@ inlineToMan opts (Cite _ lst) =
 inlineToMan _ (Code _ str) =
   return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
 inlineToMan _ (Str str) = return $ text $ escapeString str
-inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str
+inlineToMan opts (Math InlineMath str) =
+  inlineListToMan opts $ readTeXMath' InlineMath str
 inlineToMan opts (Math DisplayMath str) = do
-  contents <- inlineListToMan opts $ readTeXMath str
+  contents <- inlineListToMan opts $ readTeXMath' DisplayMath str
   return $ cr <> text ".RS" $$ contents $$ text ".RE"
 inlineToMan _ (RawInline f str)
   | f == Format "man" = return $ text str
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 33cb110b5..56be709d8 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Pretty
 import Control.Monad.State
 import qualified Data.Set as Set
 import Text.Pandoc.Writers.HTML (writeHtmlString)
-import Text.Pandoc.Readers.TeXMath (readTeXMath)
+import Text.Pandoc.Readers.TeXMath (readTeXMath')
 import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..))
 import Network.URI (isURI)
 import Data.Default
@@ -697,7 +697,7 @@ inlineToMarkdown opts (Math InlineMath str)
       return $ "\\(" <> text str <> "\\)"
   | isEnabled Ext_tex_math_double_backslash opts =
       return $ "\\\\(" <> text str <> "\\\\)"
-  | otherwise = inlineListToMarkdown opts $ readTeXMath str
+  | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str
 inlineToMarkdown opts (Math DisplayMath str)
   | isEnabled Ext_tex_math_dollars opts =
       return $ "$$" <> text str <> "$$"
@@ -706,7 +706,7 @@ inlineToMarkdown opts (Math DisplayMath str)
   | isEnabled Ext_tex_math_double_backslash opts =
       return $ "\\\\[" <> text str <> "\\\\]"
   | otherwise = (\x -> cr <> x <> cr) `fmap`
-        inlineListToMarkdown opts (readTeXMath str)
+        inlineListToMarkdown opts (readTeXMath' DisplayMath str)
 inlineToMarkdown opts (RawInline f str)
   | f == "html" || f == "markdown" ||
     (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) =
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 206be7133..b38d250aa 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -374,7 +374,7 @@ inlineToOpenDocument o ils
     | SmallCaps   l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l
     | Quoted    t l <- ils = inQuotes t <$> inlinesToOpenDocument o l
     | Code      _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
-    | Math      _ s <- ils = inlinesToOpenDocument o (readTeXMath s)
+    | Math      t s <- ils = inlinesToOpenDocument o (readTeXMath' t s)
     | Cite      _ l <- ils = inlinesToOpenDocument o l
     | RawInline f s <- ils = if f == "opendocument" || f == "html"
                                 then withTextStyle Pre $ inTextStyle $ preformatted s
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index cb5fb3232..fb935fa6a 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -324,7 +324,7 @@ inlineToRTF (Quoted DoubleQuote lst) =
   "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
 inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
 inlineToRTF (Str str) = stringToRTF str
-inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
+inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str
 inlineToRTF (Cite _ lst) = inlineListToRTF lst
 inlineToRTF (RawInline f str)
   | f == Format "rtf" = str
-- 
cgit v1.2.3


From 732f6abe15b75724c2eb7a8bf0763f054b2dc500 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 3 Nov 2013 11:17:39 -0800
Subject: HTML reader:  Use pandoc Div and Span for raw "<div>", "<span>".

Only if --parse-raw.
---
 src/Text/Pandoc/Readers/HTML.hs | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7ca554fa3..80279bf61 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -92,6 +92,7 @@ block = choice
             , pHead
             , pBody
             , pPlain
+            , pDiv
             , pRawHtmlBlock
             ]
 
@@ -177,6 +178,13 @@ pRawTag = do
      then return []
      else return $ renderTags' [tag]
 
+pDiv :: TagParser [Block]
+pDiv = try $ do
+  getOption readerParseRaw >>= guard
+  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
+  contents <- pInTags "div" block
+  return [Div (mkAttr attr) contents]
+
 pRawHtmlBlock :: TagParser [Block]
 pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
@@ -295,11 +303,7 @@ pCodeBlock = try $ do
   let result = case reverse result' of
                     '\n':_   -> init result'
                     _        -> result'
-  let attribsId = fromMaybe "" $ lookup "id" attr
-  let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
-  let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
-  let attribs = (attribsId, attribsClasses, attribsKV)
-  return [CodeBlock attribs result]
+  return [CodeBlock (mkAttr attr) result]
 
 inline :: TagParser [Inline]
 inline = choice
@@ -314,6 +318,7 @@ inline = choice
            , pLink
            , pImage
            , pCode
+           , pSpan
            , pRawHtmlInline
            ]
 
@@ -397,11 +402,14 @@ pCode :: TagParser [Inline]
 pCode = try $ do
   (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   result <- manyTill pAnyTag (pCloses open)
-  let ident = fromMaybe "" $ lookup "id" attr
-  let classes = words $ fromMaybe [] $ lookup "class" attr
-  let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr
-  return [Code (ident,classes,rest)
-         $ intercalate " " $ lines $ innerText result]
+  return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+
+pSpan :: TagParser [Inline]
+pSpan = try $ do
+  getOption readerParseRaw >>= guard
+  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+  contents <- pInTags "span" inline
+  return [Span (mkAttr attr) contents]
 
 pRawHtmlInline :: TagParser [Inline]
 pRawHtmlInline = do
@@ -648,3 +656,10 @@ htmlTag f = try $ do
        _            -> do
           rendered <- manyTill anyChar (char '>')
           return (next, rendered ++ ">")
+
+mkAttr :: [(String, String)] -> Attr
+mkAttr attr = (attribsId, attribsClasses, attribsKV)
+  where attribsId = fromMaybe "" $ lookup "id" attr
+        attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+        attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+
-- 
cgit v1.2.3


From 4301fa4a2794cc354da0bc15e7a4ebb214bb9966 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 3 Nov 2013 21:16:47 -0800
Subject: Markdown reader: Correctly handle empty bullet list items.

For example:

    - one
    -
    - two

This should NOT be parsed as a setext header followed by a list.
---
 src/Text/Pandoc/Readers/Markdown.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8f804f863..ea49d8c1d 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -444,6 +444,9 @@ block = choice [ mempty <$ blanklines
                , codeBlockFenced
                , yamlMetaBlock
                , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+               -- note: bulletList needs to be before header because of
+               -- the possibility of empty list items: -
+               , bulletList
                , header
                , lhsCodeBlock
                , rawTeXBlock
@@ -454,7 +457,6 @@ block = choice [ mempty <$ blanklines
                , codeBlockIndented
                , blockQuote
                , hrule
-               , bulletList
                , orderedList
                , definitionList
                , noteBlock
@@ -699,7 +701,7 @@ bulletListStart = try $ do
   skipNonindentSpaces
   notFollowedBy' (() <$ hrule)     -- because hrules start out just like lists
   satisfy isBulletListMarker
-  spaceChar
+  spaceChar <|> lookAhead newline
   skipSpaces
 
 anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
@@ -727,7 +729,6 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
 -- parse a line of a list item (start = parser for beginning of list item)
 listLine :: MarkdownParser String
 listLine = try $ do
-  notFollowedBy blankline
   notFollowedBy' (do indentSpaces
                      many (spaceChar)
                      listStart)
@@ -740,7 +741,7 @@ rawListItem :: MarkdownParser a
 rawListItem start = try $ do
   start
   first <- listLine
-  rest <- many (notFollowedBy listStart >> listLine)
+  rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
   blanks <- many blankline
   return $ unlines (first:rest) ++ blanks
 
-- 
cgit v1.2.3


From 6b24b1afca306c9149c285b60ed36f04fae2ece2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 6 Nov 2013 09:25:50 -0800
Subject: Don't print `<span>` tags in 'plain' output.

---
 src/Text/Pandoc/Writers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 56be709d8..eefcd547a 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -642,8 +642,11 @@ escapeSpaces x = x
 -- | Convert Pandoc inline element to markdown.
 inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
 inlineToMarkdown opts (Span attrs ils) = do
+  st <- get
   contents <- inlineListToMarkdown opts ils
-  return $ tagWithAttrs "span" attrs <> contents <> text "</span>"
+  return $ if stPlain st
+              then contents
+              else tagWithAttrs "span" attrs <> contents <> text "</span>"
 inlineToMarkdown opts (Emph lst) = do
   contents <- inlineListToMarkdown opts lst
   return $ "*" <> contents <> "*"
-- 
cgit v1.2.3


From 5b99112f229db1fcbe82943678f9f55c1ead8f11 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 6 Nov 2013 19:18:24 -0800
Subject: Docx writer:  Fix URL for core-properties in `_rels/.rels`.

Partially addresses #1046.
---
 src/Text/Pandoc/Writers/Docx.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 0fdea0a7a..f1276c831 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -247,7 +247,7 @@ writeDocx opts doc@(Pandoc meta _) = do
           ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties")
           ,("Target","docProps/app.xml")]
         , [("Id","rId3")
-          ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties")
+          ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties")
           ,("Target","docProps/core.xml")]
         ]
   let relsEntry = toEntry relsPath epochtime $ renderXml rels
-- 
cgit v1.2.3


From 2efd0951d3d560a159f92df4730e2cee26978698 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 7 Nov 2013 08:46:52 -0800
Subject: Docx writer: fixed core metadata.

- Don't create empty date nodes if no date given.
- Don't create multiple dc:creator nodes; instead separate by
  semicolons.

Closes #1046.
---
 src/Text/Pandoc/Writers/Docx.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index f1276c831..5d1647844 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -231,10 +231,11 @@ writeDocx opts doc@(Pandoc meta _) = do
           ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
           ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
           $ mknode "dc:title" [] (stringify $ docTitle meta)
-          : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")]
-            (maybe "" id $ normalizeDate $ stringify $ docDate meta)
-          : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
-          : map (mknode "dc:creator" [] . stringify) (docAuthors meta)
+          : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
+          : maybe []
+             (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x
+                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x
+                    ]) (normalizeDate $ stringify $ docDate meta)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
   let relsPath = "_rels/.rels"
-- 
cgit v1.2.3


From 01fed75b8f755bf4de3d4ec2b720e4dd9e66951a Mon Sep 17 00:00:00 2001
From: MinRK <benjaminrk@gmail.com>
Date: Thu, 7 Nov 2013 22:25:44 -0800
Subject: recognize svg tag in HTML Reader

avoids adding lots of `<p>` tags in embedded SVG content,
for instance in markdown to HTML.
---
 src/Text/Pandoc/Readers/HTML.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 80279bf61..d691c9878 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -557,7 +557,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button",
                  "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress",
                  "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd",
                  "dt", "frameset", "li", "tbody", "td", "tfoot",
-                 "th", "thead", "tr", "script", "style", "video"]
+                 "th", "thead", "tr", "script", "style", "svg", "video"]
 
 -- We want to allow raw docbook in markdown documents, so we
 -- include docbook block tags here too.
-- 
cgit v1.2.3


From b4441c940dc76d1a64636ef88287b38306ebccb4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 12 Nov 2013 18:48:06 -0800
Subject: HTML/EPUB footnotes:  Put `<sup>` tag inside `<a>` tags.

This allows better control of formatting, since the `<a>`
tags have a distinguishing class.

Closes #1049.
---
 src/Text/Pandoc/Writers/HTML.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index c1cca291b..424843539 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -757,11 +757,11 @@ inlineToHtml opts inline =
                                          writerIdentifierPrefix opts ++ "fn" ++ ref)
                                        ! A.class_ "footnoteRef"
                                        ! prefixedId opts ("fnref" ++ ref)
+                                       $ H.sup
                                        $ toHtml ref
-                        let link' = case writerEpubVersion opts of
-                                         Just EPUB3 -> link ! customAttribute "epub:type" "noteref"
-                                         _ -> link
-                        return $ H.sup $ link'
+                        return $ case writerEpubVersion opts of
+                                      Just EPUB3 -> link ! customAttribute "epub:type" "noteref"
+                                      _          -> link
     (Cite cits il)-> do contents <- inlineListToHtml opts il
                         let citationIds = unwords $ map citationId cits
                         let result = H.span ! A.class_ "citation" $ contents
-- 
cgit v1.2.3


From 892ba2dd163cbba598cbd86814a594008aae9f27 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 16 Nov 2013 14:09:09 -0800
Subject: LaTeX writer: Properly escape pdftitle, pdfauthor.

Closes #1059.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 72b0bde6d..7ff64cf74 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -126,14 +126,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do
   (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
   let main = render colwidth $ vsep body
   st <- get
+  titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
+  authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
   let context  =  defField "toc" (writerTableOfContents options) $
                   defField "toc-depth" (show (writerTOCDepth options -
                                               if writerChapters options
                                                  then 1
                                                  else 0)) $
                   defField "body" main $
-                  defField "title-meta" (stringify $ docTitle meta) $
-                  defField "author-meta" (intercalate "; " $ map stringify $ docAuthors meta) $
+                  defField "title-meta" titleMeta $
+                  defField "author-meta" (intercalate "; " authorsMeta) $
                   defField "documentclass" (if writerBeamer options
                                                then ("beamer" :: String)
                                                else if writerChapters options
-- 
cgit v1.2.3


From 9b0378b939f46183fc152b1a49a69f3007de295a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 16 Nov 2013 22:49:15 -0800
Subject: OpenDocument:  Skip raw HTML.

Previously it was erroneously included as verbatim text.

Closes #1035.
---
 src/Text/Pandoc/Writers/OpenDocument.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index b38d250aa..565f5f869 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -376,8 +376,8 @@ inlineToOpenDocument o ils
     | Code      _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
     | Math      t s <- ils = inlinesToOpenDocument o (readTeXMath' t s)
     | Cite      _ l <- ils = inlinesToOpenDocument o l
-    | RawInline f s <- ils = if f == "opendocument" || f == "html"
-                                then withTextStyle Pre $ inTextStyle $ preformatted s
+    | RawInline f s <- ils = if f == "opendocument"
+                                then return $ preformatted s
                                 else return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
     | Image _ (s,t) <- ils = return $ mkImg  s t
-- 
cgit v1.2.3


From d5660275a38a58334372326a79a9ce0153fede43 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 17 Nov 2013 08:45:21 -0800
Subject: Parsing:  Generalized type of registerHeader, using new typeclasses.

New type classes HasReadeOptions, HasIdentifierList, HasHeaderMap.
These allow certain common functions to be reused even in parsers
that use custom state (instead of ParserState), such as the MediaWiki
reader.

Minor API bump.
---
 src/Text/Pandoc/Parsing.hs | 54 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 12 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 701b2ef84..9687d7712 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances,
+    FlexibleInstances#-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
 
@@ -65,6 +66,9 @@ module Text.Pandoc.Parsing ( (>>~),
                              guardEnabled,
                              guardDisabled,
                              ParserState (..),
+                             HasReaderOptions (..),
+                             HasHeaderMap (..),
+                             HasIdentifierList (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -826,6 +830,34 @@ instance HasMeta ParserState where
   deleteMeta field st =
     st{ stateMeta = deleteMeta field $ stateMeta st }
 
+class Monad m => HasReaderOptions m where
+  askReaderOption :: (ReaderOptions -> b) -> m b
+
+class Monad m => HasHeaderMap m where
+  getHeaderMap    :: m (M.Map Inlines String)
+  putHeaderMap    :: M.Map Inlines String -> m ()
+  modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m ()
+  -- default
+  modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f
+
+class Monad m => HasIdentifierList m where
+  getIdentifierList    :: m [String]
+  putIdentifierList    :: [String] -> m ()
+  modifyIdentifierList :: ([String] -> [String]) -> m ()
+  -- default
+  modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f
+
+instance HasReaderOptions (Parser s ParserState) where
+  askReaderOption = getOption
+
+instance HasHeaderMap (Parser s ParserState) where
+  getHeaderMap      = fmap stateHeaders getState
+  putHeaderMap hm   = updateState $ \st -> st{ stateHeaders = hm }
+
+instance HasIdentifierList (Parser s ParserState) where
+  getIdentifierList   = fmap stateIdentifiers  getState
+  putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l }
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
@@ -895,10 +927,11 @@ type SubstTable = M.Map Key Inlines
 --  and the auto_identifers extension is set, generate a new
 --  unique identifier, and update the list of identifiers
 --  in state.
-registerHeader :: Attr -> Inlines -> Parser s ParserState Attr
+registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m)
+               => Attr -> Inlines -> m Attr
 registerHeader (ident,classes,kvs) header' = do
-  ids <- stateIdentifiers `fmap` getState
-  exts <- getOption readerExtensions
+  ids <- getIdentifierList
+  exts <- askReaderOption readerExtensions
   let insert' = M.insertWith (\_new old -> old)
   if null ident && Ext_auto_identifiers `Set.member` exts
      then do
@@ -906,16 +939,13 @@ registerHeader (ident,classes,kvs) header' = do
        let id'' = if Ext_ascii_identifiers `Set.member` exts
                      then catMaybes $ map toAsciiChar id'
                      else id'
-       updateState $ \st -> st{
-              stateIdentifiers = if id' == id''
-                                    then id' : ids
-                                    else id' : id'' : ids,
-              stateHeaders = insert' header' id' $ stateHeaders st }
+       putIdentifierList $ if id' == id''
+                              then id' : ids
+                              else id' : id'' : ids
+       modifyHeaderMap $ insert' header' id'
        return (id'',classes,kvs)
      else do
-        unless (null ident) $
-          updateState $ \st -> st{
-               stateHeaders = insert' header' ident $ stateHeaders st }
+        unless (null ident) $ modifyHeaderMap $ insert' header' ident
         return (ident,classes,kvs)
 
 -- | Fail unless we're in "smart typography" mode.
-- 
cgit v1.2.3


From 0fd2176e29bd1118d314c6179455fb78bed35aea Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 17 Nov 2013 08:47:14 -0800
Subject: MediaWiki reader:  Add automatic header identifiers.

---
 src/Text/Pandoc/Readers/MediaWiki.hs | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 136701bd0..1c074e3de 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
 {-
   Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
 
@@ -51,6 +52,7 @@ import Data.List (intersperse, intercalate, isPrefixOf )
 import Text.HTML.TagSoup
 import Data.Sequence (viewl, ViewL(..), (<|))
 import qualified Data.Foldable as F
+import qualified Data.Map as M
 import Data.Char (isDigit, isSpace)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
@@ -62,6 +64,8 @@ readMediaWiki opts s =
                                        , mwMaxNestingLevel = 4
                                        , mwNextLinkNumber  = 1
                                        , mwCategoryLinks = []
+                                       , mwHeaderMap = M.empty
+                                       , mwIdentifierList = []
                                        }
        "source" (s ++ "\n") of
           Left err'    -> error $ "\nError:\n" ++ show err'
@@ -71,10 +75,23 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwMaxNestingLevel :: Int
                        , mwNextLinkNumber  :: Int
                        , mwCategoryLinks   :: [Inlines]
+                       , mwHeaderMap       :: M.Map Inlines String
+                       , mwIdentifierList  :: [String]
                        }
 
 type MWParser = Parser [Char] MWState
 
+instance HasReaderOptions MWParser where
+  askReaderOption f = (f . mwOptions) `fmap` getState
+
+instance HasHeaderMap MWParser where
+  getHeaderMap      = fmap mwHeaderMap getState
+  putHeaderMap hm   = updateState $ \st -> st{ mwHeaderMap = hm }
+
+instance HasIdentifierList MWParser where
+  getIdentifierList   = fmap mwIdentifierList getState
+  putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l }
+
 --
 -- auxiliary functions
 --
@@ -351,7 +368,8 @@ header = try $ do
   let lev = length eqs
   guard $ lev <= 6
   contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
-  return $ B.header lev contents
+  attr <- registerHeader nullAttr contents
+  return $ B.headerWith attr lev contents
 
 bulletList :: MWParser Blocks
 bulletList = B.bulletList <$>
-- 
cgit v1.2.3


From e690c87dc4c3ff79689d9a887a9b5e3bffbb5d37 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 17 Nov 2013 09:07:25 -0800
Subject: LaTeX reader: Support `\textnormal` as span with class "nodecor".

This is needed for pandoc-citeproc.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 681dcb077..689b12c8e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -373,6 +373,7 @@ inlineCommands = M.fromList $
   , ("backslash", lit "\\")
   , ("slash", lit "/")
   , ("textbf", strong <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
   , ("ldots", lit "…")
   , ("dots", lit "…")
   , ("mdots", lit "…")
-- 
cgit v1.2.3


From d07dc971da0123149e751dff987cf9df1a55b75d Mon Sep 17 00:00:00 2001
From: "Shaun Attfield (shaun@victor)" <heurist+git@gmail.com>
Date: Mon, 18 Nov 2013 11:53:13 +0200
Subject: Epub Writer: Add cover reference to guide element

Fixes an issue with calibre http://calibre-ebook.com/ putting the
cover at the end of the book if the spine has linear="no".

Apparently this is best practice for other converters as well.

http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6
---
 src/Text/Pandoc/Writers/EPUB.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ac0e7610c..ca69a0fd4 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -260,8 +260,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                                     else "no")] $ ()) :
                   map chapterRefNode chapterEntries)
           , unode "guide" $
-              unode "reference" !
-                [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
+              [ unode "reference" !
+                [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () ]
+              ++ [ unode "reference" !
+                [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () ]
           ]
   let contentsEntry = mkEntry "content.opf" contentsData
 
-- 
cgit v1.2.3


From a3eba6ee848497f98f72aef5dad112c49bdd4fec Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 18 Nov 2013 20:28:27 -0800
Subject: LaTeX reader:  Parse contents of curly quotes or matched `"` as
 quotes.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 689b12c8e..75e29ebb9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -163,13 +163,23 @@ mathChars = concat <$>
       <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
        )
 
+quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
+quoted' f starter ender = do
+  startchs <- starter
+  try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
+
 double_quote :: LP Inlines
-double_quote = (doubleQuoted . mconcat) <$>
-  (try $ string "``" *> manyTill inline (try $ string "''"))
+double_quote =
+  (   quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+  <|> quoted' doubleQuoted (string "“")        (void $ char '”')
+  <|> quoted' doubleQuoted (string "\"")       (void $ char '"')
+  )
 
 single_quote :: LP Inlines
-single_quote = (singleQuoted . mconcat) <$>
-  (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
+single_quote =
+  (  quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+  <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+  )
 
 inline :: LP Inlines
 inline = (mempty <$ comment)
@@ -181,10 +191,10 @@ inline = (mempty <$ comment)
            ((char '-') *> option (str "–") (str "—" <$ char '-')))
      <|> double_quote
      <|> single_quote
-     <|> (str "“" <$ try (string "``"))  -- nb. {``} won't be caught by double_quote
      <|> (str "”" <$ try (string "''"))
-     <|> (str "‘" <$ char '`')           -- nb. {`} won't be caught by single_quote
+     <|> (str "”" <$ char '”')
      <|> (str "’" <$ char '\'')
+     <|> (str "’" <$ char '’')
      <|> (str "\160" <$ char '~')
      <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
      <|> (mathInline  $ char '$' *> mathChars <* char '$')
@@ -755,7 +765,7 @@ inlineText :: LP Inlines
 inlineText = str <$> many1 inlineChar
 
 inlineChar :: LP Char
-inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n"
+inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n"
 
 environment :: LP Blocks
 environment = do
-- 
cgit v1.2.3


From 5b27480e54f4ecfc145bde133f1f51865d9b12a0 Mon Sep 17 00:00:00 2001
From: "Shaun Attfield (shaun@victor)" <heurist+git@gmail.com>
Date: Tue, 19 Nov 2013 08:20:27 +0200
Subject: Epub Writer: Add cover reference to guide element (v2)

Avoiding an unnecessary list concatenation.

Fixes an issue with calibre http://calibre-ebook.com/ putting the
cover at the end of the book if the spine has linear="no".

Apparently this is best practice for other converters as well.

http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6
---
 src/Text/Pandoc/Writers/EPUB.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ca69a0fd4..be8de7073 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -260,10 +260,11 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                                     else "no")] $ ()) :
                   map chapterRefNode chapterEntries)
           , unode "guide" $
-              [ unode "reference" !
-                [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () ]
-              ++ [ unode "reference" !
-                [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () ]
+             [ unode "reference" !
+                   [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
+             , unode "reference" !
+                   [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ ()
+             ]
           ]
   let contentsEntry = mkEntry "content.opf" contentsData
 
-- 
cgit v1.2.3


From c226a57eaa7f1e9d4782c7ea3b56e72251a6487a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 19 Nov 2013 12:01:52 -0800
Subject: Docbook writer:  Hierarchicalize block content in metadata.

Previously headers just disappeared from block-level metadata
when it was used in templates.

Now we apply the 'hierarchicalize' transformation.

Note that a block headed by a level-2 header will turn into
a `<sect1>` element.
---
 src/Text/Pandoc/Writers/Docbook.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index dad83d7bb..02d875be3 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -85,8 +85,9 @@ writeDocbook opts (Pandoc meta blocks) =
       auths'   = map (authorToDocbook opts) $ docAuthors meta
       meta'    = B.setMeta "author" auths' meta
       Just metadata = metaToJSON opts
-                 (Just . render colwidth . blocksToDocbook opts)
-                 (Just . render colwidth . inlinesToDocbook opts)
+                 (Just . render colwidth . (vcat .
+                          (map (elementToDocbook opts' startLvl)) . hierarchicalize))
+                 (Just . render colwidth . inlinesToDocbook opts')
                  meta'
       main     = render' $ vcat (map (elementToDocbook opts' startLvl) elements)
       context = defField "body" main
-- 
cgit v1.2.3


From e290d91c93b265ce7763797a5c995d29fb683c67 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 19 Nov 2013 13:09:34 -0800
Subject: MIME:  In looking up extensions, drop the encoding info.

E.g. for 'image/jpg;base64' we should lookup 'image/jpg'.
---
 src/Text/Pandoc/MIME.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index d9cb94a33..1f5f6f862 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -40,7 +40,8 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
   where mimeTypes = M.fromList mimeTypesList
 
 extensionFromMimeType :: String -> Maybe String
-extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes
+extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
+  -- note:  we just look up the basic mime type, dropping the content-encoding etc.
   where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
 
 mimeTypesList :: [(String, String)]
-- 
cgit v1.2.3


From 83b9a66bf468c0f32bd96be92bb571bd83b73903 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 19 Nov 2013 13:15:24 -0800
Subject: Shared: Fixed bug in openURL with data: URIs.

Previously the base-64 encoded bytestring was returned.
We now decode it so it's a proper image!

This should fix parsing of data: URLs.
---
 src/Text/Pandoc/Shared.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d6ccdae66..8dcd88148 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -108,6 +108,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as B8
 import Text.Pandoc.Compat.Monoid
+import Data.ByteString.Base64 (decodeLenient)
 
 #ifdef EMBED_DATA_FILES
 import Text.Pandoc.Data (dataFiles)
@@ -641,7 +642,7 @@ openURL u
   | "data:" `isPrefixOf` u =
     let mime     = takeWhile (/=',') $ drop 5 u
         contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
-    in  return $ Right (contents, Just mime)
+    in  return $ Right (decodeLenient contents, Just mime)
 #ifdef HTTP_CONDUIT
   | otherwise = E.try $ do
      req <- parseUrl u
-- 
cgit v1.2.3


From 3d453f096cfa12e231c5a4d4c8e468378e20e5e8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 19 Nov 2013 13:16:31 -0800
Subject: Docx writer:  Use mime type info returned by fetchItem.

---
 src/Text/Pandoc/Writers/Docx.hs | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 5d1647844..b9c198a78 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -55,8 +55,8 @@ import Data.Unique (hashUnique, newUnique)
 import System.Random (randomRIO)
 import Text.Printf (printf)
 import qualified Control.Exception as E
-import System.FilePath (takeExtension)
-import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
+import Control.Applicative ((<|>))
 
 data WriterState = WriterState{
          stTextProperties :: [Element]
@@ -737,7 +737,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
           liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
           -- emit alt text
           inlinesToOpenXML opts alt
-        Right (img, _) -> do
+        Right (img, mt) -> do
           ident <- ("rId"++) `fmap` getUniqueId
           let size = imageSize img
           let (xpt,ypt) = maybe (120,120) sizeInPoints size
@@ -776,19 +776,21 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
                   , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
                   , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
                   , graphic ]
-          let imgext = case imageType img of
-                             Just Png  -> ".png"
-                             Just Jpeg -> ".jpeg"
-                             Just Gif  -> ".gif"
-                             Just Pdf  -> ".pdf"
-                             Just Eps  -> ".eps"
-                             Nothing   -> takeExtension src
+          let imgext = case mt >>= extensionFromMimeType of
+                            Just x    -> '.':x
+                            Nothing   -> case imageType img of
+                                              Just Png  -> ".png"
+                                              Just Jpeg -> ".jpeg"
+                                              Just Gif  -> ".gif"
+                                              Just Pdf  -> ".pdf"
+                                              Just Eps  -> ".eps"
+                                              Nothing   -> ""
           if null imgext
              then -- without an extension there is no rule for content type
                inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
              else do
                let imgpath = "media/" ++ ident ++ imgext
-               let mbMimeType = getMimeType imgpath
+               let mbMimeType = mt <|> getMimeType imgpath
                -- insert mime type to use in constructing [Content_Types].xml
                modify $ \st -> st{ stImages =
                    M.insert src (ident, imgpath, mbMimeType, imgElt, img)
-- 
cgit v1.2.3


From cf149fcf38d98b1bee79ecd9056fa0f46264e7ce Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 22 Nov 2013 19:41:08 -0800
Subject: Fixed bug with intraword emphasis.

Closes #1066.
---
 src/Text/Pandoc/Readers/Markdown.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ea49d8c1d..33d1a9620 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1455,6 +1455,7 @@ enclosure c = do
 -- Parse inlines til you hit one c or a sequence of two cs.
 -- If one c, emit emph and then parse two.
 -- If two cs, emit strong and then parse one.
+-- Otherwise, emit ccc then the results.
 three :: Char -> MarkdownParser (F Inlines)
 three c = do
   contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
@@ -1479,7 +1480,7 @@ one c prefix' = do
   contents <- mconcat <$> many (  (notFollowedBy (char c) >> inline)
                            <|> try (string [c,c] >>
                                     notFollowedBy (char c) >>
-                                    two c prefix') )
+                                    two c mempty) )
   (char c >> return (B.emph <$> (prefix' <> contents)))
     <|> return (return (B.str [c]) <> (prefix' <> contents))
 
-- 
cgit v1.2.3


From 526762bf222dbab199f6ff90c925fe18535c698f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 22 Nov 2013 19:51:07 -0800
Subject: ConTeXt writer:  Use setupcaption to separate style from content.

Instead of adding 'nunumber' every time we place a figure...

Closes #1067.
---
 src/Text/Pandoc/Writers/ConTeXt.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0379f8b0a..179d9bc5b 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst
 -- title beginning with fig: indicates that the image is a figure
 blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do
   capt <- inlineListToConTeXt txt
-  return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <>
+  return $ blankline $$ "\\placefigure[here]" <> braces capt <>
            braces ("\\externalfigure" <> brackets (text src)) <> blankline
 blockToConTeXt (Para lst) = do
   contents <- inlineListToConTeXt lst
-- 
cgit v1.2.3


From 4321a09b7fa5b52e72aef4fa637392d7da4eb808 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 22 Nov 2013 23:36:19 -0800
Subject: EPUB writer:  Ensure that same identifier is used throughout.

If dc:identifier is given in metadata, we use that; otherwise
we use a random uuid.

Closes #1044.
---
 src/Text/Pandoc/Writers/EPUB.hs | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index be8de7073..98f043cd8 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -198,7 +198,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let lang = case lookup "lang" (writerVariables opts') of
                      Just x  -> x
                      Nothing -> localeLang
-  uuid <- getRandomUUID
+  let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts'
+  let mbIdent = findElement (QName "identifier" Nothing (Just "dc"))
+                $ unode "dummy" ! [] $ userNodes
+  uuid <- case mbIdent of
+               Just id' -> return $ trim $ strContent id'
+               Nothing  -> fmap show getRandomUUID
   let chapterNode ent = unode "item" !
                            ([("id", takeBaseName $ eRelativePath ent),
                              ("href", eRelativePath ent),
@@ -230,7 +235,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                              EPUB3 -> "3.0")
                           ,("xmlns","http://www.idpf.org/2007/opf")
                           ,("unique-identifier","BookId")] $
-          [ metadataElement version (writerEpubMetadata opts')
+          [ metadataElement version userNodes
               uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage
           , unode "manifest" $
              [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
@@ -311,7 +316,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
              [ unode "meta" ! [("name","dtb:uid")
-                              ,("content", show uuid)] $ ()
+                              ,("content", uuid)] $ ()
              , unode "meta" ! [("name","dtb:depth")
                               ,("content", "1")] $ ()
              , unode "meta" ! [("name","dtb:totalPageCount")
@@ -384,13 +389,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
                   (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
   return $ fromArchive archive
 
-metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String]
+metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String]
                 -> String -> UTCTime -> Maybe a -> Element
-metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage =
-  let userNodes = parseXML metadataXML
-      elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage =
+  let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
                                ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
-            filter isMetadataElement $ onlyElems userNodes
+            filter isMetadataElement userNodes
       dublinElements = ["contributor","coverage","creator","date",
             "description","format","identifier","language","publisher",
             "relation","rights","source","subject","title","type"]
@@ -401,7 +405,7 @@ metadataElement version metadataXML uuid lang title authors date currentTime mbC
       contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
       newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
            [ unode "dc:language" lang | not (elt `contains` "language") ] ++
-           [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
+           [ unode "dc:identifier" ! [("id","BookId")] $ uuid |
                not (elt `contains` "identifier") ] ++
            [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2]
                      $ a | a <- authors, not (elt `contains` "creator") ] ++
-- 
cgit v1.2.3


From 56277bacea73818a7541b60987e5bc877782baab Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 22 Nov 2013 23:40:46 -0800
Subject: EPUB writer:  Don't include node for cover.xhtml if no cover!

---
 src/Text/Pandoc/Writers/EPUB.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 98f043cd8..ef4c7be23 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -267,8 +267,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
           , unode "guide" $
              [ unode "reference" !
                    [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
-             , unode "reference" !
-                   [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ ()
+             ] ++
+             [ unode "reference" !
+                   [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing
              ]
           ]
   let contentsEntry = mkEntry "content.opf" contentsData
-- 
cgit v1.2.3


From c1ff65e5ef0b5b8d78149cf666cda1de4a0e4fdb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 23 Nov 2013 14:24:33 -0800
Subject: HTML writer:  Handle csl flipflopping spans (csl-no-emph, etc.)

---
 src/Text/Pandoc/Writers/HTML.hs | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 424843539..641652276 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -613,8 +613,22 @@ inlineToHtml opts inline =
     (Str str)        -> return $ strToHtml str
     (Space)          -> return $ strToHtml " "
     (LineBreak)      -> return $ if writerHtml5 opts then H5.br else H.br
-    (Span attr ils)  -> inlineListToHtml opts ils >>=
-                           return . addAttrs opts attr . H.span
+    (Span (id',classes,kvs) ils)
+                     -> inlineListToHtml opts ils >>=
+                           return . addAttrs opts attr' . H.span
+                        where attr' = (id',classes',kvs')
+                              classes' = filter (`notElem` ["csl-no-emph",
+                                              "csl-no-strong",
+                                              "csl-no-smallcaps"]) classes
+                              kvs' = if null styles
+                                        then kvs
+                                        else (("style", concat styles) : kvs)
+                              styles = ["font-style:normal;"
+                                         | "csl-no-emph" `elem` classes]
+                                    ++ ["font-weight:normal;"
+                                         | "csl-no-strong" `elem` classes]
+                                    ++ ["font-variant:normal;"
+                                         | "csl-no-smallcaps" `elem` classes]
     (Emph lst)       -> inlineListToHtml opts lst >>= return . H.em
     (Strong lst)     -> inlineListToHtml opts lst >>= return . H.strong
     (Code attr str)  -> case hlCode of
-- 
cgit v1.2.3


From b82ef0e29a652b7e1629b6e2e5f56140ed08e633 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 23 Nov 2013 14:41:22 -0800
Subject: LaTeX writer:  Handle csl flipflopping spans (csl-no-emph, etc.)

---
 src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 7ff64cf74..f3cbcf19f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -602,7 +602,16 @@ isQuoted _ = False
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: Inline    -- ^ Inline to convert
               -> State WriterState Doc
-inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces
+inlineToLaTeX (Span (_,classes,_) ils) = do
+  let noEmph = "csl-no-emph" `elem` classes
+  let noStrong = "csl-no-strong" `elem` classes
+  let noSmallCaps = "csl-no-smallcaps" `elem` classes
+  ((if noEmph then inCmd "textup" else id) .
+   (if noStrong then inCmd "textnormal" else id) .
+   (if noSmallCaps then inCmd "textnormal" else id) .
+   (if not (noEmph || noStrong || noSmallCaps)
+       then braces
+       else id)) `fmap` inlineListToLaTeX ils
 inlineToLaTeX (Emph lst) =
   inlineListToLaTeX lst >>= return . inCmd "emph"
 inlineToLaTeX (Strong lst) =
-- 
cgit v1.2.3


From e1a9a61774cce807c6db2fff7b8609b2d9dbc678 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 23 Nov 2013 14:52:14 -0800
Subject: Docx writer:  Implemented csl flipflopping spans.

---
 src/Text/Pandoc/Writers/Docx.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index b9c198a78..5c7341b69 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -638,7 +638,12 @@ formattedString str = do
 inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
 inlineToOpenXML _ (Str str) = formattedString str
 inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
-inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils
+inlineToOpenXML opts (Span (_,classes,_) ils) = do
+  let off x = withTextProp (mknode x [("w:val","0")] ())
+  ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
+   (if "csl-no-strong" `elem` classes then off "w:b" else id) .
+   (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
+   $ inlinesToOpenXML opts ils
 inlineToOpenXML opts (Strong lst) =
   withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
 inlineToOpenXML opts (Emph lst) =
-- 
cgit v1.2.3


From 303e42a94f16e00ecb65fb9de2d282d050a626c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 24 Nov 2013 12:51:41 +0100
Subject: MediaWiki reader: Accept image links in more languages

In some of the Wikipedia versions the local version of 'File' is used (for example 'Archivo' in Spanish)
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 1c074e3de..7f99af528 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -549,10 +549,14 @@ endline = () <$ try (newline <*
                      notFollowedBy' header <*
                      notFollowedBy anyListStart)
 
+imageIdentifiers :: [MWParser ()]
+imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
+    where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier"]
+
 image :: MWParser Inlines
 image = try $ do
   sym "[["
-  sym "File:" <|> sym "Image:"
+  choice imageIdentifiers
   fname <- many1 (noneOf "|]")
   _ <- many (try $ char '|' *> imageOption)
   caption <-   (B.str fname <$ sym "]]")
-- 
cgit v1.2.3


From 659596876b74a2ce99ec5f7253962b27ca091354 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 29 Nov 2013 18:01:07 -0800
Subject: EPUB writer:  Improved metadata handling.

* Metadata may now be included in YAML blocks in a markdown document.
  For example,

        ---
        title:
        - type: main
          text: My Book
        - type: subtitle
          text: An investigation of metadata
        creator:
        - role: author
          text: John Smith
        - role: editor
          text: Sarah Jones
        identifier:
        - scheme: DOI
          text: doi:10.234234.234/33
        publisher:  My Press
        rights:  (c) 2007 John Smith, CC BY-NC
        ...

* Metadata may still be provided using `--epub-metadata`; it will
  be merged with the metadata in YAML blocks.
* meta tags are used instead of opf attributes for EPUB3.
---
 src/Text/Pandoc/Writers/EPUB.hs | 642 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 591 insertions(+), 51 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ef4c7be23..4f1de5df0 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
 {-
 Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
 
@@ -30,7 +30,8 @@ Conversion of 'Pandoc' documents to EPUB.
 -}
 module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
 import Data.IORef
-import Data.Maybe ( fromMaybe, isNothing )
+import qualified Data.Map as M
+import Data.Maybe ( fromMaybe )
 import Data.List ( isInfixOf, intercalate )
 import System.Environment ( getEnv )
 import Text.Printf (printf)
@@ -40,6 +41,7 @@ import qualified Data.ByteString.Lazy.Char8 as B8
 import Text.Pandoc.UTF8 ( fromStringLazy, toString )
 import Text.Pandoc.SelfContained ( makeSelfContained )
 import Codec.Archive.Zip
+import Control.Applicative ((<$>))
 import Data.Time.Clock.POSIX
 import Data.Time
 import System.Locale
@@ -70,6 +72,216 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 -- in filenames, chapter0003.xhtml.
 data Chapter = Chapter (Maybe [Int]) [Block]
 
+data EPUBMetadata = EPUBMetadata{
+    epubIdentifier         :: [Identifier]
+  , epubTitle              :: [Title]
+  , epubDate               :: String
+  , epubLanguage           :: String
+  , epubCreator            :: [Creator]
+  , epubContributor        :: [Creator]
+  , epubSubject            :: [String]
+  , epubDescription        :: Maybe String
+  , epubType               :: Maybe String
+  , epubFormat             :: Maybe String
+  , epubPublisher          :: Maybe String
+  , epubSource             :: Maybe String
+  , epubRelation           :: Maybe String
+  , epubCoverage           :: Maybe String
+  , epubRights             :: Maybe String
+  , epubCoverImage         :: Maybe String
+  } deriving Show
+
+data Creator = Creator{
+    creatorText            :: String
+  , creatorRole            :: Maybe String
+  , creatorFileAs          :: Maybe String
+  } deriving Show
+
+data Identifier = Identifier{
+    identifierText         :: String
+  , identifierScheme       :: Maybe String
+  } deriving Show
+
+data Title = Title{
+    titleText              :: String
+  , titleFileAs            :: Maybe String
+  , titleType              :: Maybe String
+  } deriving Show
+
+dcName :: String -> QName
+dcName n = QName n Nothing (Just "dc")
+
+dcNode :: Node t => String -> t -> Element
+dcNode = node . dcName
+
+opfName :: String -> QName
+opfName n = QName n Nothing (Just "opf")
+
+plainify :: [Inline] -> String
+plainify t =
+  trimr $ writePlain def{ writerStandalone = False } $ Pandoc nullMeta [Plain t]
+
+getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
+getEPUBMetadata opts meta = do
+  let md = metadataFromMeta opts meta
+  let elts = onlyElems $ parseXML $ writerEpubMetadata opts
+  let md' = foldr addMetadataFromXML md elts
+  let addIdentifier m =
+       if null (epubIdentifier m)
+          then do
+            randomId <- fmap show getRandomUUID
+            return $ m{ epubIdentifier = [Identifier randomId Nothing] }
+          else return m
+  let addLanguage m =
+       if null (epubLanguage m)
+          then case lookup "lang" (writerVariables opts) of
+                     Just x  -> return m{ epubLanguage = x }
+                     Nothing -> do
+                       localeLang <- catch (liftM
+                            (map (\c -> if c == '_' then '-' else c) .
+                             takeWhile (/='.')) $ getEnv "LANG")
+                          (\e -> let _ = (e :: SomeException) in return "en-US")
+                       return m{ epubLanguage = localeLang }
+          else return m
+  let fixDate m =
+       if null (epubDate m)
+          then do
+            currentTime <- getCurrentTime
+            return $ m{ epubDate = showDateTimeISO8601 currentTime }
+          else return m
+  let addAuthor m =
+       if any (\c -> creatorRole c == Just "aut") $ epubCreator m
+          then return m
+          else do
+            let authors' = map plainify $ docAuthors meta
+            let toAuthor name = Creator{ creatorText = name
+                                       , creatorRole = Just "aut"
+                                       , creatorFileAs = Nothing }
+            return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
+  addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage
+
+addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata
+addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
+  | name == "identifier" = md{ epubIdentifier =
+             Identifier{ identifierText = strContent e
+                       , identifierScheme = lookupAttr (opfName "scheme") attrs
+                       } : epubIdentifier md }
+  | name == "title" = md{ epubTitle =
+            Title{ titleText = strContent e
+                 , titleFileAs = getAttr "file-as"
+                 , titleType = getAttr "type"
+                 } : epubTitle md }
+  | name == "date" = md{ epubDate = maybe "" id $ normalizeDate $ strContent e }
+  | name == "language" = md{ epubLanguage = strContent e }
+  | name == "creator" = md{ epubCreator =
+              Creator{ creatorText = strContent e
+                     , creatorRole = getAttr "role"
+                     , creatorFileAs = getAttr "file-as"
+                     } : epubCreator md }
+  | name == "contributor" = md{ epubContributor =
+              Creator  { creatorText = strContent e
+                       , creatorRole = getAttr "role"
+                       , creatorFileAs = getAttr "file-as"
+                       } : epubContributor md }
+  | name == "subject" = md{ epubSubject = strContent e : epubSubject md }
+  | name == "description" = md { epubDescription = Just $ strContent e }
+  | name == "type" = md { epubType = Just $ strContent e }
+  | name == "format" = md { epubFormat = Just $ strContent e }
+  | name == "type" = md { epubType = Just $ strContent e }
+  | name == "publisher" = md { epubPublisher = Just $ strContent e }
+  | name == "source" = md { epubSource = Just $ strContent e }
+  | name == "relation" = md { epubRelation = Just $ strContent e }
+  | name == "coverage" = md { epubCoverage = Just $ strContent e }
+  | name == "rights" = md { epubRights = Just $ strContent e }
+  | otherwise = md
+  where getAttr n = lookupAttr (opfName n) attrs
+addMetadataFromXML _ md = md
+
+metaValueToString :: MetaValue -> String
+metaValueToString (MetaString s) = s
+metaValueToString (MetaInlines ils) = plainify ils
+metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs
+metaValueToString (MetaBool b) = show b
+metaValueToString _ = ""
+
+getList :: String -> Meta -> (MetaValue -> a) -> [a]
+getList s meta handleMetaValue =
+  case lookupMeta s meta of
+       Just (MetaList xs) -> map handleMetaValue xs
+       Just mv            -> [handleMetaValue mv]
+       Nothing            -> []
+
+getIdentifier :: Meta -> [Identifier]
+getIdentifier meta = getList "identifier" meta handleMetaValue
+  where handleMetaValue (MetaMap m) =
+           Identifier{ identifierText = maybe "" metaValueToString
+                                        $ M.lookup "text" m
+                     , identifierScheme = metaValueToString <$>
+                                          M.lookup "scheme" m }
+        handleMetaValue mv = Identifier (metaValueToString mv) Nothing
+
+getTitle :: Meta -> [Title]
+getTitle meta = getList "title" meta handleMetaValue
+  where handleMetaValue (MetaMap m) =
+           Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m
+                , titleFileAs = metaValueToString <$> M.lookup "file-as" m
+                , titleType = metaValueToString <$> M.lookup "type" m }
+        handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
+
+getCreator :: String -> Meta -> [Creator]
+getCreator s meta = getList s meta handleMetaValue
+  where handleMetaValue (MetaMap m) =
+           Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
+                  , creatorFileAs = metaValueToString <$> M.lookup "file-as" m
+                  , creatorRole = metaValueToString <$> M.lookup "role" m }
+        handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
+
+simpleList :: String -> Meta -> [String]
+simpleList s meta =
+  case lookupMeta s meta of
+       Just (MetaList xs) -> map metaValueToString xs
+       Just x -> [metaValueToString x]
+       Nothing -> []
+
+metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
+metadataFromMeta opts meta = EPUBMetadata{
+      epubIdentifier         = identifiers
+    , epubTitle              = titles
+    , epubDate               = date
+    , epubLanguage           = language
+    , epubCreator            = creators
+    , epubContributor        = contributors
+    , epubSubject            = subjects
+    , epubDescription        = description
+    , epubType               = epubtype
+    , epubFormat             = format
+    , epubPublisher          = publisher
+    , epubSource             = source
+    , epubRelation           = relation
+    , epubCoverage           = coverage
+    , epubRights             = rights
+    , epubCoverImage         = coverImage
+    }
+  where identifiers = getIdentifier meta
+        titles = getTitle meta
+        date = maybe "" id $
+              (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate
+        language = maybe "" metaValueToString $
+           lookupMeta "language" meta `mplus` lookupMeta "lang" meta
+        creators = getCreator "creator" meta
+        contributors = getCreator "contributor" meta
+        subjects = simpleList "subject" meta
+        description = metaValueToString <$> lookupMeta "description" meta
+        epubtype = metaValueToString <$> lookupMeta "type" meta
+        format = metaValueToString <$> lookupMeta "format" meta
+        publisher = metaValueToString <$> lookupMeta "publisher" meta
+        source = metaValueToString <$> lookupMeta "source" meta
+        relation = metaValueToString <$> lookupMeta "relation" meta
+        coverage = metaValueToString <$> lookupMeta "coverage" meta
+        rights = metaValueToString <$> lookupMeta "rights" meta
+        coverImage = fmap (const "cover-image") $
+                     lookup "epub-cover-image" $ writerVariables opts
+
 -- | Produce an EPUB file from a Pandoc document.
 writeEPUB :: WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
@@ -192,18 +404,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
 
   -- contents.opf
-  localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) .
-                       takeWhile (/='.')) $ getEnv "LANG")
-                    (\e -> let _ = (e :: SomeException) in return "en-US")
-  let lang = case lookup "lang" (writerVariables opts') of
-                     Just x  -> x
-                     Nothing -> localeLang
-  let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts'
-  let mbIdent = findElement (QName "identifier" Nothing (Just "dc"))
-                $ unode "dummy" ! [] $ userNodes
-  uuid <- case mbIdent of
-               Just id' -> return $ trim $ strContent id'
-               Nothing  -> fmap show getRandomUUID
   let chapterNode ent = unode "item" !
                            ([("id", takeBaseName $ eRelativePath ent),
                              ("href", eRelativePath ent),
@@ -221,22 +421,23 @@ writeEPUB opts doc@(Pandoc meta _) = do
                            [("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
-  let plainify t = trimr $
-                    writePlain opts'{ writerStandalone = False } $
-                    Pandoc meta [Plain t]
-  let plainTitle = plainify $ docTitle meta
-  let plainAuthors = map plainify $ docAuthors meta
+  metadata <- getEPUBMetadata opts' meta
+  let plainTitle = case docTitle meta of
+                        [] -> case epubTitle metadata of
+                                   []   -> "UNTITLED"
+                                   (x:_) -> titleText x
+                        x  -> plainify x
+  let uuid = case epubIdentifier metadata of
+                  (x:_) -> identifierText x  -- use first identifier as UUID
+                  []    -> error "epubIdentifier is null"  -- shouldn't happen
   currentTime <- getCurrentTime
-  let plainDate = maybe (showDateTimeISO8601 currentTime) id
-                   $ normalizeDate $ stringify $ docDate meta
   let contentsData = fromStringLazy $ ppTopElement $
         unode "package" ! [("version", case version of
                                              EPUB2 -> "2.0"
                                              EPUB3 -> "3.0")
                           ,("xmlns","http://www.idpf.org/2007/opf")
-                          ,("unique-identifier","BookId")] $
-          [ metadataElement version userNodes
-              uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage
+                          ,("unique-identifier","epub-id-1")] $
+          [ metadataElement version metadata currentTime
           , unode "manifest" $
              [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
                               ,("media-type","application/x-dtbncx+xml")] $ ()
@@ -266,7 +467,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
                   map chapterRefNode chapterEntries)
           , unode "guide" $
              [ unode "reference" !
-                   [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
+                   [("type","toc"),("title",plainTitle),
+                    ("href","nav.xhtml")] $ ()
              ] ++
              [ unode "reference" !
                    [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing
@@ -390,32 +592,96 @@ writeEPUB opts doc@(Pandoc meta _) = do
                   (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
   return $ fromArchive archive
 
-metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String]
-                -> String -> UTCTime -> Maybe a -> Element
-metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage =
-  let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
-                               ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
-            filter isMetadataElement userNodes
-      dublinElements = ["contributor","coverage","creator","date",
-            "description","format","identifier","language","publisher",
-            "relation","rights","source","subject","title","type"]
-      isMetadataElement e = (qPrefix (elName e) == Just "dc" &&
-                             qName (elName e) `elem` dublinElements) ||
-                            (qPrefix (elName e) == Nothing &&
-                             qName (elName e) `elem` ["link","meta"])
-      contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
-      newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
-           [ unode "dc:language" lang | not (elt `contains` "language") ] ++
-           [ unode "dc:identifier" ! [("id","BookId")] $ uuid |
-               not (elt `contains` "identifier") ] ++
-           [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2]
-                     $ a | a <- authors, not (elt `contains` "creator") ] ++
-           [ unode "dc:date" date | not (elt `contains` "date") ] ++
-           [ unode "meta" ! [("property", "dcterms:modified")] $
-               (showDateTimeISO8601 currentTime) | version == EPUB3] ++
-           [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
-               not (isNothing mbCoverImage) ]
-  in  elt{ elContent = elContent elt ++ map Elem newNodes }
+metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
+metadataElement version md currentTime =
+  unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+                     ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
+  where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
+                  ++ creatorNodes ++ contributorNodes ++ subjectNodes
+                  ++ descriptionNodes ++ typeNodes ++ formatNodes
+                  ++ publisherNodes ++ sourceNodes ++ relationNodes
+                  ++ coverageNodes ++ rightsNodes ++ coverImageNodes
+                  ++ modifiedNodes
+        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+                         ([1..] :: [Int]))
+        identifierNodes = withIds "epub-id" toIdentifierNode $
+                          epubIdentifier md
+        titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
+        dateNodes = dcTag' "date" $ epubDate md
+        languageNodes = [dcTag "language" $ epubLanguage md]
+        creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
+                       epubCreator md
+        contributorNodes = withIds "epub-contributor"
+                           (toCreatorNode "contributor") $ epubContributor md
+        subjectNodes = map (dcTag "subject") $ epubSubject md
+        descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
+        typeNodes = maybe [] (dcTag' "type") $ epubType md
+        formatNodes = maybe [] (dcTag' "format") $ epubFormat md
+        publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md
+        sourceNodes = maybe [] (dcTag' "source") $ epubSource md
+        relationNodes = maybe [] (dcTag' "relation") $ epubRelation md
+        coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
+        rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
+        coverImageNodes = maybe []
+            (\ci -> [unode "meta" !  [("name","cover"), ("content",ci)] $ ()])
+            $ epubCoverImage md
+        modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
+               (showDateTimeISO8601 currentTime) | version == EPUB3 ]
+        dcTag n s = unode ("dc:" ++ n) s
+        dcTag' n s = [dcTag n s]
+        toIdentifierNode id' (Identifier txt scheme)
+          | version == EPUB2 = [dcNode "identifier" !
+              ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $
+              txt]
+          | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++
+              maybe [] (\x -> [unode "meta" !
+                  [("refines",'#':id'),("property","identifier-type"),
+                   ("scheme","onix:codelist5")] $ x])
+                (schemeToOnix `fmap` scheme)
+        toCreatorNode s id' creator
+          | version == EPUB2 = [dcNode s !
+             ([("id",id')] ++
+              maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
+              maybe [] (\x -> [("opf:role",x)])
+               (creatorRole creator >>= toRelator)) $ creatorText creator]
+          | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
+              maybe [] (\x -> [unode "meta" !
+                   [("refines",'#':id'),("property","file-as")] $ x])
+                   (creatorFileAs creator) ++
+              maybe [] (\x -> [unode "meta" !
+                   [("refines",'#':id'),("property","role"),
+                     ("scheme","marc:relators")] $ x])
+                   (creatorRole creator >>= toRelator)
+        toTitleNode id' title
+          | version == EPUB2 = [dcNode "title" !
+             ([("id",id')] ++
+              maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++
+              maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $
+              titleText title]
+          | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
+              ++
+              maybe [] (\x -> [unode "meta" !
+                   [("refines",'#':id'),("property","file-as")] $ x])
+                   (titleFileAs title) ++
+              maybe [] (\x -> [unode "meta" !
+                   [("refines",'#':id'),("property","title-type")] $ x])
+                   (titleType title)
+        schemeToOnix "ISBN-10" = "02"
+        schemeToOnix "GTIN-13" = "03"
+        schemeToOnix "UPC"     = "04"
+        schemeToOnix "ISMN-10" = "05"
+        schemeToOnix "DOI"     = "06"
+        schemeToOnix "LCCN"    = "13"
+        schemeToOnix "GTIN-14" = "14"
+        schemeToOnix "ISBN-13" = "15"
+        schemeToOnix "Legal deposit number" = "17"
+        schemeToOnix "URN"     = "22"
+        schemeToOnix "OCLC"    = "23"
+        schemeToOnix "ISMN-13" = "25"
+        schemeToOnix "ISBN-A"  = "26"
+        schemeToOnix "JP"      = "27"
+        schemeToOnix "OLCC"    = "28"
+        schemeToOnix _         = "01"
 
 showDateTimeISO8601 :: UTCTime -> String
 showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
@@ -527,3 +793,277 @@ replaceRefs refTable = walk replaceOneRef
                 Just url -> Link lab (url,tit)
                 Nothing  -> x
         replaceOneRef x = x
+
+toRelator :: String -> Maybe String
+toRelator x
+  | x `elem` relators = Just x
+  | otherwise         = lookup (map toLower x) relatorMap
+
+relators :: [String]
+relators = map snd relatorMap
+
+relatorMap :: [(String, String)]
+relatorMap =
+           [("abridger", "abr")
+           ,("actor", "act")
+           ,("adapter", "adp")
+           ,("addressee", "rcp")
+           ,("analyst", "anl")
+           ,("animator", "anm")
+           ,("annotator", "ann")
+           ,("appellant", "apl")
+           ,("appellee", "ape")
+           ,("applicant", "app")
+           ,("architect", "arc")
+           ,("arranger", "arr")
+           ,("art copyist", "acp")
+           ,("art director", "adi")
+           ,("artist", "art")
+           ,("artistic director", "ard")
+           ,("assignee", "asg")
+           ,("associated name", "asn")
+           ,("attributed name", "att")
+           ,("auctioneer", "auc")
+           ,("author", "aut")
+           ,("author in quotations or text abstracts", "aqt")
+           ,("author of afterword, colophon, etc.", "aft")
+           ,("author of dialog", "aud")
+           ,("author of introduction, etc.", "aui")
+           ,("autographer", "ato")
+           ,("bibliographic antecedent", "ant")
+           ,("binder", "bnd")
+           ,("binding designer", "bdd")
+           ,("blurb writer", "blw")
+           ,("book designer", "bkd")
+           ,("book producer", "bkp")
+           ,("bookjacket designer", "bjd")
+           ,("bookplate designer", "bpd")
+           ,("bookseller", "bsl")
+           ,("braille embosser", "brl")
+           ,("broadcaster", "brd")
+           ,("calligrapher", "cll")
+           ,("cartographer", "ctg")
+           ,("caster", "cas")
+           ,("censor", "cns")
+           ,("choreographer", "chr")
+           ,("cinematographer", "cng")
+           ,("client", "cli")
+           ,("collection registrar", "cor")
+           ,("collector", "col")
+           ,("collotyper", "clt")
+           ,("colorist", "clr")
+           ,("commentator", "cmm")
+           ,("commentator for written text", "cwt")
+           ,("compiler", "com")
+           ,("complainant", "cpl")
+           ,("complainant-appellant", "cpt")
+           ,("complainant-appellee", "cpe")
+           ,("composer", "cmp")
+           ,("compositor", "cmt")
+           ,("conceptor", "ccp")
+           ,("conductor", "cnd")
+           ,("conservator", "con")
+           ,("consultant", "csl")
+           ,("consultant to a project", "csp")
+           ,("contestant", "cos")
+           ,("contestant-appellant", "cot")
+           ,("contestant-appellee", "coe")
+           ,("contestee", "cts")
+           ,("contestee-appellant", "ctt")
+           ,("contestee-appellee", "cte")
+           ,("contractor", "ctr")
+           ,("contributor", "ctb")
+           ,("copyright claimant", "cpc")
+           ,("copyright holder", "cph")
+           ,("corrector", "crr")
+           ,("correspondent", "crp")
+           ,("costume designer", "cst")
+           ,("court governed", "cou")
+           ,("court reporter", "crt")
+           ,("cover designer", "cov")
+           ,("creator", "cre")
+           ,("curator", "cur")
+           ,("dancer", "dnc")
+           ,("data contributor", "dtc")
+           ,("data manager", "dtm")
+           ,("dedicatee", "dte")
+           ,("dedicator", "dto")
+           ,("defendant", "dfd")
+           ,("defendant-appellant", "dft")
+           ,("defendant-appellee", "dfe")
+           ,("degree granting institution", "dgg")
+           ,("delineator", "dln")
+           ,("depicted", "dpc")
+           ,("depositor", "dpt")
+           ,("designer", "dsr")
+           ,("director", "drt")
+           ,("dissertant", "dis")
+           ,("distribution place", "dbp")
+           ,("distributor", "dst")
+           ,("donor", "dnr")
+           ,("draftsman", "drm")
+           ,("dubious author", "dub")
+           ,("editor", "edt")
+           ,("editor of compilation", "edc")
+           ,("editor of moving image work", "edm")
+           ,("electrician", "elg")
+           ,("electrotyper", "elt")
+           ,("enacting jurisdiction", "enj")
+           ,("engineer", "eng")
+           ,("engraver", "egr")
+           ,("etcher", "etr")
+           ,("event place", "evp")
+           ,("expert", "exp")
+           ,("facsimilist", "fac")
+           ,("field director", "fld")
+           ,("film director", "fmd")
+           ,("film distributor", "fds")
+           ,("film editor", "flm")
+           ,("film producer", "fmp")
+           ,("filmmaker", "fmk")
+           ,("first party", "fpy")
+           ,("forger", "frg")
+           ,("former owner", "fmo")
+           ,("funder", "fnd")
+           ,("geographic information specialist", "gis")
+           ,("honoree", "hnr")
+           ,("host", "hst")
+           ,("host institution", "his")
+           ,("illuminator", "ilu")
+           ,("illustrator", "ill")
+           ,("inscriber", "ins")
+           ,("instrumentalist", "itr")
+           ,("interviewee", "ive")
+           ,("interviewer", "ivr")
+           ,("inventor", "inv")
+           ,("issuing body", "isb")
+           ,("judge", "jud")
+           ,("jurisdiction governed", "jug")
+           ,("laboratory", "lbr")
+           ,("laboratory director", "ldr")
+           ,("landscape architect", "lsa")
+           ,("lead", "led")
+           ,("lender", "len")
+           ,("libelant", "lil")
+           ,("libelant-appellant", "lit")
+           ,("libelant-appellee", "lie")
+           ,("libelee", "lel")
+           ,("libelee-appellant", "let")
+           ,("libelee-appellee", "lee")
+           ,("librettist", "lbt")
+           ,("licensee", "lse")
+           ,("licensor", "lso")
+           ,("lighting designer", "lgd")
+           ,("lithographer", "ltg")
+           ,("lyricist", "lyr")
+           ,("manufacture place", "mfp")
+           ,("manufacturer", "mfr")
+           ,("marbler", "mrb")
+           ,("markup editor", "mrk")
+           ,("metadata contact", "mdc")
+           ,("metal-engraver", "mte")
+           ,("moderator", "mod")
+           ,("monitor", "mon")
+           ,("music copyist", "mcp")
+           ,("musical director", "msd")
+           ,("musician", "mus")
+           ,("narrator", "nrt")
+           ,("onscreen presenter", "osp")
+           ,("opponent", "opn")
+           ,("organizer of meeting", "orm")
+           ,("originator", "org")
+           ,("other", "oth")
+           ,("owner", "own")
+           ,("panelist", "pan")
+           ,("papermaker", "ppm")
+           ,("patent applicant", "pta")
+           ,("patent holder", "pth")
+           ,("patron", "pat")
+           ,("performer", "prf")
+           ,("permitting agency", "pma")
+           ,("photographer", "pht")
+           ,("plaintiff", "ptf")
+           ,("plaintiff-appellant", "ptt")
+           ,("plaintiff-appellee", "pte")
+           ,("platemaker", "plt")
+           ,("praeses", "pra")
+           ,("presenter", "pre")
+           ,("printer", "prt")
+           ,("printer of plates", "pop")
+           ,("printmaker", "prm")
+           ,("process contact", "prc")
+           ,("producer", "pro")
+           ,("production company", "prn")
+           ,("production designer", "prs")
+           ,("production manager", "pmn")
+           ,("production personnel", "prd")
+           ,("production place", "prp")
+           ,("programmer", "prg")
+           ,("project director", "pdr")
+           ,("proofreader", "pfr")
+           ,("provider", "prv")
+           ,("publication place", "pup")
+           ,("publisher", "pbl")
+           ,("publishing director", "pbd")
+           ,("puppeteer", "ppt")
+           ,("radio director", "rdd")
+           ,("radio producer", "rpc")
+           ,("recording engineer", "rce")
+           ,("recordist", "rcd")
+           ,("redaktor", "red")
+           ,("renderer", "ren")
+           ,("reporter", "rpt")
+           ,("repository", "rps")
+           ,("research team head", "rth")
+           ,("research team member", "rtm")
+           ,("researcher", "res")
+           ,("respondent", "rsp")
+           ,("respondent-appellant", "rst")
+           ,("respondent-appellee", "rse")
+           ,("responsible party", "rpy")
+           ,("restager", "rsg")
+           ,("restorationist", "rsr")
+           ,("reviewer", "rev")
+           ,("rubricator", "rbr")
+           ,("scenarist", "sce")
+           ,("scientific advisor", "sad")
+           ,("screenwriter", "aus")
+           ,("scribe", "scr")
+           ,("sculptor", "scl")
+           ,("second party", "spy")
+           ,("secretary", "sec")
+           ,("seller", "sll")
+           ,("set designer", "std")
+           ,("setting", "stg")
+           ,("signer", "sgn")
+           ,("singer", "sng")
+           ,("sound designer", "sds")
+           ,("speaker", "spk")
+           ,("sponsor", "spn")
+           ,("stage director", "sgd")
+           ,("stage manager", "stm")
+           ,("standards body", "stn")
+           ,("stereotyper", "str")
+           ,("storyteller", "stl")
+           ,("supporting host", "sht")
+           ,("surveyor", "srv")
+           ,("teacher", "tch")
+           ,("technical director", "tcd")
+           ,("television director", "tld")
+           ,("television producer", "tlp")
+           ,("thesis advisor", "ths")
+           ,("transcriber", "trc")
+           ,("translator", "trl")
+           ,("type designer", "tyd")
+           ,("typographer", "tyg")
+           ,("university place", "uvp")
+           ,("videographer", "vdg")
+           ,("witness", "wit")
+           ,("wood engraver", "wde")
+           ,("woodcutter", "wdc")
+           ,("writer of accompanying material", "wam")
+           ,("writer of added commentary", "wac")
+           ,("writer of added lyrics", "wal")
+           ,("writer of added text", "wat")
+           ]
+
-- 
cgit v1.2.3


From 96b678d823b544e3da7dd8531615c52de6164cb8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 15:17:38 -0800
Subject: Allow specification of epub-cover-image in YAML metadata.

---
 src/Text/Pandoc/Writers/EPUB.hs | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 4f1de5df0..a3b01848e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -279,8 +279,7 @@ metadataFromMeta opts meta = EPUBMetadata{
         relation = metaValueToString <$> lookupMeta "relation" meta
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
-        coverImage = fmap (const "cover-image") $
-                     lookup "epub-cover-image" $ writerVariables opts
+        coverImage = lookup "epub-cover-image" (writerVariables opts)
 
 -- | Produce an EPUB file from a Pandoc document.
 writeEPUB :: WriterOptions  -- ^ Writer options
@@ -305,11 +304,11 @@ writeEPUB opts doc@(Pandoc meta _) = do
                           then MathML Nothing
                           else writerHTMLMathMethod opts
                   , writerWrapText = False }
-  let mbCoverImage = lookup "epub-cover-image" vars
+  metadata <- getEPUBMetadata opts' meta
 
   -- cover page
   (cpgEntry, cpicEntry) <-
-                case mbCoverImage of
+                case epubCoverImage metadata of
                      Nothing   -> return ([],[])
                      Just img  -> do
                        let coverImage = "cover-image" ++ takeExtension img
@@ -421,7 +420,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
                            [("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
-  metadata <- getEPUBMetadata opts' meta
   let plainTitle = case docTitle meta of
                         [] -> case epubTitle metadata of
                                    []   -> "UNTITLED"
@@ -452,7 +450,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
              map pictureNode (cpicEntry ++ picEntries) ++
              map fontNode fontEntries
           , unode "spine" ! [("toc","ncx")] $
-              case mbCoverImage of
+              case epubCoverImage metadata of
                     Nothing -> []
                     Just _ -> [ unode "itemref" !
                                 [("idref", "cover"),("linear","no")] $ () ]
@@ -471,7 +469,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                     ("href","nav.xhtml")] $ ()
              ] ++
              [ unode "reference" !
-                   [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing
+                   [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing
              ]
           ]
   let contentsEntry = mkEntry "content.opf" contentsData
@@ -526,7 +524,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                               ,("content", "0")] $ ()
              , unode "meta" ! [("name","dtb:maxPageNumber")
                               ,("content", "0")] $ ()
-             ] ++ case mbCoverImage of
+             ] ++ case epubCoverImage metadata of
                         Nothing  -> []
                         Just _   -> [unode "meta" ! [("name","cover"),
                                             ("content","cover-image")] $ ()]
@@ -623,7 +621,8 @@ metadataElement version md currentTime =
         coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
         rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
         coverImageNodes = maybe []
-            (\ci -> [unode "meta" !  [("name","cover"), ("content",ci)] $ ()])
+            (const $ [unode "meta" !  [("name","cover"),
+                                       ("content","cover-image")] $ ()])
             $ epubCoverImage md
         modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
                (showDateTimeISO8601 currentTime) | version == EPUB3 ]
-- 
cgit v1.2.3


From 17ef39d1577a7c844f8747294f0e7a0486c1edaa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 15:25:28 -0800
Subject: Fixed `cover-image` in EPUB YAML metadata.

---
 src/Text/Pandoc/Writers/EPUB.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index a3b01848e..9745db5a0 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -279,7 +279,8 @@ metadataFromMeta opts meta = EPUBMetadata{
         relation = metaValueToString <$> lookupMeta "relation" meta
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
-        coverImage = lookup "epub-cover-image" (writerVariables opts)
+        coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
+             (metaValueToString <$> lookupMeta "cover-image" meta)
 
 -- | Produce an EPUB file from a Pandoc document.
 writeEPUB :: WriterOptions  -- ^ Writer options
-- 
cgit v1.2.3


From bb0f299165de22e119675aa6ceed0192a80c78e8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 16:16:35 -0800
Subject: ConTeXt writer:  Don't hardcode figure/table placement.

Instead, let this be set in the template, using `\setupfloat`.
Thanks to Aditya Mahajan for the suggestion.
---
 src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 179d9bc5b..3095cf508 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst
 -- title beginning with fig: indicates that the image is a figure
 blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do
   capt <- inlineListToConTeXt txt
-  return $ blankline $$ "\\placefigure[here]" <> braces capt <>
+  return $ blankline $$ "\\placefigure" <> braces capt <>
            braces ("\\externalfigure" <> brackets (text src)) <> blankline
 blockToConTeXt (Para lst) = do
   contents <- inlineListToConTeXt lst
@@ -205,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do
                   else liftM ($$ "\\HL") $ tableRowToConTeXt heads
     captionText <- inlineListToConTeXt caption
     rows' <- mapM tableRowToConTeXt rows
-    return $ "\\placetable" <> brackets ("here" <> if null caption
-                                                      then ",none"
-                                                      else "")
+    return $ "\\placetable" <> (if null caption
+                                   then brackets "none"
+                                   else empty)
                             <> braces captionText $$
              "\\starttable" <> brackets (text colDescriptors) $$
              "\\HL" $$ headers $$
-- 
cgit v1.2.3


From 7aa4d519686af1416eaf3b380f8584ab89569c41 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 17:00:58 -0800
Subject: ODT writer:  Add `draw:name` attribute to `draw:frame` elements.

This is reported to be necessary to avoid an error from recent
versions of Libre Office when files contain more than one image.

Closes #1069.

Thanks to wmanley for reporting and diagnosing the problem.
---
 src/Text/Pandoc/Writers/OpenDocument.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 565f5f869..4ddfd7166 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -64,6 +64,7 @@ data WriterState =
                 , stInDefinition  :: Bool
                 , stTight         :: Bool
                 , stFirstPara     :: Bool
+                , stImageId       :: Int
                 }
 
 defaultWriterState :: WriterState
@@ -78,6 +79,7 @@ defaultWriterState =
                 , stInDefinition  = False
                 , stTight         = False
                 , stFirstPara     = False
+                , stImageId       = 1
                 }
 
 when :: Bool -> Doc -> Doc
@@ -380,7 +382,7 @@ inlineToOpenDocument o ils
                                 then return $ preformatted s
                                 else return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
-    | Image _ (s,t) <- ils = return $ mkImg  s t
+    | Image _ (s,t) <- ils = mkImg  s t
     | Note        l <- ils = mkNote l
     | otherwise            = return empty
     where
@@ -389,7 +391,11 @@ inlineToOpenDocument o ils
                                            , ("xlink:href" , s       )
                                            , ("office:name", t       )
                                            ] . inSpanTags "Definition"
-      mkImg  s t   = inTags False "draw:frame" (attrsFromTitle t) $
+      mkImg  s t   = do
+               id' <- gets stImageId
+               modify (\st -> st{ stImageId = id' + 1 })
+               return $ inTags False "draw:frame"
+                        (("draw:name", "img" ++ show id'):attrsFromTitle t) $
                      selfClosingTag "draw:image" [ ("xlink:href"   , s       )
                                                  , ("xlink:type"   , "simple")
                                                  , ("xlink:show"   , "embed" )
-- 
cgit v1.2.3


From 7f09c1834da9f87e7715f5c9dc52f4b730da8f3f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 17:59:28 -0800
Subject: Markdown writer:  Fix rendering of tight sublists.

E.g.

    - foo
        - bar
    - baz

Previously a spurious blank line was included before the last item.
Closes #1050.
---
 src/Text/Pandoc/Writers/Markdown.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index eefcd547a..60d474263 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -555,7 +555,14 @@ bulletListItemToMarkdown opts items = do
   contents <- blockListToMarkdown opts items
   let sps = replicate (writerTabStop opts - 2) ' '
   let start = text ('-' : ' ' : sps)
-  return $ hang (writerTabStop opts) start $ contents <> cr
+  -- remove trailing blank line if it is a tight list
+  let contents' = case reverse items of
+                       (BulletList xs:_) | isTightList xs ->
+                            chomp contents <> cr
+                       (OrderedList _ xs:_) | isTightList xs ->
+                            chomp contents <> cr
+                       _ -> contents
+  return $ hang (writerTabStop opts) start $ contents' <> cr
 
 -- | Convert ordered list item (a list of blocks) to markdown.
 orderedListItemToMarkdown :: WriterOptions -- ^ options
-- 
cgit v1.2.3


From 37569f30ed35a45b4765e9cd6c408cd167dd3452 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 19:33:02 -0800
Subject: EPUB writer: Add properties attribute to cover-image item for EPUB
 v3.

---
 src/Text/Pandoc/Writers/EPUB.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9745db5a0..6e1a391af 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -448,7 +448,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                [("properties","nav") | epub3 ]) $ ()
              ] ++
              map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
-             map pictureNode (cpicEntry ++ picEntries) ++
+             (case cpicEntry of
+                    []    -> []
+                    (x:_) -> [add_attrs
+                              [Attr (unqual "properties") "cover-image" | epub3]
+                              (pictureNode x)]) ++
+             map pictureNode picEntries ++
              map fontNode fontEntries
           , unode "spine" ! [("toc","ncx")] $
               case epubCoverImage metadata of
-- 
cgit v1.2.3


From 4501344d45947bee79707522efdc77bfdc650bd6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 19:43:12 -0800
Subject: MIME:  Add entry for jfif.

---
 src/Text/Pandoc/MIME.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 1f5f6f862..f41aa98bb 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -220,6 +220,7 @@ mimeTypesList = -- List borrowed from happstack-server.
            ,("jnlp","application/x-java-jnlp-file")
            ,("jpe","image/jpeg")
            ,("jpeg","image/jpeg")
+           ,("jfif","image/jpeg")
            ,("jpg","image/jpeg")
            ,("js","application/x-javascript")
            ,("kar","audio/midi")
-- 
cgit v1.2.3


From a7067ab22b0ce297ccc5271fca59b9f9e787195e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 19:43:24 -0800
Subject: EPUB writer: Simplify imageTypeOf using getMimeType.

---
 src/Text/Pandoc/Writers/EPUB.hs | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 6e1a391af..7be7eb368 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -736,15 +736,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
         unEntity (x:xs) = x : unEntity xs
 
 imageTypeOf :: FilePath -> Maybe String
-imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
-                     "jpg"       -> Just "image/jpeg"
-                     "jpeg"      -> Just "image/jpeg"
-                     "jfif"      -> Just "image/jpeg"
-                     "png"       -> Just "image/png"
-                     "gif"       -> Just "image/gif"
-                     "svg"       -> Just "image/svg+xml"
-                     _           -> Nothing
-
+imageTypeOf x = case getMimeType x of
+                     Just y@('i':'m':'a':'g':'e':_) -> Just y
+                     _                              -> Nothing
 
 data IdentState = IdentState{
        chapterNumber :: Int,
-- 
cgit v1.2.3


From 0e158d8d8dc277f19d1f7d2686c2672f08410f82 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Nov 2013 20:20:18 -0800
Subject: EPUB writer: Insert "svg" property as needed in opf (EPUB 3).

---
 src/Text/Pandoc/Writers/EPUB.hs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 7be7eb368..b23f43a6e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -401,15 +401,21 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let chapterEntries = zipWith chapToEntry [1..] chapters
 
   -- incredibly inefficient (TODO):
-  let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+  let containsMathML ent = epub3 &&
+                           "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+  let containsSVG ent    = epub3 &&
+                           "<svg" `isInfixOf` (B8.unpack $ fromEntry ent)
+  let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
 
   -- contents.opf
   let chapterNode ent = unode "item" !
                            ([("id", takeBaseName $ eRelativePath ent),
                              ("href", eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
-                            ++ [("properties","mathml") | epub3 &&
-                                   containsMathML ent]) $ ()
+                            ++ case props ent of
+                                    []   -> []
+                                    xs   -> [("properties", unwords xs)])
+                        $ ()
   let chapterRefNode ent = unode "itemref" !
                              [("idref", takeBaseName $ eRelativePath ent)] $ ()
   let pictureNode ent = unode "item" !
-- 
cgit v1.2.3


From 82813b55852c99c2e4d179083c119937c39d5398 Mon Sep 17 00:00:00 2001
From: Shaun Attfield <heurist+git@gmail.com>
Date: Sun, 1 Dec 2013 10:19:08 +0200
Subject: normalizeDate: Allow dates with year only (%Y)

---
 src/Text/Pandoc/Shared.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 8dcd88148..7592b7659 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -270,7 +270,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
   (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
    where parsetimeWith = parseTime defaultTimeLocale
          formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
-                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
+                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
 
 --
 -- Pandoc block and inline list processing
-- 
cgit v1.2.3


From 1f6238f3ba1999988fbb2b25b091171451647d4e Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 1 Dec 2013 10:16:56 -0800
Subject: EPUB writer:  Allow partial dates:  YYYY, YYYY-MM.

Improves on #1074, since now we don't default to January 1.
---
 src/Text/Pandoc/Writers/EPUB.hs | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b23f43a6e..59c90b16d 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -56,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement)
 import Text.Pandoc.UUID
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.Markdown ( writePlain )
-import Data.Char ( toLower )
+import Data.Char ( toLower, isDigit )
 import Network.URI ( unEscapeString )
 import Text.Pandoc.MIME (getMimeType)
 #if MIN_VERSION_base(4,6,0)
@@ -171,7 +171,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
                  , titleFileAs = getAttr "file-as"
                  , titleType = getAttr "type"
                  } : epubTitle md }
-  | name == "date" = md{ epubDate = maybe "" id $ normalizeDate $ strContent e }
+  | name == "date" = md{ epubDate = maybe "" id $ normalizeDate'
+                                                $ strContent e }
   | name == "language" = md{ epubLanguage = strContent e }
   | name == "creator" = md{ epubCreator =
               Creator{ creatorText = strContent e
@@ -265,7 +266,7 @@ metadataFromMeta opts meta = EPUBMetadata{
   where identifiers = getIdentifier meta
         titles = getTitle meta
         date = maybe "" id $
-              (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate
+              (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate'
         language = maybe "" metaValueToString $
            lookupMeta "language" meta `mplus` lookupMeta "lang" meta
         creators = getCreator "creator" meta
@@ -799,6 +800,16 @@ replaceRefs refTable = walk replaceOneRef
                 Nothing  -> x
         replaceOneRef x = x
 
+-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
+normalizeDate' :: String -> Maybe String
+normalizeDate' xs =
+  let xs' = trim xs in
+  case xs' of
+       [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs'     -- YYYY
+       [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2]  -- YYYY-MM
+                                                 -> Just xs'
+       _                                         -> normalizeDate xs'
+
 toRelator :: String -> Maybe String
 toRelator x
   | x `elem` relators = Just x
-- 
cgit v1.2.3


From bd96f2bdcce55bfe5a7b753d29b3f4a45986f211 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 3 Dec 2013 10:32:03 -0800
Subject: EPUB writer:  Allow 'stylesheet' in metadata.

The value is a path to the stylesheet.
---
 src/Text/Pandoc/Writers/EPUB.hs | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 59c90b16d..32b0c3c32 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -38,7 +38,7 @@ import Text.Printf (printf)
 import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Text.Pandoc.UTF8 ( fromStringLazy, toString )
+import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.SelfContained ( makeSelfContained )
 import Codec.Archive.Zip
 import Control.Applicative ((<$>))
@@ -89,8 +89,13 @@ data EPUBMetadata = EPUBMetadata{
   , epubCoverage           :: Maybe String
   , epubRights             :: Maybe String
   , epubCoverImage         :: Maybe String
+  , epubStylesheet         :: Maybe Stylesheet
   } deriving Show
 
+data Stylesheet = StylesheetPath FilePath
+                | StylesheetContents String
+                deriving Show
+
 data Creator = Creator{
     creatorText            :: String
   , creatorRole            :: Maybe String
@@ -262,6 +267,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     , epubCoverage           = coverage
     , epubRights             = rights
     , epubCoverImage         = coverImage
+    , epubStylesheet         = stylesheet
     }
   where identifiers = getIdentifier meta
         titles = getTitle meta
@@ -282,6 +288,10 @@ metadataFromMeta opts meta = EPUBMetadata{
         rights = metaValueToString <$> lookupMeta "rights" meta
         coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
              (metaValueToString <$> lookupMeta "cover-image" meta)
+        stylesheet = (StylesheetContents <$>
+                       lookup "epub-stylesheet" (writerVariables opts)) `mplus`
+                     ((StylesheetPath . metaValueToString) <$>
+                       lookupMeta "stylesheet" meta)
 
 -- | Produce an EPUB file from a Pandoc document.
 writeEPUB :: WriterOptions  -- ^ Writer options
@@ -437,7 +447,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                   (x:_) -> identifierText x  -- use first identifier as UUID
                   []    -> error "epubIdentifier is null"  -- shouldn't happen
   currentTime <- getCurrentTime
-  let contentsData = fromStringLazy $ ppTopElement $
+  let contentsData = UTF8.fromStringLazy $ ppTopElement $
         unode "package" ! [("version", case version of
                                              EPUB2 -> "2.0"
                                              EPUB3 -> "3.0")
@@ -525,7 +535,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                   [ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
                   , unode "content" ! [("src","title_page.xhtml")] $ () ]
 
-  let tocData = fromStringLazy $ ppTopElement $
+  let tocData = UTF8.fromStringLazy $ ppTopElement $
         unode "ncx" ! [("version","2005-1")
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
@@ -557,7 +567,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                                  (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
 
   let navtag = if epub3 then "nav" else "div"
-  let navData = fromStringLazy $ ppTopElement $
+  let navData = UTF8.fromStringLazy $ ppTopElement $
         unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
                        ,("xmlns:epub","http://www.idpf.org/2007/ops")] $
           [ unode "head" $
@@ -571,10 +581,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let navEntry = mkEntry "nav.xhtml" navData
 
   -- mimetype
-  let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
+  let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
-  let containerData = fromStringLazy $ ppTopElement $
+  let containerData = UTF8.fromStringLazy $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
@@ -583,18 +593,19 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let containerEntry = mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
-  let apple = fromStringLazy $ ppTopElement $
+  let apple = UTF8.fromStringLazy $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
             unode "option" ! [("name","specified-fonts")] $ "true"
   let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
   -- stylesheet
-  stylesheet <- case writerEpubStylesheet opts of
-                   Just s  -> return s
-                   Nothing -> toString `fmap`
+  stylesheet <- case epubStylesheet metadata of
+                   Just (StylesheetPath fp)    -> UTF8.readFile fp
+                   Just (StylesheetContents s) -> return s
+                   Nothing -> UTF8.toString `fmap`
                               readDataFile (writerUserDataDir opts) "epub.css"
-  let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet
+  let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
 
   -- construct archive
   let archive = foldr addEntryToArchive emptyArchive
-- 
cgit v1.2.3


From fdaeec0c48d742489ddf0ec0c0261ca9c53f989b Mon Sep 17 00:00:00 2001
From: Jose Luis Duran <jlduran@gmail.com>
Date: Mon, 2 Dec 2013 09:55:58 +0000
Subject: Add booktabs package for LaTeX tables

[ci skip]
---
 src/Text/Pandoc/Writers/LaTeX.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f3cbcf19f..a2e0b016f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -453,12 +453,12 @@ blockToLaTeX (Header level (id',classes,_) lst) =
 blockToLaTeX (Table caption aligns widths heads rows) = do
   headers <- if all null heads
                 then return empty
-                else ($$ "\\hline\\noalign{\\medskip}") `fmap`
+                else ($$ "\\midrule\\endhead") `fmap`
                       (tableRowToLaTeX True aligns widths) heads
   captionText <- inlineListToLaTeX caption
   let capt = if isEmpty captionText
                 then empty
-                else text "\\noalign{\\medskip}"
+                else text "\\addlinespace"
                      $$ text "\\caption" <> braces captionText
   rows' <- mapM (tableRowToLaTeX False aligns widths) rows
   let colDescriptors = text $ concat $ map toColDescriptor aligns
@@ -466,10 +466,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
   return $ "\\begin{longtable}[c]" <>
               braces ("@{}" <> colDescriptors <> "@{}")
               -- the @{} removes extra space at beginning and end
-         $$ "\\hline\\noalign{\\medskip}"
+         $$ "\\toprule\\addlinespace"
          $$ headers
          $$ vcat rows'
-         $$ "\\hline"
+         $$ "\\bottomrule"
          $$ capt
          $$ "\\end{longtable}"
 
@@ -506,7 +506,7 @@ tableRowToLaTeX header aligns widths cols = do
                                     (w * scaleFactor))) <>
                      (halign a <> cr <> c <> cr) <> "\\end{minipage}"
   let cells = zipWith3 toCell widths aligns renderedCells
-  return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}"
+  return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace"
 
 listItemToLaTeX :: [Block] -> State WriterState Doc
 listItemToLaTeX lst = blockListToLaTeX lst >>= return .  (text "\\item" $$) .
-- 
cgit v1.2.3


From 5314df51f368853f7ee4bf8f75eedf43afc0400e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 4 Dec 2013 10:00:40 -0800
Subject: Stop parsing "list lines" when we hit a block tag.

This fixes exponential slowdown in certain input, e.g.
a series of lists followed by `</div>`.
---
 src/Text/Pandoc/Readers/Markdown.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 33d1a9620..4cb75d86c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -732,7 +732,8 @@ listLine = try $ do
   notFollowedBy' (do indentSpaces
                      many (spaceChar)
                      listStart)
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 (satisfy (/='<'))
+                    <|> (notFollowedBy' (htmlTag isBlockTag) >> count 1 anyChar)) newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
@@ -759,6 +760,7 @@ listContinuationLine :: MarkdownParser String
 listContinuationLine = try $ do
   notFollowedBy blankline
   notFollowedBy' listStart
+  notFollowedBy' $ try $ skipMany spaceChar >> htmlTag (~== TagClose "div")
   optional indentSpaces
   result <- anyLine
   return $ result ++ "\n"
-- 
cgit v1.2.3


From def05d3504bfc90c31c1621438f7a51c95079ad2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 6 Dec 2013 16:43:59 -0800
Subject: HTML reader:  Parse LaTeX math if appropriate options are set.

* Moved inlineMath, displayMath from Markdown reader to Parsing.
* Export them from Parsing.  (API change.)
* Generalize their types.
---
 src/Text/Pandoc/Parsing.hs          | 35 +++++++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/HTML.hs     |  9 ++++++++-
 src/Text/Pandoc/Readers/LaTeX.hs    |  3 ++-
 src/Text/Pandoc/Readers/Markdown.hs | 33 ---------------------------------
 4 files changed, 45 insertions(+), 35 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 9687d7712..e15854333 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~),
                              romanNumeral,
                              emailAddress,
                              uri,
+                             mathInline,
+                             mathDisplay,
                              withHorizDisplacement,
                              withRaw,
                              escaped,
@@ -455,6 +457,39 @@ uri = try $ do
   let uri' = scheme ++ ":" ++ fromEntities str'
   return (uri', escapeURI uri')
 
+mathInlineWith :: String -> String -> Parser [Char] st String
+mathInlineWith op cl = try $ do
+  string op
+  notFollowedBy space
+  words' <- many1Till (count 1 (noneOf "\n\\")
+                   <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+                   <|> count 1 newline <* notFollowedBy' blankline
+                       *> return " ")
+              (try $ string cl)
+  notFollowedBy digit  -- to prevent capture of $5
+  return $ concat words'
+
+mathDisplayWith :: String -> String -> Parser [Char] st String
+mathDisplayWith op cl = try $ do
+  string op
+  many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+
+mathDisplay :: Parser [Char] ParserState String
+mathDisplay =
+      (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+  <|> (guardEnabled Ext_tex_math_single_backslash >>
+       mathDisplayWith "\\[" "\\]")
+  <|> (guardEnabled Ext_tex_math_double_backslash >>
+       mathDisplayWith "\\\\[" "\\\\]")
+
+mathInline :: Parser [Char] ParserState String
+mathInline =
+      (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+  <|> (guardEnabled Ext_tex_math_single_backslash >>
+       mathInlineWith "\\(" "\\)")
+  <|> (guardEnabled Ext_tex_math_double_backslash >>
+       mathInlineWith "\\\\(" "\\\\)")
+
 -- | Applies a parser, returns tuple of its results and its horizontal
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d691c9878..e758f712f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -467,7 +467,13 @@ pBlank = try $ do
 
 pTagContents :: Parser [Char] ParserState Inline
 pTagContents =
-  pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
+      Math InlineMath  `fmap` mathInline
+  <|> Math DisplayMath `fmap` mathDisplay
+  <|> pStr
+  <|> pSpace
+  <|> smartPunctuation pTagContents
+  <|> pSymbol
+  <|> pBad
 
 pStr :: Parser [Char] ParserState Inline
 pStr = do
@@ -482,6 +488,7 @@ isSpecial '"' = True
 isSpecial '\'' = True
 isSpecial '.' = True
 isSpecial '-' = True
+isSpecial '$' = True
 isSpecial '\8216' = True
 isSpecial '\8217' = True
 isSpecial '\8220' = True
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 75e29ebb9..509cb5d74 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,7 +38,8 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Walk
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
+                                   mathDisplay, mathInline)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4cb75d86c..11168bc09 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1408,39 +1408,6 @@ math :: MarkdownParser (F Inlines)
 math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
      <|> (return . B.math <$> (mathInline >>= applyMacros'))
 
-mathDisplay :: MarkdownParser String
-mathDisplay =
-      (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathDisplayWith "\\[" "\\]")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
-  string op
-  many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
-      (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathInlineWith "\\(" "\\)")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
-  string op
-  notFollowedBy space
-  words' <- many1Till (count 1 (noneOf "\n\\")
-                   <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
-                   <|> count 1 newline <* notFollowedBy' blankline
-                       *> return " ")
-              (try $ string cl)
-  notFollowedBy digit  -- to prevent capture of $5
-  return $ concat words'
-
 -- Parses material enclosed in *s, **s, _s, or __s.
 -- Designed to avoid backtracking.
 enclosure :: Char
-- 
cgit v1.2.3


From 4a1446705575ade1cf8ecaf19dec877bdd0ffb93 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 6 Dec 2013 17:31:47 -0800
Subject: Text.Pandoc:  Don't default to pandocExtensions for all writers.

In particular, we don't want to default to math parsing for the
HTML writer.
---
 src/Text/Pandoc.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 703bb876a..3ae81db00 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -267,7 +267,10 @@ getDefaultExtensions "markdown_strict" = strictExtensions
 getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
 getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
-getDefaultExtensions _        = pandocExtensions
+getDefaultExtensions "markdown"        = pandocExtensions
+getDefaultExtensions "plain"           = pandocExtensions
+getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
+getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
 -- | Retrieve reader based on formatSpec (format+extensions).
 getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc)
-- 
cgit v1.2.3


From e5a7c31a32b2b53ef5073355b70dc17ecf1d16af Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 7 Dec 2013 17:12:52 -0800
Subject: Markdown reader: Fixed bug with literal `</div>` in lists.

Closes #1078.
---
 src/Text/Pandoc/Readers/Markdown.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 11168bc09..c32c5ed86 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -732,8 +732,9 @@ listLine = try $ do
   notFollowedBy' (do indentSpaces
                      many (spaceChar)
                      listStart)
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 (satisfy (/='<'))
-                    <|> (notFollowedBy' (htmlTag isBlockTag) >> count 1 anyChar)) newline
+  notFollowedBy' $ htmlTag (~== TagClose "div")
+  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar)
+               newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
@@ -760,7 +761,7 @@ listContinuationLine :: MarkdownParser String
 listContinuationLine = try $ do
   notFollowedBy blankline
   notFollowedBy' listStart
-  notFollowedBy' $ try $ skipMany spaceChar >> htmlTag (~== TagClose "div")
+  notFollowedBy' $ htmlTag (~== TagClose "div")
   optional indentSpaces
   result <- anyLine
   return $ result ++ "\n"
-- 
cgit v1.2.3


From e2c4156c20f113b6f965ccd9313c5aa062cc198f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 7 Dec 2013 19:41:42 -0800
Subject: Small performance improvement in list parsing.

---
 src/Text/Pandoc/Readers/Markdown.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c32c5ed86..a948d5ad3 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -733,8 +733,9 @@ listLine = try $ do
                      many (spaceChar)
                      listStart)
   notFollowedBy' $ htmlTag (~== TagClose "div")
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar)
-               newline
+  chunks <- manyTill (liftM snd (htmlTag isCommentTag)
+             <|> many1 (satisfy (/='\n'))
+             <|> count 1 anyChar) newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
-- 
cgit v1.2.3


From 8e255fad98baf2448d8c1866d7c03c5d110a505f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 7 Dec 2013 19:56:54 -0800
Subject: Another small performance improvement.

---
 src/Text/Pandoc/Readers/Markdown.hs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a948d5ad3..e77dda8d7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -733,9 +733,11 @@ listLine = try $ do
                      many (spaceChar)
                      listStart)
   notFollowedBy' $ htmlTag (~== TagClose "div")
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag)
-             <|> many1 (satisfy (/='\n'))
-             <|> count 1 anyChar) newline
+  chunks <- manyTill
+              (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+             <|> liftM snd (htmlTag isCommentTag)
+             <|> count 1 anyChar
+              ) newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
-- 
cgit v1.2.3


From f9662957704ebfe83d7764dd64151d37c476c0b0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 9 Dec 2013 20:31:47 -0800
Subject: Don't use tilde code blocks with braced attributes in gfm output.

A consequence of this change is that the backtick form will be
preferred in general if both are enabled.  I think that is good,
as it is much more widespread than the tilde form.

Closes #1084.
---
 src/Text/Pandoc/Writers/Markdown.hs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 60d474263..c0b189b75 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -381,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str)
     isEnabled Ext_literate_haskell opts =
   return $ prefixed "> " (text str) <> blankline
 blockToMarkdown opts (CodeBlock attribs str) = return $
-  case attribs of
-     x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts ->
-          tildes <> " " <> attrs <> cr <> text str <>
-           cr <> tildes <> blankline
-     (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts ->
-          backticks <> " " <> text cls <> cr <> text str <>
-           cr <> backticks <> blankline
+  case attribs == nullAttr of
+     False | isEnabled Ext_backtick_code_blocks opts ->
+          backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline
+           | isEnabled Ext_fenced_code_blocks opts ->
+          tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline
      _ -> nest (writerTabStop opts) (text str) <> blankline
    where tildes    = text $ case [ln | ln <- lines str, all (=='~') ln] of
                                [] -> "~~~~"
@@ -396,8 +394,10 @@ blockToMarkdown opts (CodeBlock attribs str) = return $
                                             | otherwise -> replicate (n+1) '~'
          backticks = text "```"
          attrs  = if isEnabled Ext_fenced_code_attributes opts
-                     then nowrap $ attrsToMarkdown attribs
-                     else empty
+                     then nowrap $ " " <> attrsToMarkdown attribs
+                     else case attribs of
+                                (_,[cls],_) -> " " <> text cls
+                                _           -> empty
 blockToMarkdown opts (BlockQuote blocks) = do
   st <- get
   -- if we're writing literate haskell, put a space before the bird tracks
-- 
cgit v1.2.3


From 142f81889b6ede73f965afa01ac67427e1335e9d Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 9 Dec 2013 22:35:22 -0800
Subject: Added `withSocketsDo` around http conduit code in `openURL`.

This should address #1080, but further testing on Windows is needed
before we can close the bug.
---
 src/Text/Pandoc/Shared.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 7592b7659..ce71881e6 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -121,6 +121,7 @@ import Data.ByteString.Lazy (toChunks)
 import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
                              responseBody, responseHeaders)
 import Network.HTTP.Types.Header ( hContentType)
+import Network (withSocketsDo)
 #else
 import Network.URI (parseURI)
 import Network.HTTP (findHeader, rspBody,
@@ -644,7 +645,7 @@ openURL u
         contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
     in  return $ Right (decodeLenient contents, Just mime)
 #ifdef HTTP_CONDUIT
-  | otherwise = E.try $ do
+  | otherwise = withSocketsDo $ E.try $ do
      req <- parseUrl u
      resp <- withManager $ httpLbs req
      return (BS.concat $ toChunks $ responseBody resp,
-- 
cgit v1.2.3


From 6d0cd9203ce968b96ffa8fc2fbae5a50c99b125b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 12 Dec 2013 22:34:13 -0800
Subject: Markdown reader:  Fixed regression in title blocks.

If author field was empty, date was being ignored.  Closes #1089.
---
 src/Text/Pandoc/Readers/Markdown.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e77dda8d7..8014d480c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -215,10 +215,10 @@ pandocTitleBlock = try $ do
                  author' <- author
                  date' <- date
                  return $
-                   ( if B.isNull title' then id else B.setMeta "title" title'
-                   . if null author' then id else B.setMeta "author" author'
-                   . if B.isNull date' then id else B.setMeta "date" date' )
-                   nullMeta
+                     (if B.isNull title' then id else B.setMeta "title" title')
+                   . (if null author' then id else B.setMeta "author" author')
+                   . (if B.isNull date' then id else B.setMeta "date" date')
+                   $ nullMeta
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
 yamlMetaBlock :: MarkdownParser (F Blocks)
-- 
cgit v1.2.3


From ca3c292f30c04bd24287d554f08a7911c808a4e2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 13 Dec 2013 11:10:04 -0800
Subject: EPUB writer:  Fixed bug with `--epub-stylesheet`.

Now the contents of `writerEpubStylesheet` (set by `--epub-stylesheet`)
should again work, and take precedence over a stylesheet specified
in the metadata.
---
 src/Text/Pandoc/Writers/EPUB.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 32b0c3c32..36ead0b8f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -288,8 +288,7 @@ metadataFromMeta opts meta = EPUBMetadata{
         rights = metaValueToString <$> lookupMeta "rights" meta
         coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
              (metaValueToString <$> lookupMeta "cover-image" meta)
-        stylesheet = (StylesheetContents <$>
-                       lookup "epub-stylesheet" (writerVariables opts)) `mplus`
+        stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
                      ((StylesheetPath . metaValueToString) <$>
                        lookupMeta "stylesheet" meta)
 
-- 
cgit v1.2.3


From 5adbe7b36592ba8779563bba791ad4724fde927c Mon Sep 17 00:00:00 2001
From: Jeff Arnold <jeffrey.arnold@gmail.com>
Date: Fri, 13 Dec 2013 19:16:04 -0500
Subject: LaTeX reader: add support for Verb macro

---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 509cb5d74..736f2ff1c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -455,6 +455,7 @@ inlineCommands = M.fromList $
   , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
   , ("verb", doverb)
   , ("lstinline", doverb)
+  , ("Verb", doverb)
   , ("texttt", (code . stringify . toList) <$> tok)
   , ("url", (unescapeURL <$> braced) >>= \url ->
        pure (link url "" (str url)))
-- 
cgit v1.2.3


From 2f00f5c7c22ef879df3a21f1c3da967d106628e9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 15 Dec 2013 12:27:29 -0800
Subject: Properly handle script blocks in strict mode.

(That is, markdown-markdown_in_html_blocks.)
Previously a spurious `<p>` tag was being added.

Closes #1093.
---
 src/Text/Pandoc/Readers/Markdown.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8014d480c..b2e88d47e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -902,7 +902,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
 --
 
 htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+          <|> strictHtmlBlock
+          <|> liftM snd (htmlTag isBlockTag)
 
 htmlBlock :: MarkdownParser (F Blocks)
 htmlBlock = do
-- 
cgit v1.2.3


From 826443926f85b72840b49cd7973e91ed80a09b5d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Dec 2013 13:58:54 -0800
Subject: Docbook reader: Avoid failure if tbody contains no tr or row
 elements.

---
 src/Text/Pandoc/Readers/DocBook.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index fc29988d5..03c6140ac 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -806,7 +806,9 @@ parseBlock (Elem e) =
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
                                                 Nothing -> 0 :: Double
-                      let numrows = maximum $ map length bodyrows
+                      let numrows = case bodyrows of
+                                         []   -> 0
+                                         xs   -> maximum $ map length xs
                       let aligns = case colspecs of
                                      []  -> replicate numrows AlignDefault
                                      cs  -> map toAlignment cs
-- 
cgit v1.2.3


From 0132f6fcb75e58774846e92bac6df2afd7a50e6f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 17 Dec 2013 16:03:43 -0800
Subject: LaTeX reader: Support babel-style quoting: `` "`..."' ``.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 736f2ff1c..0020a8f26 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -173,6 +173,8 @@ double_quote :: LP Inlines
 double_quote =
   (   quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
   <|> quoted' doubleQuoted (string "“")        (void $ char '”')
+  -- the following is used by babel for localized quotes:
+  <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
   <|> quoted' doubleQuoted (string "\"")       (void $ char '"')
   )
 
-- 
cgit v1.2.3


From a3f6f2827c9acb0e68ac533745b3f4dc8836ee24 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 17 Dec 2013 20:10:09 -0800
Subject: LaTeX writer: Factored out function for table cell creation.

---
 src/Text/Pandoc/Writers/LaTeX.hs | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index a2e0b016f..b7beacd91 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -490,24 +490,28 @@ tableRowToLaTeX :: Bool
                 -> [[Block]]
                 -> State WriterState Doc
 tableRowToLaTeX header aligns widths cols = do
-  renderedCells <- mapM blockListToLaTeX cols
-  let valign = text $ if header then "[b]" else "[t]"
-  let halign x = case x of
-                  AlignLeft    -> "\\raggedright"
-                  AlignRight   -> "\\raggedleft"
-                  AlignCenter  -> "\\centering"
-                  AlignDefault -> "\\raggedright"
   -- scale factor compensates for extra space between columns
   -- so the whole table isn't larger than columnwidth
   let scaleFactor = 0.97 ** fromIntegral (length aligns)
-  let toCell 0 _ c = c
-      toCell w a c = "\\begin{minipage}" <> valign <>
-                     braces (text (printf "%.2f\\columnwidth"
-                                    (w * scaleFactor))) <>
-                     (halign a <> cr <> c <> cr) <> "\\end{minipage}"
-  let cells = zipWith3 toCell widths aligns renderedCells
+  let widths' = map (scaleFactor *) widths
+  cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
   return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace"
 
+tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
+                 -> State WriterState Doc
+tableCellToLaTeX _      (0,     _,     blocks) = blockListToLaTeX blocks
+tableCellToLaTeX header (width, align, blocks) = do
+  cellContents <- blockListToLaTeX blocks
+  let valign = text $ if header then "[b]" else "[t]"
+  let halign = case align of
+               AlignLeft    -> "\\raggedright"
+               AlignRight   -> "\\raggedleft"
+               AlignCenter  -> "\\centering"
+               AlignDefault -> "\\raggedright"
+  return $ "\\begin{minipage}" <> valign <>
+           braces (text (printf "%.2f\\columnwidth" width)) <>
+          (halign <> cr <> cellContents <> cr) <> "\\end{minipage}"
+
 listItemToLaTeX :: [Block] -> State WriterState Doc
 listItemToLaTeX lst = blockListToLaTeX lst >>= return .  (text "\\item" $$) .
                       (nest 2)
-- 
cgit v1.2.3


From 8053ba2123699fd89900b2a9c5656a72d3a2fe85 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 17 Dec 2013 20:53:59 -0800
Subject: LaTeX writer:  Better treatment of footnotes in tables.

Notes now appear in the regular sequence, rather than in the
table cell.  (This was a regression in 1.10.)
---
 src/Text/Pandoc/Writers/LaTeX.hs | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index b7beacd91..7612be100 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -50,6 +50,8 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
 
 data WriterState =
   WriterState { stInNote        :: Bool          -- true if we're in a note
+              , stInMinipage    :: Bool          -- true if in minipage
+              , stNotes         :: [Doc]         -- notes in a minipage
               , stOLLevel       :: Int           -- level of ordered list nesting
               , stOptions       :: WriterOptions -- writer options, so they don't have to be parameter
               , stVerbInNote    :: Bool          -- true if document has verbatim text in note
@@ -70,7 +72,7 @@ data WriterState =
 writeLaTeX :: WriterOptions -> Pandoc -> String
 writeLaTeX options document =
   evalState (pandocToLaTeX options document) $
-  WriterState { stInNote = False,
+  WriterState { stInNote = False, stInMinipage = False, stNotes = [],
                 stOLLevel = 1, stOptions = options,
                 stVerbInNote = False,
                 stTable = False, stStrikeout = False,
@@ -501,16 +503,31 @@ tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
                  -> State WriterState Doc
 tableCellToLaTeX _      (0,     _,     blocks) = blockListToLaTeX blocks
 tableCellToLaTeX header (width, align, blocks) = do
+  modify $ \st -> st{ stInMinipage = True, stNotes = [] }
   cellContents <- blockListToLaTeX blocks
+  notes <- gets stNotes
+  modify $ \st -> st{ stInMinipage = False, stNotes = [] }
   let valign = text $ if header then "[b]" else "[t]"
   let halign = case align of
                AlignLeft    -> "\\raggedright"
                AlignRight   -> "\\raggedleft"
                AlignCenter  -> "\\centering"
                AlignDefault -> "\\raggedright"
-  return $ "\\begin{minipage}" <> valign <>
-           braces (text (printf "%.2f\\columnwidth" width)) <>
-          (halign <> cr <> cellContents <> cr) <> "\\end{minipage}"
+  return $ ("\\begin{minipage}" <> valign <>
+            braces (text (printf "%.2f\\columnwidth" width)) <>
+            (halign <> cr <> cellContents <> cr) <> "\\end{minipage}")
+          $$ case notes of
+                  [] -> empty
+                  ns -> (case length ns of
+                              n | n > 1 -> "\\addtocounter" <>
+                                           braces "footnote" <>
+                                           braces (text $ show $ 1 - n)
+                                | otherwise -> empty)
+                        $$
+                        vcat (intersperse
+                          ("\\addtocounter" <> braces "footnote" <> braces "1")
+                          $ map (\x -> "\\footnotetext" <> braces x)
+                          $ reverse ns)
 
 listItemToLaTeX :: [Block] -> State WriterState Doc
 listItemToLaTeX lst = blockListToLaTeX lst >>= return .  (text "\\item" $$) .
@@ -713,14 +730,20 @@ inlineToLaTeX (Image _ (source, _)) = do
   source'' <- stringToLaTeX URLString source'
   return $ "\\includegraphics" <> braces (text source'')
 inlineToLaTeX (Note contents) = do
+  inMinipage <- gets stInMinipage
   modify (\s -> s{stInNote = True})
   contents' <- blockListToLaTeX contents
   modify (\s -> s {stInNote = False})
   let optnl = case reverse contents of
                    (CodeBlock _ _ : _) -> cr
                    _                   -> empty
-  return $ "\\footnote" <> braces (nest 2 contents' <> optnl)
-     -- note: a \n before } needed when note ends with a Verbatim environment
+  let noteContents = nest 2 contents' <> optnl
+  modify $ \st -> st{ stNotes = noteContents : stNotes st }
+  return $
+    if inMinipage
+       then "\\footnotemark{}"
+       -- note: a \n before } needed when note ends with a Verbatim environment
+       else "\\footnote" <> braces noteContents
 
 citationsToNatbib :: [Citation] -> State WriterState Doc
 citationsToNatbib (one:[])
-- 
cgit v1.2.3


From 1ed2c467c9442934c060257f6e191a5a3d6c1e38 Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 17:06:27 -0500
Subject: HLint: Use all

Replace `and . map` with `all`.
---
 src/Text/Pandoc/Shared.hs        | 2 +-
 src/Text/Pandoc/Writers/LaTeX.hs | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ce71881e6..cd2f7e24d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -532,7 +532,7 @@ headerShift n = walk shift
 
 -- | Detect if a list is tight.
 isTightList :: [[Block]] -> Bool
-isTightList = and . map firstIsPlain
+isTightList = all firstIsPlain
   where firstIsPlain (Plain _ : _) = True
         firstIsPlain _             = False
 
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 7612be100..47b769c48 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -443,7 +443,7 @@ blockToLaTeX (DefinitionList lst) = do
   incremental <- gets stIncremental
   let inc = if incremental then "[<+->]" else ""
   items <- mapM defListItemToLaTeX lst
-  let spacing = if and $ map isTightList (map snd lst)
+  let spacing = if all isTightList (map snd lst)
                    then text "\\itemsep1pt\\parskip0pt\\parsep0pt"
                    else empty
   return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$
@@ -764,9 +764,9 @@ citationsToNatbib cits
   | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
   = citeCommand "citep" p s ks
   where
-     noPrefix  = and . map (null . citationPrefix)
-     noSuffix  = and . map (null . citationSuffix)
-     ismode m  = and . map (((==) m)  . citationMode)
+     noPrefix  = all (null . citationPrefix)
+     noSuffix  = all (null . citationSuffix)
+     ismode m  = all (((==) m)  . citationMode)
      p         = citationPrefix  $ head $ cits
      s         = citationSuffix  $ last $ cits
      ks        = intercalate ", " $ map citationId cits
-- 
cgit v1.2.3


From 0c5e7cf8cb4fe6959d7e89880e8925afe6625414 Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 20:19:24 -0500
Subject: HLint: use `elem` and `notElem`

Replaces long conditional chains with calls to `elem` and `notElem`.
---
 src/Text/Pandoc/Parsing.hs          |  4 ++--
 src/Text/Pandoc/Readers/LaTeX.hs    |  5 ++---
 src/Text/Pandoc/Readers/Markdown.hs |  8 ++++----
 src/Text/Pandoc/Shared.hs           | 12 ++++--------
 src/Text/Pandoc/Writers/Markdown.hs | 11 ++++++-----
 src/Text/Pandoc/Writers/Org.hs      |  2 +-
 6 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index e15854333..2f21e1253 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -271,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
 nonspaceChar :: Parser [Char] st Char
-nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r'
+nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
 
 -- | Skips zero or more spaces or tabs.
 skipSpaces :: Parser [Char] st ()
@@ -1062,7 +1062,7 @@ doubleQuoteStart :: Parser [Char] ParserState ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
+           notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
 
 doubleQuoteEnd :: Parser [Char] st ()
 doubleQuoteEnd = do
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0020a8f26..51271edc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -874,9 +874,8 @@ verbatimEnv = do
   (_,r) <- withRaw $ do
              controlSeq "begin"
              name <- braced
-             guard $ name == "verbatim" || name == "Verbatim" ||
-                     name == "lstlisting" || name == "minted" ||
-                     name == "alltt"
+             guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
+                                  "minted", "alltt"]
              verbEnv name
   rest <- getInput
   return (r,rest)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b2e88d47e..f483ab059 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -789,8 +789,8 @@ listItem start = try $ do
 orderedList :: MarkdownParser (F Blocks)
 orderedList = try $ do
   (start, style, delim) <- lookAhead anyOrderedListStart
-  unless ((style == DefaultStyle || style == Decimal || style == Example) &&
-          (delim == DefaultDelim || delim == Period)) $
+  unless (style `elem` [DefaultStyle, Decimal, Example] &&
+          delim `elem` [DefaultDelim, Period]) $
     guardEnabled Ext_fancy_lists
   when (style == Example) $ guardEnabled Ext_example_lists
   items <- fmap sequence $ many1 $ listItem
@@ -925,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
 
 rawVerbatimBlock :: MarkdownParser String
 rawVerbatimBlock = try $ do
-  (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
-                              t == "pre" || t == "style" || t == "script")
+  (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+                                                  ["pre", "style", "script"])
                               (const True))
   contents <- manyTill anyChar (htmlTag (~== TagClose tag))
   return $ open ++ contents ++ renderTags [TagClose tag]
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index cd2f7e24d..3446f4343 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -564,14 +564,10 @@ makeMeta title authors date =
 -- | Render HTML tags.
 renderTags' :: [Tag String] -> String
 renderTags' = renderTagsOptions
-               renderOptions{ optMinimize = \x ->
-                                    let y = map toLower x
-                                    in  y == "hr" || y == "br" ||
-                                        y == "img" || y == "meta" ||
-                                        y == "link"
-                            , optRawTag = \x ->
-                                    let y = map toLower x
-                                    in  y == "script" || y == "style" }
+               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
+                                                       "meta", "link"]
+                            , optRawTag   = matchTags ["script", "style"] }
+              where matchTags = \tags -> flip elem tags . map toLower
 
 --
 -- File handling
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index c0b189b75..278e5cc9d 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -338,7 +338,7 @@ blockToMarkdown opts (RawBlock f str)
        else return $ if isEnabled Ext_markdown_attribute opts
                         then text (addMarkdownAttribute str) <> text "\n"
                         else text str <> text "\n"
-  | f == "latex" || f == "tex" || f == "markdown" = do
+  | f `elem` ["latex", "tex", "markdown"] = do
     st <- get
     if stPlain st
        then return empty
@@ -628,10 +628,11 @@ getReference label (src, tit) = do
     Nothing       -> do
       let label' = case find ((== label) . fst) (stRefs st) of
                       Just _ -> -- label is used; generate numerical label
-                                 case find (\n -> not (any (== [Str (show n)])
-                                           (map fst (stRefs st)))) [1..(10000 :: Integer)] of
-                                      Just x  -> [Str (show x)]
-                                      Nothing -> error "no unique label"
+                             case find (\n -> notElem [Str (show n)]
+                                                      (map fst (stRefs st)))
+                                       [1..(10000 :: Integer)] of
+                                  Just x  -> [Str (show x)]
+                                  Nothing -> error "no unique label"
                       Nothing -> label
       modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st })
       return label'
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 51083f52b..d318c5f6a 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do
 blockToOrg (RawBlock "html" str) =
   return $ blankline $$ "#+BEGIN_HTML" $$
            nest 2 (text str) $$ "#+END_HTML" $$ blankline
-blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" =
+blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] =
   return $ text str
 blockToOrg (RawBlock _ _) = return empty
 blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
-- 
cgit v1.2.3


From c35f5ba42df094cef5f69a191315385a0a1e12b0 Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 20:28:53 -0500
Subject: HLint: Remove lambdas.

---
 src/Text/Pandoc/Writers/Shared.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 9cb08803c..33091ea94 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -65,8 +65,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap)
     renderedMap <- Traversable.mapM
                    (metaValueToJSON blockWriter inlineWriter)
                    metamap
-    return $ M.foldWithKey (\key val obj -> defField key val obj)
-                   baseContext renderedMap
+    return $ M.foldWithKey defField baseContext renderedMap
   | otherwise = return (Object H.empty)
 
 metaValueToJSON :: Monad m
-- 
cgit v1.2.3


From f6d151889c8fff303be8ee8a4f9be67a04de9210 Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 20:43:25 -0500
Subject: HLint: redundant parens

Remove parens enclosing a single element.
---
 src/Text/Pandoc/Readers/Markdown.hs  | 2 +-
 src/Text/Pandoc/Readers/Textile.hs   | 2 +-
 src/Text/Pandoc/Writers/HTML.hs      | 2 +-
 src/Text/Pandoc/Writers/MediaWiki.hs | 2 +-
 src/Text/Pandoc/Writers/Textile.hs   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index f483ab059..166c524ef 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -730,7 +730,7 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
 listLine :: MarkdownParser String
 listLine = try $ do
   notFollowedBy' (do indentSpaces
-                     many (spaceChar)
+                     many spaceChar
                      listStart)
   notFollowedBy' $ htmlTag (~== TagClose "div")
   chunks <- manyTill
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 23e07f621..93658cdea 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> ([Inline] -> Inline)       -- ^ Inline constructor
                 -> Parser [Char] ParserState Inline   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border (inlineWithAttribute) >>=
+simpleInline border construct = surrounded border inlineWithAttribute >>=
                                 return . construct . normalizeSpaces
   where inlineWithAttribute = (try $ optional attributes) >> inline
 
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 641652276..2c6435457 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -524,7 +524,7 @@ blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
                   do term' <- if null term
                                  then return mempty
-                                 else liftM (H.dt) $ inlineListToHtml opts term
+                                 else liftM H.dt $ inlineListToHtml opts term
                      defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
                                     blockListToHtml opts) defs
                      return $ mconcat $ nl opts : term' : nl opts :
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 61741a61e..83fefaa29 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -51,7 +51,7 @@ data WriterState = WriterState {
 writeMediaWiki :: WriterOptions -> Pandoc -> String
 writeMediaWiki opts document =
   evalState (pandocToMediaWiki opts document)
-            (WriterState { stNotes = False, stListLevel = [], stUseTags = False })
+            WriterState { stNotes = False, stListLevel = [], stUseTags = False }
 
 -- | Return MediaWiki representation of document.
 pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 7c102cc86..95aedf780 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -51,7 +51,7 @@ data WriterState = WriterState {
 writeTextile :: WriterOptions -> Pandoc -> String
 writeTextile opts document =
   evalState (pandocToTextile opts document)
-            (WriterState { stNotes = [], stListLevel = [], stUseTags = False })
+            WriterState { stNotes = [], stListLevel = [], stUseTags = False }
 
 -- | Return Textile representation of document.
 pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
-- 
cgit v1.2.3


From c8fc0a03748ceb4dbea502297ece89eac9d73aff Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 20:46:11 -0500
Subject: HLint: use /=

---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 47b769c48..1deacecb4 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -190,7 +190,7 @@ stringToLaTeX  _     []     = return ""
 stringToLaTeX  ctx (x:xs) = do
   opts <- gets stOptions
   rest <- stringToLaTeX ctx xs
-  let ligatures = writerTeXLigatures opts && not (ctx == CodeString)
+  let ligatures = writerTeXLigatures opts && (ctx /= CodeString)
   let isUrl = ctx == URLString
   when (x == '€') $
      modify $ \st -> st{ stUsesEuro = True }
-- 
cgit v1.2.3


From 3d70059a4896c66cab59832a3ed9eb8334b84a2f Mon Sep 17 00:00:00 2001
From: Henry de Valence <hdevalence@hdevalence.ca>
Date: Thu, 19 Dec 2013 21:07:09 -0500
Subject: HLint: use fromMaybe

Replace uses of `maybe x id` with `fromMaybe x`.
---
 src/Text/Pandoc/PDF.hs               |  3 ++-
 src/Text/Pandoc/Readers/DocBook.hs   |  7 ++++---
 src/Text/Pandoc/Readers/HTML.hs      |  4 ++--
 src/Text/Pandoc/Readers/Markdown.hs  |  4 ++--
 src/Text/Pandoc/Readers/MediaWiki.hs |  7 ++++---
 src/Text/Pandoc/Writers/Docx.hs      |  6 ++++--
 src/Text/Pandoc/Writers/EPUB.hs      | 12 ++++++------
 src/Text/Pandoc/Writers/HTML.hs      |  4 ++--
 src/Text/Pandoc/Writers/LaTeX.hs     |  3 ++-
 src/Text/Pandoc/Writers/ODT.hs       |  3 ++-
 10 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index e8683b98f..360338f8f 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -41,6 +41,7 @@ import System.Directory
 import System.Environment
 import Control.Monad (unless)
 import Data.List (isInfixOf)
+import Data.Maybe (fromMaybe)
 import qualified Data.ByteString.Base64 as B64
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Definition
@@ -87,7 +88,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do
          res <- fetchItem baseURL src
          case res of
               Right (contents, Just mime) -> do
-                let ext = maybe (takeExtension src) id $
+                let ext = fromMaybe (takeExtension src) $
                           extensionFromMimeType mime
                 let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
                 let fname = tmpdir </> basename <.> ext
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 03c6140ac..56cb16b20 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,6 +12,7 @@ import Data.Char (isSpace)
 import Control.Monad.State
 import Control.Applicative ((<$>))
 import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
 
 {-
 
@@ -683,7 +684,7 @@ parseBlock (Elem e) =
                                "lowerroman" -> LowerRoman
                                "upperroman" -> UpperRoman
                                _            -> Decimal
-          let start = maybe 1 id $
+          let start = fromMaybe 1 $
                       (attrValue "override" <$> filterElement (named "listitem") e)
                        >>= safeRead
           orderedListWith (start,listStyle,DefaultDelim)
@@ -779,7 +780,7 @@ parseBlock (Elem e) =
                       caption <- case filterChild isCaption e of
                                        Just t  -> getInlines t
                                        Nothing -> return mempty
-                      let e' = maybe e id $ filterChild (named "tgroup") e
+                      let e' = fromMaybe e $ filterChild (named "tgroup") e
                       let isColspec x = named "colspec" x || named "col" x
                       let colspecs = case filterChild (named "colgroup") e' of
                                            Just c -> filterChildren isColspec c
@@ -801,7 +802,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = case findAttr (unqual "colwidth") c of
-                                                Just w -> maybe 0 id
+                                                Just w -> fromMaybe 0
                                                    $ safeRead $ '0': filter (\x ->
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e758f712f..4b44a3a21 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -207,7 +207,7 @@ pHeader = try $ do
   let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
   let level = read (drop 1 tagtype)
   contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
-  let ident = maybe "" id $ lookup "id" attr
+  let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   return $ if bodyTitle
@@ -257,7 +257,7 @@ pCol = try $ do
   skipMany pBlank
   return $ case lookup "width" attribs of
            Just x | not (null x) && last x == '%' ->
-             maybe 0.0 id $ safeRead ('0':'.':init x)
+             fromMaybe 0.0 $ safeRead ('0':'.':init x)
            _ -> 0.0
 
 pColgroup :: TagParser [Double]
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 166c524ef..3a5d29b4e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1722,7 +1722,7 @@ spanHtml = try $ do
   guardEnabled Ext_markdown_in_html_blocks
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
   contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
-  let ident = maybe "" id $ lookup "id" attrs
+  let ident = fromMaybe "" $ lookup "id" attrs
   let classes = maybe [] words $ lookup "class" attrs
   let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
   return $ B.spanWith (ident, classes, keyvals) <$> contents
@@ -1732,7 +1732,7 @@ divHtml = try $ do
   guardEnabled Ext_markdown_in_html_blocks
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
   contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
-  let ident = maybe "" id $ lookup "id" attrs
+  let ident = fromMaybe "" $ lookup "id" attrs
   let classes = maybe [] words $ lookup "class" attrs
   let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
   return $ B.divWith (ident, classes, keyvals) <$> contents
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 1c074e3de..8d8ea0199 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -54,6 +54,7 @@ import Data.Sequence (viewl, ViewL(..), (<|))
 import qualified Data.Foldable as F
 import qualified Data.Map as M
 import Data.Char (isDigit, isSpace)
+import Data.Maybe (fromMaybe)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
 readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -204,7 +205,7 @@ table = do
   tableStart
   styles <- option [] parseAttrs <* blankline
   let tableWidth = case lookup "width" styles of
-                         Just w  -> maybe 1.0 id $ parseWidth w
+                         Just w  -> fromMaybe 1.0 $ parseWidth w
                          Nothing -> 1.0
   caption <- option mempty tableCaption
   optional rowsep
@@ -285,7 +286,7 @@ tableCell = try $ do
                     Just "center" -> AlignCenter
                     _             -> AlignDefault
   let width = case lookup "width" attrs of
-                    Just xs -> maybe 0.0 id $ parseWidth xs
+                    Just xs -> fromMaybe 0.0 $ parseWidth xs
                     Nothing -> 0.0
   return ((align, width), bs)
 
@@ -387,7 +388,7 @@ orderedList =
           spaces
           items <- many (listItem '#' <|> li)
           optional (htmlTag (~== TagClose "ol"))
-          let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
+          let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
           return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
 
 definitionList :: MWParser Blocks
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 5c7341b69..32ba7715a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -29,6 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
+import Data.Maybe (fromMaybe)
 import Data.List ( intercalate, groupBy )
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
@@ -130,7 +131,8 @@ writeDocx opts doc@(Pandoc meta _) = do
   let mkOverrideNode (part', contentType') = mknode "Override"
                [("PartName",part'),("ContentType",contentType')] ()
   let mkImageOverride (_, imgpath, mbMimeType, _, _) =
-             mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType)
+             mkOverrideNode ("/word/" ++ imgpath,
+                             fromMaybe "application/octet-stream" mbMimeType)
   let overrides = map mkOverrideNode
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -322,7 +324,7 @@ mkNum markers marker numid =
        NumberMarker _ _ start ->
           map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
               $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
-   where absnumid = maybe 0 id $ M.lookup marker markers
+   where absnumid = fromMaybe 0 $ M.lookup marker markers
 
 mkAbstractNum :: (ListMarker,Int) -> IO Element
 mkAbstractNum (marker,numid) = do
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 36ead0b8f..4daa9609e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -176,8 +176,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
                  , titleFileAs = getAttr "file-as"
                  , titleType = getAttr "type"
                  } : epubTitle md }
-  | name == "date" = md{ epubDate = maybe "" id $ normalizeDate'
-                                                $ strContent e }
+  | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate'
+                                                 $ strContent e }
   | name == "language" = md{ epubLanguage = strContent e }
   | name == "creator" = md{ epubCreator =
               Creator{ creatorText = strContent e
@@ -271,7 +271,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     }
   where identifiers = getIdentifier meta
         titles = getTitle meta
-        date = maybe "" id $
+        date = fromMaybe "" $
               (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate'
         language = maybe "" metaValueToString $
            lookupMeta "language" meta `mplus` lookupMeta "lang" meta
@@ -297,7 +297,7 @@ writeEPUB :: WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
           -> IO B.ByteString
 writeEPUB opts doc@(Pandoc meta _) = do
-  let version = maybe EPUB2 id (writerEpubVersion opts)
+  let version = fromMaybe EPUB2 (writerEpubVersion opts)
   let epub3 = version == EPUB3
   epochtime <- floor `fmap` getPOSIXTime
   let mkEntry path content = toEntry path epochtime content
@@ -401,7 +401,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
       chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
         $ renderHtml
         $ writeHtml opts'{ writerNumberOffset =
-            maybe [] id mbnum }
+            fromMaybe [] mbnum }
         $ case bs of
               (Header _ _ xs : _) ->
                  Pandoc (setMeta "title" (fromList xs) nullMeta) bs
@@ -436,7 +436,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let fontNode ent = unode "item" !
                            [("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
-                            ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
+                            ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
   let plainTitle = case docTitle meta of
                         [] -> case epubTitle metadata of
                                    []   -> "UNTITLED"
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 2c6435457..129776363 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -45,7 +45,7 @@ import Numeric ( showHex )
 import Data.Char ( ord, toLower )
 import Data.List ( isPrefixOf, intersperse )
 import Data.String ( fromString )
-import Data.Maybe ( catMaybes )
+import Data.Maybe ( catMaybes, fromMaybe )
 import Control.Monad.State
 import Text.Blaze.Html hiding(contents)
 import Text.Blaze.Internal(preEscapedString)
@@ -118,7 +118,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
   let stringifyHTML = escapeStringForXML . stringify
   let authsMeta = map stringifyHTML $ docAuthors meta
   let dateMeta  = stringifyHTML $ docDate meta
-  let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts
+  let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
   let sects = hierarchicalize $
               if writerSlideVariant opts == NoSlides
                  then blocks
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 1deacecb4..a76d6d82b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -40,6 +40,7 @@ import Network.URI ( isURI, unEscapeString )
 import Data.List ( (\\), isSuffixOf, isInfixOf,
                    isPrefixOf, intercalate, intersperse )
 import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
+import Data.Maybe ( fromMaybe )
 import Control.Applicative ((<|>))
 import Control.Monad.State
 import Text.Pandoc.Pretty
@@ -240,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents
 toSlides :: [Block] -> State WriterState [Block]
 toSlides bs = do
   opts <- gets stOptions
-  let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts
+  let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts
   let bs' = prepSlides slideLevel bs
   concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
 
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index cc0a06243..25cd5ae13 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to ODT.
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Data.IORef
 import Data.List ( isPrefixOf )
+import Data.Maybe ( fromMaybe )
 import qualified Data.ByteString.Lazy as B
 import Text.Pandoc.UTF8 ( fromStringLazy )
 import Codec.Archive.Zip
@@ -127,7 +128,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do
        return $ Emph lab
      Right (img, _) -> do
        let size = imageSize img
-       let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size
+       let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size
        let tit' = show w ++ "x" ++ show h
        entries <- readIORef entriesRef
        let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
-- 
cgit v1.2.3


From d6ec6cf9cf5731977fa0f476cabef4786edd5665 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 1 Jan 2014 09:01:05 -0800
Subject: Docx writer:  Fixed problem with some modified reference docx files.

Include `word/_rels/settings.xml.rels` if it exists, as well as other
`rels` files besides the ones pandoc generates explicitly.
---
 src/Text/Pandoc/Writers/Docx.hs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 32ba7715a..67d202010 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
 import Data.Maybe (fromMaybe)
-import Data.List ( intercalate, groupBy )
+import Data.List ( intercalate, groupBy, isPrefixOf )
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -262,6 +262,11 @@ writeDocx opts doc@(Pandoc meta _) = do
   fontTableEntry <- entryFromArchive "word/fontTable.xml"
   settingsEntry <- entryFromArchive "word/settings.xml"
   webSettingsEntry <- entryFromArchive "word/webSettings.xml"
+  let miscRels = [ f | f <- filesInArchive refArchive
+                     , "word/_rels/" `isPrefixOf` f
+                     , f /= "word/_rels/document.xml.rels"
+                     , f /= "word/_rels/footnotes.xml.rels" ]
+  miscRelEntries <- mapM entryFromArchive miscRels
 
   -- Create archive
   let archive = foldr addEntryToArchive emptyArchive $
@@ -269,7 +274,7 @@ writeDocx opts doc@(Pandoc meta _) = do
                   footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
                   docPropsEntry : docPropsAppEntry : themeEntry :
                   fontTableEntry : settingsEntry : webSettingsEntry :
-                  imageEntries
+                  imageEntries ++ miscRelEntries
   return $ fromArchive archive
 
 styleToOpenXml :: Style -> [Element]
-- 
cgit v1.2.3


From f3ee82373b4ad8e955db12d3c2c2159a2bea53a0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 1 Jan 2014 09:22:37 -0800
Subject: HTML reader: Parse name/content pairs from meta tags as metadata.

Closes #1106.
---
 src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4b44a3a21..506fe7770 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -76,9 +76,18 @@ pBody :: TagParser [Block]
 pBody = pInTags "body" block
 
 pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
   where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
         setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+        pMetaTag = do
+          mt <- pSatisfy (~== TagOpen "meta" [])
+          let name = fromAttrib "name" mt
+          if null name
+             then return []
+             else do
+               let content = fromAttrib "content" mt
+               updateState $ B.setMeta name (B.text content)
+               return []
 
 block :: TagParser [Block]
 block = choice
-- 
cgit v1.2.3


From e3d48da6271a37129ae5bdb6cb57f006de8c5bfc Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 15:22:50 -0800
Subject: Moved fixDisplayMath from Docx writer to Writer.Shared.

---
 src/Text/Pandoc/Writers/Docx.hs   | 27 ++-------------------------
 src/Text/Pandoc/Writers/Shared.hs | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 67d202010..25739f7c8 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
 import Data.Maybe (fromMaybe)
-import Data.List ( intercalate, groupBy, isPrefixOf )
+import Data.List ( intercalate, isPrefixOf )
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -43,6 +43,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Writers.Shared (fixDisplayMath)
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.TeXMath
 import Text.Pandoc.Highlighting ( highlight )
@@ -817,27 +818,3 @@ parseXml refArchive relpath =
   case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
        Just d  -> return d
        Nothing -> fail $ relpath ++ " missing in reference docx"
-
-isDisplayMath :: Inline -> Bool
-isDisplayMath (Math DisplayMath _) = True
-isDisplayMath _                    = False
-
-stripLeadingTrailingSpace :: [Inline] -> [Inline]
-stripLeadingTrailingSpace = go . reverse . go . reverse
-  where go (Space:xs) = xs
-        go xs         = xs
-
-fixDisplayMath :: Block -> Block
-fixDisplayMath (Plain lst)
-  | any isDisplayMath lst && not (all isDisplayMath lst) =
-    -- chop into several paragraphs so each displaymath is its own
-    Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
-       groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
-                         not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath (Para lst)
-  | any isDisplayMath lst && not (all isDisplayMath lst) =
-    -- chop into several paragraphs so each displaymath is its own
-    Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
-       groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
-                         not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath x = x
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 33091ea94..604aac1c9 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Writers.Shared (
                      , setField
                      , defField
                      , tagWithAttrs
+                     , fixDisplayMath
                      )
 where
 import Text.Pandoc.Definition
@@ -46,6 +47,7 @@ import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..))
 import qualified Data.Traversable as Traversable
+import Data.List ( groupBy )
 
 -- | Create JSON value for template from a 'Meta' and an association list
 -- of variables, specified at the command line or in the writer.
@@ -136,3 +138,28 @@ tagWithAttrs tag (ident,classes,kvs) = hsep
   ,hsep (map (\(k,v) -> text k <> "=" <>
                 doubleQuotes (text (escapeStringForXML v))) kvs)
   ] <> ">"
+
+isDisplayMath :: Inline -> Bool
+isDisplayMath (Math DisplayMath _) = True
+isDisplayMath _                    = False
+
+stripLeadingTrailingSpace :: [Inline] -> [Inline]
+stripLeadingTrailingSpace = go . reverse . go . reverse
+  where go (Space:xs) = xs
+        go xs         = xs
+
+-- Put display math in its own block (for ODT/DOCX).
+fixDisplayMath :: Block -> Block
+fixDisplayMath (Plain lst)
+  | any isDisplayMath lst && not (all isDisplayMath lst) =
+    -- chop into several paragraphs so each displaymath is its own
+    Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
+       groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+                         not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath (Para lst)
+  | any isDisplayMath lst && not (all isDisplayMath lst) =
+    -- chop into several paragraphs so each displaymath is its own
+    Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
+       groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+                         not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath x = x
-- 
cgit v1.2.3


From ac100f27249e4c572e687d027330ca2cc53cb1ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 15:23:16 -0800
Subject: OpenDocument writer:  Fixed RawInline, RawBlock so they don't escape.

---
 src/Text/Pandoc/Writers/OpenDocument.hs | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 4ddfd7166..0029c3296 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -285,8 +285,12 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
 -- | Convert a Pandoc block element to OpenDocument.
 blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
 blockToOpenDocument o bs
-    | Plain          b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
-    | Para           b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
+    | Plain          b <- bs = if null b
+                                  then return empty
+                                  else inParagraphTags =<< inlinesToOpenDocument o b
+    | Para           b <- bs = if null b
+                                  then return empty
+                                  else inParagraphTags =<< inlinesToOpenDocument o b
     | Div _ xs         <- bs = blocksToOpenDocument o xs
     | Header     i _ b <- bs = setFirstPara >>
                                (inHeaderTags  i =<< inlinesToOpenDocument o b)
@@ -298,8 +302,8 @@ blockToOpenDocument o bs
     | Table  c a w h r <- bs = setFirstPara >> table c a w h r
     | HorizontalRule   <- bs = setFirstPara >> return (selfClosingTag "text:p"
                                 [ ("text:style-name", "Horizontal_20_Line") ])
-    | RawBlock f     s <- bs = if f == "opendocument"
-                                  then preformatted s
+    | RawBlock f     s <- bs = if f == Format "opendocument"
+                                  then return $ text s
                                   else return empty
     | Null             <- bs = return empty
     | otherwise              = return empty
@@ -378,8 +382,8 @@ inlineToOpenDocument o ils
     | Code      _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
     | Math      t s <- ils = inlinesToOpenDocument o (readTeXMath' t s)
     | Cite      _ l <- ils = inlinesToOpenDocument o l
-    | RawInline f s <- ils = if f == "opendocument"
-                                then return $ preformatted s
+    | RawInline f s <- ils = if f == Format "opendocument"
+                                then return $ text s
                                 else return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
     | Image _ (s,t) <- ils = mkImg  s t
-- 
cgit v1.2.3


From 33955fd2efd640becfddb262c4443509521cb6cc Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 15:23:40 -0800
Subject: ODT writer:  Use mathml for proper rendering of formulas.

Note:  LibreOffice's support for this seems a bit buggy.  But
it should be better than what we had before.
---
 src/Text/Pandoc/Writers/ODT.hs | 50 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 25cd5ae13..c3652d65d 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -30,8 +30,10 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Data.IORef
-import Data.List ( isPrefixOf )
+import Data.List ( isPrefixOf, isSuffixOf )
 import Data.Maybe ( fromMaybe )
+import Text.XML.Light.Output
+import Text.TeXMath
 import qualified Data.ByteString.Lazy as B
 import Text.Pandoc.UTF8 ( fromStringLazy )
 import Codec.Archive.Zip
@@ -41,13 +43,14 @@ import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
 import Text.Pandoc.MIME ( getMimeType )
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared ( fixDisplayMath )
 import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
 import Control.Monad (liftM)
 import Text.Pandoc.XML
 import Text.Pandoc.Pretty
 import qualified Control.Exception as E
 import Data.Time.Clock.POSIX ( getPOSIXTime )
-import System.FilePath ( takeExtension )
+import System.FilePath ( takeExtension, takeDirectory )
 
 -- | Produce an ODT file from a Pandoc document.
 writeODT :: WriterOptions  -- ^ Writer options
@@ -61,9 +64,9 @@ writeODT opts doc@(Pandoc meta _) = do
              Just f -> B.readFile f
              Nothing -> (B.fromChunks . (:[])) `fmap`
                            readDataFile datadir "reference.odt"
-  -- handle pictures
+  -- handle formulas and pictures
   picEntriesRef <- newIORef ([] :: [Entry])
-  doc' <- walkM (transformPic opts picEntriesRef) doc
+  doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc
   let newContents = writeOpenDocument opts{writerWrapText = False} doc'
   epochtime <- floor `fmap` getPOSIXTime
   let contentEntry = toEntry "content.xml" epochtime
@@ -73,7 +76,11 @@ writeODT opts doc@(Pandoc meta _) = do
                 $ contentEntry : picEntries
   -- construct META-INF/manifest.xml based on archive
   let toFileEntry fp = case getMimeType fp of
-                        Nothing  -> empty
+                        Nothing  -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp
+                                       then selfClosingTag "manifest:file-entry"
+                                             [("manifest:media-type","application/vnd.oasis.opendocument.formula")
+                                             ,("manifest:full-path",fp)]
+                                       else empty
                         Just m   -> selfClosingTag "manifest:file-entry"
                                      [("manifest:media-type", m)
                                      ,("manifest:full-path", fp)
@@ -81,6 +88,8 @@ writeODT opts doc@(Pandoc meta _) = do
                                      ]
   let files = [ ent | ent <- filesInArchive archive,
                              not ("META-INF" `isPrefixOf` ent) ]
+  let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive,
+                      "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ]
   let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
         $ fromStringLazy $ render Nothing
         $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
@@ -92,6 +101,7 @@ writeODT opts doc@(Pandoc meta _) = do
                  [("manifest:media-type","application/vnd.oasis.opendocument.text")
                  ,("manifest:full-path","/")]
                 $$ vcat ( map toFileEntry $ files )
+                $$ vcat ( map toFileEntry $ formulas )
               )
          )
   let archive' = addEntryToArchive manifestEntry archive
@@ -119,8 +129,8 @@ writeODT opts doc@(Pandoc meta _) = do
                   $ addEntryToArchive metaEntry archive'
   return $ fromArchive archive''
 
-transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
-transformPic opts entriesRef (Image lab (src,_)) = do
+transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
+transformPicMath opts entriesRef (Image lab (src,_)) = do
   res <- fetchItem (writerSourceURL opts) src
   case res of
      Left (_ :: E.SomeException) -> do
@@ -137,5 +147,29 @@ transformPic opts entriesRef (Image lab (src,_)) = do
        let entry = toEntry newsrc epochtime $ toLazy img
        modifyIORef entriesRef (entry:)
        return $ Image lab (newsrc, tit')
-transformPic _ _ x = return x
+transformPicMath _ entriesRef (Math t math) = do
+  entries <- readIORef entriesRef
+  let dt = if t == InlineMath then DisplayInline else DisplayBlock
+  case texMathToMathML dt math of
+       Left  _ -> return $ Math t math
+       Right r -> do
+         let conf = useShortEmptyTags (const False) defaultConfigPP
+         let mathml = ppcTopElement conf r
+         epochtime <- floor `fmap` getPOSIXTime
+         let dirname = "Formula-" ++ show (length entries) ++ "/"
+         let fname = dirname ++ "content.xml"
+         let entry = toEntry fname epochtime (fromStringLazy mathml)
+         modifyIORef entriesRef (entry:)
+         return $ RawInline (Format "opendocument") $ render Nothing $
+           inTags False "draw:frame" [("text:anchor-type",
+                                       if t == DisplayMath
+                                          then "paragraph"
+                                          else "as-char")
+                                     ,("style:vertical-pos", "middle")
+                                     ,("style:vertical-rel", "text")] $
+             selfClosingTag "draw:object" [("xlink:href", dirname)
+                                        , ("xlink:type", "simple")
+                                        , ("xlink:show", "embed")
+                                        , ("xlink:actuate", "onLoad")]
 
+transformPicMath _ _ x = return x
-- 
cgit v1.2.3


From b2db6979fedc5f66d45596c9564a8313af90b9fa Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 19:32:13 -0800
Subject: Use isHeaderBlock from Shared rather than defining it anew...

---
 src/Text/Pandoc/Writers/FB2.hs     | 6 ++----
 src/Text/Pandoc/Writers/Texinfo.hs | 6 +-----
 2 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index adbe948be..803617f95 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -44,7 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers)
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
 import Text.Pandoc.Walk
 
 -- | Data to be written at the end of the document:
@@ -157,9 +157,7 @@ renderSection level (ttl, body) = do
                else cMapM blockToXml body
     return $ el "section" (title ++ content)
   where
-    hasSubsections = any isHeader
-    isHeader (Header _ _ _) = True
-    isHeader _ = False
+    hasSubsections = any isHeaderBlock
 
 -- | Only <p> and <empty-line> are allowed within <title> in FB2.
 formatTitle :: [Inline] -> [Content]
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index d62e50880..bf3df8035 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -293,7 +293,7 @@ blockListToTexinfo (x:xs) = do
   case x of
     Header level _ _ -> do
       -- We need need to insert a menu for this node.
-      let (before, after) = break isHeader xs
+      let (before, after) = break isHeaderBlock xs
       before' <- blockListToTexinfo before
       let menu = if level < 4
                     then collectNodes (level + 1) after
@@ -315,10 +315,6 @@ blockListToTexinfo (x:xs) = do
       xs' <- blockListToTexinfo xs
       return $ x' $$ xs'
 
-isHeader :: Block -> Bool
-isHeader (Header _ _ _) = True
-isHeader _              = False
-
 collectNodes :: Int -> [Block] -> [Block]
 collectNodes _ [] = []
 collectNodes level (x:xs) =
-- 
cgit v1.2.3


From 4e7aadb903548bb08c9132c93699071e52f61d76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 19:59:33 -0800
Subject: HTML writer:  With --toc, headers no longer link to themselves.

Closes #1081.
---
 src/Text/Pandoc/Writers/HTML.hs | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 129776363..c8a7e802b 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -475,28 +475,21 @@ blockToHtml opts (BlockQuote blocks) =
      else do
        contents <- blockListToHtml opts blocks
        return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level (ident,_,_) lst) = do
+blockToHtml opts (Header level (_,_,_) lst) = do
   contents <- inlineListToHtml opts lst
   secnum <- liftM stSecNum get
   let contents' = if writerNumberSections opts && not (null secnum)
                      then (H.span ! A.class_ "header-section-number" $ toHtml
                           $ showSecNum secnum) >> strToHtml " " >> contents
                      else contents
-  let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides]
-  let contents''  = if writerTableOfContents opts && not (null ident)
-                       then H.a ! A.href (toValue $
-                              '#' : revealSlash ++
-                                    writerIdentifierPrefix opts ++
-                                    ident) $ contents'
-                       else contents'
   return $ case level of
-              1 -> H.h1 contents''
-              2 -> H.h2 contents''
-              3 -> H.h3 contents''
-              4 -> H.h4 contents''
-              5 -> H.h5 contents''
-              6 -> H.h6 contents''
-              _ -> H.p contents''
+              1 -> H.h1 contents'
+              2 -> H.h2 contents'
+              3 -> H.h3 contents'
+              4 -> H.h4 contents'
+              5 -> H.h5 contents'
+              6 -> H.h6 contents'
+              _ -> H.p contents'
 blockToHtml opts (BulletList lst) = do
   contents <- mapM (blockListToHtml opts) lst
   return $ unordList opts contents
-- 
cgit v1.2.3


From 452a140d0cd808abbdfbb1f91dc35280b1f90794 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 21:09:39 -0800
Subject: Pretty:  Added nestle.  API change, minor version bump to 1.12.3.

---
 src/Text/Pandoc/Pretty.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index faf2a6797..033511832 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -60,6 +60,7 @@ module Text.Pandoc.Pretty (
      , hsep
      , vcat
      , vsep
+     , nestle
      , chomp
      , inside
      , braces
@@ -72,7 +73,7 @@ module Text.Pandoc.Pretty (
      )
 
 where
-import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex)
+import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..))
 import Data.Foldable (toList)
 import Data.List (intercalate)
 import Data.Monoid
@@ -186,6 +187,14 @@ vcat = foldr ($$) empty
 vsep :: [Doc] -> Doc
 vsep = foldr ($+$) empty
 
+-- | Removes leading blank lines from a 'Doc'.
+nestle :: Doc -> Doc
+nestle (Doc d) = Doc $ go d
+  where go x = case viewl x of
+               (BlankLine :< rest) -> go rest
+               (NewLine :< rest)   -> go rest
+               _                   -> x
+
 -- | Chomps trailing blank space off of a 'Doc'.
 chomp :: Doc -> Doc
 chomp d = Doc (fromList dl')
-- 
cgit v1.2.3


From e7e76dbdd8b07b1fd80bda1599d58b7fbc7cf4bd Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 2 Jan 2014 21:10:14 -0800
Subject: RST writer: Ensure no blank line after def in definition list.

Closes #992.
---
 src/Text/Pandoc/Writers/RST.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 1a62f7250..37bb66632 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -287,7 +287,7 @@ definitionListItemToRST (label, defs) = do
   label' <- inlineListToRST label
   contents <- liftM vcat $ mapM blockListToRST defs
   tabstop <- get >>= (return . writerTabStop . stOptions)
-  return $ label' $$ nest tabstop (contents <> cr)
+  return $ label' $$ nest tabstop (nestle contents <> cr)
 
 -- | Convert list of Pandoc block elements to RST.
 blockListToRST :: [Block]       -- ^ List of block elements
-- 
cgit v1.2.3


From 2dd6d892fa310135cece1d8e6d6bac7b538f4e76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 6 Jan 2014 10:17:31 -0800
Subject: HTML writer:  Omit footnotes from TOC entries.

Otherwise we get doubled footnotes when headers have notes!
---
 src/Text/Pandoc/Writers/HTML.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index c8a7e802b..805bb57f1 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -749,7 +749,9 @@ inlineToHtml opts inline =
                                             else [A.title $ toValue tit])
                         return $ foldl (!) H5.embed attributes
                         -- note:  null title included, as in Markdown.pl
-    (Note contents)          -> do
+    (Note contents)
+      | writerIgnoreNotes opts -> return mempty
+      | otherwise              -> do
                         st <- get
                         let notes = stNotes st
                         let number = (length notes) + 1
-- 
cgit v1.2.3


From ba6a26b25871a3912556f1a17330d2363b3f3db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 6 Jan 2014 12:12:21 -0800
Subject: EPUB writer:  Avoid duplicate notes when headings contain notes.

This arose because the headings are copied into the metadata
"title" field, and the note gets rendered twice.  We strip the
note now before putting the heading in "title".
---
 src/Text/Pandoc/Writers/EPUB.hs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 4daa9609e..d2dd7da2e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -397,6 +397,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   let chapters = evalState (toChapters blocks'') []
 
+  let removeNote :: Inline -> Inline
+      removeNote (Note _) = Str ""
+      removeNote x        = x
+
   let chapToEntry :: Int -> Chapter -> Entry
       chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
         $ renderHtml
@@ -404,7 +408,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
             fromMaybe [] mbnum }
         $ case bs of
               (Header _ _ xs : _) ->
-                 Pandoc (setMeta "title" (fromList xs) nullMeta) bs
+                 -- remove notes or we get doubled footnotes
+                 Pandoc (setMeta "title" (walk removeNote $ fromList xs)
+                            nullMeta) bs
               _                   ->
                  Pandoc nullMeta bs
 
-- 
cgit v1.2.3


From d97b1fd14cd2b0f96fcf3bcc10cfc2540fbbe624 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 6 Jan 2014 21:51:11 -0800
Subject: EPUB writer:  Strip out footnotes from toc entries.

---
 src/Text/Pandoc/Writers/EPUB.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index d2dd7da2e..a48300939 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -124,7 +124,12 @@ opfName n = QName n Nothing (Just "opf")
 
 plainify :: [Inline] -> String
 plainify t =
-  trimr $ writePlain def{ writerStandalone = False } $ Pandoc nullMeta [Plain t]
+  trimr $ writePlain def{ writerStandalone = False }
+        $ Pandoc nullMeta [Plain $ walk removeNote t]
+
+removeNote :: Inline -> Inline
+removeNote (Note _) = Str ""
+removeNote x        = x
 
 getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
 getEPUBMetadata opts meta = do
@@ -397,10 +402,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   let chapters = evalState (toChapters blocks'') []
 
-  let removeNote :: Inline -> Inline
-      removeNote (Note _) = Str ""
-      removeNote x        = x
-
   let chapToEntry :: Int -> Chapter -> Entry
       chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
         $ renderHtml
-- 
cgit v1.2.3


From 002c8ce14ca15bfbc800e9628cf09babf3d96acd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 7 Jan 2014 09:01:32 -0800
Subject: Fixed small regression in docx writer.

---
 src/Text/Pandoc/Writers/Docx.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 25739f7c8..2a834c2da 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx.
 -}
 module Text.Pandoc.Writers.Docx ( writeDocx ) where
 import Data.Maybe (fromMaybe)
-import Data.List ( intercalate, isPrefixOf )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -265,6 +265,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   webSettingsEntry <- entryFromArchive "word/webSettings.xml"
   let miscRels = [ f | f <- filesInArchive refArchive
                      , "word/_rels/" `isPrefixOf` f
+                     , ".xml.rels" `isSuffixOf` f
                      , f /= "word/_rels/document.xml.rels"
                      , f /= "word/_rels/footnotes.xml.rels" ]
   miscRelEntries <- mapM entryFromArchive miscRels
@@ -815,6 +816,8 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
 
 parseXml :: Archive -> String -> IO Element
 parseXml refArchive relpath =
-  case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
-       Just d  -> return d
+  case findEntryByPath relpath refArchive of
+       Just e  -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
+                       Just d  -> return d
+                       Nothing -> fail $ relpath ++ " corrupt in reference docx"
        Nothing -> fail $ relpath ++ " missing in reference docx"
-- 
cgit v1.2.3


From 2c7bf41d26dfe293f32830d92d5bfd13f2cf2b89 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 7 Jan 2014 10:32:47 -0800
Subject: Added wmf and emf mime types.

---
 src/Text/Pandoc/MIME.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index f41aa98bb..44989ee94 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -147,6 +147,7 @@ mimeTypesList = -- List borrowed from happstack-server.
            ,("dxr","application/x-director")
            ,("emb","chemical/x-embl-dl-nucleotide")
            ,("embl","chemical/x-embl-dl-nucleotide")
+           ,("emf","image/x-emf")
            ,("eml","message/rfc822")
            ,("ent","chemical/x-ncbi-asn1-ascii")
            ,("eot","application/vnd.ms-fontobject")
@@ -465,6 +466,7 @@ mimeTypesList = -- List borrowed from happstack-server.
            ,("wm","video/x-ms-wm")
            ,("wma","audio/x-ms-wma")
            ,("wmd","application/x-ms-wmd")
+           ,("wmf","image/x-wmf")
            ,("wml","text/vnd.wap.wml")
            ,("wmlc","application/vnd.wap.wmlc")
            ,("wmls","text/vnd.wap.wmlscript")
-- 
cgit v1.2.3


From d9eff99f27bb6426534651c9ff85b2b4354e6d16 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 7 Jan 2014 23:39:49 -0800
Subject: Markdown reader:  Allow hard line breaks in table cells.

The \-newline form must be used; the two-space+newline form
won't work, since in a table cell nearly every line ends with
two spaces.
---
 src/Text/Pandoc/Readers/Markdown.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3a5d29b4e..88d0bf439 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1126,12 +1126,12 @@ multilineTableHeader headless = try $ do
                      then liftM (map (:[]) . tail .
                               splitStringByIndices (init indices)) $ lookAhead anyLine
                      else return $ transpose $ map
-                           (\ln -> tail $ splitStringByIndices (init indices) ln)
+                           (tail . splitStringByIndices (init indices))
                            rawContent
   let aligns   = zipWith alignType rawHeadsList lengths
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords rawHeadsList
+                    else map (unlines . map trim) rawHeadsList
   heads <- fmap sequence $
            mapM (parseFromString (mconcat <$> many plain)) $
              map trim rawHeads
@@ -1188,7 +1188,7 @@ gridTableHeader headless = try $ do
   -- RST does not have a notion of alignments
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords $ transpose
+                    else map (unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
   return (heads, aligns, indices)
-- 
cgit v1.2.3


From aada7b495bf4af9912603b3b7649dd0d63f9b5fc Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 8 Jan 2014 12:04:08 -0800
Subject: fetchItem: Handle image URLs beginning with '//'.

---
 src/Text/Pandoc/Shared.hs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 3446f4343..714402e42 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -91,7 +91,8 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
                    isLetter, isDigit, isSpace )
 import Data.List ( find, isPrefixOf, intercalate )
 import qualified Data.Map as M
-import Network.URI ( escapeURIString, isURI, unEscapeString )
+import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
+                     unEscapeString, parseURIReference )
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
 import System.FilePath ( (</>), takeExtension, dropExtension )
@@ -623,9 +624,13 @@ fetchItem :: Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe String))
 fetchItem sourceURL s
   | isURI s         = openURL s
-  | otherwise       = case sourceURL of
-                           Just u  -> openURL (u ++ "/" ++ s)
-                           Nothing -> E.try readLocalFile
+  | otherwise       =
+      case sourceURL >>= parseURIReference of
+           Just u  -> case parseURIReference s of
+                           Just s' -> openURL $ show $
+                                        s' `nonStrictRelativeTo` u
+                           Nothing -> openURL $ show u ++ "/" ++ s
+           Nothing -> E.try readLocalFile
   where readLocalFile = do
           let mime = case takeExtension s of
                           ".gz" -> getMimeType $ dropExtension s
-- 
cgit v1.2.3


From 3bf8012bf6e6965a68de76d5bb46782086393da7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 8 Jan 2014 19:33:14 -0800
Subject: Text.Pandoc.ImageSize: Parse EXIF format JPGs.

Note:  For now we just assign them all 72 dpi.  It wasn't
clear to me how to extract the resolution information.
At least the aspect ratio will be right, and 72 dpi is
the most common setting.

Closes #976.
---
 src/Text/Pandoc/ImageSize.hs | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9b0850efb..e2a8b8283 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -53,7 +53,8 @@ imageType :: ByteString -> Maybe ImageType
 imageType img = case B.take 4 img of
                      "\x89\x50\x4e\x47" -> return Png
                      "\x47\x49\x46\x38" -> return Gif
-                     "\xff\xd8\xff\xe0" -> return Jpeg
+                     "\xff\xd8\xff\xe0" -> return Jpeg  -- JFIF
+                     "\xff\xd8\xff\xe1" -> return Jpeg  -- Exif
                      "%PDF"             -> return Pdf
                      "%!PS"
                        | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
@@ -139,8 +140,14 @@ gifSize img = do
 jpegSize :: ByteString -> Maybe ImageSize
 jpegSize img = do
   let (hdr, rest) = B.splitAt 4 img
-  guard $ hdr == "\xff\xd8\xff\xe0"
   guard $ B.length rest >= 14
+  case hdr of
+       "\xff\xd8\xff\xe0" -> jfifSize rest
+       "\xff\xd8\xff\xe1" -> exifSize rest
+       _                  -> mzero
+
+jfifSize :: ByteString -> Maybe ImageSize
+jfifSize rest = do
   let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
                                            $ unpack $ B.take 5 $ B.drop 9 $ rest
   let factor = case dpiDensity of
@@ -149,11 +156,11 @@ jpegSize img = do
                     _ -> const 72
   let dpix = factor (shift dpix1 8 + dpix2)
   let dpiy = factor (shift dpiy1 8 + dpiy2)
-  (w,h) <- findJpegSize rest
+  (w,h) <- findJfifSize rest
   return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy }
 
-findJpegSize :: ByteString -> Maybe (Integer,Integer)
-findJpegSize bs = do
+findJfifSize :: ByteString -> Maybe (Integer,Integer)
+findJfifSize bs = do
   let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
   case B.uncons bs' of
        Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
@@ -165,8 +172,23 @@ findJpegSize bs = do
               [c1,c2] -> do
                 let len = shift c1 8 + c2
                 -- skip variables
-                findJpegSize $ B.drop len bs''
+                findJfifSize $ B.drop len bs''
               _       -> fail "JPEG parse error"
        Nothing -> fail "Did not find length record"
 
+exifSize :: ByteString -> Maybe ImageSize
+exifSize rest = do
+  let bs' = B.takeWhile (/='\xff') $ B.drop 8 rest  -- exif data
+  let (_,bs'') = B.breakSubstring "\xa0\x02" bs' -- width
+  let rawWidth = B.take 2 $ B.drop 10 bs''
+  let (_,bs''') = B.breakSubstring "\xa0\x03" bs' -- height
+  let rawHeight = B.take 2 $ B.drop 10 bs'''
+  let tonum bs =  case map fromIntegral $ unpack bs of
+                       [x,y]  -> Just $ shift x 8 + y
+                       _      -> Nothing
+  case (tonum rawWidth, tonum rawHeight) of
+       (Just w, Just h) ->
+                 return $ ImageSize { pxX = w, pxY = h, dpiX = 72, dpiY = 72 }
+       _      -> fail "Could not determine exif image size"
+  -- some day, figure out how to parse dpi from exif
 
-- 
cgit v1.2.3


From 5c8c380a7997156964a5402974f6f464233aab9b Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 9 Jan 2014 11:16:17 -0800
Subject: Better exif parsing, including image resolution.

This introduces a dependency on binary >= 0.6, but we depend
on binary >= 0.5 via zip-archive anyway.

Closes #976.
---
 src/Text/Pandoc/ImageSize.hs | 225 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 210 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e2a8b8283..467205220 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -32,9 +32,14 @@ module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize,
                     sizeInPixels, sizeInPoints ) where
 import Data.ByteString (ByteString, unpack)
 import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import Control.Applicative
 import Control.Monad
 import Data.Bits
+import Data.Binary
+import Data.Binary.Get
 import Text.Pandoc.Shared (safeRead)
+import qualified Data.Map as M
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
@@ -143,7 +148,7 @@ jpegSize img = do
   guard $ B.length rest >= 14
   case hdr of
        "\xff\xd8\xff\xe0" -> jfifSize rest
-       "\xff\xd8\xff\xe1" -> exifSize rest
+       "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest
        _                  -> mzero
 
 jfifSize :: ByteString -> Maybe ImageSize
@@ -177,18 +182,208 @@ findJfifSize bs = do
        Nothing -> fail "Did not find length record"
 
 exifSize :: ByteString -> Maybe ImageSize
-exifSize rest = do
-  let bs' = B.takeWhile (/='\xff') $ B.drop 8 rest  -- exif data
-  let (_,bs'') = B.breakSubstring "\xa0\x02" bs' -- width
-  let rawWidth = B.take 2 $ B.drop 10 bs''
-  let (_,bs''') = B.breakSubstring "\xa0\x03" bs' -- height
-  let rawHeight = B.take 2 $ B.drop 10 bs'''
-  let tonum bs =  case map fromIntegral $ unpack bs of
-                       [x,y]  -> Just $ shift x 8 + y
-                       _      -> Nothing
-  case (tonum rawWidth, tonum rawHeight) of
-       (Just w, Just h) ->
-                 return $ ImageSize { pxX = w, pxY = h, dpiX = 72, dpiY = 72 }
-       _      -> fail "Could not determine exif image size"
-  -- some day, figure out how to parse dpi from exif
+exifSize = -- runGet ((Just <$> exifHeader) `mplus` return Nothing) .
+  runGet (Just <$> exifHeader) .
+           BL.fromChunks . (:[])
 
+exifHeader :: Get ImageSize
+exifHeader = do
+  _app1DataSize <- getWord16be
+  exifHdr <- getWord32be
+  unless (exifHdr == 0x45786966) $ fail "Did not find exif header"
+  zeros <- getWord16be
+  unless (zeros == 0) $ fail "Expected zeros after exif header"
+  -- beginning of tiff header -- we read whole thing to use
+  -- in getting data from offsets:
+  tiffHeader <- lookAhead getRemainingLazyByteString
+  byteAlign <- getWord16be
+  let bigEndian = byteAlign == 0x4d4d
+  let (getWord16, getWord32, getWord64) =
+        if bigEndian
+           then (getWord16be, getWord32be, getWord64be)
+           else (getWord16le, getWord32le, getWord64le)
+  let getRational = do
+        num <- getWord32
+        den <- getWord32
+        return $ fromIntegral num / fromIntegral den
+  tagmark <- getWord16
+  unless (tagmark == 0x002a) $ fail "Failed alignment sanity check"
+  ifdOffset <- getWord32
+  skip (fromIntegral ifdOffset - 8) -- skip to IDF
+  numentries <- getWord16
+  let ifdEntry = do
+       tag <- getWord16 >>= \t ->
+                maybe (fail $ "Unknown tag type " ++ show t) return
+                (M.lookup t tagTypeTable)
+       dataFormat <- getWord16
+       numComponents <- getWord32
+       (fmt, bytesPerComponent) <-
+             case dataFormat of
+                  1  -> return (UnsignedByte . runGet getWord8, 1)
+                  2  -> return (AsciiString, 1)
+                  3  -> return (UnsignedShort . runGet getWord16, 2)
+                  4  -> return (UnsignedLong . runGet getWord32, 4)
+                  5  -> return (UnsignedRational . runGet getRational, 8)
+                  6  -> return (SignedByte . runGet getWord8, 1)
+                  7  -> return (Undefined . runGet getWord8, 1)
+                  8  -> return (SignedShort . runGet getWord16, 2)
+                  9  -> return (SignedLong . runGet getWord32, 4)
+                  10 -> return (SignedRational . runGet getRational, 8)
+                  11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4)
+                  12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8)
+                  _  -> fail $ "Unknown data format " ++ show dataFormat
+       let totalBytes = fromIntegral $ numComponents * bytesPerComponent
+       payload <- if totalBytes <= 4 -- data is right here
+                     then (fmt . BL.fromChunks . (:[])) <$>
+                          (getByteString totalBytes <*
+                          skip (4 - totalBytes))
+                     else do  -- get data from offset
+                          offs <- getWord32
+                          return $ fmt $ BL.take (fromIntegral totalBytes) $
+                                   BL.drop (fromIntegral offs) tiffHeader
+       return (tag, payload)
+  entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
+  subentries <- case lookup ExifOffset entries of
+                      Just (UnsignedLong offset) -> do
+                        pos <- bytesRead
+                        skip (fromIntegral offset - (fromIntegral pos - 8))
+                        numsubentries <- getWord16
+                        sequence $
+                           replicate (fromIntegral numsubentries) ifdEntry
+                      _ -> return []
+  let allentries = entries ++ subentries
+  (width, height) <- case (lookup ExifImageWidth allentries,
+                           lookup ExifImageHeight allentries) of
+                          (Just (UnsignedLong w), Just (UnsignedLong h)) ->
+                            return (fromIntegral w, fromIntegral h)
+                          _ -> fail "Could not determine image width, height"
+  let resfactor = case lookup ResolutionUnit allentries of
+                        Just (UnsignedShort 1) -> (100 / 254)
+                        _ -> 1
+  let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+             $ lookup XResolution allentries
+  let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+             $ lookup YResolution allentries
+  return $ ImageSize{
+                    pxX  = width
+                  , pxY  = height
+                  , dpiX = xres
+                  , dpiY = yres }
+
+data DataFormat = UnsignedByte Word8
+                | AsciiString BL.ByteString
+                | UnsignedShort Word16
+                | UnsignedLong Word32
+                | UnsignedRational Rational
+                | SignedByte Word8
+                | Undefined Word8
+                | SignedShort Word16
+                | SignedLong Word32
+                | SignedRational Rational
+                | SingleFloat Word32
+                | DoubleFloat Word64
+                deriving (Show)
+
+data TagType = ImageDescription
+             | Make
+             | Model
+             | Orientation
+             | XResolution
+             | YResolution
+             | ResolutionUnit
+             | Software
+             | DateTime
+             | WhitePoint
+             | PrimaryChromaticities
+             | YCbCrCoefficients
+             | YCbCrPositioning
+             | ReferenceBlackWhite
+             | Copyright
+             | ExifOffset
+             | ExposureTime
+             | FNumber
+             | ExposureProgram
+             | ISOSpeedRatings
+             | ExifVersion
+             | DateTimeOriginal
+             | DateTimeDigitized
+             | ComponentConfiguration
+             | CompressedBitsPerPixel
+             | ShutterSpeedValue
+             | ApertureValue
+             | BrightnessValue
+             | ExposureBiasValue
+             | MaxApertureValue
+             | SubjectDistance
+             | MeteringMode
+             | LightSource
+             | Flash
+             | FocalLength
+             | MakerNote
+             | UserComment
+             | FlashPixVersion
+             | ColorSpace
+             | ExifImageWidth
+             | ExifImageHeight
+             | RelatedSoundFile
+             | ExifInteroperabilityOffset
+             | FocalPlaneXResolution
+             | FocalPlaneYResolution
+             | FocalPlaneResolutionUnit
+             | SensingMethod
+             | FileSource
+             | SceneType
+             deriving (Show, Eq, Ord)
+
+tagTypeTable :: M.Map Word16 TagType
+tagTypeTable = M.fromList
+  [ (0x010e, ImageDescription)
+  , (0x010f, Make)
+  , (0x0110, Model)
+  , (0x0112, Orientation)
+  , (0x011a, XResolution)
+  , (0x011b, YResolution)
+  , (0x0128, ResolutionUnit)
+  , (0x0131, Software)
+  , (0x0132, DateTime)
+  , (0x013e, WhitePoint)
+  , (0x013f, PrimaryChromaticities)
+  , (0x0211, YCbCrCoefficients)
+  , (0x0213, YCbCrPositioning)
+  , (0x0214, ReferenceBlackWhite)
+  , (0x8298, Copyright)
+  , (0x8769, ExifOffset)
+  , (0x829a, ExposureTime)
+  , (0x829d, FNumber)
+  , (0x8822, ExposureProgram)
+  , (0x8827, ISOSpeedRatings)
+  , (0x9000, ExifVersion)
+  , (0x9003, DateTimeOriginal)
+  , (0x9004, DateTimeDigitized)
+  , (0x9101, ComponentConfiguration)
+  , (0x9102, CompressedBitsPerPixel)
+  , (0x9201, ShutterSpeedValue)
+  , (0x9202, ApertureValue)
+  , (0x9203, BrightnessValue)
+  , (0x9204, ExposureBiasValue)
+  , (0x9205, MaxApertureValue)
+  , (0x9206, SubjectDistance)
+  , (0x9207, MeteringMode)
+  , (0x9208, LightSource)
+  , (0x9209, Flash)
+  , (0x920a, FocalLength)
+  , (0x927c, MakerNote)
+  , (0x9286, UserComment)
+  , (0xa000, FlashPixVersion)
+  , (0xa001, ColorSpace)
+  , (0xa002, ExifImageWidth)
+  , (0xa003, ExifImageHeight)
+  , (0xa004, RelatedSoundFile)
+  , (0xa005, ExifInteroperabilityOffset)
+  , (0xa20e, FocalPlaneXResolution)
+  , (0xa20f, FocalPlaneYResolution)
+  , (0xa210, FocalPlaneResolutionUnit)
+  , (0xa217, SensingMethod)
+  , (0xa300, FileSource)
+  , (0xa301, SceneType)
+  ]
-- 
cgit v1.2.3


From b9b1546ed238ca8fd4d65a6e02fa7ecbf9a4be65 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 9 Jan 2014 11:25:24 -0800
Subject: Markdown parser:  be more permissive about citation keys.

Keys may now start with an underscore as well as a letter.
Underscores do not count as internal punctuation, but are
treated like alphanumerics, so "key:_2008" will work, as
it did not before.  (This change was necessary to use keys
generated by zotero.)

Closes #1111, closes #1011.
---
 src/Text/Pandoc/Readers/Markdown.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 88d0bf439..3feafd362 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1811,9 +1811,10 @@ citeKey = try $ do
   guard $ lastStrPos /= Just pos
   suppress_author <- option False (char '-' >> return True)
   char '@'
-  first <- letter
-  let internal p = try $ p >>~ lookAhead (letter <|> digit)
-  rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
+  first <- letter <|> char '_'
+  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+  let internal p = try $ p >>~ lookAhead regchar
+  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
   let key = first:rest
   return (suppress_author, key)
 
-- 
cgit v1.2.3


From b4b16d57865b1d951be79f04f8ad41fb70544077 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 9 Jan 2014 22:50:51 -0800
Subject: Minor improvement to exif parser.

---
 src/Text/Pandoc/ImageSize.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 467205220..d1aacff1c 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -234,8 +234,8 @@ exifHeader = do
                   _  -> fail $ "Unknown data format " ++ show dataFormat
        let totalBytes = fromIntegral $ numComponents * bytesPerComponent
        payload <- if totalBytes <= 4 -- data is right here
-                     then (fmt . BL.fromChunks . (:[])) <$>
-                          (getByteString totalBytes <*
+                     then fmt <$>
+                          (getLazyByteString (fromIntegral totalBytes) <*
                           skip (4 - totalBytes))
                      else do  -- get data from offset
                           offs <- getWord32
-- 
cgit v1.2.3


From a1abb3eeea2321654a8450725ff6c0d1a18ee0c7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 14 Jan 2014 10:12:33 -0800
Subject: Allow binary 0.5.  Version bump to 1.12.3.1.

---
 src/Text/Pandoc/ImageSize.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index d1aacff1c..14575244d 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -182,9 +182,12 @@ findJfifSize bs = do
        Nothing -> fail "Did not find length record"
 
 exifSize :: ByteString -> Maybe ImageSize
-exifSize = -- runGet ((Just <$> exifHeader) `mplus` return Nothing) .
-  runGet (Just <$> exifHeader) .
-           BL.fromChunks . (:[])
+exifSize = runGet (Just <$> exifHeader) .  BL.fromChunks . (:[])
+-- NOTE:  It would be nicer to do
+-- runGet ((Just <$> exifHeader) <|> return Nothing)
+-- which would prevent pandoc from raising an error when an exif header can't
+-- be parsed.  But we only get an Alternative instance for Get in binary 0.6,
+-- and binary 0.5 ships with ghc 7.6.
 
 exifHeader :: Get ImageSize
 exifHeader = do
-- 
cgit v1.2.3


From 6c59f060a769c7f312137d5c1a49cf878ea5b2a2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 20 Jan 2014 11:09:44 -0800
Subject: HTML reader:  Fixed bug reading inline math with `$$`.

See #225.
---
 src/Text/Pandoc/Readers/HTML.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 506fe7770..d1e4d0024 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -476,8 +476,8 @@ pBlank = try $ do
 
 pTagContents :: Parser [Char] ParserState Inline
 pTagContents =
-      Math InlineMath  `fmap` mathInline
-  <|> Math DisplayMath `fmap` mathDisplay
+      Math DisplayMath `fmap` mathDisplay
+  <|> Math InlineMath  `fmap` mathInline
   <|> pStr
   <|> pSpace
   <|> smartPunctuation pTagContents
-- 
cgit v1.2.3


From 9f3b2f6f5d06a4cf3142ffc74c8de4c1cc2bd928 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 22 Jan 2014 22:07:13 -0800
Subject: Fixed mediawiki ordered list parsing.

Closes #1122.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 8d8ea0199..794890eb6 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -149,9 +149,16 @@ inlinesInTags tag = try $ do
 blocksInTags :: String -> MWParser Blocks
 blocksInTags tag = try $ do
   (_,raw) <- htmlTag (~== TagOpen tag [])
+  let closer = if tag == "li"
+                  then htmlTag (~== TagClose "li")
+                     <|> lookAhead (
+                              htmlTag (~== TagOpen "li" [])
+                          <|> htmlTag (~== TagClose "ol")
+                          <|> htmlTag (~== TagClose "ul"))
+                  else htmlTag (~== TagClose tag)
   if '/' `elem` raw   -- self-closing tag
      then return mempty
-     else mconcat <$> manyTill block (htmlTag (~== TagClose tag))
+     else mconcat <$> manyTill block closer
 
 charsInTags :: String -> MWParser [Char]
 charsInTags tag = try $ do
@@ -381,15 +388,13 @@ bulletList = B.bulletList <$>
 orderedList :: MWParser Blocks
 orderedList =
        (B.orderedList <$> many1 (listItem '#'))
-   <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
-        many (listItem '#' <|> li) <*
-        optional (htmlTag (~== TagClose "ul"))))
-   <|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
-          spaces
-          items <- many (listItem '#' <|> li)
-          optional (htmlTag (~== TagClose "ol"))
-          let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
-          return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
+   <|> try
+       (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+           spaces
+           items <- many (listItem '#' <|> li)
+           optional (htmlTag (~== TagClose "ol"))
+           let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
+           return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
 
 definitionList :: MWParser Blocks
 definitionList = B.definitionList <$> many1 defListItem
-- 
cgit v1.2.3


From a333d9788e0f510f681ae1b5f0f246434ee15d62 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 24 Jan 2014 16:00:02 -0800
Subject: ImageSize:  Avoid use of lookAhead, which is not in binary >= 0.6.

Closes #1124.
---
 src/Text/Pandoc/ImageSize.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 14575244d..3c9623b3c 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -182,15 +182,16 @@ findJfifSize bs = do
        Nothing -> fail "Did not find length record"
 
 exifSize :: ByteString -> Maybe ImageSize
-exifSize = runGet (Just <$> exifHeader) .  BL.fromChunks . (:[])
+exifSize bs = runGet (Just <$> exifHeader bl) bl
+  where bl = BL.fromChunks [bs]
 -- NOTE:  It would be nicer to do
 -- runGet ((Just <$> exifHeader) <|> return Nothing)
 -- which would prevent pandoc from raising an error when an exif header can't
 -- be parsed.  But we only get an Alternative instance for Get in binary 0.6,
 -- and binary 0.5 ships with ghc 7.6.
 
-exifHeader :: Get ImageSize
-exifHeader = do
+exifHeader :: BL.ByteString -> Get ImageSize
+exifHeader hdr = do
   _app1DataSize <- getWord16be
   exifHdr <- getWord32be
   unless (exifHdr == 0x45786966) $ fail "Did not find exif header"
@@ -198,7 +199,7 @@ exifHeader = do
   unless (zeros == 0) $ fail "Expected zeros after exif header"
   -- beginning of tiff header -- we read whole thing to use
   -- in getting data from offsets:
-  tiffHeader <- lookAhead getRemainingLazyByteString
+  let tiffHeader = BL.drop 8 hdr
   byteAlign <- getWord16be
   let bigEndian = byteAlign == 0x4d4d
   let (getWord16, getWord32, getWord64) =
-- 
cgit v1.2.3


From 3127ab2b5ede10224f9f86722125573d34d7d3dd Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 4 Feb 2014 10:05:52 -0800
Subject: Slight code reorganization in endline.

---
 src/Text/Pandoc/Readers/Markdown.hs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3feafd362..8a41cef49 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1540,16 +1540,14 @@ endline :: MarkdownParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
+  -- parse potential list-starts differently if in a list:
+  st <- getState
+  when (stateParserContext st == ListItemState) $ notFollowedBy listStart
   guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
   guardEnabled Ext_backtick_code_blocks >>
      notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
-  -- parse potential list-starts differently if in a list:
-  st <- getState
-  when (stateParserContext st == ListItemState) $ do
-     notFollowedBy' bulletListStart
-     notFollowedBy' anyOrderedListStart
   (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
     <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
     <|> (return $ return B.space)
-- 
cgit v1.2.3


From 286781f8014cd40ad741e52b254904ffa7dc2855 Mon Sep 17 00:00:00 2001
From: Merijn Verstraaten <merijn@inconsistent.nl>
Date: Thu, 6 Feb 2014 23:10:59 +0100
Subject: Removed RenderState datatype context.

Reasoning:
    - It's not Haskell2010
    - It breaks some tools
    - Doesn't actually do anything
    - RenderState doesn't even have a Monoid instance
---
 src/Text/Pandoc/Pretty.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 033511832..5331587ce 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -81,8 +81,7 @@ import Data.String
 import Control.Monad.State
 import Data.Char (isSpace)
 
-data Monoid a =>
-     RenderState a = RenderState{
+data RenderState a = RenderState{
          output       :: [a]        -- ^ In reverse order
        , prefix       :: String
        , usePrefix    :: Bool
-- 
cgit v1.2.3


From 3f0fe345f9aa69d1faf36e6a6f913013f21b3749 Mon Sep 17 00:00:00 2001
From: Vaclav Zeman <vhaisman@gmail.com>
Date: Sat, 8 Feb 2014 13:40:04 +0100
Subject: Use \/ to avoid en-dash ligature instead of -{}-.

This is to fix LuaLaTeX output. The -{}- sequence does not avoid the
ligature with LuaLaTeX but \/ does.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 72b0bde6d..dbb9b477a 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -201,7 +201,7 @@ stringToLaTeX  ctx (x:xs) = do
        '_' | not isUrl -> "\\_" ++ rest
        '#' -> "\\#" ++ rest
        '-' -> case xs of   -- prevent adjacent hyphens from forming ligatures
-                   ('-':_) -> "-{}" ++ rest
+                   ('-':_) -> "-\\/" ++ rest
                    _       -> '-' : rest
        '~' | not isUrl -> "\\textasciitilde{}" ++ rest
        '^' -> "\\^{}" ++ rest
-- 
cgit v1.2.3


From fe246ce01c4c523b7391d58d910af09bf3bac6e6 Mon Sep 17 00:00:00 2001
From: Merijn Verstraaten <merijn@inconsistent.nl>
Date: Sat, 15 Feb 2014 17:51:33 +0100
Subject: Enhanced Pandoc's support for rST roles.

rST parser now supports:
    - All built-in rST roles
    - New role definition
    - Role inheritance

Issues/TODO:
    - Silently ignores illegal fields on roles
    - Silently drops class annotations for roles
    - Only supports :format: fields with a single format for :raw: roles,
      requires a change to Text.Pandoc.Definition.Format to support multiple
      formats.
    - Allows direct use of :raw: role, rST only allows indirect (i.e.,
      inherited use of :raw:).
---
 src/Text/Pandoc/Parsing.hs     |  2 +
 src/Text/Pandoc/Readers/RST.hs | 91 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 83 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 2f21e1253..0713f4a96 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -853,6 +853,7 @@ data ParserState = ParserState
       stateHasChapters     :: Bool,          -- ^ True if \chapter encountered
       stateMacros          :: [Macro],       -- ^ List of macros defined so far
       stateRstDefaultRole  :: String,        -- ^ Current rST default interpreted text role
+      stateRstCustomRoles  :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
       stateWarnings        :: [String]       -- ^ Warnings generated by the parser
     }
 
@@ -915,6 +916,7 @@ defaultParserState =
                   stateHasChapters     = False,
                   stateMacros          = [],
                   stateRstDefaultRole  = "title-reference",
+                  stateRstCustomRoles  = M.empty,
                   stateWarnings        = []}
 
 getOption :: (ReaderOptions -> a) -> Parser s ParserState a
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index c12a1493a..a46a3a6c6 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList)
 import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
 import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero )
+import Control.Monad ( when, liftM, guard, mzero, mplus )
 import Data.List ( findIndex, intersperse, intercalate,
                    transpose, sort, deleteFirstsBy, isSuffixOf )
+import Data.Maybe (fromMaybe)
 import qualified Data.Map as M
 import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>))
+import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
 import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
 import qualified Text.Pandoc.Builder as B
 import Data.Monoid (mconcat, mempty)
@@ -530,7 +531,7 @@ directive' = do
   let body' = body ++ "\n\n"
   case label of
         "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
-        "role" -> return mempty
+        "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
         "container" -> parseFromString parseBlocks body'
         "replace" -> B.para <$>  -- consumed by substKey
                    parseFromString (trimInlines . mconcat <$> many inline)
@@ -591,7 +592,38 @@ directive' = do
                           Nothing -> B.image src "" alt
         _     -> return mempty
 
--- Can contain haracter codes as decimal numbers or
+-- TODO:
+--  - Silently ignores illegal fields
+--  - Silently drops classes
+--  - Only supports :format: fields with a single format for :raw: roles,
+--    change Text.Pandoc.Definition.Format to fix
+addNewRole :: String -> [(String, String)] -> RSTParser Blocks
+addNewRole roleString fields = do
+    (role, parentRole) <- parseFromString inheritedRole roleString
+    customRoles <- stateRstCustomRoles <$> getState
+    baseRole <- case M.lookup parentRole customRoles of
+        Just (base, _, _) -> return base
+        Nothing -> return parentRole
+
+    let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
+        annotate = maybe id addLanguage $
+            if baseRole == "code"
+               then lookup "language" fields
+               else Nothing
+
+    updateState $ \s -> s {
+        stateRstCustomRoles =
+          M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
+    }
+
+    return $ B.singleton Null
+  where
+    addLanguage lang (ident, classes, keyValues) =
+        (ident, "sourceCode" : lang : classes, keyValues)
+    inheritedRole =
+        (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
+
+-- Can contain character codes as decimal numbers or
 -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
 -- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
 -- or text, which is used as-is.  Comments start with ..
@@ -930,17 +962,56 @@ strong = B.strong . trimInlines . mconcat <$>
 -- Note, this doesn't precisely implement the complex rule in
 -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
 -- but it should be good enough for most purposes
+--
+-- TODO:
+--  - Classes are silently discarded in addNewRole
+--  - Lacks sensible implementation for title-reference (which is the default)
+--  - Allows direct use of the :raw: role, rST only allows inherited use.
 interpretedRole :: RSTParser Inlines
 interpretedRole = try $ do
   (role, contents) <- roleBefore <|> roleAfter
-  case role of
-       "sup"  -> return $ B.superscript $ B.str contents
-       "sub"  -> return $ B.subscript $ B.str contents
-       "math" -> return $ B.math contents
-       _      -> return $ B.str contents  --unknown
+  renderRole contents Nothing role nullAttr
+
+renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
+renderRole contents fmt role attr = case role of
+    "sup"  -> return $ B.superscript $ B.str contents
+    "superscript" -> return $ B.superscript $ B.str contents
+    "sub"  -> return $ B.subscript $ B.str contents
+    "subscript"  -> return $ B.subscript $ B.str contents
+    "emphasis" -> return $ B.emph $ B.str contents
+    "strong" -> return $ B.strong $ B.str contents
+    "rfc-reference" -> return $ rfcLink contents
+    "RFC" -> return $ rfcLink contents
+    "pep-reference" -> return $ pepLink contents
+    "PEP" -> return $ pepLink contents
+    "literal" -> return $ B.str contents
+    "math" -> return $ B.math contents
+    "title-reference" -> titleRef contents
+    "title" -> titleRef contents
+    "t" -> titleRef contents
+    "code" -> return $ B.codeWith attr contents
+    "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
+    custom -> do
+        customRole <- stateRstCustomRoles <$> getState
+        case M.lookup custom customRole of
+            Just (_, newFmt, inherit) -> let
+                fmtStr = fmt `mplus` newFmt
+                (newRole, newAttr) = inherit attr
+                in renderRole contents fmtStr newRole newAttr
+            Nothing -> return $ B.str contents -- Undefined role
+ where
+   titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+   rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
+     where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
+   pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
+     where padNo = replicate (4 - length pepNo) '0' ++ pepNo
+           pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+
+roleNameEndingIn :: RSTParser Char -> RSTParser String
+roleNameEndingIn end = many1Till (letter <|> char '-') end
 
 roleMarker :: RSTParser String
-roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':')
+roleMarker = char ':' *> roleNameEndingIn (char ':')
 
 roleBefore :: RSTParser (String,String)
 roleBefore = try $ do
-- 
cgit v1.2.3


From 66fd9bf7595cd97ccd24b0d64748c852dc604ce8 Mon Sep 17 00:00:00 2001
From: Merijn Verstraaten <merijn@inconsistent.nl>
Date: Sat, 15 Feb 2014 17:57:08 +0100
Subject: Clarified field values in RstCustomRoles.

---
 src/Text/Pandoc/Parsing.hs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0713f4a96..2bc351db3 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -854,6 +854,10 @@ data ParserState = ParserState
       stateMacros          :: [Macro],       -- ^ List of macros defined so far
       stateRstDefaultRole  :: String,        -- ^ Current rST default interpreted text role
       stateRstCustomRoles  :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
+      -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
+      -- roles), 3) Source language annotation for code (could be used to
+      -- annotate role classes too).
+
       stateWarnings        :: [String]       -- ^ Warnings generated by the parser
     }
 
-- 
cgit v1.2.3


From f6a020a906e5b03defa9924819b7f8cf2f626277 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 17 Feb 2014 15:18:52 -0800
Subject: HTML writer:  Fixed bug with unnumbered section headings.

Unnumbered section headings (with class 'unnumbered') were getting
numbers.  This commit fixes the bug.
---
 src/Text/Pandoc/Writers/HTML.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 805bb57f1..3ac2a836f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -475,10 +475,11 @@ blockToHtml opts (BlockQuote blocks) =
      else do
        contents <- blockListToHtml opts blocks
        return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level (_,_,_) lst) = do
+blockToHtml opts (Header level (_,classes,_) lst) = do
   contents <- inlineListToHtml opts lst
   secnum <- liftM stSecNum get
   let contents' = if writerNumberSections opts && not (null secnum)
+                     && "unnumbered" `notElem` classes
                      then (H.span ! A.class_ "header-section-number" $ toHtml
                           $ showSecNum secnum) >> strToHtml " " >> contents
                      else contents
-- 
cgit v1.2.3


From f3a062d5f928ca46376f4fda0a04172d54540454 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 Feb 2014 09:11:15 -0800
Subject: Make rst figures true figures.  Closes #1168.

Thanks to CasperVector.
---
 src/Text/Pandoc/Readers/RST.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a46a3a6c6..127eae167 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -581,7 +581,7 @@ directive' = do
         "figure" -> do
            (caption, legend) <- parseFromString extractCaption body'
            let src = escapeURI $ trim top
-           return $ B.para (B.image src "" caption) <> legend
+           return $ B.para (B.image src "fig:" caption) <> legend
         "image" -> do
            let src = escapeURI $ trim top
            let alt = B.str $ maybe "image" trim $ lookup "alt" fields
-- 
cgit v1.2.3


From a826d3936dcdefe630715007917dc676eb60861d Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 21 Feb 2014 17:32:02 -0800
Subject: Fixed bug in reference link parsing in markdown_mmd.

The bug was triggered by:

Link to [Google][]. Link to [twitter][].

[Google]: http://google.com
[twitter]: http://twitter.com
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8a41cef49..dd6fa733a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -354,7 +354,7 @@ referenceKey = try $ do
   tit <- option "" referenceTitle
   -- currently we just ignore MMD-style link/image attributes
   _kvs <- option [] $ guardEnabled Ext_link_attributes
-                      >> many (spnl >> keyValAttr)
+                      >> many (try $ spnl >> keyValAttr)
   blanklines
   let target = (escapeURI $ trimr src,  tit)
   st <- getState
-- 
cgit v1.2.3


From 19b127b8986fb1a35e14c26808cf8247d6d6f3c4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 23 Feb 2014 20:35:07 -0800
Subject: PDF: Use ; for TEXINPUTS separator on Windows.

Closes #1151, I hope.  Testing needed.
---
 src/Text/Pandoc/PDF.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 360338f8f..39442854d 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -149,7 +149,12 @@ runTeXProgram program runsLeft tmpDir source = do
     let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
          "-output-directory", tmpDir, file]
     env' <- getEnvironment
-    let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++)
+#ifdef _WINDOWS
+    let sep = ";"
+#else
+    let sep = ":"
+#endif
+    let texinputs = maybe (tmpDir ++ sep) ((tmpDir ++ sep) ++)
           $ lookup "TEXINPUTS" env'
     let env'' = ("TEXINPUTS", texinputs) :
                   [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
-- 
cgit v1.2.3


From 69f7b1dbf3423960b148d6d11ad9f25fa6cc81a9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Feb 2014 22:43:58 -0800
Subject: Added readerTrace to ReaderOptions, --trace command line opt.

This is to debug backtracking-related parsing bugs.
So far it is only implemented for markdown, but it would
be good to extend it to latex and html readers.
---
 src/Text/Pandoc/Options.hs          |  2 ++
 src/Text/Pandoc/Readers/Markdown.hs | 12 +++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 5f65abdde..38220f542 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -210,6 +210,7 @@ data ReaderOptions = ReaderOptions{
        , readerIndentedCodeClasses :: [String] -- ^ Default classes for
                                        -- indented code blocks
        , readerDefaultImageExtension :: String -- ^ Default extension for images
+       , readerTrace           :: Bool -- ^ Print debugging info
 } deriving (Show, Read)
 
 instance Default ReaderOptions
@@ -225,6 +226,7 @@ instance Default ReaderOptions
                , readerApplyMacros           = True
                , readerIndentedCodeClasses   = []
                , readerDefaultImageExtension = ""
+               , readerTrace                 = False
                }
 
 --
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index dd6fa733a..c73c8f610 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -60,6 +60,8 @@ import System.FilePath (takeExtension, addExtension)
 import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match (tagOpen)
 import qualified Data.Set as Set
+import Text.Printf (printf)
+import Debug.Trace (trace)
 
 type MarkdownParser = Parser [Char] ParserState
 
@@ -440,7 +442,10 @@ parseBlocks :: MarkdownParser (F Blocks)
 parseBlocks = mconcat <$> manyTill block eof
 
 block :: MarkdownParser (F Blocks)
-block = choice [ mempty <$ blanklines
+block = do
+  tr <- getOption readerTrace
+  pos <- getPosition
+  res <- choice [ mempty <$ blanklines
                , codeBlockFenced
                , yamlMetaBlock
                , guardEnabled Ext_latex_macros *> (macro >>= return . return)
@@ -465,6 +470,11 @@ block = choice [ mempty <$ blanklines
                , para
                , plain
                ] <?> "block"
+  when tr $ do
+    st <- getState
+    trace (printf "line %d: %s" (sourceLine pos)
+           (take 60 $ show $ B.toList $ runF res st)) (return ())
+  return res
 
 --
 -- header blocks
-- 
cgit v1.2.3


From 581075a0ca7dcdae119e3634ff98a65e79e23256 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 Feb 2014 22:24:50 -0800
Subject: Markdown reader:  small efficiency improvement.

Switched `notFollewdBy' rawHtmlBlocks` ->
`notFollowedBy' (htmlTag isBlockTag)`, which is more
efficient.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c73c8f610..d74b32bed 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1384,7 +1384,7 @@ ltSign :: MarkdownParser (F Inlines)
 ltSign = do
   guardDisabled Ext_raw_html
     <|> guardDisabled Ext_markdown_in_html_blocks
-    <|> (notFollowedBy' rawHtmlBlocks >> return ())
+    <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
   char '<'
   return $ return $ B.str "<"
 
-- 
cgit v1.2.3


From a208a972c318c1cb37551d9662a61b7c7ab35510 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 Feb 2014 22:46:38 -0800
Subject: Markdown parser:  avoid backtracking when closing `</div>` not found.

---
 src/Text/Pandoc/Readers/Markdown.hs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d74b32bed..9feb46f97 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1738,12 +1738,19 @@ spanHtml = try $ do
 divHtml :: MarkdownParser (F Blocks)
 divHtml = try $ do
   guardEnabled Ext_markdown_in_html_blocks
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
-  contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
-  let ident = fromMaybe "" $ lookup "id" attrs
-  let classes = maybe [] words $ lookup "class" attrs
-  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ B.divWith (ident, classes, keyvals) <$> contents
+  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+  bls <- option "" (blankline >> option "" blanklines)
+  contents <- mconcat <$>
+              many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
+  closed <- option False (True <$ htmlTag (~== TagClose "div"))
+  if closed
+     then do
+       let ident = fromMaybe "" $ lookup "id" attrs
+       let classes = maybe [] words $ lookup "class" attrs
+       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+       return $ B.divWith (ident, classes, keyvals) <$> contents
+     else -- avoid backtracing
+       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
 
 rawHtmlInline :: MarkdownParser (F Inlines)
 rawHtmlInline = do
-- 
cgit v1.2.3


From 4d0bf3c5d685cbee3b13f562503a572af803ab95 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 Feb 2014 22:53:12 -0800
Subject: Markdown reader: Improved parsing of nested divs.

Formerly a closing div tag would be missed if it came right
after other block-level tags.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9feb46f97..0ea7f9ac5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -954,6 +954,8 @@ rawHtmlBlocks = do
   htmlBlocks <- many1 $ try $ do
                           s <- rawVerbatimBlock <|> try (
                                 do (t,raw) <- htmlTag isBlockTag
+                                   guard $ t ~/= TagOpen "div" [] &&
+                                           t ~/= TagClose "div"
                                    exts <- getOption readerExtensions
                                    -- if open tag, need markdown="1" if
                                    -- markdown_attributes extension is set
-- 
cgit v1.2.3


From 80511f1b34d082742d78d9745469eb8c63592a9c Mon Sep 17 00:00:00 2001
From: mb21 <mb21@server.fake>
Date: Sun, 1 Dec 2013 21:11:39 +0100
Subject: InDesign ICML Writer

---
 src/Text/Pandoc.hs              |   3 +
 src/Text/Pandoc/Writers/ICML.hs | 525 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 528 insertions(+)
 create mode 100644 src/Text/Pandoc/Writers/ICML.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 703bb876a..2c90fd09b 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -85,6 +85,7 @@ module Text.Pandoc
                , writeTexinfo
                , writeHtml
                , writeHtmlString
+               , writeICML
                , writeDocbook
                , writeOPML
                , writeOpenDocument
@@ -133,6 +134,7 @@ import Text.Pandoc.Writers.ODT
 import Text.Pandoc.Writers.Docx
 import Text.Pandoc.Writers.EPUB
 import Text.Pandoc.Writers.FB2
+import Text.Pandoc.Writers.ICML
 import Text.Pandoc.Writers.Docbook
 import Text.Pandoc.Writers.OPML
 import Text.Pandoc.Writers.OpenDocument
@@ -226,6 +228,7 @@ writers = [
   ,("html"         , PureStringWriter writeHtmlString)
   ,("html5"        , PureStringWriter $ \o ->
      writeHtmlString o{ writerHtml5 = True })
+  ,("icml"         , PureStringWriter writeICML)
   ,("s5"           , PureStringWriter $ \o ->
      writeHtmlString o{ writerSlideVariant = S5Slides
                       , writerTableOfContents = False })
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
new file mode 100644
index 000000000..19d486b25
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -0,0 +1,525 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{- |
+   Module      : Text.Pandoc.Writers.ICML
+   Copyright   : Copyright (C) 2013 github.com/mb21
+   License     : GNU GPL, version 2 or above
+
+   Stability   : alpha
+
+Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format
+which is a subset of the zipped IDML format for which the documentation is
+available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf
+InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated
+into InDesign with File -> Place.
+-}
+module Text.Pandoc.Writers.ICML (writeICML) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Shared (splitBy)
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Pretty
+import Data.List (isPrefixOf, isInfixOf, stripPrefix)
+import Data.Text as Text (breakOnAll, pack)
+import Data.Monoid (mappend)
+import Control.Monad.State
+import qualified Data.Set as Set
+
+type Style = [String]
+type Hyperlink = [(Int, String)]
+
+data WriterState = WriterState{
+    blockStyles  :: Set.Set String
+  , inlineStyles :: Set.Set String
+  , links        :: Hyperlink
+  , listDepth    :: Int
+  , maxListDepth :: Int
+  }
+
+type WS a = State WriterState a
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+    blockStyles  = Set.empty 
+  , inlineStyles = Set.empty
+  , links        = []
+  , listDepth    = 1
+  , maxListDepth = 0
+  }
+
+-- inline names (appear in InDesign's character styles pane)
+emphName        :: String
+strongName      :: String
+strikeoutName   :: String
+superscriptName :: String
+subscriptName   :: String
+smallCapsName   :: String
+codeName        :: String
+linkName        :: String
+emphName        = "Italic"
+strongName      = "Bold"
+strikeoutName   = "Strikeout"
+superscriptName = "Superscript"
+subscriptName   = "Subscript"
+smallCapsName   = "SmallCaps"
+codeName        = "Code"
+linkName        = "Link"
+
+-- block element names (appear in InDesign's paragraph styles pane)
+paragraphName     :: String
+codeBlockName     :: String
+rawBlockName      :: String
+blockQuoteName    :: String
+orderedListName   :: String
+bulletListName    :: String
+defListTermName   :: String
+defListDefName    :: String
+headerName        :: String
+tableName         :: String
+tableHeaderName   :: String
+tableCaptionName  :: String
+alignLeftName     :: String
+alignRightName    :: String
+alignCenterName   :: String
+firstListItemName :: String
+beginsWithName    :: String
+lowerRomanName    :: String
+upperRomanName    :: String
+lowerAlphaName    :: String
+upperAlphaName    :: String
+subListParName    :: String
+footnoteName      :: String
+paragraphName     = "Paragraph"
+codeBlockName     = "CodeBlock"
+rawBlockName      = "Rawblock"
+blockQuoteName    = "Blockquote"
+orderedListName   = "NumList"
+bulletListName    = "BulList"
+defListTermName   = "DefListTerm"
+defListDefName    = "DefListDef"
+headerName        = "Header"
+tableName         = "TablePar"
+tableHeaderName   = "TableHeader"
+tableCaptionName  = "TableCaption"
+alignLeftName     = "LeftAlign"
+alignRightName    = "RightAlign"
+alignCenterName   = "CenterAlign"
+firstListItemName = "first"
+beginsWithName    = "beginsWith-"
+lowerRomanName    = "lowerRoman"
+upperRomanName    = "upperRoman"
+lowerAlphaName    = "lowerAlpha"
+upperAlphaName    = "upperAlpha"
+subListParName    = "subParagraph"
+footnoteName      = "Footnote"
+
+
+-- | Convert Pandoc document to string in ICML format.
+writeICML :: WriterOptions -> Pandoc -> String
+writeICML opts (Pandoc meta blocks) =
+  let colwidth = if writerWrapText opts
+                    then Just $ writerColumns opts
+                    else Nothing
+      render' = render colwidth
+      renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState
+      Just metadata = metaToJSON opts
+                 (renderMeta blocksToICML)
+                 (renderMeta inlinesToICML)
+                 meta
+      (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState
+      main    = render' doc
+      context = defField "body" main
+              $ defField "charStyles" (render' $ charStylesToDoc st)
+              $ defField "parStyles"  (render' $ parStylesToDoc st)
+              $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
+              $ metadata
+  in  if writerStandalone opts
+         then renderTemplate' (writerTemplate opts) context
+         else main
+
+-- | Auxilary functions for parStylesToDoc and charStylesToDoc.
+contains :: String -> (String, (String, String)) -> [(String, String)]
+contains s rule =
+  if isInfixOf (fst rule) s
+     then [snd rule]
+     else []
+
+-- | The monospaced font to use as default.
+monospacedFont :: Doc
+monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New"
+
+-- | How much to indent blockquotes etc.
+defaultIndent :: Int
+defaultIndent = 20
+
+-- | How much to indent numbered lists before the number.
+defaultListIndent :: Int
+defaultListIndent = 10
+
+-- other constants
+lineSeparator :: String
+lineSeparator = "&#x2028;"
+
+-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles.
+parStylesToDoc :: WriterState -> Doc
+parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
+  where
+    makeStyle s =
+      let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str)
+          attrs = concat $ map (contains s) $ [
+                               (defListTermName, ("BulletsAndNumberingListType", "BulletList"))
+                             , (defListTermName, ("FontStyle", "Bold"))
+                             , (tableHeaderName, ("FontStyle", "Bold"))
+                             , (alignLeftName,   ("Justification", "LeftAlign"))
+                             , (alignRightName,  ("Justification", "RightAlign"))
+                             , (alignCenterName, ("Justification", "CenterAlign"))
+                             , (headerName++"1", ("PointSize", "36"))
+                             , (headerName++"2", ("PointSize", "30"))
+                             , (headerName++"3", ("PointSize", "24"))
+                             , (headerName++"4", ("PointSize", "18"))
+                             , (headerName++"5", ("PointSize", "14"))
+                             ]
+          -- what is the most nested list type, if any?
+          (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s
+            where
+              findList [] = (False, False)
+              findList (x:xs) | x == bulletListName  = (True, False)
+                              | x == orderedListName = (False, True)
+                              | otherwise = findList xs
+          nBuls = countSubStrs bulletListName s
+          nOrds = countSubStrs orderedListName s
+          attrs' = numbering ++ listType ++ indent ++ attrs
+            where
+              numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)]
+                        | otherwise     = []
+              listType | isOrderedList && (not $ isInfixOf subListParName s)
+                           = [("BulletsAndNumberingListType", "NumberedList")]
+                       | isBulletList && (not $ isInfixOf subListParName s)
+                           = [("BulletsAndNumberingListType", "BulletList")]
+                       | otherwise = []
+              indent = [("LeftIndent", show indt)]
+                where
+                  nBlockQuotes = countSubStrs blockQuoteName s
+                  nDefLists = countSubStrs defListDefName s
+                  indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists)
+          props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm)
+            where
+              font = if isInfixOf codeBlockName s
+                        then monospacedFont
+                        else empty
+              basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font
+              tabList = if isBulletList
+                           then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")]
+                                $ vcat [
+                                    inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign"
+                                  , inTags False "AlignmentCharacter" [("type","string")] $ text "."
+                                  , selfClosingTag "Leader" [("type","string")]
+                                  , inTags False "Position" [("type","unit")] $ text
+                                      $ show $ defaultListIndent * (nBuls + nOrds)
+                                  ]
+                           else empty
+              makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name)
+              numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..."
+                       | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..."
+                       | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..."
+                       | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..."
+                       | otherwise = empty
+      in  inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles.
+charStylesToDoc :: WriterState -> Doc
+charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
+  where
+    makeStyle s =
+      let attrs = concat $ map (contains s) [
+                               (strikeoutName,   ("StrikeThru", "true"))
+                             , (superscriptName, ("Position", "Superscript"))
+                             , (subscriptName,   ("Position", "Subscript"))
+                             , (smallCapsName,   ("Capitalization", "SmallCaps"))
+                             ]
+          attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs
+                 | isInfixOf strongName s                         = ("FontStyle", "Bold") : attrs
+                 | isInfixOf emphName s                           = ("FontStyle", "Italic") : attrs
+                 | otherwise                                      = attrs
+          props = inTags True "Properties" [] $
+                    inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font
+                  where
+                    font =
+                      if isInfixOf codeName s
+                         then monospacedFont
+                         else empty
+      in  inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks.
+hyperlinksToDoc :: Hyperlink -> Doc
+hyperlinksToDoc []     = empty
+hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
+  where
+    hyp (ident, url) = hdest $$ hlink
+      where
+        hdest = selfClosingTag "HyperlinkURLDestination"
+                  [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")]
+        hlink = inTags True "Hyperlink" [("Self","uf-"++show ident),  ("Name",url),
+                    ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")]
+                  $ inTags True "Properties" []
+                  $ inTags False "BorderColor" [("type","enumeration")] (text "Black")
+                  $$ (inTags False "Destination" [("type","object")]
+                  $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url))
+  
+
+-- | Convert a list of Pandoc blocks to ICML.
+blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst
+
+-- | Convert a Pandoc block element to ICML.
+blockToICML :: WriterOptions -> Style -> Block -> WS Doc
+blockToICML opts style (Plain lst) = parStyle opts style lst
+blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
+blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
+blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str]
+blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
+blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
+blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
+blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst
+blockToICML opts style (Header lvl _ lst) =
+  let stl = (headerName ++ show lvl):style
+  in parStyle opts stl lst
+blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead
+blockToICML opts style (Table caption aligns widths headers rows) =
+  let style' = tableName : style
+      noHeader  = all null headers
+      nrHeaders = if noHeader
+                     then "0"
+                     else "1"
+      nrRows = length rows
+      nrCols = if null rows
+                  then 0
+                  else length $ head rows
+      rowsToICML [] _ = return empty
+      rowsToICML (col:rest) rowNr =
+        liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1)
+      colsToICML [] _ _ = return empty
+      colsToICML (cell:rest) rowNr colNr = do
+        let stl  = if rowNr == 0 && not noHeader
+                      then tableHeaderName:style'
+                      else style'
+            alig = aligns !! colNr
+            stl' | alig == AlignLeft = alignLeftName : stl
+                 | alig == AlignRight = alignRightName : stl
+                 | alig == AlignCenter = alignCenterName : stl
+                 | otherwise = stl
+        c <- blocksToICML opts stl' cell
+        let cl = return $ inTags True "Cell"
+                   [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c
+        liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1)
+  in  do
+      let tabl = if noHeader
+                    then rows
+                    else headers:rows
+      cells <- rowsToICML tabl (0::Int)
+      let colWidths w = if w > 0
+                           then [("SingleColumnWidth",show $ 500 * w)]
+                           else []
+      let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup)
+      let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths
+      let tableDoc = return $ inTags True "Table" [
+                         ("AppliedTableStyle","TableStyle/Table")
+                       , ("HeaderRowCount", nrHeaders)
+                       , ("BodyRowCount", show nrRows)
+                       , ("ColumnCount", show nrCols)
+                       ] (colDescs $$ cells)
+      liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption
+blockToICML opts style (Div _ lst) = blocksToICML opts style lst
+blockToICML _ _ Null = return empty
+
+-- | Convert a list of lists of blocks to ICML list items.
+listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc
+listItemsToICML _ _ _ _ [] = return empty
+listItemsToICML opts listType style attribs (first:rest) = do
+  st <- get
+  put st{ listDepth = 1 + listDepth st}
+  let stl = listType:style
+  let f = listItemToICML opts stl True attribs first
+  let r = map (listItemToICML opts stl False attribs) rest
+  docs <- sequence $ f:r
+  s    <- get
+  let maxD = max (maxListDepth s) (listDepth s)
+  put s{ listDepth = 1, maxListDepth = maxD }
+  return $ vcat docs
+
+-- | Convert a list of blocks to ICML list items.
+listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc
+listItemToICML opts style isFirst attribs item =
+  let makeNumbStart (Just (beginsWith, numbStl, _)) = 
+        let doN DefaultStyle = []
+            doN LowerRoman = [lowerRomanName]
+            doN UpperRoman = [upperRomanName]
+            doN LowerAlpha = [lowerAlphaName]
+            doN UpperAlpha = [upperAlphaName]
+            doN _ = []
+            bw = if beginsWith > 1
+                    then [beginsWithName ++ show beginsWith]
+                    else []
+        in  doN numbStl ++ bw
+      makeNumbStart Nothing = []
+      stl = if isFirst
+               then firstListItemName:style
+               else style
+      stl' = makeNumbStart attribs ++ stl
+  in  if length item > 1
+         then do
+           let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst
+               insertTab block      = blockToICML opts style block
+           f <- blockToICML opts stl' $ head item
+           r <- fmap vcat $ mapM insertTab $ tail item
+           return $ f $$ r
+         else blocksToICML opts stl' item
+
+definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc
+definitionListItemToICML opts style (term,defs) = do
+  term' <- parStyle opts (defListTermName:style) term
+  defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs
+  return $ term' $$ defs'
+
+
+-- | Convert a list of inline elements to ICML.
+inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc
+inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
+
+-- | Convert an inline element to ICML.
+inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc
+inlineToICML _    style (Str str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst
+inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst
+inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst
+inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst
+inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst
+inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
+inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
+inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
+inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst]
+inlineToICML _    style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
+inlineToICML _    style Space = charStyle style space
+inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
+inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math
+inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML opts style (Link lst (url, title)) = do
+  content <- inlinesToICML opts (linkName:style) lst
+  state $ \st ->
+            let ident = if null $ links st
+                           then 1::Int
+                           else 1 + (fst $ head $ links st)
+                newst = st{ links = (ident, url):(links st) }
+                cont  = inTags True "HyperlinkTextSource"
+                         [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content
+            in  (cont, newst)
+inlineToICML opts style (Image alt target) = imageICML opts style alt target
+inlineToICML opts style (Note lst) = footnoteToICML opts style lst
+inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst
+
+-- | Convert a list of block elements to an ICML footnote.
+footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+footnoteToICML opts style lst =
+  let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls
+      insertTab block     = blockToICML opts (footnoteName:style) block
+  in  do
+    contents <- mapM insertTab lst
+    let number = inTags True "ParagraphStyleRange" [] $
+                   inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>"
+    return $ inTags True "CharacterStyleRange"
+      [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")]
+      $ inTags True "Footnote" [] $ number $$ vcat contents
+
+-- | Auxiliary function to merge Space elements into the adjacent Strs.
+mergeSpaces :: [Inline] -> [Inline]
+mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs
+mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs
+mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs
+mergeSpaces (x:xs) = x : (mergeSpaces xs)
+mergeSpaces []     = []
+
+-- | Wrap a list of inline elements in an ICML Paragraph Style
+parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc
+parStyle opts style lst =
+  let slipIn x y = if null y
+                      then x
+                      else x ++ " > " ++ y
+      stlStr = foldr slipIn [] $ reverse style
+      stl    = if null stlStr
+                  then ""
+                  else "ParagraphStyle/" ++ stlStr
+      attrs  = ("AppliedParagraphStyle", stl)
+      attrs' =  if firstListItemName `elem` style
+                   then let ats = attrs : [("NumberingContinue", "false")]
+                            begins = filter (isPrefixOf beginsWithName) style
+                        in  if null begins
+                               then ats
+                               else let i = maybe "" id $ stripPrefix beginsWithName $ head begins
+                                    in  ("NumberingStartAt", i) : ats
+                   else [attrs]
+  in  do
+      content <- inlinesToICML opts [] lst
+      let cont = inTags True "ParagraphStyleRange" attrs'
+                   $ mappend content $ selfClosingTag "Br" []
+      state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st })
+
+-- | Wrap a Doc in an ICML Character Style.
+charStyle :: Style -> Doc -> WS Doc
+charStyle style content = 
+  let (stlStr, attrs) = styleToStrAttr style
+      doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
+  in  do
+      state $ \st ->
+        let styles = if null stlStr
+                        then st
+                        else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
+        in  (doc, styles)
+
+-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute.
+styleToStrAttr :: Style -> (String, [(String, String)])
+styleToStrAttr style =
+  let stlStr = unwords $ Set.toAscList $ Set.fromList style
+      stl    = if null style
+                  then "$ID/NormalCharacterStyle"
+                  else "CharacterStyle/" ++ stlStr
+      attrs = [("AppliedCharacterStyle", stl)]
+  in  (stlStr, attrs)
+
+-- | Assemble an ICML Image.
+imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc
+imageICML _ style _ (linkURI, _) =
+  let imgWidth  = 300::Int --TODO: set width, height dynamically as in Docx.hs
+      imgHeight = 200::Int
+      scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight
+      hw = show $ imgWidth  `div` 2
+      hh = show $ imgHeight `div` 2
+      qw = show $ imgWidth  `div` 4
+      qh = show $ imgHeight `div` 4
+      (stlStr, attrs) = styleToStrAttr style
+      props  = inTags True "Properties" [] $ inTags True "PathGeometry" []
+                 $ inTags True "GeometryPathType" [("PathOpen","false")]
+                 $ inTags True "PathPointArray" []
+                 $ vcat [
+                     selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh),
+                       ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)]
+                   , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh),
+                       ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)]
+                   , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh),
+                       ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)]
+                   , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh),
+                       ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)]
+                   ]
+      image  = inTags True "Image"
+                   [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)]
+                 $ vcat [
+                     inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+                       $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)]
+                   , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)]
+                   ]
+      doc    = inTags True "CharacterStyleRange" attrs
+                 $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)]
+                 $ (props $$ image)
+  in  do
+      state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } )
-- 
cgit v1.2.3


From 24b2ac43b0a8596f7baea10579c95ee75b6e584f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 4 Mar 2014 00:33:25 +0100
Subject: Add a simple Emacs Org-mode reader

The basic structure of org-mode documents is recognized; however,
org-mode features like todo markers, tags etc. are not supported yet.
---
 src/Text/Pandoc.hs             |   3 +
 src/Text/Pandoc/Readers/Org.hs | 552 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 555 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/Org.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 3ae81db00..e511ed861 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -65,6 +65,7 @@ module Text.Pandoc
                , readMarkdown
                , readMediaWiki
                , readRST
+               , readOrg
                , readLaTeX
                , readHtml
                , readTextile
@@ -115,6 +116,7 @@ import Text.Pandoc.JSON
 import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.Org
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.OPML
 import Text.Pandoc.Readers.LaTeX
@@ -201,6 +203,7 @@ readers = [ ("native"       , \_ s -> return $ readNative s)
            ,("mediawiki"    , \o s -> return $ readMediaWiki o s)
            ,("docbook"      , \o s -> return $ readDocBook o s)
            ,("opml"         , \o s -> return $ readOPML o s)
+           ,("org"          , \o s -> return $ readOrg o s)
            ,("textile"      , \o s -> return $ readTextile o s) -- TODO : textile+lhs
            ,("html"         , \o s -> return $ readHtml o s)
            ,("latex"        , \o s -> return $ readLaTeX o s)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
new file mode 100644
index 000000000..5dc250f04
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -0,0 +1,552 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Org
+   Copyright   : Copyright (C) 2014 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>
+
+Conversion of Org-Mode to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Org ( readOrg ) where
+
+import qualified Text.Pandoc.Builder as B
+import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import           Text.Pandoc.Definition
+import           Text.Pandoc.Options
+import           Text.Pandoc.Parsing hiding (orderedListMarker)
+import           Text.Pandoc.Shared (compactify')
+
+import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Monad (guard, mzero)
+import           Data.Char (toLower)
+import           Data.List (foldl')
+import           Data.Maybe (listToMaybe, fromMaybe)
+import           Data.Monoid (mconcat, mempty, mappend)
+
+-- | Parse org-mode string and return a Pandoc document.
+readOrg :: ReaderOptions -- ^ Reader options
+        -> String        -- ^ String to parse (assuming @'\n'@ line endings)
+        -> Pandoc
+readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] ParserState
+
+parseOrg:: OrgParser Pandoc
+parseOrg = do
+  blocks' <- B.toList <$> parseBlocks
+  st <- getState
+  let meta = stateMeta st
+  return $ Pandoc meta $ filter (/= Null) blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: OrgParser Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: OrgParser Blocks
+block = choice [ mempty <$ blanklines
+               , orgBlock
+               , example
+               , drawer
+               , specialLine
+               , header
+               , hline
+               , list
+               , table
+               , paraOrPlain
+               ] <?> "block"
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+orgBlock :: OrgParser Blocks
+orgBlock = try $ do
+  (indent, blockType, args) <- blockHeader
+  blockStr <- rawBlockContent indent blockType
+  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
+  case blockType of
+    "comment" -> return mempty
+    "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
+    _         -> B.divWith ("", [blockType], [])
+                            <$> (parseFromString parseBlocks blockStr)
+
+blockHeader :: OrgParser (Int, String, [String])
+blockHeader = (,,) <$> blockIndent
+                   <*> blockType
+                   <*> (skipSpaces *> blockArgs)
+ where blockIndent = length <$> many spaceChar
+       blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
+       blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
+
+rawBlockContent :: Int -> String -> OrgParser String
+rawBlockContent indent blockType =
+  unlines . map commaEscaped <$> manyTill indentedLine blockEnder
+ where
+   indentedLine = try $ choice [ blankline         *> pure "\n"
+                               , indentWith indent *> anyLine
+                               ]
+   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> OrgParser String
+indentWith num = do
+  tabStop <- getOption readerTabStop
+  if (num < tabStop)
+     then count num (char ' ')
+     else choice [ try (count num (char ' '))
+                 , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+translateLang :: String -> String
+translateLang "sh" = "bash"
+translateLang cs = cs
+
+commaEscaped :: String -> String
+commaEscaped (',':cs@('*':_))     = cs
+commaEscaped (',':cs@('#':'+':_)) = cs
+commaEscaped cs                   = cs
+
+example :: OrgParser Blocks
+example = try $
+  B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
+
+exampleLine :: OrgParser String
+exampleLine = try $ string ": " *> anyLine
+
+-- Drawers for properties or a logbook
+drawer :: OrgParser Blocks
+drawer = try $ do
+  drawerStart
+  manyTill drawerLine (try drawerEnd)
+  return mempty
+
+drawerStart :: OrgParser String
+drawerStart = try $
+  skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = try $  char ':' *> validDrawerName <* char ':'
+       validDrawerName =  stringAnyCase "PROPERTIES"
+                          <|> stringAnyCase "LOGBOOK"
+
+drawerLine :: OrgParser String
+drawerLine = try $ anyLine
+
+drawerEnd :: OrgParser String
+drawerEnd = try $
+  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+
+-- Comments, Options and Metadata
+specialLine :: OrgParser Blocks
+specialLine = try $ metaLine <|> commentLine
+
+metaLine :: OrgParser Blocks
+metaLine = try $ metaLineStart *> declarationLine
+
+commentLine :: OrgParser Blocks
+commentLine = try $ commentLineStart *> anyLine *> pure mempty
+
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLineStart :: OrgParser String
+metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
+
+commentLineStart :: OrgParser String
+commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
+
+declarationLine :: OrgParser Blocks
+declarationLine = try $ do
+  meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
+  updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+  return mempty
+
+metaValue :: OrgParser MetaValue
+metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
+
+metaKey :: OrgParser [Char]
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+                      <*  char ':'
+                      <*  skipSpaces
+
+-- | Headers
+header :: OrgParser Blocks
+header = try $
+  B.header <$> headerStart
+           <*> (trimInlines <$> restOfLine)
+
+headerStart :: OrgParser Int
+headerStart = try $
+  (length <$> many1 (char '*')) <* many1 (char ' ')
+
+-- Horizontal Line (five dashes or more)
+hline :: OrgParser Blocks
+hline = try $ do
+  skipSpaces
+  string "-----"
+  many (char '-')
+  skipSpaces
+  newline
+  return B.horizontalRule
+
+--
+-- Tables
+--
+
+data OrgTableRow = OrgContentRow [Blocks]
+                 | OrgAlignRow [Alignment]
+                 | OrgHlineRow
+ deriving (Eq, Show)
+
+type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+
+table :: OrgParser Blocks
+table = try $ do
+  lookAhead tableStart
+  (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
+  return $ B.table "" (zip aligns widths) heads lns
+
+tableStart :: OrgParser Char
+tableStart = try $ skipSpaces *> char '|'
+
+tableRows :: OrgParser [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: OrgParser OrgTableRow
+tableContentRow = try $
+  OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
+
+tableContentCell :: OrgParser Blocks
+tableContentCell = try $
+  B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+
+endOfCell :: OrgParser Char
+-- endOfCell =  char '|' <|> newline
+endOfCell = try $ char '|' <|> lookAhead newline
+
+tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow = try $
+  OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+
+tableAlignCell :: OrgParser Alignment
+tableAlignCell =
+  choice [ try $ emptyCell *> return (AlignDefault)
+         , try $ skipSpaces
+                   *> char '<'
+                   *> tableAlignFromChar
+                   <* many digit
+                   <* char '>'
+                   <* emptyCell
+         ] <?> "alignment info"
+    where emptyCell = try $ skipSpaces *> endOfCell
+
+tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
+                                  , char 'c' *> return AlignCenter
+                                  , char 'r' *> return AlignRight
+                                  ]
+
+tableHline :: OrgParser OrgTableRow
+tableHline = try $
+  OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+tableContent :: [OrgTableRow]
+             -> OrgTableContent
+tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+
+normalizeTable :: OrgTableContent
+               -> OrgTableContent
+normalizeTable (cols, aligns, widths, heads, lns) =
+  let aligns' = fillColumns aligns AlignDefault
+      widths' = fillColumns widths 0.0
+      heads'  = if heads == mempty
+                then heads
+                else fillColumns heads  (B.plain mempty)
+      lns'    = map (flip fillColumns (B.plain mempty)) lns
+      fillColumns base padding = take cols $ base ++ repeat padding
+  in (cols, aligns', widths', heads', lns')
+
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header.  All other horizontal lines are discarded.
+rowToContent :: OrgTableRow
+             -> OrgTableContent
+             -> OrgTableContent
+rowToContent OrgHlineRow        = maybeBodyToHeader
+rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
+rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
+
+setLongestRow :: [a]
+              -> OrgTableContent
+              -> OrgTableContent
+setLongestRow r (cols, aligns, widths, heads, lns) =
+  (max cols (length r), aligns, widths, heads, lns)
+
+maybeBodyToHeader :: OrgTableContent
+                  -> OrgTableContent
+maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
+maybeBodyToHeader content                          = content
+
+appendToBody :: [Blocks]
+             -> OrgTableContent
+             -> OrgTableContent
+appendToBody r (cols, aligns, widths, heads, lns) =
+  (cols, aligns, widths, heads, lns ++ [r])
+
+setAligns :: [Alignment]
+          -> OrgTableContent
+          -> OrgTableContent
+setAligns aligns (cols, _, widths, heads, lns) =
+   (cols, aligns, widths, heads, lns)
+
+-- Paragraphs or Plain text
+paraOrPlain :: OrgParser Blocks
+paraOrPlain = try $
+  trimInlines . mconcat
+    <$> many1 inline
+    <**> option B.plain
+                (try $ newline *> pure B.para)
+
+restOfLine :: OrgParser Inlines
+restOfLine = mconcat <$> manyTill inline newline
+
+
+--
+-- list blocks
+--
+
+list :: OrgParser Blocks
+list = choice [ bulletList, orderedList ] <?> "list"
+
+bulletList :: OrgParser Blocks
+bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+
+orderedList :: OrgParser Blocks
+orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+
+genericListStart :: OrgParser String
+                 -> OrgParser Int
+genericListStart listMarker = try $
+  (+) <$> (length <$> many spaceChar)
+      <*> (length <$> listMarker <* many1 spaceChar)
+
+-- parses bullet list start and returns its length (excl. following whitespace)
+bulletListStart :: OrgParser Int
+bulletListStart = genericListStart bulletListMarker
+  where bulletListMarker = pure <$> oneOf "*-+"
+
+orderedListStart :: OrgParser Int
+orderedListStart = genericListStart orderedListMarker
+  -- Ordered list markers allowed in org-mode
+  where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+listItem :: OrgParser Int
+         -> OrgParser Blocks
+listItem start = try $ do
+  (markerLength, first) <- try (start >>= rawListItem)
+  rest <- many (listContinuation markerLength)
+  parseFromString parseBlocks $ concat (first:rest)
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: Int
+            -> OrgParser (Int, String)
+rawListItem markerLength = try $ do
+  firstLine <- anyLine
+  restLines <- many (listLine markerLength)
+  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+                 -> OrgParser String
+listContinuation markerLength = try $
+  mappend <$> many blankline
+          <*> (concat <$> many1 (listLine markerLength))
+
+-- parse a line of a list item
+listLine :: Int
+         -> OrgParser String
+listLine markerLength = try $
+  indentWith markerLength *> anyLine
+    <**> pure (++ "\n")
+
+
+--
+-- inline
+--
+
+inline :: OrgParser Inlines
+inline = choice inlineParsers <?> "inline"
+  where inlineParsers = [ whitespace
+                        , link
+                        , str
+                        , endline
+                        , emph
+                        , strong
+                        , strikeout
+                        , underline
+                        , code
+                        , verbatim
+                        , subscript
+                        , superscript
+                        , symbol
+                        ]
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+
+
+whitespace :: OrgParser Inlines
+whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+
+str :: OrgParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+            <* updateLastStrPos
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: OrgParser Inlines
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  notFollowedBy' exampleLine
+  notFollowedBy' hline
+  notFollowedBy' tableStart
+  notFollowedBy' drawerStart
+  notFollowedBy' headerStart
+  notFollowedBy' metaLineStart
+  notFollowedBy' commentLineStart
+  notFollowedBy' bulletListStart
+  notFollowedBy' orderedListStart
+  return B.space
+
+link :: OrgParser Inlines
+link = explicitLink <|> selfLink <?> "link"
+
+explicitLink :: OrgParser Inlines
+explicitLink = try $ do
+  char '['
+  src   <- enclosedRaw     (char '[') (char ']')
+  title <- enclosedInlines (char '[') (char ']')
+  char ']'
+  return $ B.link src "" title
+
+selfLink :: OrgParser Inlines
+selfLink = try $ do
+  src <- enclosedRaw (string "[[") (string "]]")
+  return $ B.link src "" (B.str src)
+
+emph      :: OrgParser Inlines
+emph      = B.emph         <$> inlinesEnclosedBy '/'
+
+strong    :: OrgParser Inlines
+strong    = B.strong       <$> inlinesEnclosedBy '*'
+
+strikeout :: OrgParser Inlines
+strikeout = B.strikeout    <$> inlinesEnclosedBy '+'
+
+-- There is no underline, so we use strong instead.
+underline :: OrgParser Inlines
+underline = B.strong       <$> inlinesEnclosedBy '_'
+
+code      :: OrgParser Inlines
+code      = B.code         <$> rawEnclosedBy '='
+
+verbatim  ::  OrgParser Inlines
+verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
+
+subscript ::  OrgParser Inlines
+subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
+
+superscript ::  OrgParser Inlines
+superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
+
+maybeGroupedByBraces :: OrgParser Inlines
+maybeGroupedByBraces = try $
+  choice [ try $ enclosedInlines (char '{') (char '}')
+         , B.str . (:"") <$> anyChar
+         ]
+
+symbol :: OrgParser Inlines
+symbol = B.str . (: "") <$> oneOf specialChars
+
+enclosedInlines :: OrgParser a
+                -> OrgParser b
+                -> OrgParser Inlines
+enclosedInlines start end = try $
+  trimInlines . mconcat <$> enclosed start end inline
+
+-- FIXME: This is a hack
+inlinesEnclosedBy :: Char
+                  -> OrgParser Inlines
+inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+                                      (atEnd $ char c)
+
+enclosedRaw :: OrgParser a
+            -> OrgParser b
+            -> OrgParser String
+enclosedRaw start end = try $
+  start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+       spanningTwoLines = try $
+         anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+rawEnclosedBy :: Char
+              -> OrgParser String
+rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
+
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: OrgParser a -> OrgParser a
+atStart p = do
+  pos <- getPosition
+  st <- getState
+  guard $ stateLastStrPos st /= Just pos
+  p
+
+-- | succeeds only if we're at the end of a word
+atEnd :: OrgParser a -> OrgParser a
+atEnd p = try $ p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+
+postWordChars :: [Char]
+postWordChars = "\t\n\r !\"'),-.:?}"
+
+-- FIXME: These functions are hacks and should be replaced
+endsOnThisOrNextLine :: Char
+                     -> OrgParser ()
+endsOnThisOrNextLine c = do
+  inp <- getInput
+  let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
+  endsOnThisLine inp c doOtherwise
+
+endsOnThisLine :: [Char]
+               -> Char
+               -> ([Char] -> OrgParser ())
+               -> OrgParser ()
+endsOnThisLine input c doOnOtherLines = do
+  case break (`elem` c:"\n") input of
+    (_,'\n':rest)    -> doOnOtherLines rest
+    (_,_:rest@(n:_)) -> if n `elem` postWordChars
+                        then return ()
+                        else endsOnThisLine rest c doOnOtherLines
+    _                -> mzero
+
-- 
cgit v1.2.3


From 3126b00f11a684a20633cdc3810722e2bf210d36 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 5 Mar 2014 08:47:20 -0800
Subject: Templates:  YAML objects resolve to "true" in conditionals.

Closes #1133.

Note:  If address is a YAML object and you just have $address$
in your template, the word "true" will appear, which may be
unexpected.  (Previously nothing would appear.)
---
 src/Text/Pandoc/Templates.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index ad8838f72..52625abf6 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -190,6 +190,7 @@ resolveVar var' val =
        Just (String t)  -> T.stripEnd t
        Just (Number n)  -> T.pack $ show n
        Just (Bool True) -> "true"
+       Just (Object _)  -> "true"
        Just _           -> mempty
        Nothing          -> mempty
 
-- 
cgit v1.2.3


From 6fda3619770f7c07ccbfc8a51614c55e01b740d8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 5 Mar 2014 09:10:09 -0800
Subject: SelfContained:  Handle "poster" attribute in "video" tags.

Closes #1188.
---
 src/Text/Pandoc/SelfContained.hs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 6112e764f..7fc9c2966 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -50,14 +50,16 @@ isOk c = isAscii c && isAlphaNum c
 
 convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
 convertTag userdata t@(TagOpen tagname as)
-  | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] =
-       case fromAttrib "src" t of
-         []   -> return t
-         src  -> do
-           (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
-           let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
-           return $ TagOpen tagname
-                    (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+  | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
+       as' <- mapM processAttribute as
+       return $ TagOpen tagname as'
+  where processAttribute (x,y) =
+           if x == "src" || x == "href" || x == "poster"
+              then do
+                (raw, mime) <- getRaw userdata (fromAttrib "type" t) y
+                let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+                return (x, enc)
+              else return (x,y)
 convertTag userdata t@(TagOpen "script" as) =
   case fromAttrib "src" t of
        []     -> return t
-- 
cgit v1.2.3


From f3c9d3788530e450d1bb23a4fd829bc5a6eed266 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 5 Mar 2014 13:01:23 -0800
Subject: HTML writer: Add colgroup around col tags.

Also affects EPUB writer.
Closes #877.
---
 src/Text/Pandoc/Writers/HTML.hs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 3ac2a836f..e0385af25 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -533,11 +533,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do
   let percent w = show (truncate (100*w) :: Integer) ++ "%"
   let coltags = if all (== 0.0) widths
                    then mempty
-                   else mconcat $ map (\w ->
-                          if writerHtml5 opts
-                             then H.col ! A.style (toValue $ "width: " ++ percent w)
-                             else H.col ! A.width (toValue $ percent w) >> nl opts)
-                          widths
+                   else do
+                     H.colgroup $ do
+                       nl opts
+                       mapM_ (\w -> do
+                            if writerHtml5 opts
+                               then H.col ! A.style (toValue $ "width: " ++
+                                                      percent w)
+                               else H.col ! A.width (toValue $ percent w)
+                            nl opts) widths
+                     nl opts
   head' <- if all null headers
               then return mempty
               else do
-- 
cgit v1.2.3


From c026c16fa6d9313dc2aa30e8348176e292dc78e0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 Mar 2014 21:26:25 -0700
Subject: PDF:  Use / as path separators even on Windows.

This seems to be necessary for texlive.
Closes #1151 (again!).
---
 src/Text/Pandoc/PDF.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 39442854d..06e4d2aae 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -91,7 +91,8 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do
                 let ext = fromMaybe (takeExtension src) $
                           extensionFromMimeType mime
                 let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
-                let fname = tmpdir </> basename <.> ext
+                -- note:  we want / even on Windows, for TexLive:
+                let fname = tmpdir ++ "/" ++ basename <.> ext
                 BS.writeFile fname contents
                 return $ Image ils (fname,tit)
               _ -> do
@@ -143,7 +144,7 @@ extractMsg log' = do
 runTeXProgram :: String -> Int -> FilePath -> String
               -> IO (ExitCode, ByteString, Maybe ByteString)
 runTeXProgram program runsLeft tmpDir source = do
-    let file = tmpDir </> "input.tex"
+    let file = tmpDir ++ "/input.tex"
     exists <- doesFileExist file
     unless exists $ UTF8.writeFile file source
     let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
-- 
cgit v1.2.3


From 5040f3ede08188610054ff2afab1c3cd9c16c58a Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 10 Mar 2014 11:16:09 -0700
Subject: PDF:  Use / as path separators in tempdir on Windows.

This is needed for texlive.
Note that the / is used only in the body of withTempDir,
so when the directory is deleted, the original separators will
be used.

See #1151.
---
 src/Text/Pandoc/PDF.hs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 06e4d2aae..4dfa1d827 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -51,13 +51,21 @@ import Text.Pandoc.Options (WriterOptions(..))
 import Text.Pandoc.MIME (extensionFromMimeType)
 import Text.Pandoc.Process (pipeProcess)
 import qualified Data.ByteString.Lazy as BL
+#ifdef _WINDOWS
+import Data.List (intercalate)
+#endif
 
 withTempDir :: String -> (FilePath -> IO a) -> IO a
-withTempDir =
+withTempDir f =
 #ifdef _WINDOWS
-  withTempDirectory "."
+  withTempDirectory "." (f . changePathSeparators)
 #else
-  withSystemTempDirectory
+  withSystemTempDirectory f
+#endif
+
+#ifdef _WINDOWS
+changePathSeparators :: FilePath -> FilePath
+changePathSeparators = intercalate "/" . splitDirectories
 #endif
 
 makePDF :: String              -- ^ pdf creator (pdflatex, lualatex, xelatex)
-- 
cgit v1.2.3


From 1aed9208f883c3bf67f81582b149912de832273a Mon Sep 17 00:00:00 2001
From: Tim Lin <timtylin@gmail.com>
Date: Mon, 10 Mar 2014 16:23:57 -0700
Subject: PDF: Use / as path separators in latex input only

Fixes compile error on Windows for 5040f3e
Reverted back to canonical file separators </> in all places except for
arguments to the LaTeX builder and in TEXINPUTS

See #1151.

Note: Temporary directories still fail to be removed in Windows due to
call of ByteString.Lazy.readFile creating process ownership of the
compiled pdf file.
---
 src/Text/Pandoc/PDF.hs | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 4dfa1d827..a5f1597bd 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -56,11 +56,11 @@ import Data.List (intercalate)
 #endif
 
 withTempDir :: String -> (FilePath -> IO a) -> IO a
-withTempDir f =
+withTempDir =
 #ifdef _WINDOWS
-  withTempDirectory "." (f . changePathSeparators)
+  withTempDirectory "."
 #else
-  withSystemTempDirectory f
+  withSystemTempDirectory
 #endif
 
 #ifdef _WINDOWS
@@ -99,8 +99,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do
                 let ext = fromMaybe (takeExtension src) $
                           extensionFromMimeType mime
                 let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
-                -- note:  we want / even on Windows, for TexLive:
-                let fname = tmpdir ++ "/" ++ basename <.> ext
+                let fname = tmpdir </> basename <.> ext
                 BS.writeFile fname contents
                 return $ Image ils (fname,tit)
               _ -> do
@@ -152,18 +151,22 @@ extractMsg log' = do
 runTeXProgram :: String -> Int -> FilePath -> String
               -> IO (ExitCode, ByteString, Maybe ByteString)
 runTeXProgram program runsLeft tmpDir source = do
-    let file = tmpDir ++ "/input.tex"
+    let file = tmpDir </> "input.tex"
     exists <- doesFileExist file
     unless exists $ UTF8.writeFile file source
-    let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
-         "-output-directory", tmpDir, file]
-    env' <- getEnvironment
 #ifdef _WINDOWS
-    let sep = ";"
+    -- note:  we want / even on Windows, for TexLive
+    let tmpDir' = changePathSeparators tmpDir
+    let file' = changePathSeparators file
 #else
-    let sep = ":"
+    let tmpDir' = tmpDir
+    let file' = file
 #endif
-    let texinputs = maybe (tmpDir ++ sep) ((tmpDir ++ sep) ++)
+    let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
+         "-output-directory", tmpDir', file']
+    env' <- getEnvironment
+    let sep = searchPathSeparator:[]
+    let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
           $ lookup "TEXINPUTS" env'
     let env'' = ("TEXINPUTS", texinputs) :
                   [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
-- 
cgit v1.2.3


From 76ef65f0b36d3a613e004350609d3696d0bf5658 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 12 Mar 2014 10:23:45 -0700
Subject: Man writer:  Ensure that terms in definition lists aren't line
 wrapped.

Closes #1195.
---
 src/Text/Pandoc/Writers/Man.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index b31cc2b70..680bfef44 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -283,7 +283,7 @@ definitionListItemToMan opts (label, defs) = do
                                   mapM (\item -> blockToMan opts item) rest
                         first' <- blockToMan opts first
                         return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
-  return $ text ".TP" $$ text ".B " <> labelText $$ contents
+  return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents
 
 -- | Convert list of Pandoc block elements to man.
 blockListToMan :: WriterOptions -- ^ Options
-- 
cgit v1.2.3


From 814af2002e4837c160526123ef753bb34547d811 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 Mar 2014 14:03:15 -0700
Subject: RST writer:  Avoid stack overflow with certain tables.

Closes #1197.

Note that there are still problems with the formatting of
the tables inside tables with output produced from the input
file in the original bug report.  But this fixes the stack
overflow problem.
---
 src/Text/Pandoc/Writers/RST.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 37bb66632..1e7596b21 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -219,11 +219,15 @@ blockToRST (Table caption _ widths headers rows) =  do
                      else blankline <> text "Table: " <> caption'
   headers' <- mapM blockListToRST headers
   rawRows <- mapM (mapM blockListToRST) rows
-  let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows
+  -- let isSimpleCell [Plain _] = True
+  --     isSimpleCell [Para _]  = True
+  --     isSimpleCell []        = True
+  --     isSimpleCell _         = False
+  -- let isSimple = all (==0) widths && all (all isSimpleCell) rows
   let numChars = maximum . map offset
   opts <- get >>= return . stOptions
   let widthsInChars =
-       if isSimple
+       if all (== 0) widths
           then map ((+2) . numChars) $ transpose (headers' : rawRows)
           else map (floor . (fromIntegral (writerColumns opts) *)) widths
   let hpipeBlocks blocks = hcat [beg, middle, end]
-- 
cgit v1.2.3


From f6141aa241eb2e636cda369c12d26c8f4b4a3308 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 Mar 2014 15:18:43 -0700
Subject: EPUB writer:  Incorporate files linked in <video> tags.

src and poster will both be incorporated into content.opf
and the epub container.

This partially address #1170.
Still need to do something similar for <audio>.
---
 src/Text/Pandoc/MIME.hs         |  1 +
 src/Text/Pandoc/Writers/EPUB.hs | 56 +++++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 44989ee94..977cb576b 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -246,6 +246,7 @@ mimeTypesList = -- List borrowed from happstack-server.
            ,("lzx","application/x-lzx")
            ,("m3u","audio/mpegurl")
            ,("m4a","audio/mpeg")
+           ,("m4v","video/x-m4v")
            ,("maker","application/x-maker")
            ,("man","application/x-troff-man")
            ,("mcif","chemical/x-mmcif")
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index a48300939..d4044d475 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -65,6 +65,7 @@ import Prelude hiding (catch)
 #endif
 import Control.Exception (catch, SomeException)
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
+import Text.HTML.TagSoup
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@@ -342,8 +343,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   -- handle pictures
   picsRef <- newIORef []
-  Pandoc _ blocks <- walkM
-       (transformInline opts' picsRef) doc
+  Pandoc _ blocks <- walkM (transformInline opts' picsRef) doc >>=
+                     walkM (transformBlock opts' picsRef)
   pics <- readIORef picsRef
   let readPicEntry entries (oldsrc, newsrc) = do
         res <- fetchItem (writerSourceURL opts') oldsrc
@@ -715,21 +716,55 @@ metadataElement version md currentTime =
 showDateTimeISO8601 :: UTCTime -> String
 showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
 
+transformTag :: WriterOptions
+             -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+             -> Tag String
+             -> IO (Tag String)
+transformTag opts picsRef tag@(TagOpen "video" attr) = do
+  let src = fromAttrib "src" tag
+  let poster = fromAttrib "poster" tag
+  let oldsrc = maybe src (</> src) $ writerSourceURL opts
+  let oldposter = maybe poster (</> poster) $ writerSourceURL opts
+  newsrc <- modifyPicsRef picsRef oldsrc
+  newposter <- modifyPicsRef picsRef oldposter
+  let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
+              [("src", newsrc) | not (null newsrc)] ++
+              [("poster", newposter) | not (null newposter)]
+  return $ TagOpen "video" attr'
+transformTag _ _ tag = return tag
+
+modifyPicsRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
+modifyPicsRef _ "" = return ""
+modifyPicsRef picsRef oldsrc = do
+  pics <- readIORef picsRef
+  case lookup oldsrc pics of
+         Just n  -> return n
+         Nothing -> do
+           let new = "images/img" ++ show (length pics) ++
+                    takeExtension oldsrc
+           modifyIORef picsRef ( (oldsrc, new): )
+           return new
+
+transformBlock  :: WriterOptions
+                -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+                -> Block
+                -> IO Block
+transformBlock opts picsRef (RawBlock fmt raw)
+  | fmt == Format "html" = do
+  let tags = parseTags raw
+  -- look for video tags and add poster and src to images
+  tags' <- mapM (transformTag opts picsRef)  tags
+  return $ RawBlock fmt (renderTags tags')
+transformBlock _ _ b = return b
+
 transformInline  :: WriterOptions
                  -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
                  -> Inline
                  -> IO Inline
 transformInline opts picsRef (Image lab (src,tit)) = do
     let src' = unEscapeString src
-    pics <- readIORef picsRef
     let oldsrc = maybe src' (</> src) $ writerSourceURL opts
-    let ext = takeExtension src'
-    newsrc <- case lookup oldsrc pics of
-                    Just n  -> return n
-                    Nothing -> do
-                          let new = "images/img" ++ show (length pics) ++ ext
-                          modifyIORef picsRef ( (oldsrc, new): )
-                          return new
+    newsrc <- modifyPicsRef picsRef oldsrc
     return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
@@ -762,6 +797,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
 imageTypeOf :: FilePath -> Maybe String
 imageTypeOf x = case getMimeType x of
                      Just y@('i':'m':'a':'g':'e':_) -> Just y
+                     Just y@('v':'i':'d':'e':'o':_) -> Just y
                      _                              -> Nothing
 
 data IdentState = IdentState{
-- 
cgit v1.2.3


From 91696c62c431b85de7704b5444431230464d77b3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 Mar 2014 15:30:11 -0700
Subject: EPUB writer:  Handle media in audio source tags.

This should resolve the rest of #1170, but it needs
extensive testing.

Note that we now use a 'media' directory rather than 'images'.
---
 src/Text/Pandoc/Writers/EPUB.hs | 55 +++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index d4044d475..75aae55c1 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -342,15 +342,15 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let tpEntry = mkEntry "title_page.xhtml" tpContent
 
   -- handle pictures
-  picsRef <- newIORef []
-  Pandoc _ blocks <- walkM (transformInline opts' picsRef) doc >>=
-                     walkM (transformBlock opts' picsRef)
-  pics <- readIORef picsRef
+  mediaRef <- newIORef []
+  Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
+                     walkM (transformBlock opts' mediaRef)
+  pics <- readIORef mediaRef
   let readPicEntry entries (oldsrc, newsrc) = do
         res <- fetchItem (writerSourceURL opts') oldsrc
         case res of
              Left _        -> do
-              warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
+              warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
               return entries
              Right (img,_) -> return $
               (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
@@ -440,7 +440,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                            [("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", fromMaybe "application/octet-stream"
-                               $ imageTypeOf $ eRelativePath ent)] $ ()
+                               $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
                            [("id", takeBaseName $ eRelativePath ent),
                             ("href", eRelativePath ent),
@@ -717,54 +717,54 @@ showDateTimeISO8601 :: UTCTime -> String
 showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
 
 transformTag :: WriterOptions
-             -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+             -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
              -> Tag String
              -> IO (Tag String)
-transformTag opts picsRef tag@(TagOpen "video" attr) = do
+transformTag opts mediaRef tag@(TagOpen name attr)
+  | name == "video" || name == "source" = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
   let oldsrc = maybe src (</> src) $ writerSourceURL opts
   let oldposter = maybe poster (</> poster) $ writerSourceURL opts
-  newsrc <- modifyPicsRef picsRef oldsrc
-  newposter <- modifyPicsRef picsRef oldposter
+  newsrc <- modifyMediaRef mediaRef oldsrc
+  newposter <- modifyMediaRef mediaRef oldposter
   let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
               [("src", newsrc) | not (null newsrc)] ++
               [("poster", newposter) | not (null newposter)]
-  return $ TagOpen "video" attr'
+  return $ TagOpen name attr'
 transformTag _ _ tag = return tag
 
-modifyPicsRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
-modifyPicsRef _ "" = return ""
-modifyPicsRef picsRef oldsrc = do
-  pics <- readIORef picsRef
-  case lookup oldsrc pics of
+modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
+modifyMediaRef _ "" = return ""
+modifyMediaRef mediaRef oldsrc = do
+  media <- readIORef mediaRef
+  case lookup oldsrc media of
          Just n  -> return n
          Nothing -> do
-           let new = "images/img" ++ show (length pics) ++
+           let new = "media/file" ++ show (length media) ++
                     takeExtension oldsrc
-           modifyIORef picsRef ( (oldsrc, new): )
+           modifyIORef mediaRef ( (oldsrc, new): )
            return new
 
 transformBlock  :: WriterOptions
-                -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+                -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
                 -> Block
                 -> IO Block
-transformBlock opts picsRef (RawBlock fmt raw)
+transformBlock opts mediaRef (RawBlock fmt raw)
   | fmt == Format "html" = do
   let tags = parseTags raw
-  -- look for video tags and add poster and src to images
-  tags' <- mapM (transformTag opts picsRef)  tags
+  tags' <- mapM (transformTag opts mediaRef)  tags
   return $ RawBlock fmt (renderTags tags')
 transformBlock _ _ b = return b
 
 transformInline  :: WriterOptions
-                 -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+                 -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
                  -> Inline
                  -> IO Inline
-transformInline opts picsRef (Image lab (src,tit)) = do
+transformInline opts mediaRef (Image lab (src,tit)) = do
     let src' = unEscapeString src
     let oldsrc = maybe src' (</> src) $ writerSourceURL opts
-    newsrc <- modifyPicsRef picsRef oldsrc
+    newsrc <- modifyMediaRef mediaRef oldsrc
     return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
@@ -794,10 +794,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
                           Nothing  -> '&':'#':unEntity xs
         unEntity (x:xs) = x : unEntity xs
 
-imageTypeOf :: FilePath -> Maybe String
-imageTypeOf x = case getMimeType x of
+mediaTypeOf :: FilePath -> Maybe String
+mediaTypeOf x = case getMimeType x of
                      Just y@('i':'m':'a':'g':'e':_) -> Just y
                      Just y@('v':'i':'d':'e':'o':_) -> Just y
+                     Just y@('a':'u':'d':'i':'o':_) -> Just y
                      _                              -> Nothing
 
 data IdentState = IdentState{
-- 
cgit v1.2.3


From 44f58e7e380b2ec38ba65e1cebb295661d3bad62 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 Mar 2014 15:41:28 -0700
Subject: EPUB writer:  Handle files linked in raw img tags.

See #1170.
---
 src/Text/Pandoc/Writers/EPUB.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 75aae55c1..dae45b90f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -721,7 +721,7 @@ transformTag :: WriterOptions
              -> Tag String
              -> IO (Tag String)
 transformTag opts mediaRef tag@(TagOpen name attr)
-  | name == "video" || name == "source" = do
+  | name == "video" || name == "source" || name == "img" = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
   let oldsrc = maybe src (</> src) $ writerSourceURL opts
-- 
cgit v1.2.3


From 3df75bc160827fbd0322ecf48e543977cda0156f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 Mar 2014 11:09:36 -0700
Subject: PDF:  Changes to error reporting, to handle non-UTF8 error output.

---
 src/Text/Pandoc/PDF.hs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index a5f1597bd..608cad2e9 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -116,7 +116,6 @@ tex2pdf' tmpDir program source = do
                    then 3  -- to get page numbers
                    else 2  -- 1 run won't give you PDF bookmarks
   (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
-  let msg = "Error producing PDF from TeX source.\n"
   case (exit, mbPdf) of
        (ExitFailure _, _)      -> do
           let logmsg = extractMsg log'
@@ -125,8 +124,8 @@ tex2pdf' tmpDir program source = do
                      x | "! Package inputenc Error" `BC.isPrefixOf` x ->
                            "\nTry running pandoc with --latex-engine=xelatex."
                      _ -> ""
-          return $ Left $ msg <> logmsg <> extramsg
-       (ExitSuccess, Nothing)  -> return $ Left msg
+          return $ Left $ logmsg <> extramsg
+       (ExitSuccess, Nothing)  -> return $ Left ""
        (ExitSuccess, Just pdf) -> return $ Right pdf
 
 (<>) :: ByteString -> ByteString -> ByteString
-- 
cgit v1.2.3


From dd058b38b0e7ed511353338c4830aab156c39a0e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 24 Mar 2014 09:56:16 -0700
Subject: Markdown reader: Fixed regression on line breaks in strict mode.

Closes #1203.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0ea7f9ac5..aa0252266 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1558,7 +1558,7 @@ endline = try $ do
   guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
   guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
-  guardEnabled Ext_backtick_code_blocks >>
+  guardDisabled Ext_backtick_code_blocks <|>
      notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
   (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
     <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
-- 
cgit v1.2.3


From 3fa38db80b422af627ba260024756faf2a1d9147 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 24 Mar 2014 10:29:24 -0700
Subject: Parsing:  Make F an instance of Applicative.  Closes #1138.

---
 src/Text/Pandoc/Parsing.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 2bc351db3..883a560d0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -174,13 +174,13 @@ import Text.Pandoc.Asciify (toAsciiChar)
 import Data.Default
 import qualified Data.Set as Set
 import Control.Monad.Reader
-import Control.Applicative ((*>), (<*), (<$), liftA2)
+import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative)
 import Data.Monoid
 import Data.Maybe (catMaybes)
 
 type Parser t s = Parsec t s
 
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor)
+newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
 
 runF :: F a -> ParserState -> a
 runF = runReader . unF
-- 
cgit v1.2.3


From d7fbc40dff9771181f26d7d9cb3129c9884a5f01 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 24 Mar 2014 15:07:19 -0700
Subject: RTF writer:  Fixed tables cells containing paragraphs.

This moves \intbl after \pard.
---
 src/Text/Pandoc/Writers/RTF.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index fb935fa6a..3e0bd9976 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -259,7 +259,7 @@ tableRowToRTF header indent aligns sizes' cols =
 tableItemToRTF :: Int -> Alignment -> [Block] -> String
 tableItemToRTF indent alignment item =
   let contents = concatMap (blockToRTF indent alignment) item
-  in  "{\\intbl " ++ contents ++ "\\cell}\n"
+  in  "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n"
 
 -- | Ensure that there's the same amount of space after compact
 -- lists as after regular lists.
-- 
cgit v1.2.3


From 5e69f845d534964bd3d0d1fe275b5cecc0bf3098 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 12:04:25 -0700
Subject: LaTeX reader:  Better handling of "table" environment.

Positioning options no longer rendered verbatim.
Partially addresses #1204.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 51271edc5..8c3dac263 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -894,6 +894,7 @@ environments = M.fromList
   , ("letter", env "letter" letter_contents)
   , ("figure", env "figure" $ skipopts *> blocks)
   , ("center", env "center" blocks)
+  , ("table",  env "table" $ skipopts *> blocks)
   , ("tabular", env "tabular" simpTable)
   , ("quote", blockQuote <$> env "quote" blocks)
   , ("quotation", blockQuote <$> env "quotation" blocks)
-- 
cgit v1.2.3


From 08d1404b31a0fd6edd94b1e5674d3b07d8e4de6a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 13:43:34 -0700
Subject: API changes to HasReaderOptions, HasHeaderMap, HasIdentifierList.

Previously these were typeclasses of monads.  They've been changed
to be typeclasses of states.  This ismplifies the instance definitions
and provides more flexibility.

This is an API change!  However, it should be backwards compatible
unless you're defining instances of HasReaderOptions, HasHeaderMap,
or HasIdentifierList.  The old getOption function should work as
before (albeit with a more general type).

The function askReaderOption has been removed.
extractReaderOptions has been added.
getOption has been given a default definition.

In HasHeaderMap, extractHeaderMap and updateHeaderMap have been added.
Default definitions have been given for getHeaderMap, putHeaderMap,
and modifyHeaderMap.

In HasIdentifierList, extractIdentifierList and updateIdentifierList
have been added.  Default definitions have been given for
getIdentifierList, putIdentifierList, and modifyIdentifierList.

The ultimate goal here is to allow different parsers to use their
own, tailored parser states (instead of ParserState) while still
using shared functions.
---
 src/Text/Pandoc/Parsing.hs           | 70 ++++++++++++++++++++----------------
 src/Text/Pandoc/Readers/MediaWiki.hs | 16 ++++-----
 2 files changed, 47 insertions(+), 39 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 883a560d0..c12e967dc 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -64,7 +64,6 @@ module Text.Pandoc.Parsing ( (>>~),
                              gridTableWith,
                              readWith,
                              testStringWith,
-                             getOption,
                              guardEnabled,
                              guardDisabled,
                              ParserState (..),
@@ -870,33 +869,45 @@ instance HasMeta ParserState where
   deleteMeta field st =
     st{ stateMeta = deleteMeta field $ stateMeta st }
 
-class Monad m => HasReaderOptions m where
-  askReaderOption :: (ReaderOptions -> b) -> m b
-
-class Monad m => HasHeaderMap m where
-  getHeaderMap    :: m (M.Map Inlines String)
-  putHeaderMap    :: M.Map Inlines String -> m ()
-  modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m ()
+class HasReaderOptions st where
+  extractReaderOptions :: st -> ReaderOptions
+  getOption            :: (ReaderOptions -> b) -> Parser s st b
+  -- default
+  getOption  f         = (f . extractReaderOptions) `fmap` getState
+
+instance HasReaderOptions ParserState where
+  extractReaderOptions = stateOptions
+
+class HasHeaderMap st where
+  extractHeaderMap  :: st -> M.Map Inlines String
+  updateHeaderMap   :: M.Map Inlines String -> st -> st
+  getHeaderMap      :: Parser s st (M.Map Inlines String)
+  putHeaderMap      :: M.Map Inlines String -> Parser s st ()
+  modifyHeaderMap   :: (M.Map Inlines String -> M.Map Inlines String)
+                       -> Parser s st ()
   -- default
+  getHeaderMap      = extractHeaderMap `fmap` getState
+  putHeaderMap x    = updateState (updateHeaderMap x)
   modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f
 
-class Monad m => HasIdentifierList m where
-  getIdentifierList    :: m [String]
-  putIdentifierList    :: [String] -> m ()
-  modifyIdentifierList :: ([String] -> [String]) -> m ()
+instance HasHeaderMap ParserState where
+  extractHeaderMap     = stateHeaders
+  updateHeaderMap x st = st{ stateHeaders = x }
+
+class HasIdentifierList st where
+  extractIdentifierList  :: st -> [String]
+  updateIdentifierList   :: [String] -> st -> st
+  getIdentifierList      :: Parser s st ([String])
+  putIdentifierList      :: [String] -> Parser s st ()
+  modifyIdentifierList   :: ([String] -> [String]) -> Parser s st ()
   -- default
+  getIdentifierList      = extractIdentifierList `fmap` getState
+  putIdentifierList x    = updateState (updateIdentifierList x)
   modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f
 
-instance HasReaderOptions (Parser s ParserState) where
-  askReaderOption = getOption
-
-instance HasHeaderMap (Parser s ParserState) where
-  getHeaderMap      = fmap stateHeaders getState
-  putHeaderMap hm   = updateState $ \st -> st{ stateHeaders = hm }
-
-instance HasIdentifierList (Parser s ParserState) where
-  getIdentifierList   = fmap stateIdentifiers  getState
-  putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l }
+instance HasIdentifierList ParserState where
+  extractIdentifierList     = stateIdentifiers
+  updateIdentifierList x st = st{ stateIdentifiers = x }
 
 defaultParserState :: ParserState
 defaultParserState =
@@ -923,15 +934,12 @@ defaultParserState =
                   stateRstCustomRoles  = M.empty,
                   stateWarnings        = []}
 
-getOption :: (ReaderOptions -> a) -> Parser s ParserState a
-getOption f = (f . stateOptions) `fmap` getState
-
 -- | Succeed only if the extension is enabled.
-guardEnabled :: Extension -> Parser s ParserState ()
+guardEnabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
 
 -- | Succeed only if the extension is disabled.
-guardDisabled :: Extension -> Parser s ParserState ()
+guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
 
 data HeaderType
@@ -968,11 +976,11 @@ type SubstTable = M.Map Key Inlines
 --  and the auto_identifers extension is set, generate a new
 --  unique identifier, and update the list of identifiers
 --  in state.
-registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m)
-               => Attr -> Inlines -> m Attr
+registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+               => Attr -> Inlines -> Parser s st Attr
 registerHeader (ident,classes,kvs) header' = do
   ids <- getIdentifierList
-  exts <- askReaderOption readerExtensions
+  exts <- getOption readerExtensions
   let insert' = M.insertWith (\_new old -> old)
   if null ident && Ext_auto_identifiers `Set.member` exts
      then do
@@ -990,7 +998,7 @@ registerHeader (ident,classes,kvs) header' = do
         return (ident,classes,kvs)
 
 -- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: Parser [tok] ParserState ()
+failUnlessSmart :: HasReaderOptions st => Parser s st ()
 failUnlessSmart = getOption readerSmart >>= guard
 
 smartPunctuation :: Parser [Char] ParserState Inline
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 7ac2f33ba..7bad4d346 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -82,16 +82,16 @@ data MWState = MWState { mwOptions         :: ReaderOptions
 
 type MWParser = Parser [Char] MWState
 
-instance HasReaderOptions MWParser where
-  askReaderOption f = (f . mwOptions) `fmap` getState
+instance HasReaderOptions MWState where
+  extractReaderOptions = mwOptions
 
-instance HasHeaderMap MWParser where
-  getHeaderMap      = fmap mwHeaderMap getState
-  putHeaderMap hm   = updateState $ \st -> st{ mwHeaderMap = hm }
+instance HasHeaderMap MWState where
+  extractHeaderMap     = mwHeaderMap
+  updateHeaderMap x st = st{ mwHeaderMap = x }
 
-instance HasIdentifierList MWParser where
-  getIdentifierList   = fmap mwIdentifierList getState
-  putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l }
+instance HasIdentifierList MWState where
+  extractIdentifierList     = mwIdentifierList
+  updateIdentifierList x st = st{ mwIdentifierList = x }
 
 --
 -- auxiliary functions
-- 
cgit v1.2.3


From 6ec3ee3a67c30d2e83f638035521896cbbd70f1e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 13:51:55 -0700
Subject: Whitespace change, and note:

Contrary to the previous commit message, there was no API
change, since Text.Pandoc.Parsing is not an exposed module.
---
 src/Text/Pandoc/Parsing.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index c12e967dc..bc0c5bdf8 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -909,6 +909,7 @@ instance HasIdentifierList ParserState where
   extractIdentifierList     = stateIdentifiers
   updateIdentifierList x st = st{ stateIdentifiers = x }
 
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
-- 
cgit v1.2.3


From 6992050161f1bbe8d18d7d78beb3b38a4b69a23e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 14:55:18 -0700
Subject: Parsing:  Added HasMacros, simplified other typeclasses.

Removed updateHeaderMap, setHeaderMap, getHeaderMap,
updateIdentifierList, setIdentifierList, getIdentifierList.
---
 src/Text/Pandoc/Parsing.hs           | 50 ++++++++++++++++--------------------
 src/Text/Pandoc/Readers/MediaWiki.hs |  4 +--
 2 files changed, 24 insertions(+), 30 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index bc0c5bdf8..d8c7e71d5 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -70,6 +70,7 @@ module Text.Pandoc.Parsing ( (>>~),
                              HasReaderOptions (..),
                              HasHeaderMap (..),
                              HasIdentifierList (..),
+                             HasMacros (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -880,35 +881,28 @@ instance HasReaderOptions ParserState where
 
 class HasHeaderMap st where
   extractHeaderMap  :: st -> M.Map Inlines String
-  updateHeaderMap   :: M.Map Inlines String -> st -> st
-  getHeaderMap      :: Parser s st (M.Map Inlines String)
-  putHeaderMap      :: M.Map Inlines String -> Parser s st ()
-  modifyHeaderMap   :: (M.Map Inlines String -> M.Map Inlines String)
-                       -> Parser s st ()
-  -- default
-  getHeaderMap      = extractHeaderMap `fmap` getState
-  putHeaderMap x    = updateState (updateHeaderMap x)
-  modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f
+  updateHeaderMap   :: (M.Map Inlines String -> M.Map Inlines String) ->
+                       st -> st
 
 instance HasHeaderMap ParserState where
   extractHeaderMap     = stateHeaders
-  updateHeaderMap x st = st{ stateHeaders = x }
+  updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
 
 class HasIdentifierList st where
   extractIdentifierList  :: st -> [String]
-  updateIdentifierList   :: [String] -> st -> st
-  getIdentifierList      :: Parser s st ([String])
-  putIdentifierList      :: [String] -> Parser s st ()
-  modifyIdentifierList   :: ([String] -> [String]) -> Parser s st ()
-  -- default
-  getIdentifierList      = extractIdentifierList `fmap` getState
-  putIdentifierList x    = updateState (updateIdentifierList x)
-  modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f
+  updateIdentifierList   :: ([String] -> [String]) -> st -> st
 
 instance HasIdentifierList ParserState where
   extractIdentifierList     = stateIdentifiers
-  updateIdentifierList x st = st{ stateIdentifiers = x }
+  updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
+
+class HasMacros st where
+  extractMacros         :: st -> [Macro]
+  updateMacros          :: ([Macro] -> [Macro]) -> st -> st
 
+instance HasMacros ParserState where
+  extractMacros        = stateMacros
+  updateMacros f st    = st{ stateMacros = f $ stateMacros st }
 
 defaultParserState :: ParserState
 defaultParserState =
@@ -980,7 +974,7 @@ type SubstTable = M.Map Key Inlines
 registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
                => Attr -> Inlines -> Parser s st Attr
 registerHeader (ident,classes,kvs) header' = do
-  ids <- getIdentifierList
+  ids <- extractIdentifierList `fmap` getState
   exts <- getOption readerExtensions
   let insert' = M.insertWith (\_new old -> old)
   if null ident && Ext_auto_identifiers `Set.member` exts
@@ -989,13 +983,13 @@ registerHeader (ident,classes,kvs) header' = do
        let id'' = if Ext_ascii_identifiers `Set.member` exts
                      then catMaybes $ map toAsciiChar id'
                      else id'
-       putIdentifierList $ if id' == id''
-                              then id' : ids
-                              else id' : id'' : ids
-       modifyHeaderMap $ insert' header' id'
+       updateState $ updateIdentifierList $
+         if id' == id'' then (id' :) else ([id', id''] ++)
+       updateState $ updateHeaderMap $ insert' header' id'
        return (id'',classes,kvs)
      else do
-        unless (null ident) $ modifyHeaderMap $ insert' header' ident
+        unless (null ident) $
+          updateState $ updateHeaderMap $ insert' header' ident
         return (ident,classes,kvs)
 
 -- | Fail unless we're in "smart typography" mode.
@@ -1140,7 +1134,7 @@ nested p = do
 --
 
 -- | Parse a \newcommand or \renewcommand macro definition.
-macro :: Parser [Char] ParserState Blocks
+macro :: (HasMacros st, HasReaderOptions st) => Parser [Char] st Blocks
 macro = do
   apply <- getOption readerApplyMacros
   inp <- getInput
@@ -1150,7 +1144,7 @@ macro = do
                         if apply
                            then do
                              updateState $ \st ->
-                               st { stateMacros = ms ++ stateMacros st }
+                               updateMacros (ms ++) st
                              return mempty
                            else return $ rawBlock "latex" def'
 
@@ -1159,7 +1153,7 @@ applyMacros' :: String -> Parser [Char] ParserState String
 applyMacros' target = do
   apply <- getOption readerApplyMacros
   if apply
-     then do macros <- liftM stateMacros getState
+     then do macros <- extractMacros `fmap` getState
              return $ applyMacros macros target
      else return target
 
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 7bad4d346..f70b44aad 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -87,11 +87,11 @@ instance HasReaderOptions MWState where
 
 instance HasHeaderMap MWState where
   extractHeaderMap     = mwHeaderMap
-  updateHeaderMap x st = st{ mwHeaderMap = x }
+  updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st }
 
 instance HasIdentifierList MWState where
   extractIdentifierList     = mwIdentifierList
-  updateIdentifierList x st = st{ mwIdentifierList = x }
+  updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
 
 --
 -- auxiliary functions
-- 
cgit v1.2.3


From 82ddec698e782fef83dcd1b1fba79cd3b698c717 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 15:38:30 -0700
Subject: LaTeX reader:  Added LPState.

Plan is to use this instead of ParserState in LP.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 8c3dac263..dd94c0ac1 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -43,6 +43,7 @@ import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
+import Data.Default
 import Text.Pandoc.Builder
 import Data.Char (isLetter, isAlphaNum)
 import Control.Applicative
@@ -71,6 +72,23 @@ parseLaTeX = do
   let (Pandoc _ bs') = doc bs
   return $ Pandoc meta bs'
 
+data LPState = LPState{
+    lpMeta           :: Meta
+  , lpOptions        :: ReaderOptions
+  , lpHasChapters    :: Bool
+  , lpParserContext  :: ParserContext
+  , lpQuoteContext   :: QuoteContext
+  } deriving Show
+
+instance Default LPState where
+  def = LPState{
+      lpMeta          = nullMeta
+    , lpOptions       = def
+    , lpHasChapters   = False
+    , lpParserContext = NullState
+    , lpQuoteContext  = NoQuote
+    }
+
 type LP = Parser [Char] ParserState
 
 anyControlSeq :: LP String
-- 
cgit v1.2.3


From 994597f071cddbf9631e0d27bc8441afd2b1ebeb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 22:40:18 -0700
Subject: Revert "LaTeX reader:  Added LPState."

This reverts commit 82ddec698e782fef83dcd1b1fba79cd3b698c717.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index dd94c0ac1..8c3dac263 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -43,7 +43,6 @@ import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
 import Control.Monad
-import Data.Default
 import Text.Pandoc.Builder
 import Data.Char (isLetter, isAlphaNum)
 import Control.Applicative
@@ -72,23 +71,6 @@ parseLaTeX = do
   let (Pandoc _ bs') = doc bs
   return $ Pandoc meta bs'
 
-data LPState = LPState{
-    lpMeta           :: Meta
-  , lpOptions        :: ReaderOptions
-  , lpHasChapters    :: Bool
-  , lpParserContext  :: ParserContext
-  , lpQuoteContext   :: QuoteContext
-  } deriving Show
-
-instance Default LPState where
-  def = LPState{
-      lpMeta          = nullMeta
-    , lpOptions       = def
-    , lpHasChapters   = False
-    , lpParserContext = NullState
-    , lpQuoteContext  = NoQuote
-    }
-
 type LP = Parser [Char] ParserState
 
 anyControlSeq :: LP String
-- 
cgit v1.2.3


From 0934c4430a91551f8d293fa7eb6a2c3e745d3a48 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 22:44:16 -0700
Subject: Parsing:  Added stateCaption.

This is primarily for use in the LaTeX reader, so far.
---
 src/Text/Pandoc/Parsing.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d8c7e71d5..68d4605ee 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -857,7 +857,7 @@ data ParserState = ParserState
       -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
       -- roles), 3) Source language annotation for code (could be used to
       -- annotate role classes too).
-
+      stateCaption         :: Maybe Inlines, -- ^ Caption in current environment
       stateWarnings        :: [String]       -- ^ Warnings generated by the parser
     }
 
@@ -927,6 +927,7 @@ defaultParserState =
                   stateMacros          = [],
                   stateRstDefaultRole  = "title-reference",
                   stateRstCustomRoles  = M.empty,
+                  stateCaption         = Nothing,
                   stateWarnings        = []}
 
 -- | Succeed only if the extension is enabled.
-- 
cgit v1.2.3


From 69a7c9f634b1e9133f7c6e4e135404bdf8caa3ab Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 Mar 2014 23:10:43 -0700
Subject: LaTeX reader:  Better handling of figure and table with caption.

We now look for a \caption inside the environment; if one is
found, it is attached to the graphic or tabular found there.

Closes #1204.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 45 ++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 8c3dac263..f23a5b35e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -302,6 +302,7 @@ blockCommands = M.fromList $
   , ("item", skipopts *> loose_item)
   , ("documentclass", skipopts *> braced *> preamble)
   , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+  , ("caption", tok >>= setCaption)
   ] ++ map ignoreBlocks
   -- these commands will be ignored unless --parse-raw is specified,
   -- in which case they will appear as raw latex blocks
@@ -323,6 +324,14 @@ blockCommands = M.fromList $
 addMeta :: ToMetaValue a => String -> a -> LP ()
 addMeta field val = updateState $ setMeta field val
 
+setCaption :: Inlines -> LP Blocks
+setCaption ils = do
+  updateState $ \st -> st{ stateCaption = Just ils }
+  return mempty
+
+resetCaption :: LP ()
+resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
+
 authors :: LP ()
 authors = try $ do
   char '{'
@@ -523,18 +532,12 @@ inlineCommands = M.fromList $
 
 mkImage :: String -> LP Inlines
 mkImage src = do
-   -- try for a caption
-   (alt, tit) <- option (str "image", "") $ try $ do
-                   spaces
-                   controlSeq "caption"
-                   optional (char '*')
-                   ils <- grouped inline
-                   return (ils, "fig:")
+   let alt = str "image"
    case takeExtension src of
         "" -> do
               defaultExt <- getOption readerDefaultImageExtension
-              return $ image (addExtension src defaultExt) tit alt
-        _  -> return $ image src tit alt
+              return $ image (addExtension src defaultExt) "" alt
+        _  -> return $ image src "" alt
 
 inNote :: Inlines -> Inlines
 inNote ils =
@@ -888,13 +891,33 @@ rawLaTeXInline = do
   raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
   RawInline "latex" <$> applyMacros' raw
 
+addImageCaption :: Blocks -> LP Blocks
+addImageCaption = walkM go
+  where go (Image alt (src,tit)) = do
+          mbcapt <- stateCaption <$> getState
+          case mbcapt of
+               Just ils -> return (Image (toList ils) (src, "fig:"))
+               Nothing  -> return (Image alt (src,tit))
+        go x = return x
+
+addTableCaption :: Blocks -> LP Blocks
+addTableCaption = walkM go
+  where go (Table c als ws hs rs) = do
+          mbcapt <- stateCaption <$> getState
+          case mbcapt of
+               Just ils -> return (Table (toList ils) als ws hs rs)
+               Nothing  -> return (Table c als ws hs rs)
+        go x = return x
+
 environments :: M.Map String (LP Blocks)
 environments = M.fromList
   [ ("document", env "document" blocks <* skipMany anyChar)
   , ("letter", env "letter" letter_contents)
-  , ("figure", env "figure" $ skipopts *> blocks)
+  , ("figure", env "figure" $
+         resetCaption *> skipopts *> blocks >>= addImageCaption)
   , ("center", env "center" blocks)
-  , ("table",  env "table" $ skipopts *> blocks)
+  , ("table",  env "table" $
+         resetCaption *> skipopts *> blocks >>= addTableCaption)
   , ("tabular", env "tabular" simpTable)
   , ("quote", blockQuote <$> env "quote" blocks)
   , ("quotation", blockQuote <$> env "quotation" blocks)
-- 
cgit v1.2.3


From 361167deff57e0e2d3508c598785a7de7b887fb7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 30 Mar 2014 15:48:40 -0700
Subject: Markdown writer:  Use longer backtick fences if needed.

If the content contains a backtick fence and there are
attributes, make sure longer fences are used to delimit the code.

Note:  This works well in pandoc, but github markdown is more
limited, and will interpret the first string of three or more
backticks as ending the code block.

Closes #1206.
---
 src/Text/Pandoc/Writers/Markdown.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 278e5cc9d..e8f976da1 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -392,7 +392,11 @@ blockToMarkdown opts (CodeBlock attribs str) = return $
                                xs -> case maximum $ map length xs of
                                           n | n < 3 -> "~~~~"
                                             | otherwise -> replicate (n+1) '~'
-         backticks = text "```"
+         backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of
+                               [] -> "```"
+                               xs -> case maximum $ map length xs of
+                                          n | n < 3 -> "```"
+                                            | otherwise -> replicate (n+1) '`'
          attrs  = if isEnabled Ext_fenced_code_attributes opts
                      then nowrap $ " " <> attrsToMarkdown attribs
                      else case attribs of
-- 
cgit v1.2.3


From 99f4f636df36d84a4bc58f67337af3584595f5c1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 31 Mar 2014 11:08:10 -0700
Subject: Make --toc-depth work well with books in latex/pdf output.

Closes #1210.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index b2eff4490..07be6e9af 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -133,7 +133,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
   authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
   let context  =  defField "toc" (writerTableOfContents options) $
                   defField "toc-depth" (show (writerTOCDepth options -
-                                              if writerChapters options
+                                              if stBook st
                                                  then 1
                                                  else 0)) $
                   defField "body" main $
@@ -141,7 +141,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                   defField "author-meta" (intercalate "; " authorsMeta) $
                   defField "documentclass" (if writerBeamer options
                                                then ("beamer" :: String)
-                                               else if writerChapters options
+                                               else if stBook st
                                                     then "book"
                                                     else "article") $
                   defField "verbatim-in-note" (stVerbInNote st) $
-- 
cgit v1.2.3


From 0ccca94b4cb22e846289c004deaa023245e14981 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Tue, 25 Mar 2014 18:22:23 +0000
Subject: Bugfix for #1175 and convert textile reader to use builder.

The reader did not correctly parse inline markup. The behavoir is now as follows.

(a) The markup must start at the start of a line, be inside previous
inline markup or be preceeded by whitespace.
(b) The markup can not span across paragraphs (delimited by \n\n)
(c) The markup can not be followed by a alphanumeric character.
(d) Square brackets can be placed around the markup to avoid having
to have white space before it.

In order to make these changes it was either necessary to convert the parser to return a list of inlines or to convert the whole reader to use the builder. The latter approach whilst more work makes a bit more sense as it becomes easy to arbitarily append and prepend elements without changing the type.

Tests are accordingly updated in a later commit to reflect the different normalisation behavoir specified by the builder monoid.
---
 src/Text/Pandoc/Readers/Textile.hs | 301 ++++++++++++++++++++-----------------
 1 file changed, 167 insertions(+), 134 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 93658cdea..ede50c6de 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -50,20 +50,20 @@ TODO : refactor common patterns across readers :
 
 
 module Text.Pandoc.Readers.Textile ( readTextile) where
-
 import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Shared
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing
+import Text.Pandoc.Parsing 
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
-import Data.Char ( digitToInt, isUpper )
+import Data.Char ( digitToInt, isUpper)
 import Control.Monad ( guard, liftM )
 import Control.Applicative ((<$>), (*>), (<*))
+import Data.Monoid
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: ReaderOptions -- ^ Reader options
@@ -95,7 +95,7 @@ parseTextile = do
   updateState $ \s -> s { stateNotes = reverse reversedNotes }
   -- now parse it for real...
   blocks <- parseBlocks
-  return $ Pandoc nullMeta blocks -- FIXME
+  return $ Pandoc nullMeta (B.toList blocks) -- FIXME
 
 noteMarker :: Parser [Char] ParserState [Char]
 noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
@@ -115,11 +115,11 @@ noteBlock = try $ do
   return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
 
 -- | Parse document blocks
-parseBlocks :: Parser [Char] ParserState [Block]
-parseBlocks = manyTill block eof
+parseBlocks :: Parser [Char] ParserState Blocks
+parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: [Parser [Char] ParserState Block]
+blockParsers :: [Parser [Char] ParserState Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -130,29 +130,32 @@ blockParsers = [ codeBlock
                , rawLaTeXBlock'
                , maybeExplicitBlock "table" table
                , maybeExplicitBlock "p" para
+               , endBlock
                ]
 
+endBlock :: Parser [Char] ParserState Blocks
+endBlock = string "\n\n" >> return mempty
 -- | Any block in the order of definition of blockParsers
-block :: Parser [Char] ParserState Block
+block :: Parser [Char] ParserState Blocks
 block = choice blockParsers <?> "block"
 
-commentBlock :: Parser [Char] ParserState Block
+commentBlock :: Parser [Char] ParserState Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
-  return Null
+  return mempty
 
-codeBlock :: Parser [Char] ParserState Block
+codeBlock :: Parser [Char] ParserState Blocks
 codeBlock = codeBlockBc <|> codeBlockPre
 
-codeBlockBc :: Parser [Char] ParserState Block
+codeBlockBc :: Parser [Char] ParserState Blocks
 codeBlockBc = try $ do
   string "bc. "
   contents <- manyTill anyLine blanklines
-  return $ CodeBlock ("",[],[]) $ unlines contents
+  return $ B.codeBlock (unlines contents)
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: Parser [Char] ParserState Block
+codeBlockPre :: Parser [Char] ParserState Blocks
 codeBlockPre = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- (innerText . parseTags) `fmap` -- remove internal tags
@@ -169,29 +172,29 @@ codeBlockPre = try $ do
   let classes = words $ fromAttrib "class" t
   let ident = fromAttrib "id" t
   let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ CodeBlock (ident,classes,kvs) result'''
+  return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: Parser [Char] ParserState Block
+header :: Parser [Char] ParserState Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
   attr <- attributes
   char '.'
-  whitespace
-  name <- normalizeSpaces <$> manyTill inline blockBreak
-  attr' <- registerHeader attr (B.fromList name)
-  return $ Header level attr' name
+  lookAhead whitespace
+  name <- trimInlines . mconcat <$> manyTill inline blockBreak
+  attr' <- registerHeader attr name
+  return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: Parser [Char] ParserState Block
+blockQuote :: Parser [Char] ParserState Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
-  BlockQuote . singleton <$> para
+  B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: Parser [Char] st Block
+hrule :: Parser [Char] st Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -199,62 +202,62 @@ hrule = try $ do
   skipMany (spaceChar <|> char start)
   newline
   optional blanklines
-  return HorizontalRule
+  return B.horizontalRule
 
 -- Lists handling
 
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: Parser [Char] ParserState Block
+anyList :: Parser [Char] ParserState Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: Int -> Parser [Char] ParserState Block
+anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> Parser [Char] ParserState Block
-bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
+bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
+bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: Int -> Parser [Char] ParserState Block
+orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
-  return (OrderedList (1, DefaultStyle, DefaultDelim) items)
+  return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
+genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
-  p <- many listInline
+  p <- mconcat <$> many listInline
   newline
-  sublist <- option [] (singleton <$> anyListAtDepth (depth + 1))
-  return (Plain p : sublist)
+  sublist <- option mempty (anyListAtDepth (depth + 1))
+  return $ (B.plain p) <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: Parser [Char] ParserState Block
-definitionList = try $ DefinitionList <$> many1 definitionListItem
+definitionList :: Parser [Char] ParserState Blocks
+definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
 listStart :: Parser [Char] st Char
 listStart = oneOf "*#-"
 
-listInline :: Parser [Char] ParserState Inline
+listInline :: Parser [Char] ParserState Inlines
 listInline = try (notFollowedBy newline >> inline)
          <|> try (endline <* notFollowedBy listStart)
 
@@ -262,16 +265,16 @@ listInline = try (notFollowedBy newline >> inline)
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
 definitionListItem = try $ do
   string "- "
-  term <- many1Till inline (try (whitespace >> string ":="))
+  term <- mconcat <$> many1Till inline (try (whitespace >> string ":="))
   def' <- multilineDef <|> inlineDef
   return (term, def')
-  where inlineDef :: Parser [Char] ParserState [[Block]]
-        inlineDef = liftM (\d -> [[Plain d]])
-                    $ optional whitespace >> many listInline <* newline
-        multilineDef :: Parser [Char] ParserState [[Block]]
+  where inlineDef :: Parser [Char] ParserState [Blocks]
+        inlineDef = liftM (\d -> [B.plain d])
+                    $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
+        multilineDef :: Parser [Char] ParserState [Blocks] 
         multilineDef = try $ do
           optional whitespace >> newline
           s <- many1Till anyChar (try (string "=:" >> newline))
@@ -288,59 +291,59 @@ blockBreak = try (newline >> blanklines >> return ()) <|>
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: Parser [Char] ParserState Block
+rawHtmlBlock :: Parser [Char] ParserState Blocks
 rawHtmlBlock = try $ do
   (_,b) <- htmlTag isBlockTag
   optional blanklines
-  return $ RawBlock (Format "html") b
+  return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Block
+rawLaTeXBlock' :: Parser [Char] ParserState Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
-  RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces)
+  B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: Parser [Char] ParserState Block
-para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
-
+para :: Parser [Char] ParserState Blocks
+para = do
+    a <- manyTill inline blockBreak 
+    return $ (B.para . trimInlines . mconcat) a
 
 -- Tables
 
 -- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState TableCell
+tableCell :: Parser [Char] ParserState Blocks
 tableCell = do
   c <- many1 (noneOf "|\n")
-  content <- parseFromString (many1 inline) c
-  return $ [ Plain $ normalizeSpaces content ]
+  content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
+  return $ B.plain content
 
 -- | A table row is made of many table cells
-tableRow :: Parser [Char] ParserState [TableCell]
+tableRow :: Parser [Char] ParserState [Blocks]
 tableRow = try $ ( char '|' *>
   (endBy1 tableCell (optional blankline *> char '|')) <* newline)
 
 -- | Many table rows
-tableRows :: Parser [Char] ParserState [[TableCell]]
+tableRows :: Parser [Char] ParserState [[Blocks]]
 tableRows = many1 tableRow
 
 -- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: Parser [Char] ParserState [TableCell]
+tableHeaders :: Parser [Char] ParserState [Blocks]
 tableHeaders = let separator = (try $ string "|_.") in
   try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
 
 -- | A table with an optional header. Current implementation can
 -- handle tables with and without header, but will parse cells
 -- alignment attributes as content.
-table :: Parser [Char] ParserState Block
+table :: Parser [Char] ParserState Blocks
 table = try $ do
-  headers <- option [] tableHeaders
+  headers <- option mempty tableHeaders
   rows <- tableRows
   blanklines
   let nbOfCols = max (length headers) (length $ head rows)
-  return $ Table []
-    (replicate nbOfCols AlignDefault)
-    (replicate nbOfCols 0.0)
+  return $ B.table mempty
+    (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
     headers
     rows
 
@@ -348,8 +351,8 @@ table = try $ do
 -- | Blocks like 'p' and 'table' do not need explicit block tag.
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: String  -- ^ block tag name
-                    -> Parser [Char] ParserState Block -- ^ implicit block
-                    -> Parser [Char] ParserState Block
+                    -> Parser [Char] ParserState Blocks -- ^ implicit block
+                    -> Parser [Char] ParserState Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ try $ string name >> attributes >> char '.' >>
     optional whitespace >> optional endline
@@ -363,12 +366,14 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: Parser [Char] ParserState Inline
-inline = choice inlineParsers <?> "inline"
+inline :: Parser [Char] ParserState Inlines
+inline = do
+    choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: [Parser [Char] ParserState Inline]
-inlineParsers = [ str
+inlineParsers :: [Parser [Char] ParserState Inlines]
+inlineParsers = [ inlineMarkup
+                , str
                 , whitespace
                 , endline
                 , code
@@ -378,58 +383,57 @@ inlineParsers = [ str
                 , rawLaTeXInline'
                 , note
                 , try $ (char '[' *> inlineMarkup <* char ']')
-                , inlineMarkup
                 , link
                 , image
                 , mark
-                , (Str . (:[])) <$> characterReference
+                , (B.str . (:[])) <$> characterReference
                 , smartPunctuation inline
                 , symbol
                 ]
 
 -- | Inline markups
-inlineMarkup :: Parser [Char] ParserState Inline
-inlineMarkup = choice [ simpleInline (string "??") (Cite [])
-                      , simpleInline (string "**") Strong
-                      , simpleInline (string "__") Emph
-                      , simpleInline (char '*') Strong
-                      , simpleInline (char '_') Emph
-                      , simpleInline (char '+') Emph  -- approximates underline
-                      , simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout
-                      , simpleInline (char '^') Superscript
-                      , simpleInline (char '~') Subscript
+inlineMarkup :: Parser [Char] ParserState Inlines
+inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
+                      , simpleInline (string "**") B.strong
+                      , simpleInline (string "__") B.emph
+                      , simpleInline (char '*') B.strong
+                      , simpleInline (char '_') B.emph
+                      , simpleInline (char '+') B.emph  -- approximates underline
+                      , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
+                      , simpleInline (char '^') B.superscript
+                      , simpleInline (char '~') B.subscript
                       ]
 
 -- | Trademark, registered, copyright
-mark :: Parser [Char] st Inline
+mark :: Parser [Char] st Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: Parser [Char] st Inline
+reg :: Parser [Char] st Inlines
 reg = do
   oneOf "Rr"
   char ')'
-  return $ Str "\174"
+  return $ B.str "\174"
 
-tm :: Parser [Char] st Inline
+tm :: Parser [Char] st Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
-  return $ Str "\8482"
+  return $ B.str "\8482"
 
-copy :: Parser [Char] st Inline
+copy :: Parser [Char] st Inlines
 copy = do
   oneOf "Cc"
   char ')'
-  return $ Str "\169"
+  return $ B.str "\169"
 
-note :: Parser [Char] ParserState Inline
+note :: Parser [Char] ParserState Inlines
 note = try $ do
   ref <- (char '[' *> many1 digit <* char ']')
   notes <- stateNotes <$> getState
   case lookup ref notes of
     Nothing   -> fail "note not found"
-    Just raw  -> liftM Note $ parseFromString parseBlocks raw
+    Just raw  -> B.note <$> parseFromString parseBlocks raw
 
 -- | Special chars
 markupChars :: [Char]
@@ -450,7 +454,7 @@ wordBoundaries = markupChars ++ stringBreakers
 hyphenedWords :: Parser [Char] ParserState String
 hyphenedWords = do
   x <- wordChunk
-  xs <- many (try $ char '-' >> wordChunk)
+  xs <-  many (try $ char '-' >> wordChunk)
   return $ intercalate "-" (x:xs)
 
 wordChunk :: Parser [Char] ParserState String
@@ -462,7 +466,7 @@ wordChunk = try $ do
   return $ hd:tl
 
 -- | Any string
-str :: Parser [Char] ParserState Inline
+str :: Parser [Char] ParserState Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -472,89 +476,89 @@ str = do
     acro <- enclosed (char '(') (char ')') anyChar
     return $ concat [baseStr, " (", acro, ")"]
   updateLastStrPos
-  return $ Str fullStr
+  return $ B.str fullStr
 
 -- | Textile allows HTML span infos, we discard them
-htmlSpan :: Parser [Char] ParserState Inline
-htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
+htmlSpan :: Parser [Char] ParserState Inlines
+htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
 
 -- | Some number of space chars
-whitespace :: Parser [Char] ParserState Inline
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
+whitespace :: Parser [Char] ParserState Inlines
+whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: Parser [Char] ParserState Inline
+endline :: Parser [Char] ParserState Inlines
 endline = try $ do
   newline >> notFollowedBy blankline
-  return LineBreak
+  return B.linebreak
 
-rawHtmlInline :: Parser [Char] ParserState Inline
-rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag
+rawHtmlInline :: Parser [Char] ParserState Inlines
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
-  rawLaTeXInline
+  B.singleton <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: Parser [Char] ParserState Inline
+link :: Parser [Char] ParserState Inlines
 link = linkB <|> linkNoB
 
-linkNoB :: Parser [Char] ParserState Inline
+linkNoB :: Parser [Char] ParserState Inlines
 linkNoB = try $ do
-  name <- surrounded (char '"') inline
+  name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline)
   char ':'
   let stopChars = "!.,;:"
   url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
-  let name' = if name == [Str "$"] then [Str url] else name
-  return $ Link name' (url, "")
+  let name' = if B.toList name == [Str "$"] then B.str url else name
+  return $ B.link url "" name'
 
-linkB :: Parser [Char] ParserState Inline
+linkB :: Parser [Char] ParserState Inlines
 linkB = try $ do
   char '['
-  name <- surrounded (char '"') inline
+  name <- mconcat <$> surrounded (char '"') inline
   char ':'
   url <- manyTill nonspaceChar (char ']')
-  let name' = if name == [Str "$"] then [Str url] else name
-  return $ Link name' (url, "")
+  let name' = if B.toList name == [Str "$"] then B.str url else name
+  return $ B.link url "" name'
 
 -- | image embedding
-image :: Parser [Char] ParserState Inline
+image :: Parser [Char] ParserState Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   src <- manyTill anyChar (lookAhead $ oneOf "!(")
   alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
   char '!'
-  return $ Image [Str alt] (src, alt)
+  return $ B.image src alt (B.str alt)
 
-escapedInline :: Parser [Char] ParserState Inline
+escapedInline :: Parser [Char] ParserState Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: Parser [Char] ParserState Inline
-escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
+escapedEqs :: Parser [Char] ParserState Inlines
+escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: Parser [Char] ParserState Inline
-escapedTag = Str <$>
+escapedTag :: Parser [Char] ParserState Inlines
+escapedTag = B.str <$>
   (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: Parser [Char] ParserState Inline
-symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
+symbol :: Parser [Char] ParserState Inlines
+symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
 
 -- | Inline code
-code :: Parser [Char] ParserState Inline
+code :: Parser [Char] ParserState Inlines
 code = code1 <|> code2
 
-code1 :: Parser [Char] ParserState Inline
-code1 = Code nullAttr <$> surrounded (char '@') anyChar
+code1 :: Parser [Char] ParserState Inlines
+code1 = B.code <$> surrounded (char '@') anyChar
 
-code2 :: Parser [Char] ParserState Inline
+code2 :: Parser [Char] ParserState Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
-  Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
+  B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
 attributes :: Parser [Char] ParserState Attr
@@ -581,7 +585,7 @@ styleAttr = do
 
 langAttr :: Parser [Char] ParserState (Attr -> Attr)
 langAttr = do
-  lang <- try $ enclosed (char '[') (char ']') anyChar
+  lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
 
 -- | Parses material surrounded by a parser.
@@ -590,14 +594,43 @@ surrounded :: Parser [Char] st t   -- ^ surrounding parser
 	    -> Parser [Char] st [a]
 surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
--- | Inlines are most of the time of the same form
+
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
-                -> ([Inline] -> Inline)       -- ^ Inline constructor
-                -> Parser [Char] ParserState Inline   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border inlineWithAttribute >>=
-                                return . construct . normalizeSpaces
-  where inlineWithAttribute = (try $ optional attributes) >> inline
+                -> (Inlines -> Inlines)       -- ^ Inline constructor
+                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
+simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct 
+
+ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
+                -> (Inlines -> Inlines)       -- ^ Inline constructor
+                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
+ungroupedSimpleInline border construct = try $ do
+  st <- getState
+  pos <- getPosition
+  isWhitespace <- option False (whitespace >> return True)
+  guard $ (stateQuoteContext st /= NoQuote) 
+       || (sourceColumn pos == 1) 
+       || isWhitespace
+  body <- surrounded border inlineWithAttribute
+  lookAhead (notFollowedBy alphaNum)
+  let result = construct $ mconcat body 
+  return $ if isWhitespace then B.space <> result 
+                           else result
+  where 
+    inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") 
+        >> (withQuoteContext InSingleQuote inline)
+
+
+groupedSimpleInline :: Parser [Char] ParserState t 
+                  -> (Inlines -> Inlines)
+                  -> Parser [Char] ParserState Inlines 
+groupedSimpleInline border construct = try $ do
+    char '['  
+    withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']'
+
+    
+
 
 -- | Create a singleton list
 singleton :: a -> [a]
 singleton x = [x]
+
-- 
cgit v1.2.3


From 9b5d474e79c0b508ac0da9943b9bb385671aad85 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Thu, 27 Mar 2014 19:53:32 +0000
Subject: Converted HTML reader to use builder. Fixes #1162.

---
 src/Text/Pandoc/Readers/HTML.hs | 235 +++++++++++++++++++++-------------------
 1 file changed, 126 insertions(+), 109 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d1e4d0024..4fab251bb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -40,6 +40,7 @@ import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
 import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
@@ -48,6 +49,8 @@ import Data.List ( intercalate )
 import Data.Char ( isDigit )
 import Control.Monad ( liftM, guard, when, mzero )
 import Control.Applicative ( (<$>), (<$), (<*) )
+import Data.Monoid
+import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl)
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -66,30 +69,30 @@ readHtml opts inp =
     where tags = canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
           parseDoc = do
-             blocks <- (fixPlains False . concat) <$> manyTill block eof
+             blocks <- (fixPlains False) . mconcat <$> manyTill block eof
              meta <- stateMeta <$> getState
-             return $ Pandoc meta blocks
+             return $ Pandoc meta (B.toList blocks)
 
 type TagParser = Parser [Tag String] ParserState
 
-pBody :: TagParser [Block]
+pBody :: TagParser Blocks
 pBody = pInTags "body" block
 
-pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
-  where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
-        setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+pHead :: TagParser Blocks
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
+  where pTitle = pInTags "title" inline >>= setTitle . trimInlines
+        setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
         pMetaTag = do
           mt <- pSatisfy (~== TagOpen "meta" [])
           let name = fromAttrib "name" mt
           if null name
-             then return []
+             then return mempty
              else do
                let content = fromAttrib "content" mt
                updateState $ B.setMeta name (B.text content)
-               return []
+               return mempty
 
-block :: TagParser [Block]
+block :: TagParser Blocks
 block = choice
             [ pPara
             , pHeader
@@ -105,10 +108,10 @@ block = choice
             , pRawHtmlBlock
             ]
 
-pList :: TagParser [Block]
+pList :: TagParser Blocks
 pList = pBulletList <|> pOrderedList <|> pDefinitionList
 
-pBulletList :: TagParser [Block]
+pBulletList :: TagParser Blocks
 pBulletList = try $ do
   pSatisfy (~== TagOpen "ul" [])
   let nonItem = pSatisfy (\t ->
@@ -118,9 +121,9 @@ pBulletList = try $ do
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
   items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
-  return [BulletList $ map (fixPlains True) items]
+  return $ B.bulletList $ map (fixPlains True) items
 
-pOrderedList :: TagParser [Block]
+pOrderedList :: TagParser Blocks
 pOrderedList = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
   let (start, style) = (sta', sty')
@@ -146,27 +149,27 @@ pOrderedList = try $ do
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
   items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
-  return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items]
+  return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
 
-pDefinitionList :: TagParser [Block]
+pDefinitionList :: TagParser Blocks
 pDefinitionList = try $ do
   pSatisfy (~== TagOpen "dl" [])
   items <- manyTill pDefListItem (pCloses "dl")
-  return [DefinitionList items]
+  return $ B.definitionList items
 
-pDefListItem :: TagParser ([Inline],[[Block]])
+pDefListItem :: TagParser (Inlines, [Blocks])
 pDefListItem = try $ do
   let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
                   not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = intercalate [LineBreak] terms
+  let term = foldl1 (\x y ->  x <> B.linebreak <> y) terms
   return (term, map (fixPlains True) defs)
 
-fixPlains :: Bool -> [Block] -> [Block]
-fixPlains inList bs = if any isParaish bs
-                         then map plainToPara bs
+fixPlains :: Bool -> Blocks -> Blocks
+fixPlains inList bs = if any isParaish bs'
+                         then B.fromList $ map plainToPara bs'
                          else bs
   where isParaish (Para _) = True
         isParaish (CodeBlock _ _) = True
@@ -178,6 +181,7 @@ fixPlains inList bs = if any isParaish bs
         isParaish _        = False
         plainToPara (Plain xs) = Para xs
         plainToPara x = x
+        bs' = B.toList bs
 
 pRawTag :: TagParser String
 pRawTag = do
@@ -187,20 +191,20 @@ pRawTag = do
      then return []
      else return $ renderTags' [tag]
 
-pDiv :: TagParser [Block]
+pDiv :: TagParser Blocks
 pDiv = try $ do
   getOption readerParseRaw >>= guard
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
   contents <- pInTags "div" block
-  return [Div (mkAttr attr) contents]
+  return $ B.divWith (mkAttr attr) contents
 
-pRawHtmlBlock :: TagParser [Block]
+pRawHtmlBlock :: TagParser Blocks
 pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
   parseRaw <- getOption readerParseRaw
   if parseRaw && not (null raw)
-     then return [RawBlock (Format "html") raw]
-     else return []
+     then return $ B.rawBlock "html" raw
+     else return mempty
 
 pHtmlBlock :: String -> TagParser String
 pHtmlBlock t = try $ do
@@ -208,35 +212,34 @@ pHtmlBlock t = try $ do
   contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
   return $ renderTags' $ [open] ++ contents ++ [TagClose t]
 
-pHeader :: TagParser [Block]
+pHeader :: TagParser Blocks
 pHeader = try $ do
   TagOpen tagtype attr <- pSatisfy $
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
   let level = read (drop 1 tagtype)
-  contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
+  contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   return $ if bodyTitle
-              then []  -- skip a representation of the title in the body
-              else [Header level (ident, classes, keyvals) $
-                    normalizeSpaces contents]
+              then mempty  -- skip a representation of the title in the body
+              else B.headerWith (ident, classes, keyvals) level contents
 
-pHrule :: TagParser [Block]
+pHrule :: TagParser Blocks
 pHrule = do
   pSelfClosing (=="hr") (const True)
-  return [HorizontalRule]
+  return B.horizontalRule
 
-pTable :: TagParser [Block]
+pTable :: TagParser Blocks
 pTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
-  caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
+  caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
   -- TODO actually read these and take width information from them
   widths' <- pColgroup <|> many pCol
-  head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+  head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
   skipMany pBlank
   rows <- pOptInTag "tbody"
           $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
@@ -245,19 +248,21 @@ pTable = try $ do
   let isSinglePlain []        = True
       isSinglePlain [Plain _] = True
       isSinglePlain _         = False
-  let isSimple = all isSinglePlain $ concat (head':rows)
-  let cols = length $ if null head'
-                         then head rows
-                         else head'
+  let lHead = B.toList head'
+  let lRows = map B.toList rows
+  let isSimple = all isSinglePlain (lHead:lRows)
+  let cols = length $ if null lHead
+                         then head lRows
+                         else lHead
   -- fail if there are colspans or rowspans
-  guard $ all (\r -> length r == cols) rows
+  guard $ all (\r -> length r == cols) lRows
   let aligns = replicate cols AlignLeft
   let widths = if null widths'
                   then if isSimple
                        then replicate cols 0
                        else replicate cols (1.0 / fromIntegral cols)
                   else widths'
-  return [Table caption aligns widths head' rows]
+  return $ B.table caption (zip aligns widths) [head'] [rows]
 
 pCol :: TagParser Double
 pCol = try $ do
@@ -275,31 +280,31 @@ pColgroup = try $ do
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
-pCell :: String -> TagParser [TableCell]
+pCell :: String -> TagParser Blocks
 pCell celltype = try $ do
   skipMany pBlank
   res <- pInTags celltype block
   skipMany pBlank
-  return [res]
+  return res
 
-pBlockQuote :: TagParser [Block]
+pBlockQuote :: TagParser Blocks
 pBlockQuote = do
   contents <- pInTags "blockquote" block
-  return [BlockQuote $ fixPlains False contents]
+  return $ B.blockQuote $ fixPlains False contents
 
-pPlain :: TagParser [Block]
+pPlain :: TagParser Blocks
 pPlain = do
-  contents <- liftM (normalizeSpaces . concat) $ many1 inline
-  if null contents
-     then return []
-     else return [Plain contents]
+  contents <- trimInlines . mconcat <$> many1 inline
+  if B.isNull contents
+     then return mempty
+     else return $ B.plain contents
 
-pPara :: TagParser [Block]
+pPara :: TagParser Blocks
 pPara = do
-  contents <- pInTags "p" inline
-  return [Para $ normalizeSpaces contents]
+  contents <- trimInlines <$> pInTags "p" inline
+  return $ B.para contents
 
-pCodeBlock :: TagParser [Block]
+pCodeBlock :: TagParser Blocks
 pCodeBlock = try $ do
   TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
   contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
@@ -312,9 +317,9 @@ pCodeBlock = try $ do
   let result = case reverse result' of
                     '\n':_   -> init result'
                     _        -> result'
-  return [CodeBlock (mkAttr attr) result]
+  return $ B.codeBlockWith (mkAttr attr) result
 
-inline :: TagParser [Inline]
+inline :: TagParser Inlines
 inline = choice
            [ pTagText
            , pQ
@@ -354,7 +359,7 @@ pSelfClosing f g = do
   optional $ pSatisfy (tagClose f)
   return open
 
-pQ :: TagParser [Inline]
+pQ :: TagParser Inlines
 pQ = do
   quoteContext <- stateQuoteContext `fmap` getState
   let quoteType = case quoteContext of
@@ -363,82 +368,93 @@ pQ = do
   let innerQuoteContext = if quoteType == SingleQuote
                              then InSingleQuote
                              else InDoubleQuote
-  withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType)
+  let constructor = case quoteType of
+                            SingleQuote -> B.singleQuoted 
+                            DoubleQuote -> B.doubleQuoted
+  withQuoteContext innerQuoteContext $ 
+    pInlinesInTags "q" constructor
 
-pEmph :: TagParser [Inline]
-pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph
+pEmph :: TagParser Inlines
+pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
 
-pStrong :: TagParser [Inline]
-pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
+pStrong :: TagParser Inlines
+pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
 
-pSuperscript :: TagParser [Inline]
-pSuperscript = pInlinesInTags "sup" Superscript
+pSuperscript :: TagParser Inlines
+pSuperscript = pInlinesInTags "sup" B.superscript
 
-pSubscript :: TagParser [Inline]
-pSubscript = pInlinesInTags "sub" Subscript
+pSubscript :: TagParser Inlines
+pSubscript = pInlinesInTags "sub" B.subscript
 
-pStrikeout :: TagParser [Inline]
+pStrikeout :: TagParser Inlines
 pStrikeout = do
-  pInlinesInTags "s" Strikeout <|>
-    pInlinesInTags "strike" Strikeout <|>
-    pInlinesInTags "del" Strikeout <|>
+  pInlinesInTags "s" B.strikeout <|>
+    pInlinesInTags "strike" B.strikeout <|>
+    pInlinesInTags "del" B.strikeout <|>
     try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
-            contents <- liftM concat $ manyTill inline (pCloses "span")
-            return [Strikeout contents])
+            contents <- mconcat <$> manyTill inline (pCloses "span")
+            return $ B.strikeout contents)
 
-pLineBreak :: TagParser [Inline]
+pLineBreak :: TagParser Inlines
 pLineBreak = do
   pSelfClosing (=="br") (const True)
-  return [LineBreak]
+  return B.linebreak
 
-pLink :: TagParser [Inline]
+pLink :: TagParser Inlines
 pLink = try $ do
   tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
   let url = fromAttrib "href" tag
   let title = fromAttrib "title" tag
-  lab <- liftM concat $ manyTill inline (pCloses "a")
-  return [Link (normalizeSpaces lab) (escapeURI url, title)]
+  lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+  return $ B.link (escapeURI url) title lab 
 
-pImage :: TagParser [Inline]
+pImage :: TagParser Inlines
 pImage = do
   tag <- pSelfClosing (=="img") (isJust . lookup "src")
   let url = fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  return [Image (B.toList $ B.text alt) (escapeURI url, title)]
+  return $ B.image (escapeURI url) title (B.text alt)
 
-pCode :: TagParser [Inline]
+pCode :: TagParser Inlines
 pCode = try $ do
   (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
   result <- manyTill pAnyTag (pCloses open)
-  return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+  return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
 
-pSpan :: TagParser [Inline]
+pSpan :: TagParser Inlines
 pSpan = try $ do
   getOption readerParseRaw >>= guard
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
-  return [Span (mkAttr attr) contents]
+  return $ B.spanWith (mkAttr attr) contents
 
-pRawHtmlInline :: TagParser [Inline]
+pRawHtmlInline :: TagParser Inlines
 pRawHtmlInline = do
   result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
   parseRaw <- getOption readerParseRaw
   if parseRaw
-     then return [RawInline (Format "html") $ renderTags' [result]]
-     else return []
+     then return $ B.rawInline "html" $ renderTags' [result]
+     else return mempty
 
-pInlinesInTags :: String -> ([Inline] -> Inline)
-               -> TagParser [Inline]
+pInlinesInTags :: String -> (Inlines -> Inlines)
+               -> TagParser Inlines
 pInlinesInTags tagtype f = do
-  contents <- pInTags tagtype inline
-  return [f $ normalizeSpaces contents]
-
-pInTags :: String -> TagParser [a]
-        -> TagParser [a]
+  contents <- B.unMany <$> pInTags tagtype inline
+  let left  = case viewl contents of
+                    EmptyL -> mempty
+                    (a :< _) -> padSpace a
+  let right = case viewr contents of
+                    EmptyR   -> mempty
+                    (_ :> a) -> padSpace a
+  return (left <> f (trimInlines . B.Many $ contents) <> right)
+  where padSpace a = if a == Space then B.space else mempty
+
+pInTags :: (Monoid a) => String -> TagParser a
+        -> TagParser a
 pInTags tagtype parser = try $ do
   pSatisfy (~== TagOpen tagtype [])
-  liftM concat $ manyTill parser (pCloses tagtype <|> eof)
+  mconcat <$> manyTill parser (pCloses tagtype <|> eof)
 
 pOptInTag :: String -> TagParser a
           -> TagParser a
@@ -461,36 +477,36 @@ pCloses tagtype = try $ do
        (TagClose "dl") | tagtype == "li" -> return ()
        _ -> mzero
 
-pTagText :: TagParser [Inline]
+pTagText :: TagParser Inlines
 pTagText = try $ do
   (TagText str) <- pSatisfy isTagText
   st <- getState
   case runParser (many pTagContents) st "text" str of
        Left _        -> fail $ "Could not parse `" ++ str ++ "'"
-       Right result  -> return result
+       Right result  -> return $ mconcat result
 
 pBlank :: TagParser ()
 pBlank = try $ do
   (TagText str) <- pSatisfy isTagText
   guard $ all isSpace str
 
-pTagContents :: Parser [Char] ParserState Inline
+pTagContents :: Parser [Char] ParserState Inlines
 pTagContents =
-      Math DisplayMath `fmap` mathDisplay
-  <|> Math InlineMath  `fmap` mathInline
+      B.displayMath <$> mathDisplay
+  <|> B.math        <$> mathInline
   <|> pStr
   <|> pSpace
   <|> smartPunctuation pTagContents
   <|> pSymbol
   <|> pBad
 
-pStr :: Parser [Char] ParserState Inline
+pStr :: Parser [Char] ParserState Inlines
 pStr = do
   result <- many1 $ satisfy $ \c ->
                      not (isSpace c) && not (isSpecial c) && not (isBad c)
   pos <- getPosition
   updateState $ \s -> s{ stateLastStrPos = Just pos }
-  return $ Str result
+  return $ B.str result
 
 isSpecial :: Char -> Bool
 isSpecial '"' = True
@@ -504,13 +520,13 @@ isSpecial '\8220' = True
 isSpecial '\8221' = True
 isSpecial _ = False
 
-pSymbol :: Parser [Char] ParserState Inline
-pSymbol = satisfy isSpecial >>= return . Str . (:[])
+pSymbol :: Parser [Char] ParserState Inlines
+pSymbol = satisfy isSpecial >>= return . B.str . (:[])
 
 isBad :: Char -> Bool
 isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
 
-pBad :: Parser [Char] ParserState Inline
+pBad :: Parser [Char] ParserState Inlines
 pBad = do
   c <- satisfy isBad
   let c' = case c of
@@ -542,10 +558,10 @@ pBad = do
                 '\158' -> '\382'
                 '\159' -> '\376'
                 _      -> '?'
-  return $ Str [c']
+  return $ B.str [c']
 
-pSpace :: Parser [Char] ParserState Inline
-pSpace = many1 (satisfy isSpace) >> return Space
+pSpace :: Parser [Char] ParserState Inlines
+pSpace = many1 (satisfy isSpace) >> return B.space
 
 --
 -- Constants
@@ -679,3 +695,4 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
         attribsClasses = words $ fromMaybe "" $ lookup "class" attr
         attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
 
+
-- 
cgit v1.2.3


From 5a51a67abda59c177f3a6d0f6cba59d41e866287 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Thu, 27 Mar 2014 19:56:47 +0000
Subject: Changed the smart punctuation parser to return Inlines rather than an
 Inline element and updated files accordingly

---
 src/Text/Pandoc/Parsing.hs          | 43 ++++++++++++++++++-------------------
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 src/Text/Pandoc/Readers/RST.hs      |  2 +-
 src/Text/Pandoc/Readers/Textile.hs  |  4 +---
 4 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 68d4605ee..a9009eaa2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -997,17 +997,17 @@ registerHeader (ident,classes,kvs) header' = do
 failUnlessSmart :: HasReaderOptions st => Parser s st ()
 failUnlessSmart = getOption readerSmart >>= guard
 
-smartPunctuation :: Parser [Char] ParserState Inline
-                 -> Parser [Char] ParserState Inline
+smartPunctuation :: Parser [Char] ParserState Inlines
+                 -> Parser [Char] ParserState Inlines
 smartPunctuation inlineParser = do
   failUnlessSmart
   choice [ quoted inlineParser, apostrophe, dash, ellipses ]
 
-apostrophe :: Parser [Char] ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
+apostrophe :: Parser [Char] ParserState Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
 
-quoted :: Parser [Char] ParserState Inline
-       -> Parser [Char] ParserState Inline
+quoted :: Parser [Char] ParserState Inlines
+       -> Parser [Char] ParserState Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
 withQuoteContext :: QuoteContext
@@ -1022,20 +1022,19 @@ withQuoteContext context parser = do
   setState newState { stateQuoteContext = oldQuoteContext }
   return result
 
-singleQuoted :: Parser [Char] ParserState Inline
-             -> Parser [Char] ParserState Inline
+singleQuoted :: Parser [Char] ParserState Inlines
+             -> Parser [Char] ParserState Inlines
 singleQuoted inlineParser = try $ do
   singleQuoteStart
   withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
-    return . Quoted SingleQuote . normalizeSpaces
+    return . B.singleQuoted . mconcat
 
-doubleQuoted :: Parser [Char] ParserState Inline
-             -> Parser [Char] ParserState Inline
+doubleQuoted :: Parser [Char] ParserState Inlines
+             -> Parser [Char] ParserState Inlines
 doubleQuoted inlineParser = try $ do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $ do
-    contents <- manyTill inlineParser doubleQuoteEnd
-    return . Quoted DoubleQuote . normalizeSpaces $ contents
+  withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
+    return . B.doubleQuoted . mconcat
 
 failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
 failIfInQuoteContext context = do
@@ -1079,17 +1078,17 @@ doubleQuoteEnd = do
   charOrRef "\"\8221\148"
   return ()
 
-ellipses :: Parser [Char] st Inline
+ellipses :: Parser [Char] st Inlines
 ellipses = do
   try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
-  return (Str "\8230")
+  return (B.str "\8230")
 
-dash :: Parser [Char] ParserState Inline
+dash :: Parser [Char] ParserState Inlines
 dash = do
   oldDashes <- getOption readerOldDashes
   if oldDashes
      then emDashOld <|> enDashOld
-     else Str `fmap` (hyphenDash <|> emDash <|> enDash)
+     else B.str `fmap` (hyphenDash <|> emDash <|> enDash)
 
 -- Two hyphens = en-dash, three = em-dash
 hyphenDash :: Parser [Char] st String
@@ -1107,16 +1106,16 @@ enDash = do
   try (charOrRef "\8212\151")
   return "\8211"
 
-enDashOld :: Parser [Char] st Inline
+enDashOld :: Parser [Char] st Inlines
 enDashOld = do
   try (charOrRef "\8211\150") <|>
     try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
-  return (Str "\8211")
+  return (B.str "\8211")
 
-emDashOld :: Parser [Char] st Inline
+emDashOld :: Parser [Char] st Inlines
 emDashOld = do
   try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
-  return (Str "\8212")
+  return (B.str "\8212")
 
 -- This is used to prevent exponential blowups for things like:
 -- a**a*a**a*a**a*a**a*a**a*a**a*a**
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index aa0252266..57e1ca560 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1873,7 +1873,7 @@ smart :: MarkdownParser (F Inlines)
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+    choice (map (return <$>) [apostrophe, dash, ellipses])
 
 singleQuoted :: MarkdownParser (F Inlines)
 singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 127eae167..a574f343a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1140,7 +1140,7 @@ smart :: RSTParser Inlines
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (B.singleton <$>) [apostrophe, dash, ellipses])
+    choice [apostrophe, dash, ellipses]
 
 singleQuoted :: RSTParser Inlines
 singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ede50c6de..c6f992275 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -306,9 +306,7 @@ rawLaTeXBlock' = do
 
 -- | In textile, paragraphs are separated by blank lines.
 para :: Parser [Char] ParserState Blocks
-para = do
-    a <- manyTill inline blockBreak 
-    return $ (B.para . trimInlines . mconcat) a
+para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
 
 -- Tables
 
-- 
cgit v1.2.3


From 1cadba16eb91a1f9dae03d87201fe1e8a7e0d1b6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 1 Apr 2014 10:10:46 -0700
Subject: HTML reader: idiomatic rewriting for clarity.

---
 src/Text/Pandoc/Readers/HTML.hs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4fab251bb..38619d040 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -442,13 +442,12 @@ pInlinesInTags :: String -> (Inlines -> Inlines)
 pInlinesInTags tagtype f = do
   contents <- B.unMany <$> pInTags tagtype inline
   let left  = case viewl contents of
-                    EmptyL -> mempty
-                    (a :< _) -> padSpace a
+                    (Space :< _) -> B.space
+                    _            -> mempty
   let right = case viewr contents of
-                    EmptyR   -> mempty
-                    (_ :> a) -> padSpace a
+                    (_ :> Space) -> B.space
+                    _            -> mempty
   return (left <> f (trimInlines . B.Many $ contents) <> right)
-  where padSpace a = if a == Space then B.space else mempty
 
 pInTags :: (Monoid a) => String -> TagParser a
         -> TagParser a
-- 
cgit v1.2.3


From 4ee92dce0ce624db2d02c60ae2856a70cfeb6c42 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 1 Apr 2014 10:36:23 -0700
Subject: MediaWiki reader: Fixed bug in certain nested lists.

The bug: If a level 2 list was followed by a level 1 list, the first
item of the level 1 list would be lost.

Closes #1213.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index f70b44aad..9bbabd44b 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -438,7 +438,8 @@ listItem c = try $ do
        skipMany spaceChar
        first <- concat <$> manyTill listChunk newline
        rest <- many
-                (try $ string extras *> (concat <$> manyTill listChunk newline))
+                (try $ string extras *> lookAhead listStartChar *>
+                       (concat <$> manyTill listChunk newline))
        contents <- parseFromString (many1 $ listItem' c)
                           (unlines (first : rest))
        case c of
-- 
cgit v1.2.3


From fa0f73aef9d9ff3ca89c1479e3644ac7e7c15991 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Apr 2014 10:07:56 -0700
Subject: Custom writer:  read lua script as UTF-8.

This should fix #1189.
---
 src/Text/Pandoc/Writers/Custom.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 0234e1e35..0d58afb61 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -37,6 +37,7 @@ import Data.Char ( toLower )
 import Scripting.Lua (LuaState, StackValue, callfunc)
 import qualified Scripting.Lua as Lua
 import Text.Pandoc.UTF8 (fromString, toString)
+import qualified Text.Pandoc.UTF8 as UTF8
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as C8
 import Data.Monoid
@@ -131,7 +132,7 @@ instance StackValue MetaValue where
 -- | Convert Pandoc to custom markup.
 writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
 writeCustom luaFile opts doc = do
-  luaScript <- readFile luaFile
+  luaScript <- UTF8.readFile luaFile
   lua <- Lua.newstate
   Lua.openlibs lua
   Lua.loadstring lua luaScript "custom"
-- 
cgit v1.2.3


From 036576c5a2660897e1c977407c621c8c436b7600 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 4 Apr 2014 15:43:47 -0700
Subject: Correctly handle UTF-8 in custom lua scripts. Closes #1189.

---
 src/Text/Pandoc/Writers/Custom.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 0d58afb61..0b30287f5 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -37,7 +37,6 @@ import Data.Char ( toLower )
 import Scripting.Lua (LuaState, StackValue, callfunc)
 import qualified Scripting.Lua as Lua
 import Text.Pandoc.UTF8 (fromString, toString)
-import qualified Text.Pandoc.UTF8 as UTF8
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as C8
 import Data.Monoid
@@ -132,7 +131,7 @@ instance StackValue MetaValue where
 -- | Convert Pandoc to custom markup.
 writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
 writeCustom luaFile opts doc = do
-  luaScript <- UTF8.readFile luaFile
+  luaScript <- C8.unpack `fmap` C8.readFile luaFile
   lua <- Lua.newstate
   Lua.openlibs lua
   Lua.loadstring lua luaScript "custom"
-- 
cgit v1.2.3


From 98658b55fc922d9ad3d883bdc3bdaddd2318dff2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 4 Apr 2014 17:39:56 -0700
Subject: LaTeX writer: handle line breaks in simple table cells.

Closes #1217.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 07be6e9af..6cf7ed730 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -500,9 +500,28 @@ tableRowToLaTeX header aligns widths cols = do
   cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
   return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace"
 
+-- For simple latex tables (without minipages or parboxes),
+-- we need to go to some lengths to get line breaks working:
+-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
+fixLineBreaks :: Block -> Block
+fixLineBreaks (Para ils)  = Para $ fixLineBreaks' ils
+fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
+fixLineBreaks x           = x
+
+fixLineBreaks' :: [Inline] -> [Inline]
+fixLineBreaks' ils = case splitBy (== LineBreak) ils of
+                       []     -> []
+                       [xs]   -> xs
+                       chunks -> RawInline "tex" "\\vtop{" :
+                                 concatMap tohbox chunks ++
+                                 [RawInline "tex" "}"]
+  where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++
+                    [RawInline "tex" "}"]
+
 tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
                  -> State WriterState Doc
-tableCellToLaTeX _      (0,     _,     blocks) = blockListToLaTeX blocks
+tableCellToLaTeX _      (0,     _,     blocks) =
+  blockListToLaTeX $ walk fixLineBreaks blocks
 tableCellToLaTeX header (width, align, blocks) = do
   modify $ \st -> st{ stInMinipage = True, stNotes = [] }
   cellContents <- blockListToLaTeX blocks
-- 
cgit v1.2.3


From b1c865ccc6c9a66fbebe59575bd1c3a4a8081cd3 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Fri, 4 Apr 2014 15:18:30 +0100
Subject: Converted current meta information parsing in DocBook to a more
 extensible version which is aware of the more recent meta representation.

---
 src/Text/Pandoc/Readers/DocBook.hs | 82 ++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 34 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 56cb16b20..2c7ec61d3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -499,24 +499,19 @@ type DB = State DBState
 
 data DBState = DBState{ dbSectionLevel :: Int
                       , dbQuoteType    :: QuoteType
-                      , dbDocTitle     :: Inlines
-                      , dbDocAuthors   :: [Inlines]
-                      , dbDocDate      :: Inlines
+                      , dbMeta         :: Meta
+                      , dbAcceptsMeta  :: Bool
                       , dbBook         :: Bool
                       , dbFigureTitle  :: Inlines
                       } deriving Show
 
 readDocBook :: ReaderOptions -> String -> Pandoc
-readDocBook _ inp  = setTitle (dbDocTitle st')
-                   $ setAuthors (dbDocAuthors st')
-                   $ setDate (dbDocDate st')
-                   $ doc $ mconcat bs
+readDocBook _ inp  = Pandoc (dbMeta st') (toList $ mconcat bs)
   where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
                              DBState{ dbSectionLevel = 0
                                     , dbQuoteType = DoubleQuote
-                                    , dbDocTitle = mempty
-                                    , dbDocAuthors = []
-                                    , dbDocDate = mempty
+                                    , dbMeta = mempty
+                                    , dbAcceptsMeta = False
                                     , dbBook = False
                                     , dbFigureTitle = mempty
                                     }
@@ -560,6 +555,29 @@ attrValue attr elt =
 named :: String -> Element -> Bool
 named s e = qName (elName e) == s
 
+-- 
+
+acceptingMetadata :: DB a -> DB a
+acceptingMetadata p = do
+  modify (\s -> s { dbAcceptsMeta = True } )
+  res <- p
+  modify (\s -> s { dbAcceptsMeta = False })
+  return res 
+
+checkInMeta :: Monoid a => DB a -> DB a 
+checkInMeta p = do
+  accepts <- dbAcceptsMeta <$> get 
+  if accepts then p else return mempty
+  
+  
+   
+addMeta :: ToMetaValue a => String -> a -> DB ()
+addMeta field val = modify (setMeta field val)
+
+instance HasMeta DBState where
+  setMeta str v s =  s {dbMeta = setMeta str v (dbMeta s)}
+
+
 isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blocktags
   where blocktags = ["toc","index","para","formalpara","simpara",
@@ -606,6 +624,7 @@ getImage e = do
 getBlocks :: Element -> DB Blocks
 getBlocks e =  mconcat <$> (mapM parseBlock $ elContent e)
 
+
 parseBlock :: Content -> DB Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
 parseBlock (Text (CData _ s _)) = if all isSpace s
@@ -630,7 +649,9 @@ parseBlock (Elem e) =
         "attribution" -> return mempty
         "titleabbrev" -> return mempty
         "authorinitials" -> return mempty
-        "title" -> return mempty -- handled by getTitle or sect or figure
+        "title" ->  checkInMeta getTitle >> return mempty -- handled by getTitle or sect or figure
+        "author" -> checkInMeta getAuthor >> return mempty
+        "date" -> checkInMeta getDate >> return mempty
         "bibliography" -> sect 0
         "bibliodiv" -> sect 1
         "biblioentry" -> parseMixed para (elContent e)
@@ -693,8 +714,8 @@ parseBlock (Elem e) =
         "figure" -> getFigure e
         "mediaobject" -> para <$> getImage e
         "caption" -> return mempty
-        "info" -> getTitle >> getAuthors >> getDate >> return mempty
-        "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
+        "info" -> metaBlock 
+        "articleinfo" -> metaBlock
         "sectioninfo" -> return mempty  -- keywords & other metadata
         "refsectioninfo" -> return mempty  -- keywords & other metadata
         "refsect1info" -> return mempty  -- keywords & other metadata
@@ -708,10 +729,10 @@ parseBlock (Elem e) =
         "chapterinfo" -> return mempty  -- keywords & other metadata
         "glossaryinfo" -> return mempty  -- keywords & other metadata
         "appendixinfo" -> return mempty  -- keywords & other metadata
-        "bookinfo" -> getTitle >> getAuthors >> getDate >> return mempty
+        "bookinfo" -> metaBlock
         "article" -> modify (\st -> st{ dbBook = False }) >>
-                          getTitle >> getBlocks e
-        "book" -> modify (\st -> st{ dbBook = True }) >> getTitle >> getBlocks e
+                           getBlocks e
+        "book" -> modify (\st -> st{ dbBook = True }) >>  getBlocks e
         "table" -> parseTable
         "informaltable" -> parseTable
         "literallayout" -> codeBlockWithLang
@@ -757,24 +778,16 @@ parseBlock (Elem e) =
                      terms' <- mapM getInlines terms
                      items' <- mapM getBlocks items
                      return (mconcat $ intersperse (str "; ") terms', items')
-         getTitle = case filterChild (named "title") e of
-                         Just t  -> do
-                            tit <- getInlines t
-                            subtit <-  case filterChild (named "subtitle") e of
-                                            Just s  -> (text ": " <>) <$>
-                                                         getInlines s
-                                            Nothing -> return mempty
-                            modify $ \st -> st{dbDocTitle = tit <> subtit}
-                         Nothing -> return ()
-         getAuthors = do
-                      auths <- mapM getInlines
-                               $ filterChildren (named "author") e
-                      modify $ \st -> st{dbDocAuthors = auths}
-         getDate = case filterChild (named "date") e of
-                         Just t  -> do
-                            dat <- getInlines t
-                            modify $ \st -> st{dbDocDate = dat}
-                         Nothing -> return ()
+         getTitle =  do
+                     tit <- getInlines e
+                     subtit <-  case filterChild (named "subtitle") e of
+                                  Just s  -> (text ": " <>) <$>
+                                              getInlines s
+                                  Nothing -> return mempty
+                     addMeta "title" (tit <> subtit)
+                     
+         getAuthor = getInlines e >>= addMeta "author"
+         getDate = getInlines e >>= addMeta "date" 
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
                       caption <- case filterChild isCaption e of
@@ -836,6 +849,7 @@ parseBlock (Elem e) =
                      b <- getBlocks e
                      modify $ \st -> st{ dbSectionLevel = n - 1 }
                      return $ header n' headerText <> b
+         metaBlock = acceptingMetadata (getBlocks e)
 
 getInlines :: Element -> DB Inlines
 getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
-- 
cgit v1.2.3


From 1b930a9670403f9c6ceadbc067fb3ac7400bd042 Mon Sep 17 00:00:00 2001
From: Matthew Pickering <matthewtpickering@gmail.com>
Date: Fri, 4 Apr 2014 16:21:24 +0100
Subject: Added recognition of authorgroup element and releaseinfo element to
 DocBook reader.

Closes #1214
---
 src/Text/Pandoc/Readers/DocBook.hs | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 2c7ec61d3..76bc7374f 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -45,7 +45,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] audioobject - A wrapper for audio data and its associated meta-information
 [x] author - The name of an individual author
 [ ] authorblurb - A short description or note about an author
-[ ] authorgroup - Wrapper for author information when a document has
+[x] authorgroup - Wrapper for author information when a document has
     multiple authors or collabarators
 [x] authorinitials - The initials or other short identifier for an author
 [o] beginpage - The location of a page break in a print version of the document
@@ -341,7 +341,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] refsectioninfo - Meta-information for a refsection
 [ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page
 [ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv
-[ ] releaseinfo - Information about a particular release of a document
+[x] releaseinfo - Information about a particular release of a document
 [ ] remark - A remark (or comment) intended for presentation in a draft
     manuscript
 [ ] replaceable - Content that may or must be replaced by the user
@@ -564,10 +564,11 @@ acceptingMetadata p = do
   modify (\s -> s { dbAcceptsMeta = False })
   return res 
 
-checkInMeta :: Monoid a => DB a -> DB a 
+checkInMeta :: Monoid a => DB () -> DB a 
 checkInMeta p = do
   accepts <- dbAcceptsMeta <$> get 
-  if accepts then p else return mempty
+  when accepts p 
+  return mempty
   
   
@@ -649,9 +650,11 @@ parseBlock (Elem e) =
         "attribution" -> return mempty
         "titleabbrev" -> return mempty
         "authorinitials" -> return mempty
-        "title" ->  checkInMeta getTitle >> return mempty -- handled by getTitle or sect or figure
-        "author" -> checkInMeta getAuthor >> return mempty
-        "date" -> checkInMeta getDate >> return mempty
+        "title" ->  checkInMeta getTitle 
+        "author" -> checkInMeta getAuthor 
+        "authorgroup" -> checkInMeta getAuthorGroup 
+        "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
+        "date" -> checkInMeta getDate
         "bibliography" -> sect 0
         "bibliodiv" -> sect 1
         "biblioentry" -> parseMixed para (elContent e)
@@ -786,7 +789,10 @@ parseBlock (Elem e) =
                                   Nothing -> return mempty
                      addMeta "title" (tit <> subtit)
                      
-         getAuthor = getInlines e >>= addMeta "author"
+         getAuthor = (:[]) <$> getInlines e >>= addMeta "authors"
+         getAuthorGroup = do
+          let terms = filterChildren (named "author") e
+          mapM getInlines terms >>= addMeta "authors" 
          getDate = getInlines e >>= addMeta "date" 
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
@@ -849,7 +855,7 @@ parseBlock (Elem e) =
                      b <- getBlocks e
                      modify $ \st -> st{ dbSectionLevel = n - 1 }
                      return $ header n' headerText <> b
-         metaBlock = acceptingMetadata (getBlocks e)
+         metaBlock = acceptingMetadata (getBlocks e) >> return mempty
 
 getInlines :: Element -> DB Inlines
 getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
@@ -913,6 +919,7 @@ parseInline (Elem e) =
                              _        -> emph <$> innerInlines
         "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
         "title" -> return mempty
+        "affiliation" -> return mempty
         _          -> innerInlines
    where innerInlines = (trimInlines . mconcat) <$>
                           (mapM parseInline $ elContent e)
-- 
cgit v1.2.3


From c99ecf6cc566a90eef1816eb5a1a1167b81ad9ac Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Apr 2014 21:33:16 -0700
Subject: DocBook reader:  set "author" not "authors".

---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 76bc7374f..279f1e7f8 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -576,8 +576,8 @@ addMeta :: ToMetaValue a => String -> a -> DB ()
 addMeta field val = modify (setMeta field val)
 
 instance HasMeta DBState where
-  setMeta str v s =  s {dbMeta = setMeta str v (dbMeta s)}
-
+  setMeta field v s =  s {dbMeta = setMeta field v (dbMeta s)}
+  deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)}
 
 isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blocktags
@@ -789,7 +789,7 @@ parseBlock (Elem e) =
                                   Nothing -> return mempty
                      addMeta "title" (tit <> subtit)
                      
-         getAuthor = (:[]) <$> getInlines e >>= addMeta "authors"
+         getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
          getAuthorGroup = do
           let terms = filterChildren (named "author") e
           mapM getInlines terms >>= addMeta "authors" 
-- 
cgit v1.2.3


From c5a0c7a8199698c3973e50db9e91ae2694e018d4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Apr 2014 21:34:12 -0700
Subject: Removed trailing whitespace.

---
 src/Text/Pandoc/Readers/DocBook.hs | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 279f1e7f8..28af646c1 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -555,23 +555,23 @@ attrValue attr elt =
 named :: String -> Element -> Bool
 named s e = qName (elName e) == s
 
--- 
+--
 
 acceptingMetadata :: DB a -> DB a
 acceptingMetadata p = do
   modify (\s -> s { dbAcceptsMeta = True } )
   res <- p
   modify (\s -> s { dbAcceptsMeta = False })
-  return res 
+  return res
 
-checkInMeta :: Monoid a => DB () -> DB a 
+checkInMeta :: Monoid a => DB () -> DB a
 checkInMeta p = do
-  accepts <- dbAcceptsMeta <$> get 
-  when accepts p 
+  accepts <- dbAcceptsMeta <$> get
+  when accepts p
   return mempty
-  
-  
-   
+
+
+
 addMeta :: ToMetaValue a => String -> a -> DB ()
 addMeta field val = modify (setMeta field val)
 
@@ -650,9 +650,9 @@ parseBlock (Elem e) =
         "attribution" -> return mempty
         "titleabbrev" -> return mempty
         "authorinitials" -> return mempty
-        "title" ->  checkInMeta getTitle 
-        "author" -> checkInMeta getAuthor 
-        "authorgroup" -> checkInMeta getAuthorGroup 
+        "title" ->  checkInMeta getTitle
+        "author" -> checkInMeta getAuthor
+        "authorgroup" -> checkInMeta getAuthorGroup
         "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
         "date" -> checkInMeta getDate
         "bibliography" -> sect 0
@@ -717,7 +717,7 @@ parseBlock (Elem e) =
         "figure" -> getFigure e
         "mediaobject" -> para <$> getImage e
         "caption" -> return mempty
-        "info" -> metaBlock 
+        "info" -> metaBlock
         "articleinfo" -> metaBlock
         "sectioninfo" -> return mempty  -- keywords & other metadata
         "refsectioninfo" -> return mempty  -- keywords & other metadata
@@ -788,12 +788,12 @@ parseBlock (Elem e) =
                                               getInlines s
                                   Nothing -> return mempty
                      addMeta "title" (tit <> subtit)
-                     
+
          getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
          getAuthorGroup = do
           let terms = filterChildren (named "author") e
-          mapM getInlines terms >>= addMeta "authors" 
-         getDate = getInlines e >>= addMeta "date" 
+          mapM getInlines terms >>= addMeta "authors"
+         getDate = getInlines e >>= addMeta "date"
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
                       caption <- case filterChild isCaption e of
-- 
cgit v1.2.3


From e06aa97bb93cab5ddeadca5b65e45649663e527c Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Apr 2014 21:39:08 -0700
Subject: DocBook reader:  set metadata "author" not "authors"

---
 src/Text/Pandoc/Readers/DocBook.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 28af646c1..4fb38a07d 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -792,7 +792,7 @@ parseBlock (Elem e) =
          getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
          getAuthorGroup = do
           let terms = filterChildren (named "author") e
-          mapM getInlines terms >>= addMeta "authors"
+          mapM getInlines terms >>= addMeta "author"
          getDate = getInlines e >>= addMeta "date"
          parseTable = do
                       let isCaption x = named "title" x || named "caption" x
-- 
cgit v1.2.3


From ee2e769cd7ee075876c974f94817e6eee294bedd Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Apr 2014 22:01:12 -0700
Subject: DocBook reader:  Better treatment of formalpara.

We now emit the title (if present) as a separate paragraph
with boldface text.

Closes #1215.
---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 4fb38a07d..d58f8b3c5 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -639,10 +639,10 @@ parseBlock (Elem e) =
         "para"  -> parseMixed para (elContent e)
         "formalpara" -> do
            tit <- case filterChild (named "title") e of
-                        Just t  -> (<> str "." <> linebreak) <$> emph
-                                      <$> getInlines t
+                        Just t  -> (para . strong . (<> str ".")) <$>
+                                     getInlines t
                         Nothing -> return mempty
-           addToStart tit <$> parseMixed para (elContent e)
+           (tit <>) <$> parseMixed para (elContent e)
         "simpara"  -> parseMixed para (elContent e)
         "ackno"  -> parseMixed para (elContent e)
         "epigraph" -> parseBlockquote
-- 
cgit v1.2.3


From 7cf7e45e4cbb99b320a92b4bd31e433f535d3ef7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 14:17:43 +0200
Subject: Org reader: Slight cleaning of table parsing code

---
 src/Text/Pandoc/Readers/Org.hs | 68 ++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dc250f04..8b155194b 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -217,13 +217,18 @@ data OrgTableRow = OrgContentRow [Blocks]
                  | OrgHlineRow
  deriving (Eq, Show)
 
-type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+data OrgTable = OrgTable
+  { orgTableColumns    :: Int
+  , orgTableAlignments :: [Alignment]
+  , orgTableHeader     :: [Blocks]
+  , orgTableRows       :: [[Blocks]]
+  } deriving (Eq, Show)
 
 table :: OrgParser Blocks
 table = try $ do
   lookAhead tableStart
-  (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
-  return $ B.table "" (zip aligns widths) heads lns
+  OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows
+  return $ B.table "" (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
@@ -237,10 +242,9 @@ tableContentRow = try $
 
 tableContentCell :: OrgParser Blocks
 tableContentCell = try $
-  B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+  B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
 
 endOfCell :: OrgParser Char
--- endOfCell =  char '|' <|> newline
 endOfCell = try $ char '|' <|> lookAhead newline
 
 tableAlignRow :: OrgParser OrgTableRow
@@ -269,54 +273,53 @@ tableHline :: OrgParser OrgTableRow
 tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
-tableContent :: [OrgTableRow]
-             -> OrgTableContent
-tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+rowsToTable :: [OrgTableRow]
+            -> OrgTable
+rowsToTable = foldl' (flip rowToContent) zeroTable
+  where zeroTable = OrgTable 0 mempty mempty mempty
 
-normalizeTable :: OrgTableContent
-               -> OrgTableContent
-normalizeTable (cols, aligns, widths, heads, lns) =
+normalizeTable :: OrgTable
+               -> OrgTable
+normalizeTable (OrgTable cols aligns heads lns) =
   let aligns' = fillColumns aligns AlignDefault
-      widths' = fillColumns widths 0.0
       heads'  = if heads == mempty
-                then heads
+                then mempty
                 else fillColumns heads  (B.plain mempty)
       lns'    = map (flip fillColumns (B.plain mempty)) lns
       fillColumns base padding = take cols $ base ++ repeat padding
-  in (cols, aligns', widths', heads', lns')
+  in OrgTable cols aligns' heads' lns'
 
 
 -- One or more horizontal rules after the first content line mark the previous
 -- line as a header.  All other horizontal lines are discarded.
 rowToContent :: OrgTableRow
-             -> OrgTableContent
-             -> OrgTableContent
+             -> OrgTable
+             -> OrgTable
 rowToContent OrgHlineRow        = maybeBodyToHeader
 rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
 rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
 
 setLongestRow :: [a]
-              -> OrgTableContent
-              -> OrgTableContent
-setLongestRow r (cols, aligns, widths, heads, lns) =
-  (max cols (length r), aligns, widths, heads, lns)
+              -> OrgTable
+              -> OrgTable
+setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
 
-maybeBodyToHeader :: OrgTableContent
-                  -> OrgTableContent
-maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
-maybeBodyToHeader content                          = content
+maybeBodyToHeader :: OrgTable
+                  -> OrgTable
+maybeBodyToHeader t = case t of
+  OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+         t{ orgTableHeader = b , orgTableRows = [] }
+  _   -> t
 
 appendToBody :: [Blocks]
-             -> OrgTableContent
-             -> OrgTableContent
-appendToBody r (cols, aligns, widths, heads, lns) =
-  (cols, aligns, widths, heads, lns ++ [r])
+             -> OrgTable
+             -> OrgTable
+appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
 
 setAligns :: [Alignment]
-          -> OrgTableContent
-          -> OrgTableContent
-setAligns aligns (cols, _, widths, heads, lns) =
-   (cols, aligns, widths, heads, lns)
+          -> OrgTable
+          -> OrgTable
+setAligns aligns t = t{ orgTableAlignments = aligns }
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser Blocks
@@ -549,4 +552,3 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
-
-- 
cgit v1.2.3


From d43c3e81017734170fb25460c4b9ab9cccb1e0db Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 17:20:36 +0200
Subject: Org reader: Use specialized org parser state

The default pandoc ParserState is replaced with `OrgParserState`.  This
is done to simplify the introduction of new state fields required for
efficient Org parsing.
---
 src/Text/Pandoc/Readers/Org.hs | 48 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 8b155194b..0ae4d231c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,15 +29,16 @@ Conversion of Org-Mode to 'Pandoc' document.
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
-import           Text.Pandoc.Parsing hiding (orderedListMarker)
+import           Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
+import           Data.Default
 import           Data.List (foldl')
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
@@ -46,15 +47,48 @@ import           Data.Monoid (mconcat, mempty, mappend)
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] OrgParserState
+
+-- | Org-mode parser state
+data OrgParserState = OrgParserState
+                      { orgOptions          :: ReaderOptions
+                      , orgInlineCharStack  :: [Char]
+                      , orgLastStrPos       :: Maybe SourcePos
+                      , orgMeta             :: Meta
+                      } deriving (Show)
+
+instance HasReaderOptions OrgParserState where
+  extractReaderOptions = orgOptions
+
+instance HasMeta OrgParserState where
+  setMeta field val st =
+    st{ orgMeta = setMeta field val $ orgMeta st }
+  deleteMeta field st =
+    st{ orgMeta = deleteMeta field $ orgMeta st }
+
+instance Default OrgParserState where
+  def = defaultOrgParserState
+
+defaultOrgParserState :: OrgParserState
+defaultOrgParserState = OrgParserState
+                        { orgOptions = def
+                        , orgInlineCharStack = []
+                        , orgLastStrPos = Nothing
+                        , orgMeta = nullMeta
+                        }
+
+updateLastStrPos :: OrgParser ()
+updateLastStrPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgLastStrPos = Just p }
 
-type OrgParser = Parser [Char] ParserState
 
 parseOrg:: OrgParser Pandoc
 parseOrg = do
   blocks' <- B.toList <$> parseBlocks
   st <- getState
-  let meta = stateMeta st
+  let meta = orgMeta st
   return $ Pandoc meta $ filter (/= Null) blocks'
 
 --
@@ -177,7 +211,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
   meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+  updateState $ \st -> st { orgMeta  = orgMeta st <> meta' }
   return mempty
 
 metaValue :: OrgParser MetaValue
@@ -522,7 +556,7 @@ atStart :: OrgParser a -> OrgParser a
 atStart p = do
   pos <- getPosition
   st <- getState
-  guard $ stateLastStrPos st /= Just pos
+  guard $ orgLastStrPos st /= Just pos
   p
 
 -- | succeeds only if we're at the end of a word
-- 
cgit v1.2.3


From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:37:46 +0200
Subject: Org reader: Fix parsing of nested inlines

Text such as /*this*/ was not correctly parsed as a strong, emphasised
word.  This was due to the end-of-word recognition being to strict as it
did not accept markup chars as part of a word.  The fix involves an
additional parser state field, listing the markup chars which might be
parsed as part of a word.
---
 src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0ae4d231c..ad66caab9 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -535,8 +535,15 @@ enclosedInlines start end = try $
 -- FIXME: This is a hack
 inlinesEnclosedBy :: Char
                   -> OrgParser Inlines
-inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
-                                      (atEnd $ char c)
+inlinesEnclosedBy c = try $ do
+  updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
+  res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+                         (atEnd $ char c)
+  updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st }
+  return res
+ where shift xs
+           | null xs   = []
+           | otherwise = tail xs
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -561,11 +568,16 @@ atStart p = do
 
 -- | succeeds only if we're at the end of a word
 atEnd :: OrgParser a -> OrgParser a
-atEnd p = try $ p <* lookingAtEndOfWord
- where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+atEnd p = try $ do
+  p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars
 
-postWordChars :: [Char]
-postWordChars = "\t\n\r !\"'),-.:?}"
+postWordChars :: OrgParser [Char]
+postWordChars = do
+  st <- getState
+  return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st)
+ where safeSecond (_:x2:_) = [x2]
+       safeSecond _        = []
 
 -- FIXME: These functions are hacks and should be replaced
 endsOnThisOrNextLine :: Char
@@ -580,9 +592,10 @@ endsOnThisLine :: [Char]
                -> ([Char] -> OrgParser ())
                -> OrgParser ()
 endsOnThisLine input c doOnOtherLines = do
+  postWordChars' <- postWordChars
   case break (`elem` c:"\n") input of
     (_,'\n':rest)    -> doOnOtherLines rest
-    (_,_:rest@(n:_)) -> if n `elem` postWordChars
+    (_,_:rest@(n:_)) -> if n `elem` postWordChars'
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
-- 
cgit v1.2.3


From d76d2b707b2b5cebb38122e117527a70996c2c4f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:09:44 +0200
Subject: Org reader: Provide more language identifier translations

Org-mode and Pandoc use different language identifiers, marking source
code as being written in a certain programming language.  This adds more
translations from identifiers as used in Org to identifiers used in
Pandoc.

The full list of identifiers used in Org and Pandoc is available through
http://orgmode.org/manual/Languages.html and `pandoc -v`, respectively.
---
 src/Text/Pandoc/Readers/Org.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ad66caab9..62088a04d 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -153,7 +153,14 @@ indentWith num = do
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
 translateLang :: String -> String
-translateLang "sh" = "bash"
+translateLang "C"          = "c"
+translateLang "C++"        = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js"         = "javascript"
+translateLang "lisp"       = "commonlisp"
+translateLang "R"          = "r"
+translateLang "sh"         = "bash"
+translateLang "sqlite"     = "sql"
 translateLang cs = cs
 
 commaEscaped :: String -> String
-- 
cgit v1.2.3


From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 16:10:52 +0200
Subject: Org reader: Support inline images

---
 src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 62088a04d..8b1b4fa23 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,7 +39,7 @@ import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (foldl')
+import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
@@ -484,20 +484,26 @@ endline = try $ do
   return B.space
 
 link :: OrgParser Inlines
-link = explicitLink <|> selfLink <?> "link"
+link = explicitOrImageLink <|> selflinkOrImage <?> "link"
 
-explicitLink :: OrgParser Inlines
-explicitLink = try $ do
+explicitOrImageLink :: OrgParser Inlines
+explicitOrImageLink = try $ do
   char '['
-  src   <- enclosedRaw     (char '[') (char ']')
-  title <- enclosedInlines (char '[') (char ']')
+  src    <- enclosedRaw (char '[') (char ']')
+  title  <- enclosedRaw (char '[') (char ']')
+  title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n")
   char ']'
-  return $ B.link src "" title
+  return $ if (isImage src) && (isImage title)
+           then B.link src "" (B.image title "" "")
+           else B.link src "" title'
+ where butLast = reverse . tail . reverse
 
-selfLink :: OrgParser Inlines
-selfLink = try $ do
+selflinkOrImage :: OrgParser Inlines
+selflinkOrImage = try $ do
   src <- enclosedRaw (string "[[") (string "]]")
-  return $ B.link src "" (B.str src)
+  return $ if isImage src
+           then B.image src "" ""
+           else B.link src "" (B.str src)
 
 emph      :: OrgParser Inlines
 emph      = B.emph         <$> inlinesEnclosedBy '/'
@@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
+
+isImage filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  ':' `notElem` filename
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
-- 
cgit v1.2.3


From c0309a60bc48e347e4b9d621ee38b84a98d0c187 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 10:58:32 -0700
Subject: Shared.openURL:  Set proxy with value of http_proxy env variable.

Note:  proxies with non-root paths are not supported,
because of limitations in http-conduit.

Closes #1211.
---
 src/Text/Pandoc/Shared.hs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 714402e42..3835629db 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,5 @@
 {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
-    FlexibleContexts #-}
+    FlexibleContexts, ScopedTypeVariables #-}
 {-
 Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
 
@@ -120,7 +120,9 @@ import Paths_pandoc (getDataFileName)
 #ifdef HTTP_CONDUIT
 import Data.ByteString.Lazy (toChunks)
 import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
-                             responseBody, responseHeaders)
+                             responseBody, responseHeaders, addProxy,
+                             Request(port,host))
+import System.Environment (getEnv)
 import Network.HTTP.Types.Header ( hContentType)
 import Network (withSocketsDo)
 #else
@@ -648,7 +650,13 @@ openURL u
 #ifdef HTTP_CONDUIT
   | otherwise = withSocketsDo $ E.try $ do
      req <- parseUrl u
-     resp <- withManager $ httpLbs req
+     (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
+     let req' = case proxy of
+                     Left _   -> req
+                     Right pr -> case parseUrl pr of
+                                      Just r  -> addProxy (host r) (port r) req
+                                      Nothing -> req
+     resp <- withManager $ httpLbs req'
      return (BS.concat $ toChunks $ responseBody resp,
              UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
 #else
-- 
cgit v1.2.3


From f2deb9d86d864c79c5ca18205b4ca565b7199413 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 15:50:46 -0700
Subject: Org reader:  Added type signature.

---
 src/Text/Pandoc/Readers/Org.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 8b1b4fa23..5ad2531ac 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -613,6 +613,7 @@ endsOnThisLine input c doOnOtherLines = do
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
 
+isImage :: String -> Bool
 isImage filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
   any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-- 
cgit v1.2.3


From d4054444c00449f49a8948eae91ad7bfdb36ea8e Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 19:57:42 -0700
Subject: Text.Pandoc.PDF:  Ensure that temp directories deleted on Windows.

The PDF is now read as a strict bytestring, ensuring that process
ownership will be terminated, so the temp directory can be deleted.
Closes #1192.
---
 src/Text/Pandoc/PDF.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 608cad2e9..abc5c41b7 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -176,7 +176,10 @@ runTeXProgram program runsLeft tmpDir source = do
          let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
          pdfExists <- doesFileExist pdfFile
          pdf <- if pdfExists
-                   then Just `fmap` B.readFile pdfFile
+                   -- We read PDF as a strict bytestring to make sure that the
+                   -- temp directory is removed on Windows.
+                   -- See https://github.com/jgm/pandoc/issues/1192.
+                   then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
                    else return Nothing
          return (exit, out <> err, pdf)
 
-- 
cgit v1.2.3


From 75dbe87a9969d3fb684409f26bb14b593ea68ca7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 20:31:27 -0700
Subject: Removed whitespace at ends of lines.

---
 src/Text/Pandoc/Readers/Textile.hs | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index c6f992275..b42c5e75d 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -54,7 +54,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing 
+import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
@@ -225,7 +225,7 @@ bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth d
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
@@ -237,7 +237,7 @@ orderedListAtDepth depth = try $ do
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks 
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
@@ -274,7 +274,7 @@ definitionListItem = try $ do
   where inlineDef :: Parser [Char] ParserState [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
-        multilineDef :: Parser [Char] ParserState [Blocks] 
+        multilineDef :: Parser [Char] ParserState [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- many1Till anyChar (try (string "=:" >> newline))
@@ -596,7 +596,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> (Inlines -> Inlines)       -- ^ Inline constructor
                 -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct 
+simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct
 
 ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> (Inlines -> Inlines)       -- ^ Inline constructor
@@ -605,27 +605,27 @@ ungroupedSimpleInline border construct = try $ do
   st <- getState
   pos <- getPosition
   isWhitespace <- option False (whitespace >> return True)
-  guard $ (stateQuoteContext st /= NoQuote) 
-       || (sourceColumn pos == 1) 
+  guard $ (stateQuoteContext st /= NoQuote)
+       || (sourceColumn pos == 1)
        || isWhitespace
   body <- surrounded border inlineWithAttribute
   lookAhead (notFollowedBy alphaNum)
-  let result = construct $ mconcat body 
-  return $ if isWhitespace then B.space <> result 
+  let result = construct $ mconcat body
+  return $ if isWhitespace then B.space <> result
                            else result
-  where 
-    inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") 
+  where
+    inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n")
         >> (withQuoteContext InSingleQuote inline)
 
 
-groupedSimpleInline :: Parser [Char] ParserState t 
+groupedSimpleInline :: Parser [Char] ParserState t
                   -> (Inlines -> Inlines)
-                  -> Parser [Char] ParserState Inlines 
+                  -> Parser [Char] ParserState Inlines
 groupedSimpleInline border construct = try $ do
-    char '['  
+    char '['
     withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']'
 
-    
+
 
 
 -- | Create a singleton list
-- 
cgit v1.2.3


From 060a76a38e1f3586bc92787bb2c25c2dc04e380e Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 20:41:38 -0700
Subject: Textile reader: Improved treatment of HTML spans (%).

Closes #1115.
---
 src/Text/Pandoc/Readers/Textile.hs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index b42c5e75d..f19d68e64 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -376,7 +376,6 @@ inlineParsers = [ inlineMarkup
                 , endline
                 , code
                 , escapedInline
-                , htmlSpan
                 , rawHtmlInline
                 , rawLaTeXInline'
                 , note
@@ -400,6 +399,7 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
                       , simpleInline (char '^') B.superscript
                       , simpleInline (char '~') B.subscript
+                      , simpleInline (char '%') id
                       ]
 
 -- | Trademark, registered, copyright
@@ -476,10 +476,6 @@ str = do
   updateLastStrPos
   return $ B.str fullStr
 
--- | Textile allows HTML span infos, we discard them
-htmlSpan :: Parser [Char] ParserState Inlines
-htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
-
 -- | Some number of space chars
 whitespace :: Parser [Char] ParserState Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- 
cgit v1.2.3


From 24f438aa5f230464d510fae034c94644c0e181ca Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 21:02:12 -0700
Subject: Textile reader:  Better support for attributes.

Instead of being ignored, attributes are now parsed and
included in Span inlines.

The output will be a bit different from stock textile:
e.g. for `*(foo)hi*`, we'll get `<em><span class="foo">hi</span></em>`
instead of `<em class="foo">hi</em>`.  But at least the data is
not lost.
---
 src/Text/Pandoc/Readers/Textile.hs | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index f19d68e64..81994e6bd 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -604,15 +604,18 @@ ungroupedSimpleInline border construct = try $ do
   guard $ (stateQuoteContext st /= NoQuote)
        || (sourceColumn pos == 1)
        || isWhitespace
-  body <- surrounded border inlineWithAttribute
-  lookAhead (notFollowedBy alphaNum)
-  let result = construct $ mconcat body
-  return $ if isWhitespace then B.space <> result
-                           else result
-  where
-    inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n")
-        >> (withQuoteContext InSingleQuote inline)
-
+  border *> notFollowedBy (oneOf " \t\n\r")
+  attr <- attributes
+  body <- trimInlines . mconcat <$>
+          withQuoteContext InSingleQuote
+            (manyTill inline (try border <* notFollowedBy alphaNum))
+  let result = construct $
+        if attr == nullAttr
+           then body
+           else B.spanWith attr body
+  return $ if isWhitespace
+              then B.space <> result
+              else result
 
 groupedSimpleInline :: Parser [Char] ParserState t
                   -> (Inlines -> Inlines)
-- 
cgit v1.2.3


From 971e4c43647cf28d1de0dbc109a6ec6f269fa563 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 5 Apr 2014 23:05:11 -0700
Subject: HTML reader:  Updated `closes` with rules from HTML5 spec.

---
 src/Text/Pandoc/Readers/HTML.hs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 38619d040..2101b2fc2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -469,7 +469,7 @@ pCloses :: String -> TagParser ()
 pCloses tagtype = try $ do
   t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
   case t of
-       (TagClose t')  | t' == tagtype -> pAnyTag >> return ()
+       (TagClose t') | t' == tagtype -> pAnyTag >> return ()
        (TagOpen t' _) | t' `closes` tagtype -> return ()
        (TagClose "ul") | tagtype == "li" -> return ()
        (TagClose "ol") | tagtype == "li" -> return ()
@@ -627,7 +627,7 @@ isCommentTag :: Tag String -> Bool
 isCommentTag = tagComment (const True)
 
 -- taken from HXT and extended
-
+-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
 closes :: String -> String -> Bool
 _ `closes` "body" = False
 _ `closes` "html" = False
@@ -635,11 +635,18 @@ _ `closes` "html" = False
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
 "tr" `closes` t | t `elem` ["th","td","tr"] = True
+"dd" `closes` t | t `elem` ["dt", "dd"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
-"hr" `closes` "p" = True
-"p" `closes` "p" = True
+"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"optgroup" `closes` "optgroup" = True
+"optgroup" `closes` "option" = True
+"option" `closes` "option" = True
+-- http://www.w3.org/TR/html-markup/p.html
+x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
+   "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
+   "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
+   "table", "ul"] = True
 "meta" `closes` "meta" = True
-"colgroup" `closes` "colgroup" = True
 "form" `closes` "form" = True
 "label" `closes` "label" = True
 "map" `closes` "map" = True
-- 
cgit v1.2.3


From 4ebf6f6ebf7d679252ade08203ec13e3e92c2db5 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 19:09:33 +0200
Subject: Org reader: Minor code clean-up

---
 src/Text/Pandoc/Readers/Org.hs | 51 +++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5ad2531ac..6652925aa 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -268,8 +268,12 @@ data OrgTable = OrgTable
 table :: OrgParser Blocks
 table = try $ do
   lookAhead tableStart
-  OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows
-  return $ B.table "" (zip aligns $ repeat 0) heads lns
+  orgToPandocTable . normalizeTable . rowsToTable <$> tableRows
+
+orgToPandocTable :: OrgTable
+                 -> Blocks
+orgToPandocTable (OrgTable _ aligns heads lns) =
+  B.table "" (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
@@ -403,20 +407,14 @@ orderedListStart = genericListStart orderedListMarker
   -- Ordered list markers allowed in org-mode
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
+-- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
          -> OrgParser Blocks
 listItem start = try $ do
-  (markerLength, first) <- try (start >>= rawListItem)
-  rest <- many (listContinuation markerLength)
-  parseFromString parseBlocks $ concat (first:rest)
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: Int
-            -> OrgParser (Int, String)
-rawListItem markerLength = try $ do
-  firstLine <- anyLine
-  restLines <- many (listLine markerLength)
-  return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+  markerLength <- try start
+  firstLine <- anyLineNewline
+  rest <- concat <$> many (listContinuation markerLength)
+  parseFromString parseBlocks $ firstLine ++ rest
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
@@ -424,14 +422,11 @@ listContinuation :: Int
                  -> OrgParser String
 listContinuation markerLength = try $
   mappend <$> many blankline
-          <*> (concat <$> many1 (listLine markerLength))
+          <*> (concat <$> many1 listLine)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
 
--- parse a line of a list item
-listLine :: Int
-         -> OrgParser String
-listLine markerLength = try $
-  indentWith markerLength *> anyLine
-    <**> pure (++ "\n")
+anyLineNewline :: OrgParser String
+anyLineNewline = (++ "\n") <$> anyLine
 
 
 --
@@ -491,12 +486,11 @@ explicitOrImageLink = try $ do
   char '['
   src    <- enclosedRaw (char '[') (char ']')
   title  <- enclosedRaw (char '[') (char ']')
-  title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n")
+  title' <- parseFromString (mconcat <$> many inline) title
   char ']'
   return $ if (isImage src) && (isImage title)
            then B.link src "" (B.image title "" "")
            else B.link src "" title'
- where butLast = reverse . tail . reverse
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
@@ -552,11 +546,8 @@ inlinesEnclosedBy c = try $ do
   updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
   res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
                          (atEnd $ char c)
-  updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st }
+  updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st }
   return res
- where shift xs
-           | null xs   = []
-           | otherwise = tail xs
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -583,14 +574,13 @@ atStart p = do
 atEnd :: OrgParser a -> OrgParser a
 atEnd p = try $ do
   p <* lookingAtEndOfWord
- where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars
+ where lookingAtEndOfWord =
+           eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
 
 postWordChars :: OrgParser [Char]
 postWordChars = do
   st <- getState
-  return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st)
- where safeSecond (_:x2:_) = [x2]
-       safeSecond _        = []
+  return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st)
 
 -- FIXME: These functions are hacks and should be replaced
 endsOnThisOrNextLine :: Char
@@ -608,6 +598,7 @@ endsOnThisLine input c doOnOtherLines = do
   postWordChars' <- postWordChars
   case break (`elem` c:"\n") input of
     (_,'\n':rest)    -> doOnOtherLines rest
+    (_,_:[])         -> return ()
     (_,_:rest@(n:_)) -> if n `elem` postWordChars'
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
-- 
cgit v1.2.3


From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 14:49:57 +0200
Subject: Org reader: Add support for definition lists

---
 src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 6652925aa..20bca3e28 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline
 --
 
 list :: OrgParser Blocks
-list = choice [ bulletList, orderedList ] <?> "list"
+list = choice [ definitionList, bulletList, orderedList ] <?> "list"
+
+definitionList :: OrgParser Blocks
+definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart)
 
 bulletList :: OrgParser Blocks
 bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
@@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker
   -- Ordered list markers allowed in org-mode
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
+definitionListItem :: OrgParser Int
+                   -> OrgParser (Inlines, [Blocks])
+definitionListItem parseMarkerGetLength = try $ do
+  markerLength <- parseMarkerGetLength
+  term <- manyTill (noneOf "\n\r") (try $ string "::")
+  first <- anyLineNewline
+  cont <- concat <$> many (listContinuation markerLength)
+  term' <- parseFromString inline term
+  contents' <- parseFromString parseBlocks $ first ++ cont
+  return (term', [contents'])
+
+
 -- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
          -> OrgParser Blocks
-- 
cgit v1.2.3


From fcddd0e4bd5f1b5fd4eb33e291fb9ccc2358d43f Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 6 Apr 2014 15:11:18 -0700
Subject: LaTeX reader:  handle @{} and p{length} in tabular.

The length is not actually recorded, but at least we get a table.

Closes #1180.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f23a5b35e..fd761dbec 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1168,12 +1168,13 @@ complexNatbibCitation mode = try $ do
 parseAligns :: LP [Alignment]
 parseAligns = try $ do
   char '{'
-  let maybeBar = try $ spaces >> optional (char '|')
+  let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}")
   maybeBar
   let cAlign = AlignCenter <$ char 'c'
   let lAlign = AlignLeft <$ char 'l'
   let rAlign = AlignRight <$ char 'r'
-  let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
+  let parAlign = AlignLeft <$ (char 'p' >> braced)
+  let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
   aligns' <- sepEndBy alignChar maybeBar
   spaces
   char '}'
-- 
cgit v1.2.3


From e352ec5a0e4af245584972e89ae13352e907cdf0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 6 Apr 2014 16:32:53 -0700
Subject: LaTeX writer: Workaround for level 4-5 headers in quotes.

These previously produced invalid LaTeX: `\paragraph` or
`\subparagraph` in a `quote` environment.  This adds an
`mbox{}` in these contexts to work around the problem.
See http://tex.stackexchange.com/a/169833/22451.

Closes #1221.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6cf7ed730..e12c9078f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -51,6 +51,7 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
 
 data WriterState =
   WriterState { stInNote        :: Bool          -- true if we're in a note
+              , stInQuote       :: Bool          -- true if in a blockquote
               , stInMinipage    :: Bool          -- true if in minipage
               , stNotes         :: [Doc]         -- notes in a minipage
               , stOLLevel       :: Int           -- level of ordered list nesting
@@ -73,7 +74,8 @@ data WriterState =
 writeLaTeX :: WriterOptions -> Pandoc -> String
 writeLaTeX options document =
   evalState (pandocToLaTeX options document) $
-  WriterState { stInNote = False, stInMinipage = False, stNotes = [],
+  WriterState { stInNote = False, stInQuote = False,
+                stInMinipage = False, stNotes = [],
                 stOLLevel = 1, stOptions = options,
                 stVerbInNote = False,
                 stTable = False, stStrikeout = False,
@@ -331,7 +333,10 @@ blockToLaTeX (BlockQuote lst) = do
          modify $ \s -> s{ stIncremental = oldIncremental }
          return result
        _ -> do
+         oldInQuote <- gets stInQuote
+         modify (\s -> s{stInQuote = True})
          contents <- blockListToLaTeX lst
+         modify (\s -> s{stInQuote = oldInQuote})
          return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
 blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
   opts <- gets stOptions
@@ -610,9 +615,16 @@ sectionHeader unnumbered ref level lst = do
                           4  -> "paragraph"
                           5  -> "subparagraph"
                           _  -> ""
+  inQuote <- gets stInQuote
+  let prefix = if inQuote && level' >= 4
+                  then text "\\mbox{}%"
+                  -- needed for \paragraph, \subparagraph in quote environment
+                  -- see http://tex.stackexchange.com/questions/169830/
+                  else empty
   return $ if level' > 5
               then txt
-              else headerWith ('\\':sectionType) stuffing ref
+              else prefix $$
+                   headerWith ('\\':sectionType) stuffing ref
                    $$ if unnumbered
                          then "\\addcontentsline{toc}" <>
                                 braces (text sectionType) <>
-- 
cgit v1.2.3


From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 7 Apr 2014 11:00:30 +0200
Subject: Org reader: Support inline math (like $E=mc^2$)

Closes #1223.
---
 src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 20bca3e28..2bb6ee122 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -459,6 +459,7 @@ inline = choice inlineParsers <?> "inline"
                         , strikeout
                         , underline
                         , code
+                        , math
                         , verbatim
                         , subscript
                         , superscript
@@ -530,10 +531,13 @@ underline = B.strong       <$> inlinesEnclosedBy '_'
 code      :: OrgParser Inlines
 code      = B.code         <$> rawEnclosedBy '='
 
-verbatim  ::  OrgParser Inlines
+math      :: OrgParser Inlines
+math      = B.math         <$> rawEnclosedBy '$'
+
+verbatim  :: OrgParser Inlines
 verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
 
-subscript ::  OrgParser Inlines
+subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
 
 superscript ::  OrgParser Inlines
@@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
 -- succeeds only if we're not right after a str (ie. in middle of word)
 atStart :: OrgParser a -> OrgParser a
 atStart p = do
-  pos <- getPosition
-  st <- getState
-  guard $ orgLastStrPos st /= Just pos
+  guard =<< not <$> isRightAfterString
   p
 
 -- | succeeds only if we're at the end of a word
 atEnd :: OrgParser a -> OrgParser a
 atEnd p = try $ do
-  p <* lookingAtEndOfWord
+ p <* lookingAtEndOfWord
  where lookingAtEndOfWord =
            eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
 
+isRightAfterString :: OrgParser Bool
+isRightAfterString = do
+  pos <- getPosition
+  st <- getState
+  -- the position `Nothing` isn't after a String, either, hence the double
+  -- negation
+  return $ not $ orgLastStrPos st /= Just pos
+
 postWordChars :: OrgParser [Char]
 postWordChars = do
   st <- getState
-- 
cgit v1.2.3


From e555a5703d4581f11c6b5020811bf60b5ec98c41 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 7 Apr 2014 21:23:39 -0700
Subject: Textile reader: Improved link parsing.

In particular we now pick up on attributes. Since pandoc links
can't have attributes, we enclose the whole link in a span
if there are attributes.

Closes #1008.
---
 src/Text/Pandoc/Readers/Textile.hs | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 81994e6bd..ae9c0cc8e 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -62,7 +62,7 @@ import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
 import Data.Char ( digitToInt, isUpper)
 import Control.Monad ( guard, liftM )
-import Control.Applicative ((<$>), (*>), (<*))
+import Control.Applicative ((<$>), (*>), (<*), (<$))
 import Data.Monoid
 
 -- | Parse a Textile text and return a Pandoc document.
@@ -498,25 +498,21 @@ rawLaTeXInline' = try $ do
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
 link :: Parser [Char] ParserState Inlines
-link = linkB <|> linkNoB
-
-linkNoB :: Parser [Char] ParserState Inlines
-linkNoB = try $ do
-  name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline)
-  char ':'
-  let stopChars = "!.,;:"
-  url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
-  let name' = if B.toList name == [Str "$"] then B.str url else name
-  return $ B.link url "" name'
-
-linkB :: Parser [Char] ParserState Inlines
-linkB = try $ do
-  char '['
-  name <- mconcat <$> surrounded (char '"') inline
-  char ':'
-  url <- manyTill nonspaceChar (char ']')
+link = try $ do
+  bracketed <- (True <$ char '[') <|> return False
+  char '"' *> notFollowedBy (oneOf " \t\n\r")
+  attr <- attributes
+  name <- trimInlines . mconcat <$>
+          withQuoteContext InSingleQuote (manyTill inline (try (string "\":")))
+  let stop = if bracketed
+                then char ']'
+                else lookAhead $ space <|>
+                       try (oneOf "!.,;:" *> (space <|> newline))
+  url <- manyTill nonspaceChar stop
   let name' = if B.toList name == [Str "$"] then B.str url else name
-  return $ B.link url "" name'
+  return $ if attr == nullAttr
+              then B.link url "" name'
+              else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
 image :: Parser [Char] ParserState Inlines
-- 
cgit v1.2.3


From 030020236c85c736892a6f8e0dcefca1681e5ce0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 8 Apr 2014 22:39:25 +0200
Subject: Org reader: Precise rules for the recognition of markup

The inline parsers have been rewritten using the org source code as a
reference. This fixes a couple of bugs related to erroneous markup
recognition.
---
 src/Text/Pandoc/Readers/Org.hs | 380 ++++++++++++++++++++++++++++-------------
 1 file changed, 260 insertions(+), 120 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2bb6ee122..392b17bbc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
-import           Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
+import qualified Text.Pandoc.Parsing as P
+import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
-import           Control.Monad (guard, mzero)
+import           Control.Monad (guard, when)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (foldl', isPrefixOf, isSuffixOf)
@@ -47,49 +48,100 @@ import           Data.Monoid (mconcat, mempty, mappend)
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
+readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
+parseOrg:: OrgParser Pandoc
+parseOrg = do
+  blocks' <- B.toList <$> parseBlocks
+  st <- getState
+  let meta = orgStateMeta st
+  return $ Pandoc meta $ filter (/= Null) blocks'
+
+--
+-- Parser State for Org
+--
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
-                      { orgOptions          :: ReaderOptions
-                      , orgInlineCharStack  :: [Char]
-                      , orgLastStrPos       :: Maybe SourcePos
-                      , orgMeta             :: Meta
+                      { orgStateOptions              :: ReaderOptions
+                      , orgStateEmphasisCharStack    :: [Char]
+                      , orgStateEmphasisNewlines     :: Maybe Int
+                      , orgStateLastForbiddenCharPos :: Maybe SourcePos
+                      , orgStateLastPreCharPos       :: Maybe SourcePos
+                      , orgStateLastStrPos           :: Maybe SourcePos
+                      , orgStateMeta                 :: Meta
                       } deriving (Show)
 
 instance HasReaderOptions OrgParserState where
-  extractReaderOptions = orgOptions
+  extractReaderOptions = orgStateOptions
 
 instance HasMeta OrgParserState where
   setMeta field val st =
-    st{ orgMeta = setMeta field val $ orgMeta st }
+    st{ orgStateMeta = setMeta field val $ orgStateMeta st }
   deleteMeta field st =
-    st{ orgMeta = deleteMeta field $ orgMeta st }
+    st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 
 instance Default OrgParserState where
   def = defaultOrgParserState
 
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
-                        { orgOptions = def
-                        , orgInlineCharStack = []
-                        , orgLastStrPos = Nothing
-                        , orgMeta = nullMeta
+                        { orgStateOptions = def
+                        , orgStateEmphasisCharStack = []
+                        , orgStateEmphasisNewlines = Nothing
+                        , orgStateLastForbiddenCharPos = Nothing
+                        , orgStateLastPreCharPos = Nothing
+                        , orgStateLastStrPos = Nothing
+                        , orgStateMeta = nullMeta
                         }
 
 updateLastStrPos :: OrgParser ()
 updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgLastStrPos = Just p }
+  updateState $ \s -> s{ orgStateLastStrPos = Just p }
 
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
 
-parseOrg:: OrgParser Pandoc
-parseOrg = do
-  blocks' <- B.toList <$> parseBlocks
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+
+pushToInlineCharStack :: Char -> OrgParser ()
+pushToInlineCharStack c = updateState $ \st ->
+  st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
+
+popInlineCharStack :: OrgParser ()
+popInlineCharStack = updateState $ \st ->
+  st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
+
+surroundingEmphasisChar :: OrgParser [Char]
+surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+
+startEmphasisNewlinesCounting :: Int -> OrgParser ()
+startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
+  s { orgStateEmphasisNewlines = Just maxNewlines }
+
+decEmphasisNewlinesCount :: OrgParser ()
+decEmphasisNewlinesCount = updateState $ \s ->
+      s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+
+newlinesCountWithinLimits :: OrgParser Bool
+newlinesCountWithinLimits = do
   st <- getState
-  let meta = orgMeta st
-  return $ Pandoc meta $ filter (/= Null) blocks'
+  return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
+
+resetEmphasisNewlines :: OrgParser ()
+resetEmphasisNewlines = updateState $ \s ->
+  s{ orgStateEmphasisNewlines = Nothing }
+
+newline :: OrgParser Char
+newline =
+  P.newline
+       <* updateLastPreCharPos
+       <* updateLastForbiddenCharPos
 
 --
 -- parsing blocks
@@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
   meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { orgMeta  = orgMeta st <> meta' }
+  updateState $ \st -> st { orgStateMeta  = orgStateMeta st <> meta' }
   return mempty
 
 metaValue :: OrgParser MetaValue
@@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine
 --
 
 inline :: OrgParser Inlines
-inline = choice inlineParsers <?> "inline"
-  where inlineParsers = [ whitespace
-                        , link
-                        , str
-                        , endline
-                        , emph
-                        , strong
-                        , strikeout
-                        , underline
-                        , code
-                        , math
-                        , verbatim
-                        , subscript
-                        , superscript
-                        , symbol
-                        ]
+inline =
+  choice [ whitespace
+         , link
+         , str
+         , endline
+         , emph
+         , strong
+         , strikeout
+         , underline
+         , code
+         , math
+         , verbatim
+         , subscript
+         , superscript
+         , symbol
+         ] <* (guard =<< newlinesCountWithinLimits)
+  <?> "inline"
+
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
@@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
 
 
 whitespace :: OrgParser Inlines
-whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+whitespace = B.space <$ skipMany1 spaceChar
+                     <* updateLastPreCharPos
+                     <* updateLastForbiddenCharPos
+             <?> "whitespace"
 
 str :: OrgParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
@@ -492,6 +549,9 @@ endline = try $ do
   notFollowedBy' commentLineStart
   notFollowedBy' bulletListStart
   notFollowedBy' orderedListStart
+  decEmphasisNewlinesCount
+  guard =<< newlinesCountWithinLimits
+  updateLastPreCharPos
   return B.space
 
 link :: OrgParser Inlines
@@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage <?> "link"
 explicitOrImageLink :: OrgParser Inlines
 explicitOrImageLink = try $ do
   char '['
-  src    <- enclosedRaw (char '[') (char ']')
+  src    <- linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if (isImage src) && (isImage title)
+  return $ if (isImageFilename src) && (isImageFilename title)
            then B.link src "" (B.image title "" "")
            else B.link src "" title'
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
-  src <- enclosedRaw (string "[[") (string "]]")
-  return $ if isImage src
+  src <- (char '[') *> linkTarget <* char ']'
+  return $ if isImageFilename src
            then B.image src "" ""
            else B.link src "" (B.str src)
 
+linkTarget :: OrgParser String
+linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
+
+isImageFilename :: String -> Bool
+isImageFilename filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  ':' `notElem` filename
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
+
 emph      :: OrgParser Inlines
-emph      = B.emph         <$> inlinesEnclosedBy '/'
+emph      = B.emph         <$> emphasisBetween '/'
 
 strong    :: OrgParser Inlines
-strong    = B.strong       <$> inlinesEnclosedBy '*'
+strong    = B.strong       <$> emphasisBetween '*'
 
 strikeout :: OrgParser Inlines
-strikeout = B.strikeout    <$> inlinesEnclosedBy '+'
+strikeout = B.strikeout    <$> emphasisBetween '+'
 
 -- There is no underline, so we use strong instead.
 underline :: OrgParser Inlines
-underline = B.strong       <$> inlinesEnclosedBy '_'
+underline = B.strong       <$> emphasisBetween '_'
 
 code      :: OrgParser Inlines
-code      = B.code         <$> rawEnclosedBy '='
-
-math      :: OrgParser Inlines
-math      = B.math         <$> rawEnclosedBy '$'
+code      = B.code         <$> verbatimBetween '='
 
 verbatim  :: OrgParser Inlines
-verbatim  = B.rawInline "" <$> rawEnclosedBy '~'
+verbatim  = B.rawInline "" <$> verbatimBetween '~'
+
+math      :: OrgParser Inlines
+math      = B.math         <$> mathStringBetween '$'
 
 subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
@@ -550,7 +622,72 @@ maybeGroupedByBraces = try $
          ]
 
 symbol :: OrgParser Inlines
-symbol = B.str . (: "") <$> oneOf specialChars
+symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+ where updatePositions c
+           | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
+           | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
+           | otherwise = return c
+
+emphasisBetween :: Char
+                -> OrgParser Inlines
+emphasisBetween c = try $ do
+  startEmphasisNewlinesCounting emphasisAllowedNewlines
+  res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
+  isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
+  when isTopLevelEmphasis
+       resetEmphasisNewlines
+  return res
+
+verbatimBetween :: Char
+                -> OrgParser String
+verbatimBetween c = try $
+  emphasisStart c *>
+  many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
+
+-- | Parses a raw string delimited by @c@ using Org's math rules
+mathStringBetween :: Char
+                  -> OrgParser String
+mathStringBetween c = try $ do
+  mathStart c
+  body <- many1TillNOrLessNewlines mathAllowedNewlines
+                                   (noneOf (c:"\n\r"))
+                                   (lookAhead $ mathEnd c)
+  final <- mathEnd c
+  return $ body ++ [final]
+
+-- | Parses the start (opening character) of emphasis
+emphasisStart :: Char -> OrgParser Char
+emphasisStart c = try $ do
+  guard =<< afterEmphasisPreChar
+  guard =<< notAfterString
+  char c
+  lookAhead (noneOf emphasisForbiddenBorderChars)
+  pushToInlineCharStack c
+  return c
+
+-- | Parses the closing character of emphasis
+emphasisEnd :: Char -> OrgParser Char
+emphasisEnd c = try $ do
+  guard =<< notAfterForbiddenBorderChar
+  char c
+  eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
+                         oneOf (x ++ emphasisPostChars))
+          *> return ()
+  updateLastStrPos
+  popInlineCharStack
+  return c
+
+mathStart :: Char -> OrgParser Char
+mathStart c = try $ do
+  char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
+
+mathEnd :: Char -> OrgParser Char
+mathEnd c = try $ do
+  res <- noneOf (c:mathForbiddenBorderChars)
+  char c
+  eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
+  return res
+
 
 enclosedInlines :: OrgParser a
                 -> OrgParser b
@@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a
 enclosedInlines start end = try $
   trimInlines . mconcat <$> enclosed start end inline
 
--- FIXME: This is a hack
-inlinesEnclosedBy :: Char
-                  -> OrgParser Inlines
-inlinesEnclosedBy c = try $ do
-  updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
-  res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
-                         (atEnd $ char c)
-  updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st }
-  return res
-
 enclosedRaw :: OrgParser a
             -> OrgParser b
             -> OrgParser String
@@ -577,63 +704,76 @@ enclosedRaw start end = try $
        spanningTwoLines = try $
          anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
 
-rawEnclosedBy :: Char
-              -> OrgParser String
-rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
-
--- succeeds only if we're not right after a str (ie. in middle of word)
-atStart :: OrgParser a -> OrgParser a
-atStart p = do
-  guard =<< not <$> isRightAfterString
-  p
-
--- | succeeds only if we're at the end of a word
-atEnd :: OrgParser a -> OrgParser a
-atEnd p = try $ do
- p <* lookingAtEndOfWord
- where lookingAtEndOfWord =
-           eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
-
-isRightAfterString :: OrgParser Bool
-isRightAfterString = do
+-- | Like many1Till, but parses at most @n+1@ lines.  @p@ must not consume
+--   newlines.
+many1TillNOrLessNewlines :: Int
+                         -> OrgParser Char
+                         -> OrgParser a
+                         -> OrgParser String
+many1TillNOrLessNewlines n p end = try $
+  nMoreLines (Just n) mempty >>= oneOrMore
+ where
+   nMoreLines Nothing  cs = return cs
+   nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
+   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
+                                  >>= uncurry nMoreLines
+   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> (try finalLine)
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
+   finalLine = try $ manyTill p end
+   minus1 k = k - 1
+   oneOrMore cs = guard (not $ null cs) *> return cs
+
+-- Org allows customization of the way it reads emphasis.  We use the defaults
+-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
+-- for details).
+
+-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
+emphasisPreChars :: [Char]
+emphasisPreChars = "\t \"'({"
+
+-- | Chars allowed at after emphasis
+emphasisPostChars :: [Char]
+emphasisPostChars = "\t\n !\"'),-.:;?\\}"
+
+-- | Chars not allowed at the (inner) border of emphasis
+emphasisForbiddenBorderChars :: [Char]
+emphasisForbiddenBorderChars = "\t\n\r \"',"
+
+-- | The maximum number of newlines within
+emphasisAllowedNewlines :: Int
+emphasisAllowedNewlines = 1
+
+-- LaTeX-style math: see `org-latex-regexps` for details
+
+-- | Chars allowed after an inline ($...$) math statement
+mathPostChars :: [Char]
+mathPostChars = "\t\n \"',-.:;?"
+
+-- | Chars not allowed at the (inner) border of math
+mathForbiddenBorderChars :: [Char]
+mathForbiddenBorderChars = "\t\n\r ,;.$"
+
+-- | Maximum number of newlines in an inline math statement
+mathAllowedNewlines :: Int
+mathAllowedNewlines = 2
+
+-- | Whether we are right behind a char allowed before emphasis
+afterEmphasisPreChar :: OrgParser Bool
+afterEmphasisPreChar = do
   pos <- getPosition
-  st <- getState
-  -- the position `Nothing` isn't after a String, either, hence the double
-  -- negation
-  return $ not $ orgLastStrPos st /= Just pos
+  lastPrePos <- orgStateLastPreCharPos <$> getState
+  return $ lastPrePos == Nothing || lastPrePos == Just pos
 
-postWordChars :: OrgParser [Char]
-postWordChars = do
-  st <- getState
-  return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st)
-
--- FIXME: These functions are hacks and should be replaced
-endsOnThisOrNextLine :: Char
-                     -> OrgParser ()
-endsOnThisOrNextLine c = do
-  inp <- getInput
-  let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
-  endsOnThisLine inp c doOtherwise
-
-endsOnThisLine :: [Char]
-               -> Char
-               -> ([Char] -> OrgParser ())
-               -> OrgParser ()
-endsOnThisLine input c doOnOtherLines = do
-  postWordChars' <- postWordChars
-  case break (`elem` c:"\n") input of
-    (_,'\n':rest)    -> doOnOtherLines rest
-    (_,_:[])         -> return ()
-    (_,_:rest@(n:_)) -> if n `elem` postWordChars'
-                        then return ()
-                        else endsOnThisLine rest c doOnOtherLines
-    _                -> mzero
-
-isImage :: String -> Bool
-isImage filename =
-  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-  ':' `notElem` filename
- where
-   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
-   protocols = [ "file", "http", "https" ]
+-- | Whether we are right after the end of a string
+notAfterString :: OrgParser Bool
+notAfterString = do
+  pos <- getPosition
+  lastStrPos <- orgStateLastStrPos <$> getState
+  return $ lastStrPos /= Just pos
+
+-- | Whether the parser is right after a forbidden border char
+notAfterForbiddenBorderChar :: OrgParser Bool
+notAfterForbiddenBorderChar = do
+  pos <- getPosition
+  lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
+  return $ lastFBCPos /= Just pos
-- 
cgit v1.2.3


From 1715d7cee0b9388ac77b8b2a31fcbb00ead80adf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 10 Apr 2014 15:11:03 +0200
Subject: Org reader: Support more inline/display math variants

Support all of the following variants as valid ways to define inline or
display math inlines:

  - `\[..\]` (display)
  - `$$..$$` (display)
  - `\(..\)` (inline)
  - `$..$`   (inline)

This closes #1223.  Again.
---
 src/Text/Pandoc/Readers/Org.hs | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 392b17bbc..1d0400d96 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -512,6 +512,7 @@ inline =
          , underline
          , code
          , math
+         , displayMath
          , verbatim
          , subscript
          , superscript
@@ -607,7 +608,15 @@ verbatim  :: OrgParser Inlines
 verbatim  = B.rawInline "" <$> verbatimBetween '~'
 
 math      :: OrgParser Inlines
-math      = B.math         <$> mathStringBetween '$'
+math      = B.math         <$> choice [ math1CharBetween '$'
+                                      , mathStringBetween '$'
+                                      , rawMathBetween "\\(" "\\)"
+                                      ]
+
+displayMath :: OrgParser Inlines
+displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+                                       , rawMathBetween "$$"  "$$"
+                                       ]
 
 subscript :: OrgParser Inlines
 subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
@@ -655,6 +664,21 @@ mathStringBetween c = try $ do
   final <- mathEnd c
   return $ body ++ [final]
 
+-- | Parse a single character between @c@ using math rules
+math1CharBetween :: Char
+                -> OrgParser String
+math1CharBetween c = try $ do
+  char c
+  res <- noneOf $ c:mathForbiddenBorderChars
+  char c
+  eof <|> lookAhead (oneOf mathPostChars) *> return ()
+  return [res]
+
+rawMathBetween :: String
+               -> String
+               -> OrgParser String
+rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+
 -- | Parses the start (opening character) of emphasis
 emphasisStart :: Char -> OrgParser Char
 emphasisStart c = try $ do
@@ -747,7 +771,7 @@ emphasisAllowedNewlines = 1
 
 -- | Chars allowed after an inline ($...$) math statement
 mathPostChars :: [Char]
-mathPostChars = "\t\n \"',-.:;?"
+mathPostChars = "\t\n \"'),-.:;?"
 
 -- | Chars not allowed at the (inner) border of math
 mathForbiddenBorderChars :: [Char]
-- 
cgit v1.2.3


From ace8837cd691b17e994b41dcb797de6ca1940136 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 10 Apr 2014 17:25:05 +0200
Subject: Org reader: Improve code by following HLint recommendations

HLint's recommendations for better code are applied to the Org-mode
reader code.
---
 src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1d0400d96..29611e8cc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,11 +44,14 @@ import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
+-- Ignore HLint warnings to use String instead of [Char]
+{-# ANN module ("HLint: ignore Use String" :: String) #-}
+
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
+readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
@@ -111,7 +114,7 @@ updateLastPreCharPos = getPosition >>= \p ->
 
 pushToInlineCharStack :: Char -> OrgParser ()
 pushToInlineCharStack c = updateState $ \st ->
-  st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
+  st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
 
 popInlineCharStack :: OrgParser ()
 popInlineCharStack = updateState $ \st ->
@@ -176,7 +179,7 @@ orgBlock = try $ do
     "comment" -> return mempty
     "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
     _         -> B.divWith ("", [blockType], [])
-                            <$> (parseFromString parseBlocks blockStr)
+                 <$> parseFromString parseBlocks blockStr
 
 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@@ -199,7 +202,7 @@ rawBlockContent indent blockType =
 indentWith :: Int -> OrgParser String
 indentWith num = do
   tabStop <- getOption readerTabStop
-  if (num < tabStop)
+  if num < tabStop
      then count num (char ' ')
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
@@ -242,7 +245,7 @@ drawerStart = try $
                           <|> stringAnyCase "LOGBOOK"
 
 drawerLine :: OrgParser String
-drawerLine = try $ anyLine
+drawerLine = try anyLine
 
 drawerEnd :: OrgParser String
 drawerEnd = try $
@@ -276,7 +279,7 @@ declarationLine = try $ do
 metaValue :: OrgParser MetaValue
 metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
 
-metaKey :: OrgParser [Char]
+metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
@@ -350,7 +353,7 @@ tableAlignRow = try $
 
 tableAlignCell :: OrgParser Alignment
 tableAlignCell =
-  choice [ try $ emptyCell *> return (AlignDefault)
+  choice [ try $ emptyCell *> return AlignDefault
          , try $ skipSpaces
                    *> char '<'
                    *> tableAlignFromChar
@@ -381,8 +384,8 @@ normalizeTable (OrgTable cols aligns heads lns) =
   let aligns' = fillColumns aligns AlignDefault
       heads'  = if heads == mempty
                 then mempty
-                else fillColumns heads  (B.plain mempty)
-      lns'    = map (flip fillColumns (B.plain mempty)) lns
+                else fillColumns heads (B.plain mempty)
+      lns'    = map (`fillColumns` B.plain mempty) lns
       fillColumns base padding = take cols $ base ++ repeat padding
   in OrgTable cols aligns' heads' lns'
 
@@ -565,13 +568,14 @@ explicitOrImageLink = try $ do
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if (isImageFilename src) && (isImageFilename title)
-           then B.link src "" (B.image title "" "")
-           else B.link src "" title'
+  return . B.link src ""
+         $ if isImageFilename src && isImageFilename title
+           then B.image title "" ""
+           else title'
 
 selflinkOrImage :: OrgParser Inlines
 selflinkOrImage = try $ do
-  src <- (char '[') *> linkTarget <* char ']'
+  src <- char '[' *> linkTarget <* char ']'
   return $ if isImageFilename src
            then B.image src "" ""
            else B.link src "" (B.str src)
@@ -619,10 +623,10 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
                                        ]
 
 subscript :: OrgParser Inlines
-subscript = B.subscript    <$> (try $ char '_' *> maybeGroupedByBraces)
+subscript = B.subscript    <$> try (char '_' *> maybeGroupedByBraces)
 
 superscript ::  OrgParser Inlines
-superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
+superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
 
 maybeGroupedByBraces :: OrgParser Inlines
 maybeGroupedByBraces = try $
@@ -702,14 +706,14 @@ emphasisEnd c = try $ do
   return c
 
 mathStart :: Char -> OrgParser Char
-mathStart c = try $ do
+mathStart c = try $
   char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
 
 mathEnd :: Char -> OrgParser Char
 mathEnd c = try $ do
   res <- noneOf (c:mathForbiddenBorderChars)
   char c
-  eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
+  eof <|> lookAhead (oneOf mathPostChars *> pure ())
   return res
 
 
@@ -741,8 +745,8 @@ many1TillNOrLessNewlines n p end = try $
    nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
    nMoreLines k        cs = try $ (final k cs <|> rest k cs)
                                   >>= uncurry nMoreLines
-   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> (try finalLine)
-   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
+   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
+   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
    finalLine = try $ manyTill p end
    minus1 k = k - 1
    oneOrMore cs = guard (not $ null cs) *> return cs
@@ -786,7 +790,7 @@ afterEmphasisPreChar :: OrgParser Bool
 afterEmphasisPreChar = do
   pos <- getPosition
   lastPrePos <- orgStateLastPreCharPos <$> getState
-  return $ lastPrePos == Nothing || lastPrePos == Just pos
+  return . fromMaybe True $ (== pos) <$> lastPrePos
 
 -- | Whether we are right after the end of a string
 notAfterString :: OrgParser Bool
-- 
cgit v1.2.3


From ca40acea5b022d6309a36000d54844a482c14555 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Apr 2014 16:52:30 -0700
Subject: MediaWiki reader:  Handle table rows containing just an HTML comment.

Closes #1230.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9bbabd44b..feaedb7c2 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -227,6 +227,7 @@ table = do
   let widths' = map (\w -> if w == 0 then defaultwidth else w) widths
   let cellspecs = zip (map fst cellspecs') widths'
   rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
+  optional blanklines
   tableEnd
   let cols = length hdr
   let (headers,rows) = if hasheader
@@ -275,7 +276,7 @@ tableCaption = try $ do
   (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
 
 tableRow :: MWParser [((Alignment, Double), Blocks)]
-tableRow = try $ many tableCell
+tableRow = try $ skipMany htmlComment *> many tableCell
 
 tableCell :: MWParser ((Alignment, Double), Blocks)
 tableCell = try $ do
-- 
cgit v1.2.3


From 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 11 Apr 2014 11:05:42 +0200
Subject: Org reader: Fix parsing of sub-/superscript expressions

This fixes the org-reader's handling of sub- and superscript
expressions.  Simple expressions (like `2^+10`), expressions in
parentheses (`a_(n+1)`) and nested sexp (like `a_(nested()parens)`) are
now read correctly.
---
 src/Text/Pandoc/Readers/Org.hs | 47 +++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 29611e8cc..ceac69367 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
                                        , rawMathBetween "$$"  "$$"
                                        ]
 
-subscript :: OrgParser Inlines
-subscript = B.subscript    <$> try (char '_' *> maybeGroupedByBraces)
+subscript   :: OrgParser Inlines
+subscript   = B.subscript   <$> try (char '_' *> subOrSuperExpr)
 
-superscript ::  OrgParser Inlines
-superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
-
-maybeGroupedByBraces :: OrgParser Inlines
-maybeGroupedByBraces = try $
-  choice [ try $ enclosedInlines (char '{') (char '}')
-         , B.str . (:"") <$> anyChar
-         ]
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
 
 symbol :: OrgParser Inlines
 symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
@@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do
   pos <- getPosition
   lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
   return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: OrgParser Inlines
+subOrSuperExpr = try $ do
+  choice [ balancedSexp '{' '}'
+         , balancedSexp '(' ')' >>= return . enclosing ('(', ')')
+         , simpleSubOrSuperString
+         ] >>= parseFromString (mconcat <$> many inline)
+
+-- | Read a balanced sexp
+balancedSexp :: Char
+             -> Char
+             -> OrgParser String
+balancedSexp l r = try $ do
+  char l
+  res <- concat <$> many (  many1 (noneOf ([l, r] ++ "\n\r"))
+                        <|> try (string [l, r])
+                        <|> enclosing (l, r) <$> balancedSexp l r
+                         )
+  char r
+  return res
+
+simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString = try $
+  choice [ string "*"
+         , mappend <$> option [] ((:[]) <$> oneOf "+-")
+                   <*> many1 alphaNum
+         ]
+
+enclosing :: (a, a)
+          -> [a]
+          -> [a]
+enclosing (left, right) s = left : s ++ [right]
-- 
cgit v1.2.3


From 8699071ec2d01dbf2c72eb6cda7794de8b850999 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 11 Apr 2014 10:10:54 -0700
Subject: HTML reader:  Treat processing instructions & declarations as block.

Previously these were treated as inline, and included in paragraph
tags in HTML or DocBook output, which is generally not what is wanted.

Closes #1233.
---
 src/Text/Pandoc/Readers/HTML.hs | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2101b2fc2..c94ee3d6b 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -610,15 +610,19 @@ blockTags :: [String]
 blockTags = blockHtmlTags ++ blockDocBookTags
 
 isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
-                tagClose (`notElem` blockTags) t ||
+isInlineTag t = tagOpen isInlineTagName (const True) t ||
+                tagClose isInlineTagName t ||
                 tagComment (const True) t
+                 where isInlineTagName x = x `notElem` blockTags
 
 isBlockTag :: Tag String -> Bool
-isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
-               tagClose (`elem` blocktags) t ||
+isBlockTag t = tagOpen isBlockTagName (const True) t ||
+               tagClose isBlockTagName t ||
                tagComment (const True) t
-                 where blocktags = blockTags ++ eitherBlockOrInline
+                 where isBlockTagName ('?':_) = True
+                       isBlockTagName ('!':_) = True
+                       isBlockTagName x       = x `elem` blockTags
+                                             || x `elem` eitherBlockOrInline
 
 isTextTag :: Tag String -> Bool
 isTextTag = tagText (const True)
-- 
cgit v1.2.3


From ae4280fba528efe68c5955cb3ca0779e6910f43b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 12 Apr 2014 00:17:46 +0200
Subject: Org reader: Add support for figures

Support for figures (images with name and caption) is added.
---
 src/Text/Pandoc/Readers/Org.hs | 57 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..8f0ce61e0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,6 +37,7 @@ import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateL
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Arrow ((***))
 import           Control.Monad (guard, when)
 import           Data.Char (toLower)
 import           Data.Default
@@ -158,6 +159,7 @@ block = choice [ mempty <$ blanklines
                , orgBlock
                , example
                , drawer
+               , figure
                , specialLine
                , header
                , hline
@@ -252,6 +254,43 @@ drawerEnd = try $
   skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
 
 
+--
+-- Figures
+--
+
+-- Figures (Image on a line by itself, preceded by name and/or caption)
+figure :: OrgParser Blocks
+figure = try $ do
+  (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
+                <$> nameAndOrCaption
+  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+  guard (isImageFilename src)
+  return . B.para $ B.image src tit cap
+ where withFigPrefix cs = if "fig:" `isPrefixOf` cs
+                          then cs
+                          else "fig:" ++ cs
+
+nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines)
+nameAndOrCaption = try $ nameFirst <|> captionFirst
+ where
+   nameFirst = try $ do
+                 n <- name
+                 c <- optionMaybe caption
+                 return (Just n, c)
+   captionFirst = try $ do
+                 c <- caption
+                 n <- optionMaybe name
+                 return (n, Just c)
+
+caption :: OrgParser Inlines
+caption = try $ annotation "CAPTION" *> inlinesTillNewline
+
+name :: OrgParser String
+name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline
+
+annotation :: String -> OrgParser String
+annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
+
 -- Comments, Options and Metadata
 specialLine :: OrgParser Blocks
 specialLine = try $ metaLine <|> commentLine
@@ -277,7 +316,7 @@ declarationLine = try $ do
   return mempty
 
 metaValue :: OrgParser MetaValue
-metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
+metaValue = MetaInlines . B.toList <$> inlinesTillNewline
 
 metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -288,7 +327,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
 header :: OrgParser Blocks
 header = try $
   B.header <$> headerStart
-           <*> (trimInlines <$> restOfLine)
+           <*> inlinesTillNewline
 
 headerStart :: OrgParser Int
 headerStart = try $
@@ -424,13 +463,10 @@ setAligns aligns t = t{ orgTableAlignments = aligns }
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser Blocks
 paraOrPlain = try $
-  trimInlines . mconcat
-    <$> many1 inline
-    <**> option B.plain
-                (try $ newline *> pure B.para)
+  parseInlines <**> option B.plain (try $ newline *> pure B.para)
 
-restOfLine :: OrgParser Inlines
-restOfLine = mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser Inlines
+inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
 
 
 --
@@ -523,6 +559,8 @@ inline =
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
 
+parseInlines :: OrgParser Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
@@ -580,6 +618,9 @@ selflinkOrImage = try $ do
            then B.image src "" ""
            else B.link src "" (B.str src)
 
+selfTarget :: OrgParser String
+selfTarget = try $ char '[' *> linkTarget <* char ']'
+
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
 
-- 
cgit v1.2.3


From 36066699c31ca50566ca2492a5c112ecbe690a63 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 12 Apr 2014 00:22:49 +0200
Subject: Org writer: Fix output for linebreaks

Hard linebreaks in Org mode are represented by the string "\\" as the
last characters in a line.  Adds this feature to the Org-mode writer.
---
 src/Text/Pandoc/Writers/Org.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index d318c5f6a..58a5729e7 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -271,7 +271,7 @@ inlineToOrg (Math t str) = do
               else "$$" <> text str <> "$$"
 inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str
 inlineToOrg (RawInline _ _) = return empty
-inlineToOrg (LineBreak) = return cr -- there's no line break in Org
+inlineToOrg (LineBreak) = return (text "\\\\" <> cr)
 inlineToOrg Space = return space
 inlineToOrg (Link txt (src, _)) = do
   case txt of
-- 
cgit v1.2.3


From 82d4160bdcc149df020d1f95f4a7d893a9ecb42a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 12 Apr 2014 11:07:38 +0200
Subject: Org reader: Read linebreaks

Linebreaks are marked by the string `\\` at the end of a line.
---
 src/Text/Pandoc/Readers/Org.hs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..19dd03c6b 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -506,6 +506,7 @@ anyLineNewline = (++ "\n") <$> anyLine
 inline :: OrgParser Inlines
 inline =
   choice [ whitespace
+         , linebreak
          , link
          , str
          , endline
@@ -535,6 +536,9 @@ whitespace = B.space <$ skipMany1 spaceChar
                      <* updateLastForbiddenCharPos
              <?> "whitespace"
 
+linebreak :: OrgParser Inlines
+linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+
 str :: OrgParser Inlines
 str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
             <* updateLastStrPos
-- 
cgit v1.2.3


From 464d7a8e49b81a0c883b8c4a9bfd5d26cdfb38ba Mon Sep 17 00:00:00 2001
From: Neil Mayhew <neiljhmayhew@gmail.com>
Date: Thu, 6 Mar 2014 07:31:49 -0700
Subject: Improve handling of hard line breaks in Docbook writer

 * Use a <literallayout> for the entire paragraph, not just for the
   newline character
 * Don't let LineBreaks inside footnotes influence the enclosing
   paragraph
---
 src/Text/Pandoc/Writers/Docbook.hs | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 02d875be3..2d6ce3020 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,12 +32,14 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where
 import Text.Pandoc.Definition
 import Text.Pandoc.XML
 import Text.Pandoc.Shared
+import Text.Pandoc.Walk
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.Readers.TeXMath
 import Data.List ( isPrefixOf, intercalate, isSuffixOf )
 import Data.Char ( toLower )
+import Data.Monoid ( Any(..) )
 import Text.Pandoc.Highlighting ( languages, languagesByExtension )
 import Text.Pandoc.Pretty
 import qualified Text.Pandoc.Builder as B
@@ -165,8 +167,9 @@ blockToDocbook opts (Para [Image txt (src,'f':'i':'g':':':_)]) =
            (inTagsIndented "imageobject"
              (selfClosingTag "imagedata" [("fileref",src)])) $$
            inTagsSimple "textobject" (inTagsSimple "phrase" alt))
-blockToDocbook opts (Para lst) =
-  inTagsIndented "para" $ inlinesToDocbook opts lst
+blockToDocbook opts (Para lst)
+  | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst
+  | otherwise         = inTagsIndented "para" $ inlinesToDocbook opts lst
 blockToDocbook opts (BlockQuote blocks) =
   inTagsIndented "blockquote" $ blocksToDocbook opts blocks
 blockToDocbook _ (CodeBlock (_,classes,_) str) =
@@ -226,6 +229,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) =
         (inTags True "tgroup" [("cols", show (length headers))] $
          coltags $$ head' $$ body')
 
+hasLineBreaks :: [Inline] -> Bool
+hasLineBreaks = getAny . query isLineBreak . walk removeNote
+  where
+    removeNote :: Inline -> Inline
+    removeNote (Note _) = Str ""
+    removeNote x = x
+    isLineBreak :: Inline -> Any
+    isLineBreak LineBreak = Any True
+    isLineBreak _ = Any False
+
 alignmentToString :: Alignment -> [Char]
 alignmentToString alignment = case alignment of
                                  AlignLeft -> "left"
@@ -293,7 +306,7 @@ inlineToDocbook opts (Math t str)
            fixNS = everywhere (mkT fixNS')
 inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
                                   | otherwise                     = empty
-inlineToDocbook _ LineBreak = flush $ inTagsSimple "literallayout" (text "\n")
+inlineToDocbook _ LineBreak = text "\n"
 inlineToDocbook _ Space = space
 inlineToDocbook opts (Link txt (src, _)) =
   if isPrefixOf "mailto:" src
-- 
cgit v1.2.3


From d4c1cd456c16298069b05e1da328e70dc87ea547 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sat, 12 Apr 2014 21:44:54 -0700
Subject: Org reader: Removed ANN pragma.

This relies on Template Haskell, which causes problems in Windows
due to libraries with C dependencies.  We need to avoid using TH
in pandoc code.
---
 src/Text/Pandoc/Readers/Org.hs | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ceac69367..36b1ad287 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,9 +44,6 @@ import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
--- Ignore HLint warnings to use String instead of [Char]
-{-# ANN module ("HLint: ignore Use String" :: String) #-}
-
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
-- 
cgit v1.2.3


From 7f036c0b57f2791c03040bed61e55adcd21ee496 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 15 Apr 2014 19:53:11 -0700
Subject: Shared: Fixed bug in toRomanNumeral.

9 and numbers ending in 9 would end with "IXIV."
Thanks to Jesse Rosenthal.  Closes #1249.
---
 src/Text/Pandoc/Shared.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 3835629db..27ef6a579 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -234,9 +234,9 @@ toRomanNumeral x =
               _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
               _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
               _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10)
-              _ | x >= 9    -> "IX" ++ toRomanNumeral (x - 5)
+              _ | x == 9    -> "IX"
               _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5)
-              _ | x >= 4    -> "IV" ++ toRomanNumeral (x - 4)
+              _ | x == 4    -> "IV"
               _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
               _             -> ""
 
-- 
cgit v1.2.3


From 346bcea713f933a6cf48829d948e14b9c28b4798 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 16 Apr 2014 13:22:48 +0200
Subject: Org reader: Better module description, minor style changes

Use module description analogous to the markdown reader's.
Use (<$) where it makes sense.
---
 src/Text/Pandoc/Readers/Org.hs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index bda0b0262..c4ea64ba7 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
    Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>
 
-Conversion of Org-Mode to 'Pandoc' document.
+Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
@@ -711,7 +711,7 @@ math1CharBetween c = try $ do
   char c
   res <- noneOf $ c:mathForbiddenBorderChars
   char c
-  eof <|> lookAhead (oneOf mathPostChars) *> return ()
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
   return [res]
 
 rawMathBetween :: String
@@ -734,12 +734,12 @@ emphasisEnd :: Char -> OrgParser Char
 emphasisEnd c = try $ do
   guard =<< notAfterForbiddenBorderChar
   char c
-  eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
-                         oneOf (x ++ emphasisPostChars))
-          *> return ()
+  eof <|> () <$ lookAhead acceptablePostChars
   updateLastStrPos
   popInlineCharStack
   return c
+ where acceptablePostChars =
+           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
 
 mathStart :: Char -> OrgParser Char
 mathStart c = try $
@@ -749,7 +749,7 @@ mathEnd :: Char -> OrgParser Char
 mathEnd c = try $ do
   res <- noneOf (c:mathForbiddenBorderChars)
   char c
-  eof <|> lookAhead (oneOf mathPostChars *> pure ())
+  eof <|> () <$ lookAhead (oneOf mathPostChars)
   return res
 
 
-- 
cgit v1.2.3


From 5fc252270c8332908e3ad9ec12d16c08c49de4a2 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 14 Apr 2014 15:04:32 +0200
Subject: Org reader: Fix code for subexpression parsing

---
 src/Text/Pandoc/Readers/Org.hs | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c4ea64ba7..ec0436f4c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -844,24 +844,12 @@ notAfterForbiddenBorderChar = do
 
 -- | Read a sub- or superscript expression
 subOrSuperExpr :: OrgParser Inlines
-subOrSuperExpr = try $ do
-  choice [ balancedSexp '{' '}'
-         , balancedSexp '(' ')' >>= return . enclosing ('(', ')')
+subOrSuperExpr = try $
+  choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
+         , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
          , simpleSubOrSuperString
          ] >>= parseFromString (mconcat <$> many inline)
-
--- | Read a balanced sexp
-balancedSexp :: Char
-             -> Char
-             -> OrgParser String
-balancedSexp l r = try $ do
-  char l
-  res <- concat <$> many (  many1 (noneOf ([l, r] ++ "\n\r"))
-                        <|> try (string [l, r])
-                        <|> enclosing (l, r) <$> balancedSexp l r
-                         )
-  char r
-  return res
+ where enclosing (left, right) s = left : s ++ [right]
 
 simpleSubOrSuperString :: OrgParser String
 simpleSubOrSuperString = try $
@@ -869,8 +857,3 @@ simpleSubOrSuperString = try $
          , mappend <$> option [] ((:[]) <$> oneOf "+-")
                    <*> many1 alphaNum
          ]
-
-enclosing :: (a, a)
-          -> [a]
-          -> [a]
-enclosing (left, right) s = left : s ++ [right]
-- 
cgit v1.2.3


From 92582c6272a3a171c406699e46e88afc4835d85c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 16 Apr 2014 11:58:16 +0200
Subject: Org reader: introduce Reader environment around Blocks/Inlines

This introduces a Reader environment in the style of
Text.Pandoc.Parsing.F, but adapted to the Org reader parser.
---
 src/Text/Pandoc/Readers/Org.hs | 306 ++++++++++++++++++++++++-----------------
 1 file changed, 176 insertions(+), 130 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ec0436f4c..bdff4869c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-
 Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
 
@@ -29,21 +30,26 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
+import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
+                                     , trimInlines )
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
-import           Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
+import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
+                                            , newline, orderedListMarker
+                                            , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')
 
-import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import           Control.Applicative ( Applicative, pure
+                                     , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow ((***))
-import           Control.Monad (guard, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, when)
+import           Control.Monad.Reader (Reader, runReader)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (foldl', isPrefixOf, isSuffixOf)
+import           Data.List (isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
-import           Data.Monoid (mconcat, mempty, mappend)
+import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
@@ -55,10 +61,10 @@ type OrgParser = Parser [Char] OrgParserState
 
 parseOrg:: OrgParser Pandoc
 parseOrg = do
-  blocks' <- B.toList <$> parseBlocks
+  blocks' <- parseBlocks
   st <- getState
-  let meta = orgStateMeta st
-  return $ Pandoc meta $ filter (/= Null) blocks'
+  let meta = runF (orgStateMeta' st) st
+  return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
 
 --
 -- Parser State for Org
@@ -73,7 +79,8 @@ data OrgParserState = OrgParserState
                       , orgStateLastPreCharPos       :: Maybe SourcePos
                       , orgStateLastStrPos           :: Maybe SourcePos
                       , orgStateMeta                 :: Meta
-                      } deriving (Show)
+                      , orgStateMeta'                :: F Meta
+                      }
 
 instance HasReaderOptions OrgParserState where
   extractReaderOptions = orgStateOptions
@@ -96,6 +103,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastPreCharPos = Nothing
                         , orgStateLastStrPos = Nothing
                         , orgStateMeta = nullMeta
+                        , orgStateMeta' = return nullMeta
                         }
 
 updateLastStrPos :: OrgParser ()
@@ -138,6 +146,27 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+newtype F a = F { unF :: Reader OrgParserState a
+                } deriving (Monad, Applicative, Functor)
+
+runF :: F a -> OrgParserState -> a
+runF = runReader . unF
+
+instance Monoid a => Monoid (F a) where
+  mempty = return mempty
+  mappend = liftM2 mappend
+  mconcat = fmap mconcat . sequence
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
 newline :: OrgParser Char
 newline =
   P.newline
@@ -148,10 +177,10 @@ newline =
 -- parsing blocks
 --
 
-parseBlocks :: OrgParser Blocks
+parseBlocks :: OrgParser (F Blocks)
 parseBlocks = mconcat <$> manyTill block eof
 
-block :: OrgParser Blocks
+block :: OrgParser (F Blocks)
 block = choice [ mempty <$ blanklines
                , orgBlock
                , example
@@ -159,7 +188,7 @@ block = choice [ mempty <$ blanklines
                , figure
                , specialLine
                , header
-               , hline
+               , return <$> hline
                , list
                , table
                , paraOrPlain
@@ -169,15 +198,15 @@ block = choice [ mempty <$ blanklines
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
 
-orgBlock :: OrgParser Blocks
+orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   (indent, blockType, args) <- blockHeader
   blockStr <- rawBlockContent indent blockType
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> B.divWith ("", [blockType], [])
+    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
+    _         -> fmap (B.divWith ("", [blockType], []))
                  <$> parseFromString parseBlocks blockStr
 
 blockHeader :: OrgParser (Int, String, [String])
@@ -222,15 +251,16 @@ commaEscaped (',':cs@('*':_))     = cs
 commaEscaped (',':cs@('#':'+':_)) = cs
 commaEscaped cs                   = cs
 
-example :: OrgParser Blocks
-example = try $
-  B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
+example :: OrgParser (F Blocks)
+example = try $ do
+  body <- unlines <$> many1 exampleLine
+  return . return $ B.codeBlockWith ("", ["example"], []) body
 
 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
 
 -- Drawers for properties or a logbook
-drawer :: OrgParser Blocks
+drawer :: OrgParser (F Blocks)
 drawer = try $ do
   drawerStart
   manyTill drawerLine (try drawerEnd)
@@ -256,18 +286,20 @@ drawerEnd = try $
 --
 
 -- Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser Blocks
+figure :: OrgParser (F Blocks)
 figure = try $ do
   (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
                 <$> nameAndOrCaption
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
   guard (isImageFilename src)
-  return . B.para $ B.image src tit cap
+  return $ do
+    cap' <- cap
+    return $ B.para $ B.image src tit cap'
  where withFigPrefix cs = if "fig:" `isPrefixOf` cs
                           then cs
                           else "fig:" ++ cs
 
-nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines)
+nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines))
 nameAndOrCaption = try $ nameFirst <|> captionFirst
  where
    nameFirst = try $ do
@@ -279,7 +311,7 @@ nameAndOrCaption = try $ nameFirst <|> captionFirst
                  n <- optionMaybe name
                  return (n, Just c)
 
-caption :: OrgParser Inlines
+caption :: OrgParser (F Inlines)
 caption = try $ annotation "CAPTION" *> inlinesTillNewline
 
 name :: OrgParser String
@@ -289,8 +321,8 @@ annotation :: String -> OrgParser String
 annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
 
 -- Comments, Options and Metadata
-specialLine :: OrgParser Blocks
-specialLine = try $ metaLine <|> commentLine
+specialLine :: OrgParser (F Blocks)
+specialLine = fmap return . try $ metaLine <|> commentLine
 
 metaLine :: OrgParser Blocks
 metaLine = try $ metaLineStart *> declarationLine
@@ -308,12 +340,15 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
-  meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { orgStateMeta  = orgStateMeta st <> meta' }
+  key <- metaKey
+  inlinesF <- metaInlines
+  updateState $ \st ->
+    let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
+    in st { orgStateMeta' = orgStateMeta' st <> meta' }
   return mempty
 
-metaValue :: OrgParser MetaValue
-metaValue = MetaInlines . B.toList <$> inlinesTillNewline
+metaInlines :: OrgParser (F MetaValue)
+metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
 
 metaKey :: OrgParser String
 metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -321,16 +356,20 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  skipSpaces
 
 -- | Headers
-header :: OrgParser Blocks
-header = try $
-  B.header <$> headerStart
-           <*> inlinesTillNewline
+header :: OrgParser (F Blocks)
+header = try $ do
+  level <- headerStart
+  title <- inlinesTillNewline
+  return $ B.header level <$> title
 
 headerStart :: OrgParser Int
 headerStart = try $
   (length <$> many1 (char '*')) <* many1 (char ' ')
 
--- Horizontal Line (five dashes or more)
+-- Don't use (or need) the reader wrapper here, we want hline to be
+-- @show@able.  Otherwise we can't use it with @notFollowedBy'@.
+
+-- | Horizontal Line (five -- dashes or more)
 hline :: OrgParser Blocks
 hline = try $ do
   skipSpaces
@@ -344,22 +383,23 @@ hline = try $ do
 -- Tables
 --
 
-data OrgTableRow = OrgContentRow [Blocks]
+data OrgTableRow = OrgContentRow (F [Blocks])
                  | OrgAlignRow [Alignment]
                  | OrgHlineRow
- deriving (Eq, Show)
 
 data OrgTable = OrgTable
   { orgTableColumns    :: Int
   , orgTableAlignments :: [Alignment]
   , orgTableHeader     :: [Blocks]
   , orgTableRows       :: [[Blocks]]
-  } deriving (Eq, Show)
+  }
 
-table :: OrgParser Blocks
+table :: OrgParser (F Blocks)
 table = try $ do
   lookAhead tableStart
-  orgToPandocTable . normalizeTable . rowsToTable <$> tableRows
+  do
+    rows <- tableRows
+    return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows
 
 orgToPandocTable :: OrgTable
                  -> Blocks
@@ -374,11 +414,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
 
 tableContentRow :: OrgParser OrgTableRow
 tableContentRow = try $
-  OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
+  OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
 
-tableContentCell :: OrgParser Blocks
+tableContentCell :: OrgParser (F Blocks)
 tableContentCell = try $
-  B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
+  fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
 
 endOfCell :: OrgParser Char
 endOfCell = try $ char '|' <|> lookAhead newline
@@ -410,8 +450,8 @@ tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
 rowsToTable :: [OrgTableRow]
-            -> OrgTable
-rowsToTable = foldl' (flip rowToContent) zeroTable
+            -> F OrgTable
+rowsToTable = foldM (flip rowToContent) zeroTable
   where zeroTable = OrgTable 0 mempty mempty mempty
 
 normalizeTable :: OrgTable
@@ -430,57 +470,64 @@ normalizeTable (OrgTable cols aligns heads lns) =
 -- line as a header.  All other horizontal lines are discarded.
 rowToContent :: OrgTableRow
              -> OrgTable
-             -> OrgTable
-rowToContent OrgHlineRow        = maybeBodyToHeader
-rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
-rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
+             -> F OrgTable
+rowToContent OrgHlineRow        t = maybeBodyToHeader t
+rowToContent (OrgAlignRow as)   t = setLongestRow as =<< setAligns as t
+rowToContent (OrgContentRow rf) t = do
+  rs <- rf
+  setLongestRow rs =<< appendToBody rs t
 
 setLongestRow :: [a]
               -> OrgTable
-              -> OrgTable
-setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
+              -> F OrgTable
+setLongestRow rs t =
+  return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
 
 maybeBodyToHeader :: OrgTable
-                  -> OrgTable
+                  -> F OrgTable
 maybeBodyToHeader t = case t of
   OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
-         t{ orgTableHeader = b , orgTableRows = [] }
-  _   -> t
+         return t{ orgTableHeader = b , orgTableRows = [] }
+  _   -> return t
 
 appendToBody :: [Blocks]
              -> OrgTable
-             -> OrgTable
-appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
+             -> F OrgTable
+appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
 
 setAligns :: [Alignment]
           -> OrgTable
-          -> OrgTable
-setAligns aligns t = t{ orgTableAlignments = aligns }
+          -> F OrgTable
+setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
 -- Paragraphs or Plain text
-paraOrPlain :: OrgParser Blocks
+paraOrPlain :: OrgParser (F Blocks)
 paraOrPlain = try $
-  parseInlines <**> option B.plain (try $ newline *> pure B.para)
+  parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
 
-inlinesTillNewline :: OrgParser Inlines
-inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser (F Inlines)
+inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
 
 
 --
 -- list blocks
 --
 
-list :: OrgParser Blocks
+list :: OrgParser (F Blocks)
 list = choice [ definitionList, bulletList, orderedList ] <?> "list"
 
-definitionList :: OrgParser Blocks
-definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart)
+definitionList :: OrgParser (F Blocks)
+definitionList = fmap B.definitionList . sequence
+                 <$> many1 (definitionListItem bulletListStart)
 
-bulletList :: OrgParser Blocks
-bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+bulletList :: OrgParser (F Blocks)
+bulletList = fmap B.bulletList . fmap compactify' . sequence
+             <$> many1 (listItem bulletListStart)
 
-orderedList :: OrgParser Blocks
-orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+orderedList :: OrgParser (F Blocks)
+-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+orderedList = fmap B.orderedList . fmap compactify' . sequence
+              <$> many1 (listItem orderedListStart)
 
 genericListStart :: OrgParser String
                  -> OrgParser Int
@@ -499,7 +546,7 @@ orderedListStart = genericListStart orderedListMarker
   where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
 
 definitionListItem :: OrgParser Int
-                   -> OrgParser (Inlines, [Blocks])
+                   -> OrgParser (F (Inlines, [Blocks]))
 definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
@@ -507,12 +554,12 @@ definitionListItem parseMarkerGetLength = try $ do
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
   contents' <- parseFromString parseBlocks $ first ++ cont
-  return (term', [contents'])
+  return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
 -- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
-         -> OrgParser Blocks
+         -> OrgParser (F Blocks)
 listItem start = try $ do
   markerLength <- try start
   firstLine <- anyLineNewline
@@ -536,11 +583,11 @@ anyLineNewline = (++ "\n") <$> anyLine
 -- inline
 --
 
-inline :: OrgParser Inlines
+inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
-         , link
+         , linkOrImage
          , str
          , endline
          , emph
@@ -557,29 +604,29 @@ inline =
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
 
-parseInlines :: OrgParser Inlines
-parseInlines = trimInlines . mconcat <$> many1 inline
+parseInlines :: OrgParser (F Inlines)
+parseInlines = trimInlinesF . mconcat <$> many1 inline
 
 -- treat these as potentially non-text when parsing inline:
 specialChars :: [Char]
 specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
 
 
-whitespace :: OrgParser Inlines
-whitespace = B.space <$ skipMany1 spaceChar
-                     <* updateLastPreCharPos
-                     <* updateLastForbiddenCharPos
+whitespace :: OrgParser (F Inlines)
+whitespace = pure B.space <$ skipMany1 spaceChar
+                          <* updateLastPreCharPos
+                          <* updateLastForbiddenCharPos
              <?> "whitespace"
 
-linebreak :: OrgParser Inlines
-linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+linebreak :: OrgParser (F Inlines)
+linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
 
-str :: OrgParser Inlines
-str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
-            <* updateLastStrPos
+str :: OrgParser (F Inlines)
+str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+      <* updateLastStrPos
 
 -- an endline character that can be treated as a space, not a structural break
-endline :: OrgParser Inlines
+endline :: OrgParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -595,29 +642,29 @@ endline = try $ do
   decEmphasisNewlinesCount
   guard =<< newlinesCountWithinLimits
   updateLastPreCharPos
-  return B.space
+  return . return $ B.space
 
-link :: OrgParser Inlines
-link = explicitOrImageLink <|> selflinkOrImage <?> "link"
+linkOrImage :: OrgParser (F Inlines)
+linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
 
-explicitOrImageLink :: OrgParser Inlines
+explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
   char '['
   src    <- linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return . B.link src ""
-         $ if isImageFilename src && isImageFilename title
-           then B.image title "" ""
-           else title'
+  return $ B.link src ""  <$>
+         if isImageFilename src && isImageFilename title
+            then return $ B.image title mempty mempty
+            else title'
 
-selflinkOrImage :: OrgParser Inlines
+selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  return $ if isImageFilename src
-           then B.image src "" ""
-           else B.link src "" (B.str src)
+  return . return $ if isImageFilename src
+                    then B.image src "" ""
+                    else B.link src "" (B.str src)
 
 selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
@@ -634,51 +681,50 @@ isImageFilename filename =
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
-emph      :: OrgParser Inlines
-emph      = B.emph         <$> emphasisBetween '/'
+emph      :: OrgParser (F Inlines)
+emph      = fmap B.emph         <$> emphasisBetween '/'
 
-strong    :: OrgParser Inlines
-strong    = B.strong       <$> emphasisBetween '*'
+strong    :: OrgParser (F Inlines)
+strong    = fmap B.strong       <$> emphasisBetween '*'
 
-strikeout :: OrgParser Inlines
-strikeout = B.strikeout    <$> emphasisBetween '+'
+strikeout :: OrgParser (F Inlines)
+strikeout = fmap B.strikeout    <$> emphasisBetween '+'
 
 -- There is no underline, so we use strong instead.
-underline :: OrgParser Inlines
-underline = B.strong       <$> emphasisBetween '_'
-
-code      :: OrgParser Inlines
-code      = B.code         <$> verbatimBetween '='
+underline :: OrgParser (F Inlines)
+underline = fmap B.strong       <$> emphasisBetween '_'
 
-verbatim  :: OrgParser Inlines
-verbatim  = B.rawInline "" <$> verbatimBetween '~'
+code      :: OrgParser (F Inlines)
+code      = return . B.code         <$> verbatimBetween '='
 
-math      :: OrgParser Inlines
-math      = B.math         <$> choice [ math1CharBetween '$'
-                                      , mathStringBetween '$'
-                                      , rawMathBetween "\\(" "\\)"
-                                      ]
+verbatim  :: OrgParser (F Inlines)
+verbatim  = return . B.rawInline "" <$> verbatimBetween '~'
 
-displayMath :: OrgParser Inlines
-displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
-                                       , rawMathBetween "$$"  "$$"
-                                       ]
+subscript   :: OrgParser (F Inlines)
+subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
 
-subscript   :: OrgParser Inlines
-subscript   = B.subscript   <$> try (char '_' *> subOrSuperExpr)
+superscript :: OrgParser (F Inlines)
+superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
 
-superscript :: OrgParser Inlines
-superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
+math      :: OrgParser (F Inlines)
+math      = return . B.math      <$> choice [ math1CharBetween '$'
+                                            , mathStringBetween '$'
+                                            , rawMathBetween "\\(" "\\)"
+                                            ]
 
-symbol :: OrgParser Inlines
-symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+displayMath :: OrgParser (F Inlines)
+displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+                                                , rawMathBetween "$$"  "$$"
+                                                ]
+symbol :: OrgParser (F Inlines)
+symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
  where updatePositions c
            | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
            | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
            | otherwise = return c
 
 emphasisBetween :: Char
-                -> OrgParser Inlines
+                -> OrgParser (F Inlines)
 emphasisBetween c = try $ do
   startEmphasisNewlinesCounting emphasisAllowedNewlines
   res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@@ -755,9 +801,9 @@ mathEnd c = try $ do
 
 enclosedInlines :: OrgParser a
                 -> OrgParser b
-                -> OrgParser Inlines
+                -> OrgParser (F Inlines)
 enclosedInlines start end = try $
-  trimInlines . mconcat <$> enclosed start end inline
+  trimInlinesF . mconcat <$> enclosed start end inline
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -843,7 +889,7 @@ notAfterForbiddenBorderChar = do
   return $ lastFBCPos /= Just pos
 
 -- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser Inlines
+subOrSuperExpr :: OrgParser (F Inlines)
 subOrSuperExpr = try $
   choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
          , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
-- 
cgit v1.2.3


From 0672f58a445c289c58e42cffbbf32a273e801e39 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 6 Apr 2014 18:43:49 +0200
Subject: Org reader: Support footnotes

---
 src/Text/Pandoc/Readers/Org.hs | 66 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index bdff4869c..17f8a1c9e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -44,7 +44,7 @@ import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow ((***))
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
-import           Control.Monad.Reader (Reader, runReader)
+import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (isPrefixOf, isSuffixOf)
@@ -59,7 +59,7 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
 
 type OrgParser = Parser [Char] OrgParserState
 
-parseOrg:: OrgParser Pandoc
+parseOrg :: OrgParser Pandoc
 parseOrg = do
   blocks' <- parseBlocks
   st <- getState
@@ -70,6 +70,9 @@ parseOrg = do
 -- Parser State for Org
 --
 
+type OrgNoteRecord = (String, F Blocks)
+type OrgNoteTable = [OrgNoteRecord]
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
@@ -80,6 +83,7 @@ data OrgParserState = OrgParserState
                       , orgStateLastStrPos           :: Maybe SourcePos
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
+                      , orgStateNotes'               :: OrgNoteTable
                       }
 
 instance HasReaderOptions OrgParserState where
@@ -104,6 +108,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastStrPos = Nothing
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
+                        , orgStateNotes' = []
                         }
 
 updateLastStrPos :: OrgParser ()
@@ -146,6 +151,11 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+addToNotesTable :: OrgNoteRecord -> OrgParser ()
+addToNotesTable note = do
+  oldnotes <- orgStateNotes' <$> getState
+  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
+
 
 --
 -- Adaptions and specializations of parsing utilities
@@ -157,6 +167,12 @@ newtype F a = F { unF :: Reader OrgParserState a
 runF :: F a -> OrgParserState -> a
 runF = runReader . unF
 
+askF :: F OrgParserState
+askF = F ask
+
+asksF :: (OrgParserState -> a) -> F a
+asksF f = F $ asks f
+
 instance Monoid a => Monoid (F a) where
   mempty = return mempty
   mappend = liftM2 mappend
@@ -191,6 +207,7 @@ block = choice [ mempty <$ blanklines
                , return <$> hline
                , list
                , table
+               , noteBlock
                , paraOrPlain
                ] <?> "block"
 
@@ -500,6 +517,16 @@ setAligns :: [Alignment]
           -> F OrgTable
 setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
+--
+-- Footnote defintions
+--
+noteBlock :: OrgParser (F Blocks)
+noteBlock = try $ do
+  ref <- noteMarker
+  content <- skipSpaces *> paraOrPlain
+  addToNotesTable (ref, content)
+  return mempty
+
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser (F Blocks)
 paraOrPlain = try $
@@ -587,6 +614,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , footnote
          , linkOrImage
          , str
          , endline
@@ -632,6 +660,7 @@ endline = try $ do
   notFollowedBy blankline
   notFollowedBy' exampleLine
   notFollowedBy' hline
+  notFollowedBy' noteMarker
   notFollowedBy' tableStart
   notFollowedBy' drawerStart
   notFollowedBy' headerStart
@@ -644,6 +673,39 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+footnote :: OrgParser (F Inlines)
+footnote = try $ inlineNote <|> referencedNote
+
+inlineNote :: OrgParser (F Inlines)
+inlineNote = try $ do
+  string "[fn:"
+  ref <- many alphaNum
+  char ':'
+  note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+  when (not $ null ref) $
+       addToNotesTable ("fn:" ++ ref, note)
+  return $ B.note <$> note
+
+referencedNote :: OrgParser (F Inlines)
+referencedNote = try $ do
+  ref <- noteMarker
+  return $ do
+    notes <- asksF orgStateNotes'
+    case lookup ref notes of
+      Nothing   -> return $ B.str $ "[" ++ ref ++ "]"
+      Just contents  -> do
+        st <- askF
+        let contents' = runF contents st{ orgStateNotes' = [] }
+        return $ B.note contents'
+
+noteMarker :: OrgParser String
+noteMarker = try $ do
+  char '['
+  choice [ many1Till digit (char ']')
+         , (++) <$> string "fn:"
+                <*> many1Till (noneOf "\n\r\t ") (char ']')
+         ]
+
 linkOrImage :: OrgParser (F Inlines)
 linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
 
-- 
cgit v1.2.3


From 6d6724cf2c6ae6bcc0df312c476e45644c972a85 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 17 Apr 2014 18:09:27 +0200
Subject: Org reader: Support more types of '#+BEGIN_<type>' blocks

Support for standard org-blocks is improved.  The parser now handles
"HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a
sensible fashion.
---
 src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 17f8a1c9e..88e81f5fc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,6 +37,7 @@ import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
+                                            , parseFromString
                                             , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')
 
@@ -47,7 +48,7 @@ import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (isPrefixOf, isSuffixOf)
+import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
@@ -156,6 +157,16 @@ addToNotesTable note = do
   oldnotes <- orgStateNotes' <$> getState
   updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
 
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+
 
 --
 -- Adaptions and specializations of parsing utilities
@@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   (indent, blockType, args) <- blockHeader
-  blockStr <- rawBlockContent indent blockType
+  content <- rawBlockContent indent blockType
+  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> fmap (B.divWith ("", [blockType], []))
-                 <$> parseFromString parseBlocks blockStr
+    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
+    "html"    -> returnF $ B.rawBlock "html" content
+    "latex"   -> returnF $ B.rawBlock "latex" content
+    "ascii"   -> returnF $ B.rawBlock "ascii" content
+    "example" -> returnF $ exampleCode content
+    "quote"   -> return  $ B.blockQuote <$> contentBlocks
+    "verse"   -> parseVerse content
+    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+ where
+   returnF :: a -> OrgParser (F a)
+   returnF = return . return
+
+   parseVerse :: String -> OrgParser (F Blocks)
+   parseVerse cs =
+       fmap B.para . mconcat . intersperse (pure B.linebreak)
+       <$> mapM (parseFromString parseInlines) (lines cs)
 
 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@@ -270,8 +295,10 @@ commaEscaped cs                   = cs
 
 example :: OrgParser (F Blocks)
 example = try $ do
-  body <- unlines <$> many1 exampleLine
-  return . return $ B.codeBlockWith ("", ["example"], []) body
+  return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
 
 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
-- 
cgit v1.2.3


From f19d7233d8d3e47912b760fc62a253e5baf8275a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 08:33:25 +0200
Subject: Org reader: Fix parsing of loose lists

Loose lists (i.e. lists with blankline separated items), were parsed as
multiple lists, each containing a single item.  This patch fixes this
issue.
---
 src/Text/Pandoc/Readers/Org.hs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 88e81f5fc..1fa8d4d5e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -605,9 +605,10 @@ definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
   first <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
-  contents' <- parseFromString parseBlocks $ first ++ cont
+  contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
   return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
@@ -617,16 +618,18 @@ listItem :: OrgParser Int
 listItem start = try $ do
   markerLength <- try start
   firstLine <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
   rest <- concat <$> many (listContinuation markerLength)
-  parseFromString parseBlocks $ firstLine ++ rest
+  parseFromString parseBlocks $ firstLine ++ blank ++ rest
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
 listContinuation :: Int
                  -> OrgParser String
 listContinuation markerLength = try $
-  mappend <$> many blankline
-          <*> (concat <$> many1 listLine)
+  notFollowedBy' blankline
+  *> (mappend <$> (concat <$> many1 listLine)
+              <*> many blankline)
  where listLine = try $ indentWith markerLength *> anyLineNewline
 
 anyLineNewline :: OrgParser String
-- 
cgit v1.2.3


From 09441b65a83f372410394a88af7808f494c3aa57 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 10:15:58 +0200
Subject: Org reader: Add support for plain LaTeX fragments

This adds support for LaTeX fragments like the following:

```
\begin{equation}
\int fg \mathrm{d}x
\end{equation}
```
---
 src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1fa8d4d5e..66cfe720e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -218,6 +218,7 @@ block = choice [ mempty <$ blanklines
                , return <$> hline
                , list
                , table
+               , latexFragment
                , noteBlock
                , paraOrPlain
                ] <?> "block"
@@ -544,6 +545,41 @@ setAligns :: [Alignment]
           -> F OrgTable
 setAligns aligns t = return $ t{ orgTableAlignments = aligns }
 
+
+--
+-- LaTeX fragments
+--
+latexFragment :: OrgParser (F Blocks)
+latexFragment = try $ do
+  envName <- latexEnvStart
+  content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
+  return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ where
+   c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
+                              , c
+                              , "\\end{", e, "}\n"
+                              ]
+
+latexEnvStart :: OrgParser String
+latexEnvStart = try $ do
+  skipSpaces *> string "\\begin{"
+             *> latexEnvName
+             <* string "}"
+             <* blankline
+
+latexEnd :: String -> OrgParser ()
+latexEnd envName = try $
+  () <$ skipSpaces
+     <* string ("\\end{" ++ envName ++ "}")
+     <* blankline
+
+-- | Parses a LaTeX environment name.
+latexEnvName :: OrgParser String
+latexEnvName = try $ do
+  mappend <$> many1 alphaNum
+          <*> option "" (string "*")
+
+
 --
 -- Footnote defintions
 --
@@ -683,7 +719,9 @@ str :: OrgParser (F Inlines)
 str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
       <* updateLastStrPos
 
--- an endline character that can be treated as a space, not a structural break
+-- | An endline character that can be treated as a space, not a structural
+-- break.  This should reflect the values of the Emacs variable
+-- @org-element-pagaraph-separate@.
 endline :: OrgParser (F Inlines)
 endline = try $ do
   newline
@@ -695,6 +733,7 @@ endline = try $ do
   notFollowedBy' drawerStart
   notFollowedBy' headerStart
   notFollowedBy' metaLineStart
+  notFollowedBy' latexEnvStart
   notFollowedBy' commentLineStart
   notFollowedBy' bulletListStart
   notFollowedBy' orderedListStart
-- 
cgit v1.2.3


From 6ded3d41d94c1e90d1d30a1f99ddad62e62d9ce6 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 18 Apr 2014 20:47:50 +0200
Subject: Org reader: Apply captions to code blocks and tables

The `Table` blocktype already takes the caption as an argument, while code
blocks are wrapped in a `Div` block together with a labelling `Span`.
---
 src/Text/Pandoc/Readers/Org.hs | 134 +++++++++++++++++++++++++++++------------
 1 file changed, 94 insertions(+), 40 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 66cfe720e..025158060 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,13 +43,13 @@ import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
-import           Control.Arrow ((***))
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
-import           Data.Maybe (listToMaybe, fromMaybe)
+import qualified Data.Map as M
+import           Data.Maybe (listToMaybe, fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
 -- | Parse org-mode string and return a Pandoc document.
@@ -74,9 +74,12 @@ parseOrg = do
 type OrgNoteRecord = (String, F Blocks)
 type OrgNoteTable = [OrgNoteRecord]
 
+type OrgBlockAttributes = M.Map String String
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
+                      , orgStateBlockAttributes      :: OrgBlockAttributes
                       , orgStateEmphasisCharStack    :: [Char]
                       , orgStateEmphasisNewlines     :: Maybe Int
                       , orgStateLastForbiddenCharPos :: Maybe SourcePos
@@ -102,6 +105,7 @@ instance Default OrgParserState where
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
                         { orgStateOptions = def
+                        , orgStateBlockAttributes = M.empty
                         , orgStateEmphasisCharStack = []
                         , orgStateEmphasisNewlines = Nothing
                         , orgStateLastForbiddenCharPos = Nothing
@@ -112,6 +116,19 @@ defaultOrgParserState = OrgParserState
                         , orgStateNotes' = []
                         }
 
+addBlockAttribute :: String -> String -> OrgParser ()
+addBlockAttribute key val = updateState $ \s ->
+  let attrs = orgStateBlockAttributes s
+  in s{ orgStateBlockAttributes = M.insert key val attrs }
+
+lookupBlockAttribute :: String -> OrgParser (Maybe String)
+lookupBlockAttribute key =
+  M.lookup key . orgStateBlockAttributes <$> getState
+
+resetBlockAttributes :: OrgParser ()
+resetBlockAttributes = updateState $ \s ->
+  s{ orgStateBlockAttributes = orgStateBlockAttributes def }
+
 updateLastStrPos :: OrgParser ()
 updateLastStrPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastStrPos = Just p }
@@ -125,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
 
 pushToInlineCharStack :: Char -> OrgParser ()
-pushToInlineCharStack c = updateState $ \st ->
-  st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
+pushToInlineCharStack c = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
 
 popInlineCharStack :: OrgParser ()
-popInlineCharStack = updateState $ \st ->
-  st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
+popInlineCharStack = updateState $ \s ->
+  s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
 
 surroundingEmphasisChar :: OrgParser [Char]
 surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
 
 startEmphasisNewlinesCounting :: Int -> OrgParser ()
 startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
-  s { orgStateEmphasisNewlines = Just maxNewlines }
+  s{ orgStateEmphasisNewlines = Just maxNewlines }
 
 decEmphasisNewlinesCount :: OrgParser ()
 decEmphasisNewlinesCount = updateState $ \s ->
@@ -209,20 +226,50 @@ parseBlocks = mconcat <$> manyTill block eof
 
 block :: OrgParser (F Blocks)
 block = choice [ mempty <$ blanklines
-               , orgBlock
+               , optionalAttributes $ choice
+                 [ orgBlock
+                 , figure
+                 , table
+                 ]
                , example
                , drawer
-               , figure
                , specialLine
                , header
                , return <$> hline
                , list
-               , table
                , latexFragment
                , noteBlock
                , paraOrPlain
                ] <?> "block"
 
+optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
+optionalAttributes parser = try $
+  resetBlockAttributes *> parseBlockAttributes *> parser
+
+parseBlockAttributes :: OrgParser ()
+parseBlockAttributes = do
+  attrs <- many attribute
+  () <$ mapM (uncurry parseAndAddAttribute) attrs
+ where
+   attribute :: OrgParser (String, String)
+   attribute = try $ do
+         key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
+         val <- skipSpaces *> anyLine
+         return (map toLower key, val)
+
+parseAndAddAttribute :: String -> String -> OrgParser ()
+parseAndAddAttribute key value = do
+  let key' = map toLower key
+  () <$ addBlockAttribute key' value
+
+lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
+lookupInlinesAttr attr = try $ do
+  val <- lookupBlockAttribute attr
+  maybe (return Nothing)
+        (fmap Just . parseFromString parseInlines)
+        val
+
+
 --
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
@@ -235,13 +282,13 @@ orgBlock = try $ do
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
     "html"    -> returnF $ B.rawBlock "html" content
     "latex"   -> returnF $ B.rawBlock "latex" content
     "ascii"   -> returnF $ B.rawBlock "ascii" content
     "example" -> returnF $ exampleCode content
     "quote"   -> return  $ B.blockQuote <$> contentBlocks
     "verse"   -> parseVerse content
+    "src"     -> codeBlockWithAttr classArgs content
     _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
  where
    returnF :: a -> OrgParser (F a)
@@ -260,6 +307,18 @@ blockHeader = (,,) <$> blockIndent
        blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
        blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
 
+codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
+codeBlockWithAttr classArgs content = do
+  identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
+  caption <- lookupInlinesAttr "caption"
+  let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
+  return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
+ where
+   labelDiv blk value =
+       B.divWith nullAttr <$> (mappend <$> labelledBlock value
+                                       <*> pure blk)
+   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+
 rawBlockContent :: Int -> String -> OrgParser String
 rawBlockContent indent blockType =
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
@@ -333,38 +392,26 @@ drawerEnd = try $
 -- Figures (Image on a line by itself, preceded by name and/or caption)
 figure :: OrgParser (F Blocks)
 figure = try $ do
-  (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty)
-                <$> nameAndOrCaption
+  (cap, nam) <- nameAndCaption
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
   guard (isImageFilename src)
   return $ do
     cap' <- cap
-    return $ B.para $ B.image src tit cap'
- where withFigPrefix cs = if "fig:" `isPrefixOf` cs
-                          then cs
-                          else "fig:" ++ cs
-
-nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines))
-nameAndOrCaption = try $ nameFirst <|> captionFirst
+    return $ B.para $ B.image src nam cap'
  where
-   nameFirst = try $ do
-                 n <- name
-                 c <- optionMaybe caption
-                 return (Just n, c)
-   captionFirst = try $ do
-                 c <- caption
-                 n <- optionMaybe name
-                 return (n, Just c)
-
-caption :: OrgParser (F Inlines)
-caption = try $ annotation "CAPTION" *> inlinesTillNewline
-
-name :: OrgParser String
-name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline
-
-annotation :: String -> OrgParser String
-annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':'
+   nameAndCaption =
+       do
+         maybeCap <- lookupInlinesAttr "caption"
+         maybeNam <- lookupBlockAttribute "name"
+         guard $ isJust maybeCap || isJust maybeNam
+         return ( fromMaybe mempty maybeCap
+                , maybe mempty withFigPrefix maybeNam )
+   withFigPrefix cs =
+       if "fig:" `isPrefixOf` cs
+       then cs
+       else "fig:" ++ cs
 
+--
 -- Comments, Options and Metadata
 specialLine :: OrgParser (F Blocks)
 specialLine = fmap return . try $ metaLine <|> commentLine
@@ -400,6 +447,10 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
 
+--
+-- Headers
+--
+
 -- | Headers
 header :: OrgParser (F Blocks)
 header = try $ do
@@ -411,6 +462,7 @@ headerStart :: OrgParser Int
 headerStart = try $
   (length <$> many1 (char '*')) <* many1 (char ' ')
 
+
 -- Don't use (or need) the reader wrapper here, we want hline to be
 -- @show@able.  Otherwise we can't use it with @notFollowedBy'@.
 
@@ -444,12 +496,14 @@ table = try $ do
   lookAhead tableStart
   do
     rows <- tableRows
-    return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows
+    cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
+    return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
 
 orgToPandocTable :: OrgTable
+                 -> Inlines
                  -> Blocks
-orgToPandocTable (OrgTable _ aligns heads lns) =
-  B.table "" (zip aligns $ repeat 0) heads lns
+orgToPandocTable (OrgTable _ aligns heads lns) caption =
+  B.table caption (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
-- 
cgit v1.2.3


From a69416091ba035ab1661ff306ef3e51fd926488b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 11:25:39 +0200
Subject: Org reader: Fix distinction of images and normal links

Fixed a false assumption about the precedence of (&&) vs (||).
---
 src/Text/Pandoc/Readers/Org.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 025158060..66211b20e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -860,8 +860,8 @@ linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
-  ':' `notElem` filename
+  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+   ':' `notElem` filename)
  where
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
-- 
cgit v1.2.3


From 8e91d362a392d1ee90a497f39cfcf90fee8d8da0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 13:15:47 +0200
Subject: Org reader: Fix parsing of footnotes

Footnotes can consist of multiple blocks and end only at a header or at
the beginning of another footnote.  This fixes the previous behavior,
which restricted notes to a single paragraph.
---
 src/Text/Pandoc/Readers/Org.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 66211b20e..0bc0a2668 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -639,10 +639,14 @@ latexEnvName = try $ do
 --
 noteBlock :: OrgParser (F Blocks)
 noteBlock = try $ do
-  ref <- noteMarker
-  content <- skipSpaces *> paraOrPlain
+  ref <- noteMarker <* skipSpaces
+  content <- mconcat <$> blocksTillHeaderOrNote
   addToNotesTable (ref, content)
   return mempty
+ where
+   blocksTillHeaderOrNote =
+     many1Till block (eof <|> () <$ lookAhead noteMarker
+                          <|> () <$ lookAhead headerStart)
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser (F Blocks)
-- 
cgit v1.2.3


From efebade38b3aef30b5fbf04e60f1fc2bd76e6c6c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 14:48:35 +0200
Subject: Move `compactify'DL` from Markdown reader into Shared

The function `compactify'DL`, used to change the final definition item of a
definition list into a `Plain` iff all other items are `Plain`s as well, is
useful in many parsers and hence moved into Text.Pandoc.Shared.
---
 src/Text/Pandoc/Readers/Markdown.hs | 17 -----------------
 src/Text/Pandoc/Shared.hs           | 20 +++++++++++++++++---
 2 files changed, 17 insertions(+), 20 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 57e1ca560..053385d20 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -861,22 +861,6 @@ definitionList = do
   items <- fmap sequence $ many1 definitionListItem
   return $ B.definitionList <$> fmap compactify'DL items
 
-compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
-compactify'DL items =
-  let defs = concatMap snd items
-      defBlocks = reverse $ concatMap B.toList defs
-      isPara (Para _) = True
-      isPara _        = False
-  in  case defBlocks of
-           (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
-                            then let (t,ds) = last items
-                                     lastDef = B.toList $ last ds
-                                     ds' = init ds ++
-                                          [B.fromList $ init lastDef ++ [Plain x]]
-                                  in init items ++ [(t, ds')]
-                            else items
-           _          -> items
-
 --
 -- paragraph block
 --
@@ -1892,4 +1876,3 @@ doubleQuoted = try $ do
   (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
        (fmap B.doubleQuoted . trimInlinesF $ contents))
    <|> (return $ return (B.str "\8220") <> contents)
-
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 3835629db..687355701 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -56,6 +56,7 @@ module Text.Pandoc.Shared (
                      stringify,
                      compactify,
                      compactify',
+                     compactify'DL,
                      Element (..),
                      hierarchicalize,
                      uniqueIdent,
@@ -82,7 +83,7 @@ module Text.Pandoc.Shared (
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
 import Text.Pandoc.Generic
-import Text.Pandoc.Builder (Blocks, ToMetaValue(..))
+import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.UTF8 as UTF8
 import System.Environment (getProgName)
@@ -435,6 +436,21 @@ compactify' items =
                             _   -> items
            _      -> items
 
+-- | Like @compactify'@, but akts on items of definition lists.
+compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactify'DL items =
+  let defs = concatMap snd items
+      defBlocks = reverse $ concatMap B.toList defs
+  in  case defBlocks of
+           (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
+                            then let (t,ds) = last items
+                                     lastDef = B.toList $ last ds
+                                     ds' = init ds ++
+                                          [B.fromList $ init lastDef ++ [Plain x]]
+                                  in init items ++ [(t, ds')]
+                            else items
+           _          -> items
+
 isPara :: Block -> Bool
 isPara (Para _) = True
 isPara _        = False
@@ -698,5 +714,3 @@ safeRead s = case reads s of
                   (d,x):_
                     | all isSpace x -> return d
                   _                 -> fail $ "Could not read `" ++ s ++ "'"
-
-
-- 
cgit v1.2.3


From 8276449520ba85c78f0b4e919fbc9bcf893a74f0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 19 Apr 2014 15:05:03 +0200
Subject: Org reader: Allow for compact definition lists

Use `Text.Pandoc.Shared.compactify'DL` to allow for compact definition
lists.
---
 src/Text/Pandoc/Readers/Org.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0bc0a2668..c71cc24be 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,7 +39,7 @@ import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
                                             , updateLastStrPos )
-import           Text.Pandoc.Shared (compactify')
+import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
@@ -665,7 +665,7 @@ list :: OrgParser (F Blocks)
 list = choice [ definitionList, bulletList, orderedList ] <?> "list"
 
 definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . sequence
+definitionList = fmap B.definitionList . fmap compactify'DL . sequence
                  <$> many1 (definitionListItem bulletListStart)
 
 bulletList :: OrgParser (F Blocks)
-- 
cgit v1.2.3


From e0688711fd8fb640d96044413aa2d7b1b1cd4e03 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 23 Apr 2014 10:23:02 -0700
Subject: EPUB writer:  include extension in epub ids.

This fixes a problem with duplicate extensions for fonts and
images with the same base name but different extensions.

Closes #1254.
---
 src/Text/Pandoc/Writers/EPUB.hs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index dae45b90f..9f10554a9 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -35,7 +35,7 @@ import Data.Maybe ( fromMaybe )
 import Data.List ( isInfixOf, intercalate )
 import System.Environment ( getEnv )
 import Text.Printf (printf)
-import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
+import System.FilePath ( (</>), takeExtension, takeFileName )
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -56,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement)
 import Text.Pandoc.UUID
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.Markdown ( writePlain )
-import Data.Char ( toLower, isDigit )
+import Data.Char ( toLower, isDigit, isAlphaNum )
 import Network.URI ( unEscapeString )
 import Text.Pandoc.MIME (getMimeType)
 #if MIN_VERSION_base(4,6,0)
@@ -132,6 +132,11 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
+toId :: FilePath -> String
+toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+                     then x
+                     else '_') . takeFileName
+
 getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
@@ -427,7 +432,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   -- contents.opf
   let chapterNode ent = unode "item" !
-                           ([("id", takeBaseName $ eRelativePath ent),
+                           ([("id", toId $ eRelativePath ent),
                              ("href", eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
@@ -435,14 +440,14 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                     xs   -> [("properties", unwords xs)])
                         $ ()
   let chapterRefNode ent = unode "itemref" !
-                             [("idref", takeBaseName $ eRelativePath ent)] $ ()
+                             [("idref", takeFileName $ eRelativePath ent)] $ ()
   let pictureNode ent = unode "item" !
-                           [("id", takeBaseName $ eRelativePath ent),
+                           [("id", toId $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
-                           [("id", takeBaseName $ eRelativePath ent),
+                           [("id", toId $ eRelativePath ent),
                             ("href", eRelativePath ent),
                             ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
   let plainTitle = case docTitle meta of
-- 
cgit v1.2.3


From ec24f9761c71961821c180d3f6eeb3b5c08aaebf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 12:56:25 +0200
Subject: RST reader: Remove duplicate 'http' in PEP links

The generated link to PEPs had a duplicate 'http://' in its URL.
---
 src/Text/Pandoc/Readers/RST.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a574f343a..7785861cc 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1005,7 +1005,7 @@ renderRole contents fmt role attr = case role of
      where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
    pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
      where padNo = replicate (4 - length pepNo) '0' ++ pepNo
-           pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+           pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
 
 roleNameEndingIn :: RSTParser Char -> RSTParser String
 roleNameEndingIn end = many1Till (letter <|> char '-') end
-- 
cgit v1.2.3


From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 17:42:01 +0200
Subject: Org reader: Recognize plain and angle links

This adds support for plain links (like http://zeitlens.com) and angle
links (like <http://moltkeplatz.de>).
---
 src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c71cc24be..7a50b1db9 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -45,7 +45,7 @@ import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
-import           Data.Char (toLower)
+import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
@@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where
 trimInlinesF :: F Inlines -> F Inlines
 trimInlinesF = liftM trimInlines
 
+returnF :: a -> OrgParser (F a)
+returnF = return . return
+
 
 -- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
 newline :: OrgParser Char
@@ -291,9 +294,6 @@ orgBlock = try $ do
     "src"     -> codeBlockWithAttr classArgs content
     _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
  where
-   returnF :: a -> OrgParser (F a)
-   returnF = return . return
-
    parseVerse :: String -> OrgParser (F Blocks)
    parseVerse cs =
        fmap B.para . mconcat . intersperse (pure B.linebreak)
@@ -834,7 +834,11 @@ noteMarker = try $ do
          ]
 
 linkOrImage :: OrgParser (F Inlines)
-linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
+linkOrImage = explicitOrImageLink
+              <|> selflinkOrImage
+              <|> angleLink
+              <|> plainLink
+              <?> "link or image"
 
 explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
@@ -851,15 +855,27 @@ explicitOrImageLink = try $ do
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  return . return $ if isImageFilename src
-                    then B.image src "" ""
-                    else B.link src "" (B.str src)
+  returnF $ if isImageFilename src
+            then B.image src "" ""
+            else B.link src "" (B.str src)
+
+plainLink :: OrgParser (F Inlines)
+plainLink = try $ do
+  (orig, src) <- uri
+  returnF $ B.link src "" (B.str orig)
+
+angleLink :: OrgParser (F Inlines)
+angleLink = try $ do
+  char '<'
+  link <- plainLink
+  char '>'
+  return link
 
 selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
+linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
 isImageFilename :: String -> Bool
 isImageFilename filename =
-- 
cgit v1.2.3


From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 17:42:01 +0200
Subject: Org reader: Read anchors as empty spans

Anchors (like <<this>>) are parsed as empty spans.
---
 src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7a50b1db9..7f1893936 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -740,6 +740,7 @@ inline =
          , linebreak
          , footnote
          , linkOrImage
+         , anchor
          , str
          , endline
          , emph
@@ -886,6 +887,30 @@ isImageFilename filename =
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal.  Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor =  try $ pure <$> (B.spanWith <$> attributes <*> pure mempty)
+ where
+       name = string "<<"
+              *> many1 (noneOf "\t\n\r<>\"' ")
+              <* string ">>"
+       attributes = name >>= \n -> return (solidify n, [], [])
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+           | isAlphaNum c    = c
+           | c `elem` "_.-:" = c
+           | otherwise       = '-'
+
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
-- 
cgit v1.2.3


From d16775e1c7ba248e693817a8d53ebcb9a8332ed5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 24 Apr 2014 11:09:07 -0700
Subject: Render numbers in YAML metadata without decimals when possible.

The change to aeson > 0.7 caused numbers to be rendered with
decimals.  This change causes them to be rendered without decimals
wehn possible.
---
 src/Text/Pandoc/Readers/Markdown.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 053385d20..d3ca8d26f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown,
 
 import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
 import qualified Data.Map as M
+import Data.Scientific (coefficient, base10Exponent)
 import Data.Ord ( comparing )
 import Data.Char ( isAlphaNum, toLower )
 import Data.Maybe
@@ -285,7 +286,11 @@ toMetaValue opts x =
 
 yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
 yamlToMeta opts (Yaml.String t) = toMetaValue opts t
-yamlToMeta _    (Yaml.Number n) = MetaString $ show n
+yamlToMeta _    (Yaml.Number n)
+  -- avoid decimal points for numbers that don't need them:
+  | base10Exponent n >= 0     = MetaString $ show
+                                $ coefficient n * (10 ^ base10Exponent n)
+  | otherwise                 = MetaString $ show n
 yamlToMeta _    (Yaml.Bool b) = MetaBool b
 yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
                                                 $ V.toList xs
-- 
cgit v1.2.3


From e6333a9d7cfe8ec36acae59bd19a654d868f4b8d Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 24 Apr 2014 16:44:49 -0700
Subject: Markdown writer:  Use proper escapes to avoid unwanted lists.

Previously we used 0-width spaces, an ugly hack.

Closes #980.
---
 src/Text/Pandoc/Writers/Markdown.hs | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index e8f976da1..95082add6 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -316,20 +316,25 @@ blockToMarkdown opts (Div attrs ils) = do
                       contents <> blankline <> "</div>" <> blankline
 blockToMarkdown opts (Plain inlines) = do
   contents <- inlineListToMarkdown opts inlines
-  return $ contents <> cr
+  -- escape if para starts with ordered list marker
+  st <- get
+  let colwidth = if writerWrapText opts
+                    then Just $ writerColumns opts
+                    else Nothing
+  let rendered = render colwidth contents
+  let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs
+                             | otherwise      = x : escapeDelimiter xs
+      escapeDelimiter []                      = []
+  let contents' = if isEnabled Ext_all_symbols_escapable opts &&
+                     not (stPlain st) && beginsWithOrderedListMarker rendered
+                     then text $ escapeDelimiter rendered
+                     else contents
+  return $ contents' <> cr
 -- title beginning with fig: indicates figure
 blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) =
   blockToMarkdown opts (Para [Image alt (src,tit)])
-blockToMarkdown opts (Para inlines) = do
-  contents <- inlineListToMarkdown opts inlines
-  -- escape if para starts with ordered list marker
-  st <- get
-  let esc = if isEnabled Ext_all_symbols_escapable opts &&
-               not (stPlain st) &&
-               beginsWithOrderedListMarker (render Nothing contents)
-               then text "\x200B" -- zero-width space, a hack
-               else empty
-  return $ esc <> contents <> blankline
+blockToMarkdown opts (Para inlines) =
+  (<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
 blockToMarkdown opts (RawBlock f str)
   | f == "html" = do
     st <- get
-- 
cgit v1.2.3


From cbeb3bb2132908b76e3a83e61ff99418ebdf83b4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 24 Apr 2014 17:37:10 -0700
Subject: EPUB writer: Fixed some idrefs to match changes in ids.

---
 src/Text/Pandoc/Writers/EPUB.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9f10554a9..c39a7798d 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -440,7 +440,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                                     xs   -> [("properties", unwords xs)])
                         $ ()
   let chapterRefNode ent = unode "itemref" !
-                             [("idref", takeFileName $ eRelativePath ent)] $ ()
+                             [("idref", toId $ eRelativePath ent)] $ ()
   let pictureNode ent = unode "item" !
                            [("id", toId $ eRelativePath ent),
                             ("href", eRelativePath ent),
@@ -488,8 +488,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
               case epubCoverImage metadata of
                     Nothing -> []
                     Just _ -> [ unode "itemref" !
-                                [("idref", "cover"),("linear","no")] $ () ]
-              ++ ((unode "itemref" ! [("idref", "title_page")
+                                [("idref", "cover_xhtml"),("linear","no")] $ () ]
+              ++ ((unode "itemref" ! [("idref", "title_page_xhtml")
                                      ,("linear", if null (docTitle meta)
                                                     then "no"
                                                     else "yes")] $ ()) :
-- 
cgit v1.2.3


From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 25 Apr 2014 15:29:28 +0200
Subject: Org reader: Enable internal links

Internal links in Org are possible by using an anchor-name as the target
of a link:

[[some-anchor][This]] is an internal link.

It links <<some-anchor>> here.
---
 src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 12 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7f1893936..0e52bff90 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
+                      , orgStateAnchorIds            :: [String]
                       , orgStateBlockAttributes      :: OrgBlockAttributes
                       , orgStateEmphasisCharStack    :: [Char]
                       , orgStateEmphasisNewlines     :: Maybe Int
@@ -105,6 +106,7 @@ instance Default OrgParserState where
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
                         { orgStateOptions = def
+                        , orgStateAnchorIds = []
                         , orgStateBlockAttributes = M.empty
                         , orgStateEmphasisCharStack = []
                         , orgStateEmphasisNewlines = Nothing
@@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState
                         , orgStateNotes' = []
                         }
 
+recordAnchorId :: String -> OrgParser ()
+recordAnchorId i = updateState $ \s ->
+  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
 addBlockAttribute :: String -> String -> OrgParser ()
 addBlockAttribute key val = updateState $ \s ->
   let attrs = orgStateBlockAttributes s
@@ -848,17 +854,14 @@ explicitOrImageLink = try $ do
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ B.link src ""  <$>
-         if isImageFilename src && isImageFilename title
-            then return $ B.image title mempty mempty
-            else title'
+  return $ if isImageFilename src && isImageFilename title
+           then pure $ B.link src "" $ B.image title mempty mempty
+           else linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
   src <- char '[' *> linkTarget <* char ']'
-  returnF $ if isImageFilename src
-            then B.image src "" ""
-            else B.link src "" (B.str src)
+  return $ linkToInlinesF src (B.str src)
 
 plainLink :: OrgParser (F Inlines)
 plainLink = try $ do
@@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF s@('#':_) = pure . B.link s ""
+linkToInlinesF s
+    | isImageFilename s = const . pure $ B.image s "" ""
+    | isUri s           = pure . B.link s ""
+    | isRelativeUrl s   = pure . B.link s ""
+linkToInlinesF s = \title -> do
+  anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
+  if anchorB
+    then pure $ B.link ('#':s) "" title
+    else pure $ B.emph title
+
+isRelativeUrl :: String -> Bool
+isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+
+isUri :: String -> Bool
+isUri s = let (scheme, path) = break (== ':') s
+          in all (\c -> isAlphaNum c || c `elem` ".-") scheme
+             && not (null path)
+
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
@@ -894,12 +917,15 @@ isImageFilename filename =
 -- an anchor.
 
 anchor :: OrgParser (F Inlines)
-anchor =  try $ pure <$> (B.spanWith <$> attributes <*> pure mempty)
+anchor =  try $ do
+  anchorId <- parseAnchor
+  recordAnchorId anchorId
+  returnF $ B.spanWith (solidify anchorId, [], []) mempty
  where
-       name = string "<<"
-              *> many1 (noneOf "\t\n\r<>\"' ")
-              <* string ">>"
-       attributes = name >>= \n -> return (solidify n, [], [])
+       parseAnchor = string "<<"
+                     *> many1 (noneOf "\t\n\r<>\"' ")
+                     <* string ">>"
+                     <* skipSpaces
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
-- 
cgit v1.2.3


From b09412d852880a0c8e18e1cab9b0ce33f0e0e8a2 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 25 Apr 2014 16:14:52 +0200
Subject: LaTeX writer: Mark span contents with label if span has an ID

Prepend `\label{span-id}` to span contents iff `span-id` is defined.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e12c9078f..e52220f01 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -655,16 +655,20 @@ isQuoted _ = False
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: Inline    -- ^ Inline to convert
               -> State WriterState Doc
-inlineToLaTeX (Span (_,classes,_) ils) = do
+inlineToLaTeX (Span (id',classes,_) ils) = do
   let noEmph = "csl-no-emph" `elem` classes
   let noStrong = "csl-no-strong" `elem` classes
   let noSmallCaps = "csl-no-smallcaps" `elem` classes
-  ((if noEmph then inCmd "textup" else id) .
-   (if noStrong then inCmd "textnormal" else id) .
-   (if noSmallCaps then inCmd "textnormal" else id) .
-   (if not (noEmph || noStrong || noSmallCaps)
-       then braces
-       else id)) `fmap` inlineListToLaTeX ils
+  let label' = if (null id')
+               then empty
+               else text "\\label" <> braces (text $ toLabel id')
+  fmap (label' <>)
+    ((if noEmph then inCmd "textup" else id) .
+     (if noStrong then inCmd "textnormal" else id) .
+     (if noSmallCaps then inCmd "textnormal" else id) .
+     (if not (noEmph || noStrong || noSmallCaps)
+         then braces
+         else id)) `fmap` inlineListToLaTeX ils
 inlineToLaTeX (Emph lst) =
   inlineListToLaTeX lst >>= return . inCmd "emph"
 inlineToLaTeX (Strong lst) =
-- 
cgit v1.2.3


From 35ea8de3690fcd9bf06532576ced4d82fd51f26d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 26 Apr 2014 12:04:08 -0700
Subject: HTML writer:  improved detection of image links.

Previously image links with queries were not recognized,
leading to use of an embed tag rather than an img tag.
---
 src/Text/Pandoc/Writers/HTML.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index e0385af25..1de4345f9 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -40,6 +40,7 @@ import Text.Pandoc.Slides
 import Text.Pandoc.Highlighting ( highlight, styleToCss,
                                   formatHtmlInline, formatHtmlBlock )
 import Text.Pandoc.XML (fromEntities, escapeStringForXML)
+import Network.URI ( parseURIReference, URI(..) )
 import Network.HTTP ( urlEncode )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
@@ -396,7 +397,10 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
 
 treatAsImage :: FilePath -> Bool
 treatAsImage fp =
-  let ext = map toLower $ drop 1 $ takeExtension fp
+  let path = case uriPath `fmap` parseURIReference fp of
+                  Nothing -> fp
+                  Just up -> up
+      ext  = map toLower $ drop 1 $ takeExtension path
   in  null ext || ext `elem` imageExts
 
 -- | Convert Pandoc block element to HTML.
-- 
cgit v1.2.3


From 22e36e104058b0ea93dbda106374f6c02bbf36d6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 26 Apr 2014 12:14:42 -0700
Subject: LaTeX reader: Made `\nocite` work.

This adds nocite citations to a metadata field, `nocite`.
These will appear in the bibliography but not in the text
(unless you use a `$nocite$` variable in your template, of
course).
---
 src/Text/Pandoc/Readers/LaTeX.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fd761dbec..b5d529eb9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -322,7 +322,8 @@ blockCommands = M.fromList $
   ]
 
 addMeta :: ToMetaValue a => String -> a -> LP ()
-addMeta field val = updateState $ setMeta field val
+addMeta field val = updateState $ \st ->
+  st{ stateMeta = addMetaField field val $ stateMeta st }
 
 setCaption :: Inlines -> LP Blocks
 setCaption ils = do
@@ -341,7 +342,7 @@ authors = try $ do
                -- skip e.g. \vspace{10pt}
   auths <- sepBy oneAuthor (controlSeq "and")
   char '}'
-  addMeta "authors" (map trimInlines auths)
+  addMeta "author" (map trimInlines auths)
 
 section :: Attr -> Int -> LP Blocks
 section (ident, classes, kvs) lvl = do
@@ -525,10 +526,12 @@ inlineCommands = M.fromList $
   , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
                         complexNatbibCitation AuthorInText)
                    <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
   ] ++ map ignoreInlines
   -- these commands will be ignored unless --parse-raw is specified,
   -- in which case they will appear as raw latex blocks:
-  [ "noindent", "index", "nocite" ]
+  [ "noindent", "index" ]
 
 mkImage :: String -> LP Inlines
 mkImage src = do
-- 
cgit v1.2.3


From c8f97d3d418f929a1f499a9ef37c17d71f282d45 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 27 Apr 2014 20:56:50 -0700
Subject: Fix #1267.

We now check the writerName for a lua script in pandoc.hs, so that
lowercasing and format parsing aren't done.  Note this behavior
change: getWriter in Text.Pandoc no longer returns a custom writer on
input "foo.lua".
---
 src/Text/Pandoc.hs | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 66b0e49c0..a37c98814 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -152,7 +152,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Shared (safeRead, warn)
 import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
-import Data.List (intercalate, isSuffixOf)
+import Data.List (intercalate)
 import Data.Version (showVersion)
 import Data.Set (Set)
 import qualified Data.Set as Set
@@ -292,24 +292,21 @@ getReader s =
 
 -- | Retrieve writer based on formatSpec (format+extensions).
 getWriter :: String -> Either String Writer
-getWriter s =
-  case parseFormatSpec s of
-       Left e  -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
-       Right (writerName, setExts) ->
-           case lookup writerName writers of
-                   Nothing
-                     | ".lua" `isSuffixOf` s ->
-                       Right $ IOStringWriter $ writeCustom s
-                     | otherwise -> Left $ "Unknown writer: " ++ writerName
-                   Just (PureStringWriter r) -> Right $ PureStringWriter $
-                           \o -> r o{ writerExtensions = setExts $
-                                            getDefaultExtensions writerName }
-                   Just (IOStringWriter r) -> Right $ IOStringWriter $
-                           \o -> r o{ writerExtensions = setExts $
-                                            getDefaultExtensions writerName }
-                   Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $
-                           \o -> r o{ writerExtensions = setExts $
-                                            getDefaultExtensions writerName }
+getWriter s
+  = case parseFormatSpec s of
+         Left e  -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
+         Right (writerName, setExts) ->
+             case lookup writerName writers of
+                     Nothing -> Left $ "Unknown writer: " ++ writerName
+                     Just (PureStringWriter r) -> Right $ PureStringWriter $
+                             \o -> r o{ writerExtensions = setExts $
+                                              getDefaultExtensions writerName }
+                     Just (IOStringWriter r) -> Right $ IOStringWriter $
+                             \o -> r o{ writerExtensions = setExts $
+                                              getDefaultExtensions writerName }
+                     Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $
+                             \o -> r o{ writerExtensions = setExts $
+                                              getDefaultExtensions writerName }
 
 {-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-}
 -- | Deprecated.  Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead.
-- 
cgit v1.2.3


From 093229dc35506bff88f4edc6f2ae5316d621f8ff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Apr 2014 08:58:10 -0700
Subject: ConTeXt writer:  Improved autolinks.

Closes #1270.
---
 src/Text/Pandoc/Writers/ConTeXt.hs | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3095cf508..cec420dcf 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Writers.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Walk (query)
 import Text.Printf ( printf )
-import Data.List ( intercalate, isPrefixOf )
+import Data.List ( intercalate )
 import Control.Monad.State
 import Text.Pandoc.Pretty
 import Text.Pandoc.Templates ( renderTemplate' )
@@ -283,14 +283,6 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str
 inlineToConTeXt (RawInline _ _) = return empty
 inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
 inlineToConTeXt Space = return space
--- autolink
-inlineToConTeXt (Link [Str str] (src, tit))
-  | if "mailto:" `isPrefixOf` src
-    then src == escapeURI ("mailto:" ++ str)
-    else src == escapeURI str =
-  inlineToConTeXt (Link
-    [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"]
-    (src, tit))
 -- Handle HTML-like internal document references to sections
 inlineToConTeXt (Link txt          (('#' : ref), _)) = do
   opts <- gets stOptions
@@ -305,6 +297,7 @@ inlineToConTeXt (Link txt          (('#' : ref), _)) = do
            <> brackets (text ref)
 
 inlineToConTeXt (Link txt          (src, _))      = do
+  let isAutolink = txt == [Str src]
   st <- get
   let next = stNextRef st
   put $ st {stNextRef = next + 1}
@@ -313,8 +306,9 @@ inlineToConTeXt (Link txt          (src, _))      = do
   return $ "\\useURL"
            <> brackets (text ref)
            <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
-           <> brackets empty
-           <> brackets label
+           <> (if isAutolink
+                  then empty
+                  else brackets empty <> brackets label)
            <> "\\from"
            <> brackets (text ref)
 inlineToConTeXt (Image _ (src, _)) = do
-- 
cgit v1.2.3


From 81bf82c258f12700d64c8d090f75a90c8a18ec61 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Apr 2014 09:59:36 -0700
Subject: RST reader:  Better handling of directives.

* We now correctly handle field lists that are indented more than
  3 spaces.
* We treat an "aafig" directive as a code block with attributes,
  so it can be processed in a filter.  (Closes #1212.)
---
 src/Text/Pandoc/Readers/RST.hs | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7785861cc..4dc1fa006 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -185,22 +185,22 @@ block = choice [ codeBlock
 -- field list
 --
 
-rawFieldListItem :: String -> RSTParser (String, String)
-rawFieldListItem indent = try $ do
-  string indent
+rawFieldListItem :: Int -> RSTParser (String, String)
+rawFieldListItem minIndent = try $ do
+  indent <- length <$> many (char ' ')
+  guard $ indent >= minIndent
   char ':'
   name <- many1Till (noneOf "\n") (char ':')
   (() <$ lookAhead newline) <|> skipMany1 spaceChar
   first <- anyLine
-  rest <- option "" $ try $ do lookAhead (string indent >> spaceChar)
+  rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
                                indentedBlock
   let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
-  return (name, raw)
+  return (name, trimr raw)
 
-fieldListItem :: String
-              -> RSTParser (Inlines, [Blocks])
-fieldListItem indent = try $ do
-  (name, raw) <- rawFieldListItem indent
+fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
+fieldListItem minIndent = try $ do
+  (name, raw) <- rawFieldListItem minIndent
   let term = B.str name
   contents <- parseFromString parseBlocks raw
   optional blanklines
@@ -208,7 +208,7 @@ fieldListItem indent = try $ do
 
 fieldList :: RSTParser Blocks
 fieldList = try $ do
-  indent <- lookAhead $ many spaceChar
+  indent <- length <$> lookAhead (many spaceChar)
   items <- many1 $ fieldListItem indent
   case items of
      []     -> return mempty
@@ -521,11 +521,11 @@ directive' = do
   skipMany spaceChar
   top <- many $ satisfy (/='\n')
              <|> try (char '\n' <*
-                      notFollowedBy' (rawFieldListItem "   ") <*
+                      notFollowedBy' (rawFieldListItem 3) <*
                       count 3 (char ' ') <*
                       notFollowedBy blankline)
   newline
-  fields <- many $ rawFieldListItem "   "
+  fields <- many $ rawFieldListItem 3
   body <- option "" $ try $ blanklines >> indentedBlock
   optional blanklines
   let body' = body ++ "\n\n"
@@ -576,6 +576,9 @@ directive' = do
                                      role -> role })
         "code" -> codeblock (lookup "number-lines" fields) (trim top) body
         "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body
+        "aafig" -> do
+          let attribs = ("", ["aafig"], fields)
+          return $ B.codeBlockWith attribs $ stripTrailingNewlines body
         "math" -> return $ B.para $ mconcat $ map B.displayMath
                          $ toChunks $ top ++ "\n\n" ++ body
         "figure" -> do
-- 
cgit v1.2.3


From eaba340b9381264f3706c780182711a8713b3def Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Apr 2014 11:28:18 -0700
Subject: RST reader:  Some fixes to last change, and use "author" not
 "authors".

(in metadata)
---
 src/Text/Pandoc/Readers/RST.hs | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 4dc1fa006..54b6fa34a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -113,15 +113,16 @@ titleTransform (bs, meta) =
 metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
 metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
  where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
-       adjustAuthors (Meta metamap) = Meta $ M.adjust toPlain "author"
+       adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
                                            $ M.adjust toPlain "date"
                                            $ M.adjust toPlain "title"
-                                           $ M.adjust splitAuthors "authors"
+                                           $ M.mapKeys (\k -> if k == "authors" then "author" else k)
                                            $ metamap
        toPlain (MetaBlocks [Para xs]) = MetaInlines xs
        toPlain x                      = x
-       splitAuthors (MetaBlocks [Para xs]) = MetaList $ map MetaInlines
-                                                      $ splitAuthors' xs
+       splitAuthors (MetaBlocks [Para xs])
+                                      = MetaList $ map MetaInlines
+                                                 $ splitAuthors' xs
        splitAuthors x                 = x
        splitAuthors'                  = map normalizeSpaces .
                                          splitOnSemi . concatMap factorSemi
@@ -196,7 +197,7 @@ rawFieldListItem minIndent = try $ do
   rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
                                indentedBlock
   let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
-  return (name, trimr raw)
+  return (name, raw)
 
 fieldListItem :: Int -> RSTParser (Inlines, [Blocks])
 fieldListItem minIndent = try $ do
@@ -577,7 +578,7 @@ directive' = do
         "code" -> codeblock (lookup "number-lines" fields) (trim top) body
         "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body
         "aafig" -> do
-          let attribs = ("", ["aafig"], fields)
+          let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields)
           return $ B.codeBlockWith attribs $ stripTrailingNewlines body
         "math" -> return $ B.para $ mconcat $ map B.displayMath
                          $ toChunks $ top ++ "\n\n" ++ body
-- 
cgit v1.2.3


From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 30 Apr 2014 11:16:01 +0200
Subject: Org reader: Add support for custom link types

Org allows users to define their own custom link types.  E.g., in a
document with a lot of links to Wikipedia articles, one can define a
custom wikipedia link-type via

    #+LINK: wp https://en.wikipedia.org/wiki/

This allows to write [[wp:Org_mode][Org-mode]] instead of the
equivallent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]].
---
 src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0e52bff90..d68ef45ef 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,7 +43,7 @@ import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
-import           Control.Monad (foldM, guard, liftM, liftM2, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
@@ -51,6 +51,7 @@ import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
 import           Data.Maybe (listToMaybe, fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
+import           Network.HTTP (urlEncode)
 
 -- | Parse org-mode string and return a Pandoc document.
 readOrg :: ReaderOptions -- ^ Reader options
@@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord]
 
 type OrgBlockAttributes = M.Map String String
 
+type OrgLinkFormatters = M.Map String (String -> String)
+
 -- | Org-mode parser state
 data OrgParserState = OrgParserState
                       { orgStateOptions              :: ReaderOptions
@@ -86,6 +89,7 @@ data OrgParserState = OrgParserState
                       , orgStateLastForbiddenCharPos :: Maybe SourcePos
                       , orgStateLastPreCharPos       :: Maybe SourcePos
                       , orgStateLastStrPos           :: Maybe SourcePos
+                      , orgStateLinkFormatters       :: OrgLinkFormatters
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
                       , orgStateNotes'               :: OrgNoteTable
@@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState
                         , orgStateLastForbiddenCharPos = Nothing
                         , orgStateLastPreCharPos = Nothing
                         , orgStateLastStrPos = Nothing
+                        , orgStateLinkFormatters = M.empty
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
                         , orgStateNotes' = []
@@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser ()
 resetEmphasisNewlines = updateState $ \s ->
   s{ orgStateEmphasisNewlines = Nothing }
 
+addLinkFormat :: String
+              -> (String -> String)
+              -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+  let fs = orgStateLinkFormatters s
+  in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
 addToNotesTable :: OrgNoteRecord -> OrgParser ()
 addToNotesTable note = do
   oldnotes <- orgStateNotes' <$> getState
@@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks)
 specialLine = fmap return . try $ metaLine <|> commentLine
 
 metaLine :: OrgParser Blocks
-metaLine = try $ metaLineStart *> declarationLine
+metaLine = try $ mempty
+              <$ (metaLineStart *> (optionLine <|> declarationLine))
 
 commentLine :: OrgParser Blocks
 commentLine = try $ commentLineStart *> anyLine *> pure mempty
@@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
 commentLineStart :: OrgParser String
 commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 
-declarationLine :: OrgParser Blocks
+declarationLine :: OrgParser ()
 declarationLine = try $ do
   key <- metaKey
   inlinesF <- metaInlines
   updateState $ \st ->
     let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
     in st { orgStateMeta' = orgStateMeta' st <> meta' }
-  return mempty
+  return ()
 
 metaInlines :: OrgParser (F MetaValue)
 metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
@@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
                       <*  char ':'
                       <*  skipSpaces
 
+optionLine :: OrgParser ()
+optionLine = try $ do
+  key <- metaKey
+  case key of
+    "link" -> parseLinkFormat >>= uncurry addLinkFormat
+    _      -> mzero
+
+parseLinkFormat :: OrgParser ((String, String -> String))
+parseLinkFormat = try $ do
+  linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+  linkSubst <- parseFormat
+  return (linkType, linkSubst)
+
+-- | An ad-hoc, single-argument-only implementation of a printf-style format
+-- parser.
+parseFormat :: OrgParser (String -> String)
+parseFormat = try $ do
+  replacePlain <|> replaceUrl <|> justAppend
+ where
+   -- inefficient, but who cares
+   replacePlain = try $ (\x -> concat . flip intersperse x)
+                     <$> sequence [tillSpecifier 's', rest]
+   replaceUrl   = try $ (\x -> concat . flip intersperse x . urlEncode)
+                     <$> sequence [tillSpecifier 'h', rest]
+   justAppend   = try $ (++) <$> rest
+
+   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
+   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+
 --
 -- Headers
 --
@@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink
 explicitOrImageLink :: OrgParser (F Inlines)
 explicitOrImageLink = try $ do
   char '['
-  src    <- linkTarget
+  srcF   <- applyCustomLinkFormat =<< linkTarget
   title  <- enclosedRaw (char '[') (char ']')
   title' <- parseFromString (mconcat <$> many inline) title
   char ']'
-  return $ if isImageFilename src && isImageFilename title
-           then pure $ B.link src "" $ B.image title mempty mempty
-           else linkToInlinesF src =<< title'
+  return $ do
+    src <- srcF
+    if isImageFilename src && isImageFilename title
+      then pure $ B.link src "" $ B.image title mempty mempty
+      else linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
@@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
 linkTarget :: OrgParser String
 linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
 
+applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat link = do
+  let (linkType, rest) = break (== ':') link
+  return $ do
+    formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
+    return $ maybe link ($ drop 1 rest) formatter
+
+
 linkToInlinesF :: String -> Inlines -> F Inlines
 linkToInlinesF s@('#':_) = pure . B.link s ""
 linkToInlinesF s
-- 
cgit v1.2.3


From 007eb96e06bc1fff12119addf2e03552ac992b2e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 1 May 2014 09:23:21 -0700
Subject: Markdown reader:  Make one-column pipe tables work.

Closes #1218.
---
 src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d3ca8d26f..97a3168da 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1221,11 +1221,20 @@ removeOneLeadingSpace xs =
 gridTableFooter :: MarkdownParser [Char]
 gridTableFooter = blanklines
 
+pipeBreak :: MarkdownParser [Alignment]
+pipeBreak = try $ do
+  nonindentSpaces
+  openPipe <- (True <$ char '|') <|> return False
+  first <- pipeTableHeaderPart
+  rest <- many $ sepPipe *> pipeTableHeaderPart
+  -- surrounding pipes needed for a one-column table:
+  guard $ not (null rest && not openPipe)
+  optional (char '|')
+  blankline
+  return (first:rest)
+
 pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
 pipeTable = try $ do
-  let pipeBreak = nonindentSpaces *> optional (char '|') *>
-                      pipeTableHeaderPart `sepBy1` sepPipe <*
-                      optional (char '|') <* blankline
   (heads,aligns) <- try ( pipeBreak >>= \als ->
                      return (return $ replicate (length als) mempty, als))
                   <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
@@ -1244,12 +1253,13 @@ sepPipe = try $ do
 pipeTableRow :: MarkdownParser (F [Blocks])
 pipeTableRow = do
   nonindentSpaces
-  optional (char '|')
+  openPipe <- (True <$ char '|') <|> return False
   let cell = mconcat <$>
                  many (notFollowedBy (blankline <|> char '|') >> inline)
   first <- cell
-  sepPipe
-  rest <- cell `sepBy1` sepPipe
+  rest <- many $ sepPipe *> cell
+  -- surrounding pipes needed for a one-column table:
+  guard $ not (null rest && not openPipe)
   optional (char '|')
   blankline
   let cells  = sequence (first:rest)
-- 
cgit v1.2.3


From 4c4382420356928d73026395d4ab2f0f9957df08 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 May 2014 22:58:47 -0700
Subject: Fixed empty reference links.  Closes #1186.

Includes test.
---
 src/Text/Pandoc/Readers/Markdown.hs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 97a3168da..36f73d847 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -349,10 +349,8 @@ referenceKey = try $ do
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
   let sourceURL = liftM unwords $ many $ try $ do
-                    notFollowedBy' referenceTitle
-                    skipMany spaceChar
-                    optional $ newline >> notFollowedBy blankline
                     skipMany spaceChar
+                    notFollowedBy' referenceTitle
                     notFollowedBy' (() <$ reference)
                     many1 $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >>
-- 
cgit v1.2.3


From 743dac493fab08abdec59feb7bd57030a3ba5c90 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 May 2014 15:15:04 -0700
Subject: LaTeX reader:  Better error messages with include files.

Closes #1274.

Rewrote handleIncludes.

We now report the actual source file and position where the error
occurs, even if it is included.  We do this by inserting special
commands, `\PandocStartInclude` and `\PandocEndInclude`, that encode
this information in the preprocessing phase.

Also generalized the types of a couple functions from
`Text.Pandoc.Parsing`.
---
 src/Text/Pandoc/Parsing.hs       |   2 +-
 src/Text/Pandoc/Readers/LaTeX.hs | 172 ++++++++++++++++++++++++++-------------
 2 files changed, 118 insertions(+), 56 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index a9009eaa2..4d0a677da 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -504,7 +504,7 @@ withHorizDisplacement parser = do
 
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
-withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char])
+withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
 withRaw parser = do
   pos1 <- getPosition
   inp <- getInput
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index b5d529eb9..d1e0b6f0a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -40,8 +40,10 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
                                    mathDisplay, mathInline)
+import Text.Parsec.Prim (ParsecT, runParserT)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Char ( chr, ord )
+import Control.Monad.Trans (lift)
 import Control.Monad
 import Text.Pandoc.Builder
 import Data.Char (isLetter, isAlphaNum)
@@ -303,6 +305,8 @@ blockCommands = M.fromList $
   , ("documentclass", skipopts *> braced *> preamble)
   , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
   , ("caption", tok >>= setCaption)
+  , ("PandocStartInclude", startInclude)
+  , ("PandocEndInclude", endInclude)
   ] ++ map ignoreBlocks
   -- these commands will be ignored unless --parse-raw is specified,
   -- in which case they will appear as raw latex blocks
@@ -794,31 +798,107 @@ rawEnv name = do
             (withRaw (env name blocks) >>= applyMacros' . snd)
      else env name blocks
 
+----
+
+type IncludeParser = ParsecT [Char] [String] IO String
+
 -- | Replace "include" commands with file contents.
 handleIncludes :: String -> IO String
-handleIncludes = handleIncludes' []
-
--- parents parameter prevents infinite include loops
-handleIncludes' :: [FilePath] -> String -> IO String
-handleIncludes' _ [] = return []
-handleIncludes' parents ('\\':'%':xs) =
-  ("\\%"++) `fmap` handleIncludes' parents xs
-handleIncludes' parents ('%':xs) = handleIncludes' parents
-  $ drop 1 $ dropWhile (/='\n') xs
-handleIncludes' parents ('\\':xs) =
-  case runParser include defaultParserState "input" ('\\':xs) of
-       Right (fs, rest) -> do yss <- mapM (\f -> if f `elem` parents
-                                                    then "" <$ warn ("Include file loop in '"
-                                                                      ++ f ++ "'.")
-                                                    else readTeXFile f >>=
-                                                           handleIncludes' (f:parents)) fs
-                              rest' <- handleIncludes' parents rest
-                              return $ intercalate "\n" yss ++ rest'
-       _  -> case runParser (verbCmd <|> verbatimEnv) defaultParserState
-                  "input" ('\\':xs) of
-                   Right (r, rest) -> (r ++) `fmap` handleIncludes' parents rest
-                   _               -> ('\\':) `fmap` handleIncludes' parents xs
-handleIncludes' parents (x:xs) = (x:) `fmap` handleIncludes' parents xs
+handleIncludes s = do
+  res <- runParserT includeParser' [] "input" s
+  case res of
+       Right s'    -> return s'
+       Left e      -> error $ show e
+
+includeParser' :: IncludeParser
+includeParser' =
+  concat <$> many (comment' <|> escaped' <|> blob' <|> include'
+                   <|> startMarker' <|> endMarker'
+                   <|> verbCmd' <|> verbatimEnv' <|> backslash')
+
+comment' :: IncludeParser
+comment' = do
+  char '%'
+  xs <- manyTill anyChar newline
+  return ('%':xs ++ "\n")
+
+escaped' :: IncludeParser
+escaped' = try $ string "\\%" <|> string "\\\\"
+
+verbCmd' :: IncludeParser
+verbCmd' = fmap snd <$>
+  withRaw $ try $ do
+             string "\\verb"
+             c <- anyChar
+             manyTill anyChar (char c)
+
+verbatimEnv' :: IncludeParser
+verbatimEnv' = fmap snd <$>
+  withRaw $ try $ do
+             string "\\begin"
+             name <- braced'
+             guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
+                                  "minted", "alltt"]
+             manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}")
+
+blob' :: IncludeParser
+blob' = try $ many1 (noneOf "\\%")
+
+backslash' :: IncludeParser
+backslash' = string "\\"
+
+braced' :: IncludeParser
+braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
+
+include' :: IncludeParser
+include' = do
+  name <- try $ do
+              char '\\'
+              try (string "include")
+               <|> try (string "input")
+               <|> string "usepackage"
+   -- skip options
+  skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
+  fs <- (map trim . splitBy (==',')) <$> braced'
+  pos <- getPosition
+  let fs' = if name == "usepackage"
+               then map (flip replaceExtension ".sty") fs
+               else map (flip replaceExtension ".tex") fs
+  containers <- getState
+  let fn = case containers of
+                (f':_) -> f'
+                []     -> "input"
+  -- now process each include file in order...
+  rest <- getInput
+  results' <- forM fs' (\f -> do
+    when (f `elem` containers) $
+      fail "Include file loop!"
+    contents <- lift $ readTeXFile f
+    return $ "\\PandocStartInclude{" ++ f ++ "}" ++
+             contents ++ "\\PandocEndInclude{" ++
+             fn ++ "}{" ++ show (sourceLine pos) ++ "}{"
+             ++ show (sourceColumn pos) ++ "}")
+  setInput $ concat results' ++ rest
+  return ""
+
+startMarker' :: IncludeParser
+startMarker' = try $ do
+  string "\\PandocStartInclude"
+  fn <- braced'
+  updateState (fn:)
+  setPosition $ newPos fn 1 1
+  return $ "\\PandocStartInclude{" ++ fn ++ "}"
+
+endMarker' :: IncludeParser
+endMarker' = try $ do
+  string "\\PandocEndInclude"
+  fn <- braced'
+  ln <- braced'
+  co <- braced'
+  updateState tail
+  setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
+  return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++
+               co ++ "}"
 
 readTeXFile :: FilePath -> IO String
 readTeXFile f = do
@@ -833,27 +913,7 @@ readFileFromDirs (d:ds) f =
   E.catch (UTF8.readFile $ d </> f) $ \(_ :: E.SomeException) ->
     readFileFromDirs ds f
 
-include :: LP ([FilePath], String)
-include = do
-  name <- controlSeq "include"
-      <|> controlSeq "input"
-      <|> controlSeq "usepackage"
-  skipopts
-  fs <- (splitBy (==',')) <$> braced
-  rest <- getInput
-  let fs' = if name == "usepackage"
-               then map (flip replaceExtension ".sty") fs
-               else map (flip replaceExtension ".tex") fs
-  return (fs', rest)
-
-verbCmd :: LP (String, String)
-verbCmd = do
-  (_,r) <- withRaw $ do
-             controlSeq "verb"
-             c <- anyChar
-             manyTill anyChar (char c)
-  rest <- getInput
-  return (r, rest)
+----
 
 keyval :: LP (String, String)
 keyval = try $ do
@@ -875,17 +935,6 @@ alltt t = walk strToCode <$> parseFromString blocks
   where strToCode (Str s) = Code nullAttr s
         strToCode x       = x
 
-verbatimEnv :: LP (String, String)
-verbatimEnv = do
-  (_,r) <- withRaw $ do
-             controlSeq "begin"
-             name <- braced
-             guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
-                                  "minted", "alltt"]
-             verbEnv name
-  rest <- getInput
-  return (r,rest)
-
 rawLaTeXBlock :: Parser [Char] ParserState String
 rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
 
@@ -1218,3 +1267,16 @@ simpTable = try $ do
   lookAhead $ controlSeq "end" -- make sure we're at end
   return $ table mempty (zip aligns (repeat 0)) header'' rows
 
+startInclude :: LP Blocks
+startInclude = do
+  fn <- braced
+  setPosition $ newPos fn 1 1
+  return mempty
+
+endInclude :: LP Blocks
+endInclude = do
+  fn <- braced
+  ln <- braced
+  co <- braced
+  setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co)
+  return mempty
-- 
cgit v1.2.3


From 2ba78730861b0947ed26aec00f0520e5affd5f7c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 May 2014 18:34:23 -0700
Subject: LaTeX reader: Fixed regression introduced with last commit.

Tests now pass again.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index d1e0b6f0a..79c66b510 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -852,18 +852,18 @@ braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
 
 include' :: IncludeParser
 include' = do
-  name <- try $ do
+  fs' <- try $ do
               char '\\'
-              try (string "include")
-               <|> try (string "input")
-               <|> string "usepackage"
-   -- skip options
-  skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
-  fs <- (map trim . splitBy (==',')) <$> braced'
+              name <- try (string "include")
+                  <|> try (string "input")
+                  <|> string "usepackage"
+              -- skip options
+              skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
+              fs <- (map trim . splitBy (==',')) <$> braced'
+              return $ if name == "usepackage"
+                          then map (flip replaceExtension ".sty") fs
+                          else map (flip replaceExtension ".tex") fs
   pos <- getPosition
-  let fs' = if name == "usepackage"
-               then map (flip replaceExtension ".sty") fs
-               else map (flip replaceExtension ".tex") fs
   containers <- getState
   let fn = case containers of
                 (f':_) -> f'
-- 
cgit v1.2.3


From ccbf4fc9c20ccdce0f7144845cd022ee8cfca1af Mon Sep 17 00:00:00 2001
From: Neil Mayhew <neiljhmayhew@gmail.com>
Date: Mon, 14 Apr 2014 19:07:14 -0600
Subject: Distinguish tight and loose lists in Docbook output

Determined by the first block of the first item being Plain.
---
 src/Text/Pandoc/Writers/Docbook.hs | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 2d6ce3020..e1b62f02d 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -185,10 +185,13 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) =
                            else languagesByExtension . map toLower $ s
           langs       = concatMap langsFrom classes
 blockToDocbook opts (BulletList lst) =
-  inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
+  let attribs = case lst of
+                      ((Plain _:_):_) -> [("spacing", "compact")]
+                      _               -> []
+  in  inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst
 blockToDocbook _ (OrderedList _ []) = empty
 blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
-  let attribs  = case numstyle of
+  let numeration = case numstyle of
                        DefaultStyle -> []
                        Decimal      -> [("numeration", "arabic")]
                        Example      -> [("numeration", "arabic")]
@@ -196,14 +199,21 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
                        LowerAlpha   -> [("numeration", "loweralpha")]
                        UpperRoman   -> [("numeration", "upperroman")]
                        LowerRoman   -> [("numeration", "lowerroman")]
-      items    = if start == 1
-                    then listItemsToDocbook opts (first:rest)
-                    else (inTags True "listitem" [("override",show start)]
-                         (blocksToDocbook opts $ map plainToPara first)) $$
-                         listItemsToDocbook opts rest
+      spacing    = case first of
+                       (Plain _:_)  -> [("spacing", "compact")]
+                       _            -> []
+      attribs    = numeration ++ spacing
+      items      = if start == 1
+                      then listItemsToDocbook opts (first:rest)
+                      else (inTags True "listitem" [("override",show start)]
+                           (blocksToDocbook opts $ map plainToPara first)) $$
+                           listItemsToDocbook opts rest
   in  inTags True "orderedlist" attribs items
 blockToDocbook opts (DefinitionList lst) =
-  inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
+  let attribs = case lst of
+                  ((_, (Plain _:_):_):_) -> [("spacing", "compact")]
+                  _                      -> []
+  in  inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst
 blockToDocbook _ (RawBlock f str)
   | f == "docbook" = text str -- raw XML block
   | f == "html"    = text str -- allow html for backwards compatibility
-- 
cgit v1.2.3


From abd3a039b9adcafa8aa1df6e0753a725f90c78fc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 May 2014 20:45:05 -0700
Subject: DocBook writer:  Small tweaks to last commit.

* Use isTightList from Shared.
* Adjust writer test, since isTightList is a bit different from what
  was used before.

Closes #1250.
---
 src/Text/Pandoc/Writers/Docbook.hs | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index e1b62f02d..1a8e58354 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -185,9 +185,7 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) =
                            else languagesByExtension . map toLower $ s
           langs       = concatMap langsFrom classes
 blockToDocbook opts (BulletList lst) =
-  let attribs = case lst of
-                      ((Plain _:_):_) -> [("spacing", "compact")]
-                      _               -> []
+  let attribs = [("spacing", "compact") | isTightList lst]
   in  inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst
 blockToDocbook _ (OrderedList _ []) = empty
 blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
@@ -199,9 +197,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
                        LowerAlpha   -> [("numeration", "loweralpha")]
                        UpperRoman   -> [("numeration", "upperroman")]
                        LowerRoman   -> [("numeration", "lowerroman")]
-      spacing    = case first of
-                       (Plain _:_)  -> [("spacing", "compact")]
-                       _            -> []
+      spacing    = [("spacing", "compact") | isTightList (first:rest)]
       attribs    = numeration ++ spacing
       items      = if start == 1
                       then listItemsToDocbook opts (first:rest)
@@ -210,9 +206,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
                            listItemsToDocbook opts rest
   in  inTags True "orderedlist" attribs items
 blockToDocbook opts (DefinitionList lst) =
-  let attribs = case lst of
-                  ((_, (Plain _:_):_):_) -> [("spacing", "compact")]
-                  _                      -> []
+  let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst]
   in  inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst
 blockToDocbook _ (RawBlock f str)
   | f == "docbook" = text str -- raw XML block
-- 
cgit v1.2.3


From fde52c25a65c479871afcf8192f56d2918230f5e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 May 2014 21:08:45 -0700
Subject: AsciiDoc writer: Correctly handle empty table cells.

Closes #1245.
---
 src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 68b525742..e5868172e 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -217,6 +217,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) =  do
   let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
                               return $ text "|" <> chomp d
       makeCell [Para x]  = makeCell [Plain x]
+      makeCell []        = return $ text "|"
       makeCell _         = return $ text "|" <> "[multiblock cell omitted]"
   let makeRow cells = hsep `fmap` mapM makeCell cells
   rows' <- mapM makeRow rows
@@ -227,7 +228,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) =  do
                     else 100000
   let maxwidth = maximum $ map offset (head':rows')
   let body = if maxwidth > colwidth then vsep rows' else vcat rows'
-  let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '='
+  let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '='
   return $
     caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
 blockToAsciiDoc opts (BulletList items) = do
-- 
cgit v1.2.3


From 96c0c950cab8b42d5d6b8b1a6f1fb20f7f4a5aae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 3 May 2014 21:31:53 -0700
Subject: AsciiDoc writer:  Handle multiblock table cells.

Closes #1246.
---
 src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e5868172e..15579cba2 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -218,7 +218,8 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) =  do
                               return $ text "|" <> chomp d
       makeCell [Para x]  = makeCell [Plain x]
       makeCell []        = return $ text "|"
-      makeCell _         = return $ text "|" <> "[multiblock cell omitted]"
+      makeCell bs        = do d <- blockListToAsciiDoc opts bs
+                              return $ text "a|" $$ d
   let makeRow cells = hsep `fmap` mapM makeCell cells
   rows' <- mapM makeRow rows
   head' <- makeRow headers
-- 
cgit v1.2.3


From 3e42f08e87c4795b260154e9747df29bc1613ccc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 4 May 2014 08:07:17 -0700
Subject: Markdown reader: Fixed bug with unwanted code in lists.

Closes #1154.

When reading a raw list item, we now strip off nonindent
spaces.
---
 src/Text/Pandoc/Readers/Markdown.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 36f73d847..69245cf66 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -746,6 +746,7 @@ listLine = try $ do
                      many spaceChar
                      listStart)
   notFollowedBy' $ htmlTag (~== TagClose "div")
+  nonindentSpaces
   chunks <- manyTill
               (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
              <|> liftM snd (htmlTag isCommentTag)
-- 
cgit v1.2.3


From d72871598174474218ae46dd984632a3753882b1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 4 May 2014 10:45:20 -0700
Subject: Docx writer:  Added ability to give fallback in parseXml.

---
 src/Text/Pandoc/Writers/Docx.hs | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 2a834c2da..bb2071455 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -217,7 +217,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   -- styles
   let newstyles = styleToOpenXml $ writerHighlightStyle opts
   let stylepath = "word/styles.xml"
-  styledoc <- parseXml refArchive stylepath
+  styledoc <- parseXml refArchive stylepath Nothing
   let styledoc' = styledoc{ elContent = elContent styledoc ++
                   [Elem x | x <- newstyles, writerHighlight opts] }
   let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
@@ -256,19 +256,20 @@ writeDocx opts doc@(Pandoc meta _) = do
         ]
   let relsEntry = toEntry relsPath epochtime $ renderXml rels
 
-  let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap`
-                                parseXml refArchive path
-  docPropsAppEntry <- entryFromArchive "docProps/app.xml"
-  themeEntry <- entryFromArchive "word/theme/theme1.xml"
-  fontTableEntry <- entryFromArchive "word/fontTable.xml"
-  settingsEntry <- entryFromArchive "word/settings.xml"
-  webSettingsEntry <- entryFromArchive "word/webSettings.xml"
+  let entryFromArchive path fallback =
+         (toEntry path epochtime . renderXml) `fmap`
+           parseXml refArchive path fallback
+  docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing
+  themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing
+  fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing
+  settingsEntry <- entryFromArchive "word/settings.xml" Nothing
+  webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing
   let miscRels = [ f | f <- filesInArchive refArchive
                      , "word/_rels/" `isPrefixOf` f
                      , ".xml.rels" `isSuffixOf` f
                      , f /= "word/_rels/document.xml.rels"
                      , f /= "word/_rels/footnotes.xml.rels" ]
-  miscRelEntries <- mapM entryFromArchive miscRels
+  miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels
 
   -- Create archive
   let archive = foldr addEntryToArchive emptyArchive $
@@ -814,10 +815,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
 br :: Element
 br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
 
-parseXml :: Archive -> String -> IO Element
-parseXml refArchive relpath =
-  case findEntryByPath relpath refArchive of
-       Just e  -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
-                       Just d  -> return d
-                       Nothing -> fail $ relpath ++ " corrupt in reference docx"
-       Nothing -> fail $ relpath ++ " missing in reference docx"
+parseXml :: Archive -> String -> Maybe String -> IO Element
+parseXml refArchive relpath fallback =
+  case (findEntryByPath relpath refArchive
+         >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus`
+       (fallback >>= parseXMLDoc) of
+            Just d  -> return d
+            Nothing -> fail $ relpath ++ " corrupt or missing in reference docx"
-- 
cgit v1.2.3


From 0c7e084342b2a077f83809e6613979adcefb1592 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 4 May 2014 10:54:45 -0700
Subject: Docx writer:  Fall back on distribution reference.docx.

* Undid changes to parseXml in last commit.
* Instead of a string fallback, we have parseXml fall back
  on the reference.docx that comes with pandoc if the user's
  reference.docx does not contain a needed file.
* Closes #1185.
---
 src/Text/Pandoc/Writers/Docx.hs | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index bb2071455..fcb73a427 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -116,6 +116,7 @@ writeDocx opts doc@(Pandoc meta _) = do
        case writerReferenceDocx opts of
              Just f  -> B.readFile f
              Nothing -> readDataFile datadir "reference.docx"
+  distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx"
 
   ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
                        defaultWriterState
@@ -217,7 +218,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   -- styles
   let newstyles = styleToOpenXml $ writerHighlightStyle opts
   let stylepath = "word/styles.xml"
-  styledoc <- parseXml refArchive stylepath Nothing
+  styledoc <- parseXml refArchive distArchive stylepath
   let styledoc' = styledoc{ elContent = elContent styledoc ++
                   [Elem x | x <- newstyles, writerHighlight opts] }
   let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
@@ -256,20 +257,20 @@ writeDocx opts doc@(Pandoc meta _) = do
         ]
   let relsEntry = toEntry relsPath epochtime $ renderXml rels
 
-  let entryFromArchive path fallback =
+  let entryFromArchive path =
          (toEntry path epochtime . renderXml) `fmap`
-           parseXml refArchive path fallback
-  docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing
-  themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing
-  fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing
-  settingsEntry <- entryFromArchive "word/settings.xml" Nothing
-  webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing
+           parseXml refArchive distArchive path
+  docPropsAppEntry <- entryFromArchive "docProps/app.xml"
+  themeEntry <- entryFromArchive "word/theme/theme1.xml"
+  fontTableEntry <- entryFromArchive "word/fontTable.xml"
+  settingsEntry <- entryFromArchive "word/settings.xml"
+  webSettingsEntry <- entryFromArchive "word/webSettings.xml"
   let miscRels = [ f | f <- filesInArchive refArchive
                      , "word/_rels/" `isPrefixOf` f
                      , ".xml.rels" `isSuffixOf` f
                      , f /= "word/_rels/document.xml.rels"
                      , f /= "word/_rels/footnotes.xml.rels" ]
-  miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels
+  miscRelEntries <- mapM entryFromArchive miscRels
 
   -- Create archive
   let archive = foldr addEntryToArchive emptyArchive $
@@ -815,10 +816,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
 br :: Element
 br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
 
-parseXml :: Archive -> String -> Maybe String -> IO Element
-parseXml refArchive relpath fallback =
-  case (findEntryByPath relpath refArchive
-         >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus`
-       (fallback >>= parseXMLDoc) of
+parseXml :: Archive -> Archive -> String -> IO Element
+parseXml refArchive distArchive relpath =
+  case ((findEntryByPath relpath refArchive `mplus`
+         findEntryByPath relpath distArchive)
+         >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
             Just d  -> return d
             Nothing -> fail $ relpath ++ " corrupt or missing in reference docx"
-- 
cgit v1.2.3


From 51aa3048347280db6798a84a30af4f6e1ae56b26 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 4 May 2014 14:43:05 -0700
Subject: LaTeX writer: Fixed inconsistencies with reference escaping.

- toLabel is now monadic, and it does the needed string escaping.
- Closes #1130.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 131 ++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 64 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e52220f01..c17e041b5 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -193,7 +193,7 @@ stringToLaTeX  _     []     = return ""
 stringToLaTeX  ctx (x:xs) = do
   opts <- gets stOptions
   rest <- stringToLaTeX ctx xs
-  let ligatures = writerTeXLigatures opts && (ctx /= CodeString)
+  let ligatures = writerTeXLigatures opts && ctx == TextString
   let isUrl = ctx == URLString
   when (x == '€') $
      modify $ \st -> st{ stUsesEuro = True }
@@ -207,7 +207,8 @@ stringToLaTeX  ctx (x:xs) = do
        '&' -> "\\&" ++ rest
        '_' | not isUrl -> "\\_" ++ rest
        '#' -> "\\#" ++ rest
-       '-' -> case xs of   -- prevent adjacent hyphens from forming ligatures
+       '-' | not isUrl -> case xs of
+                   -- prevent adjacent hyphens from forming ligatures
                    ('-':_) -> "-\\/" ++ rest
                    _       -> '-' : rest
        '~' | not isUrl -> "\\textasciitilde{}" ++ rest
@@ -229,12 +230,13 @@ stringToLaTeX  ctx (x:xs) = do
        '\x2013' | ligatures -> "--" ++ rest
        _        -> x : rest
 
-toLabel :: String -> String
-toLabel [] = ""
-toLabel (x:xs)
-  | (isLetter x || isDigit x) && isAscii x = x:toLabel xs
-  | elem x "-+=:;." = x:toLabel xs
-  | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs
+toLabel :: String -> State WriterState String
+toLabel z = go `fmap` stringToLaTeX URLString z
+ where go [] = ""
+       go (x:xs)
+         | (isLetter x || isDigit x) && isAscii x = x:go xs
+         | elem x "-+=:;." = x:go xs
+         | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs
 
 -- | Puts contents into LaTeX command.
 inCmd :: String -> Doc -> Doc
@@ -340,57 +342,57 @@ blockToLaTeX (BlockQuote lst) = do
          return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
 blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
   opts <- gets stOptions
+  ref <- toLabel identifier
+  let linkAnchor = if null identifier
+                      then empty
+                      else "\\hyperdef{}" <> braces (text ref) <>
+                                braces ("\\label" <> braces (text ref))
+  let lhsCodeBlock = do
+        modify $ \s -> s{ stLHS = True }
+        return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
+                            "\\end{code}") $$ cr
+  let rawCodeBlock = do
+        st <- get
+        env <- if stInNote st
+                  then modify (\s -> s{ stVerbInNote = True }) >>
+                       return "Verbatim"
+                  else return "verbatim"
+        return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$
+                 text str $$ text ("\\end{" ++ env ++ "}")) <> cr
+  let listingsCodeBlock = do
+        st <- get
+        let params = if writerListings (stOptions st)
+                     then (case getListingsLanguage classes of
+                                Just l  -> [ "language=" ++ l ]
+                                Nothing -> []) ++
+                          [ "numbers=left" | "numberLines" `elem` classes
+                             || "number" `elem` classes
+                             || "number-lines" `elem` classes ] ++
+                          [ (if key == "startFrom"
+                                then "firstnumber"
+                                else key) ++ "=" ++ attr |
+                                (key,attr) <- keyvalAttr ] ++
+                          (if identifier == ""
+                                then []
+                                else [ "label=" ++ ref ])
+
+                     else []
+            printParams
+                | null params = empty
+                | otherwise   = brackets $ hcat (intersperse ", " (map text params))
+        return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
+                 "\\end{lstlisting}") $$ cr
+  let highlightedCodeBlock =
+        case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
+               Nothing -> rawCodeBlock
+               Just  h -> modify (\st -> st{ stHighlighting = True }) >>
+                          return (flush $ linkAnchor $$ text h)
   case () of
      _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
          "literate" `elem` classes                      -> lhsCodeBlock
        | writerListings opts                            -> listingsCodeBlock
        | writerHighlight opts && not (null classes)     -> highlightedCodeBlock
        | otherwise                                      -> rawCodeBlock
-   where ref = text $ toLabel identifier
-         linkAnchor = if null identifier
-                         then empty
-                         else "\\hyperdef{}" <> braces ref <>
-                                   braces ("\\label" <> braces ref)
-         lhsCodeBlock = do
-           modify $ \s -> s{ stLHS = True }
-           return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
-                               "\\end{code}") $$ cr
-         rawCodeBlock = do
-           st <- get
-           env <- if stInNote st
-                     then modify (\s -> s{ stVerbInNote = True }) >>
-                          return "Verbatim"
-                     else return "verbatim"
-           return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$
-                    text str $$ text ("\\end{" ++ env ++ "}")) <> cr
-         listingsCodeBlock = do
-           st <- get
-           let params = if writerListings (stOptions st)
-                        then (case getListingsLanguage classes of
-                                   Just l  -> [ "language=" ++ l ]
-                                   Nothing -> []) ++
-                             [ "numbers=left" | "numberLines" `elem` classes
-                                || "number" `elem` classes
-                                || "number-lines" `elem` classes ] ++
-                             [ (if key == "startFrom"
-                                   then "firstnumber"
-                                   else key) ++ "=" ++ attr |
-                                   (key,attr) <- keyvalAttr ] ++
-                             (if identifier == ""
-                                   then []
-                                   else [ "label=" ++ toLabel identifier ])
-
-                        else []
-               printParams
-                   | null params = empty
-                   | otherwise   = brackets $ hcat (intersperse ", " (map text params))
-           return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
-                    "\\end{lstlisting}") $$ cr
-         highlightedCodeBlock =
-           case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
-                  Nothing -> rawCodeBlock
-                  Just  h -> modify (\st -> st{ stHighlighting = True }) >>
-                             return (flush $ linkAnchor $$ text h)
 blockToLaTeX (RawBlock f x)
   | f == Format "latex" || f == Format "tex"
                         = return $ text x
@@ -579,6 +581,7 @@ sectionHeader :: Bool    -- True for unnumbered
               -> State WriterState Doc
 sectionHeader unnumbered ref level lst = do
   txt <- inlineListToLaTeX lst
+  lab <- text `fmap` toLabel ref
   let noNote (Note _) = Str ""
       noNote x        = x
   let lstNoNotes = walk noNote lst
@@ -599,13 +602,13 @@ sectionHeader unnumbered ref level lst = do
   let refLabel x = (if ref `elem` internalLinks
                        then text "\\hyperdef"
                                 <> braces empty
-                                <> braces (text $ toLabel ref)
+                                <> braces lab
                                 <> braces x
                        else x)
-  let headerWith x y r = refLabel $ text x <> y <>
-                             if null r
+  let headerWith x y = refLabel $ text x <> y <>
+                             if null ref
                                 then empty
-                                else text "\\label" <> braces (text $ toLabel r)
+                                else text "\\label" <> braces lab
   let sectionType = case level' of
                           0  | writerBeamer opts -> "part"
                              | otherwise -> "chapter"
@@ -624,7 +627,7 @@ sectionHeader unnumbered ref level lst = do
   return $ if level' > 5
               then txt
               else prefix $$
-                   headerWith ('\\':sectionType) stuffing ref
+                   headerWith ('\\':sectionType) stuffing
                    $$ if unnumbered
                          then "\\addcontentsline{toc}" <>
                                 braces (text sectionType) <>
@@ -659,9 +662,10 @@ inlineToLaTeX (Span (id',classes,_) ils) = do
   let noEmph = "csl-no-emph" `elem` classes
   let noStrong = "csl-no-strong" `elem` classes
   let noSmallCaps = "csl-no-smallcaps" `elem` classes
-  let label' = if (null id')
-               then empty
-               else text "\\label" <> braces (text $ toLabel id')
+  label' <- if null id'
+               then return empty
+               else toLabel id' >>= \x ->
+                      return (text "\\label" <> braces (text x))
   fmap (label' <>)
     ((if noEmph then inCmd "textup" else id) .
      (if noStrong then inCmd "textnormal" else id) .
@@ -745,9 +749,8 @@ inlineToLaTeX (LineBreak) = return "\\\\"
 inlineToLaTeX Space = return space
 inlineToLaTeX (Link txt ('#':ident, _)) = do
   contents <- inlineListToLaTeX txt
-  ident' <- stringToLaTeX URLString ident
-  return $ text "\\hyperref" <> brackets (text $ toLabel ident') <>
-              braces contents
+  lab <- toLabel ident
+  return $ text "\\hyperref" <> brackets (text lab) <> braces contents
 inlineToLaTeX (Link txt (src, _)) =
   case txt of
         [Str x] | x == src ->  -- autolink
-- 
cgit v1.2.3


From dbd6c1540f9688a3439fceec405ec4d86dc951d5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 4 May 2014 16:21:18 -0700
Subject: Fixed the fix to #1154.

We need to strip off up to 4 spaces, not up to 3.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 69245cf66..aac87f363 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -746,7 +746,7 @@ listLine = try $ do
                      many spaceChar
                      listStart)
   notFollowedBy' $ htmlTag (~== TagClose "div")
-  nonindentSpaces
+  optional (() <$ indentSpaces)
   chunks <- manyTill
               (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
              <|> liftM snd (htmlTag isCommentTag)
-- 
cgit v1.2.3


From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Mon, 5 May 2014 14:39:25 +0200
Subject: Org reader: Read inline code blocks

Org's inline code blocks take forms like `src_haskell(print "hi")` and
are frequently used to include results from computations called from
within the document.  The blocks are read as inline code and marked with
the special class `rundoc-block`.  Proper handling and execution of
these blocks is the subject of a separate library, rundoc, which is
work in progress.

This closes #1278.
---
 src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index d68ef45ef..dba61dfe0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -43,6 +43,7 @@ import           Text.Pandoc.Shared (compactify', compactify'DL)
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
+import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
@@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence
              <$> many1 (listItem bulletListStart)
 
 orderedList :: OrgParser (F Blocks)
--- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
 orderedList = fmap B.orderedList . fmap compactify' . sequence
               <$> many1 (listItem orderedListStart)
 
@@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int
 definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
   term <- manyTill (noneOf "\n\r") (try $ string "::")
-  first <- anyLineNewline
+  line1 <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString inline term
-  contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
+  contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
   return $ (,) <$> term' <*> fmap (:[]) contents'
 
 
@@ -789,6 +789,7 @@ inline =
          , footnote
          , linkOrImage
          , anchor
+         , inlineCodeBlock
          , str
          , endline
          , emph
@@ -989,6 +990,42 @@ solidify = map replaceSpecialChar
            | c `elem` "_.-:" = c
            | otherwise       = '-'
 
+-- | Parses an inline code block and marks it as an babel block.
+inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock = try $ do
+  string "src_"
+  lang <- many1 orgArgWordChar
+  opts <- option [] $ enclosedByPair '[' ']' blockOption
+  inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
+  let attrClasses = [translateLang lang, rundocBlockClass]
+  let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
+  returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ where enclosedByPair s e p = char s *> many1Till p (char e)
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = "rundoc-block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+  skipSpaces *> char ':'
+             *> many1 orgArgWordChar
+             <* many1 spaceChar
+
+orgArgValue :: OrgParser String
+orgArgValue = try $
+  skipSpaces *> many1 orgArgWordChar
+             <* skipSpaces
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
-- 
cgit v1.2.3


From 10644607e35369ec3b19b5d02fbe9b936d0ecb85 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 May 2014 22:13:59 -0700
Subject: Textile reader:  Rewrote some inline parsing code for clarity.

(It seems clearer to put the whitespace parsing in the grouped
parser.  This also uses stateLastStrPos to determine when the
border is adjacent to an alphanumeric.)
---
 src/Text/Pandoc/Readers/Textile.hs | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ae9c0cc8e..3c07a4d85 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -596,32 +596,28 @@ ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding
 ungroupedSimpleInline border construct = try $ do
   st <- getState
   pos <- getPosition
-  isWhitespace <- option False (whitespace >> return True)
-  guard $ (stateQuoteContext st /= NoQuote)
-       || (sourceColumn pos == 1)
-       || isWhitespace
+  let afterString = stateLastStrPos st == Just pos
+  guard $ not afterString
   border *> notFollowedBy (oneOf " \t\n\r")
   attr <- attributes
   body <- trimInlines . mconcat <$>
           withQuoteContext InSingleQuote
             (manyTill inline (try border <* notFollowedBy alphaNum))
-  let result = construct $
+  return $ construct $
         if attr == nullAttr
            then body
            else B.spanWith attr body
-  return $ if isWhitespace
-              then B.space <> result
-              else result
 
 groupedSimpleInline :: Parser [Char] ParserState t
                   -> (Inlines -> Inlines)
                   -> Parser [Char] ParserState Inlines
 groupedSimpleInline border construct = try $ do
     char '['
-    withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']'
-
-
-
+    sp1 <- option mempty $ B.space <$ whitespace
+    result <- withQuoteContext InSingleQuote (simpleInline border construct)
+    sp2 <- option mempty $ B.space <$ whitespace
+    char ']'
+    return $ sp1 <> result <> sp2
 
 -- | Create a singleton list
 singleton :: a -> [a]
-- 
cgit v1.2.3


From d6a9ba1cdc4cb10d34c61593d04868da3abb5e40 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 May 2014 22:28:11 -0700
Subject: Make `--trace` work with textile reader.

---
 src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 3c07a4d85..b67e8fbc8 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -61,9 +61,11 @@ import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
 import Data.Char ( digitToInt, isUpper)
-import Control.Monad ( guard, liftM )
+import Control.Monad ( guard, liftM, when )
+import Text.Printf
 import Control.Applicative ((<$>), (*>), (<*), (<$))
 import Data.Monoid
+import Debug.Trace (trace)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: ReaderOptions -- ^ Reader options
@@ -135,9 +137,17 @@ blockParsers = [ codeBlock
 
 endBlock :: Parser [Char] ParserState Blocks
 endBlock = string "\n\n" >> return mempty
+
 -- | Any block in the order of definition of blockParsers
 block :: Parser [Char] ParserState Blocks
-block = choice blockParsers <?> "block"
+block = do
+  res <- choice blockParsers <?> "block"
+  pos <- getPosition
+  tr <- getOption readerTrace
+  when tr $
+    trace (printf "line %d: %s" (sourceLine pos)
+           (take 60 $ show $ B.toList res)) (return ())
+  return res
 
 commentBlock :: Parser [Char] ParserState Blocks
 commentBlock = try $ do
-- 
cgit v1.2.3


From ea4e947bd0308861dbbbe020d21afe7943db1b98 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 May 2014 23:16:47 -0700
Subject: Textile reader:  Disallow blank lines in inline contexts.

    @hi

    there@

should not be a single code span.
---
 src/Text/Pandoc/Readers/Textile.hs | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index b67e8fbc8..622a41168 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -481,7 +481,7 @@ str = do
   -- followed by parens, parens content is unconditionally word acronym
   fullStr <- option baseStr $ try $ do
     guard $ all isUpper baseStr
-    acro <- enclosed (char '(') (char ')') anyChar
+    acro <- enclosed (char '(') (char ')') anyChar'
     return $ concat [baseStr, " (", acro, ")"]
   updateLastStrPos
   return $ B.str fullStr
@@ -528,8 +528,8 @@ link = try $ do
 image :: Parser [Char] ParserState Inlines
 image = try $ do
   char '!' >> notFollowedBy space
-  src <- manyTill anyChar (lookAhead $ oneOf "!(")
-  alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
+  src <- manyTill anyChar' (lookAhead $ oneOf "!(")
+  alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')')))
   char '!'
   return $ B.image src alt (B.str alt)
 
@@ -537,12 +537,14 @@ escapedInline :: Parser [Char] ParserState Inlines
 escapedInline = escapedEqs <|> escapedTag
 
 escapedEqs :: Parser [Char] ParserState Inlines
-escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
+escapedEqs = B.str <$>
+  (try $ string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
 escapedTag :: Parser [Char] ParserState Inlines
 escapedTag = B.str <$>
-  (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
+  (try $ string "<notextile>" *>
+         manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
 symbol :: Parser [Char] ParserState Inlines
@@ -552,13 +554,18 @@ symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
 code :: Parser [Char] ParserState Inlines
 code = code1 <|> code2
 
+-- any character except a newline before a blank line
+anyChar' :: Parser [Char] ParserState Char
+anyChar' =
+  satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
+
 code1 :: Parser [Char] ParserState Inlines
-code1 = B.code <$> surrounded (char '@') anyChar
+code1 = B.code <$> surrounded (char '@') anyChar'
 
 code2 :: Parser [Char] ParserState Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
-  B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
+  B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
 attributes :: Parser [Char] ParserState Attr
@@ -570,7 +577,7 @@ attribute = classIdAttr <|> styleAttr <|> langAttr
 classIdAttr :: Parser [Char] ParserState (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
-  ws <- words `fmap` manyTill anyChar (char ')')
+  ws <- words `fmap` manyTill anyChar' (char ')')
   case reverse ws of
        []                      -> return $ \(_,_,keyvals) -> ("",[],keyvals)
        (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
@@ -580,7 +587,7 @@ classIdAttr = try $ do -- (class class #id)
 
 styleAttr :: Parser [Char] ParserState (Attr -> Attr)
 styleAttr = do
-  style <- try $ enclosed (char '{') (char '}') anyChar
+  style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals)
 
 langAttr :: Parser [Char] ParserState (Attr -> Attr)
@@ -592,13 +599,15 @@ langAttr = do
 surrounded :: Parser [Char] st t   -- ^ surrounding parser
 	    -> Parser [Char] st a    -- ^ content parser (to be used repeatedly)
 	    -> Parser [Char] st [a]
-surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
+surrounded border =
+  enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> (Inlines -> Inlines)       -- ^ Inline constructor
                 -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct
+simpleInline border construct = groupedSimpleInline border construct
+                            <|> ungroupedSimpleInline border construct
 
 ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
                 -> (Inlines -> Inlines)       -- ^ Inline constructor
-- 
cgit v1.2.3


From 442eecc15c2b805872600e111a510e022d1920f7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 May 2014 23:27:16 -0700
Subject: Textile reader: Rewrote simpleInline for clarity and efficiency.

This way we only look once for the opening `[`.
---
 src/Text/Pandoc/Readers/Textile.hs | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 622a41168..f83298d4c 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -381,6 +381,7 @@ inline = do
 -- | Inline parsers tried in order
 inlineParsers :: [Parser [Char] ParserState Inlines]
 inlineParsers = [ inlineMarkup
+                , groupedInlineMarkup
                 , str
                 , whitespace
                 , endline
@@ -602,17 +603,10 @@ surrounded :: Parser [Char] st t   -- ^ surrounding parser
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
-
 simpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
-                -> (Inlines -> Inlines)       -- ^ Inline constructor
-                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
-simpleInline border construct = groupedSimpleInline border construct
-                            <|> ungroupedSimpleInline border construct
-
-ungroupedSimpleInline :: Parser [Char] ParserState t           -- ^ surrounding parser
-                -> (Inlines -> Inlines)       -- ^ Inline constructor
-                -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
-ungroupedSimpleInline border construct = try $ do
+             -> (Inlines -> Inlines)       -- ^ Inline constructor
+             -> Parser [Char] ParserState Inlines   -- ^ content parser (to be used repeatedly)
+simpleInline border construct = try $ do
   st <- getState
   pos <- getPosition
   let afterString = stateLastStrPos st == Just pos
@@ -627,13 +621,11 @@ ungroupedSimpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedSimpleInline :: Parser [Char] ParserState t
-                  -> (Inlines -> Inlines)
-                  -> Parser [Char] ParserState Inlines
-groupedSimpleInline border construct = try $ do
+groupedInlineMarkup :: Parser [Char] ParserState Inlines
+groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
-    result <- withQuoteContext InSingleQuote (simpleInline border construct)
+    result <- withQuoteContext InSingleQuote inlineMarkup
     sp2 <- option mempty $ B.space <$ whitespace
     char ']'
     return $ sp1 <> result <> sp2
-- 
cgit v1.2.3


From 84f2336a7db04dbc3c36594d1b4aca2e18186617 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 May 2014 23:41:56 -0700
Subject: Textile reader:  Rearranged inline parsers for performance.

This is possible because of the rewrite of simpleInline.
Also removed a redundant parser for grouped inlines.
---
 src/Text/Pandoc/Readers/Textile.hs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index f83298d4c..2f1fd30b4 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -380,17 +380,16 @@ inline = do
 
 -- | Inline parsers tried in order
 inlineParsers :: [Parser [Char] ParserState Inlines]
-inlineParsers = [ inlineMarkup
-                , groupedInlineMarkup
-                , str
+inlineParsers = [ str
                 , whitespace
                 , endline
                 , code
                 , escapedInline
+                , inlineMarkup
+                , groupedInlineMarkup
                 , rawHtmlInline
                 , rawLaTeXInline'
                 , note
-                , try $ (char '[' *> inlineMarkup <* char ']')
                 , link
                 , image
                 , mark
-- 
cgit v1.2.3


From 0050b509052ff81ba021b98fdbc573d3475ed74c Mon Sep 17 00:00:00 2001
From: mpickering <matthewtpickering@gmail.com>
Date: Wed, 7 May 2014 13:03:45 +0100
Subject: Fix textile reader hanging.

Textile reader hung on

    pandoc -f textile http://johnmacfarlane.net/pandoc/demo/example25.textile

The reader no longer hangs.
---
 src/Text/Pandoc/Readers/Textile.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 2f1fd30b4..f7c87ab5a 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -513,7 +513,8 @@ link = try $ do
   char '"' *> notFollowedBy (oneOf " \t\n\r")
   attr <- attributes
   name <- trimInlines . mconcat <$>
-          withQuoteContext InSingleQuote (manyTill inline (try (string "\":")))
+          withQuoteContext InDoubleQuote (many1Till inline (try (char '"')))
+  char ':'
   let stop = if bracketed
                 then char ']'
                 else lookAhead $ space <|>
-- 
cgit v1.2.3


From f0f88111e6597ade7e771457fc1b81bcc9a6d974 Mon Sep 17 00:00:00 2001
From: mpickering <matthewtpickering@gmail.com>
Date: Wed, 7 May 2014 13:03:45 +0100
Subject: Small improvement to textile reader fix.  Removed 'try'.

---
 src/Text/Pandoc/Readers/Textile.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index f7c87ab5a..9ee34caa5 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -513,7 +513,7 @@ link = try $ do
   char '"' *> notFollowedBy (oneOf " \t\n\r")
   attr <- attributes
   name <- trimInlines . mconcat <$>
-          withQuoteContext InDoubleQuote (many1Till inline (try (char '"')))
+          withQuoteContext InDoubleQuote (many1Till inline (char '"'))
   char ':'
   let stop = if bracketed
                 then char ']'
-- 
cgit v1.2.3


From 8fdbef841d0ef77dcc2e30cfa475e92a0f3de6cf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 8 May 2014 21:50:20 +0200
Subject: Update copyright notices for 2014, add missing notices

---
 src/Text/Pandoc.hs                      | 4 ++--
 src/Text/Pandoc/Asciify.hs              | 4 ++--
 src/Text/Pandoc/Highlighting.hs         | 4 ++--
 src/Text/Pandoc/ImageSize.hs            | 4 ++--
 src/Text/Pandoc/MIME.hs                 | 4 ++--
 src/Text/Pandoc/Options.hs              | 4 ++--
 src/Text/Pandoc/PDF.hs                  | 4 ++--
 src/Text/Pandoc/Parsing.hs              | 4 ++--
 src/Text/Pandoc/Pretty.hs               | 4 ++--
 src/Text/Pandoc/Process.hs              | 4 ++--
 src/Text/Pandoc/Readers/HTML.hs         | 4 ++--
 src/Text/Pandoc/Readers/LaTeX.hs        | 4 ++--
 src/Text/Pandoc/Readers/Markdown.hs     | 4 ++--
 src/Text/Pandoc/Readers/MediaWiki.hs    | 4 ++--
 src/Text/Pandoc/Readers/Native.hs       | 4 ++--
 src/Text/Pandoc/Readers/RST.hs          | 4 ++--
 src/Text/Pandoc/Readers/TeXMath.hs      | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs      | 5 +++--
 src/Text/Pandoc/SelfContained.hs        | 4 ++--
 src/Text/Pandoc/Shared.hs               | 4 ++--
 src/Text/Pandoc/Slides.hs               | 4 ++--
 src/Text/Pandoc/Templates.hs            | 4 ++--
 src/Text/Pandoc/UTF8.hs                 | 4 ++--
 src/Text/Pandoc/UUID.hs                 | 4 ++--
 src/Text/Pandoc/Writers/AsciiDoc.hs     | 4 ++--
 src/Text/Pandoc/Writers/ConTeXt.hs      | 4 ++--
 src/Text/Pandoc/Writers/Custom.hs       | 4 ++--
 src/Text/Pandoc/Writers/Docbook.hs      | 4 ++--
 src/Text/Pandoc/Writers/Docx.hs         | 4 ++--
 src/Text/Pandoc/Writers/EPUB.hs         | 4 ++--
 src/Text/Pandoc/Writers/HTML.hs         | 4 ++--
 src/Text/Pandoc/Writers/LaTeX.hs        | 4 ++--
 src/Text/Pandoc/Writers/Man.hs          | 4 ++--
 src/Text/Pandoc/Writers/Markdown.hs     | 4 ++--
 src/Text/Pandoc/Writers/MediaWiki.hs    | 4 ++--
 src/Text/Pandoc/Writers/Native.hs       | 4 ++--
 src/Text/Pandoc/Writers/ODT.hs          | 4 ++--
 src/Text/Pandoc/Writers/OPML.hs         | 4 ++--
 src/Text/Pandoc/Writers/OpenDocument.hs | 6 +++---
 src/Text/Pandoc/Writers/Org.hs          | 5 +++--
 src/Text/Pandoc/Writers/RST.hs          | 4 ++--
 src/Text/Pandoc/Writers/RTF.hs          | 4 ++--
 src/Text/Pandoc/Writers/Shared.hs       | 4 ++--
 src/Text/Pandoc/Writers/Texinfo.hs      | 4 ++--
 src/Text/Pandoc/Writers/Textile.hs      | 4 ++--
 src/Text/Pandoc/XML.hs                  | 4 ++--
 46 files changed, 95 insertions(+), 93 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index a37c98814..dd5bc18f6 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 1c177da90..8a5ccec5c 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2013 John MacFarlane
+   Copyright   : Copyright (C) 2013-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 11d608db6..2e7a9f648 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Highlighting
-   Copyright   : Copyright (C) 2008 John MacFarlane
+   Copyright   : Copyright (C) 2008-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 3c9623b3c..a6d076fa9 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables  #-}
 {-
-  Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+  Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@
 
 {- |
 Module      : Text.Pandoc.ImageSize
-Copyright   : Copyright (C) 2011 John MacFarlane
+Copyright   : Copyright (C) 2011-2014 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 977cb576b..6e6284b25 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.MIME
-   Copyright   : Copyright (C) 2011 John MacFarlane
+   Copyright   : Copyright (C) 2011-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 38220f542..611a6bb06 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Options
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index abc5c41b7..e4e06e6c9 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings, CPP #-}
 {-
-Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.PDF
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4d0a677da..d1e55cbc4 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances,
     FlexibleInstances#-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 5331587ce..d25ba725f 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
 {-
-Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111(-1)307  USA
 
 {- |
    Module      : Text.Pandoc.Pretty
-   Copyright   : Copyright (C) 2010 John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 112c5b974..9c8853366 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Process
-   Copyright   : Copyright (C) 2013 John MacFarlane
+   Copyright   : Copyright (C) 2013-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c94ee3d6b..905e55b22 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 79c66b510..bfafea1f6 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
 {-
-Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2012 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index aac87f363..d1637b701 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2013 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index feaedb7c2..e4fabc898 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
 -- RelaxedPolyRec needed for inlinesBetween on GHC < 7
 {-
-  Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+  Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index c5d4cb98a..f4dfa62c1 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011 John MacFarlane
+   Copyright   : Copyright (C) 2011-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 54b6fa34a..fa8438e70 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 6bd617f7e..f03eae044 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.TeXMath
-   Copyright   : Copyright (C) 2007-2010 John MacFarlane
+   Copyright   : Copyright (C) 2007-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 9ee34caa5..6d839ec1d 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -1,5 +1,6 @@
 {-
-Copyright (C) 2010 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+Copyright (C) 2010-2014 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+                        and John MacFarlane
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.Textile
-   Copyright   : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 Paul Rivier and John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 7fc9c2966..2a2f56281 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2011 John MacFarlane
+   Copyright   : Copyright (C) 2011-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6f0629ea2..31c490af6 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
     FlexibleContexts, ScopedTypeVariables #-}
 {-
-Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2013 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 50c46d17f..2b863c780 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Slides
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 52625abf6..551db6483 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP,
     OverloadedStrings, GeneralizedNewtypeDeriving #-}
 {-
-Copyright (C) 2009-2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2009-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Templates
-   Copyright   : Copyright (C) 2009-2013 John MacFarlane
+   Copyright   : Copyright (C) 2009-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 229442543..33c9ec1c5 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE CPP #-}
 {-
-Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.UTF8
-   Copyright   : Copyright (C) 2010 John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index 082644eea..eebfe09d2 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.UUID
-   Copyright   : Copyright (C) 2010 John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 15579cba2..19112d8f5 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.AsciiDoc
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index cec420dcf..3b321cc19 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.ConTeXt
-   Copyright   : Copyright (C) 2007-2010 John MacFarlane
+   Copyright   : Copyright (C) 2007-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 0b30287f5..88f590c43 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{- Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+{- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Custom
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 1a8e58354..ba6a92a08 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index fcb73a427..551d97855 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-
-Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Docx
-   Copyright   : Copyright (C) 2012 John MacFarlane
+   Copyright   : Copyright (C) 2012-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index c39a7798d..893ec3be9 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
 {-
-Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.EPUB
-   Copyright   : Copyright (C) 2010 John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 1de4345f9..9a26cf2ac 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings, CPP #-}
 {-# OPTIONS_GHC -fno-warn-deprecations #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.HTML
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index c17e041b5..c221b318e 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 680bfef44..41eb3e5be 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Man
-   Copyright   : Copyright (C) 2007-2010 John MacFarlane
+   Copyright   : Copyright (C) 2007-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 95082add6..f42a1b54c 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
 {-
-Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2013 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 83fefaa29..3b987ba2b 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.MediaWiki
-   Copyright   : Copyright (C) 2008-2010 John MacFarlane
+   Copyright   : Copyright (C) 2008-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 090b97433..cb821e40b 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Native
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index c3652d65d..15f7c8be8 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {-
-Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.ODT
-   Copyright   : Copyright (C) 2008-2010 John MacFarlane
+   Copyright   : Copyright (C) 2008-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index f6926c1dc..dd359f3f5 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.OPML
-   Copyright   : Copyright (C) 2013 John MacFarlane
+   Copyright   : Copyright (C) 2013-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 0029c3296..b6da2694c 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE PatternGuards, OverloadedStrings #-}
 {-
-Copyright (C) 2008-2010 Andrea Rossato <andrea.rossato@ing.unitn.it>
-and John MacFarlane.
+Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it>
+                        and John MacFarlane.
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.OpenDocument
-   Copyright   : Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane
+   Copyright   : Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Andrea Rossato <andrea.rossato@ing.unitn.it>
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 58a5729e7..87046537c 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 Puneeth Chaganti <punchagan@gmail.com>
+Copyright (C) 2010-2014 Puneeth Chaganti <punchagan@gmail.com>
+                        and John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Org
-   Copyright   : Copyright (C) 2010 Puneeth Chaganti
+   Copyright   : Copyright (C) 2010-2014 Puneeth Chaganti and John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Puneeth Chaganti <punchagan@gmail.com>
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 1e7596b21..31c97349b 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.RST
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 3e0bd9976..e0428aaa8 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.RTF
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 604aac1c9..800e741a4 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Shared
-   Copyright   : Copyright (C) 2013 John MacFarlane
+   Copyright   : Copyright (C) 2013-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index bf3df8035..8ac717bab 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2008-2010 John MacFarlane and Peter Wang
+Copyright (C) 2008-2014 John MacFarlane and Peter Wang
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Texinfo
-   Copyright   : Copyright (C) 2008-2010 John MacFarlane and Peter Wang
+   Copyright   : Copyright (C) 2008-2014 John MacFarlane and Peter Wang
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 95aedf780..3a6982a01 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Writers.Textile
-   Copyright   : Copyright (C) 2010 John MacFarlane
+   Copyright   : Copyright (C) 2010-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index c11af9a19..8000368aa 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.XML
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-- 
cgit v1.2.3


From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 8 May 2014 17:01:58 +0200
Subject: Org reader: refactor #+BEGIN..#+END block parsing code

---
 src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++--------------
 1 file changed, 80 insertions(+), 42 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index dba61dfe0..9df8ce0b3 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -50,7 +50,7 @@ import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
-import           Data.Maybe (listToMaybe, fromMaybe, isJust)
+import           Data.Maybe (fromMaybe, isJust)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 import           Network.HTTP (urlEncode)
 
@@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s ->
   s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
 
 surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+surroundingEmphasisChar =
+  take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
 
 startEmphasisNewlinesCounting :: Int -> OrgParser ()
 startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
@@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
 
 decEmphasisNewlinesCount :: OrgParser ()
 decEmphasisNewlinesCount = updateState $ \s ->
-      s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+  s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
 
 newlinesCountWithinLimits :: OrgParser Bool
 newlinesCountWithinLimits = do
@@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do
 -- Org Blocks (#+BEGIN_... / #+END_...)
 --
 
+type BlockProperties = (Int, String)  -- (Indentation, Block-Type)
+
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
-  (indent, blockType, args) <- blockHeader
-  content <- rawBlockContent indent blockType
-  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
-  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
-  case blockType of
-    "comment" -> return mempty
-    "html"    -> returnF $ B.rawBlock "html" content
-    "latex"   -> returnF $ B.rawBlock "latex" content
-    "ascii"   -> returnF $ B.rawBlock "ascii" content
-    "example" -> returnF $ exampleCode content
-    "quote"   -> return  $ B.blockQuote <$> contentBlocks
-    "verse"   -> parseVerse content
-    "src"     -> codeBlockWithAttr classArgs content
-    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+  blockProp@(_, blkType) <- blockHeaderStart
+  ($ blockProp) $
+    case blkType of
+      "comment" -> withRaw'   (const mempty)
+      "html"    -> withRaw'   (return . (B.rawBlock blkType))
+      "latex"   -> withRaw'   (return . (B.rawBlock blkType))
+      "ascii"   -> withRaw'   (return . (B.rawBlock blkType))
+      "example" -> withRaw'   (return . exampleCode)
+      "quote"   -> withParsed (fmap B.blockQuote)
+      "verse"   -> verseBlock
+      "src"     -> codeBlock
+      _         -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indent <*> blockType
  where
-   parseVerse :: String -> OrgParser (F Blocks)
-   parseVerse cs =
-       fmap B.para . mconcat . intersperse (pure B.linebreak)
-       <$> mapM (parseFromString parseInlines) (lines cs)
-
-blockHeader :: OrgParser (Int, String, [String])
-blockHeader = (,,) <$> blockIndent
-                   <*> blockType
-                   <*> (skipSpaces *> blockArgs)
- where blockIndent = length <$> many spaceChar
-       blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
-       blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
-
-codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
-codeBlockWithAttr classArgs content = do
-  identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
-  caption <- lookupInlinesAttr "caption"
-  let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
+  indent    = length      <$> many spaceChar
+  blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+
+withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+  ignHeaders
+  content <- rawBlockContent blkProp
+  fmap B.para . mconcat . intersperse (pure B.linebreak)
+    <$> mapM (parseFromString parseInlines) (lines content)
+
+codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock blkProp = do
+  skipSpaces
+  language      <- optionMaybe orgArgWord
+  (classes, kv) <- codeHeaderArgs
+  id'           <- fromMaybe "" <$> lookupBlockAttribute "name"
+  caption       <- lookupInlinesAttr "caption"
+  content       <- rawBlockContent blkProp
+  let attr      = ( id'
+                  , maybe id (\l -> (l:)) language $ classes
+                  , kv )
+  let codeBlck  = B.codeBlockWith attr content
   return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
  where
    labelDiv blk value =
@@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do
                                        <*> pure blk)
    labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
 
-rawBlockContent :: Int -> String -> OrgParser String
-rawBlockContent indent blockType =
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  where
-   indentedLine = try $ choice [ blankline         *> pure "\n"
-                               , indentWith indent *> anyLine
-                               ]
-   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+   indentedLine = try $
+     choice [ blankline         *> pure "\n"
+            , indentWith indent *> anyLine
+            ]
+   blockEnder = try $
+     indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+  raw <- rawBlockContent blkProps
+  parseFromString parseBlocks (raw ++ "\n")
 
 -- indent by specified number of spaces (or equiv. tabs)
 indentWith :: Int -> OrgParser String
@@ -356,6 +383,13 @@ indentWith num = do
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs =
+  (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline
+
 translateLang :: String -> String
 translateLang "C"          = "c"
 translateLang "C++"        = "cpp"
@@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
  where enclosedByPair s e p = char s *> many1Till p (char e)
 
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
 -- | The class-name used to mark rundoc blocks.
 rundocBlockClass :: String
-rundocBlockClass = "rundoc-block"
+rundocBlockClass = rundocPrefix ++ "block"
 
 blockOption :: OrgParser (String, String)
 blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
-- 
cgit v1.2.3


From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 9 May 2014 18:07:37 +0200
Subject: Org reader: Support arguments for code blocks

The general form of source block headers
(`#+BEGIN_SRC <language> <switches> <header arguments>`) was not
recognized by the reader.  This patch adds support for the above form,
adds header arguments to the block's key-value pairs and marks the block
as a rundoc block if header arguments are present.

This closes #1286.
---
 src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 42 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 9df8ce0b3..c05ac92d0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -276,7 +276,7 @@ parseBlockAttributes = do
  where
    attribute :: OrgParser (String, String)
    attribute = try $ do
-         key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
+         key <- metaLineStart *> many1Till nonspaceChar (char ':')
          val <- skipSpaces *> anyLine
          return (map toLower key, val)
 
@@ -342,16 +342,11 @@ verseBlock blkProp = try $ do
 codeBlock :: BlockProperties -> OrgParser (F Blocks)
 codeBlock blkProp = do
   skipSpaces
-  language      <- optionMaybe orgArgWord
-  (classes, kv) <- codeHeaderArgs
+  (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
   id'           <- fromMaybe "" <$> lookupBlockAttribute "name"
-  caption       <- lookupInlinesAttr "caption"
   content       <- rawBlockContent blkProp
-  let attr      = ( id'
-                  , maybe id (\l -> (l:)) language $ classes
-                  , kv )
-  let codeBlck  = B.codeBlockWith attr content
-  return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
+  let codeBlck  = B.codeBlockWith ( id', classes, kv ) content
+  maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
  where
    labelDiv blk value =
        B.divWith nullAttr <$> (mappend <$> labelledBlock value
@@ -383,12 +378,33 @@ indentWith num = do
      else choice [ try (count num (char ' '))
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
+type SwitchOption = (Char, Maybe String)
+
 orgArgWord :: OrgParser String
 orgArgWord = many1 orgArgWordChar
 
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
 codeHeaderArgs :: OrgParser ([String], [(String, String)])
-codeHeaderArgs =
-  (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline
+codeHeaderArgs = try $ do
+  language   <- skipSpaces *> orgArgWord
+  _          <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+  parameters <- manyTill blockOption newline
+  let pandocLang = translateLang language
+  return $
+    if hasRundocParameters parameters
+    then ( [ pandocLang, rundocBlockClass ]
+         , map toRundocAttrib (("language", language) : parameters)
+         )
+    else ([ pandocLang ], parameters)
+ where hasRundocParameters = not . null
+
+switch :: OrgParser SwitchOption
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+   simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+   lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+                       (string "-l \"" *> many1Till nonspaceChar (char '"'))
 
 translateLang :: String -> String
 translateLang "C"          = "c"
@@ -401,6 +417,32 @@ translateLang "sh"         = "bash"
 translateLang "sqlite"     = "sql"
 translateLang cs = cs
 
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+  skipSpaces *> char ':'
+             *> many1 orgArgWordChar
+
+orgArgValue :: OrgParser String
+orgArgValue = try $
+  skipSpaces *> many1 orgArgWordChar <* skipSpaces
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
 commaEscaped :: String -> String
 commaEscaped (',':cs@('*':_))     = cs
 commaEscaped (',':cs@('#':'+':_)) = cs
@@ -425,7 +467,7 @@ drawer = try $ do
 
 drawerStart :: OrgParser String
 drawerStart = try $
-  skipSpaces *> drawerName <* skipSpaces <* newline
+  skipSpaces *> drawerName <* skipSpaces <* P.newline
  where drawerName = try $  char ':' *> validDrawerName <* char ':'
        validDrawerName =  stringAnyCase "PROPERTIES"
                           <|> stringAnyCase "LOGBOOK"
@@ -435,7 +477,7 @@ drawerLine = try anyLine
 
 drawerEnd :: OrgParser String
 drawerEnd = try $
-  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline
 
 
 --
@@ -446,7 +488,7 @@ drawerEnd = try $
 figure :: OrgParser (F Blocks)
 figure = try $ do
   (cap, nam) <- nameAndCaption
-  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+  src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
   guard (isImageFilename src)
   return $ do
     cap' <- cap
@@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
  where enclosedByPair s e p = char s *> many1Till p (char e)
 
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
-blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
-
-orgArgKey :: OrgParser String
-orgArgKey = try $
-  skipSpaces *> char ':'
-             *> many1 orgArgWordChar
-             <* many1 spaceChar
-
-orgArgValue :: OrgParser String
-orgArgValue = try $
-  skipSpaces *> many1 orgArgWordChar
-             <* skipSpaces
-
-orgArgWordChar :: OrgParser Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
-- 
cgit v1.2.3


From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 9 May 2014 18:23:23 +0200
Subject: Org reader: Fix parsing of blank lines within blocks

Blank lines were parsed as two newlines instead of just one.
Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out.
---
 src/Text/Pandoc/Readers/Org.hs | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c05ac92d0..0f218d43f 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String
 rawBlockContent (indent, blockType) = try $
   unlines . map commaEscaped <$> manyTill indentedLine blockEnder
  where
-   indentedLine = try $
-     choice [ blankline         *> pure "\n"
-            , indentWith indent *> anyLine
-            ]
-   blockEnder = try $
-     indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+   indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
+   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
 
 parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
 parsedBlockContent blkProps = try $ do
-- 
cgit v1.2.3


From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 10 May 2014 11:25:20 +0200
Subject: Org reader: Fix block parameter reader, relax constraints

The reader produced wrong results for block containing non-letter chars
in their parameter arguments.  This patch relaxes constraints in that it
allows block header arguments to contain any non-space character (except
for ']' for inline blocks).

Thanks to Xiao Hanyu for noticing this.
---
 src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0f218d43f..2e4a29beb 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String)
 blockHeaderStart = try $ (,) <$> indent <*> blockType
  where
   indent    = length      <$> many spaceChar
-  blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+  blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
 
 withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
 withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
@@ -422,16 +422,23 @@ rundocBlockClass :: String
 rundocBlockClass = rundocPrefix ++ "block"
 
 blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
+
+inlineBlockOption :: OrgParser (String, String)
+inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
 
 orgArgKey :: OrgParser String
 orgArgKey = try $
   skipSpaces *> char ':'
              *> many1 orgArgWordChar
 
-orgArgValue :: OrgParser String
-orgArgValue = try $
-  skipSpaces *> many1 orgArgWordChar <* skipSpaces
+orgParamValue :: OrgParser String
+orgParamValue = try $
+  skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
+
+orgInlineParamValue :: OrgParser String
+orgInlineParamValue = try $
+  skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
 
 orgArgWordChar :: OrgParser Char
 orgArgWordChar = alphaNum <|> oneOf "-_"
@@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines)
 inlineCodeBlock = try $ do
   string "src_"
   lang <- many1 orgArgWordChar
-  opts <- option [] $ enclosedByPair '[' ']' blockOption
+  opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
   inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
-- 
cgit v1.2.3


From 009260647612238b5af964afcbeb452001f2ab0c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 10 May 2014 23:26:32 -0700
Subject: LaTeX reader:  Don't error on "%foo" with no newline.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index bfafea1f6..8476c8636 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -125,7 +125,7 @@ comment :: LP ()
 comment = do
   char '%'
   skipMany (satisfy (/='\n'))
-  newline
+  optional newline
   return ()
 
 bgroup :: LP ()
-- 
cgit v1.2.3


From 113a32daa8a628a1f6f166c6b43d07ac005b8d42 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sun, 11 May 2014 15:02:48 +0200
Subject: Process: Fix minor typo in pipeProcess' docs

Replace fullstop with comma, adjust capitalisation.
---
 src/Text/Pandoc/Process.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 9c8853366..19872b405 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -47,7 +47,7 @@ terminates, and then returns the 'ExitCode' of the process,
 the standard output, and the standard error.
 
 If an asynchronous exception is thrown to the thread executing
-@readProcessWithExitCode@. The forked process will be terminated and
+@readProcessWithExitCode@, the forked process will be terminated and
 @readProcessWithExitCode@ will wait (block) until the process has been
 terminated.
 -}
@@ -102,4 +102,3 @@ forkWait a = do
   res <- newEmptyMVar
   _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res
   return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return)
-
-- 
cgit v1.2.3


From a8319d133908f3c39834984e5e11991b166c37b7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 May 2014 22:52:29 -0700
Subject: LaTeX reader:  set `bibliography` in metadata from `\bibliography`
 cmd.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 8476c8636..4b9e424d9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -307,6 +307,8 @@ blockCommands = M.fromList $
   , ("caption", tok >>= setCaption)
   , ("PandocStartInclude", startInclude)
   , ("PandocEndInclude", endInclude)
+  , ("bibliography", mempty <$ (skipopts *> braced >>=
+                                addMeta "bibliography" . splitBibs))
   ] ++ map ignoreBlocks
   -- these commands will be ignored unless --parse-raw is specified,
   -- in which case they will appear as raw latex blocks
@@ -314,7 +316,7 @@ blockCommands = M.fromList $
     -- newcommand, etc. should be parsed by macro, but we need this
     -- here so these aren't parsed as inline commands to ignore
   , "special", "pdfannot", "pdfstringdef"
-  , "bibliography", "bibliographystyle"
+  , "bibliographystyle"
   , "maketitle", "makeindex", "makeglossary"
   , "addcontentsline", "addtocontents", "addtocounter"
      -- \ignore{} is used conventionally in literate haskell for definitions
@@ -329,6 +331,9 @@ addMeta :: ToMetaValue a => String -> a -> LP ()
 addMeta field val = updateState $ \st ->
   st{ stateMeta = addMetaField field val $ stateMeta st }
 
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+
 setCaption :: Inlines -> LP Blocks
 setCaption ils = do
   updateState $ \st -> st{ stateCaption = Just ils }
-- 
cgit v1.2.3


From 2348f07b11700e685cd194c93c5891d2c288dbc0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 12 May 2014 13:05:42 -0700
Subject: Shared addMetaField:  if old and new values both lists, concatenate.

---
 src/Text/Pandoc/Shared.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 31c490af6..4f506b5a6 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -564,8 +564,10 @@ addMetaField :: ToMetaValue a
              -> Meta
 addMetaField key val (Meta meta) =
   Meta $ M.insertWith combine key (toMetaValue val) meta
-  where combine newval (MetaList xs) = MetaList (xs ++ [newval])
+  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
         combine newval x             = MetaList [x, newval]
+        tolist (MetaList ys)         = ys
+        tolist y                     = [y]
 
 -- | Create 'Meta' from old-style title, authors, date.  This is
 -- provided to ease the transition from the old API.
-- 
cgit v1.2.3


From aa019448d6eef010922ea778597a2b7a0f3bd58f Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Mon, 12 May 2014 13:06:06 -0700
Subject: LaTeX reader:  Support `\addbibresource`.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4b9e424d9..6f870318f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -309,6 +309,8 @@ blockCommands = M.fromList $
   , ("PandocEndInclude", endInclude)
   , ("bibliography", mempty <$ (skipopts *> braced >>=
                                 addMeta "bibliography" . splitBibs))
+  , ("addbibresource", mempty <$ (skipopts *> braced >>=
+                                addMeta "bibliography" . splitBibs))
   ] ++ map ignoreBlocks
   -- these commands will be ignored unless --parse-raw is specified,
   -- in which case they will appear as raw latex blocks
-- 
cgit v1.2.3


From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:45:37 +0200
Subject: Introduce class HasLastStrPosition, generalize functions

Both `ParserState` and `OrgParserState` keep track of the parser position at
which the last string ended.  This patch introduces a new class
`HasLastStrPosition` and makes the above types instances of that class.  This
enables the generalization of functions updating the state or checking if one
is right after a string.
---
 src/Text/Pandoc/Parsing.hs          | 32 +++++++++++++++++++++++---------
 src/Text/Pandoc/Readers/Markdown.hs | 11 +++--------
 src/Text/Pandoc/Readers/Org.hs      | 11 ++++-------
 3 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1e55cbc4..344f6c7ba 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
                              withRaw,
                              escaped,
                              characterReference,
-                             updateLastStrPos,
                              anyOrderedListMarker,
                              orderedListMarker,
                              charRef,
@@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
                              testStringWith,
                              guardEnabled,
                              guardDisabled,
+                             updateLastStrPos,
+                             notAfterString,
                              ParserState (..),
                              HasReaderOptions (..),
                              HasHeaderMap (..),
                              HasIdentifierList (..),
                              HasMacros (..),
+                             HasLastStrPosition (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -904,6 +906,14 @@ instance HasMacros ParserState where
   extractMacros        = stateMacros
   updateMacros f st    = st{ stateMacros = f $ stateMacros st }
 
+class HasLastStrPosition st where
+  setLastStrPos  :: SourcePos -> st -> st
+  getLastStrPos  :: st -> Maybe SourcePos
+
+instance HasLastStrPosition ParserState where
+  setLastStrPos pos st = st{ stateLastStrPos = Just pos }
+  getLastStrPos st     = stateLastStrPos st
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
@@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
 guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
 
+-- | Update the position on which the last string ended.
+updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos
+
+-- | Whether we are right after the end of a string.
+notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString = do
+  pos <- getPosition
+  st  <- getState
+  return $ getLastStrPos st /= Just pos
+
 data HeaderType
     = SingleHeader Char  -- ^ Single line of characters underneath
     | DoubleHeader Char  -- ^ Lines of characters above and below
@@ -1049,17 +1070,11 @@ charOrRef cs =
                        guard (c `elem` cs)
                        return c)
 
-updateLastStrPos :: Parser [Char] ParserState ()
-updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ stateLastStrPos = Just p }
-
 singleQuoteStart :: Parser [Char] ParserState ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
-  pos <- getPosition
-  st <- getState
   -- single quote start can't be right after str
-  guard $ stateLastStrPos st /= Just pos
+  guard =<< notAfterString
   () <$ charOrRef "'\8216\145"
 
 singleQuoteEnd :: Parser [Char] st ()
@@ -1156,4 +1171,3 @@ applyMacros' target = do
      then do macros <- extractMacros `fmap` getState
              return $ applyMacros macros target
      else return target
-
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d1637b701..1ac98e94c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1474,9 +1474,7 @@ strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
   where  checkIntraword = do
            exts <- getOption readerExtensions
            when (Ext_intraword_underscores `Set.member` exts) $ do
-             pos <- getPosition
-             lastStrPos <- stateLastStrPos <$> getState
-             guard $ lastStrPos /= Just pos
+             guard =<< notAfterString
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
 str :: MarkdownParser (F Inlines)
 str = do
   result <- many1 alphaNum
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
   isSmart <- getOption readerSmart
   if isSmart
@@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
   -- make sure we're not right after an alphanumeric,
   -- since foo@bar.baz is probably an email address
-  lastStrPos <- stateLastStrPos <$> getState
-  pos <- getPosition
-  guard $ lastStrPos /= Just pos
+  guard =<< notAfterString
   suppress_author <- option False (char '-' >> return True)
   char '@'
   first <- letter <|> char '_'
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2e4a29beb..5dbcaee98 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
   deleteMeta field st =
     st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 
+instance HasLastStrPosition OrgParserState where
+  getLastStrPos = orgStateLastStrPos
+  setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
 instance Default OrgParserState where
   def = defaultOrgParserState
 
@@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do
   lastPrePos <- orgStateLastPreCharPos <$> getState
   return . fromMaybe True $ (== pos) <$> lastPrePos
 
--- | Whether we are right after the end of a string
-notAfterString :: OrgParser Bool
-notAfterString = do
-  pos <- getPosition
-  lastStrPos <- orgStateLastStrPos <$> getState
-  return $ lastStrPos /= Just pos
-
 -- | Whether the parser is right after a forbidden border char
 notAfterForbiddenBorderChar :: OrgParser Bool
 notAfterForbiddenBorderChar = do
-- 
cgit v1.2.3


From 2423f9e6b180bc6b04d222a4b574de995d296f80 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:58:05 +0200
Subject: Move `citeKey` from Readers.Markdown to Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc/Parsing.hs          | 13 +++++++++++++
 src/Text/Pandoc/Readers/Markdown.hs | 14 --------------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 344f6c7ba..4cd6591c0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -94,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
                              apostrophe,
                              dash,
                              nested,
+                             citeKey,
                              macro,
                              applyMacros',
                              Parser,
@@ -1144,6 +1145,18 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
+citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
+citeKey = try $ do
+  guard =<< notAfterString
+  suppress_author <- option False (char '-' *> return True)
+  char '@'
+  firstChar <- letter <|> char '_'
+  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+  let internal p = try $ p <* lookAhead regchar
+  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
+  let key = firstChar:rest
+  return (suppress_author, key)
+
 --
 -- Macros
 --
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1ac98e94c..5129bc2e3 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1814,20 +1814,6 @@ normalCite = try $ do
   char ']'
   return citations
 
-citeKey :: MarkdownParser (Bool, String)
-citeKey = try $ do
-  -- make sure we're not right after an alphanumeric,
-  -- since foo@bar.baz is probably an email address
-  guard =<< notAfterString
-  suppress_author <- option False (char '-' >> return True)
-  char '@'
-  first <- letter <|> char '_'
-  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
-  let internal p = try $ p >>~ lookAhead regchar
-  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
-  let key = first:rest
-  return (suppress_author, key)
-
 suffix :: MarkdownParser (F Inlines)
 suffix = try $ do
   hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
-- 
cgit v1.2.3


From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:49:30 +0200
Subject: Org reader: support Pandocs citation extension

Citations are defined via the "normal citation" syntax used in markdown,
with the sole difference that newlines are not allowed between "[...]".
This is for consistency, as org-mode generally disallows newlines
between square brackets.

The extension is turned on by default and can be turned off via the
default syntax-extension mechanism, i.e. by specifying "org-citation" as
the input format.
Move `citeKey` from Readers.Markdown into Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc.hs             |  2 +-
 src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index dd5bc18f6..130338f0e 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
+getDefaultExtensions "org"             = Set.fromList [Ext_citations]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
@@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
-
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dbcaee98..86dda2732 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , cite
          , footnote
          , linkOrImage
          , anchor
@@ -933,6 +934,51 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+cite :: OrgParser (F Inlines)
+cite = try $ do
+  guardEnabled Ext_citations
+  (cs, raw) <- withRaw normalCite
+  return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $  char '['
+                 *> skipSpaces
+                 *> citeList
+                 <* skipSpaces
+                 <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+  pref <- prefix
+  (suppress_author, key) <- citeKey
+  suff <- suffix
+  return $ do
+    x <- pref
+    y <- suff
+    return $ Citation{ citationId      = key
+                     , citationPrefix  = B.toList x
+                     , citationSuffix  = B.toList y
+                     , citationMode    = if suppress_author
+                                            then SuppressAuthor
+                                            else NormalCitation
+                     , citationNoteNum = 0
+                     , citationHash    = 0
+                     }
+ where
+   prefix = trimInlinesF . mconcat <$>
+            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+   suffix = try $ do
+     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+     skipSpaces
+     rest <- trimInlinesF . mconcat <$>
+             many (notFollowedBy (oneOf ";]") *> inline)
+     return $ if hasSpace
+              then (B.space <>) <$> rest
+              else rest
+
 footnote :: OrgParser (F Inlines)
 footnote = try $ inlineNote <|> referencedNote
 
@@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
 
 applyCustomLinkFormat :: String -> OrgParser (F String)
 applyCustomLinkFormat link = do
@@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
+enclosedByPair :: Char          -- ^ opening char
+               -> Char          -- ^ closing char
+               -> OrgParser a   -- ^ parser
+               -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
 
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
-- 
cgit v1.2.3


From c5c9b0d2890699ee8fcdbb660662957f8efad319 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 15 May 2014 10:11:48 -0700
Subject: EPUB writer:  Fixed regression on cover image.

In 1.12.4 and 1.12.4.2, the cover image would not appear properly,
because the metadata id was not correct.

This was introduced by the fix to #1254.

Now we derive the id from the actual cover image filename,
which we preserve rather than using "cover-image."
---
 src/Text/Pandoc/Writers/EPUB.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 893ec3be9..4d2a39846 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -333,7 +333,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                 case epubCoverImage metadata of
                      Nothing   -> return ([],[])
                      Just img  -> do
-                       let coverImage = "cover-image" ++ takeExtension img
+                       let coverImage = "media/" ++ takeFileName img
                        let cpContent = renderHtml $ writeHtml opts'
                                (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
                        imgContent <- B.readFile img
@@ -561,8 +561,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
                               ,("content", "0")] $ ()
              ] ++ case epubCoverImage metadata of
                         Nothing  -> []
-                        Just _   -> [unode "meta" ! [("name","cover"),
-                                            ("content","cover-image")] $ ()]
+                        Just img -> [unode "meta" ! [("name","cover"),
+                                            ("content", toId img)] $ ()]
           , unode "docTitle" $ unode "text" $ plainTitle
           , unode "navMap" $
               tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
@@ -657,8 +657,8 @@ metadataElement version md currentTime =
         coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
         rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
         coverImageNodes = maybe []
-            (const $ [unode "meta" !  [("name","cover"),
-                                       ("content","cover-image")] $ ()])
+            (\img -> [unode "meta" !  [("name","cover"),
+                                       ("content",toId img)] $ ()])
             $ epubCoverImage md
         modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
                (showDateTimeISO8601 currentTime) | version == EPUB3 ]
-- 
cgit v1.2.3


From ee8c8da8ccfc7e3eb33679fd8a3a465766f9d5f7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 18 May 2014 22:04:39 -0700
Subject: Removed dependency on conduit.

* http-conduit flag is now https.
* Instead of http-conduit, we depend on http-client and http-client-tls.
---
 src/Text/Pandoc/Shared.hs | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4f506b5a6..d8cbe46d9 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -118,11 +118,13 @@ import System.FilePath ( joinPath, splitDirectories )
 #else
 import Paths_pandoc (getDataFileName)
 #endif
-#ifdef HTTP_CONDUIT
+#ifdef HTTP_CLIENT
 import Data.ByteString.Lazy (toChunks)
-import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
-                             responseBody, responseHeaders, addProxy,
-                             Request(port,host))
+import Network.HTTP.Client (httpLbs, parseUrl, withManager,
+                            responseBody, responseHeaders,
+                            Request(port,host))
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
 import System.Environment (getEnv)
 import Network.HTTP.Types.Header ( hContentType)
 import Network (withSocketsDo)
@@ -665,7 +667,7 @@ openURL u
     let mime     = takeWhile (/=',') $ drop 5 u
         contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
     in  return $ Right (decodeLenient contents, Just mime)
-#ifdef HTTP_CONDUIT
+#ifdef HTTP_CLIENT
   | otherwise = withSocketsDo $ E.try $ do
      req <- parseUrl u
      (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
@@ -674,7 +676,7 @@ openURL u
                      Right pr -> case parseUrl pr of
                                       Just r  -> addProxy (host r) (port r) req
                                       Nothing -> req
-     resp <- withManager $ httpLbs req'
+     resp <- withManager tlsManagerSettings $ httpLbs req'
      return (BS.concat $ toChunks $ responseBody resp,
              UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
 #else
-- 
cgit v1.2.3


From 8d04c821aaa8a96803126140b58b8aef02d85906 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 19 May 2014 09:45:00 -0700
Subject: Avoid `import Prelude hiding (catch)`.

See #1309.
---
 src/Text/Pandoc/UTF8.hs         |  3 +--
 src/Text/Pandoc/Writers/EPUB.hs | 10 +++-------
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 33c9ec1c5..cf25de85b 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -51,8 +51,7 @@ import System.IO hiding (readFile, writeFile, getContents,
 #if MIN_VERSION_base(4,6,0)
 import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
 #else
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn,
-                       catch)
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
 #endif
 import qualified System.IO as IO
 import qualified Data.ByteString.Char8 as B
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 4d2a39846..9514e87c9 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -59,11 +59,7 @@ import Text.Pandoc.Writers.Markdown ( writePlain )
 import Data.Char ( toLower, isDigit, isAlphaNum )
 import Network.URI ( unEscapeString )
 import Text.Pandoc.MIME (getMimeType)
-#if MIN_VERSION_base(4,6,0)
-#else
-import Prelude hiding (catch)
-#endif
-import Control.Exception (catch, SomeException)
+import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup
 
@@ -153,10 +149,10 @@ getEPUBMetadata opts meta = do
           then case lookup "lang" (writerVariables opts) of
                      Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
-                       localeLang <- catch (liftM
+                       localeLang <- E.catch (liftM
                             (map (\c -> if c == '_' then '-' else c) .
                              takeWhile (/='.')) $ getEnv "LANG")
-                          (\e -> let _ = (e :: SomeException) in return "en-US")
+                          (\e -> let _ = (e :: E.SomeException) in return "en-US")
                        return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
-- 
cgit v1.2.3


From 3c77ab98bf9a055237a69be48001f5c6ef1d64ca Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 19 May 2014 12:52:25 -0700
Subject: EPUB writer:  Handle multiple dates with OPF `event` attributes.

Note: in EPUB3 we can have only one dc:date, so only the first
one is used.
---
 src/Text/Pandoc/Writers/EPUB.hs | 47 +++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9514e87c9..b6687c330 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -72,7 +72,7 @@ data Chapter = Chapter (Maybe [Int]) [Block]
 data EPUBMetadata = EPUBMetadata{
     epubIdentifier         :: [Identifier]
   , epubTitle              :: [Title]
-  , epubDate               :: String
+  , epubDate               :: [Date]
   , epubLanguage           :: String
   , epubCreator            :: [Creator]
   , epubContributor        :: [Creator]
@@ -93,6 +93,11 @@ data Stylesheet = StylesheetPath FilePath
                 | StylesheetContents String
                 deriving Show
 
+data Date = Date{
+    dateText               :: String
+  , dateEvent              :: Maybe String
+  } deriving Show
+
 data Creator = Creator{
     creatorText            :: String
   , creatorRole            :: Maybe String
@@ -159,7 +164,9 @@ getEPUBMetadata opts meta = do
        if null (epubDate m)
           then do
             currentTime <- getCurrentTime
-            return $ m{ epubDate = showDateTimeISO8601 currentTime }
+            return $ m{ epubDate = [ Date{
+                             dateText = showDateTimeISO8601 currentTime
+                           , dateEvent = Nothing } ] }
           else return m
   let addAuthor m =
        if any (\c -> creatorRole c == Just "aut") $ epubCreator m
@@ -183,8 +190,10 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
                  , titleFileAs = getAttr "file-as"
                  , titleType = getAttr "type"
                  } : epubTitle md }
-  | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate'
-                                                 $ strContent e }
+  | name == "date" = md{ epubDate =
+             Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e
+                 , dateEvent = getAttr "event"
+                 } : epubDate md }
   | name == "language" = md{ epubLanguage = strContent e }
   | name == "creator" = md{ epubCreator =
               Creator{ creatorText = strContent e
@@ -249,6 +258,16 @@ getCreator s meta = getList s meta handleMetaValue
                   , creatorRole = metaValueToString <$> M.lookup "role" m }
         handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
 
+getDate :: String -> Meta -> [Date]
+getDate s meta = getList s meta handleMetaValue
+  where handleMetaValue (MetaMap m) =
+           Date{ dateText = maybe "" id $
+                   M.lookup "text" m >>= normalizeDate' . metaValueToString
+               , dateEvent = metaValueToString <$> M.lookup "event" m }
+        handleMetaValue mv = Date { dateText = maybe ""
+                                      id $ normalizeDate' $ metaValueToString mv
+                                  , dateEvent = Nothing }
+
 simpleList :: String -> Meta -> [String]
 simpleList s meta =
   case lookupMeta s meta of
@@ -278,8 +297,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     }
   where identifiers = getIdentifier meta
         titles = getTitle meta
-        date = fromMaybe "" $
-              (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate'
+        date = getDate "date" meta
         language = maybe "" metaValueToString $
            lookupMeta "language" meta `mplus` lookupMeta "lang" meta
         creators = getCreator "creator" meta
@@ -637,7 +655,14 @@ metadataElement version md currentTime =
         identifierNodes = withIds "epub-id" toIdentifierNode $
                           epubIdentifier md
         titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
-        dateNodes = dcTag' "date" $ epubDate md
+        dateNodes = if version == EPUB2
+                       then withIds "epub-date" toDateNode $ epubDate md
+                       else -- epub3 allows only one dc:date
+                            -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate
+                            case epubDate md of
+                                 [] -> []
+                                 (x:_) -> [dcNode "date" ! [("id","epub-date")]
+                                            $ dateText x]
         languageNodes = [dcTag "language" $ epubLanguage md]
         creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
                        epubCreator md
@@ -671,7 +696,7 @@ metadataElement version md currentTime =
                 (schemeToOnix `fmap` scheme)
         toCreatorNode s id' creator
           | version == EPUB2 = [dcNode s !
-             ([("id",id')] ++
+             (("id",id') :
               maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
               maybe [] (\x -> [("opf:role",x)])
                (creatorRole creator >>= toRelator)) $ creatorText creator]
@@ -685,7 +710,7 @@ metadataElement version md currentTime =
                    (creatorRole creator >>= toRelator)
         toTitleNode id' title
           | version == EPUB2 = [dcNode "title" !
-             ([("id",id')] ++
+             (("id",id') :
               maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++
               maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $
               titleText title]
@@ -697,6 +722,10 @@ metadataElement version md currentTime =
               maybe [] (\x -> [unode "meta" !
                    [("refines",'#':id'),("property","title-type")] $ x])
                    (titleType title)
+        toDateNode id' date = [dcNode "date" !
+             (("id",id') :
+                maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
+                 dateText date]
         schemeToOnix "ISBN-10" = "02"
         schemeToOnix "GTIN-13" = "03"
         schemeToOnix "UPC"     = "04"
-- 
cgit v1.2.3


From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Tue, 20 May 2014 22:29:21 +0200
Subject: Org reader: support for inline LaTeX

Inline LaTeX is now accepted and parsed by the org-mode reader.  Both,
math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be
used without any further escaping.
---
 src/Text/Pandoc/Readers/LaTeX.hs |  1 +
 src/Text/Pandoc/Readers/Org.hs   | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6f870318f..7fc587882 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document.
 module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
+                                   inlineCommand,
                                    handleIncludes
                                  ) where
 
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 86dda2732..c3ea8d7c2 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,12 +39,15 @@ import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
                                             , updateLastStrPos )
+import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import           Text.Pandoc.Shared (compactify', compactify'DL)
+import           Text.Parsec.Pos (updatePosString)
+import           Text.TeXMath (texMathToPandoc, DisplayType(..))
 
 import           Control.Applicative ( Applicative, pure
                                      , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
 import           Control.Arrow (first)
-import           Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
+import           Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (isAlphaNum, toLower)
 import           Data.Default
@@ -886,6 +889,7 @@ inline =
          , verbatim
          , subscript
          , superscript
+         , inlineLaTeX
          , symbol
          ] <* (guard =<< newlinesCountWithinLimits)
   <?> "inline"
@@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $
          , mappend <$> option [] ((:[]) <$> oneOf "+-")
                    <*> many1 alphaNum
          ]
+
+inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX = try $ do
+  cmd <- inlineLaTeXCommand
+  maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
+ where
+   parseAsMath :: String -> Maybe Inlines
+   parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
+
+   parseAsInlineLaTeX :: String -> Maybe Inlines
+   parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+
+   state :: ParserState
+   state = def{ stateOptions = def{ readerParseRaw = True }}
+
+maybeRight :: Either a b -> Maybe b
+maybeRight = either (const Nothing) Just
+
+inlineLaTeXCommand :: OrgParser String
+inlineLaTeXCommand = try $ do
+  rest <- getInput
+  pos <- getPosition
+  case runParser rawLaTeXInline def "source" rest of
+    Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
+                                 <* (setPosition $ updatePosString pos cs)
+    _ -> mzero
-- 
cgit v1.2.3


From 2e80613451651ec8f1945daa7540168a427f0507 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 27 May 2014 11:59:28 -0700
Subject: Markdown reader:  inline math must have nonspace before final `$`.

Closes #1313.
---
 src/Text/Pandoc/Parsing.hs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4cd6591c0..8bc042e28 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -464,11 +464,13 @@ mathInlineWith :: String -> String -> Parser [Char] st String
 mathInlineWith op cl = try $ do
   string op
   notFollowedBy space
-  words' <- many1Till (count 1 (noneOf "\n\\")
+  words' <- many1Till (count 1 (noneOf " \t\n\\")
                    <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
-                   <|> count 1 newline <* notFollowedBy' blankline
-                       *> return " ")
-              (try $ string cl)
+                   <|> do (blankline <* notFollowedBy' blankline) <|>
+                             (oneOf " \t" <* skipMany (oneOf " \t"))
+                          notFollowedBy (char '$')
+                          return " "
+                    ) (try $ string cl)
   notFollowedBy digit  -- to prevent capture of $5
   return $ concat words'
 
-- 
cgit v1.2.3


From e3ddc371dee9630dae71d61a69088b06cea8e909 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 27 May 2014 12:44:39 -0700
Subject: Markdown reader:  Handle `c++` and `objective-c` as language
 identifiers

in github-style fenced blocks.  Closes #1318.

Note:  This is special-case handling of these two cases.
It would be good to do something more systematic.
---
 src/Text/Pandoc/Readers/Markdown.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5129bc2e3..caa938ed6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -618,12 +618,19 @@ codeBlockFenced = try $ do
   skipMany spaceChar
   attr <- option ([],[],[]) $
             try (guardEnabled Ext_fenced_code_attributes >> attributes)
-           <|> ((\x -> ("",[x],[])) <$> identifier)
+           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
   blankline
   contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
   blanklines
   return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
 
+-- correctly handle github language identifiers
+toLanguageId :: String -> String
+toLanguageId = map toLower . go
+  where go "c++" = "cpp"
+        go "objective-c" = "objectivec"
+        go x = x
+
 codeBlockIndented :: MarkdownParser (F Blocks)
 codeBlockIndented = do
   contents <- many1 (indentedLine <|>
-- 
cgit v1.2.3


From 9cf5f74e8fad9c7b898553724a37035bfc46f268 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 28 May 2014 10:40:50 -0700
Subject: PDF writer:  Fixed treatment of data uris for images.

Closes #1062.
---
 src/Text/Pandoc/PDF.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index e4e06e6c9..bd55c565f 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -38,11 +38,11 @@ import qualified Data.ByteString as BS
 import System.Exit (ExitCode (..))
 import System.FilePath
 import System.Directory
+import Data.Digest.Pure.SHA (showDigest, sha1)
 import System.Environment
 import Control.Monad (unless)
 import Data.List (isInfixOf)
 import Data.Maybe (fromMaybe)
-import qualified Data.ByteString.Base64 as B64
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk (walkM)
@@ -98,7 +98,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do
               Right (contents, Just mime) -> do
                 let ext = fromMaybe (takeExtension src) $
                           extensionFromMimeType mime
-                let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
+                let basename = showDigest $ sha1 $ BL.fromChunks [contents]
                 let fname = tmpdir </> basename <.> ext
                 BS.writeFile fname contents
                 return $ Image ils (fname,tit)
-- 
cgit v1.2.3


From 23a9b800a35d3c17d29a278b6bb218f05642d282 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 31 May 2014 22:02:33 -0700
Subject: Docx writer:  Take over document formatting from reference.docx.

This includes margins, page size, page orientation.
---
 src/Text/Pandoc/Writers/Docx.hs | 47 ++++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 17 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 551d97855..6fd76c9c7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -204,11 +204,35 @@ writeDocx opts doc@(Pandoc meta _) = do
   let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
   let imageEntries = map toImageEntry imgs
 
+  -- adjust contents to add sectPr from reference.docx
+  let docpath = "word/document.xml"
+  parsedDoc <- parseXml refArchive distArchive docpath
+  let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" &&
+                                           qName qn == "sectPr")
+                   parsedDoc
+
+  let stdAttributes =
+            [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+            ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+            ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+            ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+            ,("xmlns:v","urn:schemas-microsoft-com:vml")
+            ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+            ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+            ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+            ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+
+  let contents' = contents ++ sectprs
+  let docContents = mknode "w:document" stdAttributes
+                    $ mknode "w:body" [] $ contents'
+
   -- word/document.xml
-  let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents
+  let contentEntry = toEntry "word/document.xml" epochtime
+                     $ renderXml docContents
 
   -- footnotes
-  let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes
+  let notes = mknode "w:footnotes" stdAttributes footnotes
+  let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes
 
   -- footnote rels
   let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
@@ -392,8 +416,9 @@ mkLvl marker lvl =
 getNumId :: WS Int
 getNumId = length `fmap` gets stLists
 
--- | Convert Pandoc document to two OpenXML elements (the main document and footnotes).
-writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element)
+-- | Convert Pandoc document to two lists of
+-- OpenXML elements (the main document and footnotes).
+writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
 writeOpenXML opts (Pandoc meta blocks) = do
   let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
                                   Just (MetaBlocks [Plain xs]) -> LineBreak : xs
@@ -411,19 +436,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
   doc' <- blocksToOpenXML opts blocks'
   notes' <- reverse `fmap` gets stFootnotes
   let meta' = title ++ authors ++ date
-  let stdAttributes =
-            [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
-            ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
-            ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
-            ,("xmlns:o","urn:schemas-microsoft-com:office:office")
-            ,("xmlns:v","urn:schemas-microsoft-com:vml")
-            ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
-            ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
-            ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
-            ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
-  let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc')
-  let notes = mknode "w:footnotes" stdAttributes notes'
-  return (doc, notes)
+  return (meta' ++ doc', notes')
 
 -- | Convert a list of Pandoc blocks to OpenXML.
 blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
-- 
cgit v1.2.3


From 6327ccf523bb5d550d85dd7782079b8f070fe5d1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 1 Jun 2014 15:29:27 -0700
Subject: Minor code reformat.

---
 src/Text/Pandoc/Writers/Docx.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 6fd76c9c7..026cfcb41 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -60,6 +60,11 @@ import qualified Control.Exception as E
 import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
 import Control.Applicative ((<|>))
 
+data ListMarker = NoMarker
+                | BulletMarker
+                | NumberMarker ListNumberStyle ListNumberDelim Int
+                deriving (Show, Read, Eq, Ord)
+
 data WriterState = WriterState{
          stTextProperties :: [Element]
        , stParaProperties :: [Element]
@@ -73,11 +78,6 @@ data WriterState = WriterState{
        , stLists          :: [ListMarker]
        }
 
-data ListMarker = NoMarker
-                | BulletMarker
-                | NumberMarker ListNumberStyle ListNumberDelim Int
-                deriving (Show, Read, Eq, Ord)
-
 defaultWriterState :: WriterState
 defaultWriterState = WriterState{
         stTextProperties = []
-- 
cgit v1.2.3


From 6848f642e82322c0894c62d3215e98325ab7fd8c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 1 Jun 2014 21:15:03 -0700
Subject: Docx writer:  Header and footer are now carried over from
 reference.docx.

---
 src/Text/Pandoc/Writers/Docx.hs | 54 +++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 16 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 026cfcb41..584662be8 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -155,8 +155,11 @@ writeDocx opts doc@(Pandoc meta _) = do
                   ,("/word/document.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
                   ,("/word/footnotes.xml",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
-                  ] ++ map mkImageOverride imgs
+                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"),
+                   ("/word/header1.xml",
+                    "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"),
+                   ("/word/footer1.xml",
+                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
              mknode "Default"
@@ -191,7 +194,14 @@ writeDocx opts doc@(Pandoc meta _) = do
                       "theme/theme1.xml")
                     ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
                       "rId7",
-                      "footnotes.xml")]
+                      "footnotes.xml")
+                    ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header",
+                      "rId8",
+                      "header1.xml")
+                    ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer",
+                      "rId9",
+                      "footer1.xml")
+                    ]
   let toImgRel (ident,path,_,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
@@ -207,9 +217,16 @@ writeDocx opts doc@(Pandoc meta _) = do
   -- adjust contents to add sectPr from reference.docx
   let docpath = "word/document.xml"
   parsedDoc <- parseXml refArchive distArchive docpath
-  let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" &&
-                                           qName qn == "sectPr")
-                   parsedDoc
+  let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" &&
+                                           qName qn == "sectPr") parsedDoc
+  let sectPrProps = case mbsectpr of
+                         Nothing   -> []
+                         Just e    -> filterElementsName (\qn ->
+                                         qPrefix qn == Just "w" &&
+                                         qName qn `notElem` ["headerReference","footerReference","sectPr"]) e
+  let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ ()
+  let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ ()
+  let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps
 
   let stdAttributes =
             [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
@@ -222,7 +239,7 @@ writeDocx opts doc@(Pandoc meta _) = do
             ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
             ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
 
-  let contents' = contents ++ sectprs
+  let contents' = contents ++ [sectpr]
   let docContents = mknode "w:document" stdAttributes
                     $ mknode "w:body" [] $ contents'
 
@@ -281,20 +298,25 @@ writeDocx opts doc@(Pandoc meta _) = do
         ]
   let relsEntry = toEntry relsPath epochtime $ renderXml rels
 
-  let entryFromArchive path =
+  let entryFromArchive arch path =
          (toEntry path epochtime . renderXml) `fmap`
-           parseXml refArchive distArchive path
-  docPropsAppEntry <- entryFromArchive "docProps/app.xml"
-  themeEntry <- entryFromArchive "word/theme/theme1.xml"
-  fontTableEntry <- entryFromArchive "word/fontTable.xml"
-  settingsEntry <- entryFromArchive "word/settings.xml"
-  webSettingsEntry <- entryFromArchive "word/webSettings.xml"
+           parseXml arch distArchive path
+  docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
+  themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
+  fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
+  -- we take settings.xml from dist archive because the ref archive
+  -- sometimes references special footnotes and endnotes that may
+  -- not be defined in footnotes.xml or endnotes.xml.
+  settingsEntry <- entryFromArchive distArchive "word/settings.xml"
+  webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml"
+  headerEntry <- entryFromArchive refArchive "word/header1.xml"
+  footerEntry <- entryFromArchive refArchive "word/footer1.xml"
   let miscRels = [ f | f <- filesInArchive refArchive
                      , "word/_rels/" `isPrefixOf` f
                      , ".xml.rels" `isSuffixOf` f
                      , f /= "word/_rels/document.xml.rels"
                      , f /= "word/_rels/footnotes.xml.rels" ]
-  miscRelEntries <- mapM entryFromArchive miscRels
+  miscRelEntries <- mapM (entryFromArchive refArchive) miscRels
 
   -- Create archive
   let archive = foldr addEntryToArchive emptyArchive $
@@ -302,7 +324,7 @@ writeDocx opts doc@(Pandoc meta _) = do
                   footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
                   docPropsEntry : docPropsAppEntry : themeEntry :
                   fontTableEntry : settingsEntry : webSettingsEntry :
-                  imageEntries ++ miscRelEntries
+                  headerEntry : footerEntry : imageEntries ++ miscRelEntries
   return $ fromArchive archive
 
 styleToOpenXml :: Style -> [Element]
-- 
cgit v1.2.3


From 7242165bed21a63b49e1ce7d639400f095800204 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 1 Jun 2014 22:29:13 -0700
Subject: Docx writer:  Improved handling of headers/footers.

---
 src/Text/Pandoc/Writers/Docx.hs | 105 ++++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 52 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 584662be8..098da119b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -59,6 +59,7 @@ import Text.Printf (printf)
 import qualified Control.Exception as E
 import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
 import Control.Applicative ((<|>))
+import Data.Maybe (mapMaybe)
 
 data ListMarker = NoMarker
                 | BulletMarker
@@ -123,6 +124,40 @@ writeDocx opts doc@(Pandoc meta _) = do
   epochtime <- floor `fmap` getPOSIXTime
   let imgs = M.elems $ stImages st
 
+  -- create entries for images in word/media/...
+  let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
+  let imageEntries = map toImageEntry imgs
+
+  -- adjust contents to add sectPr from reference.docx
+  parsedDoc <- parseXml refArchive distArchive "word/document.xml"
+  let wname f qn = qPrefix qn == Just "w" && f (qName qn)
+  let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
+
+  let sectpr = maybe (mknode "w:sectPr" [] $ ()) id mbsectpr
+
+  let stdAttributes =
+            [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+            ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+            ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+            ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+            ,("xmlns:v","urn:schemas-microsoft-com:vml")
+            ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+            ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+            ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+            ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+
+  let contents' = contents ++ [sectpr]
+  let docContents = mknode "w:document" stdAttributes
+                    $ mknode "w:body" [] $ contents'
+
+  parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels"
+  let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
+  let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
+  let headers = filterElements isHeaderNode parsedRels
+  let footers = filterElements isFooterNode parsedRels
+
+  let extractTarget e = findAttr (QName "Target" Nothing Nothing) e
+
   -- we create [Content_Types].xml and word/_rels/document.xml.rels
   -- from scratch rather than reading from reference.docx,
   -- because Word sometimes changes these files when a reference.docx is modified,
@@ -135,7 +170,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   let mkImageOverride (_, imgpath, mbMimeType, _, _) =
              mkOverrideNode ("/word/" ++ imgpath,
                              fromMaybe "application/octet-stream" mbMimeType)
-  let overrides = map mkOverrideNode
+  let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
                   ,("/word/numbering.xml",
@@ -155,11 +190,13 @@ writeDocx opts doc@(Pandoc meta _) = do
                   ,("/word/document.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
                   ,("/word/footnotes.xml",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"),
-                   ("/word/header1.xml",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"),
-                   ("/word/footer1.xml",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs
+                    "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
+                  ] ++
+                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                       "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
+                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                       "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
+                    map mkImageOverride imgs
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
              mknode "Default"
@@ -195,13 +232,8 @@ writeDocx opts doc@(Pandoc meta _) = do
                     ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
                       "rId7",
                       "footnotes.xml")
-                    ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header",
-                      "rId8",
-                      "header1.xml")
-                    ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer",
-                      "rId9",
-                      "footer1.xml")
-                    ]
+                    ] ++
+                    headers ++ footers
   let toImgRel (ident,path,_,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
@@ -210,38 +242,6 @@ writeDocx opts doc@(Pandoc meta _) = do
   let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
         $ renderXml reldoc
 
-  -- create entries for images in word/media/...
-  let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
-  let imageEntries = map toImageEntry imgs
-
-  -- adjust contents to add sectPr from reference.docx
-  let docpath = "word/document.xml"
-  parsedDoc <- parseXml refArchive distArchive docpath
-  let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" &&
-                                           qName qn == "sectPr") parsedDoc
-  let sectPrProps = case mbsectpr of
-                         Nothing   -> []
-                         Just e    -> filterElementsName (\qn ->
-                                         qPrefix qn == Just "w" &&
-                                         qName qn `notElem` ["headerReference","footerReference","sectPr"]) e
-  let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ ()
-  let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ ()
-  let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps
-
-  let stdAttributes =
-            [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
-            ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
-            ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
-            ,("xmlns:o","urn:schemas-microsoft-com:office:office")
-            ,("xmlns:v","urn:schemas-microsoft-com:vml")
-            ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
-            ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
-            ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
-            ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
-
-  let contents' = contents ++ [sectpr]
-  let docContents = mknode "w:document" stdAttributes
-                    $ mknode "w:body" [] $ contents'
 
   -- word/document.xml
   let contentEntry = toEntry "word/document.xml" epochtime
@@ -304,13 +304,13 @@ writeDocx opts doc@(Pandoc meta _) = do
   docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
   themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
-  -- we take settings.xml from dist archive because the ref archive
-  -- sometimes references special footnotes and endnotes that may
-  -- not be defined in footnotes.xml or endnotes.xml.
+  -- we use dist archive for settings.xml, because Word sometimes
+  -- adds references to footnotes or endnotes we don't have...
   settingsEntry <- entryFromArchive distArchive "word/settings.xml"
-  webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml"
-  headerEntry <- entryFromArchive refArchive "word/header1.xml"
-  footerEntry <- entryFromArchive refArchive "word/footer1.xml"
+  webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
+  headerFooterEntries <- mapM (entryFromArchive refArchive) $
+                     mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e)
+                     (headers ++ footers)
   let miscRels = [ f | f <- filesInArchive refArchive
                      , "word/_rels/" `isPrefixOf` f
                      , ".xml.rels" `isSuffixOf` f
@@ -324,7 +324,8 @@ writeDocx opts doc@(Pandoc meta _) = do
                   footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
                   docPropsEntry : docPropsAppEntry : themeEntry :
                   fontTableEntry : settingsEntry : webSettingsEntry :
-                  headerEntry : footerEntry : imageEntries ++ miscRelEntries
+                  imageEntries ++ headerFooterEntries ++
+                  miscRelEntries
   return $ fromArchive archive
 
 styleToOpenXml :: Style -> [Element]
-- 
cgit v1.2.3


From e1cf47efa0029a385c39053c9f441ade29d7a991 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 1 Jun 2014 23:45:05 -0700
Subject: Templates:  Fail informatively on template syntax errors.

With the move from parsec to attoparsec, we lost good error
reporting.  In fact, since we weren't testing for end of input,
malformed templates would fail silently.  Here we revert back to
Parsec for better error messages.
---
 src/Text/Pandoc/Templates.hs | 70 ++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 32 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 551db6483..89856a9ee 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -96,8 +96,8 @@ module Text.Pandoc.Templates ( renderTemplate
 import Data.Char (isAlphaNum)
 import Control.Monad (guard, when)
 import Data.Aeson (ToJSON(..), Value(..))
-import qualified Data.Attoparsec.Text as A
-import Data.Attoparsec.Text (Parser)
+import qualified Text.Parsec as P
+import Text.Parsec.Text (Parser)
 import Control.Applicative
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -172,7 +172,10 @@ renderTemplate :: (ToJSON a, TemplateTarget b) => Template -> a -> b
 renderTemplate (Template f) context = toTarget $ f $ toJSON context
 
 compileTemplate :: Text -> Either String Template
-compileTemplate template = A.parseOnly pTemplate template
+compileTemplate template =
+  case P.parse (pTemplate <* P.eof) "template" template of
+       Left e   -> Left (show e)
+       Right x  -> Right x
 
 -- | Like 'renderTemplate', but compiles the template first,
 -- raising an error if compilation fails.
@@ -230,7 +233,7 @@ replaceVar _ _ old = old
 
 pTemplate :: Parser Template
 pTemplate = do
-  sp <- A.option mempty pInitialSpace
+  sp <- P.option mempty pInitialSpace
   rest <- mconcat <$> many (pConditional <|>
                             pFor <|>
                             pNewline <|>
@@ -239,40 +242,43 @@ pTemplate = do
                             pEscapedDollar)
   return $ sp <> rest
 
+takeWhile1 :: (Char -> Bool) -> Parser Text
+takeWhile1 f = T.pack <$> P.many1 (P.satisfy f)
+
 pLit :: Parser Template
-pLit = lit <$> A.takeWhile1 (\x -> x /='$' && x /= '\n')
+pLit = lit <$> takeWhile1 (\x -> x /='$' && x /= '\n')
 
 pNewline :: Parser Template
 pNewline = do
-  A.char '\n'
-  sp <- A.option mempty pInitialSpace
+  P.char '\n'
+  sp <- P.option mempty pInitialSpace
   return $ lit "\n" <> sp
 
 pInitialSpace :: Parser Template
 pInitialSpace = do
-  sps <- A.takeWhile1 (==' ')
+  sps <- takeWhile1 (==' ')
   let indentVar = if T.null sps
                      then id
                      else indent (T.length sps)
-  v <- A.option mempty $ indentVar <$> pVar
+  v <- P.option mempty $ indentVar <$> pVar
   return $ lit sps <> v
 
 pEscapedDollar :: Parser Template
-pEscapedDollar = lit "$" <$ A.string "$$"
+pEscapedDollar = lit "$" <$ P.try (P.string "$$")
 
 pVar :: Parser Template
-pVar = var <$> (A.char '$' *> pIdent <* A.char '$')
+pVar = var <$> (P.try $ P.char '$' *> pIdent <* P.char '$')
 
 pIdent :: Parser [Text]
 pIdent = do
   first <- pIdentPart
-  rest <- many (A.char '.' *> pIdentPart)
+  rest <- many (P.char '.' *> pIdentPart)
   return (first:rest)
 
 pIdentPart :: Parser Text
-pIdentPart = do
-  first <- A.letter
-  rest <- A.takeWhile (\c -> isAlphaNum c || c == '_' || c == '-')
+pIdentPart = P.try $ do
+  first <- P.letter
+  rest <- T.pack <$> P.many (P.satisfy (\c -> isAlphaNum c || c == '_' || c == '-'))
   let id' = T.singleton first <> rest
   guard $ id' `notElem` reservedWords
   return id'
@@ -281,38 +287,38 @@ reservedWords :: [Text]
 reservedWords = ["else","endif","for","endfor","sep"]
 
 skipEndline :: Parser ()
-skipEndline = A.skipWhile (`elem` " \t") >> A.char '\n' >> return ()
+skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return ()
 
 pConditional :: Parser Template
 pConditional = do
-  A.string "$if("
+  P.try $ P.string "$if("
   id' <- pIdent
-  A.string ")$"
+  P.string ")$"
   -- if newline after the "if", then a newline after "endif" will be swallowed
-  multiline <- A.option False (True <$ skipEndline)
+  multiline <- P.option False (True <$ skipEndline)
   ifContents <- pTemplate
-  elseContents <- A.option mempty $
-                      do A.string "$else$"
-                         when multiline $ A.option () skipEndline
+  elseContents <- P.option mempty $ P.try $
+                      do P.string "$else$"
+                         when multiline $ P.option () skipEndline
                          pTemplate
-  A.string "$endif$"
-  when multiline $ A.option () skipEndline
+  P.string "$endif$"
+  when multiline $ P.option () skipEndline
   return $ cond id' ifContents elseContents
 
 pFor :: Parser Template
 pFor = do
-  A.string "$for("
+  P.try $ P.string "$for("
   id' <- pIdent
-  A.string ")$"
+  P.string ")$"
   -- if newline after the "for", then a newline after "endfor" will be swallowed
-  multiline <- A.option False $ skipEndline >> return True
+  multiline <- P.option False $ skipEndline >> return True
   contents <- pTemplate
-  sep <- A.option mempty $
-           do A.string "$sep$"
-              when multiline $ A.option () skipEndline
+  sep <- P.option mempty $
+           do P.try $ P.string "$sep$"
+              when multiline $ P.option () skipEndline
               pTemplate
-  A.string "$endfor$"
-  when multiline $ A.option () skipEndline
+  P.string "$endfor$"
+  when multiline $ P.option () skipEndline
   return $ iter id' contents sep
 
 indent :: Int -> Template -> Template
-- 
cgit v1.2.3


From bf915da6cd0dc97a231100b784450e334c715969 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 2 Jun 2014 20:07:41 -0700
Subject: Docx writer:  Make images work in reference.docx headers/footers.

* All media from reference.docx are copied into result.
* Added defaults for common image types to [Content Types].
* Avoided redundant XML parse + write for entries taken over from
  reference.docx, for better performance.
---
 src/Text/Pandoc/Writers/Docx.hs | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 098da119b..8aaf3c1b8 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -197,10 +197,21 @@ writeDocx opts doc@(Pandoc meta _) = do
                   map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
                     map mkImageOverride imgs
+  let imageDefaults = map (\(x,y) -> mknode "Default"
+                             [("Extension",x),("ContentType",y)] ())
+                       [("jpg","image/jpeg")
+                       ,("jpeg","image/jpeg")
+                       ,("png","image/png")
+                       ,("svg","image/svg+xml")
+                       ,("tif","image/tiff")
+                       ,("tiff","image/tiff")
+                       ,("bmp","image/x-ms-bmp")
+                       ,("gif","image/gif")
+                       ]
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
              mknode "Default"
-              [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
+              [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults
   let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
   let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
         $ renderXml contentTypesDoc
@@ -311,12 +322,13 @@ writeDocx opts doc@(Pandoc meta _) = do
   headerFooterEntries <- mapM (entryFromArchive refArchive) $
                      mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e)
                      (headers ++ footers)
-  let miscRels = [ f | f <- filesInArchive refArchive
-                     , "word/_rels/" `isPrefixOf` f
-                     , ".xml.rels" `isSuffixOf` f
-                     , f /= "word/_rels/document.xml.rels"
-                     , f /= "word/_rels/footnotes.xml.rels" ]
-  miscRelEntries <- mapM (entryFromArchive refArchive) miscRels
+  let miscRelEntries = [ e | e <- zEntries refArchive
+                       , "word/_rels/" `isPrefixOf` (eRelativePath e)
+                       , ".xml.rels" `isSuffixOf` (eRelativePath e)
+                       , eRelativePath e /= "word/_rels/document.xml.rels"
+                       , eRelativePath e /= "word/_rels/footnotes.xml.rels" ]
+  let otherMediaEntries = [ e | e <- zEntries refArchive
+                          , "word/media/" `isPrefixOf` eRelativePath e ]
 
   -- Create archive
   let archive = foldr addEntryToArchive emptyArchive $
@@ -325,7 +337,7 @@ writeDocx opts doc@(Pandoc meta _) = do
                   docPropsEntry : docPropsAppEntry : themeEntry :
                   fontTableEntry : settingsEntry : webSettingsEntry :
                   imageEntries ++ headerFooterEntries ++
-                  miscRelEntries
+                  miscRelEntries ++ otherMediaEntries
   return $ fromArchive archive
 
 styleToOpenXml :: Style -> [Element]
-- 
cgit v1.2.3


From 326d7fa8f89f9a4b74042bf4cbb04931e26c8d8d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 2 Jun 2014 20:20:16 -0700
Subject: Docx writer:  Improved entryFromArchive to avoid parse.

No need to parse the XML if we're just going to render it
right away!
---
 src/Text/Pandoc/Writers/Docx.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 8aaf3c1b8..1e37b5515 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -310,8 +310,9 @@ writeDocx opts doc@(Pandoc meta _) = do
   let relsEntry = toEntry relsPath epochtime $ renderXml rels
 
   let entryFromArchive arch path =
-         (toEntry path epochtime . renderXml) `fmap`
-           parseXml arch distArchive path
+         maybe (fail $ path ++ " corrupt or missing in reference docx")
+               return
+               (findEntryByPath path arch `mplus` findEntryByPath path distArchive)
   docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
   themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
-- 
cgit v1.2.3


From cbfde5cb50a461995a8b60d148615b5a72159f3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 2 Jun 2014 20:39:27 -0700
Subject: Docx writer: Create overrides per-image for media/ in ref docx.

This should be somewhat more robust and cover more types
of images.
---
 src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 1e37b5515..e630c5094 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -170,6 +170,9 @@ writeDocx opts doc@(Pandoc meta _) = do
   let mkImageOverride (_, imgpath, mbMimeType, _, _) =
              mkOverrideNode ("/word/" ++ imgpath,
                              fromMaybe "application/octet-stream" mbMimeType)
+  let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath,
+                                 fromMaybe "application/octet-stream"
+                                   $ getMimeType imgpath)
   let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -196,22 +199,14 @@ writeDocx opts doc@(Pandoc meta _) = do
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
                   map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
-                    map mkImageOverride imgs
-  let imageDefaults = map (\(x,y) -> mknode "Default"
-                             [("Extension",x),("ContentType",y)] ())
-                       [("jpg","image/jpeg")
-                       ,("jpeg","image/jpeg")
-                       ,("png","image/png")
-                       ,("svg","image/svg+xml")
-                       ,("tif","image/tiff")
-                       ,("tiff","image/tiff")
-                       ,("bmp","image/x-ms-bmp")
-                       ,("gif","image/gif")
-                       ]
+                    map mkImageOverride imgs ++
+                    map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive
+                                        , "word/media/" `isPrefixOf` eRelativePath e ]
+
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
              mknode "Default"
-              [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults
+              [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
   let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
   let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
         $ renderXml contentTypesDoc
-- 
cgit v1.2.3


From 2a627f85fe27d4351e9c612454d18ae701a466a3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 11:00:54 -0700
Subject: Shared:  Added ordNub.

API change (adds export).
---
 src/Text/Pandoc/Shared.hs | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d8cbe46d9..b0adf55f5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -35,6 +35,7 @@ module Text.Pandoc.Shared (
                      splitByIndices,
                      splitStringByIndices,
                      substitute,
+                     ordNub,
                      -- * Text processing
                      backslashEscapes,
                      escapeStringUsing,
@@ -94,6 +95,7 @@ import Data.List ( find, isPrefixOf, intercalate )
 import qualified Data.Map as M
 import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
                      unEscapeString, parseURIReference )
+import qualified Data.Set as Set
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
 import System.FilePath ( (</>), takeExtension, dropExtension )
@@ -174,6 +176,13 @@ substitute target replacement lst@(x:xs) =
        then replacement ++ substitute target replacement (drop (length target) lst)
        else x : substitute target replacement xs
 
+ordNub :: (Ord a) => [a] -> [a]
+ordNub l = go Set.empty l
+  where
+    go _ [] = []
+    go s (x:xs) = if x `Set.member` s then go s xs
+                                      else x : go (Set.insert x s) xs
+
 --
 -- Text processing
 --
-- 
cgit v1.2.3


From 9b4e772718a868392a51727ff8cc5eba2ce35bcd Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 11:01:23 -0700
Subject: Templates: use ordNum instead of ord.

Closes #1022.
---
 src/Text/Pandoc/Templates.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 89856a9ee..4ae6a6d8a 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -103,7 +103,7 @@ import qualified Data.Text as T
 import Data.Text (Text)
 import Data.Text.Encoding (encodeUtf8)
 import Text.Pandoc.Compat.Monoid ((<>), Monoid(..))
-import Data.List (intersperse, nub)
+import Data.List (intersperse)
 import System.FilePath ((</>), (<.>))
 import qualified Data.Map as M
 import qualified Data.HashMap.Strict as H
@@ -116,7 +116,7 @@ import Text.Blaze.Internal (preEscapedText)
 import Text.Blaze (preEscapedText, Html)
 #endif
 import Data.ByteString.Lazy (ByteString, fromChunks)
-import Text.Pandoc.Shared (readDataFileUTF8)
+import Text.Pandoc.Shared (readDataFileUTF8, ordNub)
 import Data.Vector ((!?))
 
 -- | Get default template for the specified writer.
@@ -163,7 +163,7 @@ varListToJSON assoc = toJSON $ M.fromList assoc'
   where assoc' = [(T.pack k, toVal [T.pack z | (y,z) <- assoc,
                                                 not (null z),
                                                 y == k])
-                        | k <- nub $ map fst assoc ]
+                        | k <- ordNub $ map fst assoc ]
         toVal [x] = toJSON x
         toVal []  = Null
         toVal xs  = toJSON xs
-- 
cgit v1.2.3


From 05355ac57b7ccadfa4462f3304a7f6364147c8eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 11:03:40 -0700
Subject: Docx writer: Simplified abstractNumId numbering.

Instead of sequential numbering, we assign numbers based on the
list marker styles.  This simplifies some of the code and should
make it easier to modify numbering in the future.
---
 src/Text/Pandoc/Writers/Docx.hs | 49 +++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 19 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index e630c5094..572823871 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -66,6 +66,25 @@ data ListMarker = NoMarker
                 | NumberMarker ListNumberStyle ListNumberDelim Int
                 deriving (Show, Read, Eq, Ord)
 
+listMarkerToId :: ListMarker -> String
+listMarkerToId NoMarker = "0"
+listMarkerToId BulletMarker = "1"
+listMarkerToId (NumberMarker sty delim n) =
+  styNum : delimNum : show n
+  where styNum = case sty of
+                      DefaultStyle   -> '2'
+                      Example        -> '3'
+                      Decimal        -> '4'
+                      LowerRoman     -> '5'
+                      UpperRoman     -> '6'
+                      LowerAlpha     -> '7'
+                      UpperAlpha     -> '8'
+        delimNum = case delim of
+                      DefaultDelim   -> '0'
+                      Period         -> '1'
+                      OneParen       -> '2'
+                      TwoParens      -> '3'
+
 data WriterState = WriterState{
          stTextProperties :: [Element]
        , stParaProperties :: [Element]
@@ -75,7 +94,6 @@ data WriterState = WriterState{
        , stImages         :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString)
        , stListLevel      :: Int
        , stListNumId      :: Int
-       , stNumStyles      :: M.Map ListMarker Int
        , stLists          :: [ListMarker]
        }
 
@@ -89,7 +107,6 @@ defaultWriterState = WriterState{
       , stImages         = M.empty
       , stListLevel      = -1
       , stListNumId      = 1
-      , stNumStyles      = M.fromList [(NoMarker, 0)]
       , stLists          = [NoMarker]
       }
 
@@ -273,7 +290,7 @@ writeDocx opts doc@(Pandoc meta _) = do
   -- construct word/numbering.xml
   let numpath = "word/numbering.xml"
   numEntry <- (toEntry numpath epochtime . renderXml)
-                 `fmap` mkNumbering (stNumStyles st) (stLists st)
+                 `fmap` mkNumbering (stLists st)
   let docPropsPath = "docProps/core.xml"
   let docProps = mknode "cp:coreProperties"
           [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
@@ -371,29 +388,28 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
                                  $ backgroundColor style )
                              ]
 
-mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element
-mkNumbering markers lists = do
-  elts <- mapM mkAbstractNum (M.toList markers)
+mkNumbering :: [ListMarker] -> IO Element
+mkNumbering lists = do
+  elts <- mapM mkAbstractNum (ordNub lists)
   return $ mknode "w:numbering"
      [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
-     $ elts ++ zipWith (mkNum markers) lists [1..(length lists)]
+     $ elts ++ zipWith mkNum lists [1..(length lists)]
 
-mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element
-mkNum markers marker numid =
+mkNum :: ListMarker -> Int -> Element
+mkNum marker numid =
   mknode "w:num" [("w:numId",show numid)]
-   $ mknode "w:abstractNumId" [("w:val",show absnumid)] ()
+   $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
    : case marker of
        NoMarker     -> []
        BulletMarker -> []
        NumberMarker _ _ start ->
           map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
               $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
-   where absnumid = fromMaybe 0 $ M.lookup marker markers
 
-mkAbstractNum :: (ListMarker,Int) -> IO Element
-mkAbstractNum (marker,numid) = do
+mkAbstractNum :: ListMarker -> IO Element
+mkAbstractNum marker = do
   nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer)
-  return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)]
+  return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
     $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker) [0..6]
@@ -594,11 +610,6 @@ addList :: ListMarker -> WS ()
 addList marker = do
   lists <- gets stLists
   modify $ \st -> st{ stLists = lists ++ [marker] }
-  numStyles <- gets stNumStyles
-  case M.lookup marker numStyles of
-           Just _  -> return ()
-           Nothing -> modify $ \st ->
-                 st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
 
 listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
 listItemToOpenXML _ _ []                   = return []
-- 
cgit v1.2.3


From 2842ad5a978de758d70801b5279f75b9ba679406 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 11:33:09 -0700
Subject: Docx writer: Changed abstractNumId numbering scheme.

Now the minimum id used by pandoc is 990.  All ids start with "99".
This gives some room for a reference.docx to define numbering styles.
Note:  this is not yet possible, since pandoc generates numbering.xml
entirely on its own.
---
 src/Text/Pandoc/Writers/Docx.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 572823871..ca0892547 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -67,10 +67,10 @@ data ListMarker = NoMarker
                 deriving (Show, Read, Eq, Ord)
 
 listMarkerToId :: ListMarker -> String
-listMarkerToId NoMarker = "0"
-listMarkerToId BulletMarker = "1"
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
 listMarkerToId (NumberMarker sty delim n) =
-  styNum : delimNum : show n
+  '9' : '9' : styNum : delimNum : show n
   where styNum = case sty of
                       DefaultStyle   -> '2'
                       Example        -> '3'
-- 
cgit v1.2.3


From ec047aaa8c1c1e9d69b0029a2e4512785fbc15a8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 12:13:31 -0700
Subject: Docx writer:  pandoc uses only numIds >= 1000 for lists.

This opens up the possiblity (with further code changes) of
preserving some numbering from the reference.docx (e.g. header
numbering.)  See #1305.
---
 src/Text/Pandoc/Writers/Docx.hs | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index ca0892547..3d2f5d4b5 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -388,12 +388,16 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
                                  $ backgroundColor style )
                              ]
 
+-- this is the lowest number used for a list numId
+baseListId :: Int
+baseListId = 1000
+
 mkNumbering :: [ListMarker] -> IO Element
 mkNumbering lists = do
   elts <- mapM mkAbstractNum (ordNub lists)
   return $ mknode "w:numbering"
      [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
-     $ elts ++ zipWith mkNum lists [1..(length lists)]
+     $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
@@ -461,7 +465,7 @@ mkLvl marker lvl =
           patternFor _ s = s ++ "."
 
 getNumId :: WS Int
-getNumId = length `fmap` gets stLists
+getNumId = ((999 +) . length) `fmap` gets stLists
 
 -- | Convert Pandoc document to two lists of
 -- OpenXML elements (the main document and footnotes).
@@ -615,7 +619,8 @@ listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element]
 listItemToOpenXML _ _ []                   = return []
 listItemToOpenXML opts numid (first:rest) = do
   first' <- withNumId numid $ blockToOpenXML opts first
-  rest'  <- withNumId 1      $ blocksToOpenXML opts rest
+  -- baseListId is the code for no list marker:
+  rest'  <- withNumId baseListId $ blocksToOpenXML opts rest
   return $ first' ++ rest'
 
 alignmentToString :: Alignment -> [Char]
-- 
cgit v1.2.3


From 0ddb4cd2e8883c226ca7ab8a92737dc29f07dfda Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 13:14:32 -0700
Subject: Docx writer: Combine reference.docx numbering with pandoc's.

This should have fixed #1305, allowing the reference.docx to define
section numbering, but it doesn't.  Now the headings appear with proper
indentation, but the numbers don't appear.  Unclear why.  styles.xml and
numbering.xml basically match the docx which has the expected result.
---
 src/Text/Pandoc/Writers/Docx.hs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 3d2f5d4b5..785238d6f 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -289,8 +289,10 @@ writeDocx opts doc@(Pandoc meta _) = do
 
   -- construct word/numbering.xml
   let numpath = "word/numbering.xml"
-  numEntry <- (toEntry numpath epochtime . renderXml)
-                 `fmap` mkNumbering (stLists st)
+  numbering <- parseXml refArchive distArchive numpath
+  newNumElts <- mkNumbering (stLists st)
+  let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent =
+                       elContent numbering ++ map Elem newNumElts }
   let docPropsPath = "docProps/core.xml"
   let docProps = mknode "cp:coreProperties"
           [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
@@ -392,12 +394,10 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
 baseListId :: Int
 baseListId = 1000
 
-mkNumbering :: [ListMarker] -> IO Element
+mkNumbering :: [ListMarker] -> IO [Element]
 mkNumbering lists = do
   elts <- mapM mkAbstractNum (ordNub lists)
-  return $ mknode "w:numbering"
-     [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
-     $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
+  return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
-- 
cgit v1.2.3


From 45f3851611007f18530b52c9fcc5f0106fbc6816 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Tue, 3 Jun 2014 16:46:55 -0700
Subject: Docx writer: Section numbering carries over from reference.docx.

Closes #1305.
---
 src/Text/Pandoc/Writers/Docx.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 785238d6f..4e64a79df 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -291,8 +291,13 @@ writeDocx opts doc@(Pandoc meta _) = do
   let numpath = "word/numbering.xml"
   numbering <- parseXml refArchive distArchive numpath
   newNumElts <- mkNumbering (stLists st)
+  let allElts = onlyElems (elContent numbering) ++ newNumElts
   let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent =
-                       elContent numbering ++ map Elem newNumElts }
+                       -- we want all the abstractNums first, then the nums,
+                       -- otherwise things break:
+                       [Elem e | e <- allElts
+                               , qName (elName e) == "abstractNum" ] ++
+                       [Elem e | e <- allElts, qName (elName e) == "num" ] }
   let docPropsPath = "docProps/core.xml"
   let docProps = mknode "cp:coreProperties"
           [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
-- 
cgit v1.2.3


From ab5dda7a601ad93b97ee02b3d216cc6d5321a462 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 3 Jun 2014 23:17:03 -0700
Subject: Markdown writer:  Prettier pipe tables.

Columns are now aligned.  Closes #1323.
---
 src/Text/Pandoc/Writers/Markdown.hs | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index f42a1b54c..a67271a5d 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -478,16 +478,24 @@ addMarkdownAttribute s =
 
 pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc
 pipeTable headless aligns rawHeaders rawRows = do
+  let sp = text " "
+  let blockFor AlignLeft   x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
+      blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty
+      blockFor AlignRight  x y = rblock (x + 2) (sp <> y) <> lblock 0 empty
+      blockFor _           x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
+  let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
   let torow cs = nowrap $ text "|" <>
-                 hcat (intersperse (text "|") $ map chomp cs) <> text "|"
-  let toborder (a, h) = let wid = max (offset h) 3
-                        in  text $ case a of
-                             AlignLeft    -> ':':replicate (wid - 1) '-'
-                             AlignCenter  -> ':':replicate (wid - 2) '-' ++ ":"
-                             AlignRight   -> replicate (wid - 1) '-' ++ ":"
-                             AlignDefault -> replicate wid '-'
+                    hcat (intersperse (text "|") $
+                          zipWith3 blockFor aligns widths (map chomp cs))
+                    <> text "|"
+  let toborder (a, w) = text $ case a of
+                             AlignLeft    -> ':':replicate (w + 1) '-'
+                             AlignCenter  -> ':':replicate w '-' ++ ":"
+                             AlignRight   -> replicate (w + 1) '-' ++ ":"
+                             AlignDefault -> replicate (w + 2) '-'
   let header = if headless then empty else torow rawHeaders
-  let border = torow $ map toborder $ zip aligns rawHeaders
+  let border = nowrap $ text "|" <> hcat (intersperse (text "|") $
+                        map toborder $ zip aligns widths) <> text "|"
   let body   = vcat $ map torow rawRows
   return $ header $$ border $$ body
 
-- 
cgit v1.2.3


From 96815746611d63fae7be718cce643a52effd6525 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 3 Jun 2014 23:17:42 -0700
Subject: LaTeX reader:  Handle comments at the end of tables.

This resolves the issue illustrated in
http://stackoverflow.com/questions/24009489/comments-in-latex-break-pandoc-table.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7fc587882..c3c0ba423 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1269,6 +1269,7 @@ simpTable = try $ do
   header' <- option [] $ try (parseTableRow cols <* lbreak <* hline)
   rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline)
   spaces
+  skipMany (comment *> spaces)
   let header'' = if null header'
                     then replicate cols mempty
                     else header'
-- 
cgit v1.2.3


From abbf33ae7d6ee3358fd74e434ccd897c774bc3d0 Mon Sep 17 00:00:00 2001
From: James Aspnes <aspnes@cs.yale.edu>
Date: Thu, 12 Jun 2014 21:19:00 -0400
Subject: allow (and discard) optional argument for \caption

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index c3c0ba423..6b5958920 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -305,7 +305,7 @@ blockCommands = M.fromList $
   , ("item", skipopts *> loose_item)
   , ("documentclass", skipopts *> braced *> preamble)
   , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
-  , ("caption", tok >>= setCaption)
+  , ("caption", skipopts *> tok >>= setCaption)
   , ("PandocStartInclude", startInclude)
   , ("PandocEndInclude", endInclude)
   , ("bibliography", mempty <$ (skipopts *> braced >>=
-- 
cgit v1.2.3


From 293e4cfdc3028218089115acd6f091b9ea3aa7d6 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 14 Jun 2014 10:02:35 -0400
Subject: Add DocX files to tree.

This introduces Text.Pandoc.DocX, and its exported `readDocX` function.
---
 src/Text/Pandoc/Readers/DocX.hs       | 479 +++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ++++++++++++
 src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ++++++++++++++++++++++++++++++++++
 3 files changed, 1291 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/DocX.hs
 create mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs
 create mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs
new file mode 100644
index 000000000..976e2e271
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX.hs
@@ -0,0 +1,479 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse)
+to 'Pandoc' document.  -}
+
+{-
+Current state of implementation of DocX entities ([x] means
+implemented, [-] means partially implemented):
+
+* Blocks
+
+  - [X] Para
+  - [X] CodeBlock (styled with `SourceCode`)
+  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
+        indented)
+  - [X] OrderedList
+  - [X] BulletList
+  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
+  - [X] Header (styled with `Heading#`)
+  - [ ] HorizontalRule
+  - [-] Table (column widths and alignments not yet implemented)
+
+* Inlines
+
+  - [X] Str
+  - [X] Emph (From italics. `underline` currently read as span. In
+        future, it might optionally be emph as well)
+  - [X] Strong
+  - [X] Strikeout
+  - [X] Superscript
+  - [X] Subscript
+  - [X] SmallCaps
+  - [ ] Quoted
+  - [ ] Cite
+  - [X] Code (styled with `VerbatimChar`)
+  - [X] Space
+  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
+  - [ ] Math
+  - [X] Link (links to an arbitrary bookmark create a span with the target as
+        id and "anchor" class)
+  - [-] Image (Links to path in archive. Future option for
+        data-encoded URI likely.)
+  - [X] Note (Footnotes and Endnotes are silently combined.)
+-}
+
+module Text.Pandoc.Readers.DocX
+       ( readDocX
+       ) where
+
+import Codec.Archive.Zip
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (text, toList)
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Readers.DocX.Parse
+import Text.Pandoc.Readers.DocX.Lists
+import Data.Maybe (mapMaybe, isJust, fromJust)
+import Data.List (delete, isPrefixOf, (\\), intersect)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Base64 (encode)
+import System.FilePath (combine)
+
+readDocX :: ReaderOptions
+         -> B.ByteString
+         -> Pandoc
+readDocX opts bytes =
+  case archiveToDocX (toArchive bytes) of
+    Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
+    Nothing   -> error $ "couldn't parse docx file"
+
+runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
+runStyleToSpanAttr rPr = ("",
+                          mapMaybe id [
+                            if isBold rPr then (Just "strong") else Nothing,
+                            if isItalic rPr then (Just "emph") else Nothing,
+                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
+                            if isStrike rPr then (Just "strike") else Nothing,
+                            if isSuperScript rPr then (Just "superscript") else Nothing,
+                            if isSubScript rPr then (Just "subscript") else Nothing,
+                            rStyle rPr],
+                          case underline rPr of
+                            Just fmt -> [("underline", fmt)]
+                            _        -> []
+                         )
+
+parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
+parStyleToDivAttr pPr = ("",
+                          pStyle pPr,
+                          case indent pPr of
+                            Just n  -> [("indent", (show n))]
+                            Nothing -> []
+                         )
+
+strToInlines :: String -> [Inline]
+strToInlines = toList . text
+
+codeSpans :: [String]
+codeSpans = ["VerbatimChar"]
+
+blockQuoteDivs :: [String]
+blockQuoteDivs = ["Quote", "BlockQuote"]
+
+codeDivs :: [String]
+codeDivs = ["SourceCode"]
+
+runElemToInlines :: RunElem -> [Inline]
+runElemToInlines (TextRun s) = strToInlines s
+runElemToInlines (LnBrk) = [LineBreak]
+
+runElemToString :: RunElem -> String
+runElemToString (TextRun s) = s
+runElemToString (LnBrk) = ['\n']
+
+runElemsToString :: [RunElem] -> String
+runElemsToString = concatMap runElemToString
+
+strNormalize :: [Inline] -> [Inline]
+strNormalize [] = []
+strNormalize (Str "" : ils) = strNormalize ils
+strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
+strNormalize (il:ils) = il : (strNormalize ils)
+
+runToInlines :: ReaderOptions -> DocX -> Run -> [Inline]
+runToInlines _ _ (Run rs runElems) 
+  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
+    case runStyleToSpanAttr rs == ("", [], []) of
+      True -> [Str (runElemsToString runElems)]
+      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
+  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
+      True -> concatMap runElemToInlines runElems
+      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
+runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) =
+  case (getFootNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["footnote"], []) []]]
+runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) =
+  case (getEndNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["endnote"], []) []]]
+
+parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline]
+parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
+parPartToInlines _ _ (BookMark _ anchor) =
+  [Span (anchor, ["anchor"], []) []]
+parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) =
+  case lookupRelationship relid rels of
+    Just target -> [Image [] (combine "word" target, "")]
+    Nothing     -> [Image [] ("", "")]
+parPartToInlines opts docx (InternalHyperLink anchor runs) =
+  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
+parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) =
+  case lookupRelationship relid rels of
+    Just target ->
+      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
+    Nothing ->
+      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+
+isAnchorSpan :: Inline -> Bool
+isAnchorSpan (Span (ident, classes, kvs) ils) =
+  (not . null) ident &&
+  classes == ["anchor"] &&
+  null kvs &&
+  null ils
+isAnchorSpan _ = False
+
+dummyAnchors :: [String]
+dummyAnchors = ["_GoBack"]
+
+makeHeaderAnchors :: Block -> Block
+makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
+  case filter isAnchorSpan ils of
+    []   -> h
+    (x@(Span (ident, _, _) _) : xs) ->
+      case ident `elem` dummyAnchors of
+        True -> h
+        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
+    _ -> h
+makeHeaderAnchors blk = blk
+  
+
+parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline]
+parPartsToInlines opts docx parparts =
+  --
+  -- We're going to skip data-uri's for now. It should be an option,
+  -- not mandatory.
+  --
+  --bottomUp (makeImagesSelfContained docx) $
+  bottomUp spanCorrect $
+  bottomUp spanTrim $
+  bottomUp spanReduce $
+  concatMap (parPartToInlines opts docx) parparts
+
+cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block]
+cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
+
+rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]]
+rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
+
+bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block
+bodyPartToBlock opts docx (Paragraph pPr parparts) =
+  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
+bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+  let
+    kvs = case lookupLevel numId lvl numbering of
+      Just (_, fmt, txt, Just start) -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        , ("start", (show start))
+                                        ]
+      
+      Just (_, fmt, txt, Nothing)    -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        ]
+      Nothing                        -> []
+  in
+   Div
+   ("", ["list-item"], kvs)
+   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
+bodyPartToBlock _ _ (Tbl _ _ _ []) =
+  Para []
+bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+  let caption = strToInlines cap
+      (hdr, rows) = case firstRowFormatting look of
+        True -> (Just r, rs)
+        False -> (Nothing, r:rs)
+      hdrCells = case hdr of
+        Just r' -> rowToBlocksList opts docx r'
+        Nothing -> []
+      cells = map (rowToBlocksList opts docx) rows
+      
+      size = case null hdrCells of
+        True -> length $ head cells
+        False -> length $ hdrCells
+      --
+      -- The two following variables (horizontal column alignment and
+      -- relative column widths) go to the default at the
+      -- moment. Width information is in the TblGrid field of the Tbl,
+      -- so should be possible. Alignment might be more difficult,
+      -- since there doesn't seem to be a column entity in docx.
+      alignments = take size (repeat AlignDefault)
+      widths = take size (repeat 0) :: [Double]
+  in
+   Table caption alignments widths hdrCells cells
+
+makeImagesSelfContained :: DocX -> Inline -> Inline
+makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) =
+  case lookup uri media of
+    Just bs -> case getMimeType uri of
+      Just mime ->  let data_uri =
+                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
+                    in
+                     Image alt (data_uri, title)
+      Nothing  -> i
+    Nothing -> i
+makeImagesSelfContained _ inline = inline
+
+bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block]
+bodyToBlocks opts docx (Body bps) =
+  bottomUp removeEmptyPars $
+  bottomUp strNormalize $ 
+  bottomUp spanRemove $ 
+  bottomUp divRemove $
+  map (makeHeaderAnchors) $
+  bottomUp divCorrect $
+  bottomUp divReduce $
+  bottomUp divCorrectPreReduce $
+  bottomUp blocksToDefinitions $
+  blocksToBullets $
+  map (bodyPartToBlock opts docx) bps
+
+docxToBlocks :: ReaderOptions -> DocX -> [Block]
+docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body
+
+spanReduce :: [Inline] -> [Inline]
+spanReduce [] = []
+spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
+  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
+spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
+            s2@(Span (id2, classes2, kvs2) ils2) :
+            ils) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> s1 : (spanReduce (s2 : ils))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
+                           ils)
+spanReduce (il:ils) = il : (spanReduce ils)
+
+ilToCode :: Inline -> String
+ilToCode (Str s) = s
+ilToCode _ = ""
+
+spanRemove' :: Inline -> [Inline]
+spanRemove' s@(Span (ident, classes, _) [])
+  -- "_GoBack" is automatically inserted. We don't want to keep it.
+  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
+spanRemove' (Span (_, _, kvs) ils) =
+  case lookup "underline" kvs of
+    Just val -> [Span ("", [], [("underline", val)]) ils]
+    Nothing  -> ils
+spanRemove' il = [il]
+
+spanRemove :: [Inline] -> [Inline]
+spanRemove = concatMap spanRemove'
+
+spanTrim' :: Inline -> [Inline]
+spanTrim' il@(Span _ []) = [il]
+spanTrim' il@(Span attr (il':[]))
+  | il' == Space = [Span attr [], Space]
+  | otherwise = [il]
+spanTrim' (Span attr ils)
+  | head ils == Space && last ils == Space =
+    [Space, Span attr (init $ tail ils), Space]
+  | head ils == Space = [Space, Span attr (tail ils)]
+  | last ils == Space = [Span attr (init ils), Space]
+spanTrim' il = [il]
+
+spanTrim :: [Inline] -> [Inline]
+spanTrim = concatMap spanTrim'
+
+spanCorrect' :: Inline -> [Inline]
+spanCorrect' (Span ("", [], []) ils) = ils
+spanCorrect' (Span (ident, classes, kvs) ils)
+  | "emph" `elem` classes =
+    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
+  | "strong" `elem` classes =
+      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
+  | "smallcaps" `elem` classes =
+      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
+  | "strike" `elem` classes =
+      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
+  | "superscript" `elem` classes =
+      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
+  | "subscript" `elem` classes =
+      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
+  | (not . null) (codeSpans `intersect` classes) =
+         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
+  | otherwise =
+      [Span (ident, classes, kvs) ils]
+spanCorrect' il = [il]
+
+spanCorrect :: [Inline] -> [Inline]
+spanCorrect = concatMap spanCorrect'
+
+removeEmptyPars :: [Block] -> [Block]
+removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
+
+divReduce :: [Block] -> [Block]
+divReduce [] = []
+divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
+  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
+divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
+           d2@(Div (id2, classes2, kvs2) blks2) :
+            blks) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> d1 : (divReduce (d2 : blks))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
+                           blks)
+divReduce (blk:blks) = blk : (divReduce blks)
+
+isHeaderClass :: String -> Maybe Int
+isHeaderClass s | "Heading" `isPrefixOf` s =
+  case reads (drop (length "Heading") s) :: [(Int, String)] of
+    [] -> Nothing
+    ((n, "") : []) -> Just n
+    _       -> Nothing
+isHeaderClass _ = Nothing
+
+findHeaderClass :: [String] -> Maybe Int
+findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
+  [] -> Nothing
+  n : _ -> Just n
+
+blksToInlines :: [Block] -> [Inline]
+blksToInlines (Para ils : _) = ils
+blksToInlines (Plain ils : _) = ils
+blksToInlines _ = []
+
+divCorrectPreReduce' :: Block -> [Block]
+divCorrectPreReduce' (Div (ident, classes, kvs) blks)
+  | isJust $ findHeaderClass classes =
+    let n = fromJust $ findHeaderClass classes
+    in
+    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
+  | otherwise = [Div (ident, classes, kvs) blks]
+divCorrectPreReduce' blk = [blk]
+
+divCorrectPreReduce :: [Block] -> [Block]
+divCorrectPreReduce = concatMap divCorrectPreReduce'
+
+blkToCode :: Block -> String
+blkToCode (Para []) = ""
+blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
+blkToCode (Para ((Span (_, classes, _) ils'): ils))
+  | (not . null) (codeSpans `intersect` classes) =
+    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
+blkToCode _ = ""
+
+divRemove' :: Block -> [Block]
+divRemove' (Div (_, _, kvs) blks) =
+  case lookup "indent" kvs of
+    Just val -> [Div ("", [], [("indent", val)]) blks]
+    Nothing  -> blks
+divRemove' blk = [blk]
+
+divRemove :: [Block] -> [Block]
+divRemove = concatMap divRemove'
+                                                 
+divCorrect' :: Block -> [Block]
+divCorrect' b@(Div (ident, classes, kvs) blks)
+  | (not . null) (blockQuoteDivs `intersect` classes) =
+    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
+  | (not . null) (codeDivs `intersect` classes) =
+    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
+  | otherwise =
+      case lookup "indent" kvs of
+        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
+        Just _   ->
+          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
+        Nothing  -> [b]
+divCorrect' blk = [blk]
+
+divCorrect :: [Block] -> [Block]
+divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs
new file mode 100644
index 000000000..b20679261
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX/Lists.hs
@@ -0,0 +1,208 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX.Lists
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Functions for converting flat DocX paragraphs into nested lists.
+-}
+
+module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets
+                                      , blocksToDefinitions) where
+
+import Text.Pandoc.JSON
+import Text.Pandoc.Shared (trim)
+import Control.Monad
+import Data.List
+import Data.Maybe
+
+isListItem :: Block -> Bool
+isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
+isListItem _ = False
+
+getLevel :: Block -> Maybe Integer
+getLevel (Div (_, _, kvs) _) =  liftM read $ lookup "level" kvs
+getLevel _ = Nothing
+
+getLevelN :: Block -> Integer
+getLevelN b = case getLevel b of
+  Just n -> n
+  Nothing -> -1
+
+getNumId :: Block -> Maybe Integer
+getNumId (Div (_, _, kvs) _) =  liftM read $ lookup "num-id" kvs
+getNumId _ = Nothing
+
+getNumIdN :: Block -> Integer
+getNumIdN b = case getNumId b of
+  Just n -> n
+  Nothing -> -1
+
+getText :: Block -> Maybe String
+getText (Div (_, _, kvs) _) = lookup "text" kvs
+getText _ = Nothing
+
+data ListType = Itemized | Enumerated ListAttributes
+
+listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap = [("upperLetter", UpperAlpha),
+                ("lowerLetter", LowerAlpha),
+                ("upperRoman", UpperRoman),
+                ("lowerRoman", LowerRoman),
+                ("decimal", Decimal)]
+
+listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap = [("%1)", OneParen),
+                ("(%1)", TwoParens),
+                ("%1.", Period)]
+
+getListType :: Block -> Maybe ListType
+getListType b@(Div (_, _, kvs) _) | isListItem b =
+  let
+    start = lookup "start" kvs
+    frmt = lookup "format" kvs
+    txt  = lookup "text" kvs
+  in
+   case frmt of
+     Just "bullet" -> Just Itemized
+     Just f        ->
+       case txt of
+         Just t -> Just $ Enumerated (
+                  read (fromMaybe "1" start) :: Int,
+                  fromMaybe DefaultStyle (lookup f listStyleMap),
+                  fromMaybe DefaultDelim (lookup t listDelimMap))
+         Nothing -> Nothing
+     _ -> Nothing
+getListType _ = Nothing
+
+listParagraphDivs :: [String]
+listParagraphDivs = ["ListParagraph"]
+
+-- This is a first stab at going through and attaching meaning to list
+-- paragraphs, without an item marker, following a list item. We
+-- assume that these are paragraphs in the same item.
+
+handleListParagraphs :: [Block] -> [Block]
+handleListParagraphs [] = []
+handleListParagraphs (
+  (Div attr1@(_, classes1, _) blks1) :
+  (Div (ident2, classes2, kvs2) blks2) :
+  blks
+  ) | "list-item" `elem` classes1 &&
+    not ("list-item" `elem` classes2) &&
+    (not . null) (listParagraphDivs `intersect` classes2) =
+      -- We don't want to keep this indent.
+      let newDiv2 =
+            (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
+      in
+       handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
+handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
+                  
+separateBlocks' :: Block -> [[Block]] -> [[Block]]
+separateBlocks' blk ([] : []) = [[blk]]
+separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
+-- The following is for the invisible bullet lists. This is how
+-- pandoc-generated ooxml does multiparagraph item lists.
+separateBlocks' b acc | liftM trim (getText b) == Just "" =
+  (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b acc = acc ++ [[b]]
+
+separateBlocks :: [Block] -> [[Block]]
+separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
+
+flatToBullets' :: Integer -> [Block] -> [Block]
+flatToBullets' _ [] = []
+flatToBullets' num xs@(b : elems)
+  | getLevelN b == num = b : (flatToBullets' num elems)
+  | otherwise = 
+    let bNumId = getNumIdN b
+        bLevel = getLevelN b
+        (children, remaining) =
+          span
+          (\b' ->
+            ((getLevelN b') > bLevel ||
+             ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
+          xs
+    in
+     case getListType b of
+       Just (Enumerated attr) ->
+         (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+       _ ->
+         (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+
+flatToBullets :: [Block] -> [Block]
+flatToBullets elems = flatToBullets' (-1) elems
+
+blocksToBullets :: [Block] -> [Block]
+blocksToBullets blks =
+  -- bottomUp removeListItemDivs $ 
+  flatToBullets $ (handleListParagraphs blks)
+
+
+plainParaInlines :: Block -> [Inline]
+plainParaInlines (Plain ils) = ils
+plainParaInlines (Para ils) = ils
+plainParaInlines _ = []
+
+blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
+blocksToDefinitions' []     acc [] = reverse acc
+blocksToDefinitions' defAcc acc [] =
+  reverse $ (DefinitionList (reverse defAcc)) : acc
+blocksToDefinitions' defAcc acc
+  ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
+  | "DefinitionTerm" `elem` classes1 && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        pair = case remainingAttr2 == ("", [], []) of
+          True -> (concatMap plainParaInlines blks1, [blks2])
+          False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
+    in
+     blocksToDefinitions' (pair : defAcc) acc blks
+blocksToDefinitions' defAcc acc
+  ((Div (ident2, classes2, kvs2) blks2) : blks)
+  | (not . null) defAcc && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        defItems2 = case remainingAttr2 == ("", [], []) of
+          True -> blks2
+          False -> [Div remainingAttr2 blks2]
+        ((defTerm, defItems):defs) = defAcc
+        defAcc' = case null defItems of
+          True -> (defTerm, [defItems2]) : defs
+          False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+    in
+     blocksToDefinitions' defAcc' acc blks
+blocksToDefinitions' [] acc (b:blks) =
+  blocksToDefinitions' [] (b:acc) blks
+blocksToDefinitions' defAcc acc (b:blks) =
+  blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+
+
+blocksToDefinitions :: [Block] -> [Block]
+blocksToDefinitions = blocksToDefinitions' [] []
+
+    
+    
+    
diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs
new file mode 100644
index 000000000..d7033d9e8
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocX/Parse.hs
@@ -0,0 +1,604 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.DocX.Parse
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of DocX archive into DocX haskell type
+-}
+
+
+module Text.Pandoc.Readers.DocX.Parse (  DocX(..)
+                                       , Document(..)
+                                       , Body(..)
+                                       , BodyPart(..)
+                                       , TblLook(..)
+                                       , ParPart(..)
+                                       , Run(..)
+                                       , RunElem(..)
+                                       , Notes
+                                       , Numbering
+                                       , Relationship
+                                       , Media
+                                       , RunStyle(..)
+                                       , ParagraphStyle(..)
+                                       , Row(..)
+                                       , Cell(..)
+                                       , getFootNote
+                                       , getEndNote
+                                       , lookupLevel
+                                       , lookupRelationship
+                                       , archiveToDocX
+                                       ) where
+import Codec.Archive.Zip
+import Text.XML.Light
+import Data.Maybe
+import Data.List
+import System.FilePath
+import Data.Bits ((.|.))
+import qualified Data.ByteString.Lazy as B
+import qualified Text.Pandoc.UTF8 as UTF8
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+
+type NameSpaces = [(String, String)]
+
+data DocX = DocX Document Notes Numbering [Relationship] Media
+          deriving Show
+
+archiveToDocX :: Archive -> Maybe DocX
+archiveToDocX archive = do
+  let notes = archiveToNotes archive
+      rels = archiveToRelationships archive
+      media = archiveToMedia archive
+  doc <- archiveToDocument archive
+  numbering <- archiveToNumbering archive
+  return $ DocX doc notes numbering rels media
+
+data Document = Document NameSpaces Body 
+          deriving Show
+
+archiveToDocument :: Archive -> Maybe Document
+archiveToDocument zf = do
+  entry <- findEntryByPath "word/document.xml" zf
+  docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  let namespaces = mapMaybe attrToNSPair (elAttribs docElem) 
+  bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
+  body <- elemToBody namespaces bodyElem
+  return $ Document namespaces body
+
+type Media = [(FilePath, B.ByteString)]
+
+filePathIsMedia :: FilePath -> Bool
+filePathIsMedia fp =
+  let (dir, _) = splitFileName fp
+  in
+   (dir == "word/media/")
+
+getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString)
+getMediaPair zf fp =
+  case findEntryByPath fp zf of
+    Just e -> Just (fp, fromEntry e)
+    Nothing -> Nothing
+
+archiveToMedia :: Archive -> Media
+archiveToMedia zf =
+  mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+                 deriving Show
+
+data Numb = Numb String String           -- right now, only a key to an abstract num
+            deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+                    deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
+  absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
+  lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
+  lvl  <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
+  return lvl
+
+numElemToNum :: NameSpaces -> Element -> Maybe Numb
+numElemToNum ns element |
+  qName (elName element) == "num" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element
+    absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element
+                >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+    return $ Numb numId absNumId
+numElemToNum _ _ = Nothing
+
+absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
+absNumElemToAbsNum ns element |
+  qName (elName element) == "abstractNum" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    absNumId <- findAttr
+                (QName "abstractNumId" (lookup "w" ns) (Just "w"))
+                element
+    let levelElems = findChildren
+                 (QName "lvl" (lookup "w" ns) (Just "w"))
+                 element
+        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+    return $ AbstractNumb absNumId levels
+absNumElemToAbsNum _ _ = Nothing
+
+levelElemToLevel :: NameSpaces -> Element -> Maybe Level
+levelElemToLevel ns element |
+    qName (elName element) == "lvl" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element
+      fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+      return (ilvl, fmt, txt, start)
+levelElemToLevel _ _ = Nothing
+
+archiveToNumbering :: Archive -> Maybe Numbering
+archiveToNumbering zf =
+  case findEntryByPath "word/numbering.xml" zf of
+    Nothing -> Just $ Numbering [] [] []
+    Just entry -> do
+      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+          numElems = findChildren
+                     (QName "num" (lookup "w" namespaces) (Just "w"))
+                     numberingElem
+          absNumElems = findChildren
+                        (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
+                        numberingElem
+          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
+          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+      return $ Numbering namespaces nums absNums
+
+data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
+           deriving Show
+
+noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
+noteElemToNote ns element
+  | qName (elName element) `elem` ["endnote", "footnote"] &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+        let bps = map fromJust
+                  $ filter isJust
+                  $ map (elemToBodyPart ns)
+                  $ filterChildrenName (isParOrTbl ns) element
+        return $ (noteId, bps)
+noteElemToNote _ _ = Nothing
+
+getFootNote :: String -> Notes -> Maybe [BodyPart]
+getFootNote s (Notes _ fns _) = fns >>= (lookup s)
+
+getEndNote :: String -> Notes -> Maybe [BodyPart]
+getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
+
+elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
+elemToNotes ns notetype element
+  | qName (elName element) == (notetype ++ "s") &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ map fromJust
+      $ filter isJust
+      $ map (noteElemToNote ns)
+      $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
+elemToNotes _ _ _ = Nothing
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+  let fnElem = findEntryByPath "word/footnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      enElem = findEntryByPath "word/endnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      fn_namespaces = case fnElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      en_namespaces = case enElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn = fnElem >>= (elemToNotes ns "footnote")
+      en = enElem >>= (elemToNotes ns "endnote")
+  in
+   Notes ns fn en
+
+
+data Relationship = Relationship (RelId, Target)
+                  deriving Show
+
+lookupRelationship :: RelId -> [Relationship] -> Maybe Target
+lookupRelationship relid rels =
+  lookup relid (map (\(Relationship pair) -> pair) rels)
+
+filePathIsRel :: FilePath -> Bool
+filePathIsRel fp =
+  let (dir, name) = splitFileName fp
+  in
+   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+
+relElemToRelationship :: Element -> Maybe Relationship
+relElemToRelationship element | qName (elName element) == "Relationship" =
+  do
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    return $ Relationship (relId, target)
+relElemToRelationship _ = Nothing
+  
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive = 
+  let relPaths = filter filePathIsRel (filesInArchive archive)
+      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
+      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+  in
+   rels
+   
+data Body = Body [BodyPart]
+          deriving Show
+
+isParOrTbl :: NameSpaces -> QName -> Bool
+isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
+                  qURI q == (lookup "w" ns)
+
+elemToBody :: NameSpaces -> Element ->  Maybe Body
+elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
+  Just $ Body
+  $ map fromJust
+  $ filter isJust
+  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
+elemToBody _ _ = Nothing
+
+isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
+isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
+                   qURI q == (lookup "w" ns)
+
+elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+elemToNumInfo ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
+        numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
+        lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
+                findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
+                 findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        return (numId, lvl)
+elemToNumInfo _ _ = Nothing
+
+-- isBookMarkTag :: NameSpaces -> QName -> Bool
+-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
+--                      qURI q == (lookup "w" ns)
+
+-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
+-- parChildrenToBookmark ns (bms : bme : _)
+--   | qName (elName bms) == "bookmarkStart" &&
+--     qURI (elName bms) == (lookup "w" ns) &&
+--     qName (elName bme) == "bookmarkEnd" &&
+--     qURI (elName bme) == (lookup "w" ns) = do
+--       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
+--       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
+--       return $ (bmId, bmName)
+-- parChildrenToBookmark _ _ = Nothing
+
+elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
+elemToBodyPart ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let parstyle = elemToParagraphStyle ns element
+          parparts = mapMaybe id
+                     $ map (elemToParPart ns)
+                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
+      in
+       case elemToNumInfo ns element of
+         Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
+         Nothing -> Just $ Paragraph parstyle parparts
+  | qName (elName element) == "tbl" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        grid = case
+          findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
+          of
+            Just g -> elemToTblGrid ns g
+            Nothing -> []
+        tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
+                  >>= elemToTblLook ns
+      in
+       Just $ Tbl
+       (fromMaybe "" caption)
+       grid
+       (fromMaybe defaultTblLook tblLook)
+       (mapMaybe (elemToRow ns) (elChildren element))
+  | otherwise = Nothing
+
+elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
+elemToTblLook ns element
+  | qName (elName element) == "tblLook" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
+          val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
+          firstRowFmt = 
+            case firstRow of
+              Just "1" -> True
+              Just  _  -> False
+              Nothing -> case val of
+                Just bitMask -> testBitMask bitMask 0x020
+                Nothing      -> False
+      in
+       Just $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = Nothing
+
+testBitMask :: String -> Int -> Bool
+testBitMask bitMaskS n =
+  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+    []            -> False
+    ((n', _) : _) -> ((n' .|. n) /= 0)
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+                                     , indent :: Maybe Integer
+                                     }
+                      deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+                                       , indent = Nothing
+                                       }
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
+elemToParagraphStyle ns element =
+  case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
+    Just pPr ->
+      ParagraphStyle
+      {pStyle =
+          mapMaybe id $
+          map
+          (findAttr (QName "val" (lookup "w" ns) (Just "w")))
+          (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
+      , indent =
+        findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
+        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
+        stringToInteger
+        }
+    Nothing -> defaultParagraphStyle
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+              | ListItem ParagraphStyle String String [ParPart]
+              | Tbl String TblGrid TblLook [Row]
+
+              deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+              deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+elemToTblGrid :: NameSpaces -> Element -> TblGrid
+elemToTblGrid ns element
+  | qName (elName element) == "tblGrid" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
+      in
+       mapMaybe (\e ->
+                  findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
+                  >>= stringToInteger
+                )
+       cols
+elemToTblGrid _ _ = []
+
+data Row = Row [Cell]
+           deriving Show
+
+
+elemToRow :: NameSpaces -> Element -> Maybe Row
+elemToRow ns element
+  | qName (elName element) == "tr" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let 
+        cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
+      in
+       Just $ Row (mapMaybe (elemToCell ns) cells)
+elemToRow _ _ = Nothing
+
+data Cell = Cell [BodyPart]
+            deriving Show
+
+elemToCell :: NameSpaces -> Element -> Maybe Cell
+elemToCell ns element
+  | qName (elName element) == "tc" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
+elemToCell _ _ = Nothing
+
+data ParPart = PlainRun Run
+             | BookMark BookMarkId Anchor
+             | InternalHyperLink Anchor [Run]
+             | ExternalHyperLink RelId [Run]
+             | Drawing String
+             deriving Show
+
+data Run = Run RunStyle [RunElem]
+         | Footnote String 
+         | Endnote String
+           deriving Show
+
+data RunElem = TextRun String | LnBrk
+             deriving Show
+
+data RunStyle = RunStyle { isBold :: Bool
+                         , isItalic :: Bool
+                         , isSmallCaps :: Bool
+                         , isStrike :: Bool
+                         , isSuperScript :: Bool
+                         , isSubScript :: Bool
+                         , underline :: Maybe String
+                         , rStyle :: Maybe String }
+                deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = False
+                           , isItalic = False
+                           , isSmallCaps = False
+                           , isStrike = False
+                           , isSuperScript = False
+                           , isSubScript = False
+                           , underline = Nothing
+                           , rStyle = Nothing
+                           } 
+
+elemToRunStyle :: NameSpaces -> Element -> RunStyle
+elemToRunStyle ns element =
+  case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
+    Just rPr ->
+      RunStyle
+      {
+        isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
+      , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
+      , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr
+      , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr
+      , isSuperScript =
+        (Just "superscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , isSubScript =
+        (Just "subscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , underline =
+        findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      , rStyle =
+        findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        }
+    Nothing -> defaultRunStyle
+
+elemToRun :: NameSpaces -> Element -> Maybe Run
+elemToRun ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case
+        findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
+        findAttr (QName "id" (lookup "w" ns) (Just "w"))
+      of
+        Just s -> Just $ Footnote s
+        Nothing ->
+          case
+            findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
+            findAttr (QName "id" (lookup "w" ns) (Just "w"))
+          of
+            Just s -> Just $ Endnote s 
+            Nothing ->  Just $
+                        Run (elemToRunStyle ns element)
+                        (elemToRunElems ns element)
+elemToRun _ _ = Nothing
+
+elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
+elemToRunElem ns element
+  | qName (elName element) == "t" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ TextRun (strContent element)
+  | qName (elName element) == "br" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ LnBrk
+  | otherwise = Nothing
+
+
+elemToRunElems :: NameSpaces -> Element -> [RunElem]
+elemToRunElems ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      mapMaybe (elemToRunElem ns) (elChildren element)
+  | otherwise = []
+
+elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
+elemToDrawing ns element
+  | qName (elName element) == "drawing" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+      in
+       findElement (QName "blip" (Just a_ns) (Just "a")) element
+       >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+       >>= (\s -> Just $ Drawing s)
+elemToDrawing _ _ = Nothing
+
+
+elemToParPart :: NameSpaces -> Element -> Maybe ParPart
+elemToParPart ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
+        Just drawingElem -> elemToDrawing ns drawingElem
+        Nothing -> do
+          r <- elemToRun ns element
+          return $ PlainRun r
+elemToParPart ns element
+  | qName (elName element) == "bookmarkStart" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
+      return $ BookMark bmId bmName
+elemToParPart ns element
+  | qName (elName element) == "hyperlink" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
+                 $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
+      in
+       case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
+         Just anchor ->
+          Just $ InternalHyperLink anchor runs
+         Nothing ->
+           case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
+             Just relId -> Just $ ExternalHyperLink relId runs
+             Nothing    -> Nothing
+elemToParPart _ _ = Nothing
+
+type Target = String
+type Anchor = String
+type BookMarkId = String
+type RelId = String
+               
-- 
cgit v1.2.3


From 3bc818d2d3079f1d31dbb409f839585a32f26f6e Mon Sep 17 00:00:00 2001
From: mpickering <matthewtpickering@gmail.com>
Date: Sun, 15 Jun 2014 13:38:16 +0100
Subject: Integrated the docx reader into the main pandoc program.

Changes also include generalising the types of reader allowed. The
mechanism now mimics the more general output mechanism.
---
 src/Text/Pandoc.hs | 56 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 20 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 130338f0e..aff471a3c 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -62,6 +62,8 @@ module Text.Pandoc
                , readers
                , writers
                -- * Readers: converting /to/ Pandoc format
+               , Reader (..)
+               , readDocX
                , readMarkdown
                , readMediaWiki
                , readRST
@@ -125,6 +127,7 @@ import Text.Pandoc.Readers.HTML
 import Text.Pandoc.Readers.Textile
 import Text.Pandoc.Readers.Native
 import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.DocX
 import Text.Pandoc.Writers.Native
 import Text.Pandoc.Writers.Markdown
 import Text.Pandoc.Writers.RST
@@ -192,24 +195,34 @@ markdown o s = do
   mapM_ warn warnings
   return doc
 
+data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
+              | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+
+mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
+mkStringReader r = StringReader (\o s -> return $ r o s)
+
+mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader r = ByteStringReader (\o s -> return $ r o s)
+
 -- | Association list of formats and readers.
-readers :: [(String, ReaderOptions -> String -> IO Pandoc)]
-readers = [ ("native"       , \_ s -> return $ readNative s)
-           ,("json"         , \o s -> return $ readJSON o s)
-           ,("markdown"     , markdown)
-           ,("markdown_strict" , markdown)
-           ,("markdown_phpextra" , markdown)
-           ,("markdown_github" , markdown)
-           ,("markdown_mmd",  markdown)
-           ,("rst"          , \o s -> return $ readRST o s)
-           ,("mediawiki"    , \o s -> return $ readMediaWiki o s)
-           ,("docbook"      , \o s -> return $ readDocBook o s)
-           ,("opml"         , \o s -> return $ readOPML o s)
-           ,("org"          , \o s -> return $ readOrg o s)
-           ,("textile"      , \o s -> return $ readTextile o s) -- TODO : textile+lhs
-           ,("html"         , \o s -> return $ readHtml o s)
-           ,("latex"        , \o s -> return $ readLaTeX o s)
-           ,("haddock"      , \o s -> return $ readHaddock o s)
+readers :: [(String, Reader)]
+readers = [ ("native"       , StringReader $ \_ s -> return $ readNative s)
+           ,("json"         , mkStringReader readJSON )
+           ,("markdown"     , StringReader  markdown)
+           ,("markdown_strict" , StringReader markdown)
+           ,("markdown_phpextra" , StringReader markdown)
+           ,("markdown_github" , StringReader markdown)
+           ,("markdown_mmd",  StringReader markdown)
+           ,("rst"          , mkStringReader readRST )
+           ,("mediawiki"    , mkStringReader readMediaWiki)
+           ,("docbook"      , mkStringReader readDocBook)
+           ,("opml"         , mkStringReader readOPML)
+           ,("org"          , mkStringReader readOrg)
+           ,("textile"      , mkStringReader readTextile) -- TODO : textile+lhs
+           ,("html"         , mkStringReader readHtml)
+           ,("latex"        , mkStringReader readLaTeX)
+           ,("haddock"      , mkStringReader readHaddock)
+           ,("docx"         , mkBSReader readDocX)
            ]
 
 data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
@@ -280,14 +293,17 @@ getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
 -- | Retrieve reader based on formatSpec (format+extensions).
-getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc)
+getReader :: String -> Either String Reader
 getReader s =
   case parseFormatSpec s of
        Left e  -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
-       Right (readerName, setExts) ->
+       Right (readerName, setExts) -> 
            case lookup readerName readers of
                    Nothing  -> Left $ "Unknown reader: " ++ readerName
-                   Just  r  -> Right $ \o ->
+                   Just  (StringReader r)  -> Right $ StringReader $ \o ->
+                                  r o{ readerExtensions = setExts $
+                                            getDefaultExtensions readerName }
+                   Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
                                   r o{ readerExtensions = setExts $
                                             getDefaultExtensions readerName }
 
-- 
cgit v1.2.3


From 7807564d4493af1462d61138c63d8ec365abc792 Mon Sep 17 00:00:00 2001
From: mpickering <matthewtpickering@gmail.com>
Date: Mon, 16 Jun 2014 20:45:54 +0100
Subject: Moved extractSpaces to Shared.hs

Generalised and move the extractSpaces function from `HTML.hs` to
`Shared.hs` so that the docx reader can also use it.
---
 src/Text/Pandoc/Readers/HTML.hs | 17 ++++-------------
 src/Text/Pandoc/Shared.hs       | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 905e55b22..9cdc5a567 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,7 +50,6 @@ import Data.Char ( isDigit )
 import Control.Monad ( liftM, guard, when, mzero )
 import Control.Applicative ( (<$>), (<$), (<*) )
 import Data.Monoid
-import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl)
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -369,9 +368,9 @@ pQ = do
                              then InSingleQuote
                              else InDoubleQuote
   let constructor = case quoteType of
-                            SingleQuote -> B.singleQuoted 
+                            SingleQuote -> B.singleQuoted
                             DoubleQuote -> B.doubleQuoted
-  withQuoteContext innerQuoteContext $ 
+  withQuoteContext innerQuoteContext $
     pInlinesInTags "q" constructor
 
 pEmph :: TagParser Inlines
@@ -406,7 +405,7 @@ pLink = try $ do
   let url = fromAttrib "href" tag
   let title = fromAttrib "title" tag
   lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
-  return $ B.link (escapeURI url) title lab 
+  return $ B.link (escapeURI url) title lab
 
 pImage :: TagParser Inlines
 pImage = do
@@ -439,15 +438,7 @@ pRawHtmlInline = do
 
 pInlinesInTags :: String -> (Inlines -> Inlines)
                -> TagParser Inlines
-pInlinesInTags tagtype f = do
-  contents <- B.unMany <$> pInTags tagtype inline
-  let left  = case viewl contents of
-                    (Space :< _) -> B.space
-                    _            -> mempty
-  let right = case viewr contents of
-                    (_ :> Space) -> B.space
-                    _            -> mempty
-  return (left <> f (trimInlines . B.Many $ contents) <> right)
+pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pInTags :: (Monoid a) => String -> TagParser a
         -> TagParser a
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index b0adf55f5..5b0d9b6b4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -53,6 +53,7 @@ module Text.Pandoc.Shared (
                      -- * Pandoc block and inline list processing
                      orderedListMarkers,
                      normalizeSpaces,
+                     extractSpaces,
                      normalize,
                      stringify,
                      compactify,
@@ -113,6 +114,7 @@ import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as B8
 import Text.Pandoc.Compat.Monoid
 import Data.ByteString.Base64 (decodeLenient)
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
 
 #ifdef EMBED_DATA_FILES
 import Text.Pandoc.Data (dataFiles)
@@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True
 isSpaceOrEmpty (Str "") = True
 isSpaceOrEmpty _ = False
 
+-- | Extract the leading and trailing spaces from inside an inline element
+-- and place them outside the element. 
+
+extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
+extractSpaces f is = 
+  let contents = B.unMany is
+      left  = case viewl contents of
+                    (Space :< _) -> B.space
+                    _            -> mempty
+      right = case viewr contents of
+                    (_ :> Space) -> B.space
+                    _            -> mempty in
+  (left <> f (B.trimInlines . B.Many $ contents) <> right)
+
 -- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
 -- combining adjacent 'Str's and 'Emph's, remove 'Null's and
 -- empty elements, etc.
-- 
cgit v1.2.3


From 01ef573ac2f6620e9f70ae8965e5ccc664e3aec4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 14:18:06 -0700
Subject: Org reader:  fixed #1342.

This change rewrites `inlineLaTeXCommand` so that parsec will
know when input is being consumed.  Previously a run-time
error would be produced with some input involving raw latex.
(I believe this does not affect the last release, as the inline
latex reading was added recently.)
---
 src/Text/Pandoc/Readers/Org.hs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c3ea8d7c2..0e872abf0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
                                             , parseFromString
-                                            , updateLastStrPos )
+                                            )
 import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import           Text.Pandoc.Shared (compactify', compactify'DL)
-import           Text.Parsec.Pos (updatePosString)
 import           Text.TeXMath (texMathToPandoc, DisplayType(..))
 
 import           Control.Applicative ( Applicative, pure
@@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser ()
 resetBlockAttributes = updateState $ \s ->
   s{ orgStateBlockAttributes = orgStateBlockAttributes def }
 
-updateLastStrPos :: OrgParser ()
-updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ orgStateLastStrPos = Just p }
-
 updateLastForbiddenCharPos :: OrgParser ()
 updateLastForbiddenCharPos = getPosition >>= \p ->
   updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
@@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just
 inlineLaTeXCommand :: OrgParser String
 inlineLaTeXCommand = try $ do
   rest <- getInput
-  pos <- getPosition
   case runParser rawLaTeXInline def "source" rest of
-    Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
-                                 <* (setPosition $ updatePosString pos cs)
+    Right (RawInline _ cs) -> do
+      let len = length cs
+      count len anyChar
+      return cs
     _ -> mzero
-- 
cgit v1.2.3


From 31fd843133ee9482b6af353a7d793cae18929425 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 15:41:23 -0700
Subject: HTML reader: Fixed major parsing problem with HTML tables.

Table cells were being combined into one cell.  Closes #1341.
---
 src/Text/Pandoc/Readers/HTML.hs | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 9cdc5a567..d27afc543 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -238,30 +238,26 @@ pTable = try $ do
   caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
   -- TODO actually read these and take width information from them
   widths' <- pColgroup <|> many pCol
-  head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+  head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
   skipMany pBlank
   rows <- pOptInTag "tbody"
           $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
   skipMany pBlank
   TagClose _ <- pSatisfy (~== TagClose "table")
-  let isSinglePlain []        = True
-      isSinglePlain [Plain _] = True
-      isSinglePlain _         = False
-  let lHead = B.toList head'
-  let lRows = map B.toList rows
-  let isSimple = all isSinglePlain (lHead:lRows)
-  let cols = length $ if null lHead
-                         then head lRows
-                         else lHead
+  let isSinglePlain x = case B.toList x of
+                             [Plain _] -> True
+                             _         -> False
+  let isSimple = all isSinglePlain $ concat (head':rows)
+  let cols = length $ if null head' then head rows else head'
   -- fail if there are colspans or rowspans
-  guard $ all (\r -> length r == cols) lRows
-  let aligns = replicate cols AlignLeft
+  guard $ all (\r -> length r == cols) rows
+  let aligns = replicate cols AlignDefault
   let widths = if null widths'
                   then if isSimple
                        then replicate cols 0
                        else replicate cols (1.0 / fromIntegral cols)
                   else widths'
-  return $ B.table caption (zip aligns widths) [head'] [rows]
+  return $ B.table caption (zip aligns widths) head' rows
 
 pCol :: TagParser Double
 pCol = try $ do
@@ -279,12 +275,12 @@ pColgroup = try $ do
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
-pCell :: String -> TagParser Blocks
+pCell :: String -> TagParser [Blocks]
 pCell celltype = try $ do
   skipMany pBlank
   res <- pInTags celltype block
   skipMany pBlank
-  return res
+  return [res]
 
 pBlockQuote :: TagParser Blocks
 pBlockQuote = do
-- 
cgit v1.2.3


From 459805de4c4e7129dc624086a654febb161f99ad Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 17:43:56 -0700
Subject: LaTeX reader:  don't assume preamble doesn't contain environments.

Closes #1338.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6b5958920..9e7a38b8f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1134,7 +1134,7 @@ paragraph = do
 
 preamble :: LP Blocks
 preamble = mempty <$> manyTill preambleBlock beginDoc
-  where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+  where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
         preambleBlock =  (void comment)
                      <|> (void sp)
                      <|> (void blanklines)
-- 
cgit v1.2.3


From 87c08be58f54ae60cfd57be6c17a256cbdb105d6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 19:18:33 -0700
Subject: LaTeX reader:  handle leading/trailing spaces in emph better.

`\emph{ hi }` gets parsed as `[Space, Emph [Str "hi"], Space]`
so that we don't get things like `* hi *` in markdown output.

Also applies to textbf and some other constructions.

Closes #1146.  (`--normalize` isn't touched by this, but
normalization should not generally be necessary with the
changes to the readers.)
---
 src/Text/Pandoc/Readers/LaTeX.hs | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9e7a38b8f..3c4d4ee52 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -397,18 +397,18 @@ isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
 
 inlineCommands :: M.Map String (LP Inlines)
 inlineCommands = M.fromList $
-  [ ("emph", emph <$> tok)
-  , ("textit", emph <$> tok)
-  , ("textsl", emph <$> tok)
-  , ("textsc", smallcaps <$> tok)
-  , ("sout", strikeout <$> tok)
-  , ("textsuperscript", superscript <$> tok)
-  , ("textsubscript", subscript <$> tok)
+  [ ("emph", extractSpaces emph <$> tok)
+  , ("textit", extractSpaces emph <$> tok)
+  , ("textsl", extractSpaces emph <$> tok)
+  , ("textsc", extractSpaces smallcaps <$> tok)
+  , ("sout", extractSpaces strikeout <$> tok)
+  , ("textsuperscript", extractSpaces superscript <$> tok)
+  , ("textsubscript", extractSpaces subscript <$> tok)
   , ("textbackslash", lit "\\")
   , ("backslash", lit "\\")
   , ("slash", lit "/")
-  , ("textbf", strong <$> tok)
-  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("textbf", extractSpaces strong <$> tok)
+  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
   , ("ldots", lit "…")
   , ("dots", lit "…")
   , ("mdots", lit "…")
@@ -428,15 +428,15 @@ inlineCommands = M.fromList $
   , ("{", lit "{")
   , ("}", lit "}")
   -- old TeX commands
-  , ("em", emph <$> inlines)
-  , ("it", emph <$> inlines)
-  , ("sl", emph <$> inlines)
-  , ("bf", strong <$> inlines)
+  , ("em", extractSpaces emph <$> inlines)
+  , ("it", extractSpaces emph <$> inlines)
+  , ("sl", extractSpaces emph <$> inlines)
+  , ("bf", extractSpaces strong <$> inlines)
   , ("rm", inlines)
-  , ("itshape", emph <$> inlines)
-  , ("slshape", emph <$> inlines)
-  , ("scshape", smallcaps <$> inlines)
-  , ("bfseries", strong <$> inlines)
+  , ("itshape", extractSpaces emph <$> inlines)
+  , ("slshape", extractSpaces emph <$> inlines)
+  , ("scshape", extractSpaces smallcaps <$> inlines)
+  , ("bfseries", extractSpaces strong <$> inlines)
   , ("/", pure mempty) -- italic correction
   , ("aa", lit "å")
   , ("AA", lit "Å")
-- 
cgit v1.2.3


From 9da5d8955ecc090d1582a9d007a0ecace654d229 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 20:48:55 -0700
Subject: Markdown reader: fixed #1333 (table parsing bug).

---
 src/Text/Pandoc/Readers/Markdown.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index caa938ed6..c20a2a1fc 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1117,13 +1117,14 @@ multilineTable headless =
 multilineTableHeader :: Bool -- ^ Headerless table
                      -> MarkdownParser (F [Blocks], [Alignment], [Int])
 multilineTableHeader headless = try $ do
-  if headless
-     then return '\n'
-     else tableSep >>~ notFollowedBy blankline
+  unless headless $
+     tableSep >> notFollowedBy blankline
   rawContent  <- if headless
                     then return $ repeat ""
-                    else many1
-                         (notFollowedBy tableSep >> many1Till anyChar newline)
+                    else many1 $ do
+                          notFollowedBy blankline
+                          notFollowedBy tableSep
+                          anyLine
   initSp      <- nonindentSpaces
   dashes      <- many1 (dashedLine '-')
   newline
-- 
cgit v1.2.3


From f9b97e6bfb1d26bd328cdbb1ca83c4558e7f4a0c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 21:26:50 -0700
Subject: Small improvement to fix to #1333.

This allows blank lines at end of multiline headers.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c20a2a1fc..a6720beba 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1121,10 +1121,7 @@ multilineTableHeader headless = try $ do
      tableSep >> notFollowedBy blankline
   rawContent  <- if headless
                     then return $ repeat ""
-                    else many1 $ do
-                          notFollowedBy blankline
-                          notFollowedBy tableSep
-                          anyLine
+                    else many1 $ notFollowedBy tableSep >> anyLine
   initSp      <- nonindentSpaces
   dashes      <- many1 (dashedLine '-')
   newline
-- 
cgit v1.2.3


From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 22:03:26 -0700
Subject: Org reader:  make tildes create inline code.

Closes #1345.  Also relabeled 'code' and 'verbatim' parsers
to accord with the org-mode manual.

I'm not sure what the distinction between code and verbatim
is supposed to be, but I'm pretty sure both should be represented
as Code inlines in pandoc.  The previous behavior resulted in the
text not appearing in any output format.
---
 src/Text/Pandoc/Readers/Org.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0e872abf0..7a35e2ca0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout    <$> emphasisBetween '+'
 underline :: OrgParser (F Inlines)
 underline = fmap B.strong       <$> emphasisBetween '_'
 
-code      :: OrgParser (F Inlines)
-code      = return . B.code         <$> verbatimBetween '='
-
 verbatim  :: OrgParser (F Inlines)
-verbatim  = return . B.rawInline "" <$> verbatimBetween '~'
+verbatim  = return . B.code     <$> verbatimBetween '='
+
+code      :: OrgParser (F Inlines)
+code      = return . B.code     <$> verbatimBetween '~'
 
 subscript   :: OrgParser (F Inlines)
 subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)
-- 
cgit v1.2.3


From bbe99003f8d25dc65ab12851907ecd5d9aad746c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 22:44:40 -0700
Subject: Naming: Use Docx instead of DocX.

For consistency with the existing writer.
---
 src/Text/Pandoc.hs                    |   6 +-
 src/Text/Pandoc/Readers/DocX.hs       | 479 ---------------------------
 src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ------------
 src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ----------------------------------
 src/Text/Pandoc/Readers/Docx.hs       | 479 +++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/Docx/Lists.hs | 208 ++++++++++++
 src/Text/Pandoc/Readers/Docx/Parse.hs | 604 ++++++++++++++++++++++++++++++++++
 7 files changed, 1294 insertions(+), 1294 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/DocX.hs
 delete mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs
 delete mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs
 create mode 100644 src/Text/Pandoc/Readers/Docx.hs
 create mode 100644 src/Text/Pandoc/Readers/Docx/Lists.hs
 create mode 100644 src/Text/Pandoc/Readers/Docx/Parse.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index aff471a3c..45c2f453b 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -63,7 +63,7 @@ module Text.Pandoc
                , writers
                -- * Readers: converting /to/ Pandoc format
                , Reader (..)
-               , readDocX
+               , readDocx
                , readMarkdown
                , readMediaWiki
                , readRST
@@ -127,7 +127,7 @@ import Text.Pandoc.Readers.HTML
 import Text.Pandoc.Readers.Textile
 import Text.Pandoc.Readers.Native
 import Text.Pandoc.Readers.Haddock
-import Text.Pandoc.Readers.DocX
+import Text.Pandoc.Readers.Docx
 import Text.Pandoc.Writers.Native
 import Text.Pandoc.Writers.Markdown
 import Text.Pandoc.Writers.RST
@@ -222,7 +222,7 @@ readers = [ ("native"       , StringReader $ \_ s -> return $ readNative s)
            ,("html"         , mkStringReader readHtml)
            ,("latex"        , mkStringReader readLaTeX)
            ,("haddock"      , mkStringReader readHaddock)
-           ,("docx"         , mkBSReader readDocX)
+           ,("docx"         , mkBSReader readDocx)
            ]
 
 data Writer = PureStringWriter   (WriterOptions -> Pandoc -> String)
diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs
deleted file mode 100644
index 976e2e271..000000000
--- a/src/Text/Pandoc/Readers/DocX.hs
+++ /dev/null
@@ -1,479 +0,0 @@
-{-
-Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.DocX
-   Copyright   : Copyright (C) 2014 Jesse Rosenthal
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse)
-to 'Pandoc' document.  -}
-
-{-
-Current state of implementation of DocX entities ([x] means
-implemented, [-] means partially implemented):
-
-* Blocks
-
-  - [X] Para
-  - [X] CodeBlock (styled with `SourceCode`)
-  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
-        indented)
-  - [X] OrderedList
-  - [X] BulletList
-  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
-  - [X] Header (styled with `Heading#`)
-  - [ ] HorizontalRule
-  - [-] Table (column widths and alignments not yet implemented)
-
-* Inlines
-
-  - [X] Str
-  - [X] Emph (From italics. `underline` currently read as span. In
-        future, it might optionally be emph as well)
-  - [X] Strong
-  - [X] Strikeout
-  - [X] Superscript
-  - [X] Subscript
-  - [X] SmallCaps
-  - [ ] Quoted
-  - [ ] Cite
-  - [X] Code (styled with `VerbatimChar`)
-  - [X] Space
-  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
-  - [ ] Math
-  - [X] Link (links to an arbitrary bookmark create a span with the target as
-        id and "anchor" class)
-  - [-] Image (Links to path in archive. Future option for
-        data-encoded URI likely.)
-  - [X] Note (Footnotes and Endnotes are silently combined.)
--}
-
-module Text.Pandoc.Readers.DocX
-       ( readDocX
-       ) where
-
-import Codec.Archive.Zip
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
-import Text.Pandoc.Readers.DocX.Parse
-import Text.Pandoc.Readers.DocX.Lists
-import Data.Maybe (mapMaybe, isJust, fromJust)
-import Data.List (delete, isPrefixOf, (\\), intersect)
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
-import System.FilePath (combine)
-
-readDocX :: ReaderOptions
-         -> B.ByteString
-         -> Pandoc
-readDocX opts bytes =
-  case archiveToDocX (toArchive bytes) of
-    Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
-    Nothing   -> error $ "couldn't parse docx file"
-
-runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
-runStyleToSpanAttr rPr = ("",
-                          mapMaybe id [
-                            if isBold rPr then (Just "strong") else Nothing,
-                            if isItalic rPr then (Just "emph") else Nothing,
-                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
-                            if isStrike rPr then (Just "strike") else Nothing,
-                            if isSuperScript rPr then (Just "superscript") else Nothing,
-                            if isSubScript rPr then (Just "subscript") else Nothing,
-                            rStyle rPr],
-                          case underline rPr of
-                            Just fmt -> [("underline", fmt)]
-                            _        -> []
-                         )
-
-parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
-parStyleToDivAttr pPr = ("",
-                          pStyle pPr,
-                          case indent pPr of
-                            Just n  -> [("indent", (show n))]
-                            Nothing -> []
-                         )
-
-strToInlines :: String -> [Inline]
-strToInlines = toList . text
-
-codeSpans :: [String]
-codeSpans = ["VerbatimChar"]
-
-blockQuoteDivs :: [String]
-blockQuoteDivs = ["Quote", "BlockQuote"]
-
-codeDivs :: [String]
-codeDivs = ["SourceCode"]
-
-runElemToInlines :: RunElem -> [Inline]
-runElemToInlines (TextRun s) = strToInlines s
-runElemToInlines (LnBrk) = [LineBreak]
-
-runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString (LnBrk) = ['\n']
-
-runElemsToString :: [RunElem] -> String
-runElemsToString = concatMap runElemToString
-
-strNormalize :: [Inline] -> [Inline]
-strNormalize [] = []
-strNormalize (Str "" : ils) = strNormalize ils
-strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
-strNormalize (il:ils) = il : (strNormalize ils)
-
-runToInlines :: ReaderOptions -> DocX -> Run -> [Inline]
-runToInlines _ _ (Run rs runElems) 
-  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
-    case runStyleToSpanAttr rs == ("", [], []) of
-      True -> [Str (runElemsToString runElems)]
-      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
-  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
-      True -> concatMap runElemToInlines runElems
-      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
-runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) =
-  case (getFootNote fnId notes) of
-    Just bodyParts ->
-      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
-    Nothing        ->
-      [Note [Div ("", ["footnote"], []) []]]
-runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) =
-  case (getEndNote fnId notes) of
-    Just bodyParts ->
-      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
-    Nothing        ->
-      [Note [Div ("", ["endnote"], []) []]]
-
-parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline]
-parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines _ _ (BookMark _ anchor) =
-  [Span (anchor, ["anchor"], []) []]
-parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) =
-  case lookupRelationship relid rels of
-    Just target -> [Image [] (combine "word" target, "")]
-    Nothing     -> [Image [] ("", "")]
-parPartToInlines opts docx (InternalHyperLink anchor runs) =
-  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
-parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) =
-  case lookupRelationship relid rels of
-    Just target ->
-      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
-    Nothing ->
-      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
-
-isAnchorSpan :: Inline -> Bool
-isAnchorSpan (Span (ident, classes, kvs) ils) =
-  (not . null) ident &&
-  classes == ["anchor"] &&
-  null kvs &&
-  null ils
-isAnchorSpan _ = False
-
-dummyAnchors :: [String]
-dummyAnchors = ["_GoBack"]
-
-makeHeaderAnchors :: Block -> Block
-makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
-  case filter isAnchorSpan ils of
-    []   -> h
-    (x@(Span (ident, _, _) _) : xs) ->
-      case ident `elem` dummyAnchors of
-        True -> h
-        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
-    _ -> h
-makeHeaderAnchors blk = blk
-  
-
-parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline]
-parPartsToInlines opts docx parparts =
-  --
-  -- We're going to skip data-uri's for now. It should be an option,
-  -- not mandatory.
-  --
-  --bottomUp (makeImagesSelfContained docx) $
-  bottomUp spanCorrect $
-  bottomUp spanTrim $
-  bottomUp spanReduce $
-  concatMap (parPartToInlines opts docx) parparts
-
-cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
-
-rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]]
-rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
-
-bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block
-bodyPartToBlock opts docx (Paragraph pPr parparts) =
-  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
-bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
-  let
-    kvs = case lookupLevel numId lvl numbering of
-      Just (_, fmt, txt, Just start) -> [ ("level", lvl)
-                                        , ("num-id", numId)
-                                        , ("format", fmt)
-                                        , ("text", txt)
-                                        , ("start", (show start))
-                                        ]
-      
-      Just (_, fmt, txt, Nothing)    -> [ ("level", lvl)
-                                        , ("num-id", numId)
-                                        , ("format", fmt)
-                                        , ("text", txt)
-                                        ]
-      Nothing                        -> []
-  in
-   Div
-   ("", ["list-item"], kvs)
-   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
-bodyPartToBlock _ _ (Tbl _ _ _ []) =
-  Para []
-bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
-  let caption = strToInlines cap
-      (hdr, rows) = case firstRowFormatting look of
-        True -> (Just r, rs)
-        False -> (Nothing, r:rs)
-      hdrCells = case hdr of
-        Just r' -> rowToBlocksList opts docx r'
-        Nothing -> []
-      cells = map (rowToBlocksList opts docx) rows
-      
-      size = case null hdrCells of
-        True -> length $ head cells
-        False -> length $ hdrCells
-      --
-      -- The two following variables (horizontal column alignment and
-      -- relative column widths) go to the default at the
-      -- moment. Width information is in the TblGrid field of the Tbl,
-      -- so should be possible. Alignment might be more difficult,
-      -- since there doesn't seem to be a column entity in docx.
-      alignments = take size (repeat AlignDefault)
-      widths = take size (repeat 0) :: [Double]
-  in
-   Table caption alignments widths hdrCells cells
-
-makeImagesSelfContained :: DocX -> Inline -> Inline
-makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) =
-  case lookup uri media of
-    Just bs -> case getMimeType uri of
-      Just mime ->  let data_uri =
-                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
-                    in
-                     Image alt (data_uri, title)
-      Nothing  -> i
-    Nothing -> i
-makeImagesSelfContained _ inline = inline
-
-bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block]
-bodyToBlocks opts docx (Body bps) =
-  bottomUp removeEmptyPars $
-  bottomUp strNormalize $ 
-  bottomUp spanRemove $ 
-  bottomUp divRemove $
-  map (makeHeaderAnchors) $
-  bottomUp divCorrect $
-  bottomUp divReduce $
-  bottomUp divCorrectPreReduce $
-  bottomUp blocksToDefinitions $
-  blocksToBullets $
-  map (bodyPartToBlock opts docx) bps
-
-docxToBlocks :: ReaderOptions -> DocX -> [Block]
-docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body
-
-spanReduce :: [Inline] -> [Inline]
-spanReduce [] = []
-spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
-  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
-spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
-            s2@(Span (id2, classes2, kvs2) ils2) :
-            ils) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> s1 : (spanReduce (s2 : ils))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
-                           ils)
-spanReduce (il:ils) = il : (spanReduce ils)
-
-ilToCode :: Inline -> String
-ilToCode (Str s) = s
-ilToCode _ = ""
-
-spanRemove' :: Inline -> [Inline]
-spanRemove' s@(Span (ident, classes, _) [])
-  -- "_GoBack" is automatically inserted. We don't want to keep it.
-  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
-spanRemove' (Span (_, _, kvs) ils) =
-  case lookup "underline" kvs of
-    Just val -> [Span ("", [], [("underline", val)]) ils]
-    Nothing  -> ils
-spanRemove' il = [il]
-
-spanRemove :: [Inline] -> [Inline]
-spanRemove = concatMap spanRemove'
-
-spanTrim' :: Inline -> [Inline]
-spanTrim' il@(Span _ []) = [il]
-spanTrim' il@(Span attr (il':[]))
-  | il' == Space = [Span attr [], Space]
-  | otherwise = [il]
-spanTrim' (Span attr ils)
-  | head ils == Space && last ils == Space =
-    [Space, Span attr (init $ tail ils), Space]
-  | head ils == Space = [Space, Span attr (tail ils)]
-  | last ils == Space = [Span attr (init ils), Space]
-spanTrim' il = [il]
-
-spanTrim :: [Inline] -> [Inline]
-spanTrim = concatMap spanTrim'
-
-spanCorrect' :: Inline -> [Inline]
-spanCorrect' (Span ("", [], []) ils) = ils
-spanCorrect' (Span (ident, classes, kvs) ils)
-  | "emph" `elem` classes =
-    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
-  | "strong" `elem` classes =
-      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
-  | "smallcaps" `elem` classes =
-      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
-  | "strike" `elem` classes =
-      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
-  | "superscript" `elem` classes =
-      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
-  | "subscript" `elem` classes =
-      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
-  | (not . null) (codeSpans `intersect` classes) =
-         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
-  | otherwise =
-      [Span (ident, classes, kvs) ils]
-spanCorrect' il = [il]
-
-spanCorrect :: [Inline] -> [Inline]
-spanCorrect = concatMap spanCorrect'
-
-removeEmptyPars :: [Block] -> [Block]
-removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
-
-divReduce :: [Block] -> [Block]
-divReduce [] = []
-divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
-  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
-divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
-           d2@(Div (id2, classes2, kvs2) blks2) :
-            blks) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> d1 : (divReduce (d2 : blks))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
-                           blks)
-divReduce (blk:blks) = blk : (divReduce blks)
-
-isHeaderClass :: String -> Maybe Int
-isHeaderClass s | "Heading" `isPrefixOf` s =
-  case reads (drop (length "Heading") s) :: [(Int, String)] of
-    [] -> Nothing
-    ((n, "") : []) -> Just n
-    _       -> Nothing
-isHeaderClass _ = Nothing
-
-findHeaderClass :: [String] -> Maybe Int
-findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
-  [] -> Nothing
-  n : _ -> Just n
-
-blksToInlines :: [Block] -> [Inline]
-blksToInlines (Para ils : _) = ils
-blksToInlines (Plain ils : _) = ils
-blksToInlines _ = []
-
-divCorrectPreReduce' :: Block -> [Block]
-divCorrectPreReduce' (Div (ident, classes, kvs) blks)
-  | isJust $ findHeaderClass classes =
-    let n = fromJust $ findHeaderClass classes
-    in
-    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
-  | otherwise = [Div (ident, classes, kvs) blks]
-divCorrectPreReduce' blk = [blk]
-
-divCorrectPreReduce :: [Block] -> [Block]
-divCorrectPreReduce = concatMap divCorrectPreReduce'
-
-blkToCode :: Block -> String
-blkToCode (Para []) = ""
-blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
-blkToCode (Para ((Span (_, classes, _) ils'): ils))
-  | (not . null) (codeSpans `intersect` classes) =
-    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
-blkToCode _ = ""
-
-divRemove' :: Block -> [Block]
-divRemove' (Div (_, _, kvs) blks) =
-  case lookup "indent" kvs of
-    Just val -> [Div ("", [], [("indent", val)]) blks]
-    Nothing  -> blks
-divRemove' blk = [blk]
-
-divRemove :: [Block] -> [Block]
-divRemove = concatMap divRemove'
-                                                 
-divCorrect' :: Block -> [Block]
-divCorrect' b@(Div (ident, classes, kvs) blks)
-  | (not . null) (blockQuoteDivs `intersect` classes) =
-    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
-  | (not . null) (codeDivs `intersect` classes) =
-    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
-  | otherwise =
-      case lookup "indent" kvs of
-        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
-        Just _   ->
-          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
-        Nothing  -> [b]
-divCorrect' blk = [blk]
-
-divCorrect :: [Block] -> [Block]
-divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs
deleted file mode 100644
index b20679261..000000000
--- a/src/Text/Pandoc/Readers/DocX/Lists.hs
+++ /dev/null
@@ -1,208 +0,0 @@
-{-
-Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.DocX.Lists
-   Copyright   : Copyright (C) 2014 Jesse Rosenthal
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for converting flat DocX paragraphs into nested lists.
--}
-
-module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets
-                                      , blocksToDefinitions) where
-
-import Text.Pandoc.JSON
-import Text.Pandoc.Shared (trim)
-import Control.Monad
-import Data.List
-import Data.Maybe
-
-isListItem :: Block -> Bool
-isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
-isListItem _ = False
-
-getLevel :: Block -> Maybe Integer
-getLevel (Div (_, _, kvs) _) =  liftM read $ lookup "level" kvs
-getLevel _ = Nothing
-
-getLevelN :: Block -> Integer
-getLevelN b = case getLevel b of
-  Just n -> n
-  Nothing -> -1
-
-getNumId :: Block -> Maybe Integer
-getNumId (Div (_, _, kvs) _) =  liftM read $ lookup "num-id" kvs
-getNumId _ = Nothing
-
-getNumIdN :: Block -> Integer
-getNumIdN b = case getNumId b of
-  Just n -> n
-  Nothing -> -1
-
-getText :: Block -> Maybe String
-getText (Div (_, _, kvs) _) = lookup "text" kvs
-getText _ = Nothing
-
-data ListType = Itemized | Enumerated ListAttributes
-
-listStyleMap :: [(String, ListNumberStyle)]
-listStyleMap = [("upperLetter", UpperAlpha),
-                ("lowerLetter", LowerAlpha),
-                ("upperRoman", UpperRoman),
-                ("lowerRoman", LowerRoman),
-                ("decimal", Decimal)]
-
-listDelimMap :: [(String, ListNumberDelim)]
-listDelimMap = [("%1)", OneParen),
-                ("(%1)", TwoParens),
-                ("%1.", Period)]
-
-getListType :: Block -> Maybe ListType
-getListType b@(Div (_, _, kvs) _) | isListItem b =
-  let
-    start = lookup "start" kvs
-    frmt = lookup "format" kvs
-    txt  = lookup "text" kvs
-  in
-   case frmt of
-     Just "bullet" -> Just Itemized
-     Just f        ->
-       case txt of
-         Just t -> Just $ Enumerated (
-                  read (fromMaybe "1" start) :: Int,
-                  fromMaybe DefaultStyle (lookup f listStyleMap),
-                  fromMaybe DefaultDelim (lookup t listDelimMap))
-         Nothing -> Nothing
-     _ -> Nothing
-getListType _ = Nothing
-
-listParagraphDivs :: [String]
-listParagraphDivs = ["ListParagraph"]
-
--- This is a first stab at going through and attaching meaning to list
--- paragraphs, without an item marker, following a list item. We
--- assume that these are paragraphs in the same item.
-
-handleListParagraphs :: [Block] -> [Block]
-handleListParagraphs [] = []
-handleListParagraphs (
-  (Div attr1@(_, classes1, _) blks1) :
-  (Div (ident2, classes2, kvs2) blks2) :
-  blks
-  ) | "list-item" `elem` classes1 &&
-    not ("list-item" `elem` classes2) &&
-    (not . null) (listParagraphDivs `intersect` classes2) =
-      -- We don't want to keep this indent.
-      let newDiv2 =
-            (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
-      in
-       handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
-handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
-                  
-separateBlocks' :: Block -> [[Block]] -> [[Block]]
-separateBlocks' blk ([] : []) = [[blk]]
-separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
--- The following is for the invisible bullet lists. This is how
--- pandoc-generated ooxml does multiparagraph item lists.
-separateBlocks' b acc | liftM trim (getText b) == Just "" =
-  (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b acc = acc ++ [[b]]
-
-separateBlocks :: [Block] -> [[Block]]
-separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
-
-flatToBullets' :: Integer -> [Block] -> [Block]
-flatToBullets' _ [] = []
-flatToBullets' num xs@(b : elems)
-  | getLevelN b == num = b : (flatToBullets' num elems)
-  | otherwise = 
-    let bNumId = getNumIdN b
-        bLevel = getLevelN b
-        (children, remaining) =
-          span
-          (\b' ->
-            ((getLevelN b') > bLevel ||
-             ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
-          xs
-    in
-     case getListType b of
-       Just (Enumerated attr) ->
-         (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
-         (flatToBullets' num remaining)
-       _ ->
-         (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
-         (flatToBullets' num remaining)
-
-flatToBullets :: [Block] -> [Block]
-flatToBullets elems = flatToBullets' (-1) elems
-
-blocksToBullets :: [Block] -> [Block]
-blocksToBullets blks =
-  -- bottomUp removeListItemDivs $ 
-  flatToBullets $ (handleListParagraphs blks)
-
-
-plainParaInlines :: Block -> [Inline]
-plainParaInlines (Plain ils) = ils
-plainParaInlines (Para ils) = ils
-plainParaInlines _ = []
-
-blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
-blocksToDefinitions' []     acc [] = reverse acc
-blocksToDefinitions' defAcc acc [] =
-  reverse $ (DefinitionList (reverse defAcc)) : acc
-blocksToDefinitions' defAcc acc
-  ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
-  | "DefinitionTerm" `elem` classes1 && "Definition"  `elem` classes2 =
-    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
-        pair = case remainingAttr2 == ("", [], []) of
-          True -> (concatMap plainParaInlines blks1, [blks2])
-          False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
-    in
-     blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
-  ((Div (ident2, classes2, kvs2) blks2) : blks)
-  | (not . null) defAcc && "Definition"  `elem` classes2 =
-    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
-        defItems2 = case remainingAttr2 == ("", [], []) of
-          True -> blks2
-          False -> [Div remainingAttr2 blks2]
-        ((defTerm, defItems):defs) = defAcc
-        defAcc' = case null defItems of
-          True -> (defTerm, [defItems2]) : defs
-          False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
-    in
-     blocksToDefinitions' defAcc' acc blks
-blocksToDefinitions' [] acc (b:blks) =
-  blocksToDefinitions' [] (b:acc) blks
-blocksToDefinitions' defAcc acc (b:blks) =
-  blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
-
-
-blocksToDefinitions :: [Block] -> [Block]
-blocksToDefinitions = blocksToDefinitions' [] []
-
-    
-    
-    
diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs
deleted file mode 100644
index d7033d9e8..000000000
--- a/src/Text/Pandoc/Readers/DocX/Parse.hs
+++ /dev/null
@@ -1,604 +0,0 @@
-{-
-Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.Readers.DocX.Parse
-   Copyright   : Copyright (C) 2014 Jesse Rosenthal
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
-   Stability   : alpha
-   Portability : portable
-
-Conversion of DocX archive into DocX haskell type
--}
-
-
-module Text.Pandoc.Readers.DocX.Parse (  DocX(..)
-                                       , Document(..)
-                                       , Body(..)
-                                       , BodyPart(..)
-                                       , TblLook(..)
-                                       , ParPart(..)
-                                       , Run(..)
-                                       , RunElem(..)
-                                       , Notes
-                                       , Numbering
-                                       , Relationship
-                                       , Media
-                                       , RunStyle(..)
-                                       , ParagraphStyle(..)
-                                       , Row(..)
-                                       , Cell(..)
-                                       , getFootNote
-                                       , getEndNote
-                                       , lookupLevel
-                                       , lookupRelationship
-                                       , archiveToDocX
-                                       ) where
-import Codec.Archive.Zip
-import Text.XML.Light
-import Data.Maybe
-import Data.List
-import System.FilePath
-import Data.Bits ((.|.))
-import qualified Data.ByteString.Lazy as B
-import qualified Text.Pandoc.UTF8 as UTF8
-
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
-
-type NameSpaces = [(String, String)]
-
-data DocX = DocX Document Notes Numbering [Relationship] Media
-          deriving Show
-
-archiveToDocX :: Archive -> Maybe DocX
-archiveToDocX archive = do
-  let notes = archiveToNotes archive
-      rels = archiveToRelationships archive
-      media = archiveToMedia archive
-  doc <- archiveToDocument archive
-  numbering <- archiveToNumbering archive
-  return $ DocX doc notes numbering rels media
-
-data Document = Document NameSpaces Body 
-          deriving Show
-
-archiveToDocument :: Archive -> Maybe Document
-archiveToDocument zf = do
-  entry <- findEntryByPath "word/document.xml" zf
-  docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
-  let namespaces = mapMaybe attrToNSPair (elAttribs docElem) 
-  bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
-  body <- elemToBody namespaces bodyElem
-  return $ Document namespaces body
-
-type Media = [(FilePath, B.ByteString)]
-
-filePathIsMedia :: FilePath -> Bool
-filePathIsMedia fp =
-  let (dir, _) = splitFileName fp
-  in
-   (dir == "word/media/")
-
-getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString)
-getMediaPair zf fp =
-  case findEntryByPath fp zf of
-    Just e -> Just (fp, fromEntry e)
-    Nothing -> Nothing
-
-archiveToMedia :: Archive -> Media
-archiveToMedia zf =
-  mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
-
-data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
-                 deriving Show
-
-data Numb = Numb String String           -- right now, only a key to an abstract num
-            deriving Show
-
-data AbstractNumb = AbstractNumb String [Level]
-                    deriving Show
-
--- (ilvl, format, string, start)
-type Level = (String, String, String, Maybe Integer)
-
-lookupLevel :: String -> String -> Numbering -> Maybe Level
-lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
-  absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
-  lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
-  lvl  <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
-  return lvl
-
-numElemToNum :: NameSpaces -> Element -> Maybe Numb
-numElemToNum ns element |
-  qName (elName element) == "num" &&
-  qURI (elName element) == (lookup "w" ns) = do
-    numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element
-    absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element
-                >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-    return $ Numb numId absNumId
-numElemToNum _ _ = Nothing
-
-absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
-absNumElemToAbsNum ns element |
-  qName (elName element) == "abstractNum" &&
-  qURI (elName element) == (lookup "w" ns) = do
-    absNumId <- findAttr
-                (QName "abstractNumId" (lookup "w" ns) (Just "w"))
-                element
-    let levelElems = findChildren
-                 (QName "lvl" (lookup "w" ns) (Just "w"))
-                 element
-        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
-    return $ AbstractNumb absNumId levels
-absNumElemToAbsNum _ _ = Nothing
-
-levelElemToLevel :: NameSpaces -> Element -> Maybe Level
-levelElemToLevel ns element |
-    qName (elName element) == "lvl" &&
-    qURI (elName element) == (lookup "w" ns) = do
-      ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element
-      fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element
-             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-      txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element
-             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-      let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element
-                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
-      return (ilvl, fmt, txt, start)
-levelElemToLevel _ _ = Nothing
-
-archiveToNumbering :: Archive -> Maybe Numbering
-archiveToNumbering zf =
-  case findEntryByPath "word/numbering.xml" zf of
-    Nothing -> Just $ Numbering [] [] []
-    Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
-      let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
-          numElems = findChildren
-                     (QName "num" (lookup "w" namespaces) (Just "w"))
-                     numberingElem
-          absNumElems = findChildren
-                        (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
-                        numberingElem
-          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
-          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
-      return $ Numbering namespaces nums absNums
-
-data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
-           deriving Show
-
-noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
-noteElemToNote ns element
-  | qName (elName element) `elem` ["endnote", "footnote"] &&
-    qURI (elName element) == (lookup "w" ns) =
-      do
-        noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
-        let bps = map fromJust
-                  $ filter isJust
-                  $ map (elemToBodyPart ns)
-                  $ filterChildrenName (isParOrTbl ns) element
-        return $ (noteId, bps)
-noteElemToNote _ _ = Nothing
-
-getFootNote :: String -> Notes -> Maybe [BodyPart]
-getFootNote s (Notes _ fns _) = fns >>= (lookup s)
-
-getEndNote :: String -> Notes -> Maybe [BodyPart]
-getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
-
-elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
-elemToNotes ns notetype element
-  | qName (elName element) == (notetype ++ "s") &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ map fromJust
-      $ filter isJust
-      $ map (noteElemToNote ns)
-      $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
-elemToNotes _ _ _ = Nothing
-
-archiveToNotes :: Archive -> Notes
-archiveToNotes zf =
-  let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      fn_namespaces = case fnElem of
-        Just e -> mapMaybe attrToNSPair (elAttribs e)
-        Nothing -> []
-      en_namespaces = case enElem of
-        Just e -> mapMaybe attrToNSPair (elAttribs e)
-        Nothing -> []
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= (elemToNotes ns "footnote")
-      en = enElem >>= (elemToNotes ns "endnote")
-  in
-   Notes ns fn en
-
-
-data Relationship = Relationship (RelId, Target)
-                  deriving Show
-
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
-  lookup relid (map (\(Relationship pair) -> pair) rels)
-
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
-  let (dir, name) = splitFileName fp
-  in
-   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
-
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
-  do
-    relId <- findAttr (QName "Id" Nothing Nothing) element
-    target <- findAttr (QName "Target" Nothing Nothing) element
-    return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-  
-
-archiveToRelationships :: Archive -> [Relationship]
-archiveToRelationships archive = 
-  let relPaths = filter filePathIsRel (filesInArchive archive)
-      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
-      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
-      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
-  in
-   rels
-   
-data Body = Body [BodyPart]
-          deriving Show
-
-isParOrTbl :: NameSpaces -> QName -> Bool
-isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
-                  qURI q == (lookup "w" ns)
-
-elemToBody :: NameSpaces -> Element ->  Maybe Body
-elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
-  Just $ Body
-  $ map fromJust
-  $ filter isJust
-  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
-elemToBody _ _ = Nothing
-
-isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
-isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
-                   qURI q == (lookup "w" ns)
-
-elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element
-  | qName (elName element) == "p" &&
-    qURI (elName element) == (lookup "w" ns) =
-      do
-        pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
-        numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
-        lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
-                findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
-                 findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        return (numId, lvl)
-elemToNumInfo _ _ = Nothing
-
--- isBookMarkTag :: NameSpaces -> QName -> Bool
--- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
---                      qURI q == (lookup "w" ns)
-
--- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
--- parChildrenToBookmark ns (bms : bme : _)
---   | qName (elName bms) == "bookmarkStart" &&
---     qURI (elName bms) == (lookup "w" ns) &&
---     qName (elName bme) == "bookmarkEnd" &&
---     qURI (elName bme) == (lookup "w" ns) = do
---       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
---       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
---       return $ (bmId, bmName)
--- parChildrenToBookmark _ _ = Nothing
-
-elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
-elemToBodyPart ns element
-  | qName (elName element) == "p" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let parstyle = elemToParagraphStyle ns element
-          parparts = mapMaybe id
-                     $ map (elemToParPart ns)
-                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
-      in
-       case elemToNumInfo ns element of
-         Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
-         Nothing -> Just $ Paragraph parstyle parparts
-  | qName (elName element) == "tbl" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let
-        caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
-                  >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
-                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        grid = case
-          findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
-          of
-            Just g -> elemToTblGrid ns g
-            Nothing -> []
-        tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
-                  >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
-                  >>= elemToTblLook ns
-      in
-       Just $ Tbl
-       (fromMaybe "" caption)
-       grid
-       (fromMaybe defaultTblLook tblLook)
-       (mapMaybe (elemToRow ns) (elChildren element))
-  | otherwise = Nothing
-
-elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
-elemToTblLook ns element
-  | qName (elName element) == "tblLook" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
-          val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
-          firstRowFmt = 
-            case firstRow of
-              Just "1" -> True
-              Just  _  -> False
-              Nothing -> case val of
-                Just bitMask -> testBitMask bitMask 0x020
-                Nothing      -> False
-      in
-       Just $ TblLook{firstRowFormatting = firstRowFmt}
-elemToTblLook _ _ = Nothing
-
-testBitMask :: String -> Int -> Bool
-testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
-    []            -> False
-    ((n', _) : _) -> ((n' .|. n) /= 0)
-
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
-                                     , indent :: Maybe Integer
-                                     }
-                      deriving Show
-
-defaultParagraphStyle :: ParagraphStyle
-defaultParagraphStyle = ParagraphStyle { pStyle = []
-                                       , indent = Nothing
-                                       }
-
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element =
-  case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
-    Just pPr ->
-      ParagraphStyle
-      {pStyle =
-          mapMaybe id $
-          map
-          (findAttr (QName "val" (lookup "w" ns) (Just "w")))
-          (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
-      , indent =
-        findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
-        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
-        stringToInteger
-        }
-    Nothing -> defaultParagraphStyle
-
-
-data BodyPart = Paragraph ParagraphStyle [ParPart]
-              | ListItem ParagraphStyle String String [ParPart]
-              | Tbl String TblGrid TblLook [Row]
-
-              deriving Show
-
-type TblGrid = [Integer]
-
-data TblLook = TblLook {firstRowFormatting::Bool}
-              deriving Show
-
-defaultTblLook :: TblLook
-defaultTblLook = TblLook{firstRowFormatting = False}
-
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
-
-elemToTblGrid :: NameSpaces -> Element -> TblGrid
-elemToTblGrid ns element
-  | qName (elName element) == "tblGrid" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let
-        cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
-      in
-       mapMaybe (\e ->
-                  findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
-                  >>= stringToInteger
-                )
-       cols
-elemToTblGrid _ _ = []
-
-data Row = Row [Cell]
-           deriving Show
-
-
-elemToRow :: NameSpaces -> Element -> Maybe Row
-elemToRow ns element
-  | qName (elName element) == "tr" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let 
-        cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
-      in
-       Just $ Row (mapMaybe (elemToCell ns) cells)
-elemToRow _ _ = Nothing
-
-data Cell = Cell [BodyPart]
-            deriving Show
-
-elemToCell :: NameSpaces -> Element -> Maybe Cell
-elemToCell ns element
-  | qName (elName element) == "tc" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
-elemToCell _ _ = Nothing
-
-data ParPart = PlainRun Run
-             | BookMark BookMarkId Anchor
-             | InternalHyperLink Anchor [Run]
-             | ExternalHyperLink RelId [Run]
-             | Drawing String
-             deriving Show
-
-data Run = Run RunStyle [RunElem]
-         | Footnote String 
-         | Endnote String
-           deriving Show
-
-data RunElem = TextRun String | LnBrk
-             deriving Show
-
-data RunStyle = RunStyle { isBold :: Bool
-                         , isItalic :: Bool
-                         , isSmallCaps :: Bool
-                         , isStrike :: Bool
-                         , isSuperScript :: Bool
-                         , isSubScript :: Bool
-                         , underline :: Maybe String
-                         , rStyle :: Maybe String }
-                deriving Show
-
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = False
-                           , isItalic = False
-                           , isSmallCaps = False
-                           , isStrike = False
-                           , isSuperScript = False
-                           , isSubScript = False
-                           , underline = Nothing
-                           , rStyle = Nothing
-                           } 
-
-elemToRunStyle :: NameSpaces -> Element -> RunStyle
-elemToRunStyle ns element =
-  case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
-    Just rPr ->
-      RunStyle
-      {
-        isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
-      , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
-      , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr
-      , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr
-      , isSuperScript =
-        (Just "superscript" ==
-        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
-         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
-      , isSubScript =
-        (Just "subscript" ==
-        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
-         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
-      , underline =
-        findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
-        findAttr (QName "val" (lookup "w" ns) (Just "w"))
-      , rStyle =
-        findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
-        findAttr (QName "val" (lookup "w" ns) (Just "w"))
-        }
-    Nothing -> defaultRunStyle
-
-elemToRun :: NameSpaces -> Element -> Maybe Run
-elemToRun ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      case
-        findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
-        findAttr (QName "id" (lookup "w" ns) (Just "w"))
-      of
-        Just s -> Just $ Footnote s
-        Nothing ->
-          case
-            findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
-            findAttr (QName "id" (lookup "w" ns) (Just "w"))
-          of
-            Just s -> Just $ Endnote s 
-            Nothing ->  Just $
-                        Run (elemToRunStyle ns element)
-                        (elemToRunElems ns element)
-elemToRun _ _ = Nothing
-
-elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
-elemToRunElem ns element
-  | qName (elName element) == "t" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ TextRun (strContent element)
-  | qName (elName element) == "br" &&
-    qURI (elName element) == (lookup "w" ns) =
-      Just $ LnBrk
-  | otherwise = Nothing
-
-
-elemToRunElems :: NameSpaces -> Element -> [RunElem]
-elemToRunElems ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      mapMaybe (elemToRunElem ns) (elChildren element)
-  | otherwise = []
-
-elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
-elemToDrawing ns element
-  | qName (elName element) == "drawing" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
-      in
-       findElement (QName "blip" (Just a_ns) (Just "a")) element
-       >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
-       >>= (\s -> Just $ Drawing s)
-elemToDrawing _ _ = Nothing
-
-
-elemToParPart :: NameSpaces -> Element -> Maybe ParPart
-elemToParPart ns element
-  | qName (elName element) == "r" &&
-    qURI (elName element) == (lookup "w" ns) =
-      case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
-        Just drawingElem -> elemToDrawing ns drawingElem
-        Nothing -> do
-          r <- elemToRun ns element
-          return $ PlainRun r
-elemToParPart ns element
-  | qName (elName element) == "bookmarkStart" &&
-    qURI (elName element) == (lookup "w" ns) = do
-      bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
-      bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
-      return $ BookMark bmId bmName
-elemToParPart ns element
-  | qName (elName element) == "hyperlink" &&
-    qURI (elName element) == (lookup "w" ns) =
-      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
-                 $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
-      in
-       case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
-         Just anchor ->
-          Just $ InternalHyperLink anchor runs
-         Nothing ->
-           case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
-             Just relId -> Just $ ExternalHyperLink relId runs
-             Nothing    -> Nothing
-elemToParPart _ _ = Nothing
-
-type Target = String
-type Anchor = String
-type BookMarkId = String
-type RelId = String
-               
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
new file mode 100644
index 000000000..df4be41ff
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -0,0 +1,479 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
+to 'Pandoc' document.  -}
+
+{-
+Current state of implementation of Docx entities ([x] means
+implemented, [-] means partially implemented):
+
+* Blocks
+
+  - [X] Para
+  - [X] CodeBlock (styled with `SourceCode`)
+  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
+        indented)
+  - [X] OrderedList
+  - [X] BulletList
+  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
+  - [X] Header (styled with `Heading#`)
+  - [ ] HorizontalRule
+  - [-] Table (column widths and alignments not yet implemented)
+
+* Inlines
+
+  - [X] Str
+  - [X] Emph (From italics. `underline` currently read as span. In
+        future, it might optionally be emph as well)
+  - [X] Strong
+  - [X] Strikeout
+  - [X] Superscript
+  - [X] Subscript
+  - [X] SmallCaps
+  - [ ] Quoted
+  - [ ] Cite
+  - [X] Code (styled with `VerbatimChar`)
+  - [X] Space
+  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
+  - [ ] Math
+  - [X] Link (links to an arbitrary bookmark create a span with the target as
+        id and "anchor" class)
+  - [-] Image (Links to path in archive. Future option for
+        data-encoded URI likely.)
+  - [X] Note (Footnotes and Endnotes are silently combined.)
+-}
+
+module Text.Pandoc.Readers.Docx
+       ( readDocx
+       ) where
+
+import Codec.Archive.Zip
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (text, toList)
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Readers.Docx.Parse
+import Text.Pandoc.Readers.Docx.Lists
+import Data.Maybe (mapMaybe, isJust, fromJust)
+import Data.List (delete, isPrefixOf, (\\), intersect)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Base64 (encode)
+import System.FilePath (combine)
+
+readDocx :: ReaderOptions
+         -> B.ByteString
+         -> Pandoc
+readDocx opts bytes =
+  case archiveToDocx (toArchive bytes) of
+    Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
+    Nothing   -> error $ "couldn't parse docx file"
+
+runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
+runStyleToSpanAttr rPr = ("",
+                          mapMaybe id [
+                            if isBold rPr then (Just "strong") else Nothing,
+                            if isItalic rPr then (Just "emph") else Nothing,
+                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
+                            if isStrike rPr then (Just "strike") else Nothing,
+                            if isSuperScript rPr then (Just "superscript") else Nothing,
+                            if isSubScript rPr then (Just "subscript") else Nothing,
+                            rStyle rPr],
+                          case underline rPr of
+                            Just fmt -> [("underline", fmt)]
+                            _        -> []
+                         )
+
+parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
+parStyleToDivAttr pPr = ("",
+                          pStyle pPr,
+                          case indent pPr of
+                            Just n  -> [("indent", (show n))]
+                            Nothing -> []
+                         )
+
+strToInlines :: String -> [Inline]
+strToInlines = toList . text
+
+codeSpans :: [String]
+codeSpans = ["VerbatimChar"]
+
+blockQuoteDivs :: [String]
+blockQuoteDivs = ["Quote", "BlockQuote"]
+
+codeDivs :: [String]
+codeDivs = ["SourceCode"]
+
+runElemToInlines :: RunElem -> [Inline]
+runElemToInlines (TextRun s) = strToInlines s
+runElemToInlines (LnBrk) = [LineBreak]
+
+runElemToString :: RunElem -> String
+runElemToString (TextRun s) = s
+runElemToString (LnBrk) = ['\n']
+
+runElemsToString :: [RunElem] -> String
+runElemsToString = concatMap runElemToString
+
+strNormalize :: [Inline] -> [Inline]
+strNormalize [] = []
+strNormalize (Str "" : ils) = strNormalize ils
+strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
+strNormalize (il:ils) = il : (strNormalize ils)
+
+runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
+runToInlines _ _ (Run rs runElems)
+  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
+    case runStyleToSpanAttr rs == ("", [], []) of
+      True -> [Str (runElemsToString runElems)]
+      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
+  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
+      True -> concatMap runElemToInlines runElems
+      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
+runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
+  case (getFootNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["footnote"], []) []]]
+runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
+  case (getEndNote fnId notes) of
+    Just bodyParts ->
+      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+    Nothing        ->
+      [Note [Div ("", ["endnote"], []) []]]
+
+parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
+parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
+parPartToInlines _ _ (BookMark _ anchor) =
+  [Span (anchor, ["anchor"], []) []]
+parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
+  case lookupRelationship relid rels of
+    Just target -> [Image [] (combine "word" target, "")]
+    Nothing     -> [Image [] ("", "")]
+parPartToInlines opts docx (InternalHyperLink anchor runs) =
+  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
+parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) =
+  case lookupRelationship relid rels of
+    Just target ->
+      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
+    Nothing ->
+      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+
+isAnchorSpan :: Inline -> Bool
+isAnchorSpan (Span (ident, classes, kvs) ils) =
+  (not . null) ident &&
+  classes == ["anchor"] &&
+  null kvs &&
+  null ils
+isAnchorSpan _ = False
+
+dummyAnchors :: [String]
+dummyAnchors = ["_GoBack"]
+
+makeHeaderAnchors :: Block -> Block
+makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
+  case filter isAnchorSpan ils of
+    []   -> h
+    (x@(Span (ident, _, _) _) : xs) ->
+      case ident `elem` dummyAnchors of
+        True -> h
+        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
+    _ -> h
+makeHeaderAnchors blk = blk
+
+
+parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
+parPartsToInlines opts docx parparts =
+  --
+  -- We're going to skip data-uri's for now. It should be an option,
+  -- not mandatory.
+  --
+  --bottomUp (makeImagesSelfContained docx) $
+  bottomUp spanCorrect $
+  bottomUp spanTrim $
+  bottomUp spanReduce $
+  concatMap (parPartToInlines opts docx) parparts
+
+cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
+cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
+
+rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
+rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
+
+bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block
+bodyPartToBlock opts docx (Paragraph pPr parparts) =
+  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
+bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+  let
+    kvs = case lookupLevel numId lvl numbering of
+      Just (_, fmt, txt, Just start) -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        , ("start", (show start))
+                                        ]
+
+      Just (_, fmt, txt, Nothing)    -> [ ("level", lvl)
+                                        , ("num-id", numId)
+                                        , ("format", fmt)
+                                        , ("text", txt)
+                                        ]
+      Nothing                        -> []
+  in
+   Div
+   ("", ["list-item"], kvs)
+   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
+bodyPartToBlock _ _ (Tbl _ _ _ []) =
+  Para []
+bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+  let caption = strToInlines cap
+      (hdr, rows) = case firstRowFormatting look of
+        True -> (Just r, rs)
+        False -> (Nothing, r:rs)
+      hdrCells = case hdr of
+        Just r' -> rowToBlocksList opts docx r'
+        Nothing -> []
+      cells = map (rowToBlocksList opts docx) rows
+
+      size = case null hdrCells of
+        True -> length $ head cells
+        False -> length $ hdrCells
+      --
+      -- The two following variables (horizontal column alignment and
+      -- relative column widths) go to the default at the
+      -- moment. Width information is in the TblGrid field of the Tbl,
+      -- so should be possible. Alignment might be more difficult,
+      -- since there doesn't seem to be a column entity in docx.
+      alignments = take size (repeat AlignDefault)
+      widths = take size (repeat 0) :: [Double]
+  in
+   Table caption alignments widths hdrCells cells
+
+makeImagesSelfContained :: Docx -> Inline -> Inline
+makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
+  case lookup uri media of
+    Just bs -> case getMimeType uri of
+      Just mime ->  let data_uri =
+                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
+                    in
+                     Image alt (data_uri, title)
+      Nothing  -> i
+    Nothing -> i
+makeImagesSelfContained _ inline = inline
+
+bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
+bodyToBlocks opts docx (Body bps) =
+  bottomUp removeEmptyPars $
+  bottomUp strNormalize $
+  bottomUp spanRemove $
+  bottomUp divRemove $
+  map (makeHeaderAnchors) $
+  bottomUp divCorrect $
+  bottomUp divReduce $
+  bottomUp divCorrectPreReduce $
+  bottomUp blocksToDefinitions $
+  blocksToBullets $
+  map (bodyPartToBlock opts docx) bps
+
+docxToBlocks :: ReaderOptions -> Docx -> [Block]
+docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
+
+spanReduce :: [Inline] -> [Inline]
+spanReduce [] = []
+spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
+  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
+spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
+            s2@(Span (id2, classes2, kvs2) ils2) :
+            ils) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> s1 : (spanReduce (s2 : ils))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
+                           ils)
+spanReduce (il:ils) = il : (spanReduce ils)
+
+ilToCode :: Inline -> String
+ilToCode (Str s) = s
+ilToCode _ = ""
+
+spanRemove' :: Inline -> [Inline]
+spanRemove' s@(Span (ident, classes, _) [])
+  -- "_GoBack" is automatically inserted. We don't want to keep it.
+  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
+spanRemove' (Span (_, _, kvs) ils) =
+  case lookup "underline" kvs of
+    Just val -> [Span ("", [], [("underline", val)]) ils]
+    Nothing  -> ils
+spanRemove' il = [il]
+
+spanRemove :: [Inline] -> [Inline]
+spanRemove = concatMap spanRemove'
+
+spanTrim' :: Inline -> [Inline]
+spanTrim' il@(Span _ []) = [il]
+spanTrim' il@(Span attr (il':[]))
+  | il' == Space = [Span attr [], Space]
+  | otherwise = [il]
+spanTrim' (Span attr ils)
+  | head ils == Space && last ils == Space =
+    [Space, Span attr (init $ tail ils), Space]
+  | head ils == Space = [Space, Span attr (tail ils)]
+  | last ils == Space = [Span attr (init ils), Space]
+spanTrim' il = [il]
+
+spanTrim :: [Inline] -> [Inline]
+spanTrim = concatMap spanTrim'
+
+spanCorrect' :: Inline -> [Inline]
+spanCorrect' (Span ("", [], []) ils) = ils
+spanCorrect' (Span (ident, classes, kvs) ils)
+  | "emph" `elem` classes =
+    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
+  | "strong" `elem` classes =
+      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
+  | "smallcaps" `elem` classes =
+      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
+  | "strike" `elem` classes =
+      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
+  | "superscript" `elem` classes =
+      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
+  | "subscript" `elem` classes =
+      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
+  | (not . null) (codeSpans `intersect` classes) =
+         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
+  | otherwise =
+      [Span (ident, classes, kvs) ils]
+spanCorrect' il = [il]
+
+spanCorrect :: [Inline] -> [Inline]
+spanCorrect = concatMap spanCorrect'
+
+removeEmptyPars :: [Block] -> [Block]
+removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
+
+divReduce :: [Block] -> [Block]
+divReduce [] = []
+divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
+  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
+divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
+           d2@(Div (id2, classes2, kvs2) blks2) :
+            blks) =
+  let classes'  = classes1 `intersect` classes2
+      kvs'      = kvs1 `intersect` kvs2
+      classes1' = classes1 \\ classes'
+      kvs1'     = kvs1 \\ kvs'
+      classes2' = classes2 \\ classes'
+      kvs2'     = kvs2 \\ kvs'
+  in
+   case null classes' && null kvs' of
+     True -> d1 : (divReduce (d2 : blks))
+     False -> let attr'  = ("", classes', kvs')
+                  attr1' = (id1, classes1', kvs1')
+                  attr2' = (id2, classes2', kvs2')
+              in
+               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
+                           blks)
+divReduce (blk:blks) = blk : (divReduce blks)
+
+isHeaderClass :: String -> Maybe Int
+isHeaderClass s | "Heading" `isPrefixOf` s =
+  case reads (drop (length "Heading") s) :: [(Int, String)] of
+    [] -> Nothing
+    ((n, "") : []) -> Just n
+    _       -> Nothing
+isHeaderClass _ = Nothing
+
+findHeaderClass :: [String] -> Maybe Int
+findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
+  [] -> Nothing
+  n : _ -> Just n
+
+blksToInlines :: [Block] -> [Inline]
+blksToInlines (Para ils : _) = ils
+blksToInlines (Plain ils : _) = ils
+blksToInlines _ = []
+
+divCorrectPreReduce' :: Block -> [Block]
+divCorrectPreReduce' (Div (ident, classes, kvs) blks)
+  | isJust $ findHeaderClass classes =
+    let n = fromJust $ findHeaderClass classes
+    in
+    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
+  | otherwise = [Div (ident, classes, kvs) blks]
+divCorrectPreReduce' blk = [blk]
+
+divCorrectPreReduce :: [Block] -> [Block]
+divCorrectPreReduce = concatMap divCorrectPreReduce'
+
+blkToCode :: Block -> String
+blkToCode (Para []) = ""
+blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
+blkToCode (Para ((Span (_, classes, _) ils'): ils))
+  | (not . null) (codeSpans `intersect` classes) =
+    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
+blkToCode _ = ""
+
+divRemove' :: Block -> [Block]
+divRemove' (Div (_, _, kvs) blks) =
+  case lookup "indent" kvs of
+    Just val -> [Div ("", [], [("indent", val)]) blks]
+    Nothing  -> blks
+divRemove' blk = [blk]
+
+divRemove :: [Block] -> [Block]
+divRemove = concatMap divRemove'
+
+divCorrect' :: Block -> [Block]
+divCorrect' b@(Div (ident, classes, kvs) blks)
+  | (not . null) (blockQuoteDivs `intersect` classes) =
+    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
+  | (not . null) (codeDivs `intersect` classes) =
+    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
+  | otherwise =
+      case lookup "indent" kvs of
+        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
+        Just _   ->
+          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
+        Nothing  -> [b]
+divCorrect' blk = [blk]
+
+divCorrect :: [Block] -> [Block]
+divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
new file mode 100644
index 000000000..68559d98b
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -0,0 +1,208 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx.Lists
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Functions for converting flat docx paragraphs into nested lists.
+-}
+
+module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
+                                      , blocksToDefinitions) where
+
+import Text.Pandoc.JSON
+import Text.Pandoc.Shared (trim)
+import Control.Monad
+import Data.List
+import Data.Maybe
+
+isListItem :: Block -> Bool
+isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
+isListItem _ = False
+
+getLevel :: Block -> Maybe Integer
+getLevel (Div (_, _, kvs) _) =  liftM read $ lookup "level" kvs
+getLevel _ = Nothing
+
+getLevelN :: Block -> Integer
+getLevelN b = case getLevel b of
+  Just n -> n
+  Nothing -> -1
+
+getNumId :: Block -> Maybe Integer
+getNumId (Div (_, _, kvs) _) =  liftM read $ lookup "num-id" kvs
+getNumId _ = Nothing
+
+getNumIdN :: Block -> Integer
+getNumIdN b = case getNumId b of
+  Just n -> n
+  Nothing -> -1
+
+getText :: Block -> Maybe String
+getText (Div (_, _, kvs) _) = lookup "text" kvs
+getText _ = Nothing
+
+data ListType = Itemized | Enumerated ListAttributes
+
+listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap = [("upperLetter", UpperAlpha),
+                ("lowerLetter", LowerAlpha),
+                ("upperRoman", UpperRoman),
+                ("lowerRoman", LowerRoman),
+                ("decimal", Decimal)]
+
+listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap = [("%1)", OneParen),
+                ("(%1)", TwoParens),
+                ("%1.", Period)]
+
+getListType :: Block -> Maybe ListType
+getListType b@(Div (_, _, kvs) _) | isListItem b =
+  let
+    start = lookup "start" kvs
+    frmt = lookup "format" kvs
+    txt  = lookup "text" kvs
+  in
+   case frmt of
+     Just "bullet" -> Just Itemized
+     Just f        ->
+       case txt of
+         Just t -> Just $ Enumerated (
+                  read (fromMaybe "1" start) :: Int,
+                  fromMaybe DefaultStyle (lookup f listStyleMap),
+                  fromMaybe DefaultDelim (lookup t listDelimMap))
+         Nothing -> Nothing
+     _ -> Nothing
+getListType _ = Nothing
+
+listParagraphDivs :: [String]
+listParagraphDivs = ["ListParagraph"]
+
+-- This is a first stab at going through and attaching meaning to list
+-- paragraphs, without an item marker, following a list item. We
+-- assume that these are paragraphs in the same item.
+
+handleListParagraphs :: [Block] -> [Block]
+handleListParagraphs [] = []
+handleListParagraphs (
+  (Div attr1@(_, classes1, _) blks1) :
+  (Div (ident2, classes2, kvs2) blks2) :
+  blks
+  ) | "list-item" `elem` classes1 &&
+    not ("list-item" `elem` classes2) &&
+    (not . null) (listParagraphDivs `intersect` classes2) =
+      -- We don't want to keep this indent.
+      let newDiv2 =
+            (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
+      in
+       handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
+handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
+                  
+separateBlocks' :: Block -> [[Block]] -> [[Block]]
+separateBlocks' blk ([] : []) = [[blk]]
+separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
+-- The following is for the invisible bullet lists. This is how
+-- pandoc-generated ooxml does multiparagraph item lists.
+separateBlocks' b acc | liftM trim (getText b) == Just "" =
+  (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b acc = acc ++ [[b]]
+
+separateBlocks :: [Block] -> [[Block]]
+separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
+
+flatToBullets' :: Integer -> [Block] -> [Block]
+flatToBullets' _ [] = []
+flatToBullets' num xs@(b : elems)
+  | getLevelN b == num = b : (flatToBullets' num elems)
+  | otherwise = 
+    let bNumId = getNumIdN b
+        bLevel = getLevelN b
+        (children, remaining) =
+          span
+          (\b' ->
+            ((getLevelN b') > bLevel ||
+             ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
+          xs
+    in
+     case getListType b of
+       Just (Enumerated attr) ->
+         (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+       _ ->
+         (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
+         (flatToBullets' num remaining)
+
+flatToBullets :: [Block] -> [Block]
+flatToBullets elems = flatToBullets' (-1) elems
+
+blocksToBullets :: [Block] -> [Block]
+blocksToBullets blks =
+  -- bottomUp removeListItemDivs $ 
+  flatToBullets $ (handleListParagraphs blks)
+
+
+plainParaInlines :: Block -> [Inline]
+plainParaInlines (Plain ils) = ils
+plainParaInlines (Para ils) = ils
+plainParaInlines _ = []
+
+blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
+blocksToDefinitions' []     acc [] = reverse acc
+blocksToDefinitions' defAcc acc [] =
+  reverse $ (DefinitionList (reverse defAcc)) : acc
+blocksToDefinitions' defAcc acc
+  ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
+  | "DefinitionTerm" `elem` classes1 && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        pair = case remainingAttr2 == ("", [], []) of
+          True -> (concatMap plainParaInlines blks1, [blks2])
+          False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
+    in
+     blocksToDefinitions' (pair : defAcc) acc blks
+blocksToDefinitions' defAcc acc
+  ((Div (ident2, classes2, kvs2) blks2) : blks)
+  | (not . null) defAcc && "Definition"  `elem` classes2 =
+    let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+        defItems2 = case remainingAttr2 == ("", [], []) of
+          True -> blks2
+          False -> [Div remainingAttr2 blks2]
+        ((defTerm, defItems):defs) = defAcc
+        defAcc' = case null defItems of
+          True -> (defTerm, [defItems2]) : defs
+          False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+    in
+     blocksToDefinitions' defAcc' acc blks
+blocksToDefinitions' [] acc (b:blks) =
+  blocksToDefinitions' [] (b:acc) blks
+blocksToDefinitions' defAcc acc (b:blks) =
+  blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+
+
+blocksToDefinitions :: [Block] -> [Block]
+blocksToDefinitions = blocksToDefinitions' [] []
+
+    
+    
+    
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
new file mode 100644
index 000000000..22e9dd909
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -0,0 +1,604 @@
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx.Parse
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of docx archive into Docx haskell type
+-}
+
+
+module Text.Pandoc.Readers.Docx.Parse (  Docx(..)
+                                       , Document(..)
+                                       , Body(..)
+                                       , BodyPart(..)
+                                       , TblLook(..)
+                                       , ParPart(..)
+                                       , Run(..)
+                                       , RunElem(..)
+                                       , Notes
+                                       , Numbering
+                                       , Relationship
+                                       , Media
+                                       , RunStyle(..)
+                                       , ParagraphStyle(..)
+                                       , Row(..)
+                                       , Cell(..)
+                                       , getFootNote
+                                       , getEndNote
+                                       , lookupLevel
+                                       , lookupRelationship
+                                       , archiveToDocx
+                                       ) where
+import Codec.Archive.Zip
+import Text.XML.Light
+import Data.Maybe
+import Data.List
+import System.FilePath
+import Data.Bits ((.|.))
+import qualified Data.ByteString.Lazy as B
+import qualified Text.Pandoc.UTF8 as UTF8
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+
+type NameSpaces = [(String, String)]
+
+data Docx = Docx Document Notes Numbering [Relationship] Media
+          deriving Show
+
+archiveToDocx :: Archive -> Maybe Docx
+archiveToDocx archive = do
+  let notes = archiveToNotes archive
+      rels = archiveToRelationships archive
+      media = archiveToMedia archive
+  doc <- archiveToDocument archive
+  numbering <- archiveToNumbering archive
+  return $ Docx doc notes numbering rels media
+
+data Document = Document NameSpaces Body 
+          deriving Show
+
+archiveToDocument :: Archive -> Maybe Document
+archiveToDocument zf = do
+  entry <- findEntryByPath "word/document.xml" zf
+  docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  let namespaces = mapMaybe attrToNSPair (elAttribs docElem) 
+  bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
+  body <- elemToBody namespaces bodyElem
+  return $ Document namespaces body
+
+type Media = [(FilePath, B.ByteString)]
+
+filePathIsMedia :: FilePath -> Bool
+filePathIsMedia fp =
+  let (dir, _) = splitFileName fp
+  in
+   (dir == "word/media/")
+
+getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString)
+getMediaPair zf fp =
+  case findEntryByPath fp zf of
+    Just e -> Just (fp, fromEntry e)
+    Nothing -> Nothing
+
+archiveToMedia :: Archive -> Media
+archiveToMedia zf =
+  mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+                 deriving Show
+
+data Numb = Numb String String           -- right now, only a key to an abstract num
+            deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+                    deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
+  absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
+  lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
+  lvl  <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
+  return lvl
+
+numElemToNum :: NameSpaces -> Element -> Maybe Numb
+numElemToNum ns element |
+  qName (elName element) == "num" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element
+    absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element
+                >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+    return $ Numb numId absNumId
+numElemToNum _ _ = Nothing
+
+absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
+absNumElemToAbsNum ns element |
+  qName (elName element) == "abstractNum" &&
+  qURI (elName element) == (lookup "w" ns) = do
+    absNumId <- findAttr
+                (QName "abstractNumId" (lookup "w" ns) (Just "w"))
+                element
+    let levelElems = findChildren
+                 (QName "lvl" (lookup "w" ns) (Just "w"))
+                 element
+        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+    return $ AbstractNumb absNumId levels
+absNumElemToAbsNum _ _ = Nothing
+
+levelElemToLevel :: NameSpaces -> Element -> Maybe Level
+levelElemToLevel ns element |
+    qName (elName element) == "lvl" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element
+      fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element
+             >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+      return (ilvl, fmt, txt, start)
+levelElemToLevel _ _ = Nothing
+
+archiveToNumbering :: Archive -> Maybe Numbering
+archiveToNumbering zf =
+  case findEntryByPath "word/numbering.xml" zf of
+    Nothing -> Just $ Numbering [] [] []
+    Just entry -> do
+      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+          numElems = findChildren
+                     (QName "num" (lookup "w" namespaces) (Just "w"))
+                     numberingElem
+          absNumElems = findChildren
+                        (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
+                        numberingElem
+          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
+          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+      return $ Numbering namespaces nums absNums
+
+data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
+           deriving Show
+
+noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
+noteElemToNote ns element
+  | qName (elName element) `elem` ["endnote", "footnote"] &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+        let bps = map fromJust
+                  $ filter isJust
+                  $ map (elemToBodyPart ns)
+                  $ filterChildrenName (isParOrTbl ns) element
+        return $ (noteId, bps)
+noteElemToNote _ _ = Nothing
+
+getFootNote :: String -> Notes -> Maybe [BodyPart]
+getFootNote s (Notes _ fns _) = fns >>= (lookup s)
+
+getEndNote :: String -> Notes -> Maybe [BodyPart]
+getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
+
+elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
+elemToNotes ns notetype element
+  | qName (elName element) == (notetype ++ "s") &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ map fromJust
+      $ filter isJust
+      $ map (noteElemToNote ns)
+      $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
+elemToNotes _ _ _ = Nothing
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+  let fnElem = findEntryByPath "word/footnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      enElem = findEntryByPath "word/endnotes.xml" zf
+               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+      fn_namespaces = case fnElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      en_namespaces = case enElem of
+        Just e -> mapMaybe attrToNSPair (elAttribs e)
+        Nothing -> []
+      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn = fnElem >>= (elemToNotes ns "footnote")
+      en = enElem >>= (elemToNotes ns "endnote")
+  in
+   Notes ns fn en
+
+
+data Relationship = Relationship (RelId, Target)
+                  deriving Show
+
+lookupRelationship :: RelId -> [Relationship] -> Maybe Target
+lookupRelationship relid rels =
+  lookup relid (map (\(Relationship pair) -> pair) rels)
+
+filePathIsRel :: FilePath -> Bool
+filePathIsRel fp =
+  let (dir, name) = splitFileName fp
+  in
+   (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+
+relElemToRelationship :: Element -> Maybe Relationship
+relElemToRelationship element | qName (elName element) == "Relationship" =
+  do
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    return $ Relationship (relId, target)
+relElemToRelationship _ = Nothing
+  
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive = 
+  let relPaths = filter filePathIsRel (filesInArchive archive)
+      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
+      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+  in
+   rels
+   
+data Body = Body [BodyPart]
+          deriving Show
+
+isParOrTbl :: NameSpaces -> QName -> Bool
+isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
+                  qURI q == (lookup "w" ns)
+
+elemToBody :: NameSpaces -> Element ->  Maybe Body
+elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
+  Just $ Body
+  $ map fromJust
+  $ filter isJust
+  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
+elemToBody _ _ = Nothing
+
+isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
+isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
+                   qURI q == (lookup "w" ns)
+
+elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+elemToNumInfo ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      do
+        pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
+        numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
+        lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
+                findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
+                 findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        return (numId, lvl)
+elemToNumInfo _ _ = Nothing
+
+-- isBookMarkTag :: NameSpaces -> QName -> Bool
+-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
+--                      qURI q == (lookup "w" ns)
+
+-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
+-- parChildrenToBookmark ns (bms : bme : _)
+--   | qName (elName bms) == "bookmarkStart" &&
+--     qURI (elName bms) == (lookup "w" ns) &&
+--     qName (elName bme) == "bookmarkEnd" &&
+--     qURI (elName bme) == (lookup "w" ns) = do
+--       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
+--       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
+--       return $ (bmId, bmName)
+-- parChildrenToBookmark _ _ = Nothing
+
+elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
+elemToBodyPart ns element
+  | qName (elName element) == "p" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let parstyle = elemToParagraphStyle ns element
+          parparts = mapMaybe id
+                     $ map (elemToParPart ns)
+                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
+      in
+       case elemToNumInfo ns element of
+         Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
+         Nothing -> Just $ Paragraph parstyle parparts
+  | qName (elName element) == "tbl" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
+                  >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        grid = case
+          findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
+          of
+            Just g -> elemToTblGrid ns g
+            Nothing -> []
+        tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
+                  >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
+                  >>= elemToTblLook ns
+      in
+       Just $ Tbl
+       (fromMaybe "" caption)
+       grid
+       (fromMaybe defaultTblLook tblLook)
+       (mapMaybe (elemToRow ns) (elChildren element))
+  | otherwise = Nothing
+
+elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
+elemToTblLook ns element
+  | qName (elName element) == "tblLook" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
+          val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
+          firstRowFmt = 
+            case firstRow of
+              Just "1" -> True
+              Just  _  -> False
+              Nothing -> case val of
+                Just bitMask -> testBitMask bitMask 0x020
+                Nothing      -> False
+      in
+       Just $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = Nothing
+
+testBitMask :: String -> Int -> Bool
+testBitMask bitMaskS n =
+  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+    []            -> False
+    ((n', _) : _) -> ((n' .|. n) /= 0)
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+                                     , indent :: Maybe Integer
+                                     }
+                      deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+                                       , indent = Nothing
+                                       }
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
+elemToParagraphStyle ns element =
+  case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
+    Just pPr ->
+      ParagraphStyle
+      {pStyle =
+          mapMaybe id $
+          map
+          (findAttr (QName "val" (lookup "w" ns) (Just "w")))
+          (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
+      , indent =
+        findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
+        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
+        stringToInteger
+        }
+    Nothing -> defaultParagraphStyle
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+              | ListItem ParagraphStyle String String [ParPart]
+              | Tbl String TblGrid TblLook [Row]
+
+              deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+              deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+elemToTblGrid :: NameSpaces -> Element -> TblGrid
+elemToTblGrid ns element
+  | qName (elName element) == "tblGrid" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let
+        cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
+      in
+       mapMaybe (\e ->
+                  findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
+                  >>= stringToInteger
+                )
+       cols
+elemToTblGrid _ _ = []
+
+data Row = Row [Cell]
+           deriving Show
+
+
+elemToRow :: NameSpaces -> Element -> Maybe Row
+elemToRow ns element
+  | qName (elName element) == "tr" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let 
+        cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
+      in
+       Just $ Row (mapMaybe (elemToCell ns) cells)
+elemToRow _ _ = Nothing
+
+data Cell = Cell [BodyPart]
+            deriving Show
+
+elemToCell :: NameSpaces -> Element -> Maybe Cell
+elemToCell ns element
+  | qName (elName element) == "tc" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
+elemToCell _ _ = Nothing
+
+data ParPart = PlainRun Run
+             | BookMark BookMarkId Anchor
+             | InternalHyperLink Anchor [Run]
+             | ExternalHyperLink RelId [Run]
+             | Drawing String
+             deriving Show
+
+data Run = Run RunStyle [RunElem]
+         | Footnote String 
+         | Endnote String
+           deriving Show
+
+data RunElem = TextRun String | LnBrk
+             deriving Show
+
+data RunStyle = RunStyle { isBold :: Bool
+                         , isItalic :: Bool
+                         , isSmallCaps :: Bool
+                         , isStrike :: Bool
+                         , isSuperScript :: Bool
+                         , isSubScript :: Bool
+                         , underline :: Maybe String
+                         , rStyle :: Maybe String }
+                deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = False
+                           , isItalic = False
+                           , isSmallCaps = False
+                           , isStrike = False
+                           , isSuperScript = False
+                           , isSubScript = False
+                           , underline = Nothing
+                           , rStyle = Nothing
+                           } 
+
+elemToRunStyle :: NameSpaces -> Element -> RunStyle
+elemToRunStyle ns element =
+  case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
+    Just rPr ->
+      RunStyle
+      {
+        isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
+      , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
+      , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr
+      , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr
+      , isSuperScript =
+        (Just "superscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , isSubScript =
+        (Just "subscript" ==
+        (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
+         findAttr (QName "val" (lookup "w" ns) (Just "w"))))
+      , underline =
+        findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+      , rStyle =
+        findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
+        findAttr (QName "val" (lookup "w" ns) (Just "w"))
+        }
+    Nothing -> defaultRunStyle
+
+elemToRun :: NameSpaces -> Element -> Maybe Run
+elemToRun ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case
+        findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
+        findAttr (QName "id" (lookup "w" ns) (Just "w"))
+      of
+        Just s -> Just $ Footnote s
+        Nothing ->
+          case
+            findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
+            findAttr (QName "id" (lookup "w" ns) (Just "w"))
+          of
+            Just s -> Just $ Endnote s 
+            Nothing ->  Just $
+                        Run (elemToRunStyle ns element)
+                        (elemToRunElems ns element)
+elemToRun _ _ = Nothing
+
+elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
+elemToRunElem ns element
+  | qName (elName element) == "t" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ TextRun (strContent element)
+  | qName (elName element) == "br" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ LnBrk
+  | otherwise = Nothing
+
+
+elemToRunElems :: NameSpaces -> Element -> [RunElem]
+elemToRunElems ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      mapMaybe (elemToRunElem ns) (elChildren element)
+  | otherwise = []
+
+elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
+elemToDrawing ns element
+  | qName (elName element) == "drawing" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+      in
+       findElement (QName "blip" (Just a_ns) (Just "a")) element
+       >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+       >>= (\s -> Just $ Drawing s)
+elemToDrawing _ _ = Nothing
+
+
+elemToParPart :: NameSpaces -> Element -> Maybe ParPart
+elemToParPart ns element
+  | qName (elName element) == "r" &&
+    qURI (elName element) == (lookup "w" ns) =
+      case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
+        Just drawingElem -> elemToDrawing ns drawingElem
+        Nothing -> do
+          r <- elemToRun ns element
+          return $ PlainRun r
+elemToParPart ns element
+  | qName (elName element) == "bookmarkStart" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
+      return $ BookMark bmId bmName
+elemToParPart ns element
+  | qName (elName element) == "hyperlink" &&
+    qURI (elName element) == (lookup "w" ns) =
+      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
+                 $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
+      in
+       case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
+         Just anchor ->
+          Just $ InternalHyperLink anchor runs
+         Nothing ->
+           case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
+             Just relId -> Just $ ExternalHyperLink relId runs
+             Nothing    -> Nothing
+elemToParPart _ _ = Nothing
+
+type Target = String
+type Anchor = String
+type BookMarkId = String
+type RelId = String
+               
-- 
cgit v1.2.3


From 7d60c798bf12a93ca4d7f4d973c917ba0d5a96ff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 16 Jun 2014 23:02:20 -0700
Subject: Fixed compiler warning.

---
 src/Text/Pandoc/Readers/Docx.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index df4be41ff..4035cde99 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -216,7 +216,9 @@ parPartsToInlines opts docx parparts =
   -- We're going to skip data-uri's for now. It should be an option,
   -- not mandatory.
   --
-  --bottomUp (makeImagesSelfContained docx) $
+  (if False -- TODO depend on option
+      then bottomUp (makeImagesSelfContained docx)
+      else id) $
   bottomUp spanCorrect $
   bottomUp spanTrim $
   bottomUp spanReduce $
-- 
cgit v1.2.3


From fc291efad3430be0645e979e0279c93195012075 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 17 Jun 2014 00:38:55 -0700
Subject: LaTeX reader: Correctly handle table rows with too few cells.

LaTeX seems to treat them as if they have empty cells at the
end.  Closes #241.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3c4d4ee52..97bfaa455 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1255,10 +1255,14 @@ parseTableRow :: Int  -- ^ number of columns
 parseTableRow cols = try $ do
   let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline
   let tableCell = (plain . trimInlines . mconcat) <$> many tableCellInline
-  cells' <- sepBy tableCell amp
-  guard $ length cells' == cols
+  cells' <- sepBy1 tableCell amp
+  let numcells = length cells'
+  guard $ numcells <= cols && numcells >= 1
+  guard $ cells' /= [mempty]
+  -- note:  a & b in a three-column table leaves an empty 3rd cell:
+  let cells'' = cells' ++ replicate (cols - numcells) mempty
   spaces
-  return cells'
+  return cells''
 
 simpTable :: LP Blocks
 simpTable = try $ do
-- 
cgit v1.2.3


From 59272e4d99668ddc48f07eb761979c2f49cf76d5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 17 Jun 2014 12:14:02 -0700
Subject: DocBook reader: Support <?asciidoc-br?>.

Closes #1236.

Note, this is a bit of a kludge, to work around the fact that xml-light
doesn't parse `<?asciidoc-br?>` correctly.  We preprocess the input,
replacing that instruction with `<br/>`, and then parse that as a line
break.  Other XML instructions are simply removed from the input stream.
---
 src/Text/Pandoc/Readers/DocBook.hs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d58f8b3c5..cf1d5132e 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -492,7 +492,7 @@ List of all DocBook tags, with [x] indicating implemented,
     anything else
 [ ] xref - A cross reference to another part of the document
 [ ] year - The year of publication of a document
-
+[x] ?asciidoc-br? - line break from asciidoc docbook output
 -}
 
 type DB = State DBState
@@ -507,7 +507,7 @@ data DBState = DBState{ dbSectionLevel :: Int
 
 readDocBook :: ReaderOptions -> String -> Pandoc
 readDocBook _ inp  = Pandoc (dbMeta st') (toList $ mconcat bs)
-  where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
+  where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp')
                              DBState{ dbSectionLevel = 0
                                     , dbQuoteType = DoubleQuote
                                     , dbMeta = mempty
@@ -515,6 +515,17 @@ readDocBook _ inp  = Pandoc (dbMeta st') (toList $ mconcat bs)
                                     , dbBook = False
                                     , dbFigureTitle = mempty
                                     }
+        inp' = handleInstructions inp
+
+-- We treat <?asciidoc-br?> specially (issue #1236), converting it
+-- to <br/>, since xml-light doesn't parse the instruction correctly.
+-- Other xml instructions are simply removed from the input stream.
+handleInstructions :: String -> String
+handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
+handleInstructions xs = case break (=='<') xs of
+                             (ys, [])     -> ys
+                             ([], '<':zs) -> '<' : handleInstructions zs
+                             (ys, zs) -> ys ++ handleInstructions zs
 
 getFigure :: Element -> DB Blocks
 getFigure e = do
@@ -920,6 +931,10 @@ parseInline (Elem e) =
         "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
         "title" -> return mempty
         "affiliation" -> return mempty
+        -- Note: this isn't a real docbook tag; it's what we convert
+        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
+        -- work around xml-light's inability to parse an instruction.
+        "br" -> return linebreak
         _          -> innerInlines
    where innerInlines = (trimInlines . mconcat) <$>
                           (mapM parseInline $ elContent e)
-- 
cgit v1.2.3


From b371e83d7362c6ffc6e4dd98b7288ddd7f23f46a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 17 Jun 2014 15:15:56 -0700
Subject: Highlighting:  Let .numberLines work even if no language given.

Closes #1287, jgm/highlighting-kate#40.
---
 src/Text/Pandoc/Highlighting.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 2e7a9f648..7f975d4c6 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -74,7 +74,12 @@ highlight formatter (_, classes, keyvals) rawCode =
                         ["number","numberLines", "number-lines"]) classes }
       lcclasses = map (map toLower) classes
   in  case find (`elem` lcLanguages) lcclasses of
-            Nothing        -> Nothing
+            Nothing
+              | numberLines fmtOpts -> Just
+                              $ formatter fmtOpts{ codeClasses = [],
+                                                   containerClasses = classes }
+                              $ map (\ln -> [(NormalTok, ln)]) $ lines rawCode
+              | otherwise  -> Nothing
             Just language  -> Just
                               $ formatter fmtOpts{ codeClasses = [language],
                                                    containerClasses = classes }
-- 
cgit v1.2.3


From ab390a10ec3bc42c71d8746152acbf3ee7b1595b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 11:33:09 -0700
Subject: Removed old haddock reader code.  Add dependency on haddock-library.

This also removes the dependency on alex and happy.
---
 src/Text/Pandoc/Readers/Haddock.hs      |  32 ++++--
 src/Text/Pandoc/Readers/Haddock/Lex.x   | 171 ------------------------------
 src/Text/Pandoc/Readers/Haddock/Parse.y | 178 --------------------------------
 3 files changed, 21 insertions(+), 360 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/Haddock/Lex.x
 delete mode 100644 src/Text/Pandoc/Readers/Haddock/Parse.y

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 0e74406ef..65d8de98f 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -3,7 +3,8 @@
    Copyright   : Copyright (C) 2013 David Lazar
    License     : GNU GPL, version 2 or above
 
-   Maintainer  : David Lazar <lazar6@illinois.edu>
+   Maintainer  : David Lazar <lazar6@illinois.edu>,
+                 John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
 
 Conversion of Haddock markup to 'Pandoc' document.
@@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock
     ( readHaddock
     ) where
 
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Data.Monoid
+import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Text.Pandoc.Readers.Haddock.Lex
-import Text.Pandoc.Readers.Haddock.Parse
+import Documentation.Haddock.Parser (parseParas, Identifier)
+import Documentation.Haddock.Types
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
 readHaddock :: ReaderOptions -- ^ Reader options
             -> String        -- ^ String to parse
             -> Pandoc
-readHaddock _ s = Pandoc nullMeta blocks
-  where
-    blocks = case parseParas (tokenise s (0,0)) of
-        Left [] -> error "parse failure"
-        Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok)
-          where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")"
-        Right x -> mergeLists (toList x)
+readHaddock _ = B.doc . docHToBlocks . parseParas
+
+docHToBlocks :: DocH mod Identifier -> Blocks
+docHToBlocks d =
+  case d of
+    DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
+    DocParagraph ils -> B.para $ docHToInlines ils
+
+docHToInlines :: DocH mod Identifier -> Inlines
+docHToInlines d =
+  case d of
+    DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
+    DocString s -> B.text s
 
 -- similar to 'docAppend' in Haddock.Doc
 mergeLists :: [Block] -> [Block]
diff --git a/src/Text/Pandoc/Readers/Haddock/Lex.x b/src/Text/Pandoc/Readers/Haddock/Lex.x
deleted file mode 100644
index 120e96ebf..000000000
--- a/src/Text/Pandoc/Readers/Haddock/Lex.x
+++ /dev/null
@@ -1,171 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
--- This file was modified and integrated into GHC by David Waern 2006.
--- Then moved back into Haddock by Isaac Dupree in 2009 :-)
--- Then copied into Pandoc by David Lazar in 2013 :-D
-
-{
-{-# LANGUAGE BangPatterns #-}   -- Generated by Alex
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Text.Pandoc.Readers.Haddock.Lex (
-    Token(..),
-    LToken,
-    tokenise,
-    tokenPos
- ) where
-
-import Data.Char
-import Numeric (readHex)
-}
-
-%wrapper "posn"
-
-$ws       = $white # \n
-$digit    = [0-9]
-$hexdigit = [0-9a-fA-F]
-$special  = [\"\@]
-$alphanum = [A-Za-z0-9]
-$ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
-
-:-
-
--- beginning of a paragraph
-<0,para> {
-  $ws* \n               ;
-  $ws* \>               { begin birdtrack }
-  $ws* prop \> .* \n    { strtoken TokProperty `andBegin` property}
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-  $ws* [\*\-]           { token TokBullet `andBegin` string }
-  $ws* \[               { token TokDefStart `andBegin` def }
-  $ws* \( $digit+ \)    { token TokNumber `andBegin` string }
-  $ws* $digit+ \.       { token TokNumber `andBegin` string }
-  $ws*                  { begin string }
-}
-
--- beginning of a line
-<line> {
-  $ws* \>               { begin birdtrack }
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-
-  $ws* \n               { token TokPara `andBegin` para }
-  -- ^ Here, we really want to be able to say
-  -- $ws* (\n | <eof>)  { token TokPara `andBegin` para}
-  -- because otherwise a trailing line of whitespace will result in
-  -- a spurious TokString at the end of a docstring.  We don't have <eof>,
-  -- though (NOW I realise what it was for :-).  To get around this, we always
-  -- append \n to the end of a docstring.
-
-  ()                    { begin string }
-}
-
-<birdtrack> .*  \n?     { strtokenNL TokBirdTrack `andBegin` line }
-
-<property> ()           { token TokPara `andBegin` para }
-
-<example> {
-  $ws*  \n              { token TokPara `andBegin` para }
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-  ()                    { begin exampleresult }
-}
-
-<exampleexpr> .* \n     { strtokenNL TokExampleExpression `andBegin` example }
-
-<exampleresult> .* \n   { strtokenNL TokExampleResult `andBegin` example }
-
-<string,def> {
-  $special              { strtoken $ \s -> TokSpecial (head s) }
-  \<\< [^\>]* \>\>      { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
-  \< [^\>]* \>          { strtoken $ \s -> TokURL (init (tail s)) }
-  \# [^\#]* \#          { strtoken $ \s -> TokAName (init (tail s)) }
-  \/ [^\/]* \/          { strtoken $ \s -> TokEmphasis (init (tail s)) }
-  [\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) }
-  \\ .                  { strtoken (TokString . tail) }
-  "&#" $digit+ \;       { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
-  "&#" [xX] $hexdigit+ \;
-        { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
-  -- allow special characters through if they don't fit one of the previous
-  -- patterns.
-  [\/\'\`\<\#\&\\]      { strtoken TokString }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString }
-}
-
-<def> {
-  \]                    { token TokDefEnd `andBegin` string }
-}
-
--- ']' doesn't have any special meaning outside of the [...] at the beginning
--- of a definition paragraph.
-<string> {
-  \]                    { strtoken TokString }
-}
-
-{
--- | A located token
-type LToken = (Token, AlexPosn)
-
-data Token
-  = TokPara
-  | TokNumber
-  | TokBullet
-  | TokDefStart
-  | TokDefEnd
-  | TokSpecial Char
-  | TokIdent String
-  | TokString String
-  | TokURL String
-  | TokPic String
-  | TokEmphasis String
-  | TokAName String
-  | TokBirdTrack String
-  | TokProperty String
-  | TokExamplePrompt String
-  | TokExampleExpression String
-  | TokExampleResult String
-  deriving Show
-
-tokenPos :: LToken -> (Int, Int)
-tokenPos t = let AlexPn _ line col = snd t in (line, col)
-
-type StartCode = Int
-type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]
-
-tokenise :: String -> (Int, Int) -> [LToken]
-tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
-  where posn = AlexPn 0 line col
-        go inp@(pos,_,_,str) sc =
-          case alexScan inp sc of
-                AlexEOF -> []
-                AlexError _ -> []
-                AlexSkip  inp' len     -> go inp' sc
-                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)
-
--- NB. we add a final \n to the string, (see comment in the beginning of line
--- production above).
-eofHack str = str++"\n"
-
-andBegin :: Action -> StartCode -> Action
-andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont
-
-token :: Token -> Action
-token t = \pos _ sc cont -> (t, pos) : cont sc
-
-strtoken, strtokenNL :: (String -> Token) -> Action
-strtoken t = \pos str sc cont -> (t str, pos) : cont sc
-strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
--- ^ We only want LF line endings in our internal doc string format, so we
--- filter out all CRs.
-
-begin :: StartCode -> Action
-begin sc = \_ _ _ cont -> cont sc
-
-}
diff --git a/src/Text/Pandoc/Readers/Haddock/Parse.y b/src/Text/Pandoc/Readers/Haddock/Parse.y
deleted file mode 100644
index 9c2bbc8a9..000000000
--- a/src/Text/Pandoc/Readers/Haddock/Parse.y
+++ /dev/null
@@ -1,178 +0,0 @@
--- This code was copied from the 'haddock' package, modified, and integrated
--- into Pandoc by David Lazar.
-{
-{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where
-
-import Text.Pandoc.Readers.Haddock.Lex
-import Text.Pandoc.Builder
-import Text.Pandoc.Shared (trim, trimr)
-import Data.Generics (everywhere, mkT)
-import Data.Char  (isSpace)
-import Data.Maybe (fromMaybe)
-import Data.List  (stripPrefix, intersperse)
-import Data.Monoid (mempty, mconcat)
-}
-
-%expect 0
-
-%tokentype  { LToken }
-
-%token
-    '/'     { (TokSpecial '/',_) }
-    '@'     { (TokSpecial '@',_) }
-    '['     { (TokDefStart,_) }
-    ']'     { (TokDefEnd,_) }
-    DQUO    { (TokSpecial '\"',_) }
-    URL     { (TokURL $$,_) }
-    PIC     { (TokPic $$,_) }
-    ANAME   { (TokAName $$,_) }
-    '/../'  { (TokEmphasis $$,_) }
-    '-'     { (TokBullet,_) }
-    '(n)'   { (TokNumber,_) }
-    '>..'   { (TokBirdTrack $$,_) }
-    PROP    { (TokProperty $$,_) }
-    PROMPT  { (TokExamplePrompt $$,_) }
-    RESULT  { (TokExampleResult $$,_) }
-    EXP     { (TokExampleExpression $$,_) }
-    IDENT   { (TokIdent $$,_) }
-    PARA    { (TokPara,_) }
-    STRING  { (TokString $$,_) }
-
-%monad { Either [LToken] }
-
-%name parseParas doc
-%name parseString seq
-
-%%
-
-doc :: { Blocks }
-    : apara PARA doc    { $1 <> $3 }
-    | PARA doc          { $2 }
-    | apara             { $1 }
-    | {- empty -}       { mempty }
-
-apara :: { Blocks }
-    : ulpara            { bulletList [$1] }
-    | olpara            { orderedList [$1] }
-    | defpara           { definitionList [$1] }
-    | para              { $1 }
-
-ulpara :: { Blocks }
-    : '-' para          { $2 }
-
-olpara  :: { Blocks }
-    : '(n)' para        { $2 }
-
-defpara :: { (Inlines, [Blocks]) }
-    : '[' seq ']' seq   { (trimInlines $2, [plain $ trimInlines $4]) }
-
-para :: { Blocks }
-    : seq               { para' $1 }
-    | codepara          { codeBlockWith ([], ["haskell"], []) $1 }
-    | property          { $1 }
-    | examples          { $1 }
-
-codepara :: { String }
-    : '>..' codepara    { $1 ++ $2 }
-    | '>..'             { $1 }
-
-property :: { Blocks }
-    : PROP              { makeProperty $1 }
-
-examples :: { Blocks }
-    : example examples  { $1 <> $2 }
-    | example           { $1 }
-
-example :: { Blocks }
-    : PROMPT EXP result { makeExample $1 $2 (lines $3) }
-    | PROMPT EXP        { makeExample $1 $2 [] }
-
-result :: { String }
-    : RESULT result     { $1 ++ $2 }
-    | RESULT            { $1 }
-
-seq :: { Inlines }
-    : elem seq          { $1 <> $2 }
-    | elem              { $1 }
-
-elem :: { Inlines }
-    : elem1             { $1 }
-    | '@' seq1 '@'      { monospace $2 }
-
-seq1 :: { Inlines }
-    : PARA seq1         { linebreak <> $2 }
-    | elem1 seq1        { $1 <> $2 }
-    | elem1             { $1 }
-
-elem1 :: { Inlines }
-    : STRING            { text $1 }
-    | '/../'            { emph (str $1) }
-    | URL               { makeHyperlink $1 }
-    | PIC               { image $1 $1 mempty }
-    | ANAME             { mempty } -- TODO
-    | IDENT             { codeWith ([], ["haskell"], []) $1 }
-    | DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 }
-
-strings :: { String }
-    : STRING            { $1 }
-    | STRING strings    { $1 ++ $2 }
-
-{
-happyError :: [LToken] -> Either [LToken] a
-happyError toks = Left toks
-
-para' :: Inlines -> Blocks
-para' = para . trimInlines
-
-monospace :: Inlines -> Inlines
-monospace = everywhere (mkT go)
-  where
-    go (Str s) = Code nullAttr s
-    go x = x
-
--- | Create a `Hyperlink` from given string.
---
--- A hyperlink consists of a URL and an optional label.  The label is separated
--- from the url by one or more whitespace characters.
-makeHyperlink :: String -> Inlines
-makeHyperlink input = case break isSpace $ trim input of
-  (url, "") -> link url url (str url)
-  (url, lb) -> link url url (trimInlines $ text lb)
-
-makeProperty :: String -> Blocks
-makeProperty s = case trim s of
-  'p':'r':'o':'p':'>':xs ->
-    codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
-  xs ->
-    error $ "makeProperty: invalid input " ++ show xs
-
--- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Blocks
-makeExample prompt expression result =
-    para $ codeWith ([], ["haskell","expr"], []) (trim expression)
-        <> linebreak
-        <> (mconcat $ intersperse linebreak $ map coder result')
-  where
-    -- 1. drop trailing whitespace from the prompt, remember the prefix
-    prefix = takeWhile isSpace prompt
-
-    -- 2. drop, if possible, the exact same sequence of whitespace
-    -- characters from each result line
-    --
-    -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-    -- empty line
-    result' = map (substituteBlankLine . tryStripPrefix prefix) result
-      where
-        tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-
-        substituteBlankLine "<BLANKLINE>" = ""
-        substituteBlankLine line          = line
-    coder = codeWith ([], ["result"], [])
-}
-- 
cgit v1.2.3


From 9fc5c8d7af31a47d8e3e8ea6dbb541178ec9ca66 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 12:27:27 -0700
Subject: Rewrote haddock reader to use haddock-library.

This brings pandoc's rendering of haddock markup in line
with the new haddock.

Note that we preserve line breaks in `@` code blocks, unlike
the earlier version.

Modified tests pass.  More tests would be good.
---
 src/Text/Pandoc/Readers/Haddock.hs | 124 ++++++++++++++++++++++++++++++-------
 1 file changed, 102 insertions(+), 22 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 65d8de98f..a512f969d 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -15,10 +15,13 @@ module Text.Pandoc.Readers.Haddock
 
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Shared (trim, splitBy)
 import Data.Monoid
+import Data.List (intersperse, stripPrefix)
+import Data.Maybe (fromMaybe)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Documentation.Haddock.Parser (parseParas, Identifier)
+import Documentation.Haddock.Parser
 import Documentation.Haddock.Types
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
@@ -27,25 +30,102 @@ readHaddock :: ReaderOptions -- ^ Reader options
             -> Pandoc
 readHaddock _ = B.doc . docHToBlocks . parseParas
 
-docHToBlocks :: DocH mod Identifier -> Blocks
-docHToBlocks d =
-  case d of
+docHToBlocks :: DocH String Identifier -> Blocks
+docHToBlocks d' =
+  case d' of
+    DocEmpty -> mempty
     DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
-    DocParagraph ils -> B.para $ docHToInlines ils
-
-docHToInlines :: DocH mod Identifier -> Inlines
-docHToInlines d =
-  case d of
-    DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
-    DocString s -> B.text s
-
--- similar to 'docAppend' in Haddock.Doc
-mergeLists :: [Block] -> [Block]
-mergeLists (BulletList xs : BulletList ys : blocks)
-    = mergeLists (BulletList (xs ++ ys) : blocks)
-mergeLists (OrderedList _ xs : OrderedList a ys : blocks) 
-    = mergeLists (OrderedList a (xs ++ ys) : blocks)
-mergeLists (DefinitionList xs : DefinitionList ys : blocks)
-    = mergeLists (DefinitionList (xs ++ ys) : blocks)
-mergeLists (x : blocks) = x : mergeLists blocks
-mergeLists [] = []
+    DocString _ -> inlineFallback
+    DocParagraph ils -> B.para $ docHToInlines False ils
+    DocIdentifier _ -> inlineFallback
+    DocIdentifierUnchecked _ -> inlineFallback
+    DocModule s -> B.plain $ docHToInlines False $ DocModule s
+    DocWarning _ -> mempty -- TODO
+    DocEmphasis _ -> inlineFallback
+    DocMonospaced _ -> inlineFallback
+    DocBold _ -> inlineFallback
+    DocHeader h -> B.header (headerLevel h)
+                           (docHToInlines False $ headerTitle h)
+    DocUnorderedList items -> B.bulletList (map docHToBlocks items)
+    DocOrderedList items -> B.orderedList (map docHToBlocks items)
+    DocDefList items -> B.definitionList (map (\(d,t) ->
+                               (docHToInlines False d,
+                                [consolidatePlains $ docHToBlocks t])) items)
+    DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s
+    DocCodeBlock d -> B.para $ docHToInlines True d
+    DocHyperlink _ -> inlineFallback
+    DocPic _ -> inlineFallback
+    DocAName _ -> inlineFallback
+    DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
+    DocExamples es -> mconcat $ map (\e ->
+       makeExample ">>>" (exampleExpression e) (exampleResult e)) es
+
+  where inlineFallback = B.plain $ docHToInlines False d'
+        consolidatePlains = B.fromList . consolidatePlains' . B.toList
+        consolidatePlains' zs@(Plain _ : _) =
+          let (xs, ys) = span isPlain zs in
+          Plain (concatMap extractContents xs) : consolidatePlains' ys
+        consolidatePlains' (x : xs) = x : consolidatePlains' xs
+        consolidatePlains' [] = []
+        isPlain (Plain _) = True
+        isPlain _         = False
+        extractContents (Plain xs) = xs
+        extractContents _ = []
+
+docHToInlines :: Bool -> DocH String Identifier -> Inlines
+docHToInlines isCode d' =
+  case d' of
+    DocEmpty -> mempty
+    DocAppend d1 d2 -> mappend (docHToInlines isCode d1)
+                               (docHToInlines isCode d2)
+    DocString s
+      | isCode -> mconcat $ intersperse B.linebreak
+                              $ map B.code $ splitBy (=='\n') s
+      | otherwise  -> B.text s
+    DocParagraph _ -> mempty
+    DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s
+    DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s
+    DocModule s -> B.codeWith ("",["haskell"],[]) s
+    DocWarning _ -> mempty -- TODO
+    DocEmphasis d -> B.emph (docHToInlines isCode d)
+    DocMonospaced (DocString s) -> B.code s
+    DocMonospaced d -> docHToInlines True d
+    DocBold d -> B.strong (docHToInlines isCode d)
+    DocHeader _ -> mempty
+    DocUnorderedList _ -> mempty
+    DocOrderedList _ -> mempty
+    DocDefList _ -> mempty
+    DocCodeBlock _ -> mempty
+    DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h)
+             (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h)
+    DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p)
+                        (maybe mempty B.text $ pictureTitle p)
+    DocAName s -> B.spanWith (s,["anchor"],[]) mempty
+    DocProperty _ -> mempty
+    DocExamples _ -> mempty
+
+-- | Create an 'Example', stripping superfluous characters as appropriate
+makeExample :: String -> String -> [String] -> Blocks
+makeExample prompt expression result =
+    B.para $ B.codeWith ("",["prompt"],[]) prompt
+        <> B.space
+        <> B.codeWith ([], ["haskell","expr"], []) (trim expression)
+        <> B.linebreak
+        <> (mconcat $ intersperse B.linebreak $ map coder result')
+  where
+    -- 1. drop trailing whitespace from the prompt, remember the prefix
+    prefix = takeWhile (`elem` " \t") prompt
+
+    -- 2. drop, if possible, the exact same sequence of whitespace
+    -- characters from each result line
+    --
+    -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
+    -- empty line
+    result' = map (substituteBlankLine . tryStripPrefix prefix) result
+      where
+        tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+
+        substituteBlankLine "<BLANKLINE>" = ""
+        substituteBlankLine line          = line
+    coder = B.codeWith ([], ["result"], [])
+
-- 
cgit v1.2.3


From 35e57db5c292957e74c24eb2cee63928c7865cc6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 15:32:13 -0700
Subject: Finished first draft of Haddock writer.

---
 src/Text/Pandoc.hs                 |   3 +
 src/Text/Pandoc/Readers/Haddock.hs |  13 +-
 src/Text/Pandoc/Writers/Haddock.hs | 357 +++++++++++++++++++++++++++++++++++++
 3 files changed, 371 insertions(+), 2 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/Haddock.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 45c2f453b..d2e7887b5 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -102,6 +102,7 @@ module Text.Pandoc
                , writeFB2
                , writeOrg
                , writeAsciiDoc
+               , writeHaddock
                , writeCustom
                -- * Rendering templates and default templates
                , module Text.Pandoc.Templates
@@ -149,6 +150,7 @@ import Text.Pandoc.Writers.MediaWiki
 import Text.Pandoc.Writers.Textile
 import Text.Pandoc.Writers.Org
 import Text.Pandoc.Writers.AsciiDoc
+import Text.Pandoc.Writers.Haddock
 import Text.Pandoc.Writers.Custom
 import Text.Pandoc.Templates
 import Text.Pandoc.Options
@@ -279,6 +281,7 @@ writers = [
   ,("rtf"          , IOStringWriter writeRTFWithEmbeddedImages)
   ,("org"          , PureStringWriter writeOrg)
   ,("asciidoc"     , PureStringWriter writeAsciiDoc)
+  ,("haddock"      , PureStringWriter writeHaddock)
   ]
 
 getDefaultExtensions :: String -> Set Extension
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index a512f969d..f184eabdb 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -23,19 +23,28 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types
+import Debug.Trace (trace)
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
 readHaddock :: ReaderOptions -- ^ Reader options
             -> String        -- ^ String to parse
             -> Pandoc
-readHaddock _ = B.doc . docHToBlocks . parseParas
+readHaddock opts = B.doc . docHToBlocks . trace' . parseParas
+  where trace' x = if readerTrace opts
+                      then trace (show x) x
+                      else x
 
 docHToBlocks :: DocH String Identifier -> Blocks
 docHToBlocks d' =
   case d' of
     DocEmpty -> mempty
+    DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
+         B.headerWith (ident,[],[]) (headerLevel h)
+            (docHToInlines False $ headerTitle h)
     DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
     DocString _ -> inlineFallback
+    DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
+    DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
     DocParagraph ils -> B.para $ docHToInlines False ils
     DocIdentifier _ -> inlineFallback
     DocIdentifierUnchecked _ -> inlineFallback
@@ -64,7 +73,7 @@ docHToBlocks d' =
         consolidatePlains = B.fromList . consolidatePlains' . B.toList
         consolidatePlains' zs@(Plain _ : _) =
           let (xs, ys) = span isPlain zs in
-          Plain (concatMap extractContents xs) : consolidatePlains' ys
+          Para (concatMap extractContents xs) : consolidatePlains' ys
         consolidatePlains' (x : xs) = x : consolidatePlains' xs
         consolidatePlains' [] = []
         isPlain (Plain _) = True
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
new file mode 100644
index 000000000..4d6b8e69f
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -0,0 +1,357 @@
+{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Writers.Haddock
+   Copyright   : Copyright (C) 2014 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of 'Pandoc' documents to haddock markup.
+
+Haddock:  <http://www.haskell.org/haddock/doc/html/>
+-}
+module Text.Pandoc.Writers.Haddock (writeHaddock) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Data.List ( intersperse, transpose )
+import Text.Pandoc.Pretty
+import Control.Monad.State
+import Text.Pandoc.Readers.TeXMath (readTeXMath')
+import Network.URI (isURI)
+import Data.Default
+
+type Notes = [[Block]]
+data WriterState = WriterState { stNotes :: Notes }
+instance Default WriterState
+  where def = WriterState{ stNotes = [] }
+
+-- | Convert Pandoc to Haddock.
+writeHaddock :: WriterOptions -> Pandoc -> String
+writeHaddock opts document =
+  evalState (pandocToHaddock opts{
+                  writerWrapText = writerWrapText opts } document) def
+
+-- | Return haddock representation of document.
+pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String
+pandocToHaddock opts (Pandoc meta blocks) = do
+  let colwidth = if writerWrapText opts
+                    then Just $ writerColumns opts
+                    else Nothing
+  body <- blockListToHaddock opts blocks
+  st <- get
+  notes' <- notesToHaddock opts (reverse $ stNotes st)
+  let render' :: Doc -> String
+      render' = render colwidth
+  let main = render' $ body <>
+               (if isEmpty notes' then empty else blankline <> notes')
+  metadata <- metaToJSON opts
+               (fmap (render colwidth) . blockListToHaddock opts)
+               (fmap (render colwidth) . inlineListToHaddock opts)
+               meta
+  let context  = defField "body" main
+               $ metadata
+  if writerStandalone opts
+     then return $ renderTemplate' (writerTemplate opts) context
+     else return main
+
+-- | Return haddock representation of notes.
+notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToHaddock opts notes =
+  mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>=
+  return . vsep
+
+-- | Return haddock representation of a note.
+noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToHaddock opts num blocks = do
+  contents  <- blockListToHaddock opts blocks
+  let num' = text $ writerIdentifierPrefix opts ++ show num
+  let marker = text "[" <> num' <> text "]"
+  let markerSize = 4 + offset num'
+  let spacer = case writerTabStop opts - markerSize of
+                     n | n > 0  -> text $ replicate n ' '
+                     _          -> text " "
+  return $ if isEnabled Ext_footnotes opts
+              then hang (writerTabStop opts) (marker <> spacer) contents
+              else marker <> spacer <> contents
+
+-- | Escape special characters for Haddock.
+escapeString :: String -> String
+escapeString = escapeStringUsing haddockEscapes
+  where haddockEscapes = backslashEscapes "\\/'`\"@<"
+
+-- | Convert Pandoc block element to haddock.
+blockToHaddock :: WriterOptions -- ^ Options
+                -> Block         -- ^ Block element
+                -> State WriterState Doc
+blockToHaddock _ Null = return empty
+blockToHaddock opts (Div _ ils) = do
+  contents <- blockListToHaddock opts ils
+  return $ contents <> blankline
+blockToHaddock opts (Plain inlines) = do
+  contents <- inlineListToHaddock opts inlines
+  return $ contents <> cr
+-- title beginning with fig: indicates figure
+blockToHaddock opts (Para [Image alt (src,'f':'i':'g':':':tit)]) =
+  blockToHaddock opts (Para [Image alt (src,tit)])
+blockToHaddock opts (Para inlines) =
+  -- TODO:  if it contains linebreaks, we need to use a @...@ block
+  (<> blankline) `fmap` blockToHaddock opts (Plain inlines)
+blockToHaddock _ (RawBlock f str)
+  | f == "haddock" = do
+      return $ text str <> text "\n"
+  | otherwise = return empty
+blockToHaddock _ HorizontalRule =
+  return $ blankline <> text "--------------" <> blankline
+blockToHaddock opts (Header level (ident,_,_) inlines) = do
+  contents <- inlineListToHaddock opts inlines
+  let attr' = if null ident
+                 then empty
+                 else cr <> text "#" <> text ident <> text "#"
+  return $ nowrap (text (replicate level '=') <> space <> contents)
+                 <> attr' <> blankline
+blockToHaddock _ (CodeBlock (_,_,_) str) =
+  return $ prefixed "> " (text str) <> blankline
+-- Nothing in haddock corresponds to block quotes:
+blockToHaddock opts (BlockQuote blocks) =
+  blockListToHaddock opts blocks
+-- Haddock doesn't have tables.  Use haddock tables in code.
+blockToHaddock opts (Table caption aligns widths headers rows) = do
+  caption' <- inlineListToHaddock opts caption
+  let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
+                     then empty
+                     else blankline <> caption' <> blankline
+  rawHeaders <- mapM (blockListToHaddock opts) headers
+  rawRows <- mapM (mapM (blockListToHaddock opts)) rows
+  let isSimple = all (==0) widths
+  let isPlainBlock (Plain _) = True
+      isPlainBlock _         = False
+  let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
+  (nst,tbl) <- case True of
+                _ | isSimple -> fmap (nest 2,) $
+                         pandocTable opts (all null headers) aligns widths
+                             rawHeaders rawRows
+                  | not hasBlocks -> fmap (nest 2,) $
+                         pandocTable opts (all null headers) aligns widths
+                             rawHeaders rawRows
+                  | otherwise -> fmap (id,) $
+                         gridTable opts (all null headers) aligns widths
+                             rawHeaders rawRows
+  return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline
+blockToHaddock opts (BulletList items) = do
+  contents <- mapM (bulletListItemToHaddock opts) items
+  return $ cat contents <> blankline
+blockToHaddock opts (OrderedList (start,sty,delim) items) = do
+  let attribs = (start, sty, delim)
+  let markers  = orderedListMarkers attribs
+  let markers' = map (\m -> if length m < 3
+                               then m ++ replicate (3 - length m) ' '
+                               else m) markers
+  contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $
+              zip markers' items
+  return $ cat contents <> blankline
+blockToHaddock opts (DefinitionList items) = do
+  contents <- mapM (definitionListItemToHaddock opts) items
+  return $ cat contents <> blankline
+
+pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
+            -> [Doc] -> [[Doc]] -> State WriterState Doc
+pandocTable opts headless aligns widths rawHeaders rawRows =  do
+  let isSimple = all (==0) widths
+  let alignHeader alignment = case alignment of
+                                AlignLeft    -> lblock
+                                AlignCenter  -> cblock
+                                AlignRight   -> rblock
+                                AlignDefault -> lblock
+  let numChars = maximum . map offset
+  let widthsInChars = if isSimple
+                         then map ((+2) . numChars)
+                              $ transpose (rawHeaders : rawRows)
+                         else map
+                              (floor . (fromIntegral (writerColumns opts) *))
+                              widths
+  let makeRow = hcat . intersperse (lblock 1 (text " ")) .
+                   (zipWith3 alignHeader aligns widthsInChars)
+  let rows' = map makeRow rawRows
+  let head' = makeRow rawHeaders
+  let maxRowHeight = maximum $ map height (head':rows')
+  let underline = cat $ intersperse (text " ") $
+                  map (\width -> text (replicate width '-')) widthsInChars
+  let border = if maxRowHeight > 1
+                  then text (replicate (sum widthsInChars +
+                          length widthsInChars - 1) '-')
+                  else if headless
+                          then underline
+                          else empty
+  let head'' = if headless
+                  then empty
+                  else border <> cr <> head'
+  let body = if maxRowHeight > 1
+                then vsep rows'
+                else vcat rows'
+  let bottom = if headless
+                  then underline
+                  else border
+  return $ head'' $$ underline $$ body $$ bottom
+
+gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double]
+          -> [Doc] -> [[Doc]] -> State WriterState Doc
+gridTable opts headless _aligns widths headers' rawRows =  do
+  let numcols = length headers'
+  let widths' = if all (==0) widths
+                   then replicate numcols (1.0 / fromIntegral numcols)
+                   else widths
+  let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths'
+  let hpipeBlocks blocks = hcat [beg, middle, end]
+        where h       = maximum (map height blocks)
+              sep'    = lblock 3 $ vcat (map text $ replicate h " | ")
+              beg     = lblock 2 $ vcat (map text $ replicate h "| ")
+              end     = lblock 2 $ vcat (map text $ replicate h " |")
+              middle  = chomp $ hcat $ intersperse sep' blocks
+  let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+  let head' = makeRow headers'
+  let rows' = map (makeRow . map chomp) rawRows
+  let border ch = char '+' <> char ch <>
+                  (hcat $ intersperse (char ch <> char '+' <> char ch) $
+                          map (\l -> text $ replicate l ch) widthsInChars) <>
+                  char ch <> char '+'
+  let body = vcat $ intersperse (border '-') rows'
+  let head'' = if headless
+                  then empty
+                  else head' $$ border '='
+  return $ border '-' $$ head'' $$ body $$ border '-'
+
+-- | Convert bullet list item (list of blocks) to haddock
+bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToHaddock opts items = do
+  contents <- blockListToHaddock opts items
+  let sps = replicate (writerTabStop opts - 2) ' '
+  let start = text ('-' : ' ' : sps)
+  -- remove trailing blank line if it is a tight list
+  let contents' = case reverse items of
+                       (BulletList xs:_) | isTightList xs ->
+                            chomp contents <> cr
+                       (OrderedList _ xs:_) | isTightList xs ->
+                            chomp contents <> cr
+                       _ -> contents
+  return $ hang (writerTabStop opts) start $ contents' <> cr
+
+-- | Convert ordered list item (a list of blocks) to haddock
+orderedListItemToHaddock :: WriterOptions -- ^ options
+                          -> String        -- ^ list item marker
+                          -> [Block]       -- ^ list item (list of blocks)
+                          -> State WriterState Doc
+orderedListItemToHaddock opts marker items = do
+  contents <- blockListToHaddock opts items
+  let sps = case length marker - writerTabStop opts of
+                   n | n > 0 -> text $ replicate n ' '
+                   _         -> text " "
+  let start = text marker <> sps
+  return $ hang (writerTabStop opts) start $ contents <> cr
+
+-- | Convert definition list item (label, list of blocks) to haddock
+definitionListItemToHaddock :: WriterOptions
+                             -> ([Inline],[[Block]])
+                             -> State WriterState Doc
+definitionListItemToHaddock opts (label, defs) = do
+  labelText <- inlineListToHaddock opts label
+  defs' <- mapM (mapM (blockToHaddock opts)) defs
+  let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs'
+  return $ nowrap (brackets labelText) <> cr <> contents <> cr
+
+-- | Convert list of Pandoc block elements to haddock
+blockListToHaddock :: WriterOptions -- ^ Options
+                    -> [Block]       -- ^ List of block elements
+                    -> State WriterState Doc
+blockListToHaddock opts blocks =
+  mapM (blockToHaddock opts) blocks >>= return . cat
+
+-- | Convert list of Pandoc inline elements to haddock.
+inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToHaddock opts lst =
+  mapM (inlineToHaddock opts) lst >>= return . cat
+
+-- | Convert Pandoc inline element to haddock.
+inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc
+inlineToHaddock opts (Span (ident,_,_) ils) = do
+  contents <- inlineListToHaddock opts ils
+  if not (null ident) && null ils
+     then return $ "#" <> text ident <> "#"
+     else return contents
+inlineToHaddock opts (Emph lst) = do
+  contents <- inlineListToHaddock opts lst
+  return $ "/" <> contents <> "/"
+inlineToHaddock opts (Strong lst) = do
+  contents <- inlineListToHaddock opts lst
+  return $ "__" <> contents <> "__"
+inlineToHaddock opts (Strikeout lst) = do
+  contents <- inlineListToHaddock opts lst
+  -- not supported in haddock, but we fake it:
+  return $ "~~" <> contents <> "~~"
+-- not supported in haddock:
+inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst
+-- not supported in haddock:
+inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst
+-- not supported in haddock:
+inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst
+inlineToHaddock opts (Quoted SingleQuote lst) = do
+  contents <- inlineListToHaddock opts lst
+  return $ "‘" <> contents <> "’"
+inlineToHaddock opts (Quoted DoubleQuote lst) = do
+  contents <- inlineListToHaddock opts lst
+  return $ "“" <> contents <> "”"
+inlineToHaddock _ (Code _ str) =
+  return $ "@" <> text (escapeString str) <> "@"
+inlineToHaddock _ (Str str) = do
+  return $ text $ escapeString str
+inlineToHaddock opts (Math mt str) = do
+  let adjust x = case mt of
+                      DisplayMath -> cr <> x <> cr
+                      InlineMath  -> x
+  adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str)
+inlineToHaddock _ (RawInline f str)
+  | f == "haddock" = return $ text str
+  | otherwise = return empty
+-- no line break in haddock (see above on CodeBlock)
+inlineToHaddock _ (LineBreak) = return cr
+inlineToHaddock _ Space = return space
+inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst
+inlineToHaddock opts (Link txt (src, _)) = do
+  linktext <- inlineListToHaddock opts txt
+  let useAuto = isURI src &&
+                case txt of
+                      [Str s] | escapeURI s == src -> True
+                      _                            -> False
+  return $ nowrap $ "<" <> text src <>
+           (if useAuto then empty else space <> linktext) <> ">"
+inlineToHaddock opts (Image alternate (source, tit)) = do
+  linkhaddock <- inlineToHaddock opts (Link alternate (source, tit))
+  return $ "<" <> linkhaddock <> ">"
+-- haddock doesn't have notes, but we can fake it:
+inlineToHaddock opts (Note contents) = do
+  modify (\st -> st{ stNotes = contents : stNotes st })
+  st <- get
+  let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st)
+  return $ "[" <> ref <> "]"
-- 
cgit v1.2.3


From ff6a2baeb9940276fbbaf486a5711378d13cc1e1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 17:49:59 -0700
Subject: More polish on Haddock reader/writer.

---
 src/Text/Pandoc/Readers/Haddock.hs | 46 +++++++++++++++++++++++++++++++++-----
 src/Text/Pandoc/Writers/Haddock.hs | 23 +++++--------------
 2 files changed, 47 insertions(+), 22 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index f184eabdb..a3dfb7c3c 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -45,7 +45,9 @@ docHToBlocks d' =
     DocString _ -> inlineFallback
     DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
     DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
-    DocParagraph ils -> B.para $ docHToInlines False ils
+    DocParagraph x -> let (ils, rest) = getInlines x
+                       in (B.para $ docHToInlines False ils)
+                          <> docHToBlocks rest
     DocIdentifier _ -> inlineFallback
     DocIdentifierUnchecked _ -> inlineFallback
     DocModule s -> B.plain $ docHToInlines False $ DocModule s
@@ -60,7 +62,7 @@ docHToBlocks d' =
     DocDefList items -> B.definitionList (map (\(d,t) ->
                                (docHToInlines False d,
                                 [consolidatePlains $ docHToBlocks t])) items)
-    DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s
+    DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s
     DocCodeBlock d -> B.para $ docHToInlines True d
     DocHyperlink _ -> inlineFallback
     DocPic _ -> inlineFallback
@@ -92,9 +94,9 @@ docHToInlines isCode d' =
                               $ map B.code $ splitBy (=='\n') s
       | otherwise  -> B.text s
     DocParagraph _ -> mempty
-    DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s
-    DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s
-    DocModule s -> B.codeWith ("",["haskell"],[]) s
+    DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s
+    DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s
+    DocModule s -> B.codeWith ("",["haskell","module"],[]) s
     DocWarning _ -> mempty -- TODO
     DocEmphasis d -> B.emph (docHToInlines isCode d)
     DocMonospaced (DocString s) -> B.code s
@@ -113,6 +115,40 @@ docHToInlines isCode d' =
     DocProperty _ -> mempty
     DocExamples _ -> mempty
 
+getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier)
+getInlines (DocAppend x y) = if isInline x
+                                then let (a, b) = getInlines y
+                                     in  (DocAppend x a, b)
+                                else (DocEmpty, DocAppend x y)
+getInlines x = if isInline x
+                  then (x, DocEmpty)
+                  else (DocEmpty, x)
+
+isInline :: DocH String Identifier -> Bool
+isInline d' =
+  case d' of
+    DocEmpty -> True
+    DocAppend d1 _ -> isInline d1
+    DocString _ -> True
+    DocParagraph _ -> False
+    DocIdentifier _ -> True
+    DocIdentifierUnchecked _ -> True
+    DocModule _ -> True
+    DocWarning _ -> True
+    DocEmphasis _ -> True
+    DocMonospaced _ -> True
+    DocBold _ -> True
+    DocHeader _ -> False
+    DocUnorderedList _ -> False
+    DocOrderedList _ -> False
+    DocDefList _ -> False
+    DocCodeBlock _ -> False
+    DocHyperlink _ -> True
+    DocPic _ -> True
+    DocAName _ -> True
+    DocProperty _ -> False
+    DocExamples _ -> False
+
 -- | Create an 'Example', stripping superfluous characters as appropriate
 makeExample :: String -> String -> [String] -> Blocks
 makeExample prompt expression result =
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 4d6b8e69f..36f57c2b7 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -80,22 +80,11 @@ pandocToHaddock opts (Pandoc meta blocks) = do
 -- | Return haddock representation of notes.
 notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc
 notesToHaddock opts notes =
-  mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>=
-  return . vsep
-
--- | Return haddock representation of a note.
-noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc
-noteToHaddock opts num blocks = do
-  contents  <- blockListToHaddock opts blocks
-  let num' = text $ writerIdentifierPrefix opts ++ show num
-  let marker = text "[" <> num' <> text "]"
-  let markerSize = 4 + offset num'
-  let spacer = case writerTabStop opts - markerSize of
-                     n | n > 0  -> text $ replicate n ' '
-                     _          -> text " "
-  return $ if isEnabled Ext_footnotes opts
-              then hang (writerTabStop opts) (marker <> spacer) contents
-              else marker <> spacer <> contents
+  if null notes
+     then return empty
+     else do
+       contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes
+       return $ text "#notes#" <> blankline <> contents
 
 -- | Escape special characters for Haddock.
 escapeString :: String -> String
@@ -354,4 +343,4 @@ inlineToHaddock opts (Note contents) = do
   modify (\st -> st{ stNotes = contents : stNotes st })
   st <- get
   let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st)
-  return $ "[" <> ref <> "]"
+  return $ "<#notes [" <> ref <> "]>"
-- 
cgit v1.2.3


From c4182b39ca009f02fc4e0768056d37d64b93df7c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 18:08:41 -0700
Subject: Small fix to haddock "tables".

---
 src/Text/Pandoc/Writers/Haddock.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 36f57c2b7..1939d3a6d 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -129,7 +129,7 @@ blockToHaddock opts (BlockQuote blocks) =
 -- Haddock doesn't have tables.  Use haddock tables in code.
 blockToHaddock opts (Table caption aligns widths headers rows) = do
   caption' <- inlineListToHaddock opts caption
-  let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
+  let caption'' = if null caption
                      then empty
                      else blankline <> caption' <> blankline
   rawHeaders <- mapM (blockListToHaddock opts) headers
@@ -148,7 +148,7 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do
                   | otherwise -> fmap (id,) $
                          gridTable opts (all null headers) aligns widths
                              rawHeaders rawRows
-  return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline
+  return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline
 blockToHaddock opts (BulletList items) = do
   contents <- mapM (bulletListItemToHaddock opts) items
   return $ cat contents <> blankline
-- 
cgit v1.2.3


From de7b3a3d08264d6aa755436583d53f9a61252fa2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 18 Jun 2014 18:11:01 -0700
Subject: Haddock writer:  Only use Decimal list style.

---
 src/Text/Pandoc/Writers/Haddock.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 1939d3a6d..59d979ea8 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -152,8 +152,8 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do
 blockToHaddock opts (BulletList items) = do
   contents <- mapM (bulletListItemToHaddock opts) items
   return $ cat contents <> blankline
-blockToHaddock opts (OrderedList (start,sty,delim) items) = do
-  let attribs = (start, sty, delim)
+blockToHaddock opts (OrderedList (start,_,delim) items) = do
+  let attribs = (start, Decimal, delim)
   let markers  = orderedListMarkers attribs
   let markers' = map (\m -> if length m < 3
                                then m ++ replicate (3 - length m) ' '
-- 
cgit v1.2.3


From 00281559bf9c955ece6b18d48ef487fdc5f4406e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 19 Jun 2014 00:28:23 -0700
Subject: Haddock writer:  Use _____ for hrule.

Avoids interpretation as list.
---
 src/Text/Pandoc/Writers/Haddock.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 59d979ea8..1c82839d0 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -112,8 +112,8 @@ blockToHaddock _ (RawBlock f str)
   | f == "haddock" = do
       return $ text str <> text "\n"
   | otherwise = return empty
-blockToHaddock _ HorizontalRule =
-  return $ blankline <> text "--------------" <> blankline
+blockToHaddock opts HorizontalRule =
+  return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline
 blockToHaddock opts (Header level (ident,_,_) inlines) = do
   contents <- inlineListToHaddock opts inlines
   let attr' = if null ident
-- 
cgit v1.2.3


From 5cb53a48d541b97b5f60968715a5969133196d70 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 19 Jun 2014 14:30:03 -0700
Subject: ImageSize:  ignore unknown exif header tag rather than crashing.

Some images seem to have tag type of 256, which was causing
a runtime error.
---
 src/Text/Pandoc/ImageSize.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index a6d076fa9..9e6b457c0 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -217,7 +217,7 @@ exifHeader hdr = do
   numentries <- getWord16
   let ifdEntry = do
        tag <- getWord16 >>= \t ->
-                maybe (fail $ "Unknown tag type " ++ show t) return
+                maybe (return UnknownTagType) return
                 (M.lookup t tagTypeTable)
        dataFormat <- getWord16
        numComponents <- getWord32
@@ -337,6 +337,7 @@ data TagType = ImageDescription
              | SensingMethod
              | FileSource
              | SceneType
+             | UnknownTagType
              deriving (Show, Eq, Ord)
 
 tagTypeTable :: M.Map Word16 TagType
-- 
cgit v1.2.3


From 86fc44d6b3f82a2b274d4b592d1dd6152bd1eaf5 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Thu, 19 Jun 2014 17:53:52 -0400
Subject: Add literal tabs to parser.

---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 22e9dd909..18200bcf9 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem]
          | Endnote String
            deriving Show
 
-data RunElem = TextRun String | LnBrk
+data RunElem = TextRun String | LnBrk | Tab
              deriving Show
 
 data RunStyle = RunStyle { isBold :: Bool
@@ -545,6 +545,9 @@ elemToRunElem ns element
   | qName (elName element) == "br" &&
     qURI (elName element) == (lookup "w" ns) =
       Just $ LnBrk
+  | qName (elName element) == "tab" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ Tab
   | otherwise = Nothing
 
 
-- 
cgit v1.2.3


From 0e7d2dbd4304902cb6c6d4e9618592b5148dc598 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Thu, 19 Jun 2014 17:55:02 -0400
Subject: Have Docx reader properly interpret tabs.

---
 src/Text/Pandoc/Readers/Docx.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4035cde99..c43879ed9 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -134,10 +134,12 @@ codeDivs = ["SourceCode"]
 runElemToInlines :: RunElem -> [Inline]
 runElemToInlines (TextRun s) = strToInlines s
 runElemToInlines (LnBrk) = [LineBreak]
+runElemToInlines (Tab) = [Space]
 
 runElemToString :: RunElem -> String
 runElemToString (TextRun s) = s
 runElemToString (LnBrk) = ['\n']
+runElemToString (Tab) = ['\t']
 
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
-- 
cgit v1.2.3


From a934db9a320ec76e15e62954b75d0e8d2d972244 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Thu, 19 Jun 2014 19:28:55 -0400
Subject: Introduce blockNormalize

This will help take care of spaces introduced at the beginning of strings.
---
 src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index c43879ed9..9c1d0c5e6 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -150,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils
 strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
 strNormalize (il:ils) = il : (strNormalize ils)
 
+blockNormalize :: Block -> Block
+blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils)
+blockNormalize (Plain ils) = Plain $ strNormalize ils
+blockNormalize (Para (Space : ils)) = blockNormalize (Para ils)
+blockNormalize (Para ils) = Para $ strNormalize ils
+blockNormalize (Header n attr (Space : ils)) =
+  blockNormalize $ Header n attr ils
+blockNormalize (Table (Space : ils) align width hdr cells) =
+  blockNormalize $ Table ils align width hdr cells
+blockNormalize (Table ils align width hdr cells) =
+  Table (strNormalize ils) align width hdr cells
+blockNormalize blk = blk
+
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
 runToInlines _ _ (Run rs runElems)
   | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
@@ -296,7 +309,7 @@ makeImagesSelfContained _ inline = inline
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
   bottomUp removeEmptyPars $
-  bottomUp strNormalize $
+  bottomUp blockNormalize $
   bottomUp spanRemove $
   bottomUp divRemove $
   map (makeHeaderAnchors) $
-- 
cgit v1.2.3


From 3c059dbe600608f4166b02c63d7153ace3156665 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 19 Jun 2014 23:24:28 -0700
Subject: HTML reader:  Allow space between `<col>` and `</col>`.

Test case:

```
<table border="1">
  <colgroup>
    <col> </col>
    <col></col>
  </colgroup>
  <tbody>
    <tr>
        <td>X</td>
        <td>Y</td>
    </tr>
    <tr>
        <td>1</td>
        <td>2</td>
    </tr>
  </tbody>
</table>

```
---
 src/Text/Pandoc/Readers/HTML.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d27afc543..204239923 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -262,6 +262,7 @@ pTable = try $ do
 pCol :: TagParser Double
 pCol = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
+  skipMany pBlank
   optional $ pSatisfy (~== TagClose "col")
   skipMany pBlank
   return $ case lookup "width" attribs of
-- 
cgit v1.2.3


From 557b302731411057cf12e62c87d98752f713d5d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 19 Jun 2014 23:31:17 -0700
Subject: Docx writer:  Use Compact style for empty table cells.

Otherwise we get overly tall lines when there are empty
table cells and the other cells are compact.

Closes #1353.
---
 src/Text/Pandoc/Writers/Docx.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 4e64a79df..31e64f14e 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -571,10 +571,12 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
                     [ mknode "w:tcBorders" []
                       $ mknode "w:bottom" [("w:val","single")] ()
                     , mknode "w:vAlign" [("w:val","bottom")] () ]
+  let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] $
+                    [mknode "w:pStyle" [("w:val","Compact")] ()]]]
   let mkcell border contents = mknode "w:tc" []
                             $ [ borderProps | border ] ++
                             if null contents
-                               then [mknode "w:p" [] ()]
+                               then emptyCell
                                else contents
   let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
-- 
cgit v1.2.3


From 7fd48b30e0a6e1e3c02a2b66c76118d10c02636f Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 20 Jun 2014 09:30:30 -0400
Subject: Docx reader: Fix hdr handling in block norm

`blockNormalize` previously forgot to account for the case in which a
Header's inlines did not start with a space.
---
 src/Text/Pandoc/Readers/Docx.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 9c1d0c5e6..84d50a396 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -157,6 +157,8 @@ blockNormalize (Para (Space : ils)) = blockNormalize (Para ils)
 blockNormalize (Para ils) = Para $ strNormalize ils
 blockNormalize (Header n attr (Space : ils)) =
   blockNormalize $ Header n attr ils
+blockNormalize (Header n attr ils) =
+  Header n attr $ strNormalize ils
 blockNormalize (Table (Space : ils) align width hdr cells) =
   blockNormalize $ Table ils align width hdr cells
 blockNormalize (Table ils align width hdr cells) =
-- 
cgit v1.2.3


From 3da515bdb005cf16589b88d80aa4a8a71760e366 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 20 Jun 2014 10:12:28 -0400
Subject: Docx reader: simplify blockNormalize

Use a function `stripSpaces`, instead of recursion. Makes it a bit
easier to read and mantain, and simplify normalizing DefinitionList,
which was left out the first time.
---
 src/Text/Pandoc/Readers/Docx.hs | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 84d50a396..08afc94e6 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -150,19 +150,17 @@ strNormalize (Str "" : ils) = strNormalize ils
 strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
 strNormalize (il:ils) = il : (strNormalize ils)
 
+stripSpaces :: [Inline] -> [Inline]
+stripSpaces ils =
+  reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
+
 blockNormalize :: Block -> Block
-blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils)
-blockNormalize (Plain ils) = Plain $ strNormalize ils
-blockNormalize (Para (Space : ils)) = blockNormalize (Para ils)
-blockNormalize (Para ils) = Para $ strNormalize ils
-blockNormalize (Header n attr (Space : ils)) =
-  blockNormalize $ Header n attr ils
+blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils
+blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils
 blockNormalize (Header n attr ils) =
-  Header n attr $ strNormalize ils
-blockNormalize (Table (Space : ils) align width hdr cells) =
-  blockNormalize $ Table ils align width hdr cells
+  Header n attr $ strNormalize $ stripSpaces ils
 blockNormalize (Table ils align width hdr cells) =
-  Table (strNormalize ils) align width hdr cells
+  Table (strNormalize $ stripSpaces ils) align width hdr cells
 blockNormalize blk = blk
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-- 
cgit v1.2.3


From 03af19a7e12ff3a7f0a396ebed73c6c17f12ad07 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 20 Jun 2014 10:16:32 -0400
Subject: Docx Reader: Normalize DefinitionLists

Previously DefinitionList had been left out of `blockNormalize`. Now it
is included.
---
 src/Text/Pandoc/Readers/Docx.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 08afc94e6..e62cf6f0e 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -161,6 +161,8 @@ blockNormalize (Header n attr ils) =
   Header n attr $ strNormalize $ stripSpaces ils
 blockNormalize (Table ils align width hdr cells) =
   Table (strNormalize $ stripSpaces ils) align width hdr cells
+blockNormalize (DefinitionList pairs) =
+  DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs
 blockNormalize blk = blk
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-- 
cgit v1.2.3


From 2aa5f58c5b82dd5750e1bf5f30e1936d132104ac Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 20 Jun 2014 10:27:18 -0400
Subject: Docx reader: Add a comment explaining strNormalize

`normalize` from Text.Pandoc.Shared is more general. In tests, though,
it more than doubles the run time. `strNormalize` does less, but it does
what we need. This comment is added for future maintainability.
---
 src/Text/Pandoc/Readers/Docx.hs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index e62cf6f0e..8a8bc46a6 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -144,6 +144,10 @@ runElemToString (Tab) = ['\t']
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
 
+--- We use this instead of the more general
+--- Text.Pandoc.Shared.normalize for reasons of efficiency. For
+--- whatever reason, `normalize` makes a run take almost twice as
+--- long. (It does more, but this does what we need)
 strNormalize :: [Inline] -> [Inline]
 strNormalize [] = []
 strNormalize (Str "" : ils) = strNormalize ils
-- 
cgit v1.2.3


From f6ae644831aa40f7199ecfc9a1631880639b6312 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 20 Jun 2014 12:31:36 -0400
Subject: Make strNormalize go bottomUp.

This was how it used to be before it was folded into blockNormalize.
---
 src/Text/Pandoc/Readers/Docx.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 8a8bc46a6..479a88161 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -159,14 +159,14 @@ stripSpaces ils =
   reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
 
 blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils
-blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils
+blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils
+blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils
 blockNormalize (Header n attr ils) =
-  Header n attr $ strNormalize $ stripSpaces ils
+  Header n attr $ bottomUp strNormalize $ stripSpaces ils
 blockNormalize (Table ils align width hdr cells) =
-  Table (strNormalize $ stripSpaces ils) align width hdr cells
+  Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells
 blockNormalize (DefinitionList pairs) =
-  DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs
+  DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs
 blockNormalize blk = blk
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-- 
cgit v1.2.3


From 12efffa85a257dbe81137f97334b2c6a7e072777 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 10:24:30 -0700
Subject: LaTeX writer: Fixed strikeout + highlighted code. Closes #1294.

Previously strikeout highlighted code caused an error.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index c221b318e..ed735242f 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -678,7 +678,9 @@ inlineToLaTeX (Emph lst) =
 inlineToLaTeX (Strong lst) =
   inlineListToLaTeX lst >>= return . inCmd "textbf"
 inlineToLaTeX (Strikeout lst) = do
-  contents <- inlineListToLaTeX lst
+  -- we need to protect VERB in an mbox or we get an error
+  -- see #1294
+  contents <- inlineListToLaTeX $ protectCode lst
   modify $ \s -> s{ stStrikeout = True }
   return $ inCmd "sout" contents
 inlineToLaTeX (Superscript lst) =
@@ -784,6 +786,13 @@ inlineToLaTeX (Note contents) = do
        -- note: a \n before } needed when note ends with a Verbatim environment
        else "\\footnote" <> braces noteContents
 
+protectCode :: [Inline] -> [Inline]
+protectCode [] = []
+protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs
+protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs
+  where ltx = RawInline (Format "latex")
+protectCode (x : xs) = x : protectCode xs
+
 citationsToNatbib :: [Citation] -> State WriterState Doc
 citationsToNatbib (one:[])
   = citeCommand c p s k
-- 
cgit v1.2.3


From cab4b829b3683cec1def11d7189b5a850f23b016 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 10:39:24 -0700
Subject: Support --trace in HTML reader.

---
 src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 204239923..7bbad4257 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,6 +50,8 @@ import Data.Char ( isDigit )
 import Control.Monad ( liftM, guard, when, mzero )
 import Control.Applicative ( (<$>), (<$), (<*) )
 import Data.Monoid
+import Text.Printf (printf)
+import Debug.Trace (trace)
 
 isSpace :: Char -> Bool
 isSpace ' '  = True
@@ -92,7 +94,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
                return mempty
 
 block :: TagParser Blocks
-block = choice
+block = do
+  tr <- getOption readerTrace
+  pos <- getPosition
+  res <- choice
             [ pPara
             , pHeader
             , pBlockQuote
@@ -106,6 +111,10 @@ block = choice
             , pDiv
             , pRawHtmlBlock
             ]
+  when tr $ trace (printf "line %d: %s" (sourceLine pos)
+             (take 60 $ show $ B.toList res)) (return ())
+  return res
+
 
 pList :: TagParser Blocks
 pList = pBulletList <|> pOrderedList <|> pDefinitionList
-- 
cgit v1.2.3


From b3b40546cb5ad00ee6fadcd83bcc38854fb137ae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 10:47:29 -0700
Subject: HTML reader:  Fix performance issue with malformed HTML tables.

We let a `</table>` tag close an open `<tr>` or `<td>`.
Closes #1167.
---
 src/Text/Pandoc/Readers/HTML.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7bbad4257..552e8a251 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -471,6 +471,8 @@ pCloses tagtype = try $ do
        (TagClose "ul") | tagtype == "li" -> return ()
        (TagClose "ol") | tagtype == "li" -> return ()
        (TagClose "dl") | tagtype == "li" -> return ()
+       (TagClose "table") | tagtype == "td" -> return ()
+       (TagClose "table") | tagtype == "tr" -> return ()
        _ -> mzero
 
 pTagText :: TagParser Inlines
-- 
cgit v1.2.3


From 2eadc7805392c165f7286bd9a337b310b41c897d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 10:58:26 -0700
Subject: ImageSize:  Use default instead of failing if image size not found

in exif header.  Closes #1358.
---
 src/Text/Pandoc/ImageSize.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9e6b457c0..68b34dcf3 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -76,6 +76,9 @@ imageSize img = do
        Eps  -> epsSize img
        Pdf  -> Nothing  -- TODO
 
+defaultSize :: (Integer, Integer)
+defaultSize = (72, 72)
+
 sizeInPixels :: ImageSize -> (Integer, Integer)
 sizeInPixels s = (pxX s, pxY s)
 
@@ -260,7 +263,9 @@ exifHeader hdr = do
                            lookup ExifImageHeight allentries) of
                           (Just (UnsignedLong w), Just (UnsignedLong h)) ->
                             return (fromIntegral w, fromIntegral h)
-                          _ -> fail "Could not determine image width, height"
+                          _ -> return defaultSize
+                               -- we return a default width and height when
+                               -- the exif header doesn't contain these
   let resfactor = case lookup ResolutionUnit allentries of
                         Just (UnsignedShort 1) -> (100 / 254)
                         _ -> 1
-- 
cgit v1.2.3


From 56c410ef6a3b7c3dd4054b18ea667cb4ab5a5ee0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 11:10:35 -0700
Subject: Markdown reader: Prevent spurious line breaks after list items.

When the `hard_line_breaks` option was specified, pandoc would
produce a spurious line break after a tight list item.  This
patch solves the problem.  Closes #1137.
---
 src/Text/Pandoc/Readers/Markdown.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a6720beba..2c9064994 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1560,7 +1560,8 @@ endline = try $ do
   guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
   guardDisabled Ext_backtick_code_blocks <|>
      notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
-  (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+  (eof >> return mempty)
+    <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
     <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
     <|> (return $ return B.space)
 
-- 
cgit v1.2.3


From d81b4358ea8d0da615c01a468196fd0592934af6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 11:26:38 -0700
Subject: LaTeX writer:  Correctly handle figures in notes.

Notes can't contain figures in LaTeX, so we fake it to avoid
an error.  Closes #1053.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index ed735242f..f2f7438c4 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -311,12 +311,14 @@ blockToLaTeX (Plain lst) =
   inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
 -- title beginning with fig: indicates that the image is a figure
 blockToLaTeX (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
-  capt <- if null txt
-             then return empty
-             else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt
+  inNote <- gets stInNote
+  capt <- inlineListToLaTeX txt
   img <- inlineToLaTeX (Image txt (src,tit))
-  return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$
-           capt $$ "\\end{figure}"
+  return $ if inNote
+              -- can't have figures in notes
+              then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}"
+              else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$
+                      ("\\caption" <> braces capt) $$ "\\end{figure}"
 -- . . . indicates pause in beamer slides
 blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do
   beamer <- writerBeamer `fmap` gets stOptions
-- 
cgit v1.2.3


From 8f20ac3da3b0d1111f94161d5b3528dfa94d1069 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 11:39:24 -0700
Subject: MediaWiki reader: Support --trace.

---
 src/Text/Pandoc/Readers/MediaWiki.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e4fabc898..eb9dd5b8a 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -55,6 +55,8 @@ import qualified Data.Foldable as F
 import qualified Data.Map as M
 import Data.Char (isDigit, isSpace)
 import Data.Maybe (fromMaybe)
+import Text.Printf (printf)
+import Debug.Trace (trace)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
 readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -187,7 +189,10 @@ parseMediaWiki = do
 --
 
 block :: MWParser Blocks
-block =  mempty <$ skipMany1 blankline
+block = do
+  tr <- getOption readerTrace
+  pos <- getPosition
+  res <- mempty <$ skipMany1 blankline
      <|> table
      <|> header
      <|> hrule
@@ -199,6 +204,10 @@ block =  mempty <$ skipMany1 blankline
      <|> blockTag
      <|> (B.rawBlock "mediawiki" <$> template)
      <|> para
+  when tr $
+    trace (printf "line %d: %s" (sourceLine pos)
+           (take 60 $ show $ B.toList res)) (return ())
+  return res
 
 para :: MWParser Blocks
 para = do
-- 
cgit v1.2.3


From d397a66107a932e702d0f9cbba5df3ce09be25fd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 12:00:26 -0700
Subject: MediaWiki reader: Tightened up template parsing.

The opening "{{" must be followed by an alphanumeric or ':'.
This prevents the exponential slowdown in #1033.
Closes #1033.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index eb9dd5b8a..f1dcce8f7 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -317,6 +317,7 @@ template :: MWParser String
 template = try $ do
   string "{{"
   notFollowedBy (char '{')
+  lookAhead $ letter <|> digit <|> char ':'
   let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
   contents <- manyTill chunk (try $ string "}}")
   return $ "{{" ++ concat contents ++ "}}"
-- 
cgit v1.2.3


From 5d0103606f469a6336beb2f7a90f210dd29485bd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 20 Jun 2014 15:24:24 -0700
Subject: Markdown reader:  Support smallcaps through span.

`<span style="font-variant:small-caps;">foo</span>` will be
parsed as a `SmallCaps` inline, and will work in all output
formats that support small caps.

Closes #1360.
---
 src/Text/Pandoc/Readers/Markdown.hs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2c9064994..6c710c8ff 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1736,7 +1736,12 @@ spanHtml = try $ do
   let ident = fromMaybe "" $ lookup "id" attrs
   let classes = maybe [] words $ lookup "class" attrs
   let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ B.spanWith (ident, classes, keyvals) <$> contents
+  case lookup "style" keyvals of
+       Just s | null ident && null classes &&
+            map toLower (filter (`notElem` " \t;") s) ==
+                 "font-variant:small-caps"
+         -> return $ B.smallcaps <$> contents
+       _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
 
 divHtml :: MarkdownParser (F Blocks)
 divHtml = try $ do
-- 
cgit v1.2.3


From 9c7e0dc84b2384347099827999f0e2f7be4f7e51 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 21 Jun 2014 17:53:45 -0400
Subject: Implement new normalization.

There were some problems with the old str normalization. This fixes those
problems. Also, since it drills down on its own, it only needs to be
mapped over the blocks, not walked over the tree.
---
 src/Text/Pandoc/Readers/Docx.hs | 68 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 11 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 479a88161..299adf5a8 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -148,25 +148,71 @@ runElemsToString = concatMap runElemToString
 --- Text.Pandoc.Shared.normalize for reasons of efficiency. For
 --- whatever reason, `normalize` makes a run take almost twice as
 --- long. (It does more, but this does what we need)
-strNormalize :: [Inline] -> [Inline]
-strNormalize [] = []
-strNormalize (Str "" : ils) = strNormalize ils
-strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
-strNormalize (il:ils) = il : (strNormalize ils)
+inlineNormalize :: [Inline] -> [Inline]
+inlineNormalize [] = []
+inlineNormalize (Str "" : ils) = inlineNormalize ils
+inlineNormalize ((Str s) : (Str s') : l) =
+  inlineNormalize (Str (s++s') : l)
+inlineNormalize ((Emph ils) : (Emph ils') : l) =
+  inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Emph ils) : l) =
+  Emph (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Strong ils) : (Strong ils') : l) = 
+  inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Strong ils) : l) =
+  Strong (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = 
+  inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Strikeout ils) : l) =
+  Strikeout (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Superscript ils) : (Superscript ils') : l) = 
+  inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Superscript ils) : l) =
+  Superscript (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Subscript ils) : (Subscript ils') : l) = 
+  inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Subscript ils) : l) =
+  Subscript (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Space : Space : l)) =
+  inlineNormalize $ (Space : l)
+inlineNormalize ((Quoted qt ils) : l) =
+  Quoted qt (inlineNormalize ils) : inlineNormalize l
+inlineNormalize ((Cite cits ils) : l) =
+  let
+    f :: Citation -> Citation
+    f (Citation s pref suff mode num hash) =
+      Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
+  in
+   Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Link ils s) : l) =
+  Link (inlineNormalize ils) s : (inlineNormalize l)
+inlineNormalize ((Image ils s) : l) =
+  Image (inlineNormalize ils) s : (inlineNormalize l)
+inlineNormalize ((Note blks) : l) =
+  Note (map blockNormalize blks) : (inlineNormalize l)
+inlineNormalize ((Span attr ils) : l) =
+  Span attr (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize (il : l) = il : (inlineNormalize l)
 
 stripSpaces :: [Inline] -> [Inline]
 stripSpaces ils =
   reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
 
 blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils
-blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils
+blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
+blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
 blockNormalize (Header n attr ils) =
-  Header n attr $ bottomUp strNormalize $ stripSpaces ils
+  Header n attr $ stripSpaces $ inlineNormalize ils
 blockNormalize (Table ils align width hdr cells) =
-  Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells
+  Table (stripSpaces $ inlineNormalize ils) align width hdr cells
 blockNormalize (DefinitionList pairs) =
-  DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs
+  DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
+blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
+blockNormalize (OrderedList attr blkslst) =
+  OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
+blockNormalize (BulletList blkslst) =
+  BulletList $ map (\blks -> map blockNormalize blks) blkslst
+blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
 blockNormalize blk = blk
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
@@ -315,7 +361,7 @@ makeImagesSelfContained _ inline = inline
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
   bottomUp removeEmptyPars $
-  bottomUp blockNormalize $
+  map blockNormalize $
   bottomUp spanRemove $
   bottomUp divRemove $
   map (makeHeaderAnchors) $
-- 
cgit v1.2.3


From 8e5bd9d851aa0f60462015f61e3980b134e3c131 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sun, 22 Jun 2014 01:47:11 -0400
Subject: Docx reader: Fix spacing in formatting.

The normalizing tests revealed a problem with unformatted spaces, brought about
by `spanTrim`. This fixes by not trimming the spaces out of spans until they
are in their final form.
---
 src/Text/Pandoc/Readers/Docx.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 299adf5a8..09c2330fb 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -286,8 +286,8 @@ parPartsToInlines opts docx parparts =
   (if False -- TODO depend on option
       then bottomUp (makeImagesSelfContained docx)
       else id) $
-  bottomUp spanCorrect $
   bottomUp spanTrim $
+  bottomUp spanCorrect $
   bottomUp spanReduce $
   concatMap (parPartToInlines opts docx) parparts
 
-- 
cgit v1.2.3


From e03ed7377cfd3d64d65f186aa76b17417a4e4fde Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 23 Jun 2014 12:41:47 -0700
Subject: Markdown reader: Combine consecutive latex environments.

This helps when you have two minipages which can't have
blank lines between them.

See #690, #1196.
---
 src/Text/Pandoc/Readers/Markdown.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 6c710c8ff..690256224 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -939,8 +939,10 @@ rawVerbatimBlock = try $ do
 rawTeXBlock :: MarkdownParser (F Blocks)
 rawTeXBlock = do
   guardEnabled Ext_raw_tex
-  result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
-        <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
+  result <- (B.rawBlock "latex" . concat <$>
+                  rawLaTeXBlock `sepEndBy1` blankline)
+        <|> (B.rawBlock "context" . concat <$>
+                  rawConTeXtEnvironment `sepEndBy1` blankline)
   spaces
   return $ return result
 
-- 
cgit v1.2.3


From 87ab01637e1dc0f583277828bc458567a72e38ce Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 23 Jun 2014 12:51:10 -0700
Subject: LaTeX writer: Use `\textquotesingle` for `'` in inline code.

Otherwise we get curly quotes in the PDF output.
Closes #1364.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f2f7438c4..100bf900d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -220,6 +220,7 @@ stringToLaTeX  ctx (x:xs) = do
        '>' -> "\\textgreater{}" ++ rest
        '[' -> "{[}" ++ rest  -- to avoid interpretation as
        ']' -> "{]}" ++ rest  -- optional arguments
+       '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest
        '\160' -> "~" ++ rest
        '\x2026' -> "\\ldots{}" ++ rest
        '\x2018' | ligatures -> "`" ++ rest
-- 
cgit v1.2.3


From ef5fad2698f3d4c1fe528f138264cc8abb3b2943 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:25:46 -0400
Subject: Add new typeclass, Reducible

This defines a typeclass `Reducible` which allows us to "reduce" pandoc
Inlines and Blocks, like so

    Emph [Strong [Str "foo", Space]] <++> Strong [Emph [Str "bar"]], Str
"baz"] =
        [Strong [Emph [Str "foo", Space, Str "bar"], Space, Str "baz"]]

So adjacent formattings and strings are appropriately grouped.

Another set of operators for `(Reducible a) => (Many a)` are also
included.
---
 src/Text/Pandoc/Readers/Docx/Reducible.hs | 150 ++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/Docx/Reducible.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
new file mode 100644
index 000000000..1ed31ebd0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -0,0 +1,150 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Text.Pandoc.Readers.Docx.Reducible ((<++>),
+                                           (<+++>),
+                                           Reducible,
+                                           Container(..),
+                                           container,
+                                           innards,
+                                           reduceList,
+                                           reduceListB,
+                                           rebuild)
+       where
+
+import Text.Pandoc.Builder
+import Data.List ((\\), intersect)
+
+data Container a = Container ([a] -> a) | NullContainer
+
+instance (Eq a) => Eq (Container a) where
+  (Container x) == (Container y) = ((x []) == (y []))
+  NullContainer == NullContainer = True
+  _ == _ = False
+
+instance (Show a) => Show (Container a) where
+  show (Container x) = "Container {" ++
+                       (reverse $ drop 3 $ reverse $ show $ x []) ++
+                       "}"
+  show (NullContainer) = "NullContainer"
+
+class Reducible a where
+  (<++>) :: a -> a -> [a]
+  container :: a -> Container a
+  innards :: a -> [a]
+  isSpace :: a -> Bool
+
+(<+++>) :: (Reducible a) => Many a -> Many a -> Many a
+mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms
+
+reduceListB :: (Reducible a) => Many a -> Many a
+reduceListB = fromList . reduceList . toList
+
+reduceList' :: (Reducible a) => [a] -> [a] -> [a]
+reduceList' acc [] = acc
+reduceList' [] (x:xs) = reduceList' [x] xs
+reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs
+
+reduceList :: (Reducible a) => [a] -> [a]
+reduceList = reduceList' []
+
+combineReducibles :: (Reducible a, Eq a) => a -> a -> [a]
+combineReducibles r s =
+  let (conts, rs) = topLevelContainers r
+      (conts', ss) = topLevelContainers s
+      shared = conts `intersect` conts'
+      remaining = conts \\ shared
+      remaining' = conts' \\ shared
+  in
+   case null shared of
+       True -> case (not . null) rs && isSpace (last rs) of
+         True -> rebuild conts (init rs) ++ [last rs, s]
+         False -> [r,s]
+       False -> rebuild 
+                shared $
+                reduceList $
+                (rebuild remaining rs) ++ (rebuild remaining' ss)
+
+instance Reducible Inline where
+  s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) =
+    let classes'  = classes1 `intersect` classes2
+        kvs'      = kvs1 `intersect` kvs2
+        classes1' = classes1 \\ classes'
+        kvs1'     = kvs1 \\ kvs'
+        classes2' = classes2 \\ classes'
+        kvs2'     = kvs2 \\ kvs'
+    in
+     case null classes' && null kvs' of
+       True -> [s1,s2]
+       False -> let attr'  = ("", classes', kvs')
+                    attr1' = (id1, classes1', kvs1')
+                    attr2' = (id2, classes2', kvs2')
+                    s1' = case null classes1' && null kvs1' of
+                      True -> ils1
+                      False -> [Span attr1' ils1]
+                    s2' = case null classes2' && null kvs2' of
+                      True -> ils2
+                      False -> [Span attr2' ils2]
+                in
+                 [Span attr' $ reduceList $ s1' ++ s2']
+
+  (Str x) <++> (Str y) = [Str (x++y)]
+  il <++> il' = combineReducibles il il'
+
+  container (Emph _) = Container Emph
+  container (Strong _) = Container Strong
+  container (Strikeout _) = Container Strikeout
+  container (Subscript _) = Container Subscript
+  container (Superscript _) = Container Superscript
+  container (Quoted qt _) = Container $ Quoted qt
+  container (Cite cs _) = Container $ Cite cs
+  container (Span attr _) = Container $ Span attr
+  container _ = NullContainer
+
+  innards (Emph ils) = ils
+  innards (Strong ils) = ils
+  innards (Strikeout ils) = ils
+  innards (Subscript ils) = ils
+  innards (Superscript ils) = ils
+  innards (Quoted _ ils) = ils
+  innards (Cite _ ils) = ils
+  innards (Span _ ils) = ils
+  innards _ = []
+
+  isSpace Space = True
+  isSpace _     = False
+
+instance Reducible Block where
+  (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = 
+    [Div (ident, classes, kvs) (reduceList blks), blk]
+
+  blk <++> blk' = combineReducibles blk blk'
+
+  container (BlockQuote _) = Container BlockQuote
+  container (Div attr _) = Container $ Div attr
+  container _            = NullContainer
+
+  innards (BlockQuote bs) = bs
+  innards (Div _ bs) = bs
+  innards _          = []
+
+  isSpace _          = False
+
+
+topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a])
+topLevelContainers' (r : []) = case container r of
+  NullContainer -> ([], [r])
+  _             ->
+    let (conts, inns) = topLevelContainers' (innards r)
+    in
+    ((container r) : conts, inns)
+topLevelContainers' rs = ([], rs)
+
+topLevelContainers :: (Reducible a) => a -> ([Container a], [a])
+topLevelContainers il = topLevelContainers' [il]
+
+rebuild :: [Container a] -> [a] -> [a]
+rebuild [] xs = xs
+rebuild ((Container f) : cs) xs = rebuild cs $ [f xs]
+rebuild (NullContainer : cs) xs = rebuild cs $ xs
+  
+  
-- 
cgit v1.2.3


From 94d0fb15382a4855938c540c9e521642bccc00e3 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:27:01 -0400
Subject: Move some of the clean-up logic into List module.

This will allow us to get rid of more general functions we no longer need in
the main reader.
---
 src/Text/Pandoc/Readers/Docx/Lists.hs | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 68559d98b..1e37d0076 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists.
 -}
 
 module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
-                                      , blocksToDefinitions) where
+                                      , blocksToDefinitions
+                                      , listParagraphDivs
+                                      ) where
 
 import Text.Pandoc.JSON
+import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.Shared (trim)
 import Control.Monad
 import Data.List
@@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems
 
 blocksToBullets :: [Block] -> [Block]
 blocksToBullets blks =
-  -- bottomUp removeListItemDivs $ 
+  bottomUp removeListDivs $ 
   flatToBullets $ (handleListParagraphs blks)
 
-
 plainParaInlines :: Block -> [Inline]
 plainParaInlines (Plain ils) = ils
 plainParaInlines (Para ils) = ils
@@ -199,6 +201,23 @@ blocksToDefinitions' [] acc (b:blks) =
 blocksToDefinitions' defAcc acc (b:blks) =
   blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
 
+removeListDivs' :: Block -> [Block]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | "list-item" `elem` classes =
+    case delete "list-item" classes of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) $ blks]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | not $ null $ listParagraphDivs `intersect` classes =
+    case classes \\ listParagraphDivs of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) blks]
+removeListDivs' blk = [blk]
+
+removeListDivs :: [Block] -> [Block]
+removeListDivs = concatMap removeListDivs'
+  
+
 
 blocksToDefinitions :: [Block] -> [Block]
 blocksToDefinitions = blocksToDefinitions' [] []
-- 
cgit v1.2.3


From 11b0778744d0eeb61e2502e452d010631fab979b Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:27:55 -0400
Subject: Use Reducible in docx reader.

This cleans up them implementation, and cuts down on tree-walking.
Anecdotally, I've seen about a 3-fold speedup.
---
 src/Text/Pandoc/Readers/Docx.hs | 384 ++++++++++++----------------------------
 1 file changed, 111 insertions(+), 273 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 09c2330fb..ffe7f5a92 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -79,8 +79,10 @@ import Text.Pandoc.Builder (text, toList)
 import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.MIME (getMimeType)
 import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Walk
 import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
+import Text.Pandoc.Readers.Docx.Reducible
 import Data.Maybe (mapMaybe, isJust, fromJust)
 import Data.List (delete, isPrefixOf, (\\), intersect)
 import qualified Data.ByteString as BS
@@ -96,28 +98,65 @@ readDocx opts bytes =
     Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
     Nothing   -> error $ "couldn't parse docx file"
 
-runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
-runStyleToSpanAttr rPr = ("",
-                          mapMaybe id [
-                            if isBold rPr then (Just "strong") else Nothing,
-                            if isItalic rPr then (Just "emph") else Nothing,
-                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
-                            if isStrike rPr then (Just "strike") else Nothing,
-                            if isSuperScript rPr then (Just "superscript") else Nothing,
-                            if isSubScript rPr then (Just "subscript") else Nothing,
-                            rStyle rPr],
-                          case underline rPr of
-                            Just fmt -> [("underline", fmt)]
-                            _        -> []
-                         )
-
-parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
-parStyleToDivAttr pPr = ("",
-                          pStyle pPr,
-                          case indent pPr of
-                            Just n  -> [("indent", (show n))]
-                            Nothing -> []
-                         )
+spansToKeep :: [String]
+spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans
+
+
+-- This is empty, but we put it in for future-proofing.
+divsToKeep :: [String]
+divsToKeep = []
+
+runStyleToContainers :: RunStyle -> [Container Inline]
+runStyleToContainers rPr =
+  let formatters = mapMaybe id
+                 [ if isBold rPr then (Just Strong) else Nothing
+                 , if isItalic rPr then (Just Emph) else Nothing
+                 , if isSmallCaps rPr then (Just SmallCaps) else Nothing
+                 , if isStrike rPr then (Just Strikeout) else Nothing
+                 , if isSuperScript rPr then (Just Superscript) else Nothing
+                 , if isSubScript rPr then (Just Subscript) else Nothing
+                 , rStyle rPr >>=
+                   (\s -> if s `elem` spansToKeep then Just s else Nothing) >>=
+                   (\s -> Just $ Span ("", [s], []))
+                 , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+                 ]
+  in
+   map Container formatters
+
+
+divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
+divAttrToContainers [] [] = []
+divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
+  let n = fromJust (isHeaderClass c)
+  in
+   [(Container $ \blks ->
+      Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
+divAttrToContainers (c:_) _ | c `elem` codeDivs =
+  [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)]
+divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
+  let kvs' = filter (\(k,_) -> k /= "indent") kvs
+  in
+   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs')
+divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
+  (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
+divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
+  (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
+divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
+divAttrToContainers [] (kv:kvs) | fst kv == "indent" =
+  (Container BlockQuote) : divAttrToContainers [] kvs
+divAttrToContainers [] (_:kvs) =
+  divAttrToContainers [] kvs
+
+
+parStyleToContainers :: ParagraphStyle -> [Container Block]
+parStyleToContainers pPr =
+  let classes = pStyle pPr
+      kvs = case indent pPr of
+        Just n -> [("indent", show n)]
+        Nothing -> []
+  in
+   divAttrToContainers classes kvs
+  
 
 strToInlines :: String -> [Inline]
 strToInlines = toList . text
@@ -144,103 +183,42 @@ runElemToString (Tab) = ['\t']
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
 
---- We use this instead of the more general
---- Text.Pandoc.Shared.normalize for reasons of efficiency. For
---- whatever reason, `normalize` makes a run take almost twice as
---- long. (It does more, but this does what we need)
-inlineNormalize :: [Inline] -> [Inline]
-inlineNormalize [] = []
-inlineNormalize (Str "" : ils) = inlineNormalize ils
-inlineNormalize ((Str s) : (Str s') : l) =
-  inlineNormalize (Str (s++s') : l)
-inlineNormalize ((Emph ils) : (Emph ils') : l) =
-  inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Emph ils) : l) =
-  Emph (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strong ils) : (Strong ils') : l) = 
-  inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strong ils) : l) =
-  Strong (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = 
-  inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strikeout ils) : l) =
-  Strikeout (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Superscript ils) : (Superscript ils') : l) = 
-  inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Superscript ils) : l) =
-  Superscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Subscript ils) : (Subscript ils') : l) = 
-  inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Subscript ils) : l) =
-  Subscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Space : Space : l)) =
-  inlineNormalize $ (Space : l)
-inlineNormalize ((Quoted qt ils) : l) =
-  Quoted qt (inlineNormalize ils) : inlineNormalize l
-inlineNormalize ((Cite cits ils) : l) =
-  let
-    f :: Citation -> Citation
-    f (Citation s pref suff mode num hash) =
-      Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
-  in
-   Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Link ils s) : l) =
-  Link (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Image ils s) : l) =
-  Image (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Note blks) : l) =
-  Note (map blockNormalize blks) : (inlineNormalize l)
-inlineNormalize ((Span attr ils) : l) =
-  Span attr (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize (il : l) = il : (inlineNormalize l)
-
-stripSpaces :: [Inline] -> [Inline]
-stripSpaces ils =
-  reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
-
-blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
-blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
-blockNormalize (Header n attr ils) =
-  Header n attr $ stripSpaces $ inlineNormalize ils
-blockNormalize (Table ils align width hdr cells) =
-  Table (stripSpaces $ inlineNormalize ils) align width hdr cells
-blockNormalize (DefinitionList pairs) =
-  DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
-blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
-blockNormalize (OrderedList attr blkslst) =
-  OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (BulletList blkslst) =
-  BulletList $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
-blockNormalize blk = blk
+
+inlineCodeContainer :: Container Inline -> Bool
+inlineCodeContainer (Container f) = case f [] of
+  Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans)
+  _         -> False
+inlineCodeContainer _ = False
+
+-- blockCodeContainer :: Container Block -> Bool
+-- blockCodeContainer (Container f) = case f [] of
+--   Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs)
+--   _         -> False
+-- blockCodeContainer _ = False
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
 runToInlines _ _ (Run rs runElems)
-  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
-    case runStyleToSpanAttr rs == ("", [], []) of
-      True -> [Str (runElemsToString runElems)]
-      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
-  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
-      True -> concatMap runElemToInlines runElems
-      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
+  | any inlineCodeContainer (runStyleToContainers rs) =
+      rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
+  | otherwise =
+      rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
 runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
   case (getFootNote fnId notes) of
     Just bodyParts ->
-      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
     Nothing        ->
-      [Note [Div ("", ["footnote"], []) []]]
+      [Note []]
 runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
   case (getEndNote fnId notes) of
     Just bodyParts ->
-      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
     Nothing        ->
-      [Note [Div ("", ["endnote"], []) []]]
+      [Note []]
 
 parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
 parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines _ _ (BookMark _ anchor) =
-  [Span (anchor, ["anchor"], []) []]
+parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
+parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
 parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
   case lookupRelationship relid rels of
     Just target -> [Image [] (combine "word" target, "")]
@@ -276,7 +254,6 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
     _ -> h
 makeHeaderAnchors blk = blk
 
-
 parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
 parPartsToInlines opts docx parparts =
   --
@@ -284,23 +261,32 @@ parPartsToInlines opts docx parparts =
   -- not mandatory.
   --
   (if False -- TODO depend on option
-      then bottomUp (makeImagesSelfContained docx)
+      then walk (makeImagesSelfContained docx)
       else id) $
-  bottomUp spanTrim $
-  bottomUp spanCorrect $
-  bottomUp spanReduce $
-  concatMap (parPartToInlines opts docx) parparts
+  -- bottomUp spanTrim $
+  -- bottomUp spanCorrect $
+  -- bottomUp spanReduce $
+  reduceList $ concatMap (parPartToInlines opts docx) parparts
 
 cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
+cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
 
 rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
 rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
 
-bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block
-bodyPartToBlock opts docx (Paragraph pPr parparts) =
-  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
-bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
+bodyPartToBlocks opts docx (Paragraph pPr parparts) =
+  case parPartsToInlines opts docx parparts of
+    [] ->
+      []
+    _ ->
+      let parContents = parPartsToInlines opts docx parparts
+          trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents
+      in
+       rebuild
+       (parStyleToContainers pPr)
+       [Para trimmedContents]
+bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
   let
     kvs = case lookupLevel numId lvl numbering of
       Just (_, fmt, txt, Just start) -> [ ("level", lvl)
@@ -317,12 +303,12 @@ bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parpa
                                         ]
       Nothing                        -> []
   in
-   Div
-   ("", ["list-item"], kvs)
-   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
-bodyPartToBlock _ _ (Tbl _ _ _ []) =
-  Para []
-bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+   [Div
+    ("", ["list-item"], kvs)
+    (bodyPartToBlocks opts docx (Paragraph pPr parparts))]
+bodyPartToBlocks _ _ (Tbl _ _ _ []) =
+  [Para []]
+bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) =
   let caption = strToInlines cap
       (hdr, rows) = case firstRowFormatting look of
         True -> (Just r, rs)
@@ -344,7 +330,8 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
       alignments = take size (repeat AlignDefault)
       widths = take size (repeat 0) :: [Double]
   in
-   Table caption alignments widths hdrCells cells
+   [Table caption alignments widths hdrCells cells]
+
 
 makeImagesSelfContained :: Docx -> Inline -> Inline
 makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
@@ -360,127 +347,19 @@ makeImagesSelfContained _ inline = inline
 
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
-  bottomUp removeEmptyPars $
-  map blockNormalize $
-  bottomUp spanRemove $
-  bottomUp divRemove $
   map (makeHeaderAnchors) $
-  bottomUp divCorrect $
-  bottomUp divReduce $
-  bottomUp divCorrectPreReduce $
   bottomUp blocksToDefinitions $
   blocksToBullets $
-  map (bodyPartToBlock opts docx) bps
+  concatMap (bodyPartToBlocks opts docx) bps
 
 docxToBlocks :: ReaderOptions -> Docx -> [Block]
 docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
 
-spanReduce :: [Inline] -> [Inline]
-spanReduce [] = []
-spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
-  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
-spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
-            s2@(Span (id2, classes2, kvs2) ils2) :
-            ils) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> s1 : (spanReduce (s2 : ils))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
-                           ils)
-spanReduce (il:ils) = il : (spanReduce ils)
 
 ilToCode :: Inline -> String
 ilToCode (Str s) = s
 ilToCode _ = ""
 
-spanRemove' :: Inline -> [Inline]
-spanRemove' s@(Span (ident, classes, _) [])
-  -- "_GoBack" is automatically inserted. We don't want to keep it.
-  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
-spanRemove' (Span (_, _, kvs) ils) =
-  case lookup "underline" kvs of
-    Just val -> [Span ("", [], [("underline", val)]) ils]
-    Nothing  -> ils
-spanRemove' il = [il]
-
-spanRemove :: [Inline] -> [Inline]
-spanRemove = concatMap spanRemove'
-
-spanTrim' :: Inline -> [Inline]
-spanTrim' il@(Span _ []) = [il]
-spanTrim' il@(Span attr (il':[]))
-  | il' == Space = [Span attr [], Space]
-  | otherwise = [il]
-spanTrim' (Span attr ils)
-  | head ils == Space && last ils == Space =
-    [Space, Span attr (init $ tail ils), Space]
-  | head ils == Space = [Space, Span attr (tail ils)]
-  | last ils == Space = [Span attr (init ils), Space]
-spanTrim' il = [il]
-
-spanTrim :: [Inline] -> [Inline]
-spanTrim = concatMap spanTrim'
-
-spanCorrect' :: Inline -> [Inline]
-spanCorrect' (Span ("", [], []) ils) = ils
-spanCorrect' (Span (ident, classes, kvs) ils)
-  | "emph" `elem` classes =
-    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
-  | "strong" `elem` classes =
-      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
-  | "smallcaps" `elem` classes =
-      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
-  | "strike" `elem` classes =
-      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
-  | "superscript" `elem` classes =
-      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
-  | "subscript" `elem` classes =
-      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
-  | (not . null) (codeSpans `intersect` classes) =
-         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
-  | otherwise =
-      [Span (ident, classes, kvs) ils]
-spanCorrect' il = [il]
-
-spanCorrect :: [Inline] -> [Inline]
-spanCorrect = concatMap spanCorrect'
-
-removeEmptyPars :: [Block] -> [Block]
-removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
-
-divReduce :: [Block] -> [Block]
-divReduce [] = []
-divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
-  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
-divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
-           d2@(Div (id2, classes2, kvs2) blks2) :
-            blks) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> d1 : (divReduce (d2 : blks))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
-                           blks)
-divReduce (blk:blks) = blk : (divReduce blks)
 
 isHeaderClass :: String -> Maybe Int
 isHeaderClass s | "Heading" `isPrefixOf` s =
@@ -490,27 +369,12 @@ isHeaderClass s | "Heading" `isPrefixOf` s =
     _       -> Nothing
 isHeaderClass _ = Nothing
 
-findHeaderClass :: [String] -> Maybe Int
-findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
-  [] -> Nothing
-  n : _ -> Just n
 
 blksToInlines :: [Block] -> [Inline]
 blksToInlines (Para ils : _) = ils
 blksToInlines (Plain ils : _) = ils
 blksToInlines _ = []
 
-divCorrectPreReduce' :: Block -> [Block]
-divCorrectPreReduce' (Div (ident, classes, kvs) blks)
-  | isJust $ findHeaderClass classes =
-    let n = fromJust $ findHeaderClass classes
-    in
-    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
-  | otherwise = [Div (ident, classes, kvs) blks]
-divCorrectPreReduce' blk = [blk]
-
-divCorrectPreReduce :: [Block] -> [Block]
-divCorrectPreReduce = concatMap divCorrectPreReduce'
 
 blkToCode :: Block -> String
 blkToCode (Para []) = ""
@@ -520,29 +384,3 @@ blkToCode (Para ((Span (_, classes, _) ils'): ils))
     (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
 blkToCode _ = ""
 
-divRemove' :: Block -> [Block]
-divRemove' (Div (_, _, kvs) blks) =
-  case lookup "indent" kvs of
-    Just val -> [Div ("", [], [("indent", val)]) blks]
-    Nothing  -> blks
-divRemove' blk = [blk]
-
-divRemove :: [Block] -> [Block]
-divRemove = concatMap divRemove'
-
-divCorrect' :: Block -> [Block]
-divCorrect' b@(Div (ident, classes, kvs) blks)
-  | (not . null) (blockQuoteDivs `intersect` classes) =
-    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
-  | (not . null) (codeDivs `intersect` classes) =
-    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
-  | otherwise =
-      case lookup "indent" kvs of
-        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
-        Just _   ->
-          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
-        Nothing  -> [b]
-divCorrect' blk = [blk]
-
-divCorrect :: [Block] -> [Block]
-divCorrect = concatMap divCorrect'
-- 
cgit v1.2.3


From 08633fad332fe9acfb884a2ba0ee9f8543ab23ed Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 20:26:08 -0400
Subject: Add copyright block to T.P.R.Docx.Reducible.

---
 src/Text/Pandoc/Readers/Docx/Reducible.hs | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
index 1ed31ebd0..8c105d1f1 100644
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -1,5 +1,36 @@
 {-# LANGUAGE OverloadedStrings #-}
 
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Docx.Reducible
+   Copyright   : Copyright (C) 2014 Jesse Rosenthal
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+   Stability   : alpha
+   Portability : portable
+
+Typeclass for combining adjacent blocks and inlines correctly.
+-}
+
+
 module Text.Pandoc.Readers.Docx.Reducible ((<++>),
                                            (<+++>),
                                            Reducible,
-- 
cgit v1.2.3


From bebea5e936d6c3c90b977a1b8f69bb3a290d30dc Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Tue, 24 Jun 2014 10:34:07 -0400
Subject: Docx reader: pass code tests.

---
 src/Text/Pandoc/Readers/Docx.hs | 80 ++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 33 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index ffe7f5a92..5f62d0b21 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,7 +84,7 @@ import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
 import Text.Pandoc.Readers.Docx.Reducible
 import Data.Maybe (mapMaybe, isJust, fromJust)
-import Data.List (delete, isPrefixOf, (\\), intersect)
+import Data.List (delete, isPrefixOf, (\\))
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Base64 (encode)
@@ -99,7 +99,7 @@ readDocx opts bytes =
     Nothing   -> error $ "couldn't parse docx file"
 
 spansToKeep :: [String]
-spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans
+spansToKeep = ["list-item", "Definition", "DefinitionTerm"]
 
 
 -- This is empty, but we put it in for future-proofing.
@@ -108,20 +108,28 @@ divsToKeep = []
 
 runStyleToContainers :: RunStyle -> [Container Inline]
 runStyleToContainers rPr =
-  let formatters = mapMaybe id
-                 [ if isBold rPr then (Just Strong) else Nothing
-                 , if isItalic rPr then (Just Emph) else Nothing
-                 , if isSmallCaps rPr then (Just SmallCaps) else Nothing
-                 , if isStrike rPr then (Just Strikeout) else Nothing
-                 , if isSuperScript rPr then (Just Superscript) else Nothing
-                 , if isSubScript rPr then (Just Subscript) else Nothing
-                 , rStyle rPr >>=
-                   (\s -> if s `elem` spansToKeep then Just s else Nothing) >>=
-                   (\s -> Just $ Span ("", [s], []))
-                 , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+  let spanClassToContainers :: String -> [Container Inline]
+      spanClassToContainers s | s `elem` codeSpans =
+        [Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))]
+      spanClassToContainers s | s `elem` spansToKeep =
+        [Container $ Span ("", [s], [])]
+      spanClassToContainers _ = []
+
+      classContainers = case rStyle rPr of
+        Nothing -> []
+        Just s  -> spanClassToContainers s
+      
+      formatters = map Container $ mapMaybe id
+                   [ if isBold rPr then (Just Strong) else Nothing
+                   , if isItalic rPr then (Just Emph) else Nothing
+                   , if isSmallCaps rPr then (Just SmallCaps) else Nothing
+                   , if isStrike rPr then (Just Strikeout) else Nothing
+                   , if isSuperScript rPr then (Just Superscript) else Nothing
+                   , if isSubScript rPr then (Just Subscript) else Nothing
+                   , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
                  ]
   in
-   map Container formatters
+   classContainers ++ formatters
 
 
 divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
@@ -132,7 +140,9 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
    [(Container $ \blks ->
       Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
 divAttrToContainers (c:_) _ | c `elem` codeDivs =
-  [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)]
+  -- This is a bit of a cludge. We make the codeblock from the raw
+  -- parparts in bodyPartToBlocks. But we need something to match against.
+  [Container $ \_ -> CodeBlock ("", [], []) ""]
 divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
   let kvs' = filter (\(k,_) -> k /= "indent") kvs
   in
@@ -183,18 +193,23 @@ runElemToString (Tab) = ['\t']
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
 
+runToString :: Run -> String
+runToString (Run _ runElems) = runElemsToString runElems
+runToString _ = ""
+
+parPartToString :: ParPart -> String
+parPartToString (PlainRun run) = runToString run
+parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
+parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
+parPartToString _ = ""
+
 
 inlineCodeContainer :: Container Inline -> Bool
 inlineCodeContainer (Container f) = case f [] of
-  Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans)
+  Code _ "" -> True
   _         -> False
 inlineCodeContainer _ = False
 
--- blockCodeContainer :: Container Block -> Bool
--- blockCodeContainer (Container f) = case f [] of
---   Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs)
---   _         -> False
--- blockCodeContainer _ = False
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
 runToInlines _ _ (Run rs runElems)
@@ -274,7 +289,16 @@ cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
 rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
 rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
 
+blockCodeContainer :: Container Block -> Bool
+blockCodeContainer (Container f) = case f [] of
+  CodeBlock _ _ -> True
+  _             -> False
+blockCodeContainer _ = False
+
 bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
+bodyPartToBlocks _ _ (Paragraph pPr parparts)
+  | any blockCodeContainer (parStyleToContainers pPr) =
+    [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
 bodyPartToBlocks opts docx (Paragraph pPr parparts) =
   case parPartsToInlines opts docx parparts of
     [] ->
@@ -358,7 +382,8 @@ docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
 
 ilToCode :: Inline -> String
 ilToCode (Str s) = s
-ilToCode _ = ""
+ilToCode Space = " "
+ilToCode _     = ""
 
 
 isHeaderClass :: String -> Maybe Int
@@ -369,18 +394,7 @@ isHeaderClass s | "Heading" `isPrefixOf` s =
     _       -> Nothing
 isHeaderClass _ = Nothing
 
-
 blksToInlines :: [Block] -> [Inline]
 blksToInlines (Para ils : _) = ils
 blksToInlines (Plain ils : _) = ils
 blksToInlines _ = []
-
-
-blkToCode :: Block -> String
-blkToCode (Para []) = ""
-blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
-blkToCode (Para ((Span (_, classes, _) ils'): ils))
-  | (not . null) (codeSpans `intersect` classes) =
-    (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
-blkToCode _ = ""
-
-- 
cgit v1.2.3


From 5ae6b8c6f1ba29fa8b6eec065c6d463e5e03a9aa Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Tue, 24 Jun 2014 12:10:49 -0400
Subject: Docx reader: pass definition test.

This commit also fixes a problem with the previous code pushes, which
wouldn't allow code blocks to share a div.
---
 src/Text/Pandoc/Readers/Docx.hs | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 5f62d0b21..67cf519dc 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -99,12 +99,12 @@ readDocx opts bytes =
     Nothing   -> error $ "couldn't parse docx file"
 
 spansToKeep :: [String]
-spansToKeep = ["list-item", "Definition", "DefinitionTerm"]
+spansToKeep = []
 
 
 -- This is empty, but we put it in for future-proofing.
 divsToKeep :: [String]
-divsToKeep = []
+divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
 
 runStyleToContainers :: RunStyle -> [Container Inline]
 runStyleToContainers rPr =
@@ -139,18 +139,18 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
   in
    [(Container $ \blks ->
       Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
-divAttrToContainers (c:_) _ | c `elem` codeDivs =
+divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
+  (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
+divAttrToContainers (c:cs) kvs | c `elem` codeDivs =
   -- This is a bit of a cludge. We make the codeblock from the raw
   -- parparts in bodyPartToBlocks. But we need something to match against.
-  [Container $ \_ -> CodeBlock ("", [], []) ""]
+  (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs)
 divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
   let kvs' = filter (\(k,_) -> k /= "indent") kvs
   in
    (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs')
 divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
   (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
-divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
-  (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
 divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
 divAttrToContainers [] (kv:kvs) | fst kv == "indent" =
   (Container BlockQuote) : divAttrToContainers [] kvs
@@ -298,7 +298,12 @@ blockCodeContainer _ = False
 bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
 bodyPartToBlocks _ _ (Paragraph pPr parparts)
   | any blockCodeContainer (parStyleToContainers pPr) =
-    [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
+    let
+      otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr)
+    in
+     rebuild
+     otherConts
+     [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
 bodyPartToBlocks opts docx (Paragraph pPr parparts) =
   case parPartsToInlines opts docx parparts of
     [] ->
@@ -372,7 +377,7 @@ makeImagesSelfContained _ inline = inline
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
   map (makeHeaderAnchors) $
-  bottomUp blocksToDefinitions $
+  blocksToDefinitions $
   blocksToBullets $
   concatMap (bodyPartToBlocks opts docx) bps
 
-- 
cgit v1.2.3


From a8866bc1215a4e4c6582dedc940c86cdaeb02d9f Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Tue, 24 Jun 2014 12:15:26 -0400
Subject: Docx reader: remove T.P.Generic import.

This marks the removal of the final tree-walk in the code. (Though there
is still one in the Lists module.)
---
 src/Text/Pandoc/Readers/Docx.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 67cf519dc..59fb7b37f 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -76,7 +76,6 @@ import Codec.Archive.Zip
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.MIME (getMimeType)
 import Text.Pandoc.UTF8 (toString)
 import Text.Pandoc.Walk
-- 
cgit v1.2.3


From 69743cd5981d7e910c5d83da18fc698c8d522e69 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Tue, 24 Jun 2014 14:24:38 -0400
Subject: Docx reader: Ignore zero (or negative) indent

If a block has an indentation less than or equal to zero, it should not be
treated as a block quote.
---
 src/Text/Pandoc/Readers/Docx.hs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 59fb7b37f..b787ca9fb 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -132,7 +132,6 @@ runStyleToContainers rPr =
 
 
 divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
-divAttrToContainers [] [] = []
 divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
   let n = fromJust (isHeaderClass c)
   in
@@ -151,10 +150,14 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
 divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
   (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
 divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
-divAttrToContainers [] (kv:kvs) | fst kv == "indent" =
-  (Container BlockQuote) : divAttrToContainers [] kvs
-divAttrToContainers [] (_:kvs) =
-  divAttrToContainers [] kvs
+divAttrToContainers [] kvs | isJust (lookup "indent" kvs) =
+  let kvs' = filter (\(k,_) -> k /= "indent") kvs
+  in
+   case fromJust (lookup "indent" kvs) of
+     "0"       -> divAttrToContainers [] kvs'
+     ('-' : _) -> divAttrToContainers [] kvs'
+     _         -> (Container BlockQuote) : divAttrToContainers [] kvs'
+divAttrToContainers _ _ = []
 
 
 parStyleToContainers :: ParagraphStyle -> [Container Block]
-- 
cgit v1.2.3


From c343f1a90bc35d745de673de5ff771ddbe60be54 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 08:10:19 -0400
Subject: Docx Reader: Add change types

Insertion and deletion. Dates are just strings for now.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 18200bcf9..1cb5fe2e3 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -455,6 +455,8 @@ elemToCell ns element
 elemToCell _ _ = Nothing
 
 data ParPart = PlainRun Run
+             | Insertion ChangeId Author ChangeDate [Run]
+             | Deletion ChangeId Author ChangeDate [Run]
              | BookMark BookMarkId Anchor
              | InternalHyperLink Anchor [Run]
              | ExternalHyperLink RelId [Run]
@@ -604,4 +606,6 @@ type Target = String
 type Anchor = String
 type BookMarkId = String
 type RelId = String
-               
+type ChangeId = String
+type Author = String
+type ChangeDate = String               
-- 
cgit v1.2.3


From 38e1d3e95b8240eeb35db0a1a56e308cfb4835e4 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 10:32:48 -0400
Subject: Docx reader: Parse Insertions and Deletions.

This is just for the Parse module, reading it into the Docx format. It
still has to be translated into pandoc.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 1cb5fe2e3..c76ef7511 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -541,7 +541,7 @@ elemToRun _ _ = Nothing
 
 elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
 elemToRunElem ns element
-  | qName (elName element) == "t" &&
+  | (qName (elName element) == "t" || qName (elName element) == "delText") &&
     qURI (elName element) == (lookup "w" ns) =
       Just $ TextRun (strContent element)
   | qName (elName element) == "br" &&
@@ -581,6 +581,22 @@ elemToParPart ns element
         Nothing -> do
           r <- elemToRun ns element
           return $ PlainRun r
+elemToParPart ns element
+  | qName (elName element) == "ins" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element
+      cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element
+      let runs = mapMaybe (elemToRun ns) (elChildren element)
+      return $ Insertion cId cAuthor cDate runs
+elemToParPart ns element
+  | qName (elName element) == "del" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element
+      cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element
+      let runs = mapMaybe (elemToRun ns) (elChildren element)
+      return $ Deletion cId cAuthor cDate runs
 elemToParPart ns element
   | qName (elName element) == "bookmarkStart" &&
     qURI (elName element) == (lookup "w" ns) = do
-- 
cgit v1.2.3


From ed44e4ca8c8f3d3c4c7ac65b98f16732c8173b88 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 10:38:01 -0400
Subject: Docx reader: Add rudimentary track changes support.

This will only read the insertions, and ignore the deletions.
---
 src/Text/Pandoc/Readers/Docx.hs | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index b787ca9fb..130e2a1e2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -234,6 +234,9 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
 
 parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
 parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
+parPartToInlines opts docx (Insertion _ _ _ runs) =
+  concatMap (runToInlines opts docx) runs
+parPartToInlines _ _ (Deletion _ _ _ _) = []
 parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
 parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
 parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
-- 
cgit v1.2.3


From 9614ddfedc18cccbf9fbe1a23fae200c7e67d72d Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 11:00:15 -0400
Subject: Docx reader: Remove unnecessary filter in Parse.

mapMaybe does the filtering for us.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index c76ef7511..dbbd65681 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -281,10 +281,6 @@ elemToBody ns element | qName (elName element) == "body" && qURI (elName element
   $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
 elemToBody _ _ = Nothing
 
-isRunOrLinkOrBookmark :: NameSpaces -> QName ->  Bool
-isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
-                   qURI q == (lookup "w" ns)
-
 elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
 elemToNumInfo ns element
   | qName (elName element) == "p" &&
@@ -319,9 +315,8 @@ elemToBodyPart ns element
   | qName (elName element) == "p" &&
     qURI (elName element) == (lookup "w" ns) =
       let parstyle = elemToParagraphStyle ns element
-          parparts = mapMaybe id
-                     $ map (elemToParPart ns)
-                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
+          parparts = mapMaybe (elemToParPart ns)
+                     $ elChildren element
       in
        case elemToNumInfo ns element of
          Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
-- 
cgit v1.2.3


From 3ec62d006483d369bb896b283db82e4437b66d05 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 13:50:08 -0400
Subject: Add TrackChanges type to options.

---
 src/Text/Pandoc/Options.hs | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 611a6bb06..e0ad866ad 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -264,6 +264,12 @@ data HTMLSlideVariant = S5Slides
                       | NoSlides
                       deriving (Show, Read, Eq)
 
+-- | Options for accepting or rejecting MS Word track-changes.
+data TrackChanges = AcceptChanges
+                  | RejectChanges
+                  | AllChanges
+                  deriving (Show, Read, Eq)
+
 -- | Options for writers
 data WriterOptions = WriterOptions
   { writerStandalone       :: Bool   -- ^ Include header and footer
-- 
cgit v1.2.3


From 6ff84b5e8da47ff7f4b77bd6cd017beae81fed97 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 13:57:56 -0400
Subject: Add reader option for track changes.

---
 src/Text/Pandoc/Options.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index e0ad866ad..d0a76a001 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -211,6 +211,7 @@ data ReaderOptions = ReaderOptions{
                                        -- indented code blocks
        , readerDefaultImageExtension :: String -- ^ Default extension for images
        , readerTrace           :: Bool -- ^ Print debugging info
+       , readerTrackChanges    :: TrackChanges
 } deriving (Show, Read)
 
 instance Default ReaderOptions
@@ -227,6 +228,7 @@ instance Default ReaderOptions
                , readerIndentedCodeClasses   = []
                , readerDefaultImageExtension = ""
                , readerTrace                 = False
+               , readerTrackChanges          = AcceptChanges
                }
 
 --
-- 
cgit v1.2.3


From d824f89fb3996fd27e156da1141808fbf468819d Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 14:05:21 -0400
Subject: Add TrackChanges to Options export.

---
 src/Text/Pandoc/Options.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index d0a76a001..b7a3a4b7b 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -41,6 +41,7 @@ module Text.Pandoc.Options ( Extension(..)
                            , HTMLSlideVariant (..)
                            , EPUBVersion (..)
                            , WriterOptions (..)
+                           , TrackChanges (..)
                            , def
                            , isEnabled
                            ) where
-- 
cgit v1.2.3


From 0e9bf37f64be0a121a0d682570fc8f0cf2b27c51 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 14:17:20 -0400
Subject: Docx reader: Make use of track-changes option.

---
 src/Text/Pandoc/Readers/Docx.hs | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 130e2a1e2..cb0735e31 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -234,9 +234,22 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
 
 parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
 parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines opts docx (Insertion _ _ _ runs) =
-  concatMap (runToInlines opts docx) runs
-parPartToInlines _ _ (Deletion _ _ _ _) = []
+parPartToInlines opts docx (Insertion _ author date runs) =
+  case readerTrackChanges opts of
+    AcceptChanges -> concatMap (runToInlines opts docx) runs
+    RejectChanges -> []
+    AllChanges    ->
+      [Span
+       ("", ["insertion"], [("author", author), ("date", date)])
+       (concatMap (runToInlines opts docx) runs)]
+parPartToInlines opts docx (Deletion _ author date runs) =
+  case readerTrackChanges opts of
+    AcceptChanges -> []
+    RejectChanges -> concatMap (runToInlines opts docx) runs
+    AllChanges    ->
+      [Span
+       ("", ["deletion"], [("author", author), ("date", date)])
+       (concatMap (runToInlines opts docx) runs)]
 parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
 parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
 parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
-- 
cgit v1.2.3


From 2396be6f5777ac04067264d489fb84fbec72d164 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Wed, 25 Jun 2014 17:12:03 -0400
Subject: Docx reader: Code cleanup in parse.

Remove some redundant ways of dealing with Maybe.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 50 +++++++++--------------------------
 1 file changed, 12 insertions(+), 38 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbbd65681..07f34450d 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -148,7 +148,7 @@ absNumElemToAbsNum ns element |
     let levelElems = findChildren
                  (QName "lvl" (lookup "w" ns) (Just "w"))
                  element
-        levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+        levels = mapMaybe (levelElemToLevel ns) levelElems
     return $ AbstractNumb absNumId levels
 absNumElemToAbsNum _ _ = Nothing
 
@@ -180,8 +180,8 @@ archiveToNumbering zf =
           absNumElems = findChildren
                         (QName "abstractNum" (lookup "w" namespaces) (Just "w"))
                         numberingElem
-          nums = mapMaybe id $ map (numElemToNum namespaces) numElems
-          absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+          nums = mapMaybe (numElemToNum namespaces) numElems
+          absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
       return $ Numbering namespaces nums absNums
 
 data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
@@ -193,10 +193,8 @@ noteElemToNote ns element
     qURI (elName element) == (lookup "w" ns) =
       do
         noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
-        let bps = map fromJust
-                  $ filter isJust
-                  $ map (elemToBodyPart ns)
-                  $ filterChildrenName (isParOrTbl ns) element
+        let bps = mapMaybe  (elemToBodyPart ns)
+                  $ elChildren element
         return $ (noteId, bps)
 noteElemToNote _ _ = Nothing
 
@@ -210,9 +208,7 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
 elemToNotes ns notetype element
   | qName (elName element) == (notetype ++ "s") &&
     qURI (elName element) == (lookup "w" ns) =
-      Just $ map fromJust
-      $ filter isJust
-      $ map (noteElemToNote ns)
+      Just $ mapMaybe (noteElemToNote ns)
       $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
 elemToNotes _ _ _ = Nothing
 
@@ -260,25 +256,19 @@ relElemToRelationship _ = Nothing
 archiveToRelationships :: Archive -> [Relationship]
 archiveToRelationships archive = 
   let relPaths = filter filePathIsRel (filesInArchive archive)
-      entries  = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
-      relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
-      rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+      entries  = mapMaybe (\f -> findEntryByPath f archive) relPaths
+      relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+      rels =     mapMaybe relElemToRelationship $ concatMap elChildren relElems
   in
    rels
    
 data Body = Body [BodyPart]
           deriving Show
 
-isParOrTbl :: NameSpaces -> QName -> Bool
-isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
-                  qURI q == (lookup "w" ns)
-
 elemToBody :: NameSpaces -> Element ->  Maybe Body
 elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
   Just $ Body
-  $ map fromJust
-  $ filter isJust
-  $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
+  $ mapMaybe (elemToBodyPart ns) $ elChildren element
 elemToBody _ _ = Nothing
 
 elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
@@ -295,21 +285,6 @@ elemToNumInfo ns element
         return (numId, lvl)
 elemToNumInfo _ _ = Nothing
 
--- isBookMarkTag :: NameSpaces -> QName -> Bool
--- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
---                      qURI q == (lookup "w" ns)
-
--- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
--- parChildrenToBookmark ns (bms : bme : _)
---   | qName (elName bms) == "bookmarkStart" &&
---     qURI (elName bms) == (lookup "w" ns) &&
---     qName (elName bme) == "bookmarkEnd" &&
---     qURI (elName bme) == (lookup "w" ns) = do
---       bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
---       bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
---       return $ (bmId, bmName)
--- parChildrenToBookmark _ _ = Nothing
-
 elemToBodyPart :: NameSpaces -> Element ->  Maybe BodyPart
 elemToBodyPart ns element
   | qName (elName element) == "p" &&
@@ -382,8 +357,7 @@ elemToParagraphStyle ns element =
     Just pPr ->
       ParagraphStyle
       {pStyle =
-          mapMaybe id $
-          map
+          mapMaybe
           (findAttr (QName "val" (lookup "w" ns) (Just "w")))
           (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
       , indent =
@@ -601,7 +575,7 @@ elemToParPart ns element
 elemToParPart ns element
   | qName (elName element) == "hyperlink" &&
     qURI (elName element) == (lookup "w" ns) =
-      let runs = map fromJust $ filter isJust $ map (elemToRun ns)
+      let runs = mapMaybe (elemToRun ns)
                  $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
       in
        case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
-- 
cgit v1.2.3


From b2127311cb360479dbea59264ada0112a94d7819 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 26 Jun 2014 12:34:41 -0700
Subject: Require haddock-library >= 1.1 and simplify haddock reader code.

See #1346.
---
 src/Text/Pandoc/Readers/Haddock.hs | 40 +-------------------------------------
 1 file changed, 1 insertion(+), 39 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index a3dfb7c3c..4b46c869d 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -43,11 +43,8 @@ docHToBlocks d' =
             (docHToInlines False $ headerTitle h)
     DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
     DocString _ -> inlineFallback
-    DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
     DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
-    DocParagraph x -> let (ils, rest) = getInlines x
-                       in (B.para $ docHToInlines False ils)
-                          <> docHToBlocks rest
+    DocParagraph x -> B.para $ docHToInlines False x
     DocIdentifier _ -> inlineFallback
     DocIdentifierUnchecked _ -> inlineFallback
     DocModule s -> B.plain $ docHToInlines False $ DocModule s
@@ -115,40 +112,6 @@ docHToInlines isCode d' =
     DocProperty _ -> mempty
     DocExamples _ -> mempty
 
-getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier)
-getInlines (DocAppend x y) = if isInline x
-                                then let (a, b) = getInlines y
-                                     in  (DocAppend x a, b)
-                                else (DocEmpty, DocAppend x y)
-getInlines x = if isInline x
-                  then (x, DocEmpty)
-                  else (DocEmpty, x)
-
-isInline :: DocH String Identifier -> Bool
-isInline d' =
-  case d' of
-    DocEmpty -> True
-    DocAppend d1 _ -> isInline d1
-    DocString _ -> True
-    DocParagraph _ -> False
-    DocIdentifier _ -> True
-    DocIdentifierUnchecked _ -> True
-    DocModule _ -> True
-    DocWarning _ -> True
-    DocEmphasis _ -> True
-    DocMonospaced _ -> True
-    DocBold _ -> True
-    DocHeader _ -> False
-    DocUnorderedList _ -> False
-    DocOrderedList _ -> False
-    DocDefList _ -> False
-    DocCodeBlock _ -> False
-    DocHyperlink _ -> True
-    DocPic _ -> True
-    DocAName _ -> True
-    DocProperty _ -> False
-    DocExamples _ -> False
-
 -- | Create an 'Example', stripping superfluous characters as appropriate
 makeExample :: String -> String -> [String] -> Blocks
 makeExample prompt expression result =
@@ -173,4 +136,3 @@ makeExample prompt expression result =
         substituteBlankLine "<BLANKLINE>" = ""
         substituteBlankLine line          = line
     coder = B.codeWith ([], ["result"], [])
-
-- 
cgit v1.2.3


From 4248f25152d5715ad99f9d8dda8bf83f33f650ff Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Thu, 26 Jun 2014 16:48:41 -0400
Subject: Move Docx reader to DocxContext monad

This is a ReaderT State stack, which keeps track of some environment info, such
as the options and the docx doc. The state will come in handy in the future,
for a couple of planned features (rewriting the section anchors as auto_idents,
and hopefully smart-quoting).
---
 src/Text/Pandoc/Readers/Docx.hs | 247 +++++++++++++++++++++++-----------------
 1 file changed, 140 insertions(+), 107 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index cb0735e31..5773027f2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -88,6 +88,9 @@ import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Base64 (encode)
 import System.FilePath (combine)
+import qualified Data.Map as M
+import Control.Monad.Reader
+import Control.Monad.State
 
 readDocx :: ReaderOptions
          -> B.ByteString
@@ -97,11 +100,24 @@ readDocx opts bytes =
     Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
     Nothing   -> error $ "couldn't parse docx file"
 
-spansToKeep :: [String]
-spansToKeep = []
 
+data DState = DState { docxHdrLinks :: M.Map String String }
+
+data DEnv = DEnv { docxOptions  :: ReaderOptions
+                 , docxDocument :: Docx}
+
+type DocxContext = ReaderT DEnv (State DState)
+
+evalDocxContext :: DocxContext a -> DEnv -> DState -> a
+evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+
+concatMapM        :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs   =  liftM concat (mapM f xs)
 
 -- This is empty, but we put it in for future-proofing.
+spansToKeep :: [String]
+spansToKeep = []
+
 divsToKeep :: [String]
 divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
 
@@ -213,57 +229,69 @@ inlineCodeContainer (Container f) = case f [] of
 inlineCodeContainer _ = False
 
 
-runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-runToInlines _ _ (Run rs runElems)
+runToInlines :: Run -> DocxContext [Inline]
+runToInlines (Run rs runElems)
   | any inlineCodeContainer (runStyleToContainers rs) =
+      return $
       rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
   | otherwise =
+      return $
       rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
-runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
+runToInlines (Footnote fnId) = do
+  (Docx _ notes _ _ _ ) <- asks docxDocument
   case (getFootNote fnId notes) of
-    Just bodyParts ->
-      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
-    Nothing        ->
-      [Note []]
-runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
+    Just bodyParts -> do
+      blks <- concatMapM bodyPartToBlocks bodyParts
+      return $ [Note blks]
+    Nothing        -> return [Note []]
+runToInlines (Endnote fnId) = do
+  (Docx _ notes _ _ _ ) <- asks docxDocument
   case (getEndNote fnId notes) of
-    Just bodyParts ->
-      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
-    Nothing        ->
-      [Note []]
-
-parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
-parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines opts docx (Insertion _ author date runs) =
+    Just bodyParts -> do
+      blks <- concatMapM bodyPartToBlocks bodyParts
+      return $ [Note blks]
+    Nothing        -> return [Note []]
+
+parPartToInlines :: ParPart -> DocxContext [Inline]
+parPartToInlines (PlainRun r) = runToInlines r
+parPartToInlines (Insertion _ author date runs) = do
+  opts <- asks docxOptions
   case readerTrackChanges opts of
-    AcceptChanges -> concatMap (runToInlines opts docx) runs
-    RejectChanges -> []
-    AllChanges    ->
-      [Span
-       ("", ["insertion"], [("author", author), ("date", date)])
-       (concatMap (runToInlines opts docx) runs)]
-parPartToInlines opts docx (Deletion _ author date runs) =
+    AcceptChanges -> concatMapM runToInlines runs >>= return
+    RejectChanges -> return []
+    AllChanges    -> do
+      ils <- (concatMapM runToInlines runs)
+      return [Span
+              ("", ["insertion"], [("author", author), ("date", date)])
+              ils]
+parPartToInlines (Deletion _ author date runs) = do
+  opts <- asks docxOptions
   case readerTrackChanges opts of
-    AcceptChanges -> []
-    RejectChanges -> concatMap (runToInlines opts docx) runs
-    AllChanges    ->
-      [Span
-       ("", ["deletion"], [("author", author), ("date", date)])
-       (concatMap (runToInlines opts docx) runs)]
-parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
-parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
-parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
-  case lookupRelationship relid rels of
+    AcceptChanges -> return []
+    RejectChanges -> concatMapM runToInlines runs >>= return
+    AllChanges    -> do
+      ils <- concatMapM runToInlines runs
+      return [Span
+              ("", ["deletion"], [("author", author), ("date", date)])
+              ils]
+parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return []
+parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []]
+parPartToInlines (Drawing relid) = do
+  (Docx _ _ _ rels _) <- asks docxDocument
+  return $ case lookupRelationship relid rels of
     Just target -> [Image [] (combine "word" target, "")]
     Nothing     -> [Image [] ("", "")]
-parPartToInlines opts docx (InternalHyperLink anchor runs) =
-  [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
-parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) =
-  case lookupRelationship relid rels of
+parPartToInlines (InternalHyperLink anchor runs) = do
+  ils <- concatMapM runToInlines runs
+  return [Link ils ('#' : anchor, "")]
+parPartToInlines (ExternalHyperLink relid runs) = do
+  (Docx _ _ _ rels _) <- asks docxDocument
+  rs <- concatMapM runToInlines runs
+  return $ case lookupRelationship relid rels of
     Just target ->
-      [Link (concatMap (runToInlines opts docx) runs) (target, "")]
+      [Link rs (target, "")]
     Nothing ->
-      [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+      [Link rs ("", "")]
 
 isAnchorSpan :: Inline -> Bool
 isAnchorSpan (Span (ident, classes, kvs) ils) =
@@ -287,25 +315,18 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
     _ -> h
 makeHeaderAnchors blk = blk
 
-parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
-parPartsToInlines opts docx parparts =
-  --
-  -- We're going to skip data-uri's for now. It should be an option,
-  -- not mandatory.
-  --
-  (if False -- TODO depend on option
-      then walk (makeImagesSelfContained docx)
-      else id) $
-  -- bottomUp spanTrim $
-  -- bottomUp spanCorrect $
-  -- bottomUp spanReduce $
-  reduceList $ concatMap (parPartToInlines opts docx) parparts
-
-cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
-
-rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
-rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
+parPartsToInlines :: [ParPart] -> DocxContext [Inline]
+parPartsToInlines parparts = do
+  ils <- concatMapM parPartToInlines parparts >>=
+         --  TODO: Option for self-containted images
+         (if False then (walkM makeImagesSelfContained) else return)
+  return $ reduceList $ ils
+
+cellToBlocks :: Cell -> DocxContext [Block]
+cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps
+
+rowToBlocksList :: Row -> DocxContext [[Block]]
+rowToBlocksList (Row cells) = mapM cellToBlocks cells
 
 blockCodeContainer :: Container Block -> Bool
 blockCodeContainer (Container f) = case f [] of
@@ -313,27 +334,32 @@ blockCodeContainer (Container f) = case f [] of
   _             -> False
 blockCodeContainer _ = False
 
-bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
-bodyPartToBlocks _ _ (Paragraph pPr parparts)
+bodyPartToBlocks :: BodyPart -> DocxContext [Block]
+bodyPartToBlocks (Paragraph pPr parparts)
   | any blockCodeContainer (parStyleToContainers pPr) =
     let
       otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr)
     in
+     return $ 
      rebuild
      otherConts
      [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
-bodyPartToBlocks opts docx (Paragraph pPr parparts) =
-  case parPartsToInlines opts docx parparts of
-    [] ->
-      []
-    _ ->
-      let parContents = parPartsToInlines opts docx parparts
-          trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents
-      in
+bodyPartToBlocks (Paragraph pPr parparts) = do
+  ils <- parPartsToInlines parparts
+  case ils of
+    [] -> return []
+    _ -> do
+      parContents <- parPartsToInlines parparts
+      let trimmedContents = reverse $
+                            dropWhile (Space ==) $
+                            reverse $
+                            dropWhile (Space ==) parContents
+      return $
        rebuild
        (parStyleToContainers pPr)
        [Para trimmedContents]
-bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+bodyPartToBlocks (ListItem pPr numId lvl parparts) = do
+  (Docx _ _ numbering _ _) <- asks docxDocument
   let
     kvs = case lookupLevel numId lvl numbering of
       Just (_, fmt, txt, Just start) -> [ ("level", lvl)
@@ -349,23 +375,22 @@ bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parp
                                         , ("text", txt)
                                         ]
       Nothing                        -> []
-  in
-   [Div
-    ("", ["list-item"], kvs)
-    (bodyPartToBlocks opts docx (Paragraph pPr parparts))]
-bodyPartToBlocks _ _ (Tbl _ _ _ []) =
-  [Para []]
-bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) =
+  blks <- bodyPartToBlocks (Paragraph pPr parparts)
+  return $ [Div ("", ["list-item"], kvs) blks]
+bodyPartToBlocks (Tbl _ _ _ []) =
+  return [Para []]
+bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
   let caption = strToInlines cap
       (hdr, rows) = case firstRowFormatting look of
         True -> (Just r, rs)
         False -> (Nothing, r:rs)
-      hdrCells = case hdr of
-        Just r' -> rowToBlocksList opts docx r'
-        Nothing -> []
-      cells = map (rowToBlocksList opts docx) rows
+  hdrCells <- case hdr of
+    Just r' -> rowToBlocksList r'
+    Nothing -> return []
+        
+  cells <- mapM rowToBlocksList rows
 
-      size = case null hdrCells of
+  let size = case null hdrCells of
         True -> length $ head cells
         False -> length $ hdrCells
       --
@@ -374,34 +399,42 @@ bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) =
       -- moment. Width information is in the TblGrid field of the Tbl,
       -- so should be possible. Alignment might be more difficult,
       -- since there doesn't seem to be a column entity in docx.
-      alignments = take size (repeat AlignDefault)
-      widths = take size (repeat 0) :: [Double]
-  in
-   [Table caption alignments widths hdrCells cells]
-
-
-makeImagesSelfContained :: Docx -> Inline -> Inline
-makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
-  case lookup uri media of
-    Just bs -> case getMimeType uri of
-      Just mime ->  let data_uri =
-                          "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
-                    in
-                     Image alt (data_uri, title)
-      Nothing  -> i
+      alignments = replicate size AlignDefault
+      widths = replicate size 0 :: [Double]
+
+  return [Table caption alignments widths hdrCells cells]
+
+
+makeImagesSelfContained :: Inline -> DocxContext Inline
+makeImagesSelfContained i@(Image alt (uri, title)) = do
+  (Docx _ _ _ _ media) <- asks docxDocument
+  return $ case lookup uri media of
+    Just bs ->
+      case getMimeType uri of
+        Just mime ->
+          let data_uri = "data:" ++ mime ++ ";base64," ++
+                         toString (encode $ BS.concat $ B.toChunks bs)
+          in
+           Image alt (data_uri, title)
+        Nothing  -> i
     Nothing -> i
-makeImagesSelfContained _ inline = inline
+makeImagesSelfContained inline = return inline
 
-bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
-bodyToBlocks opts docx (Body bps) =
-  map (makeHeaderAnchors) $
-  blocksToDefinitions $
-  blocksToBullets $
-  concatMap (bodyPartToBlocks opts docx) bps
+bodyToBlocks :: Body -> DocxContext [Block]
+bodyToBlocks (Body bps) = do
+  blks <- concatMapM bodyPartToBlocks bps
+  return $
+    map (makeHeaderAnchors) $
+    blocksToDefinitions $
+    blocksToBullets $ blks
 
 docxToBlocks :: ReaderOptions -> Docx -> [Block]
-docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
-
+docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) =
+  let dState = DState { docxHdrLinks = M.empty }
+      dEnv   = DEnv { docxOptions  = opts
+                    , docxDocument = d}
+  in
+   evalDocxContext (bodyToBlocks body) dEnv dState
 
 ilToCode :: Inline -> String
 ilToCode (Str s) = s
-- 
cgit v1.2.3


From db187348cd8bb17ce66d2d4c1db6a5ff46a1ffbc Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Thu, 26 Jun 2014 23:10:11 -0400
Subject: Docx rdr: Avoid mapping makeHeaderAnchors globally

It only applies to headers, so we can just apply it when we make a
header.
---
 src/Text/Pandoc/Readers/Docx.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 5773027f2..42352a845 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -152,6 +152,7 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
   let n = fromJust (isHeaderClass c)
   in
    [(Container $ \blks ->
+      makeHeaderAnchor $
       Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
 divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
@@ -304,8 +305,8 @@ isAnchorSpan _ = False
 dummyAnchors :: [String]
 dummyAnchors = ["_GoBack"]
 
-makeHeaderAnchors :: Block -> Block
-makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
+makeHeaderAnchor :: Block -> Block
+makeHeaderAnchor h@(Header n (_, classes, kvs) ils) =
   case filter isAnchorSpan ils of
     []   -> h
     (x@(Span (ident, _, _) _) : xs) ->
@@ -313,7 +314,7 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
         True -> h
         False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
     _ -> h
-makeHeaderAnchors blk = blk
+makeHeaderAnchor blk = blk
 
 parPartsToInlines :: [ParPart] -> DocxContext [Inline]
 parPartsToInlines parparts = do
@@ -424,7 +425,6 @@ bodyToBlocks :: Body -> DocxContext [Block]
 bodyToBlocks (Body bps) = do
   blks <- concatMapM bodyPartToBlocks bps
   return $
-    map (makeHeaderAnchors) $
     blocksToDefinitions $
     blocksToBullets $ blks
 
-- 
cgit v1.2.3


From ab76bbebbe7afd3acdf3218b88f02482c885cc87 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 27 Jun 2014 11:35:50 -0400
Subject: Docx Reader: Clean up guards

Use PatternGuards to get rid of need for `isJust`, `fromJust`
altogether.
---
 src/Text/Pandoc/Readers/Docx.hs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 42352a845..0c52b1acb 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards #-}
+
 {-
 Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
 
@@ -82,7 +84,7 @@ import Text.Pandoc.Walk
 import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
 import Text.Pandoc.Readers.Docx.Reducible
-import Data.Maybe (mapMaybe, isJust, fromJust)
+import Data.Maybe (mapMaybe)
 import Data.List (delete, isPrefixOf, (\\))
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
@@ -148,12 +150,10 @@ runStyleToContainers rPr =
 
 
 divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
-divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
-  let n = fromJust (isHeaderClass c)
-  in
-   [(Container $ \blks ->
-      makeHeaderAnchor $
-      Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
+divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c =
+  [(Container $ \blks ->
+     makeHeaderAnchor $
+     Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
 divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
 divAttrToContainers (c:cs) kvs | c `elem` codeDivs =
@@ -167,10 +167,10 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
 divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
   (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
 divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
-divAttrToContainers [] kvs | isJust (lookup "indent" kvs) =
+divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs =
   let kvs' = filter (\(k,_) -> k /= "indent") kvs
   in
-   case fromJust (lookup "indent" kvs) of
+   case numString of
      "0"       -> divAttrToContainers [] kvs'
      ('-' : _) -> divAttrToContainers [] kvs'
      _         -> (Container BlockQuote) : divAttrToContainers [] kvs'
-- 
cgit v1.2.3


From 1de8d4d08788ef24f69f9f90266604854996080e Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Fri, 27 Jun 2014 11:45:06 -0400
Subject: Docx Reader: Simplify makeHeaderAnchors

Using pattern guard, in preparation for doing some more complicated
stuff with it (recording header anchors, so we can change them to auto
ids.)
---
 src/Text/Pandoc/Readers/Docx.hs | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 0c52b1acb..9aaf1d340 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -306,14 +306,11 @@ dummyAnchors :: [String]
 dummyAnchors = ["_GoBack"]
 
 makeHeaderAnchor :: Block -> Block
-makeHeaderAnchor h@(Header n (_, classes, kvs) ils) =
-  case filter isAnchorSpan ils of
-    []   -> h
-    (x@(Span (ident, _, _) _) : xs) ->
-      case ident `elem` dummyAnchors of
-        True -> h
-        False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
-    _ -> h
+makeHeaderAnchor (Header n (_, classes, kvs) ils)
+  | (x : xs) <- filter isAnchorSpan ils
+  , (Span (ident, _, _) _) <- x
+  , notElem ident dummyAnchors =
+   Header n (ident, classes, kvs) (ils \\ (x:xs))
 makeHeaderAnchor blk = blk
 
 parPartsToInlines :: [ParPart] -> DocxContext [Inline]
-- 
cgit v1.2.3


From 5969baf5b97c0926384b1619be3c4be6d92b277b Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 28 Jun 2014 02:47:40 -0400
Subject: Rewrote header generation.

In preparation for auto ids.
---
 src/Text/Pandoc/Readers/Docx.hs | 50 ++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 9aaf1d340..bbe770f6e 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,6 +84,7 @@ import Text.Pandoc.Walk
 import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
 import Text.Pandoc.Readers.Docx.Reducible
+import Text.Pandoc.Shared
 import Data.Maybe (mapMaybe)
 import Data.List (delete, isPrefixOf, (\\))
 import qualified Data.ByteString as BS
@@ -151,9 +152,8 @@ runStyleToContainers rPr =
 
 divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
 divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c =
-  [(Container $ \blks ->
-     makeHeaderAnchor $
-     Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
+  [Container $ \_ ->
+     Header n ("", delete ("Heading" ++ show n) cs, []) []]
 divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
 divAttrToContainers (c:cs) kvs | c `elem` codeDivs =
@@ -305,13 +305,14 @@ isAnchorSpan _ = False
 dummyAnchors :: [String]
 dummyAnchors = ["_GoBack"]
 
-makeHeaderAnchor :: Block -> Block
+makeHeaderAnchor :: Block -> DocxContext Block
 makeHeaderAnchor (Header n (_, classes, kvs) ils)
   | (x : xs) <- filter isAnchorSpan ils
   , (Span (ident, _, _) _) <- x
   , notElem ident dummyAnchors =
-   Header n (ident, classes, kvs) (ils \\ (x:xs))
-makeHeaderAnchor blk = blk
+   return $ Header n (ident, classes, kvs) (ils \\ (x:xs))
+makeHeaderAnchor blk = return blk
+
 
 parPartsToInlines :: [ParPart] -> DocxContext [Inline]
 parPartsToInlines parparts = do
@@ -326,36 +327,40 @@ cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps
 rowToBlocksList :: Row -> DocxContext [[Block]]
 rowToBlocksList (Row cells) = mapM cellToBlocks cells
 
-blockCodeContainer :: Container Block -> Bool
-blockCodeContainer (Container f) = case f [] of
-  CodeBlock _ _ -> True
-  _             -> False
-blockCodeContainer _ = False
+isBlockCodeContainer :: Container Block -> Bool
+isBlockCodeContainer (Container f) | CodeBlock _ _ <- f [] = True
+isBlockCodeContainer _ = False
+
+isHeaderContainer :: Container Block -> Bool
+isHeaderContainer (Container f) | Header _ _ _ <- f [] = True
+isHeaderContainer _ = False
 
 bodyPartToBlocks :: BodyPart -> DocxContext [Block]
 bodyPartToBlocks (Paragraph pPr parparts)
-  | any blockCodeContainer (parStyleToContainers pPr) =
+  | any isBlockCodeContainer (parStyleToContainers pPr) =
     let
-      otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr)
+      otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr)
     in
      return $ 
      rebuild
      otherConts
      [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
+bodyPartToBlocks (Paragraph pPr parparts)
+  | any isHeaderContainer (parStyleToContainers pPr) = do
+    ils <- parPartsToInlines parparts >>= (return . normalizeSpaces)
+    let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr)
+        Header n attr _ = hdrFun []
+    hdr <- makeHeaderAnchor $ Header n attr ils
+    return [hdr]
 bodyPartToBlocks (Paragraph pPr parparts) = do
-  ils <- parPartsToInlines parparts
+  ils <- parPartsToInlines parparts >>= (return . normalizeSpaces)
   case ils of
     [] -> return []
     _ -> do
-      parContents <- parPartsToInlines parparts
-      let trimmedContents = reverse $
-                            dropWhile (Space ==) $
-                            reverse $
-                            dropWhile (Space ==) parContents
       return $
        rebuild
        (parStyleToContainers pPr)
-       [Para trimmedContents]
+       [Para ils]
 bodyPartToBlocks (ListItem pPr numId lvl parparts) = do
   (Docx _ _ numbering _ _) <- asks docxDocument
   let
@@ -446,8 +451,3 @@ isHeaderClass s | "Heading" `isPrefixOf` s =
     ((n, "") : []) -> Just n
     _       -> Nothing
 isHeaderClass _ = Nothing
-
-blksToInlines :: [Block] -> [Inline]
-blksToInlines (Para ils : _) = ils
-blksToInlines (Plain ils : _) = ils
-blksToInlines _ = []
-- 
cgit v1.2.3


From b89a3ba2b1069205a308ad0f444457d595e5a77f Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 28 Jun 2014 03:04:34 -0400
Subject: make makeHeaderAnchors make an auto id

Record relationship between original id and auto id, so we can fix links
after.
---
 src/Text/Pandoc/Readers/Docx.hs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index bbe770f6e..a3053b72a 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -104,7 +104,7 @@ readDocx opts bytes =
     Nothing   -> error $ "couldn't parse docx file"
 
 
-data DState = DState { docxHdrLinks :: M.Map String String }
+data DState = DState { docxHeaderAnchors :: M.Map String String }
 
 data DEnv = DEnv { docxOptions  :: ReaderOptions
                  , docxDocument :: Docx}
@@ -310,7 +310,11 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils)
   | (x : xs) <- filter isAnchorSpan ils
   , (Span (ident, _, _) _) <- x
   , notElem ident dummyAnchors =
-   return $ Header n (ident, classes, kvs) (ils \\ (x:xs))
+    do
+      hdrIDMap <- gets docxHeaderAnchors
+      let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+      put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap}
+      return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
 makeHeaderAnchor blk = return blk
 
 
@@ -432,7 +436,7 @@ bodyToBlocks (Body bps) = do
 
 docxToBlocks :: ReaderOptions -> Docx -> [Block]
 docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) =
-  let dState = DState { docxHdrLinks = M.empty }
+  let dState = DState { docxHeaderAnchors = M.empty }
       dEnv   = DEnv { docxOptions  = opts
                     , docxDocument = d}
   in
-- 
cgit v1.2.3


From dce360e1e6fee089e849c07785d8e21961fefb9b Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 28 Jun 2014 03:54:58 -0400
Subject: Docx Reader: Introduce link rewriting.

---
 src/Text/Pandoc/Readers/Docx.hs | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index a3053b72a..0607aac7f 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -104,7 +104,7 @@ readDocx opts bytes =
     Nothing   -> error $ "couldn't parse docx file"
 
 
-data DState = DState { docxHeaderAnchors :: M.Map String String }
+data DState = DState { docxAnchorMap :: M.Map String String }
 
 data DEnv = DEnv { docxOptions  :: ReaderOptions
                  , docxDocument :: Docx}
@@ -276,7 +276,22 @@ parPartToInlines (Deletion _ author date runs) = do
               ("", ["deletion"], [("author", author), ("date", date)])
               ils]
 parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return []
-parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []]
+parPartToInlines (BookMark _ anchor) =
+  -- We record these, so we can make sure not to overwrite
+  -- user-defined anchor links with header auto ids.
+  do
+    -- Get the anchor map.
+    anchorMap <- gets docxAnchorMap
+    -- Check to see if the id is already in there. Rewrite if
+    -- necessary. This will have the possible effect of rewriting
+    -- user-defined anchor links. However, since these are not defined
+    -- in pandoc, it seems like a necessary evil to avoid an extra
+    -- pass.
+    let newAnchor = case anchor `elem` (M.elems anchorMap) of
+          True -> uniqueIdent [Str anchor] (M.elems anchorMap)
+          False -> anchor
+    put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap}
+    return [Span (anchor, ["anchor"], []) []]
 parPartToInlines (Drawing relid) = do
   (Docx _ _ _ rels _) <- asks docxDocument
   return $ case lookupRelationship relid rels of
@@ -311,9 +326,9 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils)
   , (Span (ident, _, _) _) <- x
   , notElem ident dummyAnchors =
     do
-      hdrIDMap <- gets docxHeaderAnchors
+      hdrIDMap <- gets docxAnchorMap
       let newIdent = uniqueIdent ils (M.elems hdrIDMap)
-      put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap}
+      put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap}
       return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
 makeHeaderAnchor blk = return blk
 
@@ -411,6 +426,14 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
 
   return [Table caption alignments widths hdrCells cells]
 
+-- replace targets with generated anchors.
+rewriteLink :: Inline -> DocxContext Inline
+rewriteLink l@(Link ils ('#':target, title)) = do
+  anchorMap <- gets docxAnchorMap
+  return $ case M.lookup target anchorMap of
+    Just newTarget -> (Link ils ('#':newTarget, title))
+    Nothing        -> l
+rewriteLink il = return il
 
 makeImagesSelfContained :: Inline -> DocxContext Inline
 makeImagesSelfContained i@(Image alt (uri, title)) = do
@@ -429,14 +452,15 @@ makeImagesSelfContained inline = return inline
 
 bodyToBlocks :: Body -> DocxContext [Block]
 bodyToBlocks (Body bps) = do
-  blks <- concatMapM bodyPartToBlocks bps
+  blks <- concatMapM bodyPartToBlocks bps >>=
+          walkM rewriteLink
   return $
     blocksToDefinitions $
     blocksToBullets $ blks
 
 docxToBlocks :: ReaderOptions -> Docx -> [Block]
 docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) =
-  let dState = DState { docxHeaderAnchors = M.empty }
+  let dState = DState { docxAnchorMap = M.empty }
       dEnv   = DEnv { docxOptions  = opts
                     , docxDocument = d}
   in
@@ -447,7 +471,6 @@ ilToCode (Str s) = s
 ilToCode Space = " "
 ilToCode _     = ""
 
-
 isHeaderClass :: String -> Maybe Int
 isHeaderClass s | "Heading" `isPrefixOf` s =
   case reads (drop (length "Heading") s) :: [(Int, String)] of
-- 
cgit v1.2.3


From c0a8d5ac7213ac01b5f12dd7dfca66e6d8301f5f Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sat, 28 Jun 2014 08:40:59 -0400
Subject: Docx Reader: All headers get auto id.

Previously, only those with an anchor got an auto id. Now, all do, which
puts it in line with pandoc's markdown extension.
---
 src/Text/Pandoc/Readers/Docx.hs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 0607aac7f..71baa5dde 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -103,7 +103,6 @@ readDocx opts bytes =
     Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
     Nothing   -> error $ "couldn't parse docx file"
 
-
 data DState = DState { docxAnchorMap :: M.Map String String }
 
 data DEnv = DEnv { docxOptions  :: ReaderOptions
@@ -321,6 +320,8 @@ dummyAnchors :: [String]
 dummyAnchors = ["_GoBack"]
 
 makeHeaderAnchor :: Block -> DocxContext Block
+-- If there is an anchor already there (an anchor span in the header,
+-- to be exact), we rename and associate the new id with the old one.
 makeHeaderAnchor (Header n (_, classes, kvs) ils)
   | (x : xs) <- filter isAnchorSpan ils
   , (Span (ident, _, _) _) <- x
@@ -330,6 +331,14 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils)
       let newIdent = uniqueIdent ils (M.elems hdrIDMap)
       put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap}
       return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
+-- Otherwise we just give it a name, and register that name (associate
+-- it with itself.)
+makeHeaderAnchor (Header n (_, classes, kvs) ils) =
+  do
+    hdrIDMap <- gets docxAnchorMap
+    let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+    put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
+    return $ Header n (newIdent, classes, kvs) ils
 makeHeaderAnchor blk = return blk
 
 
-- 
cgit v1.2.3