Fix line lengths in Markdown reader, DocBook writer

Trying to limit lines to 80 chars.
author: Albert Krewinkel <albert@zeitkraut.de> 2023-01-18 14:46:10 +0100
committer: Albert Krewinkel <albert@zeitkraut.de> 2023-01-18 16:02:57 +0100
commit: 39830e1f61cd2855569bd7def771c71eb8dfa8b5 (patch)
tree: 3c6705fa4598afce33f5639c05dbf34ec229ad56
parent: 8a9116e159c1c41e1f21c2acbc175615b047f960 (diff)
2 files changed, 62 insertions, 38 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4369c0b2a..e0c0922cb 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -352,7 +352,8 @@ referenceKey = try $ do
   let sourceURL = fmap T.unwords $ many $ try $ do
                     skipMany spaceChar
                     notFollowedBy' referenceTitle
-                    notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+                    notFollowedBy' $ guardEnabled Ext_link_attributes >>
+                                     attributes
                     notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
                                      try (spnl <* keyValAttr)
                     notFollowedBy' (() <$ reference)
@@ -397,7 +398,7 @@ quotedTitle c = try $ do
   notFollowedBy spaces
   let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
   let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> litChar
-  let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c
+  let nestedChunk = (\x -> (c `T.cons` x) `T.snoc` c) <$> quotedTitle c
   T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder
 
 -- | PHP Markdown Extra style abbreviation key.  Currently
@@ -691,13 +692,17 @@ codeBlockFenced = try $ do
   rawattr <-
      (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
-     (Right <$> (do
-           languageId <- option Nothing (Just . toLanguageId <$> try (many1Char $ satisfy (\x -> x `notElem` ['`', '{', '}'] && not (isSpace x))))
-           skipMany spaceChar
-           maybeAttr <- option Nothing (Just <$> (guardEnabled Ext_fenced_code_attributes >> try attributes))
-           return $ case maybeAttr of
-              Nothing -> ("", maybeToList languageId, [])
-              Just (elementId, classes, attrs) -> (elementId, maybe classes (: classes) languageId, attrs)))
+     (Right <$> do
+         let pLangId = many1Char . satisfy $ \x ->
+               x `notElem` ['`', '{', '}'] && not (isSpace x)
+         mbLanguageId <- optionMaybe (toLanguageId <$> pLangId)
+         skipMany spaceChar
+         mbAttr <- optionMaybe
+                   (guardEnabled Ext_fenced_code_attributes *> try attributes)
+         return $ case mbAttr of
+           Nothing -> ("", maybeToList mbLanguageId, [])
+           Just (elementId, classes, attrs) ->
+             (elementId, (maybe id (:) mbLanguageId) classes, attrs))
   blankline
   contents <- T.intercalate "\n" <$>
                  manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
@@ -1054,19 +1059,22 @@ para = try $ do
     $ try $ do
             newline
             (mempty <$ blanklines)
-              <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
-              <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
-              <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
-              <|> (guardEnabled Ext_lists_without_preceding_blankline >>
-                       -- Avoid creating a paragraph in a nested list.
-                       notFollowedBy' inList >>
-                       () <$ lookAhead listStart)
+              <|> (guardDisabled Ext_blank_before_blockquote
+                   <* lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks
+                   <* lookAhead codeBlockFenced)
+              <|> (guardDisabled Ext_blank_before_header
+                   <* lookAhead header)
+              <|> (guardEnabled Ext_lists_without_preceding_blankline
+                    -- Avoid creating a paragraph in a nested list.
+                    <* notFollowedBy' inList
+                    <* lookAhead listStart)
               <|> do guardEnabled Ext_native_divs
                      inHtmlBlock <- stateInHtmlBlock <$> getState
                      case inHtmlBlock of
-                          Just "div" -> () <$
-                                       lookAhead (htmlTag (~== TagClose ("div" :: Text)))
-                          _          -> mzero
+                       Just "div" -> () <$
+                         lookAhead (htmlTag (~== TagClose ("div" :: Text)))
+                       _          -> mzero
               <|> do guardEnabled Ext_fenced_divs
                      divLevel <- stateFencedDivLevel <$> getState
                      if divLevel > 0
@@ -1364,8 +1372,8 @@ multilineTableHeader headless = try $ do
                       []     -> []
                       (x:xs) -> reverse (x+1:xs)
   rawHeadsList <- if headless
-                     then fmap (map (:[]) . tail .
-                              splitTextByIndices (init indices')) $ lookAhead anyLine
+                     then map (:[]) . tail . splitTextByIndices (init indices')
+                          <$> lookAhead anyLine
                      else return $ transpose $ map
                            (tail . splitTextByIndices (init indices'))
                            rawContent
@@ -1410,13 +1418,15 @@ pipeTable = try $ do
   let lineWidths = map (sum . map realLength) (heads' : lines'')
   columns <- getOption readerColumns
   -- add numcols + 1 for the pipes themselves
-  let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
+  let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1)
+                  > columns
                   then map (\len ->
                          fromIntegral len / fromIntegral (sum seplengths))
                          seplengths
                   else replicate (length aligns) 0.0
   (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
-  (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
+  (rows :: F [[Blocks]]) <- sequence <$>
+                            mapM (fmap sequence . mapM cellContents) lines''
   return $
     toTableComponents' NormalizeHeader aligns widths <$> headCells <*> rows
 
@@ -1432,7 +1442,8 @@ pipeTableRow = try $ do
   skipMany spaceChar
   openPipe <- (True <$ char '|') <|> return False
   -- split into cells
-  let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
+  let chunk = void (code <|> math <|> rawHtmlInline <|>
+                    escapedChar <|> rawLaTeXInline')
        <|> void (noneOf "|\n\r")
   cells <- (snd <$> withRaw (many chunk)) `sepBy1` char '|'
   closePipe <- (True <$ char '|') <|> return False
@@ -1700,7 +1711,8 @@ inlinesBetween :: PandocMonad m
                -> MarkdownParser m (F Inlines)
 inlinesBetween start end =
   trimInlinesF . mconcat <$> try (start >> many1Till inner end)
-    where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
+    where inner      = innerSpace <|>
+                       (notFollowedBy' (() <$ whitespace) >> inline)
           innerSpace = try $ whitespace <* notFollowedBy' end
 
 strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1745,7 +1757,8 @@ subscript = do
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
-  where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
+  where lb = spaceChar >> skipMany spaceChar
+                       >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
 nonEndline :: PandocMonad m => ParsecT Sources st m Char
@@ -2085,7 +2098,8 @@ divHtml = do
     updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
     bls <- option "" (blankline >> option "" blanklines)
     contents <- mconcat <$>
-                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text)))
+                      >> block)
     closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
     if closed
        then do
diff --git a/src/Text/Pandoc/Writers/DocBook.hs b/src/Text/Pandoc/Writers/DocBook.hs
index 90bee50e1..787401152 100644
--- a/src/Text/Pandoc/Writers/DocBook.hs
+++ b/src/Text/Pandoc/Writers/DocBook.hs
@@ -15,7 +15,8 @@ Conversion of 'Pandoc' documents to DocBook XML.
 module Text.Pandoc.Writers.DocBook ( writeDocBook4, writeDocBook5 ) where
 import Control.Monad.Reader
 import Data.Generics (everywhere, mkT)
-import Data.Maybe (isNothing, maybeToList)
+import Data.List (nub, partition)
+import Data.Maybe (isNothing)
 import Data.Monoid (All (..), Any (..))
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -133,13 +134,15 @@ plainToPara x         = x
 -- | Convert a list of pairs of terms and definitions into a list of
 -- DocBook varlistentrys.
 deflistItemsToDocBook :: PandocMonad m
-                      => WriterOptions -> [([Inline],[[Block]])] -> DB m (Doc Text)
+                      => WriterOptions -> [([Inline],[[Block]])]
+                      -> DB m (Doc Text)
 deflistItemsToDocBook opts items =
   vcat <$> mapM (uncurry (deflistItemToDocBook opts)) items
 
 -- | Convert a term and a list of blocks into a DocBook varlistentry.
 deflistItemToDocBook :: PandocMonad m
-                     => WriterOptions -> [Inline] -> [[Block]] -> DB m (Doc Text)
+                     => WriterOptions -> [Inline] -> [[Block]]
+                     -> DB m (Doc Text)
 deflistItemToDocBook opts term defs = do
   term' <- inlinesToDocBook opts term
   def' <- blocksToDocBook opts $ concatMap (map plainToPara) defs
@@ -148,11 +151,13 @@ deflistItemToDocBook opts term defs = do
       inTagsIndented "listitem" def'
 
 -- | Convert a list of lists of blocks to a list of DocBook list items.
-listItemsToDocBook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m (Doc Text)
+listItemsToDocBook :: PandocMonad m
+                   => WriterOptions -> [[Block]] -> DB m (Doc Text)
 listItemsToDocBook opts items = vcat <$> mapM (listItemToDocBook opts) items
 
 -- | Convert a list of blocks into a DocBook list item.
-listItemToDocBook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text)
+listItemToDocBook :: PandocMonad m
+                  => WriterOptions -> [Block] -> DB m (Doc Text)
 listItemToDocBook opts item =
   inTagsIndented "listitem" <$> blocksToDocBook opts (map plainToPara item)
 
@@ -187,7 +192,8 @@ blockToDocBook opts (Div (id',"section":_,_) (Header lvl (_,classes,attrs) ils :
       nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts)
       -- Though, DocBook 4 does not support namespaces and
       -- standalone documents will include them in the template.
-                 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+                 then [("xmlns", "http://docbook.org/ns/docbook")
+                      ,("xmlns:xlink", "http://www.w3.org/1999/xlink")]
                  else []
 
       -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id
@@ -288,13 +294,15 @@ blockToDocBook _ b@(RawBlock f str)
                      version <- ask
                      if version == DocBook5
                         then return empty -- No html in DocBook5
-                        else return $ literal str -- allow html for backwards compatibility
+                        else return $ literal str -- allow html for backwards
+                                                  -- compatibility
   | otherwise      = do
       report $ BlockNotRendered b
       return empty
 blockToDocBook _ HorizontalRule = return empty -- not semantic
 blockToDocBook opts (Table _ blkCapt specs thead tbody tfoot) = do
-  let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
+  let (caption, aligns, widths, headers, rows) =
+        toLegacyTable blkCapt specs thead tbody tfoot
   captionDoc <- if null caption
                    then return empty
                    else inTagsSimple "title" <$>
@@ -452,7 +460,8 @@ inlineToDocBook opts (Link attr txt (src, _))
       (if "#" `T.isPrefixOf` src
             then let tag = if null txt then "xref" else "link"
                  in  inTags False tag $
-                     ("linkend", writerIdentifierPrefix opts <> T.drop 1 src) : idAndRole attr
+                     ("linkend", writerIdentifierPrefix opts <> T.drop 1 src) :
+                     idAndRole attr
             else if version == DocBook5
                     then inTags False "link" $ ("xlink:href", src) : idAndRole attr
                     else inTags False "ulink" $ ("url", src) : idAndRole attr )
@@ -486,9 +495,10 @@ idAndRole (id',cls,_) = ident <> role
 -- Used in blockToDocBook for Header (section) to create or extend
 -- the role attribute with candidate class tokens
 enrichRole :: [(Text, Text)] -> [Text] -> [(Text, Text)]
-enrichRole mattrs cls = [("role",rolevals) | rolevals /= ""]<>(filter (\x -> (fst x) /= "role") mattrs)
+enrichRole mattrs cls = [("role", T.unwords roles) | roles /= []] <> nonRole
   where
-    rolevals = T.unwords((filter (`elem` cand) cls)<>(maybeToList(lookup "role" mattrs)))
+    (roleAttr, nonRole) = partition (\(key, _v) -> key == "role") mattrs
+    roles = nub $ filter (`elem` cand) cls <> map snd roleAttr
     cand = ["unnumbered"]
 
 isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool
author	Albert Krewinkel <albert@zeitkraut.de>	2023-01-18 14:46:10 +0100
committer	Albert Krewinkel <albert@zeitkraut.de>	2023-01-18 16:02:57 +0100
commit	39830e1f61cd2855569bd7def771c71eb8dfa8b5 (patch)
tree	3c6705fa4598afce33f5639c05dbf34ec229ad56
parent	8a9116e159c1c41e1f21c2acbc175615b047f960 (diff)