summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2022-02-28 08:57:18 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2022-02-28 10:19:04 -0800
commitb94ad5b2edbcad262e5cd6029739f96c571da207 (patch)
tree74fb6a91fda7bca9b6b88c58960a8df20765b94a
parentbff49852a5dd552d4319f05bc2c3c07a2c64d104 (diff)
DocBook reader: improve info parsing.
Simplify metadata parsing code. Handle abstract as block-level content. Report skipped info elements with `--verbose`. See #7747.
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs114
-rw-r--r--test/docbook-reader.native3
2 files changed, 82 insertions, 35 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 4594c934e..7a12ad154 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TemplateHaskell #-}
{- |
Module : Text.Pandoc.Readers.DocBook
@@ -21,6 +22,7 @@ import Data.Either (rights)
import Data.Foldable (asum)
import Data.Generics
import Data.List (intersperse,elemIndex)
+import qualified Data.Set as Set
import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList)
import Data.Text (Text)
@@ -598,35 +600,81 @@ named s e = qName (elName e) == s
--
addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks
-addMetadataFromElement e = do
- case filterChild (named "title") e of
- Nothing -> return ()
- Just z -> do
- getInlines z >>= addMeta "title"
- addMetaField "subtitle" z
- case filterChild (named "authorgroup") e of
- Nothing -> return ()
- Just z -> addMetaField "author" z
- addMetaField "subtitle" e
- addAuthor e
- addMetaField "date" e
- addMetaField "release" e
- addMetaField "releaseinfo" e
- addMetaField "abstract" e
- return mempty
- where
- addAuthor elt =
- case filterChildren (named "author") elt of
- [] -> return ()
- [z] -> fromAuthor z >>= addMeta "author"
- zs -> mapM fromAuthor zs >>= addMeta "author"
- fromAuthor elt =
- mconcat . intersperse space <$> mapM getInlines (elChildren elt)
- addMetaField fieldname elt =
- case filterChildren (named fieldname) elt of
- [] -> return ()
- [z] -> getInlines z >>= addMeta fieldname
- zs -> mapM getInlines zs >>= addMeta fieldname
+addMetadataFromElement e =
+ mempty <$ mapM_ handleMetadataElement
+ (filterChildren ((isMetadataField . qName . elName)) e)
+ where
+ handleMetadataElement elt =
+ case qName (elName elt) of
+ "title" -> addContentsToMetadata "title" elt
+ "subtitle" -> addContentsToMetadata "subtitle" elt
+ "abstract" -> addContentsToMetadata "abstract" elt
+ "date" -> addContentsToMetadata "date" elt
+ "release" -> addContentsToMetadata "release" elt
+ "releaseinfo" -> addContentsToMetadata "releaseinfo" elt
+ "author" -> fromAuthor elt >>= addMeta "author"
+ "authorgroup" ->
+ mapM fromAuthor (filterChildren (named "author") elt) >>= addMeta "author"
+ _ -> report . IgnoredElement . qName . elName $ elt
+
+ fromAuthor elt =
+ mconcat . intersperse space . filter (not . null)
+ <$> mapM getInlines (elChildren elt)
+
+ addContentsToMetadata fieldname elt =
+ if any ((`Set.member` blockTags) . qName . elName) (elChildren elt)
+ then getBlocks elt >>= addMeta fieldname
+ else getInlines elt >>= addMeta fieldname
+
+ isMetadataField "abstract" = True
+ isMetadataField "address" = True
+ isMetadataField "annotation" = True
+ isMetadataField "artpagenums" = True
+ isMetadataField "author" = True
+ isMetadataField "authorgroup" = True
+ isMetadataField "authorinitials" = True
+ isMetadataField "bibliocoverage" = True
+ isMetadataField "biblioid" = True
+ isMetadataField "bibliomisc" = True
+ isMetadataField "bibliomset" = True
+ isMetadataField "bibliorelation" = True
+ isMetadataField "biblioset" = True
+ isMetadataField "bibliosource" = True
+ isMetadataField "collab" = True
+ isMetadataField "confgroup" = True
+ isMetadataField "contractnum" = True
+ isMetadataField "contractsponsor" = True
+ isMetadataField "copyright" = True
+ isMetadataField "cover" = True
+ isMetadataField "date" = True
+ isMetadataField "edition" = True
+ isMetadataField "editor" = True
+ isMetadataField "extendedlink" = True
+ isMetadataField "issuenum" = True
+ isMetadataField "itermset" = True
+ isMetadataField "keywordset" = True
+ isMetadataField "legalnotice" = True
+ isMetadataField "mediaobject" = True
+ isMetadataField "org" = True
+ isMetadataField "orgname" = True
+ isMetadataField "othercredit" = True
+ isMetadataField "pagenums" = True
+ isMetadataField "printhistory" = True
+ isMetadataField "productname" = True
+ isMetadataField "productnumber" = True
+ isMetadataField "pubdate" = True
+ isMetadataField "publisher" = True
+ isMetadataField "publishername" = True
+ isMetadataField "releaseinfo" = True
+ isMetadataField "revhistory" = True
+ isMetadataField "seriesvolnums" = True
+ isMetadataField "subjectset" = True
+ isMetadataField "subtitle" = True
+ isMetadataField "title" = True
+ isMetadataField "titleabbrev" = True
+ isMetadataField "volumenum" = True
+ isMetadataField _ = False
+
addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
addMeta field val = modify (setMeta field val)
@@ -636,11 +684,11 @@ instance HasMeta DBState where
deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)}
isBlockElement :: Content -> Bool
-isBlockElement (Elem e) = qName (elName e) `elem` blockTags
+isBlockElement (Elem e) = qName (elName e) `Set.member` blockTags
isBlockElement _ = False
-blockTags :: [Text]
-blockTags =
+blockTags :: Set.Set Text
+blockTags = Set.fromList $
[ "abstract"
, "ackno"
, "answer"
@@ -902,7 +950,7 @@ parseBlock (Elem e) =
"?xml" -> return mempty
"title" -> return mempty -- handled in parent element
"subtitle" -> return mempty -- handled in parent element
- _ -> skip >> getBlocks e
+ _ -> skip >> getBlocks e
where skip = do
let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
diff --git a/test/docbook-reader.native b/test/docbook-reader.native
index cad1d17a7..87ccdfac0 100644
--- a/test/docbook-reader.native
+++ b/test/docbook-reader.native
@@ -4,8 +4,7 @@ Pandoc
fromList
[ ( "author"
, MetaList
- [ MetaInlines
- [ Str "John" , SoftBreak , Str "MacFarlane" ]
+ [ MetaInlines [ Str "John" , Space , Str "MacFarlane" ]
, MetaInlines [ Str "Anonymous" ]
]
)