summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2022-03-28 18:37:04 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2022-03-28 18:37:33 -0700
commit51c8b059e1710cc7301b4daa379bb3b66847d00e (patch)
tree79df0c2bfe5a5cb98103eb0065e6af0732440d91 /src
parent6217fd09768909a0a324e2ab3ecf4479a88e3dcd (diff)
JATS reader: improve refs parsing.
Handle issn and isbn; use simpler form for issued date.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs132
1 files changed, 62 insertions, 70 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 516bf2f14..98cf5bdf5 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
@@ -21,7 +22,7 @@ import Data.Default
import Data.Generics
import Data.List (foldl', intersperse)
import qualified Data.Map as Map
-import Data.Maybe (maybeToList, fromMaybe, mapMaybe)
+import Data.Maybe (maybeToList, fromMaybe, catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
@@ -400,75 +401,66 @@ parseRefList e = do
parseRef :: PandocMonad m
=> Element -> JATS m (Map.Map Text MetaValue)
parseRef e = do
- let refId = text $ attrValue "id" e
- let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
- case filterChild (named "element-citation") e of
- Just c -> do
- let refType = text $
- case attrValue "publication-type" c of
- "journal" -> "article-journal"
- x -> x
- (refTitle, refContainerTitle) <- do
- t <- getInlineText "article-title" c
- ct <- getInlineText "source" c
- if t == mempty
- then return (ct, mempty)
- else return (t, ct)
- refLabel <- getInlineText "label" c
- refYear <- getInlineText "year" c
- refVolume <- getInlineText "volume" c
- refIssue <- getInlineText "issue" c
- refFirstPage <- getInlineText "fpage" c
- refLastPage <- getInlineText "lpage" c
- refPublisher <- getInlineText "publisher-name" c
- refPublisherPlace <- getInlineText "publisher-loc" c
- let refPages = refFirstPage <> (if refLastPage == mempty
- then mempty
- else text "\x2013" <> refLastPage)
- let personGroups' = filterChildren (named "person-group") c
- let getName nm = do
- given <- maybe (return mempty) getInlines
- $ filterChild (named "given-names") nm
- family <- maybe (return mempty) getInlines
- $ filterChild (named "surname") nm
- return $ toMetaValue $ Map.fromList [
- ("given" :: Text, given)
- , ("family", family)
- ]
- let extractObjectId e' =
- let v = toMetaValue (strContent e')
- in case attrValue "pub-id-type" e' of
- "doi" -> Just ("DOI", v)
- "pmid" -> Just ("PMID", v)
- _ -> Nothing
- let objectIds = mapMaybe extractObjectId $
- (filterChildren (\e' -> named "object-id" e' ||
- named "pub-id" e') c)
- personGroups <- mapM (\pg ->
- do names <- mapM getName
- (filterChildren (named "name") pg)
- return (attrValue "person-group-type" pg,
- toMetaValue names))
- personGroups'
- return $ Map.fromList $
- [ ("id" :: Text, toMetaValue refId)
- , ("type", toMetaValue refType)
- , ("title", toMetaValue refTitle)
- , ("container-title", toMetaValue refContainerTitle)
- , ("publisher", toMetaValue refPublisher)
- , ("publisher-place", toMetaValue refPublisherPlace)
- , ("title", toMetaValue refTitle)
- , ("issued", toMetaValue
- $ Map.fromList [
- ("year" :: Text, refYear)
- ])
- , ("volume", toMetaValue refVolume)
- , ("issue", toMetaValue refIssue)
- , ("page", toMetaValue refPages)
- , ("citation-label", toMetaValue refLabel)
- ] ++ objectIds ++ personGroups
- Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
- -- TODO handle mixed-citation
+ let combineWithDash x y = x <> text "-" <> y
+ let getName nm = do
+ given <- maybe (return mempty) getInlines
+ $ filterChild (named "given-names") nm
+ family <- maybe (return mempty) getInlines
+ $ filterChild (named "surname") nm
+ return $ toMetaValue $ Map.fromList [
+ ("given" :: Text, given)
+ , ("family", family)
+ ]
+ let refElement :: PandocMonad m
+ => Element -> Element -> JATS m (Maybe (Text, MetaValue))
+ refElement c el =
+ case qName (elName el) of
+ "article-title" -> Just . ("title",) . toMetaValue <$> getInlines el
+ "source" ->
+ case filterChild (named "article-title") c of
+ Just _ -> Just . ("container-title",) . toMetaValue <$> getInlines el
+ Nothing -> Just . ("title",) . toMetaValue <$> getInlines el
+ "label" -> Just . ("citation-label",) . toMetaValue <$> getInlines el
+ "year" -> case filterChild (named "month") c of
+ Just m -> Just . ("issued",) . toMetaValue <$>
+ (combineWithDash
+ <$> getInlines el <*> getInlines m)
+ Nothing -> Just . ("issued",) . toMetaValue <$> getInlines el
+ "volume" -> Just . ("volume",) . toMetaValue <$> getInlines el
+ "issue" -> Just . ("issue",) . toMetaValue <$> getInlines el
+ "isbn" -> Just . ("ISBN",) . toMetaValue <$> getInlines el
+ "issn" -> Just . ("ISSN",) . toMetaValue <$> getInlines el
+ "fpage" ->
+ case filterChild (named "lpage") c of
+ Just lp -> Just . ("page",) . toMetaValue <$>
+ (combineWithDash <$> getInlines el <*> getInlines lp)
+ Nothing -> Just . ("page-first",) . toMetaValue <$> getInlines el
+ "publisher-name" -> Just . ("publisher",) . toMetaValue <$> getInlines el
+ "publisher-loc" -> Just . ("publisher-place",) . toMetaValue
+ <$> getInlines el
+ "person-group" -> do names <- mapM getName
+ (filterChildren (named "name") el)
+ pure $ Just (attrValue "person-group-type" el,
+ toMetaValue names)
+ "pub-id" -> case attrValue "pub-id-type" el of
+ "doi" -> pure $ Just ("DOI", toMetaValue $ strContent el)
+ "pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el)
+ _ -> pure Nothing
+ "object-id" -> case attrValue "pub-id-type" el of
+ "doi" -> pure $ Just ("DOI", toMetaValue $ strContent el)
+ "pmid" -> pure $ Just ("PMID", toMetaValue $ strContent el)
+ _ -> pure Nothing
+
+
+ _ -> pure Nothing
+ refVariables <-
+ case filterChild (named "element-citation") e of
+ Just c -> (("type", toMetaValue $ case attrValue "publication-type" c of
+ "journal" -> "article-journal"
+ x -> x) :) .
+ catMaybes <$> mapM (refElement c) (elChildren c)
+ Nothing -> pure [] -- TODO handle mixed-citation
+ return $ Map.fromList (("id", toMetaValue $ attrValue "id" e) : refVariables)
textContent :: Element -> Text
textContent = strContent