summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2022-02-13 02:39:02 +0100
committerGitHub <noreply@github.com>2022-02-12 17:39:02 -0800
commite1b7f3a63d521433d8a83e4e1f4f7aa1935d0bdd (patch)
treea9f6932ba79035d9daf57073e5f24ed2165b7611
parent4b1cddd292a1d0f9f8b69392488fd5f79c8d46ad (diff)
JATS reader: improve handling of fn-group elements (#7914)
Footnotes in `<fn-group>` elements are collected and re-inserted into the document as proper footnotes in the place where they are referenced. Fixes: #6348
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs25
-rw-r--r--test/command/6348.md42
2 files changed, 65 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 37e0d13bc..03ed4a659 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -30,8 +30,9 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, extractSpaces)
-import Text.TeXMath (readMathML, writeTeX)
+import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML.Light
+import Text.TeXMath (readMathML, writeTeX)
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
@@ -43,6 +44,7 @@ data JATSState = JATSState{ jatsSectionLevel :: Int
, jatsQuoteType :: QuoteType
, jatsMeta :: Meta
, jatsBook :: Bool
+ , jatsFootnotes :: Map.Map Text Blocks
, jatsContent :: [Content]
} deriving Show
@@ -51,6 +53,7 @@ instance Default JATSState where
, jatsQuoteType = DoubleQuote
, jatsMeta = mempty
, jatsBook = False
+ , jatsFootnotes = mempty
, jatsContent = [] }
@@ -63,7 +66,17 @@ readJATS _ inp = do
tree <- either (throwError . PandocXMLError "") return $
parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
- return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
+ let footnotes = jatsFootnotes st'
+ let blockList = toList $ mconcat bs
+ let linkToFootnotes :: Inline -> Inline
+ linkToFootnotes link'@(Link _attr _txt (href, _title)) =
+ case T.uncons href of
+ Just ('#', rid) -> case Map.lookup rid footnotes of
+ Just footnote -> Note (toList footnote)
+ Nothing -> link'
+ _ -> link'
+ linkToFootnotes inline = inline
+ return $ Pandoc (jatsMeta st') (walk linkToFootnotes blockList)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: Text -> Element -> Text
@@ -175,6 +188,7 @@ parseBlock (Elem e) =
"table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], [])
<$> getBlocks e
"caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6
+ "fn-group" -> parseFootnoteGroup
"ref-list" -> parseRefList e
"?xml" -> return mempty
_ -> getBlocks e
@@ -239,6 +253,13 @@ parseBlock (Elem e) =
(attrValue "title" g)
_ -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
+ parseFootnoteGroup = do
+ forM_ (filterChildren (named "fn") e) $ \fn -> do
+ let id' = attrValue "id" fn
+ contents <- getBlocks fn
+ modify $ \st ->
+ st { jatsFootnotes = Map.insert id' contents (jatsFootnotes st) }
+ return mempty
parseTable = do
let isCaption x = named "title" x || named "caption" x
diff --git a/test/command/6348.md b/test/command/6348.md
new file mode 100644
index 000000000..7f599e52e
--- /dev/null
+++ b/test/command/6348.md
@@ -0,0 +1,42 @@
+```
+% pandoc -f jats -t native
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "http://jats.nlm.nih.gov/publishing/1.0/JATS-journalpublishing1.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.0" article-type="research-article">
+ <front>
+ <journal-meta>
+ <journal-title-group>
+ <journal-title>Opinion Research</journal-title>
+ </journal-title-group>
+ </journal-meta>
+ <article-meta>
+ <title-group>
+ <article-title>Example article</article-title>
+ </title-group>
+ </article-meta>
+ </front>
+ <body>
+ <sec sec-type="results">
+ <title>Results</title>
+ <p>A University <xref ref-type="fn" rid="N0001">1</xref></p>
+ </sec>
+ </body>
+ <back>
+ <fn-group>
+ <fn id="N0001">
+ <p>footnote</p>
+ </fn>
+ </fn-group>
+ </back>
+</article>
+^D
+[ Header 1 ( "" , [] , [] ) [ Str "Results" ]
+, Para
+ [ Str "A"
+ , Space
+ , Str "University"
+ , Space
+ , Note [ Para [ Str "footnote" ] ]
+ ]
+]
+```