summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs103
1 files changed, 65 insertions, 38 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 538fb8264..5056eb3ff 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -15,7 +15,7 @@ Conversion of JATS XML to 'Pandoc' document.
module Text.Pandoc.Readers.JATS ( readJATS ) where
import Control.Monad.State.Strict ( StateT(runStateT), gets, modify )
-import Control.Monad (forM_, when, unless, MonadPlus(mplus))
+import Control.Monad (forM_, when, unless)
import Control.Monad.Except (throwError)
import Text.Pandoc.Error (PandocError(..))
import Data.Char (isDigit, isSpace)
@@ -105,31 +105,30 @@ instance HasMeta JATSState where
deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)}
isBlockElement :: Content -> Bool
-isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
- where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags
- paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap",
- "code", "fig", "fig-group", "graphic", "media", "preformat",
+isBlockElement (Elem e) = case qName (elName e) of
+ "disp-formula" -> if onlyOneChild e
+ then if hasFormulaChild e
+ then False
+ else case filterChild (named "alternatives") e of
+ Just a -> if hasFormulaChild a then False else True
+ Nothing -> True
+ else True
+ "alternatives" -> if hasFormulaChild e then False else True
+ _ -> qName (elName e) `S.member` blocktags
+
+ where blocktags = S.fromList (paragraphLevel ++ lists ++ formulae ++ other) \\ S.fromList canBeInline
+ paragraphLevel = ["address", "answer", "answer-set", "array", "boxed-text", "chem-struct-wrap",
+ "code", "explanation", "fig", "fig-group", "graphic", "media", "preformat", "question", "question-wrap", "question-wrap-group",
"supplementary-material", "table-wrap", "table-wrap-group",
"alternatives", "disp-formula", "disp-formula-group"]
lists = ["def-list", "list"]
- mathML = ["tex-math", "mml:math"]
+ formulae = ["tex-math", "mml:math"]
other = ["p", "related-article", "related-object", "ack", "disp-quote",
"speech", "statement", "verse-group", "x"]
- inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material",
- "related-article", "related-object", "hr", "bold", "fixed-case",
- "italic", "monospace", "overline", "overline-start", "overline-end",
- "roman", "sans-serif", "sc", "strike", "underline", "underline-start",
- "underline-end", "ruby", "alternatives", "inline-graphic", "private-char",
- "chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev",
- "milestone-end", "milestone-start", "named-content", "styled-content",
- "fn", "target", "xref", "sub", "sup", "x", "address", "array",
- "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic",
- "media", "preformat", "supplementary-material", "table-wrap",
- "table-wrap-group", "disp-formula", "disp-formula-group",
- "citation-alternatives", "element-citation", "mixed-citation",
- "nlm-citation", "award-id", "funding-source", "open-access",
- "def-list", "list", "ack", "disp-quote", "speech", "statement",
- "verse-group"]
+ canBeInline = ["tex-math", "mml:math", "related-object", "x"]
+ onlyOneChild x = length (allChildren x) == 1
+ allChildren x = filterChildren (const True) x
+
isBlockElement _ = False
-- Trim leading and trailing newline characters
@@ -217,6 +216,13 @@ parseBlock (Elem e) = do
else divWith (attrValue "id" e, ["caption"], []) <$> wrapWithHeader 6 (getBlocks e)
"fn-group" -> parseFootnoteGroup
"ref-list" -> parseRefList e
+ "alternatives" -> if hasFormulaChild e
+ then blockFormula displayMath e
+ else getBlocks e
+ "disp-formula" -> if hasFormulaChild e
+ then blockFormula displayMath e
+ else divWith (attrValue "id" e, ["disp-formula"], [])
+ <$> getBlocks e
"?xml" -> return mempty
_ -> getBlocks e
where parseMixed container conts = do
@@ -363,9 +369,7 @@ parseBlock (Elem e) = do
wrapWithHeader n mBlocks = do
isBook <- gets jatsBook
let n' = if isBook || n == 0 then n + 1 else n
- headerText <- case filterChild (named "title") e `mplus`
- (filterChild (named "info") e >>=
- filterChild (named "title")) of
+ headerText <- case filterChild (named "title") e of
Just t -> getInlines t
Nothing -> return mempty
oldN <- gets jatsSectionLevel
@@ -602,8 +606,11 @@ parseInline (Elem e) =
let attr = (attrValue "id" e, [], [])
return $ linkWith attr href title ils'
- "disp-formula" -> formula displayMath
- "inline-formula" -> formula math
+ "alternatives" -> if hasFormulaChild e
+ then inlineFormula math e
+ else innerInlines id
+ "disp-formula" -> inlineFormula displayMath e
+ "inline-formula" -> inlineFormula math e
"math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
-> return . math $ mathML e
"tex-math" -> return . math $ textContent e
@@ -616,11 +623,14 @@ parseInline (Elem e) =
_ -> innerInlines id
where innerInlines f = extractSpaces f . mconcat <$>
mapM parseInline (elContent e)
- mathML x =
- case readMathML . showElement $ everywhere (mkT removePrefix) x of
- Left _ -> mempty
- Right m -> writeTeX m
- formula constructor = do
+ codeWithLang = do
+ let classes' = case attrValue "language" e of
+ "" -> []
+ l -> [l]
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+
+inlineFormula :: PandocMonad m => (Text->Inlines) -> Element -> JATS m Inlines
+inlineFormula constructor e = do
let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
texMaths = map textContent $
filterChildren (named "tex-math") whereToLook
@@ -628,12 +638,29 @@ parseInline (Elem e) =
filterChildren isMathML whereToLook
return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
- isMathML x = qName (elName x) == "math" &&
+blockFormula :: PandocMonad m => (Text->Inlines) -> Element -> JATS m Blocks
+blockFormula constructor e = do
+ let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
+ texMaths = map textContent $
+ filterChildren (named "tex-math") whereToLook
+ mathMLs = map mathML $
+ filterChildren isMathML whereToLook
+ return . para . head . take 1 . map constructor $ texMaths ++ mathMLs
+
+mathML :: Element -> Text
+mathML x =
+ case readMathML . showElement $ everywhere (mkT removePrefix) x of
+ Left _ -> mempty
+ Right m -> writeTeX m
+ where removePrefix elname = elname { qPrefix = Nothing }
+
+isMathML :: Element -> Bool
+isMathML x = qName (elName x) == "math" &&
qURI (elName x) ==
Just "http://www.w3.org/1998/Math/MathML"
- removePrefix elname = elname { qPrefix = Nothing }
- codeWithLang = do
- let classes' = case attrValue "language" e of
- "" -> []
- l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+
+formulaChildren :: Element -> [Element]
+formulaChildren x = filterChildren isMathML x ++ filterChildren (named "tex-math") x
+
+hasFormulaChild :: Element -> Bool
+hasFormulaChild x = length (formulaChildren x) > 0 \ No newline at end of file