summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNoah Malmed <nmalmed@scholasticahq.com>2023-06-08 11:58:28 -0500
committerGitHub <noreply@github.com>2023-06-08 09:58:28 -0700
commit76952d0b06ad8fec12b898372caa2ed36ba93700 (patch)
tree9614946022c3ca79125a618d92811c2ec6dd5925 /src
parent7b4eb40737e5861149124225cb31a8f39c0286b1 (diff)
Add footer and multiple body parsing to JATS table reader (#8795)
Closes #8765.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs40
1 files changed, 24 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 10f5ce809..90887ed04 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -38,6 +38,7 @@ import Text.TeXMath (readMathML, writeTeX)
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Safe (headMay)
type JATS m = StateT JATSState m
@@ -300,14 +301,19 @@ parseBlock (Elem e) = do
_ -> filterChildren isColspec e'
let isRow x = named "row" x || named "tr" x
- -- list of header cell elements
- let headRowElements = case filterChild (named "thead") e' of
- Just h -> maybe [] parseElement (filterChild isRow h)
- Nothing -> []
- -- list of list of body cell elements
- let bodyRowElements = case filterChild (named "tbody") e' of
- Just b -> map parseElement $ filterChildren isRow b
- Nothing -> map parseElement $ filterChildren isRow e'
+ let parseRows elementWithRows =
+ map parseElement $ filterChildren isRow elementWithRows
+
+ -- list of list of body cell elements
+ let multipleBodyRowElements =
+ map parseRows $ filterChildren (named "tbody") e'
+
+ -- list of list header cell elements
+ let headRowElements = maybe [] parseRows (filterChild (named "thead") e')
+
+ -- list of foot cell elements
+ let footRowElements = maybe [] parseRows (filterChild (named "tfoot") e')
+
let toAlignment c = case findAttr (unqual "align") c of
Just "left" -> AlignLeft
Just "right" -> AlignRight
@@ -321,7 +327,8 @@ parseBlock (Elem e) = do
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
- let numrows = foldl' max 0 $ map length bodyRowElements
+ let firstBody = fromMaybe [] (headMay multipleBodyRowElements)
+ let numrows = foldl' max 0 $ map length firstBody
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
@@ -332,7 +339,6 @@ parseBlock (Elem e) = do
Just ws' -> let tot = sum ws'
in ColWidth . (/ tot) <$> ws'
Nothing -> replicate numrows ColWidthDefault
-
let parseCell = parseMixed plain . elContent
let elementToCell element = cell
(toAlignment element)
@@ -341,15 +347,17 @@ parseBlock (Elem e) = do
<$> (parseCell element)
let rowElementsToCells elements = mapM elementToCell elements
let toRow = fmap (Row nullAttr) . rowElementsToCells
- toHeaderRow element = sequence $ [toRow element | not (null element)]
+ toRows elements = mapM toRow elements
+
+ headerRows <- toRows headRowElements
+ footerRows <- toRows footRowElements
+ bodyRows <- mapM toRows multipleBodyRowElements
- headerRow <- toHeaderRow headRowElements
- bodyRows <- mapM toRow bodyRowElements
return $ table (simpleCaption $ plain capt)
(zip aligns widths)
- (TableHead nullAttr headerRow)
- [TableBody nullAttr 0 [] bodyRows]
- (TableFoot nullAttr [])
+ (TableHead nullAttr headerRows)
+ (map (TableBody nullAttr 0 []) bodyRows)
+ (TableFoot nullAttr footerRows)
isEntry x = named "entry" x || named "td" x || named "th" x
parseElement = filterChildren isEntry
wrapWithHeader n mBlocks = do