diff options
| author | Noah Malmed <nmalmed@scholasticahq.com> | 2023-06-08 11:58:28 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-08 09:58:28 -0700 |
| commit | 76952d0b06ad8fec12b898372caa2ed36ba93700 (patch) | |
| tree | 9614946022c3ca79125a618d92811c2ec6dd5925 /src | |
| parent | 7b4eb40737e5861149124225cb31a8f39c0286b1 (diff) | |
Add footer and multiple body parsing to JATS table reader (#8795)
Closes #8765.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/JATS.hs | 40 |
1 files changed, 24 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 10f5ce809..90887ed04 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -38,6 +38,7 @@ import Text.TeXMath (readMathML, writeTeX) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import Safe (headMay) type JATS m = StateT JATSState m @@ -300,14 +301,19 @@ parseBlock (Elem e) = do _ -> filterChildren isColspec e' let isRow x = named "row" x || named "tr" x - -- list of header cell elements - let headRowElements = case filterChild (named "thead") e' of - Just h -> maybe [] parseElement (filterChild isRow h) - Nothing -> [] - -- list of list of body cell elements - let bodyRowElements = case filterChild (named "tbody") e' of - Just b -> map parseElement $ filterChildren isRow b - Nothing -> map parseElement $ filterChildren isRow e' + let parseRows elementWithRows = + map parseElement $ filterChildren isRow elementWithRows + + -- list of list of body cell elements + let multipleBodyRowElements = + map parseRows $ filterChildren (named "tbody") e' + + -- list of list header cell elements + let headRowElements = maybe [] parseRows (filterChild (named "thead") e') + + -- list of foot cell elements + let footRowElements = maybe [] parseRows (filterChild (named "tfoot") e') + let toAlignment c = case findAttr (unqual "align") c of Just "left" -> AlignLeft Just "right" -> AlignRight @@ -321,7 +327,8 @@ parseBlock (Elem e) = do w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w if n > 0 then Just n else Nothing - let numrows = foldl' max 0 $ map length bodyRowElements + let firstBody = fromMaybe [] (headMay multipleBodyRowElements) + let numrows = foldl' max 0 $ map length firstBody let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs @@ -332,7 +339,6 @@ parseBlock (Elem e) = do Just ws' -> let tot = sum ws' in ColWidth . (/ tot) <$> ws' Nothing -> replicate numrows ColWidthDefault - let parseCell = parseMixed plain . elContent let elementToCell element = cell (toAlignment element) @@ -341,15 +347,17 @@ parseBlock (Elem e) = do <$> (parseCell element) let rowElementsToCells elements = mapM elementToCell elements let toRow = fmap (Row nullAttr) . rowElementsToCells - toHeaderRow element = sequence $ [toRow element | not (null element)] + toRows elements = mapM toRow elements + + headerRows <- toRows headRowElements + footerRows <- toRows footRowElements + bodyRows <- mapM toRows multipleBodyRowElements - headerRow <- toHeaderRow headRowElements - bodyRows <- mapM toRow bodyRowElements return $ table (simpleCaption $ plain capt) (zip aligns widths) - (TableHead nullAttr headerRow) - [TableBody nullAttr 0 [] bodyRows] - (TableFoot nullAttr []) + (TableHead nullAttr headerRows) + (map (TableBody nullAttr 0 []) bodyRows) + (TableFoot nullAttr footerRows) isEntry x = named "entry" x || named "td" x || named "th" x parseElement = filterChildren isEntry wrapWithHeader n mBlocks = do |
