diff options
| author | Albert Krewinkel <albert@zeitkraut.de> | 2022-08-03 13:04:45 +0200 |
|---|---|---|
| committer | Albert Krewinkel <albert@zeitkraut.de> | 2022-08-03 15:00:35 +0200 |
| commit | 0d7f80c87ff3948669356c7963118d90533cd519 (patch) | |
| tree | 1fd97e1b61625587d6aa3623fc7fffaf63408d8c /src/Text | |
| parent | b306f2e1fdf0cd340e49e3be91e267f456aefa0e (diff) | |
HTML reader: allow sublists that are not marked as items.
The HTML standard requires all list items to be marked with a `<li>`
tag, but some tools fail to do so for sublists. The reader now accepts
these unwrapped lists as sublists.
Closes: #8150
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 16 |
1 file changed, 11 insertions, 5 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index dd0e54c27..711457312 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -327,11 +327,11 @@ pBulletList = try $ do
   -- note: if they have an <ol> or <ul> not in scope of a <li>,
   -- treat it as a list item, though it's not valid xhtml...
   skipMany nonItem
-  items <- manyTill (pListItem nonItem) (pCloses "ul")
+  items <- manyTill (pListItem' nonItem) (pCloses "ul")
   return $ B.bulletList $ map (fixPlains True) items
 
-pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
-pListItem nonItem = do
+pListItem :: PandocMonad m => TagParser m Blocks
+pListItem = do
   TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
   let attr = toStringAttr attr'
   let addId ident bs = case B.toList bs of
@@ -339,7 +339,13 @@ pListItem nonItem = do
                                 [Span (ident, [], []) ils] : xs)
                            _ -> B.divWith (ident, [], []) bs
   maybe id addId (lookup "id" attr) <$>
-    pInTags "li" block <* skipMany nonItem
+    pInTags "li" block
+
+-- | Parses a list item just like 'pListItem', but allows sublists outside of
+-- @li@ tags to be treated as items.
+pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks
+pListItem' nonItem = (pListItem <|> pBulletList <|> pOrderedList)
+  <* skipMany nonItem
 
 parseListStyleType :: Text -> ListNumberStyle
 parseListStyleType "lower-roman" = LowerRoman
@@ -381,7 +387,7 @@ pOrderedList = try $ do
        _ <- manyTill (eFootnote <|> pBlank) (pCloses "ol")
        return mempty
      else do
-       items <- manyTill (pListItem nonItem) (pCloses "ol")
+       items <- manyTill (pListItem' nonItem) (pCloses "ol")
        return $ B.orderedListWith (start, style, DefaultDelim)
               $ map (fixPlains True) items
 
```
