summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2023-11-15 14:40:00 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2023-11-15 14:40:00 -0800
commit: 13e1b49224ccd5a70af8f3b99b9a5ed6b9dfc48c (patch)
tree: 35d175403c4ebc99da253ddc8a92b025d3106dd3 /src/Text
parent: cd48bf40597e99bec6419a9a0b49fef46330d56c (diff)
HTML reader: Fix handling of invalidly nested sublists.
This revises the fix to #8150 (and the test case) and closes #9187.

HTML in the (invalid) form:

    <ul> <li>L1</li> <ul> <li>L1.1</li> </ul> </ul>

is treated by browsers like

    <ul> <li>L1 <ul> <li>L1.1</li> </ul> </li> </ul>

not

    <ul> <li>L1 <li><ul> <li>L1.1</li> </ul> </li> </ul>

as pandoc previously assumed.

This change will give a similar treatment to

    <ul> <li>L1</li> <p>foobar</p> </ul>

which also seems to match browser behavior.
Diffstat (limited to 'src/Text')
-rw-r--r--  src/Text/Pandoc/Readers/HTML.hs  22
1 files changed, 10 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 383892c9b..42e12c419 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -334,19 +334,23 @@ pBulletList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
- items <- manyTill (pListItem' nonItem) (pCloses "ul")
+ items <- manyTill (pListItem nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items
-pListItem :: PandocMonad m => TagParser m Blocks
-pListItem = setInListItem $ do
+pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
+pListItem nonItem = setInListItem $ do
TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
let attr = toStringAttr attr'
let addId ident bs = case B.toList bs of
(Plain ils:xs) -> B.fromList (Plain
[Span (ident, [], []) ils] : xs)
_ -> B.divWith (ident, [], []) bs
- maybe id addId (lookup "id" attr) <$>
- pInTags "li" block
+ item <- pInTags "li" block
+ skipMany nonItem
+ orphans <- many (do notFollowedBy (pSatisfy (matchTagOpen "li" []))
+ notFollowedBy (pSatisfy isTagClose)
+ block) -- e.g. <ul>, see #9187
+ return $ maybe id addId (lookup "id" attr) $ item <> mconcat orphans
pCheckbox :: PandocMonad m => TagParser m Inlines
pCheckbox = do
@@ -358,12 +362,6 @@ pCheckbox = do
return $ escapeSequence <> B.space
--- | Parses a list item just like 'pListItem', but allows sublists outside of
--- @li@ tags to be treated as items.
-pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks
-pListItem' nonItem = (pListItem <|> pBulletList <|> pOrderedList)
- <* skipMany nonItem
-
parseListStyleType :: Text -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman
parseListStyleType "upper-roman" = UpperRoman
@@ -404,7 +402,7 @@ pOrderedList = try $ do
_ <- manyTill (eFootnote <|> pBlank) (pCloses "ol")
return mempty
else do
- items <- manyTill (pListItem' nonItem) (pCloses "ol")
+ items <- manyTill (pListItem nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $
map (fixPlains True) items