diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2023-11-15 14:40:00 -0800 |
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2023-11-15 14:40:00 -0800 |
| commit | 13e1b49224ccd5a70af8f3b99b9a5ed6b9dfc48c (patch) | |
| tree | 35d175403c4ebc99da253ddc8a92b025d3106dd3 /src/Text | |
| parent | cd48bf40597e99bec6419a9a0b49fef46330d56c (diff) | |
HTML reader: Fix handling of invalidly nested sublists.
This revises the fix to #8150 (and the test case) and closes #9187.
HTML in the (invalid) form:
<ul>
<li>L1</li>
<ul>
<li>L1.1</li>
</ul>
</ul>
is treated by browsers like
<ul>
<li>L1
<ul>
<li>L1.1</li>
</ul>
</li>
</ul>
not
<ul>
<li>L1</li>
<li><ul>
<li>L1.1</li>
</ul>
</li>
</ul>
as pandoc previously assumed.
This change will give a similar treatment to
<ul>
<li>L1</li>
<p>foobar</p>
</ul>
which also seems to match browser behavior.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 22 |
1 file changed, 10 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 383892c9b..42e12c419 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -334,19 +334,23 @@ pBulletList = try $ do -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem - items <- manyTill (pListItem' nonItem) (pCloses "ul") + items <- manyTill (pListItem nonItem) (pCloses "ul") return $ B.bulletList $ map (fixPlains True) items -pListItem :: PandocMonad m => TagParser m Blocks -pListItem = setInListItem $ do +pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks +pListItem nonItem = setInListItem $ do TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) let attr = toStringAttr attr' let addId ident bs = case B.toList bs of (Plain ils:xs) -> B.fromList (Plain [Span (ident, [], []) ils] : xs) _ -> B.divWith (ident, [], []) bs - maybe id addId (lookup "id" attr) <$> - pInTags "li" block + item <- pInTags "li" block + skipMany nonItem + orphans <- many (do notFollowedBy (pSatisfy (matchTagOpen "li" [])) + notFollowedBy (pSatisfy isTagClose) + block) -- e.g. <ul>, see #9187 + return $ maybe id addId (lookup "id" attr) $ item <> mconcat orphans pCheckbox :: PandocMonad m => TagParser m Inlines pCheckbox = do @@ -358,12 +362,6 @@ pCheckbox = do return $ escapeSequence <> B.space --- | Parses a list item just like 'pListItem', but allows sublists outside of --- @li@ tags to be treated as items. 
-pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks -pListItem' nonItem = (pListItem <|> pBulletList <|> pOrderedList) - <* skipMany nonItem - parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman parseListStyleType "upper-roman" = UpperRoman @@ -404,7 +402,7 @@ pOrderedList = try $ do _ <- manyTill (eFootnote <|> pBlank) (pCloses "ol") return mempty else do - items <- manyTill (pListItem' nonItem) (pCloses "ol") + items <- manyTill (pListItem nonItem) (pCloses "ol") return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items |
