summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Seth Speaks <sspeaks610@gmail.com> 2023-09-16 09:21:01 -0700
committer GitHub <noreply@github.com> 2023-09-16 09:21:01 -0700
commit 0d27947b96f7066edeed8a63c196e045ac8822b5 (patch)
tree d417142aff52056479e96335dc512f8d351eb846 /src
parent 138613156a53f73065223651fd77cef60fdb8e82 (diff)
HTML reader: parse task lists using input elements (#9066)
Allow the HTML reader to parse task lists of the sort produced by pandoc. Closes #9047
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 19
-rw-r--r-- src/Text/Pandoc/Readers/HTML/Types.hs 3
2 files changed, 20 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5e4c8d315..18441633d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -129,6 +129,10 @@ setInChapter = local (\s -> s {inChapter = True})
setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInPlain = local (\s -> s {inPlain = True})
+-- Some items should be handled differently when in a list item tag, e.g. checkbox
+setInListItem :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
+setInListItem = local (\s -> s {inListItem = True})  -- sets the flag for the wrapped parser only (presumably Reader's 'local'; confirm)
+
pHtml :: PandocMonad m => TagParser m Blocks
pHtml = do
(TagOpen "html" attr) <- lookAhead pAny
@@ -334,7 +338,7 @@ pBulletList = try $ do
return $ B.bulletList $ map (fixPlains True) items
pListItem :: PandocMonad m => TagParser m Blocks
-pListItem = do
+pListItem = setInListItem $ do
TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
let attr = toStringAttr attr'
let addId ident bs = case B.toList bs of
@@ -344,6 +348,16 @@ pListItem = do
maybe id addId (lookup "id" attr) <$>
pInTags "li" block
+pCheckbox :: PandocMonad m => TagParser m Inlines  -- checkbox <input> tag pair => ballot-box character followed by a space
+pCheckbox = do
+ TagOpen _ attr' <- pSatisfy $ matchTagOpen "input" [("type","checkbox")]
+ TagClose _ <- pSatisfy (matchTagClose "input")  -- a closing input tag must follow; NOTE(review): confirm the tag stream provides one for an unclosed <input>
+ let attr = toStringAttr attr'
+ let isChecked = isJust $ lookup "checked" attr  -- only the presence of the attribute matters; its value is ignored
+ let escapeSequence = B.str $ if isChecked then "\9746" else "\9744"  -- decimal escapes: U+2612 (ballot box with X) / U+2610 (ballot box)
+ return $ escapeSequence <> B.space
+
+
-- | Parses a list item just like 'pListItem', but allows sublists outside of
-- @li@ tags to be treated as items.
pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks
@@ -673,6 +687,9 @@ inline = pTagText <|> do
"var" -> pCodeWithClass "var" "variable"
"span" -> pSpan
"math" -> pMath False
+ "input"
+ | lookup "type" attr == Just "checkbox"
+ -> asks inListItem >>= guard >> pCheckbox
"script"
| Just x <- lookup "type" attr
, "math/tex" `T.isPrefixOf` x -> pScriptMath
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index f03454c3b..a16773949 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -60,6 +60,7 @@ data HTMLLocal = HTMLLocal
{ quoteContext :: QuoteContext
, inChapter :: Bool -- ^ Set if in chapter section
, inPlain :: Bool -- ^ Set if in pPlain
+ , inListItem :: Bool -- ^ Set if in <li> tag
}
@@ -91,7 +92,7 @@ instance HasMeta HTMLState where
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
instance Default HTMLLocal where
- def = HTMLLocal NoQuote False False
+ def = HTMLLocal NoQuote False False False
instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}