diff options
| field | value | date |
|---|---|---|
| author | Seth Speaks <sspeaks610@gmail.com> | 2023-09-16 09:21:01 -0700 |
| committer | GitHub <noreply@github.com> | 2023-09-16 09:21:01 -0700 |
| commit | 0d27947b96f7066edeed8a63c196e045ac8822b5 (patch) | |
| tree | d417142aff52056479e96335dc512f8d351eb846 /src | |
| parent | 138613156a53f73065223651fd77cef60fdb8e82 (diff) | |
HTML reader: parse task lists using input elements (#9066)

Allow the HTML reader to parse task lists of the sort produced by pandoc.

Closes #9047
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 19 |
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML/Types.hs | 3 |

2 files changed, 20 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5e4c8d315..18441633d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -129,6 +129,10 @@ setInChapter = local (\s -> s {inChapter = True}) setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInPlain = local (\s -> s {inPlain = True}) +-- Some items should be handled differently when in a list item tag, e.g. checkbox +setInListItem :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a +setInListItem = local (\s -> s {inListItem = True}) + pHtml :: PandocMonad m => TagParser m Blocks pHtml = do (TagOpen "html" attr) <- lookAhead pAny @@ -334,7 +338,7 @@ pBulletList = try $ do return $ B.bulletList $ map (fixPlains True) items pListItem :: PandocMonad m => TagParser m Blocks -pListItem = do +pListItem = setInListItem $ do TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) let attr = toStringAttr attr' let addId ident bs = case B.toList bs of @@ -344,6 +348,16 @@ pListItem = do maybe id addId (lookup "id" attr) <$> pInTags "li" block +pCheckbox :: PandocMonad m => TagParser m Inlines +pCheckbox = do + TagOpen _ attr' <- pSatisfy $ matchTagOpen "input" [("type","checkbox")] + TagClose _ <- pSatisfy (matchTagClose "input") + let attr = toStringAttr attr' + let isChecked = isJust $ lookup "checked" attr + let escapeSequence = B.str $ if isChecked then "\9746" else "\9744" + return $ escapeSequence <> B.space + + -- | Parses a list item just like 'pListItem', but allows sublists outside of -- @li@ tags to be treated as items. 
pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks @@ -673,6 +687,9 @@ inline = pTagText <|> do "var" -> pCodeWithClass "var" "variable" "span" -> pSpan "math" -> pMath False + "input" + | lookup "type" attr == Just "checkbox" + -> asks inListItem >>= guard >> pCheckbox "script" | Just x <- lookup "type" attr , "math/tex" `T.isPrefixOf` x -> pScriptMath diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs index f03454c3b..a16773949 100644 --- a/src/Text/Pandoc/Readers/HTML/Types.hs +++ b/src/Text/Pandoc/Readers/HTML/Types.hs @@ -60,6 +60,7 @@ data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext , inChapter :: Bool -- ^ Set if in chapter section , inPlain :: Bool -- ^ Set if in pPlain + , inListItem :: Bool -- ^ Set if in <li> tag } @@ -91,7 +92,7 @@ instance HasMeta HTMLState where deleteMeta s st = st {parserState = deleteMeta s $ parserState st} instance Default HTMLLocal where - def = HTMLLocal NoQuote False False + def = HTMLLocal NoQuote False False False instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} |
