diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2022-08-13 12:57:28 -0700 |
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2022-08-13 12:58:40 -0700 |
| commit | 55c524e83cda8ab90b3b202e32b79106b408efb0 (patch) | |
| tree | 5c4a72e08ccf29ed753c01ee62280492266073b3 | |
| parent | 5a99747063bea86ce83b1609773542f799b9a6bb (diff) | |
Support --strip-comments in commonmark/gfm.
This change makes the commonmark reader sensitive to
`readerStripComments`.
Closes #8222.
| -rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 47 |
1 files changed, 38 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index ecc6505f8..528d84dbf 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -34,7 +34,10 @@ import Text.Pandoc.Parsing (runParserT, getInput, getPosition, runF, defaultParserState, option, many1, anyChar, Sources(..), ToSources(..), ParserT, Future, sourceName, sourceLine, incSourceLine) +import Text.Pandoc.Walk (walk) import qualified Data.Text as T +import qualified Data.Attoparsec.Text as A +import Control.Applicative ((<|>)) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: (PandocMonad m, ToSources a) @@ -86,15 +89,41 @@ metaValueParser opts = do Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc -readCommonMarkBody opts s toks - | isEnabled Ext_sourcepos opts = - case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err - Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls - | otherwise = - case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err - Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls +readCommonMarkBody opts s toks = + (if readerStripComments opts + then walk stripBlockComments . walk stripInlineComments + else id) <$> + if isEnabled Ext_sourcepos opts + then case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls + else case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls + +stripBlockComments :: Block -> Block +stripBlockComments (RawBlock (B.Format "html") s) = + RawBlock (B.Format "html") (removeComments s) +stripBlockComments x = x + +stripInlineComments :: Inline -> Inline +stripInlineComments (RawInline (B.Format "html") s) = + RawInline (B.Format "html") (removeComments s) +stripInlineComments x = x + +removeComments :: Text -> Text +removeComments s = + either (const s) id $ A.parseOnly pRemoveComments s + where + pRemoveComments = mconcat <$> A.many' + ("" <$ (A.string "<!--" *> A.scan (0 :: Int) scanChar <* A.char '>') <|> + (A.takeWhile1 (/= '<')) <|> + (A.string "<")) + scanChar st c = + case c of + '-' -> Just (st + 1) + '>' | st >= 2 -> Nothing + _ -> Just 0 specFor :: (Monad m, Typeable m, Typeable a, Rangeable (Cm a Inlines), Rangeable (Cm a Blocks)) |
