summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2022-08-13 12:57:28 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2022-08-13 12:58:40 -0700
commit55c524e83cda8ab90b3b202e32b79106b408efb0 (patch)
tree5c4a72e08ccf29ed753c01ee62280492266073b3
parent5a99747063bea86ce83b1609773542f799b9a6bb (diff)
Support --strip-comments in commonmark/gfm.
This change makes the commonmark reader sensitive to `readerStripComments`. Closes #8222.
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs47
1 files changed, 38 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index ecc6505f8..528d84dbf 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -34,7 +34,10 @@ import Text.Pandoc.Parsing (runParserT, getInput, getPosition,
runF, defaultParserState, option, many1, anyChar,
Sources(..), ToSources(..), ParserT, Future,
sourceName, sourceLine, incSourceLine)
+import Text.Pandoc.Walk (walk)
import qualified Data.Text as T
+import qualified Data.Attoparsec.Text as A
+import Control.Applicative ((<|>))
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: (PandocMonad m, ToSources a)
@@ -86,15 +89,41 @@ metaValueParser opts = do
Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
-readCommonMarkBody opts s toks
- | isEnabled Ext_sourcepos opts =
- case runIdentity (parseCommonmarkWith (specFor opts) toks) of
- Left err -> throwError $ PandocParsecError s err
- Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
- | otherwise =
- case runIdentity (parseCommonmarkWith (specFor opts) toks) of
- Left err -> throwError $ PandocParsecError s err
- Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMarkBody opts s toks =
+ (if readerStripComments opts
+ then walk stripBlockComments . walk stripInlineComments
+ else id) <$>
+ if isEnabled Ext_sourcepos opts
+ then case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left err -> throwError $ PandocParsecError s err
+ Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+ else case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left err -> throwError $ PandocParsecError s err
+ Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+stripBlockComments :: Block -> Block
+stripBlockComments (RawBlock (B.Format "html") s) =
+ RawBlock (B.Format "html") (removeComments s)
+stripBlockComments x = x
+
+stripInlineComments :: Inline -> Inline
+stripInlineComments (RawInline (B.Format "html") s) =
+ RawInline (B.Format "html") (removeComments s)
+stripInlineComments x = x
+
+removeComments :: Text -> Text
+removeComments s =
+ either (const s) id $ A.parseOnly pRemoveComments s
+ where
+ pRemoveComments = mconcat <$> A.many'
+ ("" <$ (A.string "<!--" *> A.scan (0 :: Int) scanChar <* A.char '>') <|>
+ (A.takeWhile1 (/= '<')) <|>
+ (A.string "<"))
+ scanChar st c =
+ case c of
+ '-' -> Just (st + 1)
+ '>' | st >= 2 -> Nothing
+ _ -> Just 0
specFor :: (Monad m, Typeable m, Typeable a,
Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))