diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2022-02-24 14:47:35 -0800 |
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2022-02-24 14:47:35 -0800 |
| commit | 7dea81f9928a754a5e620e01f36d484734442e45 (patch) | |
| tree | 32738a799358de5fe5fed863a58f9a62326c4c6c | |
| parent | 0ae7b1e1f8b0fafbc36f9b5a84fc977363b555bc (diff) | |
Text.Pandoc.XML.Light: add versions of the parsers...
that allow specifying a custom entity map.
Exports new functions `parseXMLElementWithEntities`,
`parseXMLContentsWithEntities` [API change].
| -rw-r--r-- | pandoc.cabal | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/XML/Light.hs | 35 |
2 files changed, 31 insertions, 5 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 0a670c49b..a0190a36f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -521,6 +521,7 @@ library unicode-transforms >= 0.3 && < 0.5, xml >= 1.3.12 && < 1.4, xml-conduit >= 1.9.1.1 && < 1.10, + xml-types >= 0.3 && < 0.4, yaml >= 0.11 && < 0.12, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs index 8ba71b68f..85095338d 100644 --- a/src/Text/Pandoc/XML/Light.hs +++ b/src/Text/Pandoc/XML/Light.hs @@ -36,6 +36,9 @@ module Text.Pandoc.XML.Light -- * Replacement for xml-light's Text.XML.Input , parseXMLElement , parseXMLContents + -- * Versions that allow passing in a custom entity table + , parseXMLElementWithEntities + , parseXMLContentsWithEntities ) where import qualified Control.Exception as E @@ -48,23 +51,45 @@ import Data.Maybe (mapMaybe) import Text.Pandoc.XML.Light.Types import Text.Pandoc.XML.Light.Proc import Text.Pandoc.XML.Light.Output +import qualified Data.XML.Types as XML -- Drop in replacement for parseXMLDoc in xml-light. parseXMLElement :: TL.Text -> Either T.Text Element -parseXMLElement t = +parseXMLElement = parseXMLElementWithEntities mempty + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElementWithEntities :: M.Map T.Text T.Text + -> TL.Text -> Either T.Text Element +parseXMLElementWithEntities entityMap t = elementToElement . Conduit.documentRoot <$> either (Left . T.pack . E.displayException) Right - (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True + , Conduit.psDecodeEntities = decodeEnts } t) + where + decodeEnts ref = case M.lookup ref entityMap of + Nothing -> XML.ContentEntity ref + Just t' -> XML.ContentText t' parseXMLContents :: TL.Text -> Either T.Text [Content] -parseXMLContents t = - case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of +parseXMLContents = parseXMLContentsWithEntities mempty + +parseXMLContentsWithEntities :: M.Map T.Text T.Text + -> TL.Text -> Either T.Text [Content] +parseXMLContentsWithEntities entityMap t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True + , Conduit.psDecodeEntities = decodeEnts + } t of Left e -> case E.fromException e of Just (ContentAfterRoot _) -> - elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") + elContent <$> parseXMLElementWithEntities entityMap + ("<wrapper>" <> t <> "</wrapper>") _ -> Left . T.pack . E.displayException $ e Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] + where + decodeEnts ref = case M.lookup ref entityMap of + Nothing -> XML.ContentEntity ref + Just t' -> XML.ContentText t' elementToElement :: Conduit.Element -> Element elementToElement (Conduit.Element name attribMap nodes) = |
