summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2022-10-07 21:16:45 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2022-10-08 16:05:48 -0700
commita4218b9719c77978e1968065a3c2c4f25d3c4137 (patch)
tree2335115e82577031786f766a122ae400ac3dcc5a /src
parente663bb0e1479dac2638a3e4f693e5eeac314e347 (diff)
[API Change] Add new module "Text.Pandoc.Format"
The module provides functions and types for format spec parsing and processing. The function `parseFormatSpec` was moved from Text.Pandoc.Extensions to the new module and renamed to `parseFlavoredFormat`. It now operates in a PandocMonad and is based on the updated types.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/App.hs5
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs23
-rw-r--r--src/Text/Pandoc/Extensions.hs26
-rw-r--r--src/Text/Pandoc/Format.hs125
-rw-r--r--src/Text/Pandoc/Readers.hs35
-rw-r--r--src/Text/Pandoc/Writers.hs35
6 files changed, 160 insertions, 89 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index b6129422c..0306b3b22 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -58,6 +58,7 @@ import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
import Text.Collate.Lang (Lang (..), parseLang)
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), Environment (..),
applyFilters)
+import qualified Text.Pandoc.Format as Format
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Scripting (ScriptingEngine (..))
import Text.Pandoc.SelfContained (makeSelfContained)
@@ -148,8 +149,8 @@ convertWithOpts' scriptingEngine istty datadir opts = do
(map (T.pack . takeExtension) sources) "markdown"
return "markdown"
- let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
-
+ Format.FlavoredFormat readerNameBase _extsDiff <-
+ Format.parseFlavoredFormat readerName
let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index e69cc4d47..5851d433c 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -38,6 +38,7 @@ import Text.Pandoc
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..))
import Text.Pandoc.App.CommandLineOptions (engines, setVariable)
+import qualified Text.Pandoc.Format as Format
import Text.Pandoc.Highlighting (lookupHighlightingStyle)
import Text.Pandoc.Scripting (ScriptingEngine (engineWriteCustom))
import qualified Text.Pandoc.UTF8 as UTF8
@@ -88,10 +89,6 @@ optToOutputSettings scriptingEngine opts = do
return ("html", Nothing)
Just f -> return (f, Nothing)
- let format = if ".lua" `T.isSuffixOf` writerName
- then writerName
- else T.toLower $ baseWriterName writerName
-
let makeSandboxed pureWriter =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
@@ -105,15 +102,17 @@ optToOutputSettings scriptingEngine opts = do
ByteStringWriter w ->
ByteStringWriter $ \o d -> sandbox files (w o d)
+ Format.FlavoredFormat format _extsDiff <- Format.parseFlavoredFormat writerName
(writer, writerExts) <-
- if ".lua" `T.isSuffixOf` format
- then (,mempty) <$> engineWriteCustom scriptingEngine (T.unpack writerName)
- else if optSandbox opts
- then
- case runPure (getWriter writerName) of
- Left e -> throwError e
- Right (w, wexts) ->return (makeSandboxed w, wexts)
- else getWriter (T.toLower writerName)
+ if "lua" `T.isSuffixOf` format
+ then do
+ (, mempty) <$> engineWriteCustom scriptingEngine (T.unpack format)
+ else do
+ if optSandbox opts
+ then case runPure (getWriter writerName) of
+ Right (w, wexts) -> return (makeSandboxed w, wexts)
+ Left e -> throwError e
+ else getWriter writerName
let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index c8bf1e546..d8df71bd6 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Extensions ( Extension(..)
, Extensions
, emptyExtensions
, extensionsFromList
- , parseFormatSpec
, extensionEnabled
, enableExtension
, disableExtension
@@ -35,11 +34,9 @@ module Text.Pandoc.Extensions ( Extension(..)
where
import Data.Bits (clearBit, setBit, testBit, (.|.))
import Data.Data (Data)
-import Data.List (foldl')
import qualified Data.Text as T
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
-import Text.Parsec
import Text.Read (readMaybe)
import Data.Aeson.TH (deriveJSON)
import Data.Aeson
@@ -611,26 +608,3 @@ getAllExtensions f = universalExtensions <> getAll f
extensionsFromList
[ Ext_smart ]
getAll _ = mempty
-
-
--- | Parse a format-specifying string into a markup format,
--- a set of extensions to enable, and a set of extensions to disable.
-parseFormatSpec :: T.Text
- -> Either ParseError (T.Text, [Extension], [Extension])
-parseFormatSpec = parse formatSpec ""
- where formatSpec = do
- name <- formatName
- (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$>
- many extMod
- return (T.pack name, reverse extsToEnable, reverse extsToDisable)
- formatName = many1 $ noneOf "-+"
- extMod = do
- polarity <- oneOf "-+"
- name <- many $ noneOf "-+"
- ext <- case readExtension name of
- Just n -> return n
- Nothing -> unexpected $ "unknown extension: " ++ name
- return $ \(extsToEnable, extsToDisable) ->
- case polarity of
- '+' -> (ext : extsToEnable, extsToDisable)
- _ -> (extsToEnable, ext : extsToDisable)
diff --git a/src/Text/Pandoc/Format.hs b/src/Text/Pandoc/Format.hs
new file mode 100644
index 000000000..ae2407bc2
--- /dev/null
+++ b/src/Text/Pandoc/Format.hs
@@ -0,0 +1,125 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Format
+ Copyright : © 2022 Albert Krewinkel
+ License : GPL-2.0-or-later
+ Maintainer : Albert Krewinkel <pandoc@tarleb.com>
+
+Handling of format specifiers for input and output.
+-}
+module Text.Pandoc.Format
+ ( FlavoredFormat (..)
+ , ExtensionsConfig (..)
+ , ExtensionsDiff (..)
+ , parseFlavoredFormat
+ , applyExtensionsDiff
+ , getExtensionsConfig
+ ) where
+
+import Control.Monad.Except (throwError)
+import Data.List (foldl')
+import System.FilePath (splitExtension)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Error (PandocError (..))
+import Text.Pandoc.Extensions
+ ( Extension
+ , Extensions
+ , disableExtension
+ , enableExtension
+ , extensionEnabled
+ , getAllExtensions
+ , getDefaultExtensions
+ , readExtension
+ )
+import Text.Parsec
+import qualified Data.Text as T
+
+-- | Format specifier with the format's name and the lists of extensions
+-- to be enabled or disabled.
+data FlavoredFormat = FlavoredFormat
+ { formatName :: T.Text
+ , formatExtsDiff :: ExtensionsDiff
+ }
+
+-- | Changes to a set of extensions, i.e., list of extensions to be
+-- enabled or disabled.
+data ExtensionsDiff = ExtensionsDiff
+ { extsToEnable :: [Extension]
+ , extsToDisable :: [Extension]
+ }
+
+-- | Describes the properties of a format.
+data ExtensionsConfig = ExtensionsConfig
+ { extsDefault :: Extensions -- ^ Extensions enabled by default
+ , extsSupported :: Extensions -- ^ Extensions that can be enabled or disabled.
+ }
+
+-- | Returns the extensions configuration of a format.
+getExtensionsConfig :: T.Text -> ExtensionsConfig
+getExtensionsConfig fmt = ExtensionsConfig
+ { extsDefault = getDefaultExtensions fmt
+ , extsSupported = getAllExtensions fmt
+ }
+
+-- | Apply the extension changes in the format spec to the extensions
+-- given in the format's extensions configuration. Throws an error in
+-- case of an unknown or unsupported extension.
+applyExtensionsDiff :: PandocMonad m
+ => ExtensionsConfig
+ -> FlavoredFormat
+ -> m Extensions
+applyExtensionsDiff extConf (FlavoredFormat fname extsDiff) = do
+ let unsupported =
+ filter (\ext -> not $ extensionEnabled ext (extsSupported extConf))
+ (extsToEnable extsDiff ++ extsToDisable extsDiff)
+ case unsupported of
+ ext:_ -> throwError $ PandocUnsupportedExtensionError
+ (T.drop 4 . T.pack $ show ext) fname
+ [] -> let enabled = foldr enableExtension
+ (extsDefault extConf)
+ (extsToEnable extsDiff)
+ in pure $ foldr disableExtension enabled (extsToDisable extsDiff)
+
+-- | Parse a format-specifying string into a markup format and the
+-- change set to the format's extensions. Throws an error if the spec
+-- cannot be parsed or contains an unknown extension.
+parseFlavoredFormat :: PandocMonad m
+ => T.Text
+ -> m FlavoredFormat
+parseFlavoredFormat spec =
+ -- Paths like `latex-foo-bar.lua` or `latex-smart-citations.lua`
+ -- should be parsed as the format name. The `-` (or `+`) in the
+ -- filename would confuse the extensions parser, so, if `spec` looks
+ -- like a filename, the file's basename is split off into the prefix.
+ -- Only the remaining part is parsed, and the prefix is appended back
+ -- to the format after parsing.
+ case parse (fixSourcePos *> formatSpec) "" spec' of
+ Right (fname, extsDiff) -> pure (FlavoredFormat (prefix <> fname) extsDiff)
+ Left err -> throwError $ PandocFormatError spec (T.pack $ show err)
+ where
+ fixSourcePos = do
+ pos <- statePos <$> getParserState
+ setPosition (incSourceColumn pos (T.length prefix))
+ formatSpec = do
+ name <- parseFormatName
+ extsDiff <- pExtensionsDiff
+ return ( T.pack name, extsDiff )
+ parseFormatName = many1 $ noneOf "-+"
+ (prefix, spec') = case splitExtension (T.unpack spec) of
+ (_, "") -> ("", T.toLower $ spec) -- no extension
+ (p,s) -> (T.pack p, T.pack s)
+
+pExtensionsDiff :: Stream s m Char => ParsecT s u m ExtensionsDiff
+pExtensionsDiff = foldl' (flip ($)) (ExtensionsDiff [] []) <$> many extMod
+ where
+ extMod = do
+ polarity <- oneOf "-+"
+ name <- many $ noneOf "-+"
+ ext <- case readExtension name of
+ Just n -> return n
+ Nothing -> unexpected $ "unknown extension: " ++ name
+ return $ \extsDiff ->
+ case polarity of
+ '+' -> extsDiff{extsToEnable = (ext : extsToEnable extsDiff)}
+ _ -> extsDiff{extsToDisable = (ext : extsToDisable extsDiff)}
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index eef455627..ab660c5b2 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc.Readers
Copyright : Copyright (C) 2006-2022 John MacFarlane
@@ -64,17 +65,16 @@ module Text.Pandoc.Readers
, getDefaultExtensions
) where
-import Control.Monad (unless)
import Control.Monad.Except (throwError)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.Text (Text)
import qualified Data.Text as T
-import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Class
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Extensions
+import qualified Text.Pandoc.Format as Format
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.Markdown
@@ -162,28 +162,15 @@ readers = [("native" , TextReader readNative)
,("rtf" , TextReader readRTF)
]
--- | Retrieve reader, extensions based on formatSpec (format+extensions).
+-- | Retrieve reader, extensions based on format spec (format+extensions).
getReader :: PandocMonad m => Text -> m (Reader m, Extensions)
-getReader s =
- case parseFormatSpec s of
- Left e -> throwError $ PandocFormatError s (tshow e)
- Right (readerName, extsToEnable, extsToDisable) ->
- case lookup readerName readers of
- Nothing -> throwError $ PandocUnknownReaderError
- readerName
- Just r -> do
- let allExts = getAllExtensions readerName
- let exts = foldr disableExtension
- (foldr enableExtension
- (getDefaultExtensions readerName)
- extsToEnable) extsToDisable
- mapM_ (\ext ->
- unless (extensionEnabled ext allExts) $
- throwError $
- PandocUnsupportedExtensionError
- (T.drop 4 $ T.pack $ show ext) readerName)
- (extsToEnable ++ extsToDisable)
- return (r, exts)
+getReader s = do
+ spec <- Format.parseFlavoredFormat s
+ let readerName = Format.formatName spec
+ case lookup readerName readers of
+ Nothing -> throwError $ PandocUnknownReaderError readerName
+ Just r -> (r,) <$>
+ Format.applyExtensionsDiff (Format.getExtensionsConfig readerName) spec
-- | Read pandoc document from JSON format.
readJSON :: (PandocMonad m, ToSources a)
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 385f1352f..fc2b28e8f 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -1,7 +1,8 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
-{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc
Copyright : Copyright (C) 2006-2022 John MacFarlane
@@ -76,14 +77,12 @@ module Text.Pandoc.Writers
) where
import Control.Monad.Except (throwError)
-import Control.Monad (unless)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.Text (Text)
-import qualified Data.Text as T
-import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Class
import Text.Pandoc.Definition
+import qualified Text.Pandoc.Format as Format
import Text.Pandoc.Options
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Error
@@ -194,27 +193,13 @@ writers = [
-- | Retrieve writer, extensions based on formatSpec (format+extensions).
getWriter :: PandocMonad m => Text -> m (Writer m, Extensions)
-getWriter s =
- case parseFormatSpec s of
- Left e -> throwError $ PandocFormatError s (tshow e)
- Right (writerName, extsToEnable, extsToDisable) ->
- case lookup writerName writers of
- Nothing -> throwError $
- PandocUnknownWriterError writerName
- Just w -> do
- let allExts = getAllExtensions writerName
- let exts = foldr disableExtension
- (foldr enableExtension
- (getDefaultExtensions writerName)
- extsToEnable) extsToDisable
- mapM_ (\ext ->
- unless (extensionEnabled ext allExts) $
- throwError $
- PandocUnsupportedExtensionError
- (T.drop 4 $ T.pack $ show ext) writerName)
- (extsToEnable ++ extsToDisable)
- return (w, exts)
-
+getWriter s = do
+ spec <- Format.parseFlavoredFormat s
+ let writerName = Format.formatName spec
+ case lookup writerName writers of
+ Nothing -> throwError $ PandocUnknownWriterError writerName
+ Just w -> (w,) <$>
+ Format.applyExtensionsDiff (Format.getExtensionsConfig writerName) spec
writeJSON :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeJSON _ = return . UTF8.toText . BL.toStrict . encode