diff options
Diffstat (limited to 'server')
| -rw-r--r-- | server/Main.hs | 54 | ||||
| -rw-r--r-- | server/PandocServer.hs | 301 | ||||
| -rw-r--r-- | server/pandoc-server.md | 325 |
3 files changed, 0 insertions, 680 deletions
diff --git a/server/Main.hs b/server/Main.hs deleted file mode 100644 index 531a0b0a0..000000000 --- a/server/Main.hs +++ /dev/null @@ -1,54 +0,0 @@ -module Main where - -import PandocServer (app) -import Text.Pandoc (pandocVersion) -import Control.Monad (when) -import qualified Network.Wai.Handler.CGI as CGI -import qualified Network.Wai.Handler.Warp as Warp -import Network.Wai.Middleware.Timeout (timeout) -import System.Environment (getProgName) -import Options.Applicative -import System.Exit (exitWith, ExitCode(ExitSuccess)) -import Data.Text as T - -data Opts = Opts - { optPort :: Warp.Port, - optTimeout :: Int, -- seconds - optVersion :: Bool } - -options :: Parser Opts -options = Opts - <$> option auto - ( long "port" - <> value 3030 - <> metavar "PORT" - <> help "Port to serve on" ) - <*> option auto - ( long "timeout" - <> value 2 - <> metavar "SECONDS" - <> help "Seconds timeout" ) - <*> flag False True - ( long "version" - <> help "Print version" ) - -main :: IO () -main = do - progname <- getProgName - let optspec = info (options <**> helper) - ( fullDesc - <> progDesc "Run a pandoc server" - <> header "pandoc-server - text conversion server" ) - opts <- execParser optspec - - when (optVersion opts) $ do - putStrLn $ progname <> " " <> T.unpack pandocVersion - exitWith ExitSuccess - - let port = optPort opts - let app' = timeout (optTimeout opts) app - if progname == "pandoc-server.cgi" - then -- operate as a CGI script - CGI.run app' - else -- operate as a persistent server - Warp.run port app' diff --git a/server/PandocServer.hs b/server/PandocServer.hs deleted file mode 100644 index 295412c6d..000000000 --- a/server/PandocServer.hs +++ /dev/null @@ -1,301 +0,0 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE TemplateHaskell #-} -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE OverloadedStrings #-} -module PandocServer - ( app - , Params(..) 
- ) where - -import Data.Aeson -import Data.Aeson.TH -import Network.Wai -import Servant -import Text.DocTemplates as DocTemplates -import Text.Pandoc -import Text.Pandoc.Citeproc (processCitations) -import Text.Pandoc.Highlighting (lookupHighlightingStyle) -import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Text (Text) -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL -import qualified Data.Text.Lazy.Encoding as TLE -import Data.Maybe (fromMaybe) -import Data.Char (isAlphaNum) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as BL -import Data.ByteString.Base64 (decodeBase64, encodeBase64) -import Data.Default -import Data.Map (Map) -import Data.Set (Set) -import Skylighting (defaultSyntaxMap) - -newtype Blob = Blob BL.ByteString - deriving (Show, Eq) - -instance ToJSON Blob where - toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs) - -instance FromJSON Blob where - parseJSON = withText "Blob" $ \t -> do - let inp = UTF8.fromText t - case decodeBase64 inp of - Right bs -> return $ Blob $ BL.fromStrict bs - Left _ -> -- treat as regular text - return $ Blob $ BL.fromStrict inp - --- This is the data to be supplied by the JSON payload --- of requests. Maybe values may be omitted and will be --- given default values. 
-data Params = Params - { text :: Text - , from :: Maybe Text - , to :: Maybe Text - , wrapText :: Maybe WrapOption - , columns :: Maybe Int - , standalone :: Maybe Bool - , template :: Maybe Text - , tabStop :: Maybe Int - , indentedCodeClasses :: Maybe [Text] - , abbreviations :: Maybe (Set Text) - , defaultImageExtension :: Maybe Text - , trackChanges :: Maybe TrackChanges - , stripComments :: Maybe Bool - , citeproc :: Maybe Bool - , variables :: Maybe (DocTemplates.Context Text) - , tableOfContents :: Maybe Bool - , incremental :: Maybe Bool - , htmlMathMethod :: Maybe HTMLMathMethod - , numberSections :: Maybe Bool - , numberOffset :: Maybe [Int] - , sectionDivs :: Maybe Bool - , referenceLinks :: Maybe Bool - , dpi :: Maybe Int - , emailObfuscation :: Maybe ObfuscationMethod - , identifierPrefix :: Maybe Text - , citeMethod :: Maybe CiteMethod - , htmlQTags :: Maybe Bool - , slideLevel :: Maybe Int - , topLevelDivision :: Maybe TopLevelDivision - , listings :: Maybe Bool - , highlightStyle :: Maybe Text - , setextHeaders :: Maybe Bool - , epubSubdirectory :: Maybe Text - , epubFonts :: Maybe [FilePath] - , epubMetadata :: Maybe Text - , epubChapterLevel :: Maybe Int - , tocDepth :: Maybe Int - , referenceDoc :: Maybe FilePath - , referenceLocation :: Maybe ReferenceLocation - , preferAscii :: Maybe Bool - , files :: Maybe (Map FilePath Blob) - } deriving (Show) - -instance Default Params where - def = Params - { text = "" - , from = Nothing - , to = Nothing - , wrapText = Nothing - , columns = Nothing - , standalone = Nothing - , template = Nothing - , tabStop = Nothing - , indentedCodeClasses = Nothing - , abbreviations = Nothing - , defaultImageExtension = Nothing - , trackChanges = Nothing - , stripComments = Nothing - , citeproc = Nothing - , variables = Nothing - , tableOfContents = Nothing - , incremental = Nothing - , htmlMathMethod = Nothing - , numberSections = Nothing - , numberOffset = Nothing - , sectionDivs = Nothing - , referenceLinks = Nothing 
- , dpi = Nothing - , emailObfuscation = Nothing - , identifierPrefix = Nothing - , citeMethod = Nothing - , htmlQTags = Nothing - , slideLevel = Nothing - , topLevelDivision = Nothing - , listings = Nothing - , highlightStyle = Nothing - , setextHeaders = Nothing - , epubSubdirectory = Nothing - , epubMetadata = Nothing - , epubChapterLevel = Nothing - , epubFonts = Nothing - , tocDepth = Nothing - , referenceDoc = Nothing - , referenceLocation = Nothing - , preferAscii = Nothing - , files = Nothing - } - -- TODO: - -- shiftHeadingLevelBy - -- metadata - -- selfContained - -- embedResources - -- epubCoverImage - -- stripEmptyParagraphs - -- titlePrefix - -- ipynbOutput - -- eol - -- csl - -- bibliography - -- citationAbbreviations - --- Automatically derive code to convert to/from JSON. -$(deriveJSON defaultOptions ''Params) - --- This is the API. The root endpoint takes a request body --- consisting of a JSON-encoded Params structure and responds to --- POST requests with plain text, JSON, or an octet stream, --- depending on the Accept header. 
-type API = - ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text - :<|> - ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString - :<|> - "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text] - :<|> - "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value - :<|> - "version" :> Get '[PlainText, JSON] Text - -app :: Application -app = serve api server - -api :: Proxy API -api = Proxy - -server :: Server API -server = convert - :<|> convertBytes - :<|> mapM convert - :<|> babelmark -- for babelmark which expects {"html": "", "version": ""} - :<|> pure pandocVersion - where - babelmark text' from' to' standalone' = do - res <- convert def{ text = text', - from = from', to = to', - standalone = Just standalone' } - return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ] - - -- We use runPure for the pandoc conversions, which ensures that - -- they will do no IO. This makes the server safe to use. However, - -- it will mean that features requiring IO, like RST includes, will not work. - -- Changing this to - -- handleErr =<< liftIO (runIO (convert' params)) - -- will allow the IO operations. - convert params = handleErr $ - runPure (convert' id (encodeBase64 . 
BL.toStrict) params) - - convertBytes params = handleErr $ - runPure (convert' UTF8.fromText BL.toStrict params) - - convert' :: PandocMonad m - => (Text -> a) -> (BL.ByteString -> a) -> Params -> m a - convert' textHandler bsHandler params = do - let readerFormat = fromMaybe "markdown" $ from params - let writerFormat = fromMaybe "html" $ to params - (readerSpec, readerExts) <- getReader readerFormat - (writerSpec, writerExts) <- getWriter writerFormat - let binaryOutput = case writerSpec of - ByteStringWriter{} -> True - _ -> False - let isStandalone = fromMaybe binaryOutput (standalone params) - let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat - hlStyle <- traverse (lookupHighlightingStyle . T.unpack) - $ highlightStyle params - mbTemplate <- if isStandalone - then case template params of - Nothing -> Just <$> - compileDefaultTemplate toformat - Just t -> Just <$> - compileCustomTemplate toformat t - else return Nothing - let readeropts = def{ readerExtensions = readerExts - , readerStandalone = isStandalone - , readerTabStop = fromMaybe 4 (tabStop params) - , readerIndentedCodeClasses = fromMaybe [] - (indentedCodeClasses params) - , readerAbbreviations = - fromMaybe mempty (abbreviations params) - , readerDefaultImageExtension = - fromMaybe mempty (defaultImageExtension params) - , readerTrackChanges = - fromMaybe AcceptChanges (trackChanges params) - , readerStripComments = - fromMaybe False (stripComments params) - } - let writeropts = - def{ writerExtensions = writerExts - , writerTabStop = fromMaybe 4 (tabStop params) - , writerWrapText = fromMaybe WrapAuto (wrapText params) - , writerColumns = fromMaybe 72 (columns params) - , writerTemplate = mbTemplate - , writerSyntaxMap = defaultSyntaxMap - , writerVariables = fromMaybe mempty (variables params) - , writerTableOfContents = fromMaybe False (tableOfContents params) - , writerIncremental = fromMaybe False (incremental params) - , writerHTMLMathMethod = - fromMaybe PlainMath 
(htmlMathMethod params) - , writerNumberSections = fromMaybe False (numberSections params) - , writerNumberOffset = fromMaybe [] (numberOffset params) - , writerSectionDivs = fromMaybe False (sectionDivs params) - , writerReferenceLinks = fromMaybe False (referenceLinks params) - , writerDpi = fromMaybe 96 (dpi params) - , writerEmailObfuscation = - fromMaybe NoObfuscation (emailObfuscation params) - , writerIdentifierPrefix = - fromMaybe mempty (identifierPrefix params) - , writerCiteMethod = fromMaybe Citeproc (citeMethod params) - , writerHtmlQTags = fromMaybe False (htmlQTags params) - , writerSlideLevel = slideLevel params - , writerTopLevelDivision = - fromMaybe TopLevelDefault (topLevelDivision params) - , writerListings = fromMaybe False (listings params) - , writerHighlightStyle = hlStyle - , writerSetextHeaders = fromMaybe False (setextHeaders params) - , writerEpubSubdirectory = - fromMaybe "EPUB" (epubSubdirectory params) - , writerEpubMetadata = epubMetadata params - , writerEpubFonts = fromMaybe [] (epubFonts params) - , writerEpubChapterLevel = fromMaybe 1 (epubChapterLevel params) - , writerTOCDepth = fromMaybe 3 (tocDepth params) - , writerReferenceDoc = referenceDoc params - , writerReferenceLocation = - fromMaybe EndOfDocument (referenceLocation params) - , writerPreferAscii = fromMaybe False (preferAscii params) - } - let reader = case readerSpec of - TextReader r -> r readeropts - ByteStringReader r -> \t -> do - let eitherbs = decodeBase64 $ UTF8.fromText t - case eitherbs of - Left errt -> throwError $ PandocSomeError errt - Right bs -> r readeropts $ BL.fromStrict bs - let writer = case writerSpec of - TextWriter w -> fmap textHandler . w writeropts - ByteStringWriter w -> fmap bsHandler . 
w writeropts - reader (text params) >>= - (if citeproc params == Just True - then processCitations - else return) >>= - writer - - handleErr (Right t) = return t - handleErr (Left err) = throwError $ - err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err } - - compileCustomTemplate toformat t = do - res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) t - case res of - Left e -> throwError $ PandocTemplateError (T.pack e) - Right tpl -> return tpl diff --git a/server/pandoc-server.md b/server/pandoc-server.md deleted file mode 100644 index e22063fa8..000000000 --- a/server/pandoc-server.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: pandoc-server -section: 1 -date: August 15, 2022 ---- - -# SYNOPSIS - -`pandoc-server` [*options*] - -# DESCRIPTION - -`pandoc-server` is a web server that can perform pandoc -conversions. It can be used either as a running server -or as a CGI program. To use `pandoc-server` as a CGI -program, rename it (or symlink it) as `pandoc-server.cgi`. -(Note: if you symlink it, you may need to adjust your -webserver's configuration in order to allow it to follow -symlinks for the CGI script.) - -All pandoc functions are run in the PandocPure monad, which -ensures that they can do no I/O operations on the server. -This should provide a high degree of security. It does, -however, impose certain limitations: - -- PDFs cannot be produced. - -- Filters are not supported. - -- Resources cannot be fetched via HTTP. - -- Any images, include files, or other resources needed for - the document conversion must be explicitly included in - the request, via the `files` field (see below under API). - -# OPTIONS - -`--port NUM` -: HTTP port on which to run the server. Default: 3030. - -`--timeout SECONDS` -: Timeout in seconds, after which a conversion is killed. Default: 2. - -`--help` -: Print this help. - -`--version` -: Print version. - -# API - -## Root endpoint - -The root (`/`) endpoint accepts only POST requests. 
-It returns a converted document in one of the following -formats, depending on Accept headers: - -- `text/plain` -- `application/json` -- `application/octet-stream` - -If the result is a binary format (e.g., `epub` or `docx`) -and the content is returned as plain text or JSON, the -binary will be base64 encoded. - -The body of the POST request should be a JSON object, -with the following fields. Only the `text` field is -required; all of the others can be omitted for default -values. When there are several string alternatives, -the first one given is the default. - -`text` (string) - -: The document to be converted. Note: - if the `from` format is binary (e.g., `epub` or `docx`), then - `text` should be a base64 encoding of the document. - -`from` (string, default `"markdown"`) - -: The input format, possibly with extensions, just as it is - specified on the pandoc command line. - -`to` (string, default `"html"`) - -: The output format, possibly with extensions, just as it is - specified on the pandoc command line. - -`wrapText` (`"auto"|"preserve"|"none"`) - -: Text wrapping option: either `"auto"` (automatic - hard-wrapping to fit within a column width), `"preserve"` - (insert newlines where they are present in the source), - or `"none"` (don't insert any unnecessary newlines at all). - -`columns` (integer, default 72) - -: Column width (affects text wrapping and calculation of - table column widths in plain text formats). - -`standalone` (boolean, default false) - -: If true, causes a standalone document to be produced, using - the default template or the custom template specified using - `template`. If false, a fragment will be produced. - When this field is omitted and the output format is binary - (e.g., `epub` or `docx`), the default is true instead. - -`template` (string) - -: String contents of a document template (see Templates in - `pandoc(1)` for the format). - -`tabStop` (integer, default 4) - -: Tab stop (spaces per tab). - -`indentedCodeClasses` (array of strings) - -: List of classes to be applied to indented Markdown code blocks. 
- -`abbreviations` (array of strings) - -: List of strings to be regarded as abbreviations when - parsing Markdown. See `--abbreviations` in `pandoc(1)` for - details. - -`defaultImageExtension` (string) - -: Extension to be applied to image sources that lack extensions - (e.g. `".jpg"`). - -`trackChanges` (`"accept"|"reject"|"all"`) - -: Specifies what to do with insertions, deletions, and - comments produced by the MS Word "Track Changes" feature. Only - affects docx input. - -`stripComments` (boolean, default false) - -: Causes HTML comments to be stripped in Markdown or Textile - source, instead of being passed through to the output format. - -`citeproc` (boolean, default false) - -: Causes citations to be processed using citeproc. See - Citations in `pandoc(1)` for details. - -`citeMethod` (`"citeproc"|"natbib"|"biblatex"`) - -: Determines how citations are formatted in LaTeX output. - -`tableOfContents` (boolean, default false) - -: Include a table of contents (in supported formats). - -`tocDepth` (integer, default 3) - -: Depth of sections to include in the table of contents. - -`numberSections` (boolean, default false) - -: Automatically number sections (in supported formats). - -`numberOffset` (array of integers) - -: Offsets to be added to each component of the section number. - For example, `[1]` will cause the first section to be - numbered "2" and the first subsection "2.1"; `[0,1]` will - cause the first section to be numbered "1" and the first - subsection "1.2." - -`identifierPrefix` (string) - -: Prefix to be added to all automatically-generated identifiers. - -`sectionDivs` (boolean, default false) - -: Arrange the document into a hierarchy of nested sections - based on the headings. - -`htmlQTags` (boolean, default false) - -: Use `<q>` elements in HTML instead of literal quotation marks. - -`listings` (boolean, default false) - -: Use the `listings` package to format code in LaTeX output. 
- -`referenceLinks` (boolean, default false) - -: Create reference links rather than inline links in Markdown output. - -`setextHeaders` (boolean, default false) - -: Use Setext (underlined) headings instead of ATX (`#`-prefixed) - in Markdown output. - -`preferAscii` (boolean, default false) - -: Use entities and escapes when possible to avoid non-ASCII - characters in the output. - -`referenceLocation` (`"document"|"section"|"block"`) - -: Determines whether link references and footnotes are placed - at the end of the document, the end of the section, or the - end of the block (e.g. paragraph), in - certain formats. (See `pandoc(1)` under `--reference-location`.) - - -`topLevelDivision` (`"default"|"part"|"chapter"|"section"`) - -: Determines how top-level headings are interpreted in - LaTeX, ConTeXt, DocBook, and TEI. The `"default"` value - tries to choose the best interpretation based on heuristics. - -`emailObfuscation` (`"none"|"references"|"javascript"`) - -: Determines how email addresses are obfuscated in HTML. - -`htmlMathMethod` (`"plain"|"webtex"|"gladtex"|"mathml"|"mathjax"|"katex"`) - -: Determines how math is represented in HTML. - -`variables` (JSON mapping) - -: Variables to be interpolated in the template. (See Templates - in `pandoc(1)`.) - -`dpi` (integer, default 96) - -: Dots-per-inch to use for conversions between pixels and - other measurements (for image sizes). - -`incremental` (boolean, default false) - -: If true, lists appear incrementally by default in slide shows. - -`slideLevel` (integer) - -: Heading level that determines slide divisions in slide shows. - The default is to pick the highest heading level under which - there is body text. - -`highlightStyle` (string, default `"pygments"`) - -: Specify the style to use for syntax highlighting of code. - Standard styles are `"pygments"` (the default), `"kate"`, - `"monochrome"`, `"breezeDark"`, `"espresso"`, `"zenburn"`, - `"haddock"`, and `"tango"`. 
Alternatively, the path of - a `.theme` with a KDE syntax theme may be used (in this - case, the relevant file contents must also be included - in `files`, see below). - -`epubMetadata` (string) - -: Dublin core XML elements to be used for EPUB metadata. - -`epubChapterLevel` (integer, default 1) - -: Heading level at which chapter splitting occurs in EPUBs. - -`epubSubdirectory` (string, default "EPUB") - -: Name of content subdirectory in the EPUB container. - -`epubFonts` (array of file paths) - -: Fonts to include in the EPUB. The fonts themselves must be - included in `files` (see below). - -`referenceDoc` (file path) - -: Reference doc to use in creating `docx` or `odt` or `pptx`. - See `pandoc(1)` under `--reference-doc` for details. - -`files` (JSON mapping of file paths to base64-encoded strings) - -: Any files needed for the conversion, including images - referred to in the document source, should be included here. - Binary data must be base64-encoded. Textual data may be - left as it is, unless it is *also* valid base 64 data, - in which case it will be interpreted that way. - - -## `/batch` endpoint - -The `/batch` endpoint behaves like the root endpoint, -except for these two points: - -- It accepts a JSON array, each element of which is a JSON - object like the one expected by the root endpoint. -- It returns a JSON array of results. (It will not return - plain text or octet-stream, like the root endpoint.) - -This endpoint can be used to convert a sequence of small -snippets in one request. - -## `/version` endpoint - -The `/version` endpoint accepts a GET request and returns -the pandoc version as a plain or JSON-encoded string, -depending on Accept headers. 
- -## `/babelmark` endpoint - -The `/babelmark` endpoint accepts a GET request with -the following query parameters: - -- `text` (required string) -- `from` (optional string, default is `"markdown"`) -- `to` (optional string, default is `"html"`) -- `standalone` (optional boolean, default is `false`) - -It returns a JSON object with fields `html` and `version`. -This endpoint is designed to support the -[Babelmark](https://babelmark.github.io) website. - -# AUTHORS - -Copyright 2022 John MacFarlane (jgm@berkeley.edu). Released -under the [GPL], version 2 or greater. This software carries no -warranty of any kind. (See COPYRIGHT for full copyright and -warranty notices.) - -[GPL]: https://www.gnu.org/copyleft/gpl.html "GNU General Public License" - |
