summaryrefslogtreecommitdiff
path: root/server
diff options
context:
space:
mode:
Diffstat (limited to 'server')
-rw-r--r--server/Main.hs54
-rw-r--r--server/PandocServer.hs301
-rw-r--r--server/pandoc-server.md325
3 files changed, 0 insertions, 680 deletions
diff --git a/server/Main.hs b/server/Main.hs
deleted file mode 100644
index 531a0b0a0..000000000
--- a/server/Main.hs
+++ /dev/null
@@ -1,54 +0,0 @@
-module Main where
-
-import PandocServer (app)
-import Text.Pandoc (pandocVersion)
-import Control.Monad (when)
-import qualified Network.Wai.Handler.CGI as CGI
-import qualified Network.Wai.Handler.Warp as Warp
-import Network.Wai.Middleware.Timeout (timeout)
-import System.Environment (getProgName)
-import Options.Applicative
-import System.Exit (exitWith, ExitCode(ExitSuccess))
-import Data.Text as T
-
--- Command-line settings for the pandoc-server executable.
-data Opts = Opts
- { optPort :: Warp.Port, -- port handed to Warp.run (not used in CGI mode)
- optTimeout :: Int, -- seconds
- optVersion :: Bool } -- when True, main prints the version and exits
-
--- Command-line parser for Opts. Recognises --port (default 3030),
--- --timeout in seconds (default 2) and the --version switch
--- (False unless the flag is given).
-options :: Parser Opts
-options = Opts
- <$> option auto
- ( long "port"
- <> value 3030
- <> metavar "PORT"
- <> help "Port to serve on" )
- <*> option auto
- ( long "timeout"
- <> value 2
- <> metavar "SECONDS"
- <> help "Seconds timeout" )
- <*> flag False True
- ( long "version"
- <> help "Print version" )
-
--- Entry point. Parses the command line, handles --version, and then
--- serves the PandocServer application wrapped in the timeout middleware.
--- The executable's own name selects the mode: when it is called
--- "pandoc-server.cgi" it runs as a CGI script, otherwise it runs as a
--- persistent Warp server on the configured port.
-main :: IO ()
-main = do
- progname <- getProgName
- let optspec = info (options <**> helper)
- ( fullDesc
- <> progDesc "Run a pandoc server"
- <> header "pandoc-server - text conversion server" )
- opts <- execParser optspec
-
- when (optVersion opts) $ do
- putStrLn $ progname <> " " <> T.unpack pandocVersion
- exitWith ExitSuccess
-
- let port = optPort opts
- let app' = timeout (optTimeout opts) app -- optTimeout is in seconds (see Opts)
- if progname == "pandoc-server.cgi"
- then -- operate as a CGI script
- CGI.run app' -- the port option plays no role in this branch
- else -- operate as a persistent server
- Warp.run port app'
diff --git a/server/PandocServer.hs b/server/PandocServer.hs
deleted file mode 100644
index 295412c6d..000000000
--- a/server/PandocServer.hs
+++ /dev/null
@@ -1,301 +0,0 @@
-{-# LANGUAGE DataKinds #-}
-{-# LANGUAGE TemplateHaskell #-}
-{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE OverloadedStrings #-}
-module PandocServer
- ( app
- , Params(..)
- ) where
-
-import Data.Aeson
-import Data.Aeson.TH
-import Network.Wai
-import Servant
-import Text.DocTemplates as DocTemplates
-import Text.Pandoc
-import Text.Pandoc.Citeproc (processCitations)
-import Text.Pandoc.Highlighting (lookupHighlightingStyle)
-import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TLE
-import Data.Maybe (fromMaybe)
-import Data.Char (isAlphaNum)
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Lazy as BL
-import Data.ByteString.Base64 (decodeBase64, encodeBase64)
-import Data.Default
-import Data.Map (Map)
-import Data.Set (Set)
-import Skylighting (defaultSyntaxMap)
-
--- Binary payload wrapped around a lazy ByteString. It is the value type
--- of the `files` map in Params and has its own JSON instances.
-newtype Blob = Blob BL.ByteString
- deriving (Show, Eq)
-
--- A Blob is written to JSON as the base64 encoding of its bytes.
-instance ToJSON Blob where
- toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs)
-
--- A Blob is read from a JSON string. If the string decodes as base64, the
--- decoded bytes are used; otherwise the bytes produced by UTF8.fromText
--- are kept as they are. Consequently a plain-text value that happens to
--- be valid base64 is decoded rather than taken literally.
-instance FromJSON Blob where
- parseJSON = withText "Blob" $ \t -> do
- let inp = UTF8.fromText t
- case decodeBase64 inp of
- Right bs -> return $ Blob $ BL.fromStrict bs
- Left _ -> -- treat as regular text
- return $ Blob $ BL.fromStrict inp
-
--- This is the data to be supplied by the JSON payload
--- of requests. Maybe values may be omitted and will be
--- given default values.
--- `text` is the only field that is not a Maybe. The fallback used for
--- an omitted field is chosen by the conversion code in `server`, not here.
-data Params = Params
- { text :: Text -- document source; base64 when the reader is a ByteStringReader
- , from :: Maybe Text -- reader format; "markdown" when omitted
- , to :: Maybe Text -- writer format; "html" when omitted
- , wrapText :: Maybe WrapOption
- , columns :: Maybe Int
- , standalone :: Maybe Bool
- , template :: Maybe Text -- only consulted when a standalone document is produced
- , tabStop :: Maybe Int -- applied to both reader and writer options
- , indentedCodeClasses :: Maybe [Text]
- , abbreviations :: Maybe (Set Text)
- , defaultImageExtension :: Maybe Text
- , trackChanges :: Maybe TrackChanges
- , stripComments :: Maybe Bool
- , citeproc :: Maybe Bool -- processCitations runs only for Just True
- , variables :: Maybe (DocTemplates.Context Text)
- , tableOfContents :: Maybe Bool
- , incremental :: Maybe Bool
- , htmlMathMethod :: Maybe HTMLMathMethod
- , numberSections :: Maybe Bool
- , numberOffset :: Maybe [Int]
- , sectionDivs :: Maybe Bool
- , referenceLinks :: Maybe Bool
- , dpi :: Maybe Int
- , emailObfuscation :: Maybe ObfuscationMethod
- , identifierPrefix :: Maybe Text
- , citeMethod :: Maybe CiteMethod
- , htmlQTags :: Maybe Bool
- , slideLevel :: Maybe Int
- , topLevelDivision :: Maybe TopLevelDivision
- , listings :: Maybe Bool
- , highlightStyle :: Maybe Text -- style name looked up with lookupHighlightingStyle
- , setextHeaders :: Maybe Bool
- , epubSubdirectory :: Maybe Text
- , epubFonts :: Maybe [FilePath]
- , epubMetadata :: Maybe Text
- , epubChapterLevel :: Maybe Int
- , tocDepth :: Maybe Int
- , referenceDoc :: Maybe FilePath
- , referenceLocation :: Maybe ReferenceLocation
- , preferAscii :: Maybe Bool
- , files :: Maybe (Map FilePath Blob) -- file path -> contents
- } deriving (Show)
-
--- Default request: empty `text` and every optional field unset (Nothing).
--- The babelmark handler starts from this value and fills in only the
--- fields taken from the query string.
-instance Default Params where
- def = Params
- { text = ""
- , from = Nothing
- , to = Nothing
- , wrapText = Nothing
- , columns = Nothing
- , standalone = Nothing
- , template = Nothing
- , tabStop = Nothing
- , indentedCodeClasses = Nothing
- , abbreviations = Nothing
- , defaultImageExtension = Nothing
- , trackChanges = Nothing
- , stripComments = Nothing
- , citeproc = Nothing
- , variables = Nothing
- , tableOfContents = Nothing
- , incremental = Nothing
- , htmlMathMethod = Nothing
- , numberSections = Nothing
- , numberOffset = Nothing
- , sectionDivs = Nothing
- , referenceLinks = Nothing
- , dpi = Nothing
- , emailObfuscation = Nothing
- , identifierPrefix = Nothing
- , citeMethod = Nothing
- , htmlQTags = Nothing
- , slideLevel = Nothing
- , topLevelDivision = Nothing
- , listings = Nothing
- , highlightStyle = Nothing
- , setextHeaders = Nothing
- , epubSubdirectory = Nothing
- , epubMetadata = Nothing
- , epubChapterLevel = Nothing
- , epubFonts = Nothing
- , tocDepth = Nothing
- , referenceDoc = Nothing
- , referenceLocation = Nothing
- , preferAscii = Nothing
- , files = Nothing
- }
--- The options listed below have no corresponding field in Params yet.
- -- TODO:
- -- shiftHeadingLevelBy
- -- metadata
- -- selfContained
- -- embedResources
- -- epubCoverImage
- -- stripEmptyParagraphs
- -- titlePrefix
- -- ipynbOutput
- -- eol
- -- csl
- -- bibliography
- -- citationAbbreviations
-
--- Automatically derive code to convert to/from JSON.
--- Template Haskell splice: generates both the ToJSON and the FromJSON
--- instance for Params using aeson's defaultOptions.
-$(deriveJSON defaultOptions ''Params)
-
--- This is the API. It has five routes:
---
--- * The root endpoint takes a request body consisting of a
---   JSON-encoded Params structure and responds to POST requests
---   with either plain text or JSON, depending on the Accept header.
--- * The root endpoint also answers POST requests that accept
---   an octet stream, returning the result as a strict ByteString.
--- * "batch" takes a JSON array of Params via POST and returns a
---   JSON array of Text results.
--- * "babelmark" is a GET endpoint taking a required "text" query
---   parameter, optional "from" and "to" parameters and a
---   "standalone" flag; it returns a JSON value.
--- * "version" is a GET endpoint returning Text as plain text or JSON.
-type API =
- ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text
- :<|>
- ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString
- :<|>
- "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text]
- :<|>
- "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value
- :<|>
- "version" :> Get '[PlainText, JSON] Text
-
--- The WAI application exported by this module: the API type served
--- by the handlers in `server`.
-app :: Application
-app = serve api server
-
--- Proxy value that passes the API type to `serve`.
-api :: Proxy API
-api = Proxy
-
--- Handlers for the routes of API, listed in the same order as the routes:
--- text/JSON conversion, octet-stream conversion, batch, babelmark, version.
--- NOTE(review): the batch handler is `mapM convert`, so presumably one
--- failing element fails the whole batch request -- confirm this is intended.
-server :: Server API
-server = convert
- :<|> convertBytes
- :<|> mapM convert
- :<|> babelmark -- for babelmark which expects {"html": "", "version": ""}
- :<|> pure pandocVersion
- where
--- Builds Params from the query parameters (every other field comes from
--- def), converts, and wraps the result together with the pandoc version.
- babelmark text' from' to' standalone' = do
- res <- convert def{ text = text',
- from = from', to = to',
- standalone = Just standalone' }
- return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ]
-
- -- We use runPure for the pandoc conversions, which ensures that
- -- they will do no IO. This makes the server safe to use. However,
- -- it will mean that features requiring IO, like RST includes, will not work.
- -- Changing this to
- -- handleErr =<< liftIO (runIO (convert' params))
- -- will allow the IO operations.
--- Text-returning conversion: text writers are passed through unchanged,
--- ByteString writers have their output base64-encoded.
- convert params = handleErr $
- runPure (convert' id (encodeBase64 . BL.toStrict) params)
-
--- ByteString-returning conversion: text output goes through UTF8.fromText,
--- ByteString writer output is returned without base64 encoding.
- convertBytes params = handleErr $
- runPure (convert' UTF8.fromText BL.toStrict params)
-
--- Runs one request: reader, then citeproc if requested, then writer.
--- textHandler and bsHandler turn the writer's Text or lazy ByteString
--- output into the caller's result type.
--- NOTE(review): `files params` is not read anywhere in this function,
--- although pandoc-server.md describes `files` as the way to supply
--- resources -- confirm whether it is meant to be wired in here.
- convert' :: PandocMonad m
- => (Text -> a) -> (BL.ByteString -> a) -> Params -> m a
- convert' textHandler bsHandler params = do
- let readerFormat = fromMaybe "markdown" $ from params
- let writerFormat = fromMaybe "html" $ to params
- (readerSpec, readerExts) <- getReader readerFormat
- (writerSpec, writerExts) <- getWriter writerFormat
- let binaryOutput = case writerSpec of
- ByteStringWriter{} -> True
- _ -> False
--- When `standalone` is omitted it defaults to True for ByteString
--- writers and to False for text writers.
- let isStandalone = fromMaybe binaryOutput (standalone params)
--- Leading alphanumeric run of the writer format, lower-cased; used as the
--- template format name.
- let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat
--- NOTE(review): hlStyle is Nothing when highlightStyle is omitted, whereas
--- pandoc-server.md documents a default of "pygments" -- confirm which
--- behaviour is intended.
- hlStyle <- traverse (lookupHighlightingStyle . T.unpack)
- $ highlightStyle params
--- A template is compiled only for standalone output: the default template
--- for the format, or the custom one supplied in the request.
- mbTemplate <- if isStandalone
- then case template params of
- Nothing -> Just <$>
- compileDefaultTemplate toformat
- Just t -> Just <$>
- compileCustomTemplate toformat t
- else return Nothing
- let readeropts = def{ readerExtensions = readerExts
- , readerStandalone = isStandalone
- , readerTabStop = fromMaybe 4 (tabStop params)
- , readerIndentedCodeClasses = fromMaybe []
- (indentedCodeClasses params)
- , readerAbbreviations =
- fromMaybe mempty (abbreviations params)
- , readerDefaultImageExtension =
- fromMaybe mempty (defaultImageExtension params)
- , readerTrackChanges =
- fromMaybe AcceptChanges (trackChanges params)
- , readerStripComments =
- fromMaybe False (stripComments params)
- }
- let writeropts =
- def{ writerExtensions = writerExts
- , writerTabStop = fromMaybe 4 (tabStop params)
- , writerWrapText = fromMaybe WrapAuto (wrapText params)
- , writerColumns = fromMaybe 72 (columns params)
- , writerTemplate = mbTemplate
- , writerSyntaxMap = defaultSyntaxMap
- , writerVariables = fromMaybe mempty (variables params)
- , writerTableOfContents = fromMaybe False (tableOfContents params)
- , writerIncremental = fromMaybe False (incremental params)
- , writerHTMLMathMethod =
- fromMaybe PlainMath (htmlMathMethod params)
- , writerNumberSections = fromMaybe False (numberSections params)
- , writerNumberOffset = fromMaybe [] (numberOffset params)
- , writerSectionDivs = fromMaybe False (sectionDivs params)
- , writerReferenceLinks = fromMaybe False (referenceLinks params)
- , writerDpi = fromMaybe 96 (dpi params)
- , writerEmailObfuscation =
- fromMaybe NoObfuscation (emailObfuscation params)
- , writerIdentifierPrefix =
- fromMaybe mempty (identifierPrefix params)
- , writerCiteMethod = fromMaybe Citeproc (citeMethod params)
- , writerHtmlQTags = fromMaybe False (htmlQTags params)
- , writerSlideLevel = slideLevel params
- , writerTopLevelDivision =
- fromMaybe TopLevelDefault (topLevelDivision params)
- , writerListings = fromMaybe False (listings params)
- , writerHighlightStyle = hlStyle
- , writerSetextHeaders = fromMaybe False (setextHeaders params)
- , writerEpubSubdirectory =
- fromMaybe "EPUB" (epubSubdirectory params)
- , writerEpubMetadata = epubMetadata params
- , writerEpubFonts = fromMaybe [] (epubFonts params)
- , writerEpubChapterLevel = fromMaybe 1 (epubChapterLevel params)
- , writerTOCDepth = fromMaybe 3 (tocDepth params)
- , writerReferenceDoc = referenceDoc params
- , writerReferenceLocation =
- fromMaybe EndOfDocument (referenceLocation params)
- , writerPreferAscii = fromMaybe False (preferAscii params)
- }
--- ByteString readers expect `text` to be base64; a decoding failure is
--- raised as PandocSomeError carrying the decoder's message.
- let reader = case readerSpec of
- TextReader r -> r readeropts
- ByteStringReader r -> \t -> do
- let eitherbs = decodeBase64 $ UTF8.fromText t
- case eitherbs of
- Left errt -> throwError $ PandocSomeError errt
- Right bs -> r readeropts $ BL.fromStrict bs
- let writer = case writerSpec of
- TextWriter w -> fmap textHandler . w writeropts
- ByteStringWriter w -> fmap bsHandler . w writeropts
--- Citations are processed only when `citeproc` is explicitly Just True.
- reader (text params) >>=
- (if citeproc params == Just True
- then processCitations
- else return) >>=
- writer
-
--- Turns the result of runPure into a handler result. Every pandoc error is
--- thrown through err500, with the rendered error text as the response body.
- handleErr (Right t) = return t
- handleErr (Left err) = throwError $
- err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err }
-
--- Compiles a template supplied in the request under the name
--- "custom.<toformat>"; a compile failure is rethrown as PandocTemplateError.
- compileCustomTemplate toformat t = do
- res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) t
- case res of
- Left e -> throwError $ PandocTemplateError (T.pack e)
- Right tpl -> return tpl
diff --git a/server/pandoc-server.md b/server/pandoc-server.md
deleted file mode 100644
index e22063fa8..000000000
--- a/server/pandoc-server.md
+++ /dev/null
@@ -1,325 +0,0 @@
----
-title: pandoc-server
-section: 1
-date: August 15, 2022
----
-
-# SYNOPSIS
-
-`pandoc-server` [*options*]
-
-# DESCRIPTION
-
-`pandoc-server` is a web server that can perform pandoc
-conversions. It can be used either as a running server
-or as a CGI program. To use `pandoc-server` as a CGI
-program, rename it (or symlink it) as `pandoc-server.cgi`.
-(Note: if you symlink it, you may need to adjust your
-webserver's configuration in order to allow it to follow
-symlinks for the CGI script.)
-
-All pandoc functions are run in the PandocPure monad, which
-ensures that they can do no I/O operations on the server.
-This should provide a high degree of security. It does,
-however, impose certain limitations:
-
-- PDFs cannot be produced.
-
-- Filters are not supported.
-
-- Resources cannot be fetched via HTTP.
-
-- Any images, include files, or other resources needed for
- the document conversion must be explicitly included in
- the request, via the `files` field (see below under API).
-
-# OPTIONS
-
-`--port PORT`
-: HTTP port on which to run the server. Default: 3030.
-
-`--timeout SECONDS`
-: Timeout in seconds, after which a conversion is killed. Default: 2.
-
-`--help`
-: Print this help.
-
-`--version`
-: Print version.
-
-# API
-
-## Root endpoint
-
-The root (`/`) endpoint accepts only POST requests.
-It returns a converted document in one of the following
-formats, depending on Accept headers:
-
-- `text/plain`
-- `application/json`
-- `application/octet-stream`
-
-If the result is a binary format (e.g., `epub` or `docx`)
-and the content is returned as plain text or JSON, the
-binary will be base64 encoded.
-
-The body of the POST request should be a JSON object,
-with the following fields. Only the `text` field is
-required; all of the others can be omitted for default
-values. When there are several string alternatives,
-the first one given is the default.
-
-`text` (string)
-
-: The document to be converted. Note:
- if the `from` format is binary (e.g., `epub` or `docx`), then
- `text` should be a base64 encoding of the document.
-
-`from` (string, default `"markdown"`)
-
-: The input format, possibly with extensions, just as it is
- specified on the pandoc command line.
-
-`to` (string, default `"html"`)
-
-: The output format, possibly with extensions, just as it is
- specified on the pandoc command line.
-
-`wrapText` (`"auto"|"preserve"|"none"`)
-
-: Text wrapping option: either `"auto"` (automatic
- hard-wrapping to fit within a column width), `"preserve"`
- (insert newlines where they are present in the source),
- or `"none"` (don't insert any unnecessary newlines at all).
-
-`columns` (integer, default 72)
-
-: Column width (affects text wrapping and calculation of
- table column widths in plain text formats)
-
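-`standalone` (boolean, default false; true for binary output formats)
-
-: If true, causes a standalone document to be produced, using
- the default template or the custom template specified using
- `template`. If false, a fragment will be produced. If omitted,
- binary output formats (e.g., `epub` or `docx`) default to true.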
-
-`template` (string)
-
-: String contents of a document template (see Templates in
- `pandoc(1)` for the format).
-
-`tabStop` (integer, default 4)
-
-: Tab stop (spaces per tab).
-
-`indentedCodeClasses` (array of strings)
-
-: List of classes to be applied to indented Markdown code blocks.
-
-`abbreviations` (array of strings)
-
-: List of strings to be regarded as abbreviations when
- parsing Markdown. See `--abbreviations` in `pandoc(1)` for
- details.
-
-`defaultImageExtension` (string)
-
-: Extension to be applied to image sources that lack extensions
- (e.g. `".jpg"`).
-
-`trackChanges` (`"accept"|"reject"|"all"`)
-
-: Specifies what to do with insertions, deletions, and
- comments produced by the MS Word "Track Changes" feature. Only
- affects docx input.
-
-`stripComments` (boolean, default false)
-
-: Causes HTML comments to be stripped in Markdown or Textile
- source, instead of being passed through to the output format.
-
-`citeproc` (boolean, default false)
-
-: Causes citations to be processed using citeproc. See
- Citations in `pandoc(1)` for details.
-
-`citeMethod` (`"citeproc"|"natbib"|"biblatex"`)
-
-: Determines how citations are formatted in LaTeX output.
-
-`tableOfContents` (boolean, default false)
-
-: Include a table of contents (in supported formats).
-
-`tocDepth` (integer, default 3)
-
-: Depth of sections to include in the table of contents.
-
-`numberSections` (boolean, default false)
-
-: Automatically number sections (in supported formats).
-
-`numberOffset` (array of integers)
-
-: Offsets to be added to each component of the section number.
- For example, `[1]` will cause the first section to be
- numbered "2" and the first subsection "2.1"; `[0,1]` will
- cause the first section to be numbered "1" and the first
- subsection "1.2."
-
-`identifierPrefix` (string)
-
-: Prefix to be added to all automatically-generated identifiers.
-
-`sectionDivs` (boolean, default false)
-
-: Arrange the document into a hierarchy of nested sections
- based on the headings.
-
-`htmlQTags` (boolean, default false)
-
-: Use `<q>` elements in HTML instead of literal quotation marks.
-
-`listings` (boolean, default false)
-
-: Use the `listings` package to format code in LaTeX output.
-
-`referenceLinks` (boolean, default false)
-
-: Create reference links rather than inline links in Markdown output.
-
-`setextHeaders` (boolean, default false)
-
-: Use Setext (underlined) headings instead of ATX (`#`-prefixed)
- in Markdown output.
-
-`preferAscii` (boolean, default false)
-
-: Use entities and escapes when possible to avoid non-ASCII
- characters in the output.
-
-`referenceLocation` (`"document"|"section"|"block"`)
-
-: Determines whether link references and footnotes are placed
- at the end of the document, the end of the section, or the
- end of the block (e.g. paragraph), in
- certain formats. (See `pandoc(1)` under `--reference-location`.)
-
-
-`topLevelDivision` (`"default"|"part"|"chapter"|"section"`)
-
-: Determines how top-level headings are interpreted in
- LaTeX, ConTeXt, DocBook, and TEI. The `"default"` value
- tries to choose the best interpretation based on heuristics.
-
-`emailObfuscation` (`"none"|"references"|"javascript"`)
-
-: Determines how email addresses are obfuscated in HTML.
-
-`htmlMathMethod` (`"plain"|"webtex"|"gladtex"|"mathml"|"mathjax"|"katex"`)
-
-: Determines how math is represented in HTML.
-
-`variables` (JSON mapping)
-
-: Variables to be interpolated in the template. (See Templates
- in `pandoc(1)`.)
-
-`dpi` (integer, default 96)
-
-: Dots-per-inch to use for conversions between pixels and
- other measurements (for image sizes).
-
-`incremental` (boolean, default false)
-
-: If true, lists appear incrementally by default in slide shows.
-
-`slideLevel` (integer)
-
-: Heading level that determines slide divisions in slide shows.
- The default is to pick the highest heading level under which
- there is body text.
-
-`highlightStyle` (string, default `"pygments"`)
-
-: Specify the style to use for syntax highlighting of code.
- Standard styles are `"pygments"` (the default), `"kate"`,
- `"monochrome"`, `"breezeDark"`, `"espresso"`, `"zenburn"`,
- `"haddock"`, and `"tango"`. Alternatively, the path of
- a `.theme` with a KDE syntax theme may be used (in this
- case, the relevant file contents must also be included
- in `files`, see below).
-
-`epubMetadata` (string)
-
-: Dublin core XML elements to be used for EPUB metadata.
-
-`epubChapterLevel` (integer, default 1)
-
-: Heading level at which chapter splitting occurs in EPUBs.
-
-`epubSubdirectory` (string, default `"EPUB"`)
-
-: Name of content subdirectory in the EPUB container.
-
-`epubFonts` (array of file paths)
-
-: Fonts to include in the EPUB. The fonts themselves must be
- included in `files` (see below).
-
-`referenceDoc` (file path)
-
-: Reference doc to use in creating `docx` or `odt` or `pptx`.
- See `pandoc(1)` under `--reference-doc` for details.
-
-`files` (JSON mapping of file paths to base64-encoded strings)
-
-: Any files needed for the conversion, including images
- referred to in the document source, should be included here.
- Binary data must be base64-encoded. Textual data may be
- left as it is, unless it is *also* valid base64 data,
- in which case it will be interpreted that way.
-
-
-## `/batch` endpoint
-
-The `/batch` endpoint behaves like the root endpoint,
-except for these two points:
-
-- It accepts a JSON array, each element of which is a JSON
- object like the one expected by the root endpoint.
-- It returns a JSON array of results. (It will not return
- plain text or octet-stream, like the root endpoint.)
-
-This endpoint can be used to convert a sequence of small
-snippets in one request.
-
-## `/version` endpoint
-
-The `/version` endpoint accepts a GET request and returns
-the pandoc version as a plain or JSON-encoded string,
-depending on Accept headers.
-
-## `/babelmark` endpoint
-
-The `/babelmark` endpoint accepts a GET request with
-the following query parameters:
-
-- `text` (required string)
-- `from` (optional string, default is `"markdown"`)
-- `to` (optional string, default is `"html"`)
-- `standalone` (optional boolean, default is `false`)
-
-It returns a JSON object with fields `html` and `version`.
-This endpoint is designed to support the
-[Babelmark](https://babelmark.github.io) website.
-
-# AUTHORS
-
-Copyright 2022 John MacFarlane (jgm@berkeley.edu). Released
-under the [GPL], version 2 or greater. This software carries no
-warranty of any kind. (See COPYRIGHT for full copyright and
-warranty notices.)
-
-[GPL]: https://www.gnu.org/copyleft/gpl.html "GNU General Public License"
-