summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2023-01-30 18:41:44 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2023-01-30 21:09:02 -0800
commit: 8fc4fc66a31a7a13db9072ebdf0c3f017d6ec51e (patch)
tree: bdef1df49034727a5cb1cf4821aa7e92a306c006 /src/Text
parent: 1193bb59641f0fa0ab033d0382916be4c83e07da (diff)
Add new `--chunk-template` option (closes #8581).
* Add `--chunk-template` CLI option, allowing more control over the chunk filenames in chunked HTML output.
* Text.Pandoc.App: Add `optChunkTemplate` constructor to Opt [API change].
* Text.Pandoc.Options: add `writerChunkTemplate` constructor to WriterOptions [API change].
* Text.Pandoc.Chunks: add Data, Typeable, Generic instances for PathTemplate.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/App/CommandLineOptions.hs 7
-rw-r--r-- src/Text/Pandoc/App/Opt.hs 5
-rw-r--r-- src/Text/Pandoc/App/OutputSettings.hs 4
-rw-r--r-- src/Text/Pandoc/Chunks.hs 6
-rw-r--r-- src/Text/Pandoc/Options.hs 3
-rw-r--r-- src/Text/Pandoc/Writers/ChunkedHTML.hs 2
6 files changed, 25 insertions, 2 deletions
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 3e8b832d2..b0e54c92d 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -793,6 +793,13 @@ options =
"NUMBER")
"" -- "Header level at which to split documents in chunked HTML or EPUB"
+ , Option "" ["chunk-template"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optChunkTemplate = Just (T.pack arg) })
+ "PATHTEMPLATE")
+ "" -- "Template for file paths in chunkedhtml"
+
, Option "" ["epub-chapter-level"]
(ReqArg
(\arg opt -> do
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 762305170..aa24690dd 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -127,6 +127,7 @@ data Opt = Opt
, optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file
, optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
, optSplitLevel :: Int -- ^ Header level at which to split documents in epub and chunkedhtml
+ , optChunkTemplate :: Maybe Text -- ^ Template to use for chunk filenames
, optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container
, optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
, optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
@@ -209,6 +210,7 @@ instance FromJSON Opt where
<*> o .:? "reference-doc"
<*> ((o .:? "split-level") <|> (o .:? "epub-chapter-level"))
.!= optSplitLevel defaultOpts
+ <*> o .:? "chunk-template"
<*> o .:? "epub-subdirectory" .!= optEpubSubdirectory defaultOpts
<*> o .:? "epub-metadata"
<*> o .:? "epub-fonts" .!= optEpubFonts defaultOpts
@@ -563,6 +565,8 @@ doOpt (k,v) = do
parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x })
"split-level" ->
parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x })
+ "chunk-template" ->
+ parseJSON v >>= \x -> return (\o -> o{ optChunkTemplate = Just x })
"epub-cover-image" ->
parseJSON v >>= \x ->
return (\o -> o{ optEpubCoverImage = unpack <$> x })
@@ -740,6 +744,7 @@ defaultOpts = Opt
, optAbbreviations = Nothing
, optReferenceDoc = Nothing
, optSplitLevel = 1
+ , optChunkTemplate = Nothing
, optEpubSubdirectory = "EPUB"
, optEpubMetadata = Nothing
, optEpubFonts = []
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 17df09e9d..525055d72 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -35,6 +35,7 @@ import System.Directory (getCurrentDirectory)
import System.Exit (exitSuccess)
import System.FilePath
import System.IO (stdout)
+import Text.Pandoc.Chunks (PathTemplate(..))
import Text.Pandoc
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..))
@@ -249,6 +250,9 @@ optToOutputSettings scriptingEngine opts = do
, writerEpubFonts = optEpubFonts opts
, writerEpubTitlePage = optEpubTitlePage opts
, writerSplitLevel = optSplitLevel opts
+ , writerChunkTemplate = maybe (PathTemplate "%s-%i.html")
+ PathTemplate
+ (optChunkTemplate opts)
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
, writerSyntaxMap = syntaxMap
diff --git a/src/Text/Pandoc/Chunks.hs b/src/Text/Pandoc/Chunks.hs
index 9ba282999..2447deb72 100644
--- a/src/Text/Pandoc/Chunks.hs
+++ b/src/Text/Pandoc/Chunks.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{- |
Module : Text.Pandoc.Chunks
@@ -38,6 +39,8 @@ import Data.String (IsString)
import GHC.Generics (Generic)
import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
import Data.Tree (Tree(..))
+import Data.Data (Data)
+import Data.Typeable (Typeable)
-- | Split 'Pandoc' into 'Chunk's, e.g. for conversion into
-- a set of HTML pages or EPUB chapters.
@@ -236,6 +239,7 @@ resolvePathTemplate :: PathTemplate
-> FilePath
resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum =
T.unpack .
+ T.filter (\c -> c /= '/' && c /= '\\') .
T.replace "%n" (T.pack $ printf "%03d" chunknum) .
T.replace "%s" secnum .
T.replace "%h" headingText .
@@ -253,7 +257,7 @@ resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum =
-- @"section-1.2-introduction.html"@.
newtype PathTemplate =
PathTemplate { unPathTemplate :: Text }
- deriving (Show, IsString)
+ deriving (Show, IsString, Data, Typeable, Generic)
-- | A part of a document (typically a chapter or section, or
-- the part of a section before its subsections).
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index f671dd279..d609f591d 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -45,6 +45,7 @@ import GHC.Generics (Generic)
import Skylighting (SyntaxMap, defaultSyntaxMap)
import Text.DocTemplates (Context(..), Template)
import Text.Pandoc.Extensions
+import Text.Pandoc.Chunks (PathTemplate)
import Text.Pandoc.Highlighting (Style, pygments)
import Text.Pandoc.UTF8 (toStringLazy)
import Data.Aeson.TH (deriveJSON)
@@ -318,6 +319,7 @@ data WriterOptions = WriterOptions
, writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
, writerEpubTitlePage :: Bool -- ^ Include title page in epub
, writerSplitLevel :: Int -- ^ Header level at which to split EPUB or chunked HTML into separate files
+ , writerChunkTemplate :: PathTemplate -- ^ Template for filenames in chunked HTML
, writerTOCDepth :: Int -- ^ Number of levels to include in TOC
, writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
@@ -355,6 +357,7 @@ instance Default WriterOptions where
, writerEpubFonts = []
, writerEpubTitlePage = True
, writerSplitLevel = 1
+ , writerChunkTemplate = "%s-%i.html"
, writerTOCDepth = 3
, writerReferenceDoc = Nothing
, writerReferenceLocation = EndOfDocument
diff --git a/src/Text/Pandoc/Writers/ChunkedHTML.hs b/src/Text/Pandoc/Writers/ChunkedHTML.hs
index b20f160ce..7287f3a9d 100644
--- a/src/Text/Pandoc/Writers/ChunkedHTML.hs
+++ b/src/Text/Pandoc/Writers/ChunkedHTML.hs
@@ -55,7 +55,7 @@ writeChunkedHTML opts (Pandoc meta blocks) = do
epochtime <- floor <$> getPOSIXTime
let toMediaEntry (fp, _mt, bs) = toEntry fp epochtime bs
mediaEntries <- map toMediaEntry . mediaItems <$> getMediaBag
- let chunkedDoc = splitIntoChunks "%s-%i.html"
+ let chunkedDoc = splitIntoChunks (writerChunkTemplate opts)
True
(Just 1)
(writerSplitLevel opts)