From 8fc4fc66a31a7a13db9072ebdf0c3f017d6ec51e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 30 Jan 2023 18:41:44 -0800 Subject: Add new `--chunk-template` option (closes #8581). * Add `--chunk-template` CLI option, allowing more control over the chunk filenames in chunked HTML output. * Text.Pandoc.App: Add `optChunkTemplate` constructor to Opt [API change]. * Text.Pandoc.Options: add `writerChunkTemplate` constructor to WriterOptions [API change]. * Text.Pandoc.Chunks: add Data, Typeable, Generic instances for PathTemplate. --- src/Text/Pandoc/App/CommandLineOptions.hs | 7 +++++++ src/Text/Pandoc/App/Opt.hs | 5 +++++ src/Text/Pandoc/App/OutputSettings.hs | 4 ++++ src/Text/Pandoc/Chunks.hs | 6 +++++- src/Text/Pandoc/Options.hs | 3 +++ src/Text/Pandoc/Writers/ChunkedHTML.hs | 2 +- 6 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 3e8b832d2..b0e54c92d 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -793,6 +793,13 @@ options = "NUMBER") "" -- "Header level at which to split documents in chunked HTML or EPUB" + , Option "" ["chunk-template"] + (ReqArg + (\arg opt -> + return opt{ optChunkTemplate = Just (T.pack arg) }) + "PATHTEMPLATE") + "" -- "Template for file paths in chunkedhtml" + , Option "" ["epub-chapter-level"] (ReqArg (\arg opt -> do diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 762305170..aa24690dd 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -127,6 +127,7 @@ data Opt = Opt , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc , optSplitLevel :: Int -- ^ Header level at which to split documents in epub and chunkedhtml + , optChunkTemplate :: Maybe Text -- ^ Template to use for chunk filenames , optEpubSubdirectory :: String -- ^ EPUB 
subdir in OCF container , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed @@ -209,6 +210,7 @@ instance FromJSON Opt where <*> o .:? "reference-doc" <*> ((o .:? "split-level") <|> (o .:? "epub-chapter-level")) .!= optSplitLevel defaultOpts + <*> o .:? "chunk-template" <*> o .:? "epub-subdirectory" .!= optEpubSubdirectory defaultOpts <*> o .:? "epub-metadata" <*> o .:? "epub-fonts" .!= optEpubFonts defaultOpts @@ -563,6 +565,8 @@ doOpt (k,v) = do parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x }) "split-level" -> parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x }) + "chunk-template" -> + parseJSON v >>= \x -> return (\o -> o{ optChunkTemplate = Just x }) "epub-cover-image" -> parseJSON v >>= \x -> return (\o -> o{ optEpubCoverImage = unpack <$> x }) @@ -740,6 +744,7 @@ defaultOpts = Opt , optAbbreviations = Nothing , optReferenceDoc = Nothing , optSplitLevel = 1 + , optChunkTemplate = Nothing , optEpubSubdirectory = "EPUB" , optEpubMetadata = Nothing , optEpubFonts = [] diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 17df09e9d..525055d72 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -35,6 +35,7 @@ import System.Directory (getCurrentDirectory) import System.Exit (exitSuccess) import System.FilePath import System.IO (stdout) +import Text.Pandoc.Chunks (PathTemplate(..)) import Text.Pandoc import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..)) @@ -249,6 +250,9 @@ optToOutputSettings scriptingEngine opts = do , writerEpubFonts = optEpubFonts opts , writerEpubTitlePage = optEpubTitlePage opts , writerSplitLevel = optSplitLevel opts + , writerChunkTemplate = maybe (PathTemplate "%s-%i.html") + PathTemplate + (optChunkTemplate opts) , writerTOCDepth = optTOCDepth opts , writerReferenceDoc = optReferenceDoc opts , writerSyntaxMap = syntaxMap diff --git 
a/src/Text/Pandoc/Chunks.hs b/src/Text/Pandoc/Chunks.hs index 9ba282999..2447deb72 100644 --- a/src/Text/Pandoc/Chunks.hs +++ b/src/Text/Pandoc/Chunks.hs @@ -4,6 +4,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {- | Module : Text.Pandoc.Chunks @@ -38,6 +39,8 @@ import Data.String (IsString) import GHC.Generics (Generic) import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Data.Tree (Tree(..)) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Split 'Pandoc' into 'Chunk's, e.g. for conversion into -- a set of HTML pages or EPUB chapters. @@ -236,6 +239,7 @@ resolvePathTemplate :: PathTemplate -> FilePath resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum = T.unpack . + T.filter (\c -> c /= '/' && c /= '\\') . T.replace "%n" (T.pack $ printf "%03d" chunknum) . T.replace "%s" secnum . T.replace "%h" headingText . @@ -253,7 +257,7 @@ resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum = -- @"section-1.2-introduction.html"@. newtype PathTemplate = PathTemplate { unPathTemplate :: Text } - deriving (Show, IsString) + deriving (Show, IsString, Data, Typeable, Generic) -- | A part of a document (typically a chapter or section, or -- the part of a section before its subsections). 
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index f671dd279..d609f591d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -45,6 +45,7 @@ import GHC.Generics (Generic) import Skylighting (SyntaxMap, defaultSyntaxMap) import Text.DocTemplates (Context(..), Template) import Text.Pandoc.Extensions +import Text.Pandoc.Chunks (PathTemplate) import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.UTF8 (toStringLazy) import Data.Aeson.TH (deriveJSON) @@ -318,6 +319,7 @@ data WriterOptions = WriterOptions , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubTitlePage :: Bool -- ^ Include title page in epub , writerSplitLevel :: Int -- ^ Header level at which to split EPUB or chunked HTML into separate files + , writerChunkTemplate :: PathTemplate -- ^ Template for filenames in chunked HTML , writerTOCDepth :: Int -- ^ Number of levels to include in TOC , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown @@ -355,6 +357,7 @@ instance Default WriterOptions where , writerEpubFonts = [] , writerEpubTitlePage = True , writerSplitLevel = 1 + , writerChunkTemplate = "%s-%i.html" , writerTOCDepth = 3 , writerReferenceDoc = Nothing , writerReferenceLocation = EndOfDocument diff --git a/src/Text/Pandoc/Writers/ChunkedHTML.hs b/src/Text/Pandoc/Writers/ChunkedHTML.hs index b20f160ce..7287f3a9d 100644 --- a/src/Text/Pandoc/Writers/ChunkedHTML.hs +++ b/src/Text/Pandoc/Writers/ChunkedHTML.hs @@ -55,7 +55,7 @@ writeChunkedHTML opts (Pandoc meta blocks) = do epochtime <- floor <$> getPOSIXTime let toMediaEntry (fp, _mt, bs) = toEntry fp epochtime bs mediaEntries <- map toMediaEntry . mediaItems <$> getMediaBag - let chunkedDoc = splitIntoChunks "%s-%i.html" + let chunkedDoc = splitIntoChunks (writerChunkTemplate opts) True (Just 1) (writerSplitLevel opts) -- cgit v1.2.3