diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2023-01-30 18:41:44 -0800 |
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2023-01-30 21:09:02 -0800 |
| commit | 8fc4fc66a31a7a13db9072ebdf0c3f017d6ec51e (patch) | |
| tree | bdef1df49034727a5cb1cf4821aa7e92a306c006 | |
| parent | 1193bb59641f0fa0ab033d0382916be4c83e07da (diff) | |
Add new `--chunk-template` option (closes #8581).
* Add `--chunk-template` CLI option, allowing more control over the
chunk filenames in chunked HTML output.
* Text.Pandoc.App: Add `optChunkTemplate` constructor to Opt [API change].
* Text.Pandoc.Options: add `writerChunkTemplate` constructor to
WriterOptions [API change].
* Text.Pandoc.Chunks: add Data, Typeable, Generic instances for
PathTemplate.
| -rw-r--r-- | MANUAL.txt | 11 | ||||
| -rw-r--r-- | src/Text/Pandoc/App/CommandLineOptions.hs | 7 | ||||
| -rw-r--r-- | src/Text/Pandoc/App/Opt.hs | 5 | ||||
| -rw-r--r-- | src/Text/Pandoc/App/OutputSettings.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/Chunks.hs | 6 | ||||
| -rw-r--r-- | src/Text/Pandoc/Options.hs | 3 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/ChunkedHTML.hs | 2 |
7 files changed, 36 insertions, 2 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 751dc2c5b..0d6ce597b 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1261,6 +1261,17 @@ header when requesting a document from a URL: level of 2 or 3. For chunked HTML, this option determines how much content goes in each "chunk." +`--chunk-template=`*PATHTEMPLATE* + +: Specify a template for the filenames in a `chunkedhtml` document. + In the template, `%n` will be replaced by the chunk number (padded + with leading 0s to 3 digits), `%s` with the section number of the chunk, + `%h` with the heading text (with formatting removed), `%i` with + the section identifier. For example, `section-%s-%i.html` might + be resolved to `section-1.1-introduction.html`. The characters + `/` and `\` are not allowed in chunk templates and will be + ignored. The default is `%s-%i.html`. + `--epub-chapter-level=`*NUMBER* : *Deprecated synonym for `--split-level`.* diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 3e8b832d2..b0e54c92d 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -793,6 +793,13 @@ options = "NUMBER") "" -- "Header level at which to split documents in chunked HTML or EPUB" + , Option "" ["chunk-template"] + (ReqArg + (\arg opt -> + return opt{ optChunkTemplate = Just (T.pack arg) }) + "PATHTEMPLATE") + "" -- "Template for file paths in chunkedhtml" + , Option "" ["epub-chapter-level"] (ReqArg (\arg opt -> do diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 762305170..aa24690dd 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -127,6 +127,7 @@ data Opt = Opt , optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc , optSplitLevel :: Int -- ^ Header level at which to split documents in epub and chunkedhtml + , optChunkTemplate :: Maybe Text -- ^ Template to use for chunk filenames , 
optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed @@ -209,6 +210,7 @@ instance FromJSON Opt where <*> o .:? "reference-doc" <*> ((o .:? "split-level") <|> (o .:? "epub-chapter-level")) .!= optSplitLevel defaultOpts + <*> o .:? "chunk-template" <*> o .:? "epub-subdirectory" .!= optEpubSubdirectory defaultOpts <*> o .:? "epub-metadata" <*> o .:? "epub-fonts" .!= optEpubFonts defaultOpts @@ -563,6 +565,8 @@ doOpt (k,v) = do parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x }) "split-level" -> parseJSON v >>= \x -> return (\o -> o{ optSplitLevel = x }) + "chunk-template" -> + parseJSON v >>= \x -> return (\o -> o{ optChunkTemplate = Just x }) "epub-cover-image" -> parseJSON v >>= \x -> return (\o -> o{ optEpubCoverImage = unpack <$> x }) @@ -740,6 +744,7 @@ defaultOpts = Opt , optAbbreviations = Nothing , optReferenceDoc = Nothing , optSplitLevel = 1 + , optChunkTemplate = Nothing , optEpubSubdirectory = "EPUB" , optEpubMetadata = Nothing , optEpubFonts = [] diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 17df09e9d..525055d72 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -35,6 +35,7 @@ import System.Directory (getCurrentDirectory) import System.Exit (exitSuccess) import System.FilePath import System.IO (stdout) +import Text.Pandoc.Chunks (PathTemplate(..)) import Text.Pandoc import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..)) @@ -249,6 +250,9 @@ optToOutputSettings scriptingEngine opts = do , writerEpubFonts = optEpubFonts opts , writerEpubTitlePage = optEpubTitlePage opts , writerSplitLevel = optSplitLevel opts + , writerChunkTemplate = maybe (PathTemplate "%s-%i.html") + PathTemplate + (optChunkTemplate opts) , writerTOCDepth = optTOCDepth opts , writerReferenceDoc = optReferenceDoc opts , 
writerSyntaxMap = syntaxMap diff --git a/src/Text/Pandoc/Chunks.hs b/src/Text/Pandoc/Chunks.hs index 9ba282999..2447deb72 100644 --- a/src/Text/Pandoc/Chunks.hs +++ b/src/Text/Pandoc/Chunks.hs @@ -4,6 +4,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {- | Module : Text.Pandoc.Chunks @@ -38,6 +39,8 @@ import Data.String (IsString) import GHC.Generics (Generic) import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) import Data.Tree (Tree(..)) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Split 'Pandoc' into 'Chunk's, e.g. for conversion into -- a set of HTML pages or EPUB chapters. @@ -236,6 +239,7 @@ resolvePathTemplate :: PathTemplate -> FilePath resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum = T.unpack . + T.filter (\c -> c /= '/' && c /= '\\') . T.replace "%n" (T.pack $ printf "%03d" chunknum) . T.replace "%s" secnum . T.replace "%h" headingText . @@ -253,7 +257,7 @@ resolvePathTemplate (PathTemplate templ) chunknum headingText ident secnum = -- @"section-1.2-introduction.html"@. newtype PathTemplate = PathTemplate { unPathTemplate :: Text } - deriving (Show, IsString) + deriving (Show, IsString, Data, Typeable, Generic) -- | A part of a document (typically a chapter or section, or -- the part of a section before its subsections). 
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index f671dd279..d609f591d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -45,6 +45,7 @@ import GHC.Generics (Generic) import Skylighting (SyntaxMap, defaultSyntaxMap) import Text.DocTemplates (Context(..), Template) import Text.Pandoc.Extensions +import Text.Pandoc.Chunks (PathTemplate) import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.UTF8 (toStringLazy) import Data.Aeson.TH (deriveJSON) @@ -318,6 +319,7 @@ data WriterOptions = WriterOptions , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubTitlePage :: Bool -- ^ Include title page in epub , writerSplitLevel :: Int -- ^ Header level at which to split EPUB or chunked HTML into separate files + , writerChunkTemplate :: PathTemplate -- ^ Template for filenames in chunked HTML , writerTOCDepth :: Int -- ^ Number of levels to include in TOC , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown @@ -355,6 +357,7 @@ instance Default WriterOptions where , writerEpubFonts = [] , writerEpubTitlePage = True , writerSplitLevel = 1 + , writerChunkTemplate = "%s-%i.html" , writerTOCDepth = 3 , writerReferenceDoc = Nothing , writerReferenceLocation = EndOfDocument diff --git a/src/Text/Pandoc/Writers/ChunkedHTML.hs b/src/Text/Pandoc/Writers/ChunkedHTML.hs index b20f160ce..7287f3a9d 100644 --- a/src/Text/Pandoc/Writers/ChunkedHTML.hs +++ b/src/Text/Pandoc/Writers/ChunkedHTML.hs @@ -55,7 +55,7 @@ writeChunkedHTML opts (Pandoc meta blocks) = do epochtime <- floor <$> getPOSIXTime let toMediaEntry (fp, _mt, bs) = toEntry fp epochtime bs mediaEntries <- map toMediaEntry . mediaItems <$> getMediaBag - let chunkedDoc = splitIntoChunks "%s-%i.html" + let chunkedDoc = splitIntoChunks (writerChunkTemplate opts) True (Just 1) (writerSplitLevel opts) |
