diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2022-10-03 13:05:38 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-03 13:05:38 -0700 |
| commit | ad0532244adfbd0ca9596a27506ed6161e4bc225 (patch) | |
| tree | 70cdf0534234805fb650fa349745abea5814d65c /src/Text/Pandoc/Data.hs | |
| parent | 2d565eda9ec6df702bc0381ccb60fc3cc9699318 (diff) | |
Separate out T.P.Data, T.P.Translations from T.P.Class. (#8348)
This makes T.P.Class more self-contained, and suitable for extraction
into a separate package if desired.
[API changes]
- T.P.Data is now an exported module, providing `readDataFile`,
`readDefaultDataFile` (both formerly provided by T.P.Class),
and also `getDataFileNames` (formerly unexported in
T.P.App.CommandLineOptions).
- T.P.Translations is now an exported module (along with
T.P.Translations.Types), providing `readTranslations`,
`getTranslations`, `setTranslations`, `translateTerm`,
`lookupTerm`, `readTranslations`, `Term(..)`, and `Translations`.
- T.P.Class: `readDataFile`, `readDefaultDataFile`, `setTranslations`,
and `translateTerm` are no longer exported.
`checkUserDataDir` is now exported.
- Text.Pandoc now exports Text.Pandoc.Data and `setTranslations`
and `translateTerm`.
Diffstat (limited to 'src/Text/Pandoc/Data.hs')
| -rw-r--r-- | src/Text/Pandoc/Data.hs | 232 |
1 files changed, 212 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs index fe543edfa..712b53f8e 100644 --- a/src/Text/Pandoc/Data.hs +++ b/src/Text/Pandoc/Data.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE TemplateHaskell #-} {- | Module : Text.Pandoc.Data @@ -8,25 +9,216 @@ Maintainer : John MacFarlane <jgm@berkeley@edu> Stability : alpha Portability : portable -Provide contents data files as Haskell values. +Access to pandoc's data files. -} -module Text.Pandoc.Data (dataFiles) where - +module Text.Pandoc.Data ( readDefaultDataFile + , readDataFile + , getDataFileNames + ) where +import Text.Pandoc.Class (PandocMonad(..), checkUserDataDir, getTimestamp, + getUserDataDir, getPOSIXTime) +import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds) +import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString as B -import Data.FileEmbed -import System.FilePath (splitDirectories) -import qualified System.FilePath.Posix as Posix - --- We ensure that the data files are stored using Posix --- path separators (/), even on Windows. -dataFiles :: [(FilePath, B.ByteString)] -dataFiles = map (\(fp, contents) -> - (Posix.joinPath (splitDirectories fp), contents)) dataFiles' - -dataFiles' :: [(FilePath, B.ByteString)] -dataFiles' = ("MANUAL.txt", $(embedFile "MANUAL.txt")) : - -- handle the hidden file separately, since embedDir doesn't - -- include it: - ("docx/_rels/.rels", $(embedFile "data/docx/_rels/.rels")) : - ("pptx/_rels/.rels", $(embedFile "data/pptx/_rels/.rels")) : - $(embedDir "data") +import Codec.Archive.Zip +import qualified Data.Text as T +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) +import System.FilePath +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data.BakedIn (dataFiles) +import Text.Pandoc.Shared (makeCanonical) +#else +import Paths_pandoc (getDataDir) +import System.Directory (getDirectoryContents) +#endif + +-- | Read file from from the default data files. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceDocx +readDefaultDataFile "reference.pptx" = + B.concat . BL.toChunks . fromArchive <$> getDefaultReferencePptx +readDefaultDataFile "reference.odt" = + B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname + Just contents -> return contents +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname + +-- | Returns the input filename unchanged if the file exits, and throws +-- a `PandocCouldNotFindDataFileError` if it doesn't. +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn +#endif + +--- | Read file from user data directory or, +--- if not found there, from the default data files. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- checkUserDataDir fname + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir </> fname) + if exists + then readFileStrict (userDir </> fname) + else readDefaultDataFile fname + +-- | Retrieve default reference.docx. +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "docProps/custom.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/comments.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- floor . utcTimeToPOSIXSeconds <$> getTimestamp + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.docx") + if exists + then return (Just (d </> "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.odt. +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.odt") + if exists + then return (Just (d </> "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.pptx. +getDefaultReferencePptx :: PandocMonad m => m Archive +getDefaultReferencePptx = do + -- We're going to narrow this down substantially once we get it + -- working. + let paths = [ "[Content_Types].xml" + , "_rels/.rels" + , "docProps/app.xml" + , "docProps/core.xml" + , "ppt/_rels/presentation.xml.rels" + , "ppt/presProps.xml" + , "ppt/presentation.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/slideLayout10.xml" + , "ppt/slideLayouts/slideLayout11.xml" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/slideLayout5.xml" + , "ppt/slideLayouts/slideLayout6.xml" + , "ppt/slideLayouts/slideLayout7.xml" + , "ppt/slideLayouts/slideLayout8.xml" + , "ppt/slideLayouts/slideLayout9.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slides/_rels/slide1.xml.rels" + , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" + , "ppt/slides/_rels/slide3.xml.rels" + , "ppt/slides/slide3.xml" + , "ppt/slides/_rels/slide4.xml.rels" + , "ppt/slides/slide4.xml" + , "ppt/tableStyles.xml" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + -- These relate to notes slides. + , "ppt/notesMasters/notesMaster1.xml" + , "ppt/notesMasters/_rels/notesMaster1.xml.rels" + , "ppt/notesSlides/notesSlide1.xml" + , "ppt/notesSlides/_rels/notesSlide1.xml.rels" + , "ppt/notesSlides/notesSlide2.xml" + , "ppt/notesSlides/_rels/notesSlide2.xml.rels" + , "ppt/theme/theme2.xml" + ] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- floor . utcTimeToPOSIXSeconds <$> getCurrentTime + contents <- toLazy <$> readDataFile ("pptx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d </> "reference.pptx") + if exists + then return (Just (d </> "reference.pptx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +getDataFileNames :: IO [FilePath] +getDataFileNames = do +#ifdef EMBED_DATA_FILES + let allDataFiles = map fst dataFiles +#else + allDataFiles <- filter (\x -> x /= "." && x /= "..") <$> + (getDataDir >>= getDirectoryContents) +#endif + return $ "reference.docx" : "reference.odt" : "reference.pptx" : allDataFiles |
