summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/MediaBag.hs
blob: 18a40a6dc10e681f4ca9893cb17dfaef18b2d8d3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
   Module      : Text.Pandoc.MediaBag
   Copyright   : Copyright (C) 2014-2015, 2017-2023 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Definition of a MediaBag object to hold binary resources, and an
interface for interacting with it.
-}
module Text.Pandoc.MediaBag (
                     MediaItem(..),
                     MediaBag,
                     deleteMedia,
                     lookupMedia,
                     insertMedia,
                     mediaDirectory,
                     mediaItems
                     ) where
import qualified Data.ByteString.Lazy as BL
import Data.Data (Data)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing)
import Data.Typeable (Typeable)
import Network.URI (unEscapeString)
import System.FilePath
import qualified System.FilePath.Posix as Posix
import qualified System.FilePath.Windows as Windows
import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Digest.Pure.SHA (sha1, showDigest)
import Network.URI (URI (..), parseURI, isURI)
import Data.List (isInfixOf)

data MediaItem =
  MediaItem
  { mediaMimeType :: MimeType
  , mediaPath :: FilePath
  , mediaContents :: BL.ByteString
  } deriving (Eq, Ord, Show, Data, Typeable)

-- | A container for a collection of binary resources, with names and
-- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
-- can be used for an empty 'MediaBag', and '<>' can be used to append
-- two 'MediaBag's.
newtype MediaBag = MediaBag (M.Map Text MediaItem)
        deriving (Semigroup, Monoid, Data, Typeable)

instance Show MediaBag where
  show bag = "MediaBag " ++ show (mediaDirectory bag)

-- | We represent paths with /, in normalized form.  Percent-encoding
-- is not resolved.
canonicalize :: FilePath -> Text
canonicalize fp
  | isURI fp = T.pack fp
  | otherwise = T.replace "\\" "/" . T.pack . normalise $ fp

-- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
-- to the given path.
deleteMedia :: FilePath       -- ^ relative path and canonical name of resource
            -> MediaBag
            -> MediaBag
deleteMedia fp (MediaBag mediamap) =
  MediaBag $ M.delete (canonicalize fp) mediamap

-- | Insert a media item into a 'MediaBag', replacing any existing
-- value with the same name.
insertMedia :: FilePath       -- ^ relative path and canonical name of resource
            -> Maybe MimeType -- ^ mime type (Nothing = determine from extension)
            -> BL.ByteString  -- ^ contents of resource
            -> MediaBag
            -> MediaBag
insertMedia fp mbMime contents (MediaBag mediamap) =
  MediaBag (M.insert fp' mediaItem mediamap)
  where mediaItem = MediaItem{ mediaPath = newpath
                             , mediaContents = contents
                             , mediaMimeType = mt }
        fp' = canonicalize fp
        fp'' = unEscapeString $ T.unpack fp'
        uri = parseURI fp
        newpath = if Posix.isRelative fp''
                       && Windows.isRelative fp''
                       && isNothing uri
                       && not (".." `isInfixOf` fp'')
                       && '%' `notElem` fp''
                     then fp''
                     else showDigest (sha1 contents) <> ext
        fallback = case takeExtension fp'' of
                        ".gz" -> getMimeTypeDef $ dropExtension fp''
                        _     -> getMimeTypeDef fp''
        mt = fromMaybe fallback mbMime
        path = maybe fp'' (unEscapeString . uriPath) uri
        ext = case takeExtension path of
                '.':e | '%' `notElem` e -> '.':e
                _ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType mt

-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
lookupMedia :: FilePath
            -> MediaBag
            -> Maybe MediaItem
lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap

-- | Get a list of the file paths stored in a 'MediaBag', with
-- their corresponding mime types and the lengths in bytes of the contents.
mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
mediaDirectory mediabag =
  map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs)))
    (mediaItems mediabag)

mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
mediaItems (MediaBag mediamap) =
  map (\item -> (mediaPath item, mediaMimeType item, mediaContents item))
      (M.elems mediamap)