From 368f3fd71fb570278f3a3af20547d63525444cfe Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 24 Oct 2022 22:58:03 -0700 Subject: Rename T.P.Readers.Odt -> T.P.Readers.ODT. For consistency with Writers.ODT. Similarly, rename `readOdt` -> `readODT`. [API change] --- pandoc.cabal | 26 +- src/Text/Pandoc/Readers.hs | 6 +- src/Text/Pandoc/Readers/ODT.hs | 97 +++ src/Text/Pandoc/Readers/ODT/Arrows/State.hs | 139 +++ src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs | 208 +++++ src/Text/Pandoc/Readers/ODT/Base.hs | 22 + src/Text/Pandoc/Readers/ODT/ContentReader.hs | 960 +++++++++++++++++++++ src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs | 99 +++ src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs | 45 + src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs | 30 + src/Text/Pandoc/Readers/ODT/Generic/Utils.hs | 116 +++ .../Pandoc/Readers/ODT/Generic/XMLConverter.hs | 775 +++++++++++++++++ src/Text/Pandoc/Readers/ODT/Namespaces.hs | 93 ++ src/Text/Pandoc/Readers/ODT/StyleReader.hs | 640 ++++++++++++++ src/Text/Pandoc/Readers/Odt.hs | 97 --- src/Text/Pandoc/Readers/Odt/Arrows/State.hs | 139 --- src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs | 208 ----- src/Text/Pandoc/Readers/Odt/Base.hs | 22 - src/Text/Pandoc/Readers/Odt/ContentReader.hs | 960 --------------------- src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs | 99 --- src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 45 - src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs | 30 - src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 116 --- .../Pandoc/Readers/Odt/Generic/XMLConverter.hs | 775 ----------------- src/Text/Pandoc/Readers/Odt/Namespaces.hs | 93 -- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 640 -------------- test/Tests/Readers/ODT.hs | 187 ++++ test/Tests/Readers/Odt.hs | 187 ---- test/test-pandoc.hs | 4 +- 29 files changed, 3429 insertions(+), 3429 deletions(-) create mode 100644 src/Text/Pandoc/Readers/ODT.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Arrows/State.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Base.hs create mode 100644 src/Text/Pandoc/Readers/ODT/ContentReader.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Generic/Utils.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs create mode 100644 src/Text/Pandoc/Readers/ODT/Namespaces.hs create mode 100644 src/Text/Pandoc/Readers/ODT/StyleReader.hs delete mode 100644 src/Text/Pandoc/Readers/Odt.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Arrows/State.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Base.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/ContentReader.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Generic/Utils.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/Namespaces.hs delete mode 100644 src/Text/Pandoc/Readers/Odt/StyleReader.hs create mode 100644 test/Tests/Readers/ODT.hs delete mode 100644 test/Tests/Readers/Odt.hs diff --git a/pandoc.cabal b/pandoc.cabal index 983326a41..186634b9d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -569,7 +569,7 @@ library Text.Pandoc.Readers.TikiWiki, Text.Pandoc.Readers.Txt2Tags, Text.Pandoc.Readers.Docx, - Text.Pandoc.Readers.Odt, + Text.Pandoc.Readers.ODT, Text.Pandoc.Readers.EPUB, Text.Pandoc.Readers.Muse, Text.Pandoc.Readers.Man, @@ -675,17 +675,17 @@ library Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Table, - Text.Pandoc.Readers.Odt.Base, - Text.Pandoc.Readers.Odt.Namespaces, - Text.Pandoc.Readers.Odt.StyleReader, - Text.Pandoc.Readers.Odt.ContentReader, - Text.Pandoc.Readers.Odt.Generic.Fallible, - Text.Pandoc.Readers.Odt.Generic.SetMap, - Text.Pandoc.Readers.Odt.Generic.Utils, - Text.Pandoc.Readers.Odt.Generic.Namespaces, - Text.Pandoc.Readers.Odt.Generic.XMLConverter, - Text.Pandoc.Readers.Odt.Arrows.State, - Text.Pandoc.Readers.Odt.Arrows.Utils, + Text.Pandoc.Readers.ODT.Base, + Text.Pandoc.Readers.ODT.Namespaces, + Text.Pandoc.Readers.ODT.StyleReader, + Text.Pandoc.Readers.ODT.ContentReader, + Text.Pandoc.Readers.ODT.Generic.Fallible, + Text.Pandoc.Readers.ODT.Generic.SetMap, + Text.Pandoc.Readers.ODT.Generic.Utils, + Text.Pandoc.Readers.ODT.Generic.Namespaces, + Text.Pandoc.Readers.ODT.Generic.XMLConverter, + Text.Pandoc.Readers.ODT.Arrows.State, + Text.Pandoc.Readers.ODT.Arrows.Utils, Text.Pandoc.Readers.Org.BlockStarts, Text.Pandoc.Readers.Org.Blocks, Text.Pandoc.Readers.Org.DocumentTree, @@ -787,7 +787,7 @@ test-suite test-pandoc Tests.Readers.RST Tests.Readers.RTF Tests.Readers.Docx - Tests.Readers.Odt + Tests.Readers.ODT Tests.Readers.Txt2Tags Tests.Readers.EPUB Tests.Readers.Muse diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 61f7326ac..2c9dee416 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -26,7 +26,7 @@ module Text.Pandoc.Readers Reader (..) , readers , readDocx - , readOdt + , readODT , readMarkdown , readCommonMark , readCreole @@ -93,7 +93,7 @@ import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native -import Text.Pandoc.Readers.Odt +import Text.Pandoc.Readers.ODT import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.RST @@ -145,7 +145,7 @@ readers = [("native" , TextReader readNative) ,("twiki" , TextReader readTWiki) ,("tikiwiki" , TextReader readTikiWiki) ,("docx" , ByteStringReader readDocx) - ,("odt" , ByteStringReader readOdt) + ,("odt" , ByteStringReader readODT) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) ,("muse" , TextReader readMuse) diff --git a/src/Text/Pandoc/Readers/ODT.hs b/src/Text/Pandoc/Readers/ODT.hs new file mode 100644 index 000000000..4139c35be --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT.hs @@ -0,0 +1,97 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Reader.ODT + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Entry point to the odt reader. +-} + +module Text.Pandoc.Readers.ODT ( readODT ) where + +import Codec.Archive.Zip +import Text.Pandoc.XML.Light + +import qualified Data.ByteString.Lazy as B + +import System.FilePath + +import Control.Monad.Except (throwError) + +import qualified Data.Text as T + +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import qualified Text.Pandoc.Class.PandocMonad as P +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.MediaBag +import Text.Pandoc.Options +import qualified Text.Pandoc.UTF8 as UTF8 + +import Text.Pandoc.Readers.ODT.ContentReader +import Text.Pandoc.Readers.ODT.StyleReader + +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.XMLConverter +import Text.Pandoc.Shared (filteredFilesFromArchive) + +readODT :: PandocMonad m + => ReaderOptions + -> B.ByteString + -> m Pandoc +readODT opts bytes = case readODT' opts bytes of + Right (doc, mb) -> do + P.setMediaBag mb + return doc + Left e -> throwError e + +-- +readODT' :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag) +readODT' _ bytes = bytesToODT bytes-- of +-- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) +-- Left err -> Left err + +-- +bytesToODT :: B.ByteString -> Either PandocError (Pandoc, MediaBag) +bytesToODT bytes = case toArchiveOrFail bytes of + Right archive -> archiveToODT archive + Left err -> Left $ PandocParseError + $ "Could not unzip ODT: " <> T.pack err + +-- +archiveToODT :: Archive -> Either PandocError (Pandoc, MediaBag) +archiveToODT archive = do + let onFailure msg Nothing = Left $ PandocParseError msg + onFailure _ (Just x) = Right x + contentEntry <- onFailure "Could not find content.xml" + (findEntryByPath "content.xml" archive) + stylesEntry <- onFailure "Could not find styles.xml" + (findEntryByPath "styles.xml" archive) + contentElem <- entryToXmlElem contentEntry + stylesElem <- entryToXmlElem stylesEntry + styles <- either + (\_ -> Left $ PandocParseError "Could not read styles") + Right + (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) + let filePathIsODTMedia :: FilePath -> Bool + filePathIsODTMedia fp = + let (dir, name) = splitFileName fp + in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") + let media = filteredFilesFromArchive archive filePathIsODTMedia + let startState = readerState styles media + either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right + (runConverter' read_body startState contentElem) + + +-- +entryToXmlElem :: Entry -> Either PandocError Element +entryToXmlElem entry = + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Right x -> Right x + Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg diff --git a/src/Text/Pandoc/Readers/ODT/Arrows/State.hs b/src/Text/Pandoc/Readers/ODT/Arrows/State.hs new file mode 100644 index 000000000..742f6e9ee --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Arrows/State.hs @@ -0,0 +1,139 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TupleSections #-} +{- | + Module : Text.Pandoc.Readers.ODT.Arrows.State + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +An arrow that transports a state. It is in essence a more powerful version of +the standard state monad. As it is such a simple extension, there are +other version out there that do exactly the same. +The implementation is duplicated, though, to add some useful features. +Most of these might be implemented without access to innards, but it's much +faster and easier to implement this way. +-} + +module Text.Pandoc.Readers.ODT.Arrows.State where + +import Control.Arrow +import qualified Control.Category as Cat +import Control.Monad +import Data.List (foldl') +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible + + +newtype ArrowState state a b = ArrowState + { runArrowState :: (state, a) -> (state, b) } + +-- | Constructor +withState :: (state -> a -> (state, b)) -> ArrowState state a b +withState = ArrowState . uncurry + +-- | Constructor +modifyState :: (state -> state ) -> ArrowState state a a +modifyState = ArrowState . first + +-- | Constructor +ignoringState :: ( a -> b ) -> ArrowState state a b +ignoringState = ArrowState . second + +-- | Constructor +fromState :: (state -> (state, b)) -> ArrowState state a b +fromState = ArrowState . (.fst) + +-- | Constructor +extractFromState :: (state -> b ) -> ArrowState state x b +extractFromState f = ArrowState $ \(state,_) -> (state, f state) + +-- | Constructor +tryModifyState :: (state -> Either f state) + -> ArrowState state a (Either f a) +tryModifyState f = ArrowState $ \(state,a) + -> (state,).Left ||| (,Right a) $ f state + +instance Cat.Category (ArrowState s) where + id = ArrowState id + arrow2 . arrow1 = ArrowState $ runArrowState arrow2 . runArrowState arrow1 + +instance Arrow (ArrowState state) where + arr = ignoringState + first a = ArrowState $ \(s,(aF,aS)) + -> second (,aS) $ runArrowState a (s,aF) + second a = ArrowState $ \(s,(aF,aS)) + -> second (aF,) $ runArrowState a (s,aS) + +instance ArrowChoice (ArrowState state) where + left a = ArrowState $ \(s,e) -> case e of + Left l -> second Left $ runArrowState a (s,l) + Right r -> (s, Right r) + right a = ArrowState $ \(s,e) -> case e of + Left l -> (s, Left l) + Right r -> second Right $ runArrowState a (s,r) + +instance ArrowApply (ArrowState state) where + app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) + +-- | Switches the type of the state temporarily. +-- Drops the intermediate result state, behaving like a fallible +-- identity arrow, save for side effects in the state. +withSubStateF :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f x ) +withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a) + >>^ spreadChoice + >>^ fmap fst + +-- | Switches the type of the state temporarily. +-- Returns the resulting sub-state. +withSubStateF' :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f s') +withSubStateF' unlift a = ArrowState go + where go p@(s,_) = tryRunning unlift + ( tryRunning a (second Right) ) + p + where tryRunning a' b v = case runArrowState a' v of + (_ , Left f) -> (s, Left f) + (x , Right y) -> b (y,x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results +-- in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m +foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f + where a' x (s',m) = second (mappend m) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateS :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f + where a' x (s',m) = second (mplus m.return) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateSL :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f + where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) + + +-- | Fold a fallible state arrow through something 'Foldable'. +-- Collect the results in a 'MonadPlus'. +-- If the iteration fails, the state will be reset to the initial one. +iterateS' :: (Foldable f, MonadPlus m) + => ArrowState s x (Either e y ) + -> ArrowState s (f x) (Either e (m y)) +iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f + where a' s x (s',Right m) = case runArrowState a (s',x) of + (s'',Right m') -> (s'',Right $ mplus m $ return m') + (_ ,Left e ) -> (s ,Left e ) + a' _ _ e = e diff --git a/src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs b/src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs new file mode 100644 index 000000000..339bff1cb --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs @@ -0,0 +1,208 @@ +{- | + Module : Text.Pandoc.Readers.ODT.Arrows.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Utility functions for Arrows (Kleisli monads). + +Some general notes on notation: + +* "^" is meant to stand for a pure function that is lifted into an arrow +based on its usage for that purpose in "Control.Arrow". +* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function +with an equivalent return value. +* "_" stands for the dropping of a value. +-} + +-- We export everything +module Text.Pandoc.Readers.ODT.Arrows.Utils where + +import Prelude hiding (Applicative(..)) +import Control.Arrow +import Control.Monad (join) + +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.Utils + +and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') +and2 = (&&&) + +and3 :: (Arrow a) + => a b c0->a b c1->a b c2 + -> a b (c0,c1,c2 ) +and4 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3 + -> a b (c0,c1,c2,c3 ) +and5 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4 + -> a b (c0,c1,c2,c3,c4 ) +and6 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 + -> a b (c0,c1,c2,c3,c4,c5 ) + +and3 a b c = and2 a b &&& c + >>^ \((z,y ) , x) -> (z,y,x ) +and4 a b c d = and3 a b c &&& d + >>^ \((z,y,x ) , w) -> (z,y,x,w ) +and5 a b c d e = and4 a b c d &&& e + >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) +and6 a b c d e f = and5 a b c d e &&& f + >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) + +liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z +liftA2 f a b = a &&& b >>^ uncurry f + +liftA3 :: (Arrow a) => (z->y->x -> r) + -> a b z->a b y->a b x + -> a b r +liftA4 :: (Arrow a) => (z->y->x->w -> r) + -> a b z->a b y->a b x->a b w + -> a b r +liftA5 :: (Arrow a) => (z->y->x->w->v -> r) + -> a b z->a b y->a b x->a b w->a b v + -> a b r +liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u + -> a b r + +liftA3 fun a b c = and3 a b c >>^ uncurry3 fun +liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun +liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun +liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun + +liftA :: (Arrow a) => (y -> z) -> a b y -> a b z +liftA fun a = a >>^ fun + + +-- | Duplicate a value to subsequently feed it into different arrows. +-- Can almost always be replaced with '(&&&)', 'keepingTheValue', +-- or even '(|||)'. +-- Equivalent to +-- > returnA &&& returnA +duplicate :: (Arrow a) => a b (b,b) +duplicate = arr $ join (,) + +-- | Applies a function to the uncurried result-pair of an arrow-application. +-- (The %-symbol was chosen to evoke an association with pairs.) +(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>% f = a >>^ uncurry f + +infixr 2 >>% + + +-- | Duplicate a value and apply an arrow to the second instance. +-- Equivalent to +-- > \a -> duplicate >>> second a +-- or +-- > \a -> returnA &&& a +keepingTheValue :: (Arrow a) => a b c -> a b (b,c) +keepingTheValue a = returnA &&& a + +( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d +( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d +( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d + +l ^||| r = arr l ||| r +l |||^ r = l ||| arr r +l ^|||^ r = arr l ||| arr r + +infixr 2 ^||| , |||^, ^|||^ + +( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') +( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') + +l ^&&& r = arr l &&& r +l &&&^ r = l &&& arr r + +infixr 3 ^&&&, &&&^ + + +-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. +choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) +choiceToMaybe = arr eitherToMaybe + +-- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@. +maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b) +maybeToChoice = arr maybeToEither + +-- | Lifts a constant value into an arrow +returnV :: (Arrow a) => c -> a x c +returnV = arr.const + +-- | Defines Left as failure, Right as success +type FallibleArrow a input failure success = a input (Either failure success) + +-- +liftAsSuccess :: (ArrowChoice a) + => a x success + -> FallibleArrow a x failure success +liftAsSuccess a = a >>^ Right + +-- | Execute the second arrow if the first succeeds +(>>?) :: (ArrowChoice a) + => FallibleArrow a x failure success + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a >>? b = a >>> Left ^||| b + +-- | Execute the lifted second arrow if the first succeeds +(>>?^) :: (ArrowChoice a) + => FallibleArrow a x failure success + -> (success -> success') + -> FallibleArrow a x failure success' +a >>?^ f = a >>^ Left ^|||^ Right . f + +-- | Execute the lifted second arrow if the first succeeds +(>>?^?) :: (ArrowChoice a) + => FallibleArrow a x failure success + -> (success -> Either failure success') + -> FallibleArrow a x failure success' +a >>?^? b = a >>> Left ^|||^ b + +-- | Execute the second arrow if the lifted first arrow succeeds +(^>>?) :: (ArrowChoice a) + => (x -> Either failure success) + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a ^>>? b = a ^>> Left ^||| b + +-- | Execute the second, non-fallible arrow if the first arrow succeeds +(>>?!) :: (ArrowChoice a) + => FallibleArrow a x failure success + -> a success success' + -> FallibleArrow a x failure success' +a >>?! f = a >>> right f + +--- +(>>?%) :: (ArrowChoice a) + => FallibleArrow a x f (b,b') + -> (b -> b' -> c) + -> FallibleArrow a x f c +a >>?% f = a >>?^ uncurry f + + +--- +(>>?%?) :: (ArrowChoice a) + => FallibleArrow a x f (b,b') + -> (b -> b' -> Either f c) + -> FallibleArrow a x f c +a >>?%? f = a >>?^? uncurry f + +infixr 1 >>?, >>?^, >>?^? +infixr 1 ^>>?, >>?! +infixr 1 >>?%, >>?%? + +-- | An arrow version of a short-circuit (<|>) +ifFailedDo :: (ArrowChoice a) + => FallibleArrow a x f y + -> FallibleArrow a x f y + -> FallibleArrow a x f y +ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) + where repackage (x , Left _) = Left x + repackage (_ , Right y) = Right y + +infixr 1 `ifFailedDo` diff --git a/src/Text/Pandoc/Readers/ODT/Base.hs b/src/Text/Pandoc/Readers/ODT/Base.hs new file mode 100644 index 000000000..4a99f5ad6 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Base.hs @@ -0,0 +1,22 @@ +{- | + Module : Text.Pandoc.Readers.ODT.Base + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Core types of the odt reader. +-} + +module Text.Pandoc.Readers.ODT.Base where + +import Text.Pandoc.Readers.ODT.Generic.XMLConverter +import Text.Pandoc.Readers.ODT.Namespaces + +type ODTConverterState s = XMLConverterState Namespace s + +type XMLReader s a b = FallibleXMLConverter Namespace s a b + +type XMLReaderSafe s a b = XMLConverter Namespace s a b diff --git a/src/Text/Pandoc/Readers/ODT/ContentReader.hs b/src/Text/Pandoc/Readers/ODT/ContentReader.hs new file mode 100644 index 000000000..0e44b01a3 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/ContentReader.hs @@ -0,0 +1,960 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.ODT.ContentReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +The core of the odt reader that converts odt features into Pandoc types. +-} + +module Text.Pandoc.Readers.ODT.ContentReader +( readerState +, read_body +) where + +import Prelude hiding (Applicative(..)) +import Control.Applicative hiding (liftA, liftA2, liftA3) +import Control.Arrow +import Control.Monad ((<=<)) + +import qualified Data.ByteString.Lazy as B +import Data.Foldable (fold) +import Data.List (find) +import qualified Data.Map as M +import qualified Data.Text as T +import Data.Maybe +import Data.Monoid (Alt (..)) + +import Text.TeXMath (readMathML, writeTeX) +import qualified Text.Pandoc.XML.Light as XML + +import Text.Pandoc.Builder hiding (underline) +import Text.Pandoc.MediaBag (MediaBag, insertMedia) +import Text.Pandoc.Shared +import Text.Pandoc.Extensions (extensionsFromList, Extension(..)) +import qualified Text.Pandoc.UTF8 as UTF8 + +import Text.Pandoc.Readers.ODT.Base +import Text.Pandoc.Readers.ODT.Namespaces +import Text.Pandoc.Readers.ODT.StyleReader + +import Text.Pandoc.Readers.ODT.Arrows.State (foldS) +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.XMLConverter + +import Network.URI (parseRelativeReference, URI(uriPath)) +import qualified Data.Set as Set + +-------------------------------------------------------------------------------- +-- State +-------------------------------------------------------------------------------- + +type Anchor = T.Text +type Media = [(FilePath, B.ByteString)] + +data ReaderState + = ReaderState { -- | A collection of styles read somewhere else. + -- It is only queried here, not modified. + styleSet :: Styles + -- | A stack of the styles of parent elements. + -- Used to look up inherited style properties. + , styleTrace :: [Style] + -- | Keeps track of the current depth in nested lists + , currentListLevel :: ListLevel + -- | Lists may provide their own style, but they don't have + -- to. If they do not, the style of a parent list may be used + -- or even a default list style from the paragraph style. + -- This value keeps track of the closest list style there + -- currently is. + , currentListStyle :: Maybe ListStyle + -- | A map from internal anchor names to "pretty" ones. + -- The mapping is a purely cosmetic one. + , bookmarkAnchors :: M.Map Anchor Anchor + -- | A map of files / binary data from the archive + , envMedia :: Media + -- | Hold binary resources used in the document + , odtMediaBag :: MediaBag + } + deriving ( Show ) + +readerState :: Styles -> Media -> ReaderState +readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty + +-- +pushStyle' :: Style -> ReaderState -> ReaderState +pushStyle' style state = state { styleTrace = style : styleTrace state } + +-- +popStyle' :: ReaderState -> ReaderState +popStyle' state = case styleTrace state of + _:trace -> state { styleTrace = trace } + _ -> state + +-- +modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState) +modifyListLevel f state = state { currentListLevel = f (currentListLevel state) } + +-- +shiftListLevel :: ListLevel -> (ReaderState -> ReaderState) +shiftListLevel diff = modifyListLevel (+ diff) + +-- +swapCurrentListStyle :: Maybe ListStyle -> ReaderState + -> (ReaderState, Maybe ListStyle) +swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle } + , currentListStyle state + ) + +-- +lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor +lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors + +-- +putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState +putPrettyAnchor ugly pretty state@ReaderState{..} + = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors } + +-- +usedAnchors :: ReaderState -> [Anchor] +usedAnchors ReaderState{..} = M.elems bookmarkAnchors + +getMediaBag :: ReaderState -> MediaBag +getMediaBag ReaderState{..} = odtMediaBag + +getMediaEnv :: ReaderState -> Media +getMediaEnv ReaderState{..} = envMedia + +insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState +insertMedia' (fp, bs) state@ReaderState{..} + = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag } + +-------------------------------------------------------------------------------- +-- Reader type and associated tools +-------------------------------------------------------------------------------- + +type ODTReader a b = XMLReader ReaderState a b + +type ODTReaderSafe a b = XMLReaderSafe ReaderState a b + +-- | Extract something from the styles +fromStyles :: (a -> Styles -> b) -> ODTReaderSafe a b +fromStyles f = keepingTheValue + (getExtraState >>^ styleSet) + >>% f + +-- +getStyleByName :: ODTReader StyleName Style +getStyleByName = fromStyles lookupStyle >>^ maybeToChoice + +-- +findStyleFamily :: ODTReader Style StyleFamily +findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice + +-- +lookupListStyle :: ODTReader StyleName ListStyle +lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice + +-- +switchCurrentListStyle :: ODTReaderSafe (Maybe ListStyle) (Maybe ListStyle) +switchCurrentListStyle = keepingTheValue getExtraState + >>% swapCurrentListStyle + >>> first setExtraState + >>^ snd + +-- +pushStyle :: ODTReaderSafe Style Style +pushStyle = keepingTheValue ( + ( keepingTheValue getExtraState + >>% pushStyle' + ) + >>> setExtraState + ) + >>^ fst + +-- +popStyle :: ODTReaderSafe x x +popStyle = keepingTheValue ( + getExtraState + >>> arr popStyle' + >>> setExtraState + ) + >>^ fst + +-- +getCurrentListLevel :: ODTReaderSafe _x ListLevel +getCurrentListLevel = getExtraState >>^ currentListLevel + +-- +updateMediaWithResource :: ODTReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) +updateMediaWithResource = keepingTheValue ( + (keepingTheValue getExtraState + >>% insertMedia' + ) + >>> setExtraState + ) + >>^ fst + +lookupResource :: ODTReaderSafe FilePath (FilePath, B.ByteString) +lookupResource = proc target -> do + state <- getExtraState -< () + case lookup target (getMediaEnv state) of + Just bs -> returnV (target, bs) -<< () + Nothing -> returnV ("", B.empty) -< () + +type AnchorPrefix = T.Text + +-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a +-- unique identifier but without assuming that the id should be for a header. +-- Second argument is a list of already used identifiers. +uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor +uniqueIdentFrom baseIdent usedIdents = + let numIdent n = baseIdent <> "-" <> T.pack (show n) + in if baseIdent `elem` usedIdents + then maybe baseIdent numIdent + $ find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) + -- if we have more than 60,000, allow repeats + else baseIdent + +-- | First argument: basis for a new "pretty" anchor if none exists yet +-- Second argument: a key ("ugly" anchor) +-- Returns: saved "pretty" anchor or created new one +getPrettyAnchor :: ODTReaderSafe (AnchorPrefix, Anchor) Anchor +getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do + state <- getExtraState -< () + case lookupPrettyAnchor uglyAnchor state of + Just prettyAnchor -> returnA -< prettyAnchor + Nothing -> do + let newPretty = uniqueIdentFrom baseIdent (usedAnchors state) + modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty + +-- | Input: basis for a new header anchor +-- Output: saved new anchor +getHeaderAnchor :: ODTReaderSafe Inlines Anchor +getHeaderAnchor = proc title -> do + state <- getExtraState -< () + let exts = extensionsFromList [Ext_auto_identifiers] + let anchor = uniqueIdent exts (toList title) + (Set.fromList $ usedAnchors state) + modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor + + +-------------------------------------------------------------------------------- +-- Working with styles +-------------------------------------------------------------------------------- + +-- +readStyleByName :: ODTReader _x (StyleName, Style) +readStyleByName = + findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE + where + liftE :: (StyleName, Fallible Style) -> Fallible (StyleName, Style) + liftE (name, Right v) = Right (name, v) + liftE (_, Left v) = Left v + +-- +isStyleToTrace :: ODTReader Style Bool +isStyleToTrace = findStyleFamily >>?^ (==FaText) + +-- +withNewStyle :: ODTReaderSafe x Inlines -> ODTReaderSafe x Inlines +withNewStyle a = proc x -> do + fStyle <- readStyleByName -< () + case fStyle of + Right (styleName, _) | isCodeStyle styleName -> do + inlines <- a -< x + arr inlineCode -<< inlines + Right (_, style) -> do + mFamily <- arr styleFamily -< style + fTextProps <- arr ( maybeToChoice + . textProperties + . styleProperties + ) -< style + case fTextProps of + Right textProps -> do + state <- getExtraState -< () + let triple = (state, textProps, mFamily) + modifier <- arr modifierFromStyleDiff -< triple + fShouldTrace <- isStyleToTrace -< style + case fShouldTrace of + Right shouldTrace -> + if shouldTrace + then do + pushStyle -< style + inlines <- a -< x + popStyle -< () + arr modifier -<< inlines + else + -- In case anything goes wrong + a -< x + Left _ -> a -< x + Left _ -> a -< x + Left _ -> a -< x + where + isCodeStyle :: StyleName -> Bool + isCodeStyle "Source_Text" = True + isCodeStyle _ = False + + inlineCode :: Inlines -> Inlines + inlineCode = code . T.concat . map stringify . toList + +type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) +type InlineModifier = Inlines -> Inlines + +-- | Given data about the local style changes, calculates how to modify +-- an instance of 'Inlines' +modifierFromStyleDiff :: PropertyTriple -> InlineModifier +modifierFromStyleDiff propertyTriple = + composition $ + getVPosModifier propertyTriple + : map (first ($ propertyTriple) >>> ifThen_else ignore) + [ (hasEmphChanged , emph ) + , (hasChanged isStrong , strong ) + , (hasChanged strikethrough , strikeout ) + ] + where + ifThen_else else' (if',then') = if if' then then' else else' + + ignore = id :: InlineModifier + + getVPosModifier :: PropertyTriple -> InlineModifier + getVPosModifier triple@(_,textProps,_) = + let getVPos = Just . verticalPosition + in case lookupPreviousValueM getVPos triple of + Nothing -> ignore + Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps) + + getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore + getVPosModifier' ( _ , VPosSub ) = subscript + getVPosModifier' ( _ , VPosSuper ) = superscript + getVPosModifier' ( _ , _ ) = ignore + + hasEmphChanged :: PropertyTriple -> Bool + hasEmphChanged = swing any [ hasChanged isEmphasised + , hasChangedM pitch + , hasChanged underline + ] + + hasChanged property triple@(_, property -> newProperty, _) = + (/= Just newProperty) (lookupPreviousValue property triple) + + hasChangedM property triple@(_, textProps,_) = + fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple + + lookupPreviousValue f = lookupPreviousStyleValue (fmap f . textProperties) + + lookupPreviousValueM f = lookupPreviousStyleValue (f <=< textProperties) + + lookupPreviousStyleValue f (ReaderState{..},_,mFamily) + = findBy f (extendedStylePropertyChain styleTrace styleSet) + <|> (f . lookupDefaultStyle' styleSet =<< mFamily) + + +type ParaModifier = Blocks -> Blocks + +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5 +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5 + +-- | Returns either 'id' or 'blockQuote' depending on the current indentation +getParaModifier :: Style -> ParaModifier +getParaModifier Style{..} | Just props <- paraProperties styleProperties + , isBlockQuote (indentation props) + (margin_left props) + = blockQuote + | otherwise + = id + where + isBlockQuote mIndent mMargin + | LengthValueMM indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM indent <- mIndent + , LengthValueMM margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + + | PercentValue indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue indent <- mIndent + , PercentValue margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + + | otherwise + = False + +-- +constructPara :: ODTReaderSafe Blocks Blocks -> ODTReaderSafe Blocks Blocks +constructPara reader = proc blocks -> do + fStyle <- readStyleByName -< blocks + case fStyle of + Left _ -> reader -< blocks + Right (styleName, _) | isTableCaptionStyle styleName -> do + blocks' <- reader -< blocks + arr tableCaptionP -< blocks' + Right (_, style) -> do + let modifier = getParaModifier style + blocks' <- reader -< blocks + arr modifier -<< blocks' + where + isTableCaptionStyle :: StyleName -> Bool + isTableCaptionStyle "Table" = True + isTableCaptionStyle _ = False + tableCaptionP b = divWith ("", ["caption"], []) b + +type ListConstructor = [Blocks] -> Blocks + +getListConstructor :: ListLevelStyle -> ListConstructor +getListConstructor ListLevelStyle{..} = + case listLevelType of + LltBullet -> bulletList + LltImage -> bulletList + LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat + listNumberDelim = toListNumberDelim listItemPrefix + listItemSuffix + in orderedListWith (listItemStart, listNumberStyle, listNumberDelim) + where + toListNumberStyle LinfNone = DefaultStyle + toListNumberStyle LinfNumber = Decimal + toListNumberStyle LinfRomanLC = LowerRoman + toListNumberStyle LinfRomanUC = UpperRoman + toListNumberStyle LinfAlphaLC = LowerAlpha + toListNumberStyle LinfAlphaUC = UpperAlpha + toListNumberStyle (LinfString _) = Example + + toListNumberDelim Nothing (Just ".") = Period + toListNumberDelim (Just "" ) (Just ".") = Period + toListNumberDelim Nothing (Just ")") = OneParen + toListNumberDelim (Just "" ) (Just ")") = OneParen + toListNumberDelim (Just "(") (Just ")") = TwoParens + toListNumberDelim _ _ = DefaultDelim + + +-- | Determines which style to use for a list, which level to use of that +-- style, and which type of list to create as a result of this information. +-- Then prepares the state for eventual child lists and constructs the list from +-- the results. +-- Two main cases are handled: The list may provide its own style or it may +-- rely on a parent list's style. I the former case the current style in the +-- state must be switched before and after the call to the child converter +-- while in the latter the child converter can be called directly. +-- If anything goes wrong, a default ordered-list-constructor is used. +constructList :: ODTReaderSafe x [Blocks] -> ODTReaderSafe x Blocks +constructList reader = proc x -> do + modifyExtraState (shiftListLevel 1) -< () + listLevel <- getCurrentListLevel -< () + fStyleName <- findAttr NsText "style-name" -< () + case fStyleName of + Right styleName -> do + fListStyle <- lookupListStyle -< styleName + case fListStyle of + Right listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> do + oldListStyle <- switchCurrentListStyle -< Just listStyle + blocks <- constructListWith listLevelStyle -<< x + switchCurrentListStyle -< oldListStyle + returnA -< blocks + Nothing -> constructOrderedList -< x + Left _ -> constructOrderedList -< x + Left _ -> do + state <- getExtraState -< () + mListStyle <- arr currentListStyle -< state + case mListStyle of + Just listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> constructListWith listLevelStyle -<< x + Nothing -> constructOrderedList -< x + Nothing -> constructOrderedList -< x + where + constructOrderedList = + reader + >>> modifyExtraState (shiftListLevel (-1)) + >>^ orderedList + constructListWith listLevelStyle = + reader + >>> getListConstructor listLevelStyle + ^>> modifyExtraState (shiftListLevel (-1)) + +-------------------------------------------------------------------------------- +-- Readers +-------------------------------------------------------------------------------- + +type ElementMatcher result = (Namespace, ElementName, ODTReader result result) + +type InlineMatcher = ElementMatcher Inlines + +type BlockMatcher = ElementMatcher Blocks + +newtype FirstMatch a = FirstMatch (Alt Maybe a) + deriving (Foldable, Monoid, Semigroup) + +firstMatch :: a -> FirstMatch a +firstMatch = FirstMatch . Alt . Just + +-- +matchingElement :: (Monoid e) + => Namespace -> ElementName + -> ODTReaderSafe e e + -> ElementMatcher e +matchingElement ns name reader = (ns, name, asResultAccumulator reader) + where + asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% mappend + +-- +matchChildContent' :: (Monoid result) + => [ElementMatcher result] + -> ODTReaderSafe _x result +matchChildContent' ls = returnV mempty >>> matchContent' ls + +-- +matchChildContent :: (Monoid result) + => [ElementMatcher result] + -> ODTReaderSafe (result, XML.Content) result + -> ODTReaderSafe _x result +matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback + +-------------------------------------------- +-- Matchers +-------------------------------------------- + +---------------------- +-- Basics +---------------------- + +-- +-- | Open Document allows several consecutive spaces if they are marked up +read_plain_text :: ODTReaderSafe (Inlines, XML.Content) Inlines +read_plain_text = fst ^&&& read_plain_text' >>% recover + where + -- fallible version + read_plain_text' :: ODTReader (Inlines, XML.Content) Inlines + read_plain_text' = ( second ( arr extractText ) + >>^ spreadChoice >>?! second text + ) + >>?% mappend + -- + extractText :: XML.Content -> Fallible T.Text + extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText _ = failEmpty + +read_text_seq :: InlineMatcher +read_text_seq = matchingElement NsText "sequence" + $ matchChildContent [] read_plain_text + + +-- specifically. I honor that, although the current implementation of 'mappend' +-- for 'Inlines' in "Text.Pandoc.Builder" will collapse them again. +-- The rational is to be prepared for future modifications. +read_spaces :: InlineMatcher +read_spaces = matchingElement NsText "s" ( + readAttrWithDefault NsText "c" 1 -- how many spaces? + >>^ fromList.(`replicate` Space) + ) +-- +read_line_break :: InlineMatcher +read_line_break = matchingElement NsText "line-break" + $ returnV linebreak +-- +read_tab :: InlineMatcher +read_tab = matchingElement NsText "tab" + $ returnV space +-- +read_span :: InlineMatcher +read_span = matchingElement NsText "span" + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_tab + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + +-- +read_paragraph :: BlockMatcher +read_paragraph = matchingElement NsText "p" + $ constructPara + $ liftA para + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_tab + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + , read_frame + , read_text_seq + ] read_plain_text + + +---------------------- +-- Headers +---------------------- + +-- +read_header :: BlockMatcher +read_header = matchingElement NsText "h" + $ proc blocks -> do + level <- ( readAttrWithDefault NsText "outline-level" 1 + ) -< blocks + children <- ( matchChildContent [ read_span + , read_spaces + , read_line_break + , read_tab + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + , read_frame + ] read_plain_text + ) -< blocks + anchor <- getHeaderAnchor -< children + let idAttr = (anchor, [], []) -- no classes, no key-value pairs + arr (uncurry3 headerWith) -< (idAttr, level, children) + +---------------------- +-- Lists +---------------------- + +-- +read_list :: BlockMatcher +read_list = matchingElement NsText "list" +-- $ withIncreasedListLevel + $ constructList +-- $ liftA bulletList + $ matchChildContent' [ read_list_item + , read_list_header + ] +-- +read_list_item :: ElementMatcher [Blocks] +read_list_item = read_list_element "list-item" + +read_list_header :: ElementMatcher [Blocks] +read_list_header = read_list_element "list-header" + +read_list_element :: ElementName -> ElementMatcher [Blocks] +read_list_element listElement = matchingElement NsText listElement + $ liftA (compactify.(:[])) + ( matchChildContent' [ read_paragraph + , read_header + , read_list + ] + ) + + +---------------------- +-- Links +---------------------- + +read_link :: InlineMatcher +read_link = matchingElement NsText "a" + $ liftA3 link + ( findAttrTextWithDefault NsXLink "href" "" + >>> arr fixRelativeLink ) + ( findAttrTextWithDefault NsOffice "title" "" ) + ( matchChildContent [ read_span + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text ) + +fixRelativeLink :: T.Text -> T.Text +fixRelativeLink uri = + case parseRelativeReference (T.unpack uri) of + Nothing -> uri + Just u -> + case uriPath u of + '.':'.':'/':xs -> tshow $ u{ uriPath = xs } + _ -> uri + +------------------------- +-- Footnotes +------------------------- + +read_note :: InlineMatcher +read_note = matchingElement NsText "note" + $ liftA note + $ matchChildContent' [ read_note_body ] + +read_note_body :: BlockMatcher +read_note_body = matchingElement NsText "note-body" + $ matchChildContent' [ read_paragraph ] + +------------------------- +-- Citations +------------------------- + +read_citation :: InlineMatcher +read_citation = matchingElement NsText "bibliography-mark" + $ liftA2 cite + ( liftA2 makeCitation + ( findAttrTextWithDefault NsText "identifier" "" ) + ( readAttrWithDefault NsText "number" 0 ) + ) + ( matchChildContent [] read_plain_text ) + where + makeCitation :: T.Text -> Int -> [Citation] + makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] + + +---------------------- +-- Tables +---------------------- + +-- +read_table :: BlockMatcher +read_table = matchingElement NsTable "table" + $ liftA simpleTable' + $ matchChildContent' [ read_table_row + ] + +-- | A simple table without a caption or headers +-- | Infers the number of headers from rows +simpleTable' :: [[Blocks]] -> Blocks +simpleTable' [] = simpleTable [] [] +simpleTable' (x : rest) = simpleTable (fmap (const defaults) x) (x : rest) + where defaults = fromList [] + +-- +read_table_row :: ElementMatcher [[Blocks]] +read_table_row = matchingElement NsTable "table-row" + $ liftA (:[]) + $ matchChildContent' [ read_table_cell + ] + +-- +read_table_cell :: ElementMatcher [Blocks] +read_table_cell = matchingElement NsTable "table-cell" + $ liftA (compactify.(:[])) + $ matchChildContent' [ read_paragraph + ] + +---------------------- +-- Frames +---------------------- + +-- +read_frame :: InlineMatcher +read_frame = matchingElement NsDraw "frame" + $ filterChildrenName' NsDraw (`elem` ["image", "object", "text-box"]) + >>> foldS read_frame_child + >>> arr fold + +read_frame_child :: ODTReaderSafe XML.Element (FirstMatch Inlines) +read_frame_child = + proc child -> case elName child of + "image" -> read_frame_img -< child + "object" -> read_frame_mathml -< child + "text-box" -> read_frame_text_box -< child + _ -> returnV mempty -< () + +read_frame_img :: ODTReaderSafe XML.Element (FirstMatch Inlines) +read_frame_img = + proc img -> do + src <- executeIn (findAttr' NsXLink "href") -< img + case fold src of + "" -> returnV mempty -< () + src' -> do + let exts = extensionsFromList [Ext_auto_identifiers] + resource <- lookupResource -< T.unpack src' + _ <- updateMediaWithResource -< resource + w <- findAttrText' NsSVG "width" -< () + h <- findAttrText' NsSVG "height" -< () + titleNodes <- matchChildContent' [ read_frame_title ] -< () + alt <- matchChildContent [] read_plain_text -< () + arr (firstMatch . uncurry4 imageWith) -< + (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) + +read_frame_title :: InlineMatcher +read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) + +image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr +image_attributes x y = + ( "", [], dim "width" x ++ dim "height" y) + where + dim _ (Just "") = [] + dim name (Just v) = [(name, v)] + dim _ Nothing = [] + +read_frame_mathml :: ODTReaderSafe XML.Element (FirstMatch Inlines) +read_frame_mathml = + proc obj -> do + src <- executeIn (findAttr' NsXLink "href") -< obj + case fold src of + "" -> returnV mempty -< () + src' -> do + let path = T.unpack $ + fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" + (_, mathml) <- lookupResource -< path + case readMathML (UTF8.toText $ B.toStrict mathml) of + Left _ -> returnV mempty -< () + Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps + +read_frame_text_box :: ODTReaderSafe XML.Element (FirstMatch Inlines) +read_frame_text_box = proc box -> do + paragraphs <- executeIn (matchChildContent' [ read_paragraph ]) -< box + arr read_img_with_caption -< toList paragraphs + +read_img_with_caption :: [Block] -> FirstMatch Inlines +read_img_with_caption (Para [Image attr alt (src,title)] : _) = + firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption +read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = + firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows +read_img_with_caption ( Para (_ : xs) : ys) = + read_img_with_caption (Para xs : ys) +read_img_with_caption _ = + mempty + +---------------------- +-- Internal links +---------------------- + +_ANCHOR_PREFIX_ :: T.Text +_ANCHOR_PREFIX_ = "anchor" + +-- +readAnchorAttr :: ODTReader _x Anchor +readAnchorAttr = findAttrText NsText "name" + +-- | Beware: may fail +findAnchorName :: ODTReader AnchorPrefix Anchor +findAnchorName = ( keepingTheValue readAnchorAttr + >>^ spreadChoice + ) >>?! getPrettyAnchor + + +-- +maybeAddAnchorFrom :: ODTReader Inlines AnchorPrefix + -> ODTReaderSafe Inlines Inlines +maybeAddAnchorFrom anchorReader = + keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) + >>> + proc (inlines, fAnchorElem) -> do + case fAnchorElem of + Right anchorElem -> returnA -< anchorElem + Left _ -> returnA -< inlines + where + toAnchorElem :: Anchor -> Inlines + toAnchorElem anchorID = spanWith (anchorID, [], []) mempty + -- no classes, no key-value pairs + +-- +read_bookmark :: InlineMatcher +read_bookmark = matchingElement NsText "bookmark" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_bookmark_start :: InlineMatcher +read_bookmark_start = matchingElement NsText "bookmark-start" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_reference_start :: InlineMatcher +read_reference_start = matchingElement NsText "reference-mark-start" + $ maybeAddAnchorFrom readAnchorAttr + +-- | Beware: may fail +findAnchorRef :: ODTReader _x Anchor +findAnchorRef = ( findAttrText NsText "ref-name" + >>?^ (_ANCHOR_PREFIX_,) + ) >>?! getPrettyAnchor + + +-- +maybeInAnchorRef :: ODTReaderSafe Inlines Inlines +maybeInAnchorRef = proc inlines -> do + fRef <- findAnchorRef -< () + case fRef of + Right anchor -> + arr (toAnchorRef anchor) -<< inlines + Left _ -> returnA -< inlines + where + toAnchorRef :: Anchor -> Inlines -> Inlines + toAnchorRef anchor = link ("#" <> anchor) "" -- no title + +-- +read_bookmark_ref :: InlineMatcher +read_bookmark_ref = matchingElement NsText "bookmark-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + +-- +read_reference_ref :: InlineMatcher +read_reference_ref = matchingElement NsText "reference-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + + +---------------------- +-- Entry point +---------------------- + +read_text :: ODTReaderSafe _x Pandoc +read_text = matchChildContent' [ read_header + , read_paragraph + , read_list + , read_table + ] + >>^ doc + +post_process :: Pandoc -> Pandoc +post_process (Pandoc m blocks) = + Pandoc m (post_process' blocks) + +post_process' :: [Block] -> [Block] +post_process' (Table attr _ specs th tb tf : Div ("", ["caption"], _) blks : xs) + = Table attr (Caption Nothing blks) specs th tb tf : post_process' xs +post_process' bs = bs + +read_body :: ODTReader _x (Pandoc, MediaBag) +read_body = executeInSub NsOffice "body" + $ executeInSub NsOffice "text" + $ liftAsSuccess + $ proc inlines -> do + txt <- read_text -< inlines + state <- getExtraState -< () + returnA -< (post_process txt, getMediaBag state) diff --git a/src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs b/src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs new file mode 100644 index 000000000..c6f45ced1 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs @@ -0,0 +1,99 @@ +{- | + Module : Text.Pandoc.Readers.ODT.Generic.Fallible + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Data types and utilities representing failure. Most of it is based on the +"Either" type in its usual configuration (left represents failure). + +In most cases, the failure type is implied or required to be a "Monoid". + +The choice of "Either" instead of a custom type makes it easier to write +compatible instances of "ArrowChoice". +-} + +-- We export everything +module Text.Pandoc.Readers.ODT.Generic.Fallible where + +-- | Default for now. Will probably become a class at some point. +type Failure = () + +type Fallible a = Either Failure a + + +-- +maybeToEither :: Maybe a -> Fallible a +maybeToEither (Just a) = Right a +maybeToEither Nothing = Left () + +-- +eitherToMaybe :: Either _l a -> Maybe a +eitherToMaybe (Left _) = Nothing +eitherToMaybe (Right a) = Just a + +-- | > recover a === either (const a) id +recover :: a -> Either _f a -> a +recover a (Left _) = a +recover _ (Right a) = a + +-- | I would love to use 'fail'. Alas, 'Monad.fail'... +failWith :: failure -> Either failure _x +failWith f = Left f + +-- +failEmpty :: (Monoid failure) => Either failure _x +failEmpty = failWith mempty + +-- +succeedWith :: a -> Either _x a +succeedWith = Right + +-- +collapseEither :: Either failure (Either failure x) + -> Either failure x +collapseEither (Left f ) = Left f +collapseEither (Right (Left f)) = Left f +collapseEither (Right (Right x)) = Right x + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b +chooseMax = chooseMaxWith mappend + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMaxWith :: (Monoid a) => (b -> b -> b) + -> Either a b + -> Either a b + -> Either a b +chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b +chooseMaxWith _ (Left a) (Left b) = Left $ a `mappend` b +chooseMaxWith _ (Right a) _ = Right a +chooseMaxWith _ _ (Right b) = Right b + + +-- | Class of containers that can escalate contained 'Either's. +-- The word "Vector" is meant in the sense of a disease transmitter. +class ChoiceVector v where + spreadChoice :: v (Either f a) -> Either f (v a) + +instance ChoiceVector ((,) a) where + spreadChoice (_, Left f) = Left f + spreadChoice (x, Right y) = Right (x,y) + -- Wasn't there a newtype somewhere with the elements flipped? + +-- | Wrapper for a list. While the normal list instance of 'ChoiceVector' +-- fails whenever it can, this type will never fail. +newtype SuccessList a = SuccessList { collectNonFailing :: [a] } + deriving ( Eq, Ord, Show ) + +instance ChoiceVector SuccessList where + spreadChoice = Right . SuccessList . foldr unTagRight [] . collectNonFailing + where unTagRight (Right x) = (x:) + unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs new file mode 100644 index 000000000..d7310d2e5 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs @@ -0,0 +1,45 @@ +{- | + Module : Text.Pandoc.Readers.ODT.Generic.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A class containing a set of namespace identifiers. Used to convert between +typesafe Haskell namespace identifiers and unsafe "real world" namespaces. +-} + +module Text.Pandoc.Readers.ODT.Generic.Namespaces where + +import qualified Data.Map as M +import Data.Text (Text) + +-- +type NameSpaceIRI = Text + +-- +type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI + +-- +class (Eq nsID, Ord nsID) => NameSpaceID nsID where + + -- | Given a IRI, possibly update the map and return the id of the namespace. + -- May fail if the namespace is unknown and the application does not + -- allow unknown namespaces. + getNamespaceID :: NameSpaceIRI + -> NameSpaceIRIs nsID + -> Maybe (NameSpaceIRIs nsID, nsID) + -- | Given a namespace id, lookup its IRI. May be overridden for performance. + getIRI :: nsID + -> NameSpaceIRIs nsID + -> Maybe NameSpaceIRI + -- | The root element of an XML document has a namespace, too, and the + -- "XML.Light-parser" is eager to remove the corresponding namespace + -- attribute. + -- As a result, at least this root namespace must be provided. + getInitialIRImap :: NameSpaceIRIs nsID + + getIRI = M.lookup + getInitialIRImap = M.empty diff --git a/src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs b/src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs new file mode 100644 index 000000000..be586803b --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs @@ -0,0 +1,30 @@ +{- | + Module : Text.Pandoc.Readers.ODT.Generic.SetMap + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A map of values to sets of values. +-} + +module Text.Pandoc.Readers.ODT.Generic.SetMap where + +import qualified Data.Map as M +import qualified Data.Set as S + +type SetMap k v = M.Map k (S.Set v) + +empty :: SetMap k v +empty = M.empty + +fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v +fromList = foldr (uncurry insert) empty + +insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v +insert key value setMap = M.insertWith S.union key (S.singleton value) setMap + +union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v +union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3 diff --git a/src/Text/Pandoc/Readers/ODT/Generic/Utils.hs b/src/Text/Pandoc/Readers/ODT/Generic/Utils.hs new file mode 100644 index 000000000..fe85ef6f2 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Generic/Utils.hs @@ -0,0 +1,116 @@ +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Reader.ODT.Generic.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +General utility functions for the odt reader. +-} + +module Text.Pandoc.Readers.ODT.Generic.Utils +( uncurry3 +, uncurry4 +, uncurry5 +, uncurry6 +, swap +, reverseComposition +, tryToRead +, Lookupable(..) +, readLookupable +, readPercent +, findBy +, swing +, composition +) where + +import Control.Category (Category, (<<<), (>>>)) +import qualified Control.Category as Cat (id) +import Data.Char (isSpace) +import qualified Data.Foldable as F (Foldable, foldr) +import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T + +-- | Equivalent to +-- > foldr (.) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- The noun-form was chosen to be consistent with 'sum', 'product' etc +-- based on the discussion at +-- +-- (that I was not part of) +composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +composition = F.foldr (<<<) Cat.id + +-- | Equivalent to +-- > foldr (flip (.)) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- A reversed version of 'composition'. +reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +reverseComposition = F.foldr (>>>) Cat.id + +-- | This function often makes it possible to switch values with the functions +-- that are applied to them. +-- +-- Examples: +-- > swing map :: [a -> b] -> a -> [b] +-- > swing any :: [a -> Bool] -> a -> Bool +-- > swing foldr :: b -> a -> [a -> b -> b] -> b +-- > swing scanr :: c -> a -> [a -> c -> c] -> c +-- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c] +-- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool) +-- +-- Stolen from +swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d +swing = flip.(.flip id) +-- swing f c a = f ($ a) c + + +-- | Alternative to 'read'/'reads'. The former of these throws errors +-- (nobody wants that) while the latter returns "to much" for simple purposes. +-- This function instead applies 'reads' and returns the first match (if any) +-- in a 'Maybe'. +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst + +-- | A version of 'reads' that requires a '%' sign after the number +readPercent :: ReadS Int +readPercent s = [ (i,s') | (i , r ) <- reads s + , ("%" , s') <- lex r + ] + +-- | Data that can be looked up. +-- This is mostly a utility to read data with kind *. +class Lookupable a where + lookupTable :: [(Text, a)] + +-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable + +uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z +uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z +uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z +uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z + +uncurry3 fun (a,b,c ) = fun a b c +uncurry4 fun (a,b,c,d ) = fun a b c d +uncurry5 fun (a,b,c,d,e ) = fun a b c d e +uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f + +swap :: (a,b) -> (b,a) +swap (a,b) = (b,a) + +-- | A version of "Data.List.find" that uses a converter to a Maybe instance. +-- The returned value is the first which the converter returns in a 'Just' +-- wrapper. +findBy :: (a -> Maybe b) -> [a] -> Maybe b +findBy _ [] = Nothing +findBy f ((f -> Just x):_ ) = Just x +findBy f ( _:xs) = findBy f xs diff --git a/src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs new file mode 100644 index 000000000..fc0cd21e5 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs @@ -0,0 +1,775 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternGuards #-} +{- | + Module : Text.Pandoc.Readers.ODT.Generic.XMLConverter + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A generalized XML parser based on stateful arrows. +It might be sufficient to define this reader as a comonad, but there is +not a lot of use in trying. +-} + +module Text.Pandoc.Readers.ODT.Generic.XMLConverter +( ElementName +, XMLConverterState +, XMLConverter +, FallibleXMLConverter +, runConverter' +, getExtraState +, setExtraState +, modifyExtraState +, producingExtraState +, findChild' +, filterChildrenName' +, isSet' +, isSetWithDefault +, elName +, searchAttr +, lookupAttr +, lookupAttr' +, lookupDefaultingAttr +, findAttr' +, findAttrText' +, findAttr +, findAttrText +, findAttrTextWithDefault +, readAttr +, readAttr' +, readAttrWithDefault +, getAttr +, executeIn +, executeInSub +, withEveryL +, tryAll +, matchContent' +, matchContent +) where + +import Prelude hiding (Applicative(..)) +import Control.Applicative hiding ( liftA, liftA2 ) +import Control.Monad ( MonadPlus ) +import Control.Arrow + +import Data.Bool ( bool ) +import Data.Either ( rights ) +import qualified Data.Map as M +import Data.Text (Text) +import Data.Default +import Data.Maybe +import Data.List (foldl') + +import qualified Text.Pandoc.XML.Light as XML + +import Text.Pandoc.Readers.ODT.Arrows.State +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Namespaces +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible + +-------------------------------------------------------------------------------- +-- Basis types for readability +-------------------------------------------------------------------------------- + +-- +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text + +-- +type NameSpacePrefix = Text + +-- +type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix + +-------------------------------------------------------------------------------- +-- Main converter state +-------------------------------------------------------------------------------- + +-- GADT so some of the NameSpaceID restrictions can be deduced +data XMLConverterState nsID extraState where + XMLConverterState :: NameSpaceID nsID + => { -- | A stack of parent elements. The top element is the current one. + -- Arguably, a real Zipper would be better. But that is an + -- optimization that can be made at a later time, e.g. when + -- replacing Text.XML.Light. + parentElements :: [XML.Element] + -- | A map from internal namespace IDs to the namespace prefixes + -- used in XML elements + , namespacePrefixes :: NameSpacePrefixes nsID + -- | A map from internal namespace IDs to namespace IRIs + -- (Only necessary for matching namespace IDs and prefixes) + , namespaceIRIs :: NameSpaceIRIs nsID + -- | A place to put "something else". This feature is used heavily + -- to keep the main code cleaner. More specifically, the main reader + -- is divided into different stages. Each stage lifts something up + -- here, which the next stage can then use. This could of course be + -- generalized to a state-tree or used for the namespace IRIs. The + -- border between states and values is an imaginary one, after all. + -- But the separation as it is seems to be enough for now. + , moreState :: extraState + } + -> XMLConverterState nsID extraState + +-- +createStartState :: (NameSpaceID nsID) + => XML.Element + -> extraState + -> XMLConverterState nsID extraState +createStartState element extraState = + XMLConverterState + { parentElements = [element] + , namespacePrefixes = M.empty + , namespaceIRIs = getInitialIRImap + , moreState = extraState + } + +-- | Functor over extra state +instance Functor (XMLConverterState nsID) where + fmap f ( XMLConverterState parents prefixes iRIs extraState ) + = XMLConverterState parents prefixes iRIs (f extraState) + +-- +replaceExtraState :: extraState + -> XMLConverterState nsID _x + -> XMLConverterState nsID extraState +replaceExtraState x s + = fmap (const x) s + +-- +currentElement :: XMLConverterState nsID extraState + -> XML.Element +currentElement state = head (parentElements state) + +-- | Replace the current position by another, modifying the extra state +-- in the process +swapStack' :: XMLConverterState nsID extraState + -> [XML.Element] + -> ( XMLConverterState nsID extraState , [XML.Element] ) +swapStack' state stack + = ( state { parentElements = stack } + , parentElements state + ) + +-- +pushElement :: XML.Element + -> XMLConverterState nsID extraState + -> XMLConverterState nsID extraState +pushElement e state = state { parentElements = e:parentElements state } + +-- | Pop the top element from the call stack, unless it is the last one. +popElement :: XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) +popElement state + | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es } + | otherwise = Nothing + +-------------------------------------------------------------------------------- +-- Main type +-------------------------------------------------------------------------------- + +-- It might be a good idea to pack the converters in a GADT +-- Downside: data instead of type +-- Upside: 'Failure' could be made a parameter as well. + +-- +type XMLConverter nsID extraState input output + = ArrowState (XMLConverterState nsID extraState ) input output + +type FallibleXMLConverter nsID extraState input output + = XMLConverter nsID extraState input (Fallible output) + +-- +runConverter :: XMLConverter nsID extraState input output + -> XMLConverterState nsID extraState + -> input + -> output +runConverter converter state input = snd $ runArrowState converter (state,input) + +runConverter' :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState () success + -> extraState + -> XML.Element + -> Fallible success +runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () + +-- +getCurrentElement :: XMLConverter nsID extraState x XML.Element +getCurrentElement = extractFromState currentElement + +-- +getExtraState :: XMLConverter nsID extraState x extraState +getExtraState = extractFromState moreState + +-- +setExtraState :: XMLConverter nsID extraState extraState extraState +setExtraState = withState $ \state extra + -> (replaceExtraState extra state , extra) + + +-- | Lifts a function to the extra state. +modifyExtraState :: (extraState -> extraState) + -> XMLConverter nsID extraState x x +modifyExtraState = modifyState.fmap + + +-- | First sets the extra state to the new value. Then modifies the original +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- convertingExtraState () converter >>> doOtherStuff) +-- +convertingExtraState :: extraState' + -> FallibleXMLConverter nsID extraState' extraState extraState + -> FallibleXMLConverter nsID extraState x x +convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA + where + setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v + modifyWithA = keepingTheValue (moreState ^>> a) + >>^ spreadChoice >>?% flip replaceExtraState + +-- | First sets the extra state to the new value. Then produces a new +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- Equivalent to +-- +-- > \v x a -> convertingExtraState v (returnV x >>> a) +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- producingExtraState () () producer >>> doOtherStuff) +-- +producingExtraState :: extraState' + -> a + -> FallibleXMLConverter nsID extraState' a extraState + -> FallibleXMLConverter nsID extraState x x +producingExtraState v x a = convertingExtraState v (returnV x >>> a) + + +-------------------------------------------------------------------------------- +-- Work in namespaces +-------------------------------------------------------------------------------- + +-- | Arrow version of 'getIRI' +lookupNSiri :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpaceIRI) +lookupNSiri nsID = extractFromState + $ \state -> getIRI nsID $ namespaceIRIs state + +-- +lookupNSprefix :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpacePrefix) +lookupNSprefix nsID = extractFromState + $ \state -> M.lookup nsID $ namespacePrefixes state + +-- | Extracts namespace attributes from the current element and tries to +-- update the current mapping accordingly +readNSattributes :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState x () +readNSattributes = fromState $ \state -> maybe (state, failEmpty ) + ( , succeedWith ()) + (extractNSAttrs state ) + where + extractNSAttrs :: (NameSpaceID nsID) + => XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) + extractNSAttrs startState + = foldl' (\state d -> state >>= addNS d) + (Just startState) + nsAttribs + where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) + element = currentElement startState + readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri) + = Just (name, iri) + readNSattr _ = Nothing + addNS (prefix, iri) state = fmap updateState + $ getNamespaceID iri + $ namespaceIRIs state + where updateState (iris,nsID) + = state { namespaceIRIs = iris + , namespacePrefixes = M.insert nsID prefix + $ namespacePrefixes state + } + +-------------------------------------------------------------------------------- +-- Common namespace accessors +-------------------------------------------------------------------------------- + +-- | Given a namespace id and an element name, creates a 'XML.QName' for +-- internal use +qualifyName :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x XML.QName +qualifyName nsID name = lookupNSiri nsID + &&& lookupNSprefix nsID + >>% XML.QName name + +-- | Checks if a given element matches both a specified namespace id +-- and a predicate +elemNameMatches :: (NameSpaceID nsID) + => nsID -> (ElementName -> Bool) + -> XMLConverter nsID extraState XML.Element Bool +elemNameMatches nsID f = keepingTheValue (lookupNSiri nsID) >>% hasMatchingName + where hasMatchingName e iri = let name = XML.elName e + in f (XML.qName name) + && XML.qURI name == iri + +-- | Checks if a given element matches both a specified namespace id +-- and a specified element name +elemNameIs :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState XML.Element Bool +elemNameIs nsID name = elemNameMatches nsID (== name) + +-------------------------------------------------------------------------------- +-- General content +-------------------------------------------------------------------------------- + +elName :: XML.Element -> ElementName +elName = XML.qName . XML.elName + +-- +elContent :: XMLConverter nsID extraState x [XML.Content] +elContent = getCurrentElement + >>^ XML.elContent + +-------------------------------------------------------------------------------- +-- Children +-------------------------------------------------------------------------------- + +-- +-- +findChildren :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x [XML.Element] +findChildren nsID name = qualifyName nsID name + &&& getCurrentElement + >>% XML.findChildren + +-- +findChild' :: (NameSpaceID nsID) + => nsID + -> ElementName + -> XMLConverter nsID extraState x (Maybe XML.Element) +findChild' nsID name = qualifyName nsID name + &&& getCurrentElement + >>% XML.findChild + +-- +findChild :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState x XML.Element +findChild nsID name = findChild' nsID name + >>> maybeToChoice + +filterChildrenName' :: (NameSpaceID nsID) + => nsID + -> (ElementName -> Bool) + -> XMLConverter nsID extraState x [XML.Element] +filterChildrenName' nsID f = getCurrentElement + >>> arr XML.elChildren + >>> iterateS (keepingTheValue (elemNameMatches nsID f)) + >>> arr (map fst . filter snd) + +-------------------------------------------------------------------------------- +-- Attributes +-------------------------------------------------------------------------------- + +-- +isSet' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe Bool) +isSet' nsID attrName = findAttr' nsID attrName + >>^ (>>= stringToBool') + +isSetWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> Bool + -> XMLConverter nsID extraState x Bool +isSetWithDefault nsID attrName def' + = isSet' nsID attrName + >>^ fromMaybe def' + +-- | Lookup value in a dictionary, fail if no attribute found or value +-- not in dictionary +searchAttrIn :: (NameSpaceID nsID) + => nsID -> AttributeName + -> [(AttributeValue,a)] + -> FallibleXMLConverter nsID extraState x a +searchAttrIn nsID attrName dict + = findAttr nsID attrName + >>?^? maybeToChoice.(`lookup` dict ) + +-- | Lookup value in a dictionary. If attribute or value not found, +-- return default value +searchAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> a + -> [(AttributeValue,a)] + -> XMLConverter nsID extraState x a +searchAttr nsID attrName defV dict + = searchAttrIn nsID attrName dict + >>> const defV ^|||^ id + +-- | Read a 'Lookupable' attribute. Fail if no match. +lookupAttr :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x a +lookupAttr nsID attrName = lookupAttr' nsID attrName + >>^ maybeToChoice + + +-- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'. +lookupAttr' :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe a) +lookupAttr' nsID attrName + = findAttr' nsID attrName + >>^ (>>= readLookupable) + +-- | Read a 'Lookupable' attribute with explicit default +lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> a + -> XMLConverter nsID extraState x a +lookupAttrWithDefault nsID attrName deflt + = lookupAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read a 'Lookupable' attribute with implicit default +lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x a +lookupDefaultingAttr nsID attrName + = lookupAttrWithDefault nsID attrName def + +-- | Return value as a (Maybe Text) +findAttr' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe AttributeValue) +findAttr' nsID attrName = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + +-- | Return value as a (Maybe Text) +findAttrText' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe TextAttributeValue) +findAttrText' nsID attrName + = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + +-- | Return value as string or fail +findAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x AttributeValue +findAttr nsID attrName = findAttr' nsID attrName + >>> maybeToChoice + +-- | Return value as text or fail +findAttrText :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x TextAttributeValue +findAttrText nsID attrName + = findAttr' nsID attrName + >>> maybeToChoice + +-- | Return value as string or return provided default value +findAttrTextWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> TextAttributeValue + -> XMLConverter nsID extraState x TextAttributeValue +findAttrTextWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read and return value or fail +readAttr :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x attrValue +readAttr nsID attrName = readAttr' nsID attrName + >>> maybeToChoice + +-- | Read and return value or return Nothing +readAttr' :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe attrValue) +readAttr' nsID attrName = findAttr' nsID attrName + >>^ (>>= tryToRead) + +-- | Read and return value or return provided default value +readAttrWithDefault :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> attrValue + -> XMLConverter nsID extraState x attrValue +readAttrWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ (>>= tryToRead) + >>^ fromMaybe deflt + +-- | Read and return value or return default value from 'Default' instance +getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x attrValue +getAttr nsID attrName = readAttrWithDefault nsID attrName def + +-------------------------------------------------------------------------------- +-- Movements +-------------------------------------------------------------------------------- + +-- +jumpThere :: XMLConverter nsID extraState XML.Element XML.Element +jumpThere = withState (\state element + -> ( pushElement element state , element ) + ) + +-- +swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element] +swapStack = withState swapStack' + +-- +jumpBack :: FallibleXMLConverter nsID extraState _x _x +jumpBack = tryModifyState (popElement >>> maybeToChoice) + +-- | Support function for "procedural" converters: jump to an element, execute +-- a converter, jump back. +-- This version is safer than 'executeThere', because it does not rely on the +-- internal stack. As a result, the converter can not move around in arbitrary +-- ways. The downside is of course that some of the environment is not +-- accessible to the converter. +switchingTheStack :: XMLConverter nsID moreState a b + -> XMLConverter nsID moreState (a, XML.Element) b +switchingTheStack a = second ( (:[]) ^>> swapStack ) + >>> first a + >>> second swapStack + >>^ fst + +-- | Support function for "procedural" converters: jumps to an element, executes +-- a converter, jumps back. +-- Make sure that the converter is well-behaved; that is it should +-- return to the exact position it started from in /every possible path/ of +-- execution, even if it "fails". If it does not, you may encounter +-- strange bugs. If you are not sure about the behaviour or want to use +-- shortcuts, you can often use 'switchingTheStack' instead. +executeThere :: FallibleXMLConverter nsID moreState a b + -> FallibleXMLConverter nsID moreState (a, XML.Element) b +executeThere a = second jumpThere + >>> fst + ^>> a + >>> jumpBack -- >>? jumpBack would not ensure the jump. + >>^ collapseEither + + +-- | Do something in a specific element, then come back +executeIn :: XMLConverter nsID extraState XML.Element s + -> XMLConverter nsID extraState XML.Element s +executeIn a = duplicate >>> switchingTheStack a + +-- | Do something in a sub-element, then come back +executeInSub :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState f s + -> FallibleXMLConverter nsID extraState f s +executeInSub nsID name a = keepingTheValue + (findChild nsID name) + >>> ignoringState liftFailure + >>? switchingTheStack a + where liftFailure (_, Left f) = Left f + liftFailure (x, Right e) = Right (x, e) + +-------------------------------------------------------------------------------- +-- Iterating over children +-------------------------------------------------------------------------------- + +-- Helper converter to prepare different types of iterations. +-- It lifts the children (of a certain type) of the current element +-- into the value level and pairs each one with the current input value. +prepareIteration :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState b [(b, XML.Element)] +prepareIteration nsID name = keepingTheValue + (findChildren nsID name) + >>% distributeValue + +-- +withEveryL :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a [b] +withEveryL = withEvery + +-- | Applies a converter to every child element of a specific type. +-- Collects results in a 'MonadPlus'. +-- Fails completely if any conversion fails. +withEvery :: (NameSpaceID nsID, MonadPlus m) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a (m b) +withEvery nsID name a = prepareIteration nsID name + >>> iterateS' (switchingTheStack a) + +-- | Applies a converter to every child element of a specific type. +-- Collects all successful results in a list. +tryAll :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState b a + -> XMLConverter nsID extraState b [a] +tryAll nsID name a = prepareIteration nsID name + >>> iterateS (switchingTheStack a) + >>^ rights + +-------------------------------------------------------------------------------- +-- Matching children +-------------------------------------------------------------------------------- + +type IdXMLConverter nsID moreState x + = XMLConverter nsID moreState x x + +type MaybeCConverter nsID moreState x + = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) + +-- Chainable converter that helps deciding which converter to actually use. +type ContentMatchConverter nsID extraState x + = IdXMLConverter nsID + extraState + (MaybeCConverter nsID extraState x, XML.Content) + +-- Helper function: The @c@ is actually a converter that is to be selected by +-- matching XML content to the first two parameters. +-- The fold used to match elements however is very simple, so to use it, +-- this function wraps the converter in another converter that unifies +-- the accumulator. Think of a lot of converters with the resulting type +-- chained together. The accumulator not only transports the element +-- unchanged to the next matcher, it also does the actual selecting by +-- combining the intermediate results with '(<|>)'. +makeMatcherC :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a a + -> ContentMatchConverter nsID extraState a +makeMatcherC nsID name c = ( second ( contentToElem + >>> returnV Nothing + ||| ( elemNameIs nsID name + >>^ bool Nothing (Just cWithJump) + ) + ) + >>% (<|>) + ) &&&^ snd + where cWithJump = ( fst + ^&&& ( second contentToElem + >>> spreadChoice + ^>>? executeThere c + ) + >>% recover) + &&&^ snd + contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element + contentToElem = arr $ \case + XML.Elem e' -> succeedWith e' + _ -> failEmpty + +-- Creates and chains a bunch of matchers +prepareMatchersC :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] + -> ContentMatchConverter nsID extraState x +--prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC) +prepareMatchersC = reverseComposition . map (uncurry3 makeMatcherC) + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Filters non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent' :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchContent' lookups = matchContent lookups (arr fst) + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Adds a default converter for all non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState (a,XML.Content) a + -> XMLConverter nsID extraState a a +matchContent lookups fallback + = let matcher = prepareMatchersC lookups + in keepingTheValue ( + elContent + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ map swallowOrFallback + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the content and drop the content + -- in the return value + swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst + swallowOrFallback (Nothing ,content) = (,content) ^>> fallback + +-------------------------------------------------------------------------------- +-- Internals +-------------------------------------------------------------------------------- + +stringToBool' :: Text -> Maybe Bool +stringToBool' val | val `elem` trueValues = Just True + | val `elem` falseValues = Just False + | otherwise = Nothing + where trueValues = ["true" ,"on" ,"1"] + falseValues = ["false","off","0"] + + +distributeValue :: a -> [b] -> [(a,b)] +distributeValue = map.(,) + +-------------------------------------------------------------------------------- + +{- +NOTES +It might be a good idea to refactor the namespace stuff. +E.g.: if a namespace constructor took a string as a parameter, things like +> a ?>/< (NsText,"body") +would be nicer. +Together with a rename and some trickery, something like +> |< NsText "body" >< NsText "p" ?> a | +might even be possible. + +Some day, XML.Light should be replaced by something better. +While doing that, it might be useful to replace String as the type of element +names with something else, too. (Of course with OverloadedStrings). +While doing that, maybe the types can be created in a way that something like +> NsText:"body" +could be used. Overloading (:) does not sounds like the best idea, but if the +element name type was a list, this might be possible. +Of course that would be a bit hackish, so the "right" way would probably be +something like +> InNS NsText "body" +but isn't that a bit boring? ;) +-} diff --git a/src/Text/Pandoc/Readers/ODT/Namespaces.hs b/src/Text/Pandoc/Readers/ODT/Namespaces.hs new file mode 100644 index 000000000..77ca21165 --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/Namespaces.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Reader.ODT.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Namespaces used in odt files. +-} + +module Text.Pandoc.Readers.ODT.Namespaces ( Namespace (..) + ) where + +import qualified Data.Map as M (empty, insert) +import Data.Maybe (fromMaybe, listToMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Readers.ODT.Generic.Namespaces + + +instance NameSpaceID Namespace where + + getInitialIRImap = nsIDmap + + getNamespaceID "" m = Just(m, NsXML) + getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri) + where asPair nsID = Just (M.insert nsID iri m, nsID) + + +findID :: NameSpaceIRI -> Maybe Namespace +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] + +nsIDmap :: NameSpaceIRIs Namespace +nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs + +data Namespace = -- Open Document core + NsOffice | NsStyle | NsText | NsTable | NsForm + | NsDraw | Ns3D | NsAnim | NsChart | NsConfig + | NsDB | NsMeta | NsNumber | NsScript | NsManifest + | NsPresentation + -- Metadata + | NsODF + -- Compatible elements + | NsXSL_FO | NsSVG | NsSmil + -- External standards + | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL + | NsDublinCore + -- Metadata manifest + | NsPKG + -- Others + | NsOpenFormula + -- Core XML (basically only for the 'id'-attribute) + | NsXML + -- Fallback + | NsOther Text + deriving ( Eq, Ord, Show ) + +-- | Not the actual iri's, but large prefixes of them - this way there are +-- less versioning problems and the like. +nsIDs :: [(Text, Namespace)] +nsIDs = [ + ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), + ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), + ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ), + ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ), + ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ), + ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ), + ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ), + ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ), + ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ), + ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ), + ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ), + ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ), + ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ), + ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ), + ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ), + ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ), + ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ), + ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ), + ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ), + ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ), + ("http://purl.org/dc/elements" , NsDublinCore ), + ("http://www.w3.org/2003/g/data-view" , NsGRDDL ), + ("http://www.w3.org/1998/Math/MathML" , NsMathML ), + ("http://www.w3.org/1999/xhtml" , NsXHtml ), + ("http://www.w3.org/2002/xforms" , NsXForms ), + ("http://www.w3.org/1999/xlink" , NsXLink ) + ] diff --git a/src/Text/Pandoc/Readers/ODT/StyleReader.hs b/src/Text/Pandoc/Readers/ODT/StyleReader.hs new file mode 100644 index 000000000..dadd37dcc --- /dev/null +++ b/src/Text/Pandoc/Readers/ODT/StyleReader.hs @@ -0,0 +1,640 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.ODT.StyleReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Reader for the style information in an odt document. +-} + +module Text.Pandoc.Readers.ODT.StyleReader +( Style (..) +, StyleName +, StyleFamily (..) +, Styles (..) +, StyleProperties (..) +, TextProperties (..) +, ParaProperties (..) +, VerticalTextPosition (..) +, ListItemNumberFormat (..) +, ListLevel +, ListStyle (..) +, ListLevelStyle (..) +, ListLevelType (..) +, LengthOrPercent (..) +, lookupStyle +, getListLevelStyle +, getStyleFamily +, lookupDefaultStyle' +, lookupListStyleByName +, extendedStylePropertyChain +, readStylesAt +) where + +import Prelude hiding (Applicative(..)) +import Control.Applicative hiding (liftA, liftA2, liftA3) +import Control.Arrow + +import Data.Default +import qualified Data.Foldable as F +import Data.List (unfoldr, foldl') +import qualified Data.Map as M +import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Set as S + +import qualified Text.Pandoc.XML.Light as XML + +import Text.Pandoc.Shared (safeRead, tshow) + +import Text.Pandoc.Readers.ODT.Arrows.Utils + +import Text.Pandoc.Readers.ODT.Generic.Fallible +import qualified Text.Pandoc.Readers.ODT.Generic.SetMap as SM +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.XMLConverter + +import Text.Pandoc.Readers.ODT.Base +import Text.Pandoc.Readers.ODT.Namespaces + +readStylesAt :: XML.Element -> Fallible Styles +readStylesAt e = runConverter' readAllStyles mempty e + +-------------------------------------------------------------------------------- +-- Reader for font declarations and font pitches +-------------------------------------------------------------------------------- + +-- Pandoc has no support for different font pitches. Yet knowing them can be +-- very helpful in cases where Pandoc has more semantics than OpenDocument. +-- In these cases, the pitch can help deciding as what to define a block of +-- text. So let's start with a type for font pitches: + +data FontPitch = PitchVariable | PitchFixed + deriving ( Eq, Show ) + +instance Lookupable FontPitch where + lookupTable = [ ("variable" , PitchVariable) + , ("fixed" , PitchFixed ) + ] + +instance Default FontPitch where + def = PitchVariable + +-- The font pitch can be specified in a style directly. Normally, however, +-- it is defined in the font. That is also the specs' recommendation. +-- +-- Thus, we want + +type FontFaceName = Text + +type FontPitches = M.Map FontFaceName FontPitch + +-- To get there, the fonts have to be read and the pitches extracted. +-- But the resulting map are only needed at one later place, so it should not be +-- transported on the value level, especially as we already use a state arrow. +-- So instead, the resulting map is lifted into the state of the reader. +-- (An alternative might be ImplicitParams, but again, we already have a state.) +-- +-- So the main style readers will have the types +type StyleReader a b = XMLReader FontPitches a b +-- and +type StyleReaderSafe a b = XMLReaderSafe FontPitches a b +-- respectively. +-- +-- But before we can work with these, we need to define the reader that reads +-- the fonts: + +-- | A reader for font pitches +fontPitchReader :: XMLReader _s _x FontPitches +fontPitchReader = executeInSub NsOffice "font-face-decls" ( + withEveryL NsStyle "font-face" (liftAsSuccess ( + findAttr' NsStyle "name" + &&& + lookupDefaultingAttr NsStyle "font-pitch" + )) + >>?^ ( M.fromList . foldl' accumLegalPitches [] ) + ) `ifFailedDo` returnV (Right M.empty) + where accumLegalPitches ls (Nothing,_) = ls + accumLegalPitches ls (Just n,p) = (n,p):ls + + +-- | A wrapper around the font pitch reader that lifts the result into the +-- state. +readFontPitches :: StyleReader x x +readFontPitches = producingExtraState () () fontPitchReader + + +-- | Looking up a pitch in the state of the arrow. +-- +-- The function does the following: +-- * Look for the font pitch in an attribute. +-- * If that fails, look for the font name, look up the font in the state +-- and use the pitch from there. +-- * Return the result in a Maybe +-- +findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) +findPitch = ( lookupAttr NsStyle "font-pitch" + `ifFailedDo` findAttr NsStyle "font-name" + >>? ( keepingTheValue getExtraState + >>% M.lookup + >>^ maybeToChoice + ) + ) + >>> choiceToMaybe + +-------------------------------------------------------------------------------- +-- Definitions of main data +-------------------------------------------------------------------------------- + +type StyleName = Text + +-- | There are two types of styles: named styles with a style family and an +-- optional style parent, and default styles for each style family, +-- defining default style properties +data Styles = Styles + { stylesByName :: M.Map StyleName Style + , listStylesByName :: M.Map StyleName ListStyle + , defaultStyleMap :: M.Map StyleFamily StyleProperties + } + deriving ( Show ) + +-- Styles from a monoid under union +instance Semigroup Styles where + (Styles sBn1 dSm1 lsBn1) <> (Styles sBn2 dSm2 lsBn2) + = Styles (M.union sBn1 sBn2) + (M.union dSm1 dSm2) + (M.union lsBn1 lsBn2) +instance Monoid Styles where + mempty = Styles M.empty M.empty M.empty + mappend = (<>) + +-- Not all families from the specifications are implemented, only those we need. +-- But there are none that are not mentioned here. +data StyleFamily = FaText | FaParagraph +-- | FaTable | FaTableCell | FaTableColumn | FaTableRow +-- | FaGraphic | FaDrawing | FaChart +-- | FaPresentation +-- | FaRuby + deriving ( Eq, Ord, Show ) + +instance Lookupable StyleFamily where + lookupTable = [ ( "text" , FaText ) + , ( "paragraph" , FaParagraph ) +-- , ( "table" , FaTable ) +-- , ( "table-cell" , FaTableCell ) +-- , ( "table-column" , FaTableColumn ) +-- , ( "table-row" , FaTableRow ) +-- , ( "graphic" , FaGraphic ) +-- , ( "drawing-page" , FaDrawing ) +-- , ( "chart" , FaChart ) +-- , ( "presentation" , FaPresentation ) +-- , ( "ruby" , FaRuby ) + ] + +-- | A named style +data Style = Style { styleFamily :: Maybe StyleFamily + , styleParentName :: Maybe StyleName + , listStyle :: Maybe StyleName + , styleProperties :: StyleProperties + } + deriving ( Eq, Show ) + +data StyleProperties = SProps { textProperties :: Maybe TextProperties + , paraProperties :: Maybe ParaProperties +-- , tableColProperties :: Maybe TColProperties +-- , tableRowProperties :: Maybe TRowProperties +-- , tableCellProperties :: Maybe TCellProperties +-- , tableProperties :: Maybe TableProperties +-- , graphicProperties :: Maybe GraphProperties + } + deriving ( Eq, Show ) + +instance Default StyleProperties where + def = SProps { textProperties = Just def + , paraProperties = Just def + } + +data TextProperties = PropT { isEmphasised :: Bool + , isStrong :: Bool + , pitch :: Maybe FontPitch + , verticalPosition :: VerticalTextPosition + , underline :: Maybe UnderlineMode + , strikethrough :: Maybe UnderlineMode + } + deriving ( Eq, Show ) + +instance Default TextProperties where + def = PropT { isEmphasised = False + , isStrong = False + , pitch = Just def + , verticalPosition = def + , underline = Nothing + , strikethrough = Nothing + } + +data ParaProperties = PropP { paraNumbering :: ParaNumbering + , indentation :: LengthOrPercent + , margin_left :: LengthOrPercent + } + deriving ( Eq, Show ) + +instance Default ParaProperties where + def = PropP { paraNumbering = NumberingNone + , indentation = def + , margin_left = def + } + +---- +-- All the little data types that make up the properties +---- + +data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub + deriving ( Eq, Show ) + +instance Default VerticalTextPosition where + def = VPosNormal + +instance Read VerticalTextPosition where + readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ] + ++ [ (VPosSuper , s') | ("super" , s') <- lexS ] + ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ] + where + lexS = lex s + signumToVPos n | n < 0 = VPosSub + | n > 0 = VPosSuper + | otherwise = VPosNormal + +data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace + deriving ( Eq, Show ) + +instance Lookupable UnderlineMode where + lookupTable = [ ( "continuous" , UnderlineModeNormal ) + , ( "skip-white-space" , UnderlineModeSkipWhitespace ) + ] + + +data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int + deriving ( Eq, Show ) + +data LengthOrPercent = LengthValueMM Int | PercentValue Int + deriving ( Eq, Show ) + +instance Default LengthOrPercent where + def = LengthValueMM 0 + +instance Read LengthOrPercent where + readsPrec _ s = + [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s] + ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s + , (unit , s'') <- reads s' + , let lengthMM = estimateInMillimeter + length' unit + ] + +data XslUnit = XslUnitMM | XslUnitCM + | XslUnitInch + | XslUnitPoints | XslUnitPica + | XslUnitPixel + | XslUnitEM + +instance Show XslUnit where + show XslUnitMM = "mm" + show XslUnitCM = "cm" + show XslUnitInch = "in" + show XslUnitPoints = "pt" + show XslUnitPica = "pc" + show XslUnitPixel = "px" + show XslUnitEM = "em" + +instance Read XslUnit where + readsPrec _ "mm" = [(XslUnitMM , "")] + readsPrec _ "cm" = [(XslUnitCM , "")] + readsPrec _ "in" = [(XslUnitInch , "")] + readsPrec _ "pt" = [(XslUnitPoints , "")] + readsPrec _ "pc" = [(XslUnitPica , "")] + readsPrec _ "px" = [(XslUnitPixel , "")] + readsPrec _ "em" = [(XslUnitEM , "")] + readsPrec _ _ = [] + +-- | Rough conversion of measures into millimetres. +-- Pixels and em's are actually implementation dependent/relative measures, +-- so I could not really easily calculate anything exact here even if I wanted. +-- But I do not care about exactness right now, as I only use measures +-- to determine if a paragraph is "indented" or not. +estimateInMillimeter :: Int -> XslUnit -> Int +estimateInMillimeter n XslUnitMM = n +estimateInMillimeter n XslUnitCM = n * 10 +estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4 +estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4 +estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4 +estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4 + + +---- +-- List styles +---- + +type ListLevel = Int + +newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle + } + deriving ( Eq, Show ) + +-- +getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle +getListLevelStyle level ListStyle{..} = + let (lower , exactHit , _) = M.splitLookup level levelStyles + in exactHit <|> fmap fst (M.maxView lower) + -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] + -- \^ simpler, but in general less efficient + +data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType + , listItemPrefix :: Maybe Text + , listItemSuffix :: Maybe Text + , listItemFormat :: ListItemNumberFormat + , listItemStart :: Int + } + deriving ( Eq, Ord ) + +instance Show ListLevelStyle where + show ListLevelStyle{..} = " listItemPrefix) + ++ show listItemFormat + ++ maybeToString (T.unpack <$> listItemSuffix) + ++ ">" + where maybeToString = fromMaybe "" + +data ListLevelType = LltBullet | LltImage | LltNumbered + deriving ( Eq, Ord, Show ) + +data ListItemNumberFormat = LinfNone + | LinfNumber + | LinfRomanLC | LinfRomanUC + | LinfAlphaLC | LinfAlphaUC + | LinfString String + deriving ( Eq, Ord ) + +instance Show ListItemNumberFormat where + show LinfNone = "" + show LinfNumber = "1" + show LinfRomanLC = "i" + show LinfRomanUC = "I" + show LinfAlphaLC = "a" + show LinfAlphaUC = "A" + show (LinfString s) = s + +instance Default ListItemNumberFormat where + def = LinfNone + +instance Read ListItemNumberFormat where + readsPrec _ "" = [(LinfNone , "")] + readsPrec _ "1" = [(LinfNumber , "")] + readsPrec _ "i" = [(LinfRomanLC , "")] + readsPrec _ "I" = [(LinfRomanUC , "")] + readsPrec _ "a" = [(LinfAlphaLC , "")] + readsPrec _ "A" = [(LinfAlphaUC , "")] + readsPrec _ s = [(LinfString s , "")] + +-------------------------------------------------------------------------------- +-- Readers +-- +-- ...it seems like a whole lot of this should be automatically derivable +-- or at least moveable into a class. Most of this is data concealed in +-- code. +-------------------------------------------------------------------------------- + +-- +readAllStyles :: StyleReader _x Styles +readAllStyles = ( readFontPitches + >>?! ( readAutomaticStyles + &&& readStyles )) + >>?%? chooseMax + -- all top elements are always on the same hierarchy level + +-- +readStyles :: StyleReader _x Styles +readStyles = executeInSub NsOffice "styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList ) + +-- +readAutomaticStyles :: StyleReader _x Styles +readAutomaticStyles = executeInSub NsOffice "automatic-styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( returnV M.empty ) + +-- +readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties) +readDefaultStyle = lookupAttr NsStyle "family" + >>?! keepingTheValue readStyleProperties + +-- +readStyle :: StyleReader _x (StyleName,Style) +readStyle = findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA4 Style + ( lookupAttr' NsStyle "family" ) + ( findAttr' NsStyle "parent-style-name" ) + ( findAttr' NsStyle "list-style-name" ) + readStyleProperties + ) + +-- +readStyleProperties :: StyleReaderSafe _x StyleProperties +readStyleProperties = liftA2 SProps + ( readTextProperties >>> choiceToMaybe ) + ( readParaProperties >>> choiceToMaybe ) + +-- +readTextProperties :: StyleReader _x TextProperties +readTextProperties = + executeInSub NsStyle "text-properties" $ liftAsSuccess + ( liftA6 PropT + ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) + ( searchAttr NsXSL_FO "font-weight" False isFontBold ) + findPitch + ( getAttr NsStyle "text-position" ) + readUnderlineMode + readStrikeThroughMode + ) + where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] + isFontBold = ("normal",False):("bold",True) + :map ((,True) . tshow) ([100,200..900]::[Int]) + +readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readUnderlineMode = readLineMode "text-underline-mode" + "text-underline-style" + +readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readStrikeThroughMode = readLineMode "text-line-through-mode" + "text-line-through-style" + +readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode modeAttr styleAttr = proc x -> do + isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x + mode <- lookupAttr' NsStyle modeAttr -< x + if isUL + then case mode of + Just m -> returnA -< Just m + Nothing -> returnA -< Just UnderlineModeNormal + else returnA -< Nothing + where + isLinePresent = ("none",False) : map (,True) + [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" + , "long-dash" , "solid" , "wave" + ] + +-- +readParaProperties :: StyleReader _x ParaProperties +readParaProperties = + executeInSub NsStyle "paragraph-properties" $ liftAsSuccess + ( liftA3 PropP + ( liftA2 readNumbering + ( isSet' NsText "number-lines" ) + ( readAttr' NsText "line-number" ) + ) + ( liftA2 readIndentation + ( isSetWithDefault NsStyle "auto-text-indent" False ) + ( getAttr NsXSL_FO "text-indent" ) + ) + ( getAttr NsXSL_FO "margin-left" ) + ) + where readNumbering (Just True) (Just n) = NumberingRestart n + readNumbering (Just True) _ = NumberingKeep + readNumbering _ _ = NumberingNone + + readIndentation False indent = indent + readIndentation True _ = def + +---- +-- List styles +---- + +-- +readListStyle :: StyleReader _x (StyleName, ListStyle) +readListStyle = + findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA ListStyle + $ liftA3 SM.union3 + ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) + ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) + ( readListLevelStyles NsText "list-level-style-image" LltImage ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle + ) +-- +readListLevelStyles :: Namespace -> ElementName + -> ListLevelType + -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) +readListLevelStyles namespace elementName levelType = + tryAll namespace elementName (readListLevelStyle levelType) + >>^ SM.fromList + +-- +readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) +readListLevelStyle levelType = readAttr NsText "level" + >>?! keepingTheValue + ( liftA5 toListLevelStyle + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ( findAttrText' NsText "start-value" ) + ) + where + toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) + toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b) + toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b) + startValue mbx = fromMaybe 1 (mbx >>= safeRead) + +-- +chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle +chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls + where + select l Nothing = Just l + select ( ListLevelStyle t1 p1 s1 f1 b1 ) + ( Just ( ListLevelStyle t2 p2 s2 f2 _ )) + = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) + (selectLinf f1 f2) b1 + select' LltNumbered _ = LltNumbered + select' _ LltNumbered = LltNumbered + select' _ _ = LltBullet + selectLinf LinfNone f2 = f2 + selectLinf f1 LinfNone = f1 + selectLinf (LinfString _) f2 = f2 + selectLinf f1 (LinfString _) = f1 + selectLinf f1 _ = f1 + + +-------------------------------------------------------------------------------- +-- Tools to access style data +-------------------------------------------------------------------------------- + +-- +lookupStyle :: StyleName -> Styles -> Maybe Style +lookupStyle name Styles{..} = M.lookup name stylesByName + +-- +lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties +lookupDefaultStyle' Styles{..} family = fromMaybe def + (M.lookup family defaultStyleMap) + +-- +lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle +lookupListStyleByName name Styles{..} = M.lookup name listStylesByName + + +-- | Returns a chain of parent of the current style. The direct parent will +-- be the first element of the list, followed by its parent and so on. +-- The current style is not in the list. +parents :: Style -> Styles -> [Style] +parents style styles = unfoldr findNextParent style -- Ha! + where findNextParent Style{..} + = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName + +-- | Looks up the style family of the current style. Normally, every style +-- should have one. But if not, all parents are searched. +getStyleFamily :: Style -> Styles -> Maybe StyleFamily +getStyleFamily style@Style{..} styles + = styleFamily + <|> F.asum (map (`getStyleFamily` styles) $ parents style styles) + +-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property +-- values are specified. Instead, a value might be inherited from a +-- parent style. This function makes this chain of inheritance +-- concrete and easily accessible by encapsulating the necessary lookups. +-- The resulting list contains the direct properties of the style as the first +-- element, the ones of the direct parent element as the next one, and so on. +-- +-- Note: There should also be default properties for each style family. These +-- are @not@ contained in this list because properties inherited from +-- parent elements take precedence over default styles. +-- +-- This function is primarily meant to be used through convenience wrappers. +-- +stylePropertyChain :: Style -> Styles -> [StyleProperties] +stylePropertyChain style styles + = map styleProperties (style : parents style styles) + +-- +extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] +extendedStylePropertyChain [] _ = [] +extendedStylePropertyChain [style] styles = stylePropertyChain style styles + ++ maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)) +extendedStylePropertyChain (style:trace) styles = stylePropertyChain style styles + ++ extendedStylePropertyChain trace styles diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs deleted file mode 100644 index c274b6fd4..000000000 --- a/src/Text/Pandoc/Readers/Odt.hs +++ /dev/null @@ -1,97 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Reader.Odt - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Entry point to the odt reader. --} - -module Text.Pandoc.Readers.Odt ( readOdt ) where - -import Codec.Archive.Zip -import Text.Pandoc.XML.Light - -import qualified Data.ByteString.Lazy as B - -import System.FilePath - -import Control.Monad.Except (throwError) - -import qualified Data.Text as T - -import Text.Pandoc.Class.PandocMonad (PandocMonad) -import qualified Text.Pandoc.Class.PandocMonad as P -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.MediaBag -import Text.Pandoc.Options -import qualified Text.Pandoc.UTF8 as UTF8 - -import Text.Pandoc.Readers.Odt.ContentReader -import Text.Pandoc.Readers.Odt.StyleReader - -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Shared (filteredFilesFromArchive) - -readOdt :: PandocMonad m - => ReaderOptions - -> B.ByteString - -> m Pandoc -readOdt opts bytes = case readOdt' opts bytes of - Right (doc, mb) -> do - P.setMediaBag mb - return doc - Left e -> throwError e - --- -readOdt' :: ReaderOptions - -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readOdt' _ bytes = bytesToOdt bytes-- of --- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) --- Left err -> Left err - --- -bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) -bytesToOdt bytes = case toArchiveOrFail bytes of - Right archive -> archiveToOdt archive - Left err -> Left $ PandocParseError - $ "Could not unzip ODT: " <> T.pack err - --- -archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = do - let onFailure msg Nothing = Left $ PandocParseError msg - onFailure _ (Just x) = Right x - contentEntry <- onFailure "Could not find content.xml" - (findEntryByPath "content.xml" archive) - stylesEntry <- onFailure "Could not find styles.xml" - (findEntryByPath "styles.xml" archive) - contentElem <- entryToXmlElem contentEntry - stylesElem <- entryToXmlElem stylesEntry - styles <- either - (\_ -> Left $ PandocParseError "Could not read styles") - Right - (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) - let filePathIsOdtMedia :: FilePath -> Bool - filePathIsOdtMedia fp = - let (dir, name) = splitFileName fp - in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") - let media = filteredFilesFromArchive archive filePathIsOdtMedia - let startState = readerState styles media - either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right - (runConverter' read_body startState contentElem) - - --- -entryToXmlElem :: Entry -> Either PandocError Element -entryToXmlElem entry = - case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of - Right x -> Right x - Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs deleted file mode 100644 index 96515bf56..000000000 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ /dev/null @@ -1,139 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TupleSections #-} -{- | - Module : Text.Pandoc.Readers.Odt.Arrows.State - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -An arrow that transports a state. It is in essence a more powerful version of -the standard state monad. As it is such a simple extension, there are -other version out there that do exactly the same. -The implementation is duplicated, though, to add some useful features. -Most of these might be implemented without access to innards, but it's much -faster and easier to implement this way. --} - -module Text.Pandoc.Readers.Odt.Arrows.State where - -import Control.Arrow -import qualified Control.Category as Cat -import Control.Monad -import Data.List (foldl') -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible - - -newtype ArrowState state a b = ArrowState - { runArrowState :: (state, a) -> (state, b) } - --- | Constructor -withState :: (state -> a -> (state, b)) -> ArrowState state a b -withState = ArrowState . uncurry - --- | Constructor -modifyState :: (state -> state ) -> ArrowState state a a -modifyState = ArrowState . first - --- | Constructor -ignoringState :: ( a -> b ) -> ArrowState state a b -ignoringState = ArrowState . second - --- | Constructor -fromState :: (state -> (state, b)) -> ArrowState state a b -fromState = ArrowState . (.fst) - --- | Constructor -extractFromState :: (state -> b ) -> ArrowState state x b -extractFromState f = ArrowState $ \(state,_) -> (state, f state) - --- | Constructor -tryModifyState :: (state -> Either f state) - -> ArrowState state a (Either f a) -tryModifyState f = ArrowState $ \(state,a) - -> (state,).Left ||| (,Right a) $ f state - -instance Cat.Category (ArrowState s) where - id = ArrowState id - arrow2 . arrow1 = ArrowState $ runArrowState arrow2 . runArrowState arrow1 - -instance Arrow (ArrowState state) where - arr = ignoringState - first a = ArrowState $ \(s,(aF,aS)) - -> second (,aS) $ runArrowState a (s,aF) - second a = ArrowState $ \(s,(aF,aS)) - -> second (aF,) $ runArrowState a (s,aS) - -instance ArrowChoice (ArrowState state) where - left a = ArrowState $ \(s,e) -> case e of - Left l -> second Left $ runArrowState a (s,l) - Right r -> (s, Right r) - right a = ArrowState $ \(s,e) -> case e of - Left l -> (s, Left l) - Right r -> second Right $ runArrowState a (s,r) - -instance ArrowApply (ArrowState state) where - app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like a fallible --- identity arrow, save for side effects in the state. -withSubStateF :: ArrowState s x (Either f s') - -> ArrowState s' s (Either f s ) - -> ArrowState s x (Either f x ) -withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a) - >>^ spreadChoice - >>^ fmap fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubStateF' :: ArrowState s x (Either f s') - -> ArrowState s' s (Either f s ) - -> ArrowState s x (Either f s') -withSubStateF' unlift a = ArrowState go - where go p@(s,_) = tryRunning unlift - ( tryRunning a (second Right) ) - p - where tryRunning a' b v = case runArrowState a' v of - (_ , Left f) -> (s, Left f) - (x , Right y) -> b (y,x) - --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f - where a' x (s',m) = second (mappend m) $ runArrowState a (s',x) - --- | Fold a state arrow through something 'Foldable'. Collect the results in a --- 'MonadPlus'. -iterateS :: (Foldable f, MonadPlus m) - => ArrowState s x y - -> ArrowState s (f x) (m y) -iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f - where a' x (s',m) = second (mplus m.return) $ runArrowState a (s',x) - --- | Fold a state arrow through something 'Foldable'. Collect the results in a --- 'MonadPlus'. -iterateSL :: (Foldable f, MonadPlus m) - => ArrowState s x y - -> ArrowState s (f x) (m y) -iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f - where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) - - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateS' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ _ e = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs deleted file mode 100644 index 45e5a525c..000000000 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ /dev/null @@ -1,208 +0,0 @@ -{- | - Module : Text.Pandoc.Readers.Odt.Arrows.Utils - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Utility functions for Arrows (Kleisli monads). - -Some general notes on notation: - -* "^" is meant to stand for a pure function that is lifted into an arrow -based on its usage for that purpose in "Control.Arrow". -* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function -with an equivalent return value. -* "_" stands for the dropping of a value. --} - --- We export everything -module Text.Pandoc.Readers.Odt.Arrows.Utils where - -import Prelude hiding (Applicative(..)) -import Control.Arrow -import Control.Monad (join) - -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils - -and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') -and2 = (&&&) - -and3 :: (Arrow a) - => a b c0->a b c1->a b c2 - -> a b (c0,c1,c2 ) -and4 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3 - -> a b (c0,c1,c2,c3 ) -and5 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4 - -> a b (c0,c1,c2,c3,c4 ) -and6 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 - -> a b (c0,c1,c2,c3,c4,c5 ) - -and3 a b c = and2 a b &&& c - >>^ \((z,y ) , x) -> (z,y,x ) -and4 a b c d = and3 a b c &&& d - >>^ \((z,y,x ) , w) -> (z,y,x,w ) -and5 a b c d e = and4 a b c d &&& e - >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) -and6 a b c d e f = and5 a b c d e &&& f - >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) - -liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z -liftA2 f a b = a &&& b >>^ uncurry f - -liftA3 :: (Arrow a) => (z->y->x -> r) - -> a b z->a b y->a b x - -> a b r -liftA4 :: (Arrow a) => (z->y->x->w -> r) - -> a b z->a b y->a b x->a b w - -> a b r -liftA5 :: (Arrow a) => (z->y->x->w->v -> r) - -> a b z->a b y->a b x->a b w->a b v - -> a b r -liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u - -> a b r - -liftA3 fun a b c = and3 a b c >>^ uncurry3 fun -liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun -liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun -liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun - -liftA :: (Arrow a) => (y -> z) -> a b y -> a b z -liftA fun a = a >>^ fun - - --- | Duplicate a value to subsequently feed it into different arrows. --- Can almost always be replaced with '(&&&)', 'keepingTheValue', --- or even '(|||)'. --- Equivalent to --- > returnA &&& returnA -duplicate :: (Arrow a) => a b (b,b) -duplicate = arr $ join (,) - --- | Applies a function to the uncurried result-pair of an arrow-application. --- (The %-symbol was chosen to evoke an association with pairs.) -(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>% f = a >>^ uncurry f - -infixr 2 >>% - - --- | Duplicate a value and apply an arrow to the second instance. --- Equivalent to --- > \a -> duplicate >>> second a --- or --- > \a -> returnA &&& a -keepingTheValue :: (Arrow a) => a b c -> a b (b,c) -keepingTheValue a = returnA &&& a - -( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d -( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d -( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d - -l ^||| r = arr l ||| r -l |||^ r = l ||| arr r -l ^|||^ r = arr l ||| arr r - -infixr 2 ^||| , |||^, ^|||^ - -( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') -( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') - -l ^&&& r = arr l &&& r -l &&&^ r = l &&& arr r - -infixr 3 ^&&&, &&&^ - - --- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. -choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) -choiceToMaybe = arr eitherToMaybe - --- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@. -maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b) -maybeToChoice = arr maybeToEither - --- | Lifts a constant value into an arrow -returnV :: (Arrow a) => c -> a x c -returnV = arr.const - --- | Defines Left as failure, Right as success -type FallibleArrow a input failure success = a input (Either failure success) - --- -liftAsSuccess :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -liftAsSuccess a = a >>^ Right - --- | Execute the second arrow if the first succeeds -(>>?) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> FallibleArrow a success failure success' - -> FallibleArrow a x failure success' -a >>? b = a >>> Left ^||| b - --- | Execute the lifted second arrow if the first succeeds -(>>?^) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> (success -> success') - -> FallibleArrow a x failure success' -a >>?^ f = a >>^ Left ^|||^ Right . f - --- | Execute the lifted second arrow if the first succeeds -(>>?^?) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a >>?^? b = a >>> Left ^|||^ b - --- | Execute the second arrow if the lifted first arrow succeeds -(^>>?) :: (ArrowChoice a) - => (x -> Either failure success) - -> FallibleArrow a success failure success' - -> FallibleArrow a x failure success' -a ^>>? b = a ^>> Left ^||| b - --- | Execute the second, non-fallible arrow if the first arrow succeeds -(>>?!) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> a success success' - -> FallibleArrow a x failure success' -a >>?! f = a >>> right f - ---- -(>>?%) :: (ArrowChoice a) - => FallibleArrow a x f (b,b') - -> (b -> b' -> c) - -> FallibleArrow a x f c -a >>?% f = a >>?^ uncurry f - - ---- -(>>?%?) :: (ArrowChoice a) - => FallibleArrow a x f (b,b') - -> (b -> b' -> Either f c) - -> FallibleArrow a x f c -a >>?%? f = a >>?^? uncurry f - -infixr 1 >>?, >>?^, >>?^? -infixr 1 ^>>?, >>?! -infixr 1 >>?%, >>?%? - --- | An arrow version of a short-circuit (<|>) -ifFailedDo :: (ArrowChoice a) - => FallibleArrow a x f y - -> FallibleArrow a x f y - -> FallibleArrow a x f y -ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) - where repackage (x , Left _) = Left x - repackage (_ , Right y) = Right y - -infixr 1 `ifFailedDo` diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs deleted file mode 100644 index 2c07b1c11..000000000 --- a/src/Text/Pandoc/Readers/Odt/Base.hs +++ /dev/null @@ -1,22 +0,0 @@ -{- | - Module : Text.Pandoc.Readers.Odt.Base - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Core types of the odt reader. --} - -module Text.Pandoc.Readers.Odt.Base where - -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Namespaces - -type OdtConverterState s = XMLConverterState Namespace s - -type XMLReader s a b = FallibleXMLConverter Namespace s a b - -type XMLReaderSafe s a b = XMLConverter Namespace s a b diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs deleted file mode 100644 index 77324ac2f..000000000 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ /dev/null @@ -1,960 +0,0 @@ -{-# LANGUAGE Arrows #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE DeriveFoldable #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Readers.Odt.ContentReader - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -The core of the odt reader that converts odt features into Pandoc types. --} - -module Text.Pandoc.Readers.Odt.ContentReader -( readerState -, read_body -) where - -import Prelude hiding (Applicative(..)) -import Control.Applicative hiding (liftA, liftA2, liftA3) -import Control.Arrow -import Control.Monad ((<=<)) - -import qualified Data.ByteString.Lazy as B -import Data.Foldable (fold) -import Data.List (find) -import qualified Data.Map as M -import qualified Data.Text as T -import Data.Maybe -import Data.Monoid (Alt (..)) - -import Text.TeXMath (readMathML, writeTeX) -import qualified Text.Pandoc.XML.Light as XML - -import Text.Pandoc.Builder hiding (underline) -import Text.Pandoc.MediaBag (MediaBag, insertMedia) -import Text.Pandoc.Shared -import Text.Pandoc.Extensions (extensionsFromList, Extension(..)) -import qualified Text.Pandoc.UTF8 as UTF8 - -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.StyleReader - -import Text.Pandoc.Readers.Odt.Arrows.State (foldS) -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter - -import Network.URI (parseRelativeReference, URI(uriPath)) -import qualified Data.Set as Set - --------------------------------------------------------------------------------- --- State --------------------------------------------------------------------------------- - -type Anchor = T.Text -type Media = [(FilePath, B.ByteString)] - -data ReaderState - = ReaderState { -- | A collection of styles read somewhere else. - -- It is only queried here, not modified. - styleSet :: Styles - -- | A stack of the styles of parent elements. - -- Used to look up inherited style properties. - , styleTrace :: [Style] - -- | Keeps track of the current depth in nested lists - , currentListLevel :: ListLevel - -- | Lists may provide their own style, but they don't have - -- to. If they do not, the style of a parent list may be used - -- or even a default list style from the paragraph style. - -- This value keeps track of the closest list style there - -- currently is. - , currentListStyle :: Maybe ListStyle - -- | A map from internal anchor names to "pretty" ones. - -- The mapping is a purely cosmetic one. - , bookmarkAnchors :: M.Map Anchor Anchor - -- | A map of files / binary data from the archive - , envMedia :: Media - -- | Hold binary resources used in the document - , odtMediaBag :: MediaBag - } - deriving ( Show ) - -readerState :: Styles -> Media -> ReaderState -readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty - --- -pushStyle' :: Style -> ReaderState -> ReaderState -pushStyle' style state = state { styleTrace = style : styleTrace state } - --- -popStyle' :: ReaderState -> ReaderState -popStyle' state = case styleTrace state of - _:trace -> state { styleTrace = trace } - _ -> state - --- -modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState) -modifyListLevel f state = state { currentListLevel = f (currentListLevel state) } - --- -shiftListLevel :: ListLevel -> (ReaderState -> ReaderState) -shiftListLevel diff = modifyListLevel (+ diff) - --- -swapCurrentListStyle :: Maybe ListStyle -> ReaderState - -> (ReaderState, Maybe ListStyle) -swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle } - , currentListStyle state - ) - --- -lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor -lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors - --- -putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState -putPrettyAnchor ugly pretty state@ReaderState{..} - = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors } - --- -usedAnchors :: ReaderState -> [Anchor] -usedAnchors ReaderState{..} = M.elems bookmarkAnchors - -getMediaBag :: ReaderState -> MediaBag -getMediaBag ReaderState{..} = odtMediaBag - -getMediaEnv :: ReaderState -> Media -getMediaEnv ReaderState{..} = envMedia - -insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState -insertMedia' (fp, bs) state@ReaderState{..} - = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag } - --------------------------------------------------------------------------------- --- Reader type and associated tools --------------------------------------------------------------------------------- - -type OdtReader a b = XMLReader ReaderState a b - -type OdtReaderSafe a b = XMLReaderSafe ReaderState a b - --- | Extract something from the styles -fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b -fromStyles f = keepingTheValue - (getExtraState >>^ styleSet) - >>% f - --- -getStyleByName :: OdtReader StyleName Style -getStyleByName = fromStyles lookupStyle >>^ maybeToChoice - --- -findStyleFamily :: OdtReader Style StyleFamily -findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice - --- -lookupListStyle :: OdtReader StyleName ListStyle -lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice - --- -switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) -switchCurrentListStyle = keepingTheValue getExtraState - >>% swapCurrentListStyle - >>> first setExtraState - >>^ snd - --- -pushStyle :: OdtReaderSafe Style Style -pushStyle = keepingTheValue ( - ( keepingTheValue getExtraState - >>% pushStyle' - ) - >>> setExtraState - ) - >>^ fst - --- -popStyle :: OdtReaderSafe x x -popStyle = keepingTheValue ( - getExtraState - >>> arr popStyle' - >>> setExtraState - ) - >>^ fst - --- -getCurrentListLevel :: OdtReaderSafe _x ListLevel -getCurrentListLevel = getExtraState >>^ currentListLevel - --- -updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) -updateMediaWithResource = keepingTheValue ( - (keepingTheValue getExtraState - >>% insertMedia' - ) - >>> setExtraState - ) - >>^ fst - -lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) -lookupResource = proc target -> do - state <- getExtraState -< () - case lookup target (getMediaEnv state) of - Just bs -> returnV (target, bs) -<< () - Nothing -> returnV ("", B.empty) -< () - -type AnchorPrefix = T.Text - --- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a --- unique identifier but without assuming that the id should be for a header. --- Second argument is a list of already used identifiers. -uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor -uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent <> "-" <> T.pack (show n) - in if baseIdent `elem` usedIdents - then maybe baseIdent numIdent - $ find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) - -- if we have more than 60,000, allow repeats - else baseIdent - --- | First argument: basis for a new "pretty" anchor if none exists yet --- Second argument: a key ("ugly" anchor) --- Returns: saved "pretty" anchor or created new one -getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor -getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do - state <- getExtraState -< () - case lookupPrettyAnchor uglyAnchor state of - Just prettyAnchor -> returnA -< prettyAnchor - Nothing -> do - let newPretty = uniqueIdentFrom baseIdent (usedAnchors state) - modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty - --- | Input: basis for a new header anchor --- Output: saved new anchor -getHeaderAnchor :: OdtReaderSafe Inlines Anchor -getHeaderAnchor = proc title -> do - state <- getExtraState -< () - let exts = extensionsFromList [Ext_auto_identifiers] - let anchor = uniqueIdent exts (toList title) - (Set.fromList $ usedAnchors state) - modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor - - --------------------------------------------------------------------------------- --- Working with styles --------------------------------------------------------------------------------- - --- -readStyleByName :: OdtReader _x (StyleName, Style) -readStyleByName = - findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE - where - liftE :: (StyleName, Fallible Style) -> Fallible (StyleName, Style) - liftE (name, Right v) = Right (name, v) - liftE (_, Left v) = Left v - --- -isStyleToTrace :: OdtReader Style Bool -isStyleToTrace = findStyleFamily >>?^ (==FaText) - --- -withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines -withNewStyle a = proc x -> do - fStyle <- readStyleByName -< () - case fStyle of - Right (styleName, _) | isCodeStyle styleName -> do - inlines <- a -< x - arr inlineCode -<< inlines - Right (_, style) -> do - mFamily <- arr styleFamily -< style - fTextProps <- arr ( maybeToChoice - . textProperties - . styleProperties - ) -< style - case fTextProps of - Right textProps -> do - state <- getExtraState -< () - let triple = (state, textProps, mFamily) - modifier <- arr modifierFromStyleDiff -< triple - fShouldTrace <- isStyleToTrace -< style - case fShouldTrace of - Right shouldTrace -> - if shouldTrace - then do - pushStyle -< style - inlines <- a -< x - popStyle -< () - arr modifier -<< inlines - else - -- In case anything goes wrong - a -< x - Left _ -> a -< x - Left _ -> a -< x - Left _ -> a -< x - where - isCodeStyle :: StyleName -> Bool - isCodeStyle "Source_Text" = True - isCodeStyle _ = False - - inlineCode :: Inlines -> Inlines - inlineCode = code . T.concat . map stringify . toList - -type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) -type InlineModifier = Inlines -> Inlines - --- | Given data about the local style changes, calculates how to modify --- an instance of 'Inlines' -modifierFromStyleDiff :: PropertyTriple -> InlineModifier -modifierFromStyleDiff propertyTriple = - composition $ - getVPosModifier propertyTriple - : map (first ($ propertyTriple) >>> ifThen_else ignore) - [ (hasEmphChanged , emph ) - , (hasChanged isStrong , strong ) - , (hasChanged strikethrough , strikeout ) - ] - where - ifThen_else else' (if',then') = if if' then then' else else' - - ignore = id :: InlineModifier - - getVPosModifier :: PropertyTriple -> InlineModifier - getVPosModifier triple@(_,textProps,_) = - let getVPos = Just . verticalPosition - in case lookupPreviousValueM getVPos triple of - Nothing -> ignore - Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps) - - getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore - getVPosModifier' ( _ , VPosSub ) = subscript - getVPosModifier' ( _ , VPosSuper ) = superscript - getVPosModifier' ( _ , _ ) = ignore - - hasEmphChanged :: PropertyTriple -> Bool - hasEmphChanged = swing any [ hasChanged isEmphasised - , hasChangedM pitch - , hasChanged underline - ] - - hasChanged property triple@(_, property -> newProperty, _) = - (/= Just newProperty) (lookupPreviousValue property triple) - - hasChangedM property triple@(_, textProps,_) = - fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple - - lookupPreviousValue f = lookupPreviousStyleValue (fmap f . textProperties) - - lookupPreviousValueM f = lookupPreviousStyleValue (f <=< textProperties) - - lookupPreviousStyleValue f (ReaderState{..},_,mFamily) - = findBy f (extendedStylePropertyChain styleTrace styleSet) - <|> (f . lookupDefaultStyle' styleSet =<< mFamily) - - -type ParaModifier = Blocks -> Blocks - -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5 -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5 - --- | Returns either 'id' or 'blockQuote' depending on the current indentation -getParaModifier :: Style -> ParaModifier -getParaModifier Style{..} | Just props <- paraProperties styleProperties - , isBlockQuote (indentation props) - (margin_left props) - = blockQuote - | otherwise - = id - where - isBlockQuote mIndent mMargin - | LengthValueMM indent <- mIndent - , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - = True - | LengthValueMM margin <- mMargin - , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - = True - | LengthValueMM indent <- mIndent - , LengthValueMM margin <- mMargin - = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - - | PercentValue indent <- mIndent - , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - = True - | PercentValue margin <- mMargin - , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - = True - | PercentValue indent <- mIndent - , PercentValue margin <- mMargin - = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - - | otherwise - = False - --- -constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks -constructPara reader = proc blocks -> do - fStyle <- readStyleByName -< blocks - case fStyle of - Left _ -> reader -< blocks - Right (styleName, _) | isTableCaptionStyle styleName -> do - blocks' <- reader -< blocks - arr tableCaptionP -< blocks' - Right (_, style) -> do - let modifier = getParaModifier style - blocks' <- reader -< blocks - arr modifier -<< blocks' - where - isTableCaptionStyle :: StyleName -> Bool - isTableCaptionStyle "Table" = True - isTableCaptionStyle _ = False - tableCaptionP b = divWith ("", ["caption"], []) b - -type ListConstructor = [Blocks] -> Blocks - -getListConstructor :: ListLevelStyle -> ListConstructor -getListConstructor ListLevelStyle{..} = - case listLevelType of - LltBullet -> bulletList - LltImage -> bulletList - LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat - listNumberDelim = toListNumberDelim listItemPrefix - listItemSuffix - in orderedListWith (listItemStart, listNumberStyle, listNumberDelim) - where - toListNumberStyle LinfNone = DefaultStyle - toListNumberStyle LinfNumber = Decimal - toListNumberStyle LinfRomanLC = LowerRoman - toListNumberStyle LinfRomanUC = UpperRoman - toListNumberStyle LinfAlphaLC = LowerAlpha - toListNumberStyle LinfAlphaUC = UpperAlpha - toListNumberStyle (LinfString _) = Example - - toListNumberDelim Nothing (Just ".") = Period - toListNumberDelim (Just "" ) (Just ".") = Period - toListNumberDelim Nothing (Just ")") = OneParen - toListNumberDelim (Just "" ) (Just ")") = OneParen - toListNumberDelim (Just "(") (Just ")") = TwoParens - toListNumberDelim _ _ = DefaultDelim - - --- | Determines which style to use for a list, which level to use of that --- style, and which type of list to create as a result of this information. --- Then prepares the state for eventual child lists and constructs the list from --- the results. --- Two main cases are handled: The list may provide its own style or it may --- rely on a parent list's style. I the former case the current style in the --- state must be switched before and after the call to the child converter --- while in the latter the child converter can be called directly. --- If anything goes wrong, a default ordered-list-constructor is used. -constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks -constructList reader = proc x -> do - modifyExtraState (shiftListLevel 1) -< () - listLevel <- getCurrentListLevel -< () - fStyleName <- findAttr NsText "style-name" -< () - case fStyleName of - Right styleName -> do - fListStyle <- lookupListStyle -< styleName - case fListStyle of - Right listStyle -> do - fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) - case fLLS of - Just listLevelStyle -> do - oldListStyle <- switchCurrentListStyle -< Just listStyle - blocks <- constructListWith listLevelStyle -<< x - switchCurrentListStyle -< oldListStyle - returnA -< blocks - Nothing -> constructOrderedList -< x - Left _ -> constructOrderedList -< x - Left _ -> do - state <- getExtraState -< () - mListStyle <- arr currentListStyle -< state - case mListStyle of - Just listStyle -> do - fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) - case fLLS of - Just listLevelStyle -> constructListWith listLevelStyle -<< x - Nothing -> constructOrderedList -< x - Nothing -> constructOrderedList -< x - where - constructOrderedList = - reader - >>> modifyExtraState (shiftListLevel (-1)) - >>^ orderedList - constructListWith listLevelStyle = - reader - >>> getListConstructor listLevelStyle - ^>> modifyExtraState (shiftListLevel (-1)) - --------------------------------------------------------------------------------- --- Readers --------------------------------------------------------------------------------- - -type ElementMatcher result = (Namespace, ElementName, OdtReader result result) - -type InlineMatcher = ElementMatcher Inlines - -type BlockMatcher = ElementMatcher Blocks - -newtype FirstMatch a = FirstMatch (Alt Maybe a) - deriving (Foldable, Monoid, Semigroup) - -firstMatch :: a -> FirstMatch a -firstMatch = FirstMatch . Alt . Just - --- -matchingElement :: (Monoid e) - => Namespace -> ElementName - -> OdtReaderSafe e e - -> ElementMatcher e -matchingElement ns name reader = (ns, name, asResultAccumulator reader) - where - asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% mappend - --- -matchChildContent' :: (Monoid result) - => [ElementMatcher result] - -> OdtReaderSafe _x result -matchChildContent' ls = returnV mempty >>> matchContent' ls - --- -matchChildContent :: (Monoid result) - => [ElementMatcher result] - -> OdtReaderSafe (result, XML.Content) result - -> OdtReaderSafe _x result -matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - --------------------------------------------- --- Matchers --------------------------------------------- - ----------------------- --- Basics ----------------------- - --- --- | Open Document allows several consecutive spaces if they are marked up -read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>% recover - where - -- fallible version - read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines - read_plain_text' = ( second ( arr extractText ) - >>^ spreadChoice >>?! second text - ) - >>?% mappend - -- - extractText :: XML.Content -> Fallible T.Text - extractText (XML.Text cData) = succeedWith (XML.cdData cData) - extractText _ = failEmpty - -read_text_seq :: InlineMatcher -read_text_seq = matchingElement NsText "sequence" - $ matchChildContent [] read_plain_text - - --- specifically. I honor that, although the current implementation of 'mappend' --- for 'Inlines' in "Text.Pandoc.Builder" will collapse them again. --- The rational is to be prepared for future modifications. -read_spaces :: InlineMatcher -read_spaces = matchingElement NsText "s" ( - readAttrWithDefault NsText "c" 1 -- how many spaces? - >>^ fromList.(`replicate` Space) - ) --- -read_line_break :: InlineMatcher -read_line_break = matchingElement NsText "line-break" - $ returnV linebreak --- -read_tab :: InlineMatcher -read_tab = matchingElement NsText "tab" - $ returnV space --- -read_span :: InlineMatcher -read_span = matchingElement NsText "span" - $ withNewStyle - $ matchChildContent [ read_span - , read_spaces - , read_line_break - , read_tab - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - ] read_plain_text - --- -read_paragraph :: BlockMatcher -read_paragraph = matchingElement NsText "p" - $ constructPara - $ liftA para - $ withNewStyle - $ matchChildContent [ read_span - , read_spaces - , read_line_break - , read_tab - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - , read_frame - , read_text_seq - ] read_plain_text - - ----------------------- --- Headers ----------------------- - --- -read_header :: BlockMatcher -read_header = matchingElement NsText "h" - $ proc blocks -> do - level <- ( readAttrWithDefault NsText "outline-level" 1 - ) -< blocks - children <- ( matchChildContent [ read_span - , read_spaces - , read_line_break - , read_tab - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - , read_frame - ] read_plain_text - ) -< blocks - anchor <- getHeaderAnchor -< children - let idAttr = (anchor, [], []) -- no classes, no key-value pairs - arr (uncurry3 headerWith) -< (idAttr, level, children) - ----------------------- --- Lists ----------------------- - --- -read_list :: BlockMatcher -read_list = matchingElement NsText "list" --- $ withIncreasedListLevel - $ constructList --- $ liftA bulletList - $ matchChildContent' [ read_list_item - , read_list_header - ] --- -read_list_item :: ElementMatcher [Blocks] -read_list_item = read_list_element "list-item" - -read_list_header :: ElementMatcher [Blocks] -read_list_header = read_list_element "list-header" - -read_list_element :: ElementName -> ElementMatcher [Blocks] -read_list_element listElement = matchingElement NsText listElement - $ liftA (compactify.(:[])) - ( matchChildContent' [ read_paragraph - , read_header - , read_list - ] - ) - - ----------------------- --- Links ----------------------- - -read_link :: InlineMatcher -read_link = matchingElement NsText "a" - $ liftA3 link - ( findAttrTextWithDefault NsXLink "href" "" - >>> arr fixRelativeLink ) - ( findAttrTextWithDefault NsOffice "title" "" ) - ( matchChildContent [ read_span - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - ] read_plain_text ) - -fixRelativeLink :: T.Text -> T.Text -fixRelativeLink uri = - case parseRelativeReference (T.unpack uri) of - Nothing -> uri - Just u -> - case uriPath u of - '.':'.':'/':xs -> tshow $ u{ uriPath = xs } - _ -> uri - -------------------------- --- Footnotes -------------------------- - -read_note :: InlineMatcher -read_note = matchingElement NsText "note" - $ liftA note - $ matchChildContent' [ read_note_body ] - -read_note_body :: BlockMatcher -read_note_body = matchingElement NsText "note-body" - $ matchChildContent' [ read_paragraph ] - -------------------------- --- Citations -------------------------- - -read_citation :: InlineMatcher -read_citation = matchingElement NsText "bibliography-mark" - $ liftA2 cite - ( liftA2 makeCitation - ( findAttrTextWithDefault NsText "identifier" "" ) - ( readAttrWithDefault NsText "number" 0 ) - ) - ( matchChildContent [] read_plain_text ) - where - makeCitation :: T.Text -> Int -> [Citation] - makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] - - ----------------------- --- Tables ----------------------- - --- -read_table :: BlockMatcher -read_table = matchingElement NsTable "table" - $ liftA simpleTable' - $ matchChildContent' [ read_table_row - ] - --- | A simple table without a caption or headers --- | Infers the number of headers from rows -simpleTable' :: [[Blocks]] -> Blocks -simpleTable' [] = simpleTable [] [] -simpleTable' (x : rest) = simpleTable (fmap (const defaults) x) (x : rest) - where defaults = fromList [] - --- -read_table_row :: ElementMatcher [[Blocks]] -read_table_row = matchingElement NsTable "table-row" - $ liftA (:[]) - $ matchChildContent' [ read_table_cell - ] - --- -read_table_cell :: ElementMatcher [Blocks] -read_table_cell = matchingElement NsTable "table-cell" - $ liftA (compactify.(:[])) - $ matchChildContent' [ read_paragraph - ] - ----------------------- --- Frames ----------------------- - --- -read_frame :: InlineMatcher -read_frame = matchingElement NsDraw "frame" - $ filterChildrenName' NsDraw (`elem` ["image", "object", "text-box"]) - >>> foldS read_frame_child - >>> arr fold - -read_frame_child :: OdtReaderSafe XML.Element (FirstMatch Inlines) -read_frame_child = - proc child -> case elName child of - "image" -> read_frame_img -< child - "object" -> read_frame_mathml -< child - "text-box" -> read_frame_text_box -< child - _ -> returnV mempty -< () - -read_frame_img :: OdtReaderSafe XML.Element (FirstMatch Inlines) -read_frame_img = - proc img -> do - src <- executeIn (findAttr' NsXLink "href") -< img - case fold src of - "" -> returnV mempty -< () - src' -> do - let exts = extensionsFromList [Ext_auto_identifiers] - resource <- lookupResource -< T.unpack src' - _ <- updateMediaWithResource -< resource - w <- findAttrText' NsSVG "width" -< () - h <- findAttrText' NsSVG "height" -< () - titleNodes <- matchChildContent' [ read_frame_title ] -< () - alt <- matchChildContent [] read_plain_text -< () - arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) - -read_frame_title :: InlineMatcher -read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) - -image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr -image_attributes x y = - ( "", [], dim "width" x ++ dim "height" y) - where - dim _ (Just "") = [] - dim name (Just v) = [(name, v)] - dim _ Nothing = [] - -read_frame_mathml :: OdtReaderSafe XML.Element (FirstMatch Inlines) -read_frame_mathml = - proc obj -> do - src <- executeIn (findAttr' NsXLink "href") -< obj - case fold src of - "" -> returnV mempty -< () - src' -> do - let path = T.unpack $ - fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" - (_, mathml) <- lookupResource -< path - case readMathML (UTF8.toText $ B.toStrict mathml) of - Left _ -> returnV mempty -< () - Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps - -read_frame_text_box :: OdtReaderSafe XML.Element (FirstMatch Inlines) -read_frame_text_box = proc box -> do - paragraphs <- executeIn (matchChildContent' [ read_paragraph ]) -< box - arr read_img_with_caption -< toList paragraphs - -read_img_with_caption :: [Block] -> FirstMatch Inlines -read_img_with_caption (Para [Image attr alt (src,title)] : _) = - firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption -read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = - firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows -read_img_with_caption ( Para (_ : xs) : ys) = - read_img_with_caption (Para xs : ys) -read_img_with_caption _ = - mempty - ----------------------- --- Internal links ----------------------- - -_ANCHOR_PREFIX_ :: T.Text -_ANCHOR_PREFIX_ = "anchor" - --- -readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttrText NsText "name" - --- | Beware: may fail -findAnchorName :: OdtReader AnchorPrefix Anchor -findAnchorName = ( keepingTheValue readAnchorAttr - >>^ spreadChoice - ) >>?! getPrettyAnchor - - --- -maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix - -> OdtReaderSafe Inlines Inlines -maybeAddAnchorFrom anchorReader = - keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) - >>> - proc (inlines, fAnchorElem) -> do - case fAnchorElem of - Right anchorElem -> returnA -< anchorElem - Left _ -> returnA -< inlines - where - toAnchorElem :: Anchor -> Inlines - toAnchorElem anchorID = spanWith (anchorID, [], []) mempty - -- no classes, no key-value pairs - --- -read_bookmark :: InlineMatcher -read_bookmark = matchingElement NsText "bookmark" - $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) - --- -read_bookmark_start :: InlineMatcher -read_bookmark_start = matchingElement NsText "bookmark-start" - $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) - --- -read_reference_start :: InlineMatcher -read_reference_start = matchingElement NsText "reference-mark-start" - $ maybeAddAnchorFrom readAnchorAttr - --- | Beware: may fail -findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttrText NsText "ref-name" - >>?^ (_ANCHOR_PREFIX_,) - ) >>?! getPrettyAnchor - - --- -maybeInAnchorRef :: OdtReaderSafe Inlines Inlines -maybeInAnchorRef = proc inlines -> do - fRef <- findAnchorRef -< () - case fRef of - Right anchor -> - arr (toAnchorRef anchor) -<< inlines - Left _ -> returnA -< inlines - where - toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ("#" <> anchor) "" -- no title - --- -read_bookmark_ref :: InlineMatcher -read_bookmark_ref = matchingElement NsText "bookmark-ref" - $ maybeInAnchorRef - <<< matchChildContent [] read_plain_text - --- -read_reference_ref :: InlineMatcher -read_reference_ref = matchingElement NsText "reference-ref" - $ maybeInAnchorRef - <<< matchChildContent [] read_plain_text - - ----------------------- --- Entry point ----------------------- - -read_text :: OdtReaderSafe _x Pandoc -read_text = matchChildContent' [ read_header - , read_paragraph - , read_list - , read_table - ] - >>^ doc - -post_process :: Pandoc -> Pandoc -post_process (Pandoc m blocks) = - Pandoc m (post_process' blocks) - -post_process' :: [Block] -> [Block] -post_process' (Table attr _ specs th tb tf : Div ("", ["caption"], _) blks : xs) - = Table attr (Caption Nothing blks) specs th tb tf : post_process' xs -post_process' bs = bs - -read_body :: OdtReader _x (Pandoc, MediaBag) -read_body = executeInSub NsOffice "body" - $ executeInSub NsOffice "text" - $ liftAsSuccess - $ proc inlines -> do - txt <- read_text -< inlines - state <- getExtraState -< () - returnA -< (post_process txt, getMediaBag state) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs deleted file mode 100644 index 1902259c4..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ /dev/null @@ -1,99 +0,0 @@ -{- | - Module : Text.Pandoc.Readers.Odt.Generic.Fallible - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Data types and utilities representing failure. Most of it is based on the -"Either" type in its usual configuration (left represents failure). - -In most cases, the failure type is implied or required to be a "Monoid". - -The choice of "Either" instead of a custom type makes it easier to write -compatible instances of "ArrowChoice". --} - --- We export everything -module Text.Pandoc.Readers.Odt.Generic.Fallible where - --- | Default for now. Will probably become a class at some point. -type Failure = () - -type Fallible a = Either Failure a - - --- -maybeToEither :: Maybe a -> Fallible a -maybeToEither (Just a) = Right a -maybeToEither Nothing = Left () - --- -eitherToMaybe :: Either _l a -> Maybe a -eitherToMaybe (Left _) = Nothing -eitherToMaybe (Right a) = Just a - --- | > recover a === either (const a) id -recover :: a -> Either _f a -> a -recover a (Left _) = a -recover _ (Right a) = a - --- | I would love to use 'fail'. Alas, 'Monad.fail'... -failWith :: failure -> Either failure _x -failWith f = Left f - --- -failEmpty :: (Monoid failure) => Either failure _x -failEmpty = failWith mempty - --- -succeedWith :: a -> Either _x a -succeedWith = Right - --- -collapseEither :: Either failure (Either failure x) - -> Either failure x -collapseEither (Left f ) = Left f -collapseEither (Right (Left f)) = Left f -collapseEither (Right (Right x)) = Right x - --- | If either of the values represents a non-error, the result is a --- (possibly combined) non-error. If both values represent an error, an error --- is returned. -chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b -chooseMax = chooseMaxWith mappend - --- | If either of the values represents a non-error, the result is a --- (possibly combined) non-error. If both values represent an error, an error --- is returned. -chooseMaxWith :: (Monoid a) => (b -> b -> b) - -> Either a b - -> Either a b - -> Either a b -chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b -chooseMaxWith _ (Left a) (Left b) = Left $ a `mappend` b -chooseMaxWith _ (Right a) _ = Right a -chooseMaxWith _ _ (Right b) = Right b - - --- | Class of containers that can escalate contained 'Either's. --- The word "Vector" is meant in the sense of a disease transmitter. -class ChoiceVector v where - spreadChoice :: v (Either f a) -> Either f (v a) - -instance ChoiceVector ((,) a) where - spreadChoice (_, Left f) = Left f - spreadChoice (x, Right y) = Right (x,y) - -- Wasn't there a newtype somewhere with the elements flipped? - --- | Wrapper for a list. While the normal list instance of 'ChoiceVector' --- fails whenever it can, this type will never fail. -newtype SuccessList a = SuccessList { collectNonFailing :: [a] } - deriving ( Eq, Ord, Show ) - -instance ChoiceVector SuccessList where - spreadChoice = Right . SuccessList . foldr unTagRight [] . collectNonFailing - where unTagRight (Right x) = (x:) - unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs deleted file mode 100644 index 78a7fc0b2..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ /dev/null @@ -1,45 +0,0 @@ -{- | - Module : Text.Pandoc.Readers.Odt.Generic.Namespaces - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -A class containing a set of namespace identifiers. Used to convert between -typesafe Haskell namespace identifiers and unsafe "real world" namespaces. --} - -module Text.Pandoc.Readers.Odt.Generic.Namespaces where - -import qualified Data.Map as M -import Data.Text (Text) - --- -type NameSpaceIRI = Text - --- -type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI - --- -class (Eq nsID, Ord nsID) => NameSpaceID nsID where - - -- | Given a IRI, possibly update the map and return the id of the namespace. - -- May fail if the namespace is unknown and the application does not - -- allow unknown namespaces. - getNamespaceID :: NameSpaceIRI - -> NameSpaceIRIs nsID - -> Maybe (NameSpaceIRIs nsID, nsID) - -- | Given a namespace id, lookup its IRI. May be overridden for performance. - getIRI :: nsID - -> NameSpaceIRIs nsID - -> Maybe NameSpaceIRI - -- | The root element of an XML document has a namespace, too, and the - -- "XML.Light-parser" is eager to remove the corresponding namespace - -- attribute. - -- As a result, at least this root namespace must be provided. - getInitialIRImap :: NameSpaceIRIs nsID - - getIRI = M.lookup - getInitialIRImap = M.empty diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs deleted file mode 100644 index 0e4fa0990..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs +++ /dev/null @@ -1,30 +0,0 @@ -{- | - Module : Text.Pandoc.Readers.Odt.Generic.SetMap - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -A map of values to sets of values. --} - -module Text.Pandoc.Readers.Odt.Generic.SetMap where - -import qualified Data.Map as M -import qualified Data.Set as S - -type SetMap k v = M.Map k (S.Set v) - -empty :: SetMap k v -empty = M.empty - -fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v -fromList = foldr (uncurry insert) empty - -insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v -insert key value setMap = M.insertWith S.union key (S.singleton value) setMap - -union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v -union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3 diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs deleted file mode 100644 index edefe3c70..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ /dev/null @@ -1,116 +0,0 @@ -{-# LANGUAGE ViewPatterns #-} -{- | - Module : Text.Pandoc.Reader.Odt.Generic.Utils - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -General utility functions for the odt reader. --} - -module Text.Pandoc.Readers.Odt.Generic.Utils -( uncurry3 -, uncurry4 -, uncurry5 -, uncurry6 -, swap -, reverseComposition -, tryToRead -, Lookupable(..) -, readLookupable -, readPercent -, findBy -, swing -, composition -) where - -import Control.Category (Category, (<<<), (>>>)) -import qualified Control.Category as Cat (id) -import Data.Char (isSpace) -import qualified Data.Foldable as F (Foldable, foldr) -import Data.Maybe -import Data.Text (Text) -import qualified Data.Text as T - --- | Equivalent to --- > foldr (.) id --- where '(.)' are 'id' are the ones from "Control.Category" --- and 'foldr' is the one from "Data.Foldable". --- The noun-form was chosen to be consistent with 'sum', 'product' etc --- based on the discussion at --- --- (that I was not part of) -composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a -composition = F.foldr (<<<) Cat.id - --- | Equivalent to --- > foldr (flip (.)) id --- where '(.)' are 'id' are the ones from "Control.Category" --- and 'foldr' is the one from "Data.Foldable". --- A reversed version of 'composition'. -reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a -reverseComposition = F.foldr (>>>) Cat.id - --- | This function often makes it possible to switch values with the functions --- that are applied to them. --- --- Examples: --- > swing map :: [a -> b] -> a -> [b] --- > swing any :: [a -> Bool] -> a -> Bool --- > swing foldr :: b -> a -> [a -> b -> b] -> b --- > swing scanr :: c -> a -> [a -> c -> c] -> c --- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c] --- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool) --- --- Stolen from -swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d -swing = flip.(.flip id) --- swing f c a = f ($ a) c - - --- | Alternative to 'read'/'reads'. The former of these throws errors --- (nobody wants that) while the latter returns "to much" for simple purposes. --- This function instead applies 'reads' and returns the first match (if any) --- in a 'Maybe'. -tryToRead :: (Read r) => Text -> Maybe r -tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst - --- | A version of 'reads' that requires a '%' sign after the number -readPercent :: ReadS Int -readPercent s = [ (i,s') | (i , r ) <- reads s - , ("%" , s') <- lex r - ] - --- | Data that can be looked up. --- This is mostly a utility to read data with kind *. -class Lookupable a where - lookupTable :: [(Text, a)] - --- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => Text -> Maybe a -readLookupable s = - lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable - -uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z -uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z -uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z -uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z - -uncurry3 fun (a,b,c ) = fun a b c -uncurry4 fun (a,b,c,d ) = fun a b c d -uncurry5 fun (a,b,c,d,e ) = fun a b c d e -uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f - -swap :: (a,b) -> (b,a) -swap (a,b) = (b,a) - --- | A version of "Data.List.find" that uses a converter to a Maybe instance. --- The returned value is the first which the converter returns in a 'Just' --- wrapper. -findBy :: (a -> Maybe b) -> [a] -> Maybe b -findBy _ [] = Nothing -findBy f ((f -> Just x):_ ) = Just x -findBy f ( _:xs) = findBy f xs diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs deleted file mode 100644 index 551279cf6..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ /dev/null @@ -1,775 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE PatternGuards #-} -{- | - Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -A generalized XML parser based on stateful arrows. -It might be sufficient to define this reader as a comonad, but there is -not a lot of use in trying. --} - -module Text.Pandoc.Readers.Odt.Generic.XMLConverter -( ElementName -, XMLConverterState -, XMLConverter -, FallibleXMLConverter -, runConverter' -, getExtraState -, setExtraState -, modifyExtraState -, producingExtraState -, findChild' -, filterChildrenName' -, isSet' -, isSetWithDefault -, elName -, searchAttr -, lookupAttr -, lookupAttr' -, lookupDefaultingAttr -, findAttr' -, findAttrText' -, findAttr -, findAttrText -, findAttrTextWithDefault -, readAttr -, readAttr' -, readAttrWithDefault -, getAttr -, executeIn -, executeInSub -, withEveryL -, tryAll -, matchContent' -, matchContent -) where - -import Prelude hiding (Applicative(..)) -import Control.Applicative hiding ( liftA, liftA2 ) -import Control.Monad ( MonadPlus ) -import Control.Arrow - -import Data.Bool ( bool ) -import Data.Either ( rights ) -import qualified Data.Map as M -import Data.Text (Text) -import Data.Default -import Data.Maybe -import Data.List (foldl') - -import qualified Text.Pandoc.XML.Light as XML - -import Text.Pandoc.Readers.Odt.Arrows.State -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Namespaces -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible - --------------------------------------------------------------------------------- --- Basis types for readability --------------------------------------------------------------------------------- - --- -type ElementName = Text -type AttributeName = Text -type AttributeValue = Text -type TextAttributeValue = Text - --- -type NameSpacePrefix = Text - --- -type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix - --------------------------------------------------------------------------------- --- Main converter state --------------------------------------------------------------------------------- - --- GADT so some of the NameSpaceID restrictions can be deduced -data XMLConverterState nsID extraState where - XMLConverterState :: NameSpaceID nsID - => { -- | A stack of parent elements. The top element is the current one. - -- Arguably, a real Zipper would be better. But that is an - -- optimization that can be made at a later time, e.g. when - -- replacing Text.XML.Light. - parentElements :: [XML.Element] - -- | A map from internal namespace IDs to the namespace prefixes - -- used in XML elements - , namespacePrefixes :: NameSpacePrefixes nsID - -- | A map from internal namespace IDs to namespace IRIs - -- (Only necessary for matching namespace IDs and prefixes) - , namespaceIRIs :: NameSpaceIRIs nsID - -- | A place to put "something else". This feature is used heavily - -- to keep the main code cleaner. More specifically, the main reader - -- is divided into different stages. Each stage lifts something up - -- here, which the next stage can then use. This could of course be - -- generalized to a state-tree or used for the namespace IRIs. The - -- border between states and values is an imaginary one, after all. - -- But the separation as it is seems to be enough for now. - , moreState :: extraState - } - -> XMLConverterState nsID extraState - --- -createStartState :: (NameSpaceID nsID) - => XML.Element - -> extraState - -> XMLConverterState nsID extraState -createStartState element extraState = - XMLConverterState - { parentElements = [element] - , namespacePrefixes = M.empty - , namespaceIRIs = getInitialIRImap - , moreState = extraState - } - --- | Functor over extra state -instance Functor (XMLConverterState nsID) where - fmap f ( XMLConverterState parents prefixes iRIs extraState ) - = XMLConverterState parents prefixes iRIs (f extraState) - --- -replaceExtraState :: extraState - -> XMLConverterState nsID _x - -> XMLConverterState nsID extraState -replaceExtraState x s - = fmap (const x) s - --- -currentElement :: XMLConverterState nsID extraState - -> XML.Element -currentElement state = head (parentElements state) - --- | Replace the current position by another, modifying the extra state --- in the process -swapStack' :: XMLConverterState nsID extraState - -> [XML.Element] - -> ( XMLConverterState nsID extraState , [XML.Element] ) -swapStack' state stack - = ( state { parentElements = stack } - , parentElements state - ) - --- -pushElement :: XML.Element - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState -pushElement e state = state { parentElements = e:parentElements state } - --- | Pop the top element from the call stack, unless it is the last one. -popElement :: XMLConverterState nsID extraState - -> Maybe (XMLConverterState nsID extraState) -popElement state - | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es } - | otherwise = Nothing - --------------------------------------------------------------------------------- --- Main type --------------------------------------------------------------------------------- - --- It might be a good idea to pack the converters in a GADT --- Downside: data instead of type --- Upside: 'Failure' could be made a parameter as well. - --- -type XMLConverter nsID extraState input output - = ArrowState (XMLConverterState nsID extraState ) input output - -type FallibleXMLConverter nsID extraState input output - = XMLConverter nsID extraState input (Fallible output) - --- -runConverter :: XMLConverter nsID extraState input output - -> XMLConverterState nsID extraState - -> input - -> output -runConverter converter state input = snd $ runArrowState converter (state,input) - -runConverter' :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState () success - -> extraState - -> XML.Element - -> Fallible success -runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () - --- -getCurrentElement :: XMLConverter nsID extraState x XML.Element -getCurrentElement = extractFromState currentElement - --- -getExtraState :: XMLConverter nsID extraState x extraState -getExtraState = extractFromState moreState - --- -setExtraState :: XMLConverter nsID extraState extraState extraState -setExtraState = withState $ \state extra - -> (replaceExtraState extra state , extra) - - --- | Lifts a function to the extra state. -modifyExtraState :: (extraState -> extraState) - -> XMLConverter nsID extraState x x -modifyExtraState = modifyState.fmap - - --- | First sets the extra state to the new value. Then modifies the original --- extra state with a converter that uses the new state. Finally, the --- intermediate state is dropped and the extra state is lifted into the --- state as it was at the beginning of the function. --- As a result, exactly the extra state and nothing else is changed. --- The resulting converter even behaves like an identity converter on the --- value level. --- --- (The -ing form is meant to be mnemonic in a sequence of arrows as in --- convertingExtraState () converter >>> doOtherStuff) --- -convertingExtraState :: extraState' - -> FallibleXMLConverter nsID extraState' extraState extraState - -> FallibleXMLConverter nsID extraState x x -convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA - where - setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v - modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?% flip replaceExtraState - --- | First sets the extra state to the new value. Then produces a new --- extra state with a converter that uses the new state. Finally, the --- intermediate state is dropped and the extra state is lifted into the --- state as it was at the beginning of the function. --- As a result, exactly the extra state and nothing else is changed. --- The resulting converter even behaves like an identity converter on the --- value level. --- --- Equivalent to --- --- > \v x a -> convertingExtraState v (returnV x >>> a) --- --- (The -ing form is meant to be mnemonic in a sequence of arrows as in --- producingExtraState () () producer >>> doOtherStuff) --- -producingExtraState :: extraState' - -> a - -> FallibleXMLConverter nsID extraState' a extraState - -> FallibleXMLConverter nsID extraState x x -producingExtraState v x a = convertingExtraState v (returnV x >>> a) - - --------------------------------------------------------------------------------- --- Work in namespaces --------------------------------------------------------------------------------- - --- | Arrow version of 'getIRI' -lookupNSiri :: (NameSpaceID nsID) - => nsID - -> XMLConverter nsID extraState x (Maybe NameSpaceIRI) -lookupNSiri nsID = extractFromState - $ \state -> getIRI nsID $ namespaceIRIs state - --- -lookupNSprefix :: (NameSpaceID nsID) - => nsID - -> XMLConverter nsID extraState x (Maybe NameSpacePrefix) -lookupNSprefix nsID = extractFromState - $ \state -> M.lookup nsID $ namespacePrefixes state - --- | Extracts namespace attributes from the current element and tries to --- update the current mapping accordingly -readNSattributes :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState x () -readNSattributes = fromState $ \state -> maybe (state, failEmpty ) - ( , succeedWith ()) - (extractNSAttrs state ) - where - extractNSAttrs :: (NameSpaceID nsID) - => XMLConverterState nsID extraState - -> Maybe (XMLConverterState nsID extraState) - extractNSAttrs startState - = foldl' (\state d -> state >>= addNS d) - (Just startState) - nsAttribs - where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) - element = currentElement startState - readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri) - = Just (name, iri) - readNSattr _ = Nothing - addNS (prefix, iri) state = fmap updateState - $ getNamespaceID iri - $ namespaceIRIs state - where updateState (iris,nsID) - = state { namespaceIRIs = iris - , namespacePrefixes = M.insert nsID prefix - $ namespacePrefixes state - } - --------------------------------------------------------------------------------- --- Common namespace accessors --------------------------------------------------------------------------------- - --- | Given a namespace id and an element name, creates a 'XML.QName' for --- internal use -qualifyName :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x XML.QName -qualifyName nsID name = lookupNSiri nsID - &&& lookupNSprefix nsID - >>% XML.QName name - --- | Checks if a given element matches both a specified namespace id --- and a predicate -elemNameMatches :: (NameSpaceID nsID) - => nsID -> (ElementName -> Bool) - -> XMLConverter nsID extraState XML.Element Bool -elemNameMatches nsID f = keepingTheValue (lookupNSiri nsID) >>% hasMatchingName - where hasMatchingName e iri = let name = XML.elName e - in f (XML.qName name) - && XML.qURI name == iri - --- | Checks if a given element matches both a specified namespace id --- and a specified element name -elemNameIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = elemNameMatches nsID (== name) - --------------------------------------------------------------------------------- --- General content --------------------------------------------------------------------------------- - -elName :: XML.Element -> ElementName -elName = XML.qName . XML.elName - --- -elContent :: XMLConverter nsID extraState x [XML.Content] -elContent = getCurrentElement - >>^ XML.elContent - --------------------------------------------------------------------------------- --- Children --------------------------------------------------------------------------------- - --- --- -findChildren :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x [XML.Element] -findChildren nsID name = qualifyName nsID name - &&& getCurrentElement - >>% XML.findChildren - --- -findChild' :: (NameSpaceID nsID) - => nsID - -> ElementName - -> XMLConverter nsID extraState x (Maybe XML.Element) -findChild' nsID name = qualifyName nsID name - &&& getCurrentElement - >>% XML.findChild - --- -findChild :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x XML.Element -findChild nsID name = findChild' nsID name - >>> maybeToChoice - -filterChildrenName' :: (NameSpaceID nsID) - => nsID - -> (ElementName -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildrenName' nsID f = getCurrentElement - >>> arr XML.elChildren - >>> iterateS (keepingTheValue (elemNameMatches nsID f)) - >>> arr (map fst . filter snd) - --------------------------------------------------------------------------------- --- Attributes --------------------------------------------------------------------------------- - --- -isSet' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe Bool) -isSet' nsID attrName = findAttr' nsID attrName - >>^ (>>= stringToBool') - -isSetWithDefault :: (NameSpaceID nsID) - => nsID -> AttributeName - -> Bool - -> XMLConverter nsID extraState x Bool -isSetWithDefault nsID attrName def' - = isSet' nsID attrName - >>^ fromMaybe def' - --- | Lookup value in a dictionary, fail if no attribute found or value --- not in dictionary -searchAttrIn :: (NameSpaceID nsID) - => nsID -> AttributeName - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrIn nsID attrName dict - = findAttr nsID attrName - >>?^? maybeToChoice.(`lookup` dict ) - --- | Lookup value in a dictionary. If attribute or value not found, --- return default value -searchAttr :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> XMLConverter nsID extraState x a -searchAttr nsID attrName defV dict - = searchAttrIn nsID attrName dict - >>> const defV ^|||^ id - --- | Read a 'Lookupable' attribute. Fail if no match. -lookupAttr :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x a -lookupAttr nsID attrName = lookupAttr' nsID attrName - >>^ maybeToChoice - - --- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'. -lookupAttr' :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe a) -lookupAttr' nsID attrName - = findAttr' nsID attrName - >>^ (>>= readLookupable) - --- | Read a 'Lookupable' attribute with explicit default -lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> a - -> XMLConverter nsID extraState x a -lookupAttrWithDefault nsID attrName deflt - = lookupAttr' nsID attrName - >>^ fromMaybe deflt - --- | Read a 'Lookupable' attribute with implicit default -lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) - => nsID -> AttributeName - -> XMLConverter nsID extraState x a -lookupDefaultingAttr nsID attrName - = lookupAttrWithDefault nsID attrName def - --- | Return value as a (Maybe Text) -findAttr' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe AttributeValue) -findAttr' nsID attrName = qualifyName nsID attrName - &&& getCurrentElement - >>% XML.findAttr - --- | Return value as a (Maybe Text) -findAttrText' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe TextAttributeValue) -findAttrText' nsID attrName - = qualifyName nsID attrName - &&& getCurrentElement - >>% XML.findAttr - --- | Return value as string or fail -findAttr :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x AttributeValue -findAttr nsID attrName = findAttr' nsID attrName - >>> maybeToChoice - --- | Return value as text or fail -findAttrText :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x TextAttributeValue -findAttrText nsID attrName - = findAttr' nsID attrName - >>> maybeToChoice - --- | Return value as string or return provided default value -findAttrTextWithDefault :: (NameSpaceID nsID) - => nsID -> AttributeName - -> TextAttributeValue - -> XMLConverter nsID extraState x TextAttributeValue -findAttrTextWithDefault nsID attrName deflt - = findAttr' nsID attrName - >>^ fromMaybe deflt - --- | Read and return value or fail -readAttr :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x attrValue -readAttr nsID attrName = readAttr' nsID attrName - >>> maybeToChoice - --- | Read and return value or return Nothing -readAttr' :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe attrValue) -readAttr' nsID attrName = findAttr' nsID attrName - >>^ (>>= tryToRead) - --- | Read and return value or return provided default value -readAttrWithDefault :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> attrValue - -> XMLConverter nsID extraState x attrValue -readAttrWithDefault nsID attrName deflt - = findAttr' nsID attrName - >>^ (>>= tryToRead) - >>^ fromMaybe deflt - --- | Read and return value or return default value from 'Default' instance -getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue) - => nsID -> AttributeName - -> XMLConverter nsID extraState x attrValue -getAttr nsID attrName = readAttrWithDefault nsID attrName def - --------------------------------------------------------------------------------- --- Movements --------------------------------------------------------------------------------- - --- -jumpThere :: XMLConverter nsID extraState XML.Element XML.Element -jumpThere = withState (\state element - -> ( pushElement element state , element ) - ) - --- -swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element] -swapStack = withState swapStack' - --- -jumpBack :: FallibleXMLConverter nsID extraState _x _x -jumpBack = tryModifyState (popElement >>> maybeToChoice) - --- | Support function for "procedural" converters: jump to an element, execute --- a converter, jump back. --- This version is safer than 'executeThere', because it does not rely on the --- internal stack. As a result, the converter can not move around in arbitrary --- ways. The downside is of course that some of the environment is not --- accessible to the converter. -switchingTheStack :: XMLConverter nsID moreState a b - -> XMLConverter nsID moreState (a, XML.Element) b -switchingTheStack a = second ( (:[]) ^>> swapStack ) - >>> first a - >>> second swapStack - >>^ fst - --- | Support function for "procedural" converters: jumps to an element, executes --- a converter, jumps back. --- Make sure that the converter is well-behaved; that is it should --- return to the exact position it started from in /every possible path/ of --- execution, even if it "fails". If it does not, you may encounter --- strange bugs. If you are not sure about the behaviour or want to use --- shortcuts, you can often use 'switchingTheStack' instead. -executeThere :: FallibleXMLConverter nsID moreState a b - -> FallibleXMLConverter nsID moreState (a, XML.Element) b -executeThere a = second jumpThere - >>> fst - ^>> a - >>> jumpBack -- >>? jumpBack would not ensure the jump. - >>^ collapseEither - - --- | Do something in a specific element, then come back -executeIn :: XMLConverter nsID extraState XML.Element s - -> XMLConverter nsID extraState XML.Element s -executeIn a = duplicate >>> switchingTheStack a - --- | Do something in a sub-element, then come back -executeInSub :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState f s - -> FallibleXMLConverter nsID extraState f s -executeInSub nsID name a = keepingTheValue - (findChild nsID name) - >>> ignoringState liftFailure - >>? switchingTheStack a - where liftFailure (_, Left f) = Left f - liftFailure (x, Right e) = Right (x, e) - --------------------------------------------------------------------------------- --- Iterating over children --------------------------------------------------------------------------------- - --- Helper converter to prepare different types of iterations. --- It lifts the children (of a certain type) of the current element --- into the value level and pairs each one with the current input value. -prepareIteration :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState b [(b, XML.Element)] -prepareIteration nsID name = keepingTheValue - (findChildren nsID name) - >>% distributeValue - --- -withEveryL :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a b - -> FallibleXMLConverter nsID extraState a [b] -withEveryL = withEvery - --- | Applies a converter to every child element of a specific type. --- Collects results in a 'MonadPlus'. --- Fails completely if any conversion fails. -withEvery :: (NameSpaceID nsID, MonadPlus m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a b - -> FallibleXMLConverter nsID extraState a (m b) -withEvery nsID name a = prepareIteration nsID name - >>> iterateS' (switchingTheStack a) - --- | Applies a converter to every child element of a specific type. --- Collects all successful results in a list. -tryAll :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b [a] -tryAll nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ rights - --------------------------------------------------------------------------------- --- Matching children --------------------------------------------------------------------------------- - -type IdXMLConverter nsID moreState x - = XMLConverter nsID moreState x x - -type MaybeCConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) - --- Chainable converter that helps deciding which converter to actually use. -type ContentMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeCConverter nsID extraState x, XML.Content) - --- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML content to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherC :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ContentMatchConverter nsID extraState a -makeMatcherC nsID name c = ( second ( contentToElem - >>> returnV Nothing - ||| ( elemNameIs nsID name - >>^ bool Nothing (Just cWithJump) - ) - ) - >>% (<|>) - ) &&&^ snd - where cWithJump = ( fst - ^&&& ( second contentToElem - >>> spreadChoice - ^>>? executeThere c - ) - >>% recover) - &&&^ snd - contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element - contentToElem = arr $ \case - XML.Elem e' -> succeedWith e' - _ -> failEmpty - --- Creates and chains a bunch of matchers -prepareMatchersC :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ContentMatchConverter nsID extraState x ---prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC) -prepareMatchersC = reverseComposition . map (uncurry3 makeMatcherC) - --- | Takes a list of element-data - converter groups and --- * Finds all content of the current element --- * Matches each group to each piece of content in order --- (at most one group per piece of content) --- * Filters non-matched content --- * Chains all found converters in content-order --- * Applies the chain to the input element -matchContent' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent' lookups = matchContent lookups (arr fst) - --- | Takes a list of element-data - converter groups and --- * Finds all content of the current element --- * Matches each group to each piece of content in order --- (at most one group per piece of content) --- * Adds a default converter for all non-matched content --- * Chains all found converters in content-order --- * Applies the chain to the input element -matchContent :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState (a,XML.Content) a - -> XMLConverter nsID extraState a a -matchContent lookups fallback - = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ map swallowOrFallback - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst - swallowOrFallback (Nothing ,content) = (,content) ^>> fallback - --------------------------------------------------------------------------------- --- Internals --------------------------------------------------------------------------------- - -stringToBool' :: Text -> Maybe Bool -stringToBool' val | val `elem` trueValues = Just True - | val `elem` falseValues = Just False - | otherwise = Nothing - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - - -distributeValue :: a -> [b] -> [(a,b)] -distributeValue = map.(,) - --------------------------------------------------------------------------------- - -{- -NOTES -It might be a good idea to refactor the namespace stuff. -E.g.: if a namespace constructor took a string as a parameter, things like -> a ?>/< (NsText,"body") -would be nicer. -Together with a rename and some trickery, something like -> |< NsText "body" >< NsText "p" ?> a | -might even be possible. - -Some day, XML.Light should be replaced by something better. -While doing that, it might be useful to replace String as the type of element -names with something else, too. (Of course with OverloadedStrings). -While doing that, maybe the types can be created in a way that something like -> NsText:"body" -could be used. Overloading (:) does not sounds like the best idea, but if the -element name type was a list, this might be possible. -Of course that would be a bit hackish, so the "right" way would probably be -something like -> InNS NsText "body" -but isn't that a bit boring? ;) --} diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs deleted file mode 100644 index 70741c28d..000000000 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ /dev/null @@ -1,93 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Reader.Odt.Namespaces - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Namespaces used in odt files. --} - -module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) - ) where - -import qualified Data.Map as M (empty, insert) -import Data.Maybe (fromMaybe, listToMaybe) -import Data.Text (Text) -import qualified Data.Text as T -import Text.Pandoc.Readers.Odt.Generic.Namespaces - - -instance NameSpaceID Namespace where - - getInitialIRImap = nsIDmap - - getNamespaceID "" m = Just(m, NsXML) - getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri) - where asPair nsID = Just (M.insert nsID iri m, nsID) - - -findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] - -nsIDmap :: NameSpaceIRIs Namespace -nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs - -data Namespace = -- Open Document core - NsOffice | NsStyle | NsText | NsTable | NsForm - | NsDraw | Ns3D | NsAnim | NsChart | NsConfig - | NsDB | NsMeta | NsNumber | NsScript | NsManifest - | NsPresentation - -- Metadata - | NsODF - -- Compatible elements - | NsXSL_FO | NsSVG | NsSmil - -- External standards - | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL - | NsDublinCore - -- Metadata manifest - | NsPKG - -- Others - | NsOpenFormula - -- Core XML (basically only for the 'id'-attribute) - | NsXML - -- Fallback - | NsOther Text - deriving ( Eq, Ord, Show ) - --- | Not the actual iri's, but large prefixes of them - this way there are --- less versioning problems and the like. -nsIDs :: [(Text, Namespace)] -nsIDs = [ - ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), - ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), - ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ), - ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ), - ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ), - ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ), - ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ), - ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ), - ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ), - ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ), - ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ), - ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ), - ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ), - ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ), - ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ), - ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ), - ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ), - ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ), - ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ), - ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ), - ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ), - ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ), - ("http://purl.org/dc/elements" , NsDublinCore ), - ("http://www.w3.org/2003/g/data-view" , NsGRDDL ), - ("http://www.w3.org/1998/Math/MathML" , NsMathML ), - ("http://www.w3.org/1999/xhtml" , NsXHtml ), - ("http://www.w3.org/2002/xforms" , NsXForms ), - ("http://www.w3.org/1999/xlink" , NsXLink ) - ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs deleted file mode 100644 index 7337194cb..000000000 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ /dev/null @@ -1,640 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE Arrows #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Readers.Odt.StyleReader - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann - Stability : alpha - Portability : portable - -Reader for the style information in an odt document. --} - -module Text.Pandoc.Readers.Odt.StyleReader -( Style (..) -, StyleName -, StyleFamily (..) -, Styles (..) -, StyleProperties (..) -, TextProperties (..) -, ParaProperties (..) -, VerticalTextPosition (..) -, ListItemNumberFormat (..) -, ListLevel -, ListStyle (..) -, ListLevelStyle (..) -, ListLevelType (..) -, LengthOrPercent (..) -, lookupStyle -, getListLevelStyle -, getStyleFamily -, lookupDefaultStyle' -, lookupListStyleByName -, extendedStylePropertyChain -, readStylesAt -) where - -import Prelude hiding (Applicative(..)) -import Control.Applicative hiding (liftA, liftA2, liftA3) -import Control.Arrow - -import Data.Default -import qualified Data.Foldable as F -import Data.List (unfoldr, foldl') -import qualified Data.Map as M -import Data.Maybe -import Data.Text (Text) -import qualified Data.Text as T -import qualified Data.Set as S - -import qualified Text.Pandoc.XML.Light as XML - -import Text.Pandoc.Shared (safeRead, tshow) - -import Text.Pandoc.Readers.Odt.Arrows.Utils - -import Text.Pandoc.Readers.Odt.Generic.Fallible -import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter - -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces - -readStylesAt :: XML.Element -> Fallible Styles -readStylesAt e = runConverter' readAllStyles mempty e - --------------------------------------------------------------------------------- --- Reader for font declarations and font pitches --------------------------------------------------------------------------------- - --- Pandoc has no support for different font pitches. Yet knowing them can be --- very helpful in cases where Pandoc has more semantics than OpenDocument. --- In these cases, the pitch can help deciding as what to define a block of --- text. So let's start with a type for font pitches: - -data FontPitch = PitchVariable | PitchFixed - deriving ( Eq, Show ) - -instance Lookupable FontPitch where - lookupTable = [ ("variable" , PitchVariable) - , ("fixed" , PitchFixed ) - ] - -instance Default FontPitch where - def = PitchVariable - --- The font pitch can be specified in a style directly. Normally, however, --- it is defined in the font. That is also the specs' recommendation. --- --- Thus, we want - -type FontFaceName = Text - -type FontPitches = M.Map FontFaceName FontPitch - --- To get there, the fonts have to be read and the pitches extracted. --- But the resulting map are only needed at one later place, so it should not be --- transported on the value level, especially as we already use a state arrow. --- So instead, the resulting map is lifted into the state of the reader. --- (An alternative might be ImplicitParams, but again, we already have a state.) --- --- So the main style readers will have the types -type StyleReader a b = XMLReader FontPitches a b --- and -type StyleReaderSafe a b = XMLReaderSafe FontPitches a b --- respectively. --- --- But before we can work with these, we need to define the reader that reads --- the fonts: - --- | A reader for font pitches -fontPitchReader :: XMLReader _s _x FontPitches -fontPitchReader = executeInSub NsOffice "font-face-decls" ( - withEveryL NsStyle "font-face" (liftAsSuccess ( - findAttr' NsStyle "name" - &&& - lookupDefaultingAttr NsStyle "font-pitch" - )) - >>?^ ( M.fromList . foldl' accumLegalPitches [] ) - ) `ifFailedDo` returnV (Right M.empty) - where accumLegalPitches ls (Nothing,_) = ls - accumLegalPitches ls (Just n,p) = (n,p):ls - - --- | A wrapper around the font pitch reader that lifts the result into the --- state. -readFontPitches :: StyleReader x x -readFontPitches = producingExtraState () () fontPitchReader - - --- | Looking up a pitch in the state of the arrow. --- --- The function does the following: --- * Look for the font pitch in an attribute. --- * If that fails, look for the font name, look up the font in the state --- and use the pitch from there. --- * Return the result in a Maybe --- -findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) -findPitch = ( lookupAttr NsStyle "font-pitch" - `ifFailedDo` findAttr NsStyle "font-name" - >>? ( keepingTheValue getExtraState - >>% M.lookup - >>^ maybeToChoice - ) - ) - >>> choiceToMaybe - --------------------------------------------------------------------------------- --- Definitions of main data --------------------------------------------------------------------------------- - -type StyleName = Text - --- | There are two types of styles: named styles with a style family and an --- optional style parent, and default styles for each style family, --- defining default style properties -data Styles = Styles - { stylesByName :: M.Map StyleName Style - , listStylesByName :: M.Map StyleName ListStyle - , defaultStyleMap :: M.Map StyleFamily StyleProperties - } - deriving ( Show ) - --- Styles from a monoid under union -instance Semigroup Styles where - (Styles sBn1 dSm1 lsBn1) <> (Styles sBn2 dSm2 lsBn2) - = Styles (M.union sBn1 sBn2) - (M.union dSm1 dSm2) - (M.union lsBn1 lsBn2) -instance Monoid Styles where - mempty = Styles M.empty M.empty M.empty - mappend = (<>) - --- Not all families from the specifications are implemented, only those we need. --- But there are none that are not mentioned here. -data StyleFamily = FaText | FaParagraph --- | FaTable | FaTableCell | FaTableColumn | FaTableRow --- | FaGraphic | FaDrawing | FaChart --- | FaPresentation --- | FaRuby - deriving ( Eq, Ord, Show ) - -instance Lookupable StyleFamily where - lookupTable = [ ( "text" , FaText ) - , ( "paragraph" , FaParagraph ) --- , ( "table" , FaTable ) --- , ( "table-cell" , FaTableCell ) --- , ( "table-column" , FaTableColumn ) --- , ( "table-row" , FaTableRow ) --- , ( "graphic" , FaGraphic ) --- , ( "drawing-page" , FaDrawing ) --- , ( "chart" , FaChart ) --- , ( "presentation" , FaPresentation ) --- , ( "ruby" , FaRuby ) - ] - --- | A named style -data Style = Style { styleFamily :: Maybe StyleFamily - , styleParentName :: Maybe StyleName - , listStyle :: Maybe StyleName - , styleProperties :: StyleProperties - } - deriving ( Eq, Show ) - -data StyleProperties = SProps { textProperties :: Maybe TextProperties - , paraProperties :: Maybe ParaProperties --- , tableColProperties :: Maybe TColProperties --- , tableRowProperties :: Maybe TRowProperties --- , tableCellProperties :: Maybe TCellProperties --- , tableProperties :: Maybe TableProperties --- , graphicProperties :: Maybe GraphProperties - } - deriving ( Eq, Show ) - -instance Default StyleProperties where - def = SProps { textProperties = Just def - , paraProperties = Just def - } - -data TextProperties = PropT { isEmphasised :: Bool - , isStrong :: Bool - , pitch :: Maybe FontPitch - , verticalPosition :: VerticalTextPosition - , underline :: Maybe UnderlineMode - , strikethrough :: Maybe UnderlineMode - } - deriving ( Eq, Show ) - -instance Default TextProperties where - def = PropT { isEmphasised = False - , isStrong = False - , pitch = Just def - , verticalPosition = def - , underline = Nothing - , strikethrough = Nothing - } - -data ParaProperties = PropP { paraNumbering :: ParaNumbering - , indentation :: LengthOrPercent - , margin_left :: LengthOrPercent - } - deriving ( Eq, Show ) - -instance Default ParaProperties where - def = PropP { paraNumbering = NumberingNone - , indentation = def - , margin_left = def - } - ----- --- All the little data types that make up the properties ----- - -data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub - deriving ( Eq, Show ) - -instance Default VerticalTextPosition where - def = VPosNormal - -instance Read VerticalTextPosition where - readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ] - ++ [ (VPosSuper , s') | ("super" , s') <- lexS ] - ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ] - where - lexS = lex s - signumToVPos n | n < 0 = VPosSub - | n > 0 = VPosSuper - | otherwise = VPosNormal - -data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace - deriving ( Eq, Show ) - -instance Lookupable UnderlineMode where - lookupTable = [ ( "continuous" , UnderlineModeNormal ) - , ( "skip-white-space" , UnderlineModeSkipWhitespace ) - ] - - -data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int - deriving ( Eq, Show ) - -data LengthOrPercent = LengthValueMM Int | PercentValue Int - deriving ( Eq, Show ) - -instance Default LengthOrPercent where - def = LengthValueMM 0 - -instance Read LengthOrPercent where - readsPrec _ s = - [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s] - ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s - , (unit , s'') <- reads s' - , let lengthMM = estimateInMillimeter - length' unit - ] - -data XslUnit = XslUnitMM | XslUnitCM - | XslUnitInch - | XslUnitPoints | XslUnitPica - | XslUnitPixel - | XslUnitEM - -instance Show XslUnit where - show XslUnitMM = "mm" - show XslUnitCM = "cm" - show XslUnitInch = "in" - show XslUnitPoints = "pt" - show XslUnitPica = "pc" - show XslUnitPixel = "px" - show XslUnitEM = "em" - -instance Read XslUnit where - readsPrec _ "mm" = [(XslUnitMM , "")] - readsPrec _ "cm" = [(XslUnitCM , "")] - readsPrec _ "in" = [(XslUnitInch , "")] - readsPrec _ "pt" = [(XslUnitPoints , "")] - readsPrec _ "pc" = [(XslUnitPica , "")] - readsPrec _ "px" = [(XslUnitPixel , "")] - readsPrec _ "em" = [(XslUnitEM , "")] - readsPrec _ _ = [] - --- | Rough conversion of measures into millimetres. --- Pixels and em's are actually implementation dependent/relative measures, --- so I could not really easily calculate anything exact here even if I wanted. --- But I do not care about exactness right now, as I only use measures --- to determine if a paragraph is "indented" or not. -estimateInMillimeter :: Int -> XslUnit -> Int -estimateInMillimeter n XslUnitMM = n -estimateInMillimeter n XslUnitCM = n * 10 -estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4 -estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4 -estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4 -estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4 -estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4 - - ----- --- List styles ----- - -type ListLevel = Int - -newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle - } - deriving ( Eq, Show ) - --- -getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle -getListLevelStyle level ListStyle{..} = - let (lower , exactHit , _) = M.splitLookup level levelStyles - in exactHit <|> fmap fst (M.maxView lower) - -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] - -- \^ simpler, but in general less efficient - -data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe Text - , listItemSuffix :: Maybe Text - , listItemFormat :: ListItemNumberFormat - , listItemStart :: Int - } - deriving ( Eq, Ord ) - -instance Show ListLevelStyle where - show ListLevelStyle{..} = " listItemPrefix) - ++ show listItemFormat - ++ maybeToString (T.unpack <$> listItemSuffix) - ++ ">" - where maybeToString = fromMaybe "" - -data ListLevelType = LltBullet | LltImage | LltNumbered - deriving ( Eq, Ord, Show ) - -data ListItemNumberFormat = LinfNone - | LinfNumber - | LinfRomanLC | LinfRomanUC - | LinfAlphaLC | LinfAlphaUC - | LinfString String - deriving ( Eq, Ord ) - -instance Show ListItemNumberFormat where - show LinfNone = "" - show LinfNumber = "1" - show LinfRomanLC = "i" - show LinfRomanUC = "I" - show LinfAlphaLC = "a" - show LinfAlphaUC = "A" - show (LinfString s) = s - -instance Default ListItemNumberFormat where - def = LinfNone - -instance Read ListItemNumberFormat where - readsPrec _ "" = [(LinfNone , "")] - readsPrec _ "1" = [(LinfNumber , "")] - readsPrec _ "i" = [(LinfRomanLC , "")] - readsPrec _ "I" = [(LinfRomanUC , "")] - readsPrec _ "a" = [(LinfAlphaLC , "")] - readsPrec _ "A" = [(LinfAlphaUC , "")] - readsPrec _ s = [(LinfString s , "")] - --------------------------------------------------------------------------------- --- Readers --- --- ...it seems like a whole lot of this should be automatically derivable --- or at least moveable into a class. Most of this is data concealed in --- code. --------------------------------------------------------------------------------- - --- -readAllStyles :: StyleReader _x Styles -readAllStyles = ( readFontPitches - >>?! ( readAutomaticStyles - &&& readStyles )) - >>?%? chooseMax - -- all top elements are always on the same hierarchy level - --- -readStyles :: StyleReader _x Styles -readStyles = executeInSub NsOffice "styles" $ liftAsSuccess - $ liftA3 Styles - ( tryAll NsStyle "style" readStyle >>^ M.fromList ) - ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) - ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList ) - --- -readAutomaticStyles :: StyleReader _x Styles -readAutomaticStyles = executeInSub NsOffice "automatic-styles" $ liftAsSuccess - $ liftA3 Styles - ( tryAll NsStyle "style" readStyle >>^ M.fromList ) - ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) - ( returnV M.empty ) - --- -readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties) -readDefaultStyle = lookupAttr NsStyle "family" - >>?! keepingTheValue readStyleProperties - --- -readStyle :: StyleReader _x (StyleName,Style) -readStyle = findAttr NsStyle "name" - >>?! keepingTheValue - ( liftA4 Style - ( lookupAttr' NsStyle "family" ) - ( findAttr' NsStyle "parent-style-name" ) - ( findAttr' NsStyle "list-style-name" ) - readStyleProperties - ) - --- -readStyleProperties :: StyleReaderSafe _x StyleProperties -readStyleProperties = liftA2 SProps - ( readTextProperties >>> choiceToMaybe ) - ( readParaProperties >>> choiceToMaybe ) - --- -readTextProperties :: StyleReader _x TextProperties -readTextProperties = - executeInSub NsStyle "text-properties" $ liftAsSuccess - ( liftA6 PropT - ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) - ( searchAttr NsXSL_FO "font-weight" False isFontBold ) - findPitch - ( getAttr NsStyle "text-position" ) - readUnderlineMode - readStrikeThroughMode - ) - where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] - isFontBold = ("normal",False):("bold",True) - :map ((,True) . tshow) ([100,200..900]::[Int]) - -readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) -readUnderlineMode = readLineMode "text-underline-mode" - "text-underline-style" - -readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) -readStrikeThroughMode = readLineMode "text-line-through-mode" - "text-line-through-style" - -readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) -readLineMode modeAttr styleAttr = proc x -> do - isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x - mode <- lookupAttr' NsStyle modeAttr -< x - if isUL - then case mode of - Just m -> returnA -< Just m - Nothing -> returnA -< Just UnderlineModeNormal - else returnA -< Nothing - where - isLinePresent = ("none",False) : map (,True) - [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" - , "long-dash" , "solid" , "wave" - ] - --- -readParaProperties :: StyleReader _x ParaProperties -readParaProperties = - executeInSub NsStyle "paragraph-properties" $ liftAsSuccess - ( liftA3 PropP - ( liftA2 readNumbering - ( isSet' NsText "number-lines" ) - ( readAttr' NsText "line-number" ) - ) - ( liftA2 readIndentation - ( isSetWithDefault NsStyle "auto-text-indent" False ) - ( getAttr NsXSL_FO "text-indent" ) - ) - ( getAttr NsXSL_FO "margin-left" ) - ) - where readNumbering (Just True) (Just n) = NumberingRestart n - readNumbering (Just True) _ = NumberingKeep - readNumbering _ _ = NumberingNone - - readIndentation False indent = indent - readIndentation True _ = def - ----- --- List styles ----- - --- -readListStyle :: StyleReader _x (StyleName, ListStyle) -readListStyle = - findAttr NsStyle "name" - >>?! keepingTheValue - ( liftA ListStyle - $ liftA3 SM.union3 - ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) - ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) - ( readListLevelStyles NsText "list-level-style-image" LltImage ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle - ) --- -readListLevelStyles :: Namespace -> ElementName - -> ListLevelType - -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) -readListLevelStyles namespace elementName levelType = - tryAll namespace elementName (readListLevelStyle levelType) - >>^ SM.fromList - --- -readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) -readListLevelStyle levelType = readAttr NsText "level" - >>?! keepingTheValue - ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttrText' NsText "start-value" ) - ) - where - toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) - toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b) - toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b) - startValue mbx = fromMaybe 1 (mbx >>= safeRead) - --- -chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle -chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls - where - select l Nothing = Just l - select ( ListLevelStyle t1 p1 s1 f1 b1 ) - ( Just ( ListLevelStyle t2 p2 s2 f2 _ )) - = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) - (selectLinf f1 f2) b1 - select' LltNumbered _ = LltNumbered - select' _ LltNumbered = LltNumbered - select' _ _ = LltBullet - selectLinf LinfNone f2 = f2 - selectLinf f1 LinfNone = f1 - selectLinf (LinfString _) f2 = f2 - selectLinf f1 (LinfString _) = f1 - selectLinf f1 _ = f1 - - --------------------------------------------------------------------------------- --- Tools to access style data --------------------------------------------------------------------------------- - --- -lookupStyle :: StyleName -> Styles -> Maybe Style -lookupStyle name Styles{..} = M.lookup name stylesByName - --- -lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties -lookupDefaultStyle' Styles{..} family = fromMaybe def - (M.lookup family defaultStyleMap) - --- -lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle -lookupListStyleByName name Styles{..} = M.lookup name listStylesByName - - --- | Returns a chain of parent of the current style. The direct parent will --- be the first element of the list, followed by its parent and so on. --- The current style is not in the list. -parents :: Style -> Styles -> [Style] -parents style styles = unfoldr findNextParent style -- Ha! - where findNextParent Style{..} - = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName - --- | Looks up the style family of the current style. Normally, every style --- should have one. But if not, all parents are searched. -getStyleFamily :: Style -> Styles -> Maybe StyleFamily -getStyleFamily style@Style{..} styles - = styleFamily - <|> F.asum (map (`getStyleFamily` styles) $ parents style styles) - --- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property --- values are specified. Instead, a value might be inherited from a --- parent style. This function makes this chain of inheritance --- concrete and easily accessible by encapsulating the necessary lookups. --- The resulting list contains the direct properties of the style as the first --- element, the ones of the direct parent element as the next one, and so on. --- --- Note: There should also be default properties for each style family. These --- are @not@ contained in this list because properties inherited from --- parent elements take precedence over default styles. --- --- This function is primarily meant to be used through convenience wrappers. --- -stylePropertyChain :: Style -> Styles -> [StyleProperties] -stylePropertyChain style styles - = map styleProperties (style : parents style styles) - --- -extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] -extendedStylePropertyChain [] _ = [] -extendedStylePropertyChain [style] styles = stylePropertyChain style styles - ++ maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)) -extendedStylePropertyChain (style:trace) styles = stylePropertyChain style styles - ++ extendedStylePropertyChain trace styles diff --git a/test/Tests/Readers/ODT.hs b/test/Tests/Readers/ODT.hs new file mode 100644 index 000000000..ce107af69 --- /dev/null +++ b/test/Tests/Readers/ODT.hs @@ -0,0 +1,187 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Tests.Readers.ODT + Copyright : © 2015-2022 John MacFarlane + 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Tests for the ODT reader. +-} +module Tests.Readers.ODT (tests) where + +import Control.Monad (liftM) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import qualified Data.Map as M +import Data.Text (unpack) +import System.IO.Unsafe (unsafePerformIO) +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import qualified Text.Pandoc.UTF8 as UTF8 + +defopts :: ReaderOptions +defopts = def{ readerExtensions = getDefaultExtensions "odt" } + +tests :: [TestTree] +tests = testsComparingToMarkdown ++ testsComparingToNative + +testsComparingToMarkdown :: [TestTree] +testsComparingToMarkdown = map nameToTest namesOfTestsComparingToMarkdown + where nameToTest name = createTest + compareODTToMarkdown + name + (toODTPath name) + (toMarkdownPath name) + toODTPath name = "odt/odt/" ++ name ++ ".odt" + toMarkdownPath name = "odt/markdown/" ++ name ++ ".md" + +testsComparingToNative :: [TestTree] +testsComparingToNative = map nameToTest namesOfTestsComparingToNative + where nameToTest name = createTest + compareODTToNative + name + (toODTPath name) + (toNativePath name) + toODTPath name = "odt/odt/" ++ name ++ ".odt" + toNativePath name = "odt/native/" ++ name ++ ".native" + + +newtype NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} + deriving ( Show ) + +instance ToString NoNormPandoc where + toString d = unpack $ + purely (writeNative def{ writerTemplate = s }) $ toPandoc d + where s = case d of + NoNormPandoc (Pandoc (Meta m) _) + | M.null m -> Nothing + | otherwise -> Just mempty -- need this for Meta output + +instance ToPandoc NoNormPandoc where + toPandoc = unNoNorm + +getNoNormVia :: (a -> Pandoc) -> String -> Either PandocError a -> NoNormPandoc +getNoNormVia _ readerName (Left _) = error (readerName ++ " reader failed") +getNoNormVia f _ (Right a) = NoNormPandoc (f a) + +type TestCreator = ReaderOptions + -> FilePath -> FilePath + -> IO (NoNormPandoc, NoNormPandoc) + +compareODTToNative :: TestCreator +compareODTToNative opts odtPath nativePath = do + nativeFile <- UTF8.toText <$> BS.readFile nativePath + odtFile <- B.readFile odtPath + native <- getNoNormVia id "native" <$> runIO (readNative def nativeFile) + odt <- getNoNormVia id "odt" <$> runIO (readODT opts odtFile) + return (odt,native) + +compareODTToMarkdown :: TestCreator +compareODTToMarkdown opts odtPath markdownPath = do + markdownFile <- UTF8.toText <$> BS.readFile markdownPath + odtFile <- B.readFile odtPath + markdown <- getNoNormVia id "markdown" <$> + runIO (readMarkdown def{ readerExtensions = pandocExtensions } + markdownFile) + odt <- getNoNormVia id "odt" <$> runIO (readODT opts odtFile) + return (odt,markdown) + + +createTest :: TestCreator + -> TestName + -> FilePath -> FilePath + -> TestTree +createTest creator name path1 path2 = + unsafePerformIO $ liftM (test id name) (creator defopts path1 path2) + +{- +-- + +getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) +getMedia archivePath mediaPath = do + zf <- B.readFile archivePath >>= return . toArchive + return $ findEntryByPath ("Pictures/" ++ mediaPath) zf >>= (Just . fromEntry) + +compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO mediaPath mediaBag odtPath = do + odtMedia <- getMedia odtPath mediaPath + let mbBS = case lookupMedia mediaPath mediaBag of + Just (_, bs) -> bs + Nothing -> error ("couldn't find " ++ + mediaPath ++ + " in media bag") + odtBS = case odtMedia of + Just bs -> bs + Nothing -> error ("couldn't find " ++ + mediaPath ++ + " in media bag") + return $ mbBS == odtBS + +compareMediaBagIO :: FilePath -> IO Bool +compareMediaBagIO odtFile = do + df <- B.readFile odtFile + let (_, mb) = readODT def df + bools <- mapM + (\(fp, _, _) -> compareMediaPathIO fp mb odtFile) + (mediaDirectory mb) + return $ and bools + +testMediaBagIO :: String -> FilePath -> IO TestTree +testMediaBagIO name odtFile = do + outcome <- compareMediaBagIO odtFile + return $ testCase name (assertBool + ("Media didn't match media bag in file " ++ odtFile) + outcome) + +testMediaBag :: String -> FilePath -> TestTree +testMediaBag name odtFile = buildTest $ testMediaBagIO name odtFile +-} +-- + + + +namesOfTestsComparingToMarkdown :: [ String ] +namesOfTestsComparingToMarkdown = [ "bold" +-- , "citation" + , "endnote" + , "externalLink" + , "footnote" + , "formula" + , "headers" +-- , "horizontalRule" + , "italic" +-- , "listBlocks" + , "paragraph" + , "strikeout" +-- , "trackedChanges" + , "underlined" + ] + +namesOfTestsComparingToNative :: [ String ] +namesOfTestsComparingToNative = [ "blockquote" + , "image" + , "imageIndex" + , "imageWithCaption" + , "inlinedCode" + , "orderedListMixed" + , "orderedListRoman" + , "orderedListSimple" + , "orderedListHeader" + , "referenceToChapter" + , "referenceToListItem" + , "referenceToText" + , "simpleTable" + , "simpleTableWithCaption" + , "tab" +-- , "table" + , "textMixedStyles" + , "tableWithContents" + , "unicode" + , "unorderedList" + , "unorderedListHeader" + ] diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs deleted file mode 100644 index 8dcd7b29b..000000000 --- a/test/Tests/Readers/Odt.hs +++ /dev/null @@ -1,187 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Tests.Readers.Odt - Copyright : © 2015-2022 John MacFarlane - 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Tests for the ODT reader. --} -module Tests.Readers.Odt (tests) where - -import Control.Monad (liftM) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as B -import qualified Data.Map as M -import Data.Text (unpack) -import System.IO.Unsafe (unsafePerformIO) -import Test.Tasty -import Tests.Helpers -import Text.Pandoc -import qualified Text.Pandoc.UTF8 as UTF8 - -defopts :: ReaderOptions -defopts = def{ readerExtensions = getDefaultExtensions "odt" } - -tests :: [TestTree] -tests = testsComparingToMarkdown ++ testsComparingToNative - -testsComparingToMarkdown :: [TestTree] -testsComparingToMarkdown = map nameToTest namesOfTestsComparingToMarkdown - where nameToTest name = createTest - compareOdtToMarkdown - name - (toOdtPath name) - (toMarkdownPath name) - toOdtPath name = "odt/odt/" ++ name ++ ".odt" - toMarkdownPath name = "odt/markdown/" ++ name ++ ".md" - -testsComparingToNative :: [TestTree] -testsComparingToNative = map nameToTest namesOfTestsComparingToNative - where nameToTest name = createTest - compareOdtToNative - name - (toOdtPath name) - (toNativePath name) - toOdtPath name = "odt/odt/" ++ name ++ ".odt" - toNativePath name = "odt/native/" ++ name ++ ".native" - - -newtype NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} - deriving ( Show ) - -instance ToString NoNormPandoc where - toString d = unpack $ - purely (writeNative def{ writerTemplate = s }) $ toPandoc d - where s = case d of - NoNormPandoc (Pandoc (Meta m) _) - | M.null m -> Nothing - | otherwise -> Just mempty -- need this for Meta output - -instance ToPandoc NoNormPandoc where - toPandoc = unNoNorm - -getNoNormVia :: (a -> Pandoc) -> String -> Either PandocError a -> NoNormPandoc -getNoNormVia _ readerName (Left _) = error (readerName ++ " reader failed") -getNoNormVia f _ (Right a) = NoNormPandoc (f a) - -type TestCreator = ReaderOptions - -> FilePath -> FilePath - -> IO (NoNormPandoc, NoNormPandoc) - -compareOdtToNative :: TestCreator -compareOdtToNative opts odtPath nativePath = do - nativeFile <- UTF8.toText <$> BS.readFile nativePath - odtFile <- B.readFile odtPath - native <- getNoNormVia id "native" <$> runIO (readNative def nativeFile) - odt <- getNoNormVia id "odt" <$> runIO (readOdt opts odtFile) - return (odt,native) - -compareOdtToMarkdown :: TestCreator -compareOdtToMarkdown opts odtPath markdownPath = do - markdownFile <- UTF8.toText <$> BS.readFile markdownPath - odtFile <- B.readFile odtPath - markdown <- getNoNormVia id "markdown" <$> - runIO (readMarkdown def{ readerExtensions = pandocExtensions } - markdownFile) - odt <- getNoNormVia id "odt" <$> runIO (readOdt opts odtFile) - return (odt,markdown) - - -createTest :: TestCreator - -> TestName - -> FilePath -> FilePath - -> TestTree -createTest creator name path1 path2 = - unsafePerformIO $ liftM (test id name) (creator defopts path1 path2) - -{- --- - -getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) -getMedia archivePath mediaPath = do - zf <- B.readFile archivePath >>= return . toArchive - return $ findEntryByPath ("Pictures/" ++ mediaPath) zf >>= (Just . fromEntry) - -compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool -compareMediaPathIO mediaPath mediaBag odtPath = do - odtMedia <- getMedia odtPath mediaPath - let mbBS = case lookupMedia mediaPath mediaBag of - Just (_, bs) -> bs - Nothing -> error ("couldn't find " ++ - mediaPath ++ - " in media bag") - odtBS = case odtMedia of - Just bs -> bs - Nothing -> error ("couldn't find " ++ - mediaPath ++ - " in media bag") - return $ mbBS == odtBS - -compareMediaBagIO :: FilePath -> IO Bool -compareMediaBagIO odtFile = do - df <- B.readFile odtFile - let (_, mb) = readOdt def df - bools <- mapM - (\(fp, _, _) -> compareMediaPathIO fp mb odtFile) - (mediaDirectory mb) - return $ and bools - -testMediaBagIO :: String -> FilePath -> IO TestTree -testMediaBagIO name odtFile = do - outcome <- compareMediaBagIO odtFile - return $ testCase name (assertBool - ("Media didn't match media bag in file " ++ odtFile) - outcome) - -testMediaBag :: String -> FilePath -> TestTree -testMediaBag name odtFile = buildTest $ testMediaBagIO name odtFile --} --- - - - -namesOfTestsComparingToMarkdown :: [ String ] -namesOfTestsComparingToMarkdown = [ "bold" --- , "citation" - , "endnote" - , "externalLink" - , "footnote" - , "formula" - , "headers" --- , "horizontalRule" - , "italic" --- , "listBlocks" - , "paragraph" - , "strikeout" --- , "trackedChanges" - , "underlined" - ] - -namesOfTestsComparingToNative :: [ String ] -namesOfTestsComparingToNative = [ "blockquote" - , "image" - , "imageIndex" - , "imageWithCaption" - , "inlinedCode" - , "orderedListMixed" - , "orderedListRoman" - , "orderedListSimple" - , "orderedListHeader" - , "referenceToChapter" - , "referenceToListItem" - , "referenceToText" - , "simpleTable" - , "simpleTableWithCaption" - , "tab" --- , "table" - , "textMixedStyles" - , "tableWithContents" - , "unicode" - , "unorderedList" - , "unorderedListHeader" - ] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 678d534dd..d4e069739 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -23,7 +23,7 @@ import qualified Tests.Readers.Jira import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Muse -import qualified Tests.Readers.Odt +import qualified Tests.Readers.ODT import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Readers.RTF @@ -89,7 +89,7 @@ tests pandocPath = testGroup "pandoc tests" , testGroup "RST" Tests.Readers.RST.tests , testGroup "RTF" Tests.Readers.RTF.tests , testGroup "Docx" Tests.Readers.Docx.tests - , testGroup "Odt" Tests.Readers.Odt.tests + , testGroup "ODT" Tests.Readers.ODT.tests , testGroup "Txt2Tags" Tests.Readers.Txt2Tags.tests , testGroup "EPUB" Tests.Readers.EPUB.tests , testGroup "Muse" Tests.Readers.Muse.tests -- cgit v1.2.3