summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Fields.hs
blob: 08e253ffbd73a66227803c1086cdfe64ab1d0e6a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Docx.Fields
   Copyright   : Copyright (C) 2014-2020 Jesse Rosenthal
   License     : GNU GPL, version 2 or above

   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
   Stability   : alpha
   Portability : portable

For parsing Field definitions in instText tags, as described in
ECMA-376-1:2016, §17.16.5 -}

module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
                                       , parseFieldInfo
                                       ) where

import Data.Functor (($>), void)
import qualified Data.Text as T
import Text.Pandoc.Parsing

type URL = T.Text
type Anchor = T.Text

data FieldInfo = HyperlinkField URL
                -- The boolean indicates whether the field is a hyperlink.
               | PagerefField Anchor Bool
               | CslCitation T.Text
               | CslBibliography
               | EndNoteCite T.Text
               | EndNoteRefList
               | UnknownField
               deriving (Show)

type Parser = Parsec T.Text ()

parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""

fieldInfo :: Parser FieldInfo
fieldInfo =
  try (HyperlinkField <$> hyperlink)
  <|>
  try ((uncurry PagerefField) <$> pageref)
  <|>
  try addIn
  <|>
  return UnknownField

addIn :: Parser FieldInfo
addIn = do
  spaces
  string "ADDIN"
  spaces
  try cslCitation <|> cslBibliography <|> endnoteCite <|> endnoteRefList

cslCitation :: Parser FieldInfo
cslCitation = do
  optional (string "ZOTERO_ITEM")
  spaces
  string "CSL_CITATION"
  spaces
  CslCitation <$> getInput

cslBibliography :: Parser FieldInfo
cslBibliography = do
  string "ZOTERO_BIBL" <|> string "Mendeley Bibliography CSL_BIBLIOGRAPHY"
  return CslBibliography

endnoteCite :: Parser FieldInfo
endnoteCite = try $ do
  string "EN.CITE"
  spaces
  EndNoteCite <$> getInput

endnoteRefList :: Parser FieldInfo
endnoteRefList = try $ do
  string "EN.REFLIST"
  return EndNoteRefList


escapedQuote :: Parser T.Text
escapedQuote = string "\\\"" $> "\\\""

inQuotes :: Parser T.Text
inQuotes =
  try escapedQuote <|> (T.singleton <$> anyChar)

quotedString :: Parser T.Text
quotedString = do
  char '"'
  T.concat <$> manyTill inQuotes (try (char '"'))

unquotedString :: Parser T.Text
unquotedString = T.pack <$> manyTill anyChar (try $ void (lookAhead space) <|> eof)

fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString

-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = do
  sw <- string "\\l"
  spaces
  farg <- fieldArgument
  return (T.pack sw, farg)

hyperlink :: Parser URL
hyperlink = do
  many space
  string "HYPERLINK"
  spaces
  farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument
  switches <- spaces *> many hyperlinkSwitch
  let url = case switches of
              ("\\l", s) : _ -> farg <> "#" <> s
              _              -> farg
  return url

-- See §17.16.5.45
pagerefSwitch :: Parser (T.Text, T.Text)
pagerefSwitch = do
  sw <- string "\\h"
  spaces
  farg <- fieldArgument
  return (T.pack sw, farg)

pageref :: Parser (Anchor, Bool)
pageref = do
  many space
  string "PAGEREF"
  spaces
  farg <- fieldArgument
  switches <- spaces *> many pagerefSwitch
  let isLink = case switches of
              ("\\h", _) : _ -> True
              _              -> False
  return (farg, isLink)