1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2020 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
Stability : alpha
Portability : portable
For parsing Field definitions in instText tags, as described in
ECMA-376-1:2016, §17.16.5 -}
module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
, parseFieldInfo
) where
import Data.Functor (($>), void)
import qualified Data.Text as T
import Text.Pandoc.Parsing
-- | Target of a hyperlink field, as produced by the 'hyperlink' parser.
type URL = T.Text
-- | The first field argument of a @PAGEREF@ field, as produced by 'pageref'.
type Anchor = T.Text
-- | The kinds of field instruction this module can recognise.
data FieldInfo
  = HyperlinkField URL
    -- ^ A @HYPERLINK@ field together with its target.
  | PagerefField Anchor Bool
    -- ^ A @PAGEREF@ field. The boolean indicates whether the field is a
    -- hyperlink.
  | CslCitation T.Text
    -- ^ An @ADDIN@ field containing @CSL_CITATION@; carries the unparsed
    -- remainder of the instruction.
  | CslBibliography
    -- ^ An @ADDIN@ bibliography marker (@ZOTERO_BIBL@ or the Mendeley
    -- bibliography marker).
  | EndNoteCite T.Text
    -- ^ An @ADDIN EN.CITE@ field; carries the unparsed remainder of the
    -- instruction.
  | EndNoteRefList
    -- ^ An @ADDIN EN.REFLIST@ field.
  | UnknownField
    -- ^ Fallback for any instruction that no other parser accepts.
  deriving (Show)
-- | Parsec parser over 'T.Text' input that carries no user state.
type Parser = Parsec T.Text ()
-- | Classify the text of a field instruction.
--
-- The parser is run with an empty source name. Input left over after a
-- successful match is ignored, because end of input is not demanded.
parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo instruction = parse fieldInfo "" instruction
-- | Try each known field parser in turn, backtracking after every failed
-- attempt, and fall back to 'UnknownField' when none of them matches.
fieldInfo :: Parser FieldInfo
fieldInfo =
      try hyperlinkField
  <|> try pagerefField
  <|> try addIn
  <|> pure UnknownField
  where
    hyperlinkField = fmap HyperlinkField hyperlink
    pagerefField = do
      (anchor, isLink) <- pageref
      pure (PagerefField anchor isLink)
-- | Parse an @ADDIN@ field: optional leading whitespace, the keyword
-- @ADDIN@, optional whitespace, then one of the recognised citation-manager
-- payloads.
addIn :: Parser FieldInfo
addIn = spaces *> string "ADDIN" *> spaces *> payload
  where
    -- Order matters: the CSL citation parser is tried first, with
    -- backtracking, before the remaining alternatives.
    payload =
          try cslCitation
      <|> cslBibliography
      <|> endnoteCite
      <|> endnoteRefList
-- | Parse a CSL citation payload: an optional @ZOTERO_ITEM@ prefix, the
-- keyword @CSL_CITATION@, and then everything that is left of the input,
-- which is returned unparsed inside 'CslCitation'.
cslCitation :: Parser FieldInfo
cslCitation =
  optional (string "ZOTERO_ITEM")
    *> spaces
    *> string "CSL_CITATION"
    *> spaces
    *> (CslCitation <$> getInput)
-- | Parse a bibliography marker, either @ZOTERO_BIBL@ or
-- @Mendeley Bibliography CSL_BIBLIOGRAPHY@, yielding 'CslBibliography'.
cslBibliography :: Parser FieldInfo
cslBibliography = CslBibliography <$ marker
  where
    marker =
          string "ZOTERO_BIBL"
      <|> string "Mendeley Bibliography CSL_BIBLIOGRAPHY"
-- | Parse an EndNote citation payload: the keyword @EN.CITE@, optional
-- whitespace, and then the rest of the input, returned unparsed inside
-- 'EndNoteCite'. The whole parser backtracks on failure.
endnoteCite :: Parser FieldInfo
endnoteCite =
  try (string "EN.CITE" *> spaces *> fmap EndNoteCite getInput)
-- | Parse the EndNote reference-list marker @EN.REFLIST@, backtracking on
-- failure.
endnoteRefList :: Parser FieldInfo
endnoteRefList = try (EndNoteRefList <$ string "EN.REFLIST")
-- | Match a backslash-escaped double quote. The result keeps the backslash:
-- the two characters are returned exactly as they appeared in the input.
escapedQuote :: Parser T.Text
escapedQuote = string escaped $> T.pack escaped
  where
    escaped :: String
    escaped = "\\\""
-- | Parse one unit of quoted-string content: an escaped quote if one is
-- next, otherwise any single character.
inQuotes :: Parser T.Text
inQuotes = try escapedQuote <|> singleChar
  where
    singleChar = fmap T.singleton anyChar
-- | Parse a double-quoted string and return its contents without the
-- surrounding quotes. Content is read with 'inQuotes' up to the first
-- unescaped closing quote.
quotedString :: Parser T.Text
quotedString = char '"' *> fmap T.concat (manyTill inQuotes closingQuote)
  where
    closingQuote = try (char '"')
-- | Parse a bare (unquoted) argument: every character up to, but not
-- including, the next whitespace character, or up to the end of input.
-- The result may be empty.
unquotedString :: Parser T.Text
unquotedString = fmap T.pack (manyTill anyChar terminator)
  where
    -- Whitespace is only peeked at, never consumed.
    terminator :: Parser ()
    terminator = try (void (lookAhead space) <|> eof)
-- | Parse one field argument: a double-quoted string when the input starts
-- with a quote, otherwise a run of characters up to the next whitespace or
-- the end of input.
fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString
-- | Parse one @HYPERLINK@ field switch (see ECMA-376-1:2016, §17.16.5.25)
-- and return it as @(switch, argument)@.
--
-- The switches @\\l@, @\\o@ and @\\t@ are followed by a field argument;
-- @\\m@ and @\\n@ take none, and their argument is returned as the empty
-- text. Any other backslash sequence makes the parser fail /without/
-- consuming input, so that @many hyperlinkSwitch@ stops cleanly in front of
-- it instead of failing the whole field. Trailing whitespace is skipped so
-- that consecutive switches can be parsed one after another.
hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = (withArgument <|> withoutArgument) <* spaces
  where
    -- Backtrack when only the backslash matches; a bare 'string' would
    -- otherwise fail after consuming it.
    switch :: Char -> Parser String
    switch c = try (string ['\\', c])

    withArgument = do
      sw <- switch 'l' <|> switch 'o' <|> switch 't'
      spaces
      farg <- fieldArgument
      return (T.pack sw, farg)

    withoutArgument = do
      sw <- switch 'm' <|> switch 'n'
      return (T.pack sw, "")
-- | Parse a @HYPERLINK@ field and return the URL it points at.
--
-- The optional first argument is the link target. If an @\\l@ switch is
-- present anywhere among the parsed switches, its argument is appended to
-- the target after a @#@; with no target this gives a purely internal link
-- such as @#anchor@.
--
-- Switch parsing is wrapped in 'try': a switch that 'hyperlinkSwitch' does
-- not accept ends the switch list rather than failing the whole field,
-- which would otherwise turn a perfectly good link into 'UnknownField'.
hyperlink :: Parser URL
hyperlink = do
  spaces
  _ <- string "HYPERLINK"
  spaces
  -- An argument that starts with a backslash is a switch, not the target.
  farg <- option "" $ notFollowedBy (char '\\') *> fieldArgument
  switches <- many (try (spaces *> hyperlinkSwitch))
  return $ case lookup "\\l" switches of
    Just anchor -> farg <> "#" <> anchor
    Nothing     -> farg
-- | Parse one @PAGEREF@ field switch. See §17.16.5.45.
--
-- The field-specific switches @\\h@ and @\\p@ take no argument, so the
-- second component of the result is always empty. Any other backslash
-- sequence makes the parser fail without consuming input.
pagerefSwitch :: Parser (T.Text, T.Text)
pagerefSwitch = do
  sw <- try (char '\\' *> (char 'h' <|> char 'p'))
  return (T.pack ['\\', sw], "")

-- | Parse a @PAGEREF@ field, returning the referenced anchor and whether
-- the field should act as a hyperlink.
--
-- The flag is set when an @\\h@ switch occurs anywhere among the parsed
-- switches. Switch parsing backtracks, so an unrecognised trailing switch
-- ends the switch list instead of failing the whole field.
pageref :: Parser (Anchor, Bool)
pageref = do
  spaces
  _ <- string "PAGEREF"
  spaces
  farg <- fieldArgument
  switches <- many (try (spaces *> pagerefSwitch))
  let isLink = any ((== "\\h") . fst) switches
  return (farg, isLink)
|