summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-03-12 22:03:10 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2017-03-12 22:07:28 +0100
commit2f8f8f0da64388fa01d83fccf3cf1f2899c64269 (patch)
tree163b99c64a5ccffc7c419a5d7d686995959aea9f
parentc8b906256dccc7b0572c1707dee9b211c86e996d (diff)
Issue warning for duplicate header identifiers.
As noted in the previous commit, an autogenerated identifier may still coincide with an explicit identifier that is given for a header later in the document, or with an identifier on a div, span, link, or image. This commit adds a warning in this case, so users can supply an explicit identifier. * Added `DuplicateIdentifier` to LogMessage. * Modified HTML, Org, MediaWiki readers so their custom state type is an instance of HasLogMessages. This is necessary for `registerHeader` to issue warnings. See #1745.
-rw-r--r--src/Text/Pandoc/Logging.hs11
-rw-r--r--src/Text/Pandoc/Parsing.hs10
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs16
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs8
6 files changed, 46 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index ba836b91a..3d2cc2287 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -62,6 +62,7 @@ data LogMessage =
| CouldNotParseYamlMetadata String SourcePos
| DuplicateLinkReference String SourcePos
| DuplicateNoteReference String SourcePos
+ | DuplicateIdentifier String SourcePos
| ReferenceNotFound String SourcePos
| CircularReference String SourcePos
| ParsingUnescaped String SourcePos
@@ -106,6 +107,11 @@ instance ToJSON LogMessage where
"source" .= Text.pack (sourceName pos),
"line" .= toJSON (sourceLine pos),
"column" .= toJSON (sourceColumn pos)]
+ DuplicateIdentifier s pos ->
+ ["contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
ReferenceNotFound s pos ->
["contents" .= Text.pack s,
"source" .= Text.pack (sourceName pos),
@@ -184,6 +190,8 @@ showLogMessage msg =
"Duplicate link reference '" ++ s ++ "' at " ++ showPos pos
DuplicateNoteReference s pos ->
"Duplicate note reference '" ++ s ++ "' at " ++ showPos pos
+ DuplicateIdentifier s pos ->
+ "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos
ReferenceNotFound s pos ->
"Reference not found for '" ++ s ++ "' at " ++ showPos pos
CircularReference s pos ->
@@ -233,6 +241,7 @@ messageVerbosity msg =
CouldNotParseYamlMetadata{} -> WARNING
DuplicateLinkReference{} -> WARNING
DuplicateNoteReference{} -> WARNING
+ DuplicateIdentifier{} -> WARNING
ReferenceNotFound{} -> WARNING
CircularReference{} -> WARNING
CouldNotLoadIncludeFile{} -> WARNING
@@ -249,4 +258,4 @@ messageVerbosity msg =
CouldNotParseCSS{} -> WARNING
Fetching{} -> INFO
NoTitleElement{} -> WARNING
- NoLangSpecified -> INFO \ No newline at end of file
+ NoLangSpecified -> INFO
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index a616058bb..3058185da 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1112,8 +1112,11 @@ type SubstTable = M.Map Key Inlines
-- with its associated identifier. If the identifier is null
-- and the auto_identifers extension is set, generate a new
-- unique identifier, and update the list of identifiers
--- in state.
-registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+-- in state. Issue a warning if an explicit identifier
+-- is encountered that duplicates an earlier identifier
+-- (explict or automatically generated).
+registerHeader :: (Stream s m a, HasReaderOptions st,
+ HasHeaderMap st, HasLogMessages st, HasIdentifierList st)
=> Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
ids <- extractIdentifierList <$> getState
@@ -1131,6 +1134,9 @@ registerHeader (ident,classes,kvs) header' = do
return (id'',classes,kvs)
else do
unless (null ident) $ do
+ when (ident `Set.member` ids) $ do
+ pos <- getPosition
+ logMessage $ DuplicateIdentifier ident pos
updateState $ updateIdentifierList $ Set.insert ident
updateState $ updateHeaderMap $ insert' header' ident
return (ident,classes,kvs)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7e7d505ac..0af369469 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -83,14 +83,15 @@ readHtml opts inp = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
+ reportLogMessages
return $ Pandoc meta bs'
getError (errorMessages -> ms) = case ms of
[] -> ""
(m:_) -> messageString m
result <- flip runReaderT def $
- runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
- "source" tags
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+ "source" tags
case result of
Right doc -> return doc
Left err -> throwError $ PandocParseError $ getError err
@@ -110,7 +111,8 @@ data HTMLState =
noteTable :: [(String, Blocks)],
baseHref :: Maybe URI,
identifiers :: Set.Set String,
- headerMap :: M.Map Inlines String
+ headerMap :: M.Map Inlines String,
+ logMessages :: [LogMessage]
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -376,7 +378,7 @@ ignore raw = do
-- raw can be null for tags like <!DOCTYPE>; see paRawTag
-- in this case we don't want a warning:
unless (null raw) $
- report $ SkippedContent raw pos
+ logMessage $ SkippedContent raw pos
return mempty
pHtmlBlock :: PandocMonad m => String -> TagParser m String
@@ -1092,6 +1094,10 @@ instance HasHeaderMap HTMLState where
extractHeaderMap = headerMap
updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+instance HasLogMessages HTMLState where
+ addLogMessage m s = s{ logMessages = m : logMessages s }
+ getLogMessages = reverse . logMessages
+
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index fa20ade07..b35f39aad 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -73,6 +73,7 @@ readMediaWiki opts s = do
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = Set.empty
+ , mwLogMessages = []
}
(s ++ "\n")
case parsed of
@@ -85,6 +86,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
, mwIdentifierList :: Set.Set String
+ , mwLogMessages :: [LogMessage]
}
type MWParser m = ParserT [Char] MWState m
@@ -100,6 +102,10 @@ instance HasIdentifierList MWState where
extractIdentifierList = mwIdentifierList
updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
+instance HasLogMessages MWState where
+ addLogMessage m s = s{ mwLogMessages = m : mwLogMessages s }
+ getLogMessages = reverse . mwLogMessages
+
--
-- auxiliary functions
--
@@ -187,6 +193,7 @@ parseMediaWiki = do
let categories = if null categoryLinks
then mempty
else B.para $ mconcat $ intersperse B.space categoryLinks
+ reportLogMessages
return $ B.doc $ bs <> categories
--
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index cc3ed6003..5e509178d 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
+import Text.Pandoc.Parsing (reportLogMessages)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
@@ -59,4 +60,5 @@ parseOrg :: PandocMonad m => OrgParser m Pandoc
parseOrg = do
blocks' <- blockList
meta' <- meta
+ reportLogMessages
return $ Pandoc meta' blocks'
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 0bbe27991..6bed2a547 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -60,7 +60,9 @@ import qualified Data.Set as Set
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import Text.Pandoc.Definition (Meta (..), nullMeta)
import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Logging
import Text.Pandoc.Parsing (HasHeaderMap (..), HasIdentifierList (..),
+ HasLogMessages (..),
HasLastStrPosition (..), HasQuoteContext (..),
HasReaderOptions (..), ParserContext (..),
QuoteContext (..), SourcePos)
@@ -104,6 +106,7 @@ data OrgParserState = OrgParserState
, orgStateOptions :: ReaderOptions
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
+ , orgLogMessages :: [LogMessage]
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -130,6 +133,10 @@ instance HasHeaderMap OrgParserState where
extractHeaderMap = orgStateHeaderMap
updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+instance HasLogMessages OrgParserState where
+ addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
+ getLogMessages st = reverse $ orgLogMessages st
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -150,6 +157,7 @@ defaultOrgParserState = OrgParserState
, orgStateOptions = def
, orgStateParserContext = NullState
, orgStateTodoSequences = []
+ , orgLogMessages = []
}
optionsToParserState :: ReaderOptions -> OrgParserState