summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2022-04-24 12:25:04 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2022-04-24 12:25:04 -0700
commit16f0316fbaa4d667ba40772969ab8e28fea6a493 (patch)
tree06e3c874011e16230142d0f07be0da3382fa7eec /src
parentb1990b065790473dd7bf13075072f86680ef3a8b (diff)
Add tsv (tab separated values) as an input format.
We use the simple spec at <https://www.iana.org/assignments/media-types/text/tab-separated-values>. API change: Text.Pandoc.Readers.CSV now exports `readTSV`. Internal change: In Text.Pandoc.CSV, CSVOptions has changed so that csvQuote takes a Maybe value. Closes #7974.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/App.hs3
-rw-r--r--src/Text/Pandoc/App/FormatHeuristics.hs1
-rw-r--r--src/Text/Pandoc/CSV.hs30
-rw-r--r--src/Text/Pandoc/Readers.hs2
-rw-r--r--src/Text/Pandoc/Readers/CSV.hs33
-rw-r--r--src/Text/Pandoc/Readers/RST.hs4
6 files changed, 53 insertions, 20 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 94b242cb4..1a694abb0 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -258,7 +258,8 @@ convertWithOpts opts = do
let convertTabs = tabFilter (if optPreserveTabs opts ||
readerNameBase == "t2t" ||
- readerNameBase == "man"
+ readerNameBase == "man" ||
+ readerNameBase == "tsv"
then 0
else optTabStop opts)
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index ebf8db4c5..c6f187363 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -86,6 +86,7 @@ formatFromFilePath x =
".xhtml" -> Just "html"
".ipynb" -> Just "ipynb"
".csv" -> Just "csv"
+ ".tsv" -> Just "tsv"
".bib" -> Just "biblatex"
['.',y] | y `elem` ['1'..'9'] -> Just "man"
_ -> Nothing
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 858dd5f6d..963fead0d 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.CSV (
ParseError
) where
-import Control.Monad (unless, void)
+import Control.Monad (unless, void, mzero)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
@@ -24,7 +24,7 @@ import Text.Parsec.Text (Parser)
data CSVOptions = CSVOptions{
csvDelim :: Char
- , csvQuote :: Char
+ , csvQuote :: Maybe Char
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
, csvEscape :: Maybe Char -- default is to double up quote
} deriving (Read, Show)
@@ -32,7 +32,7 @@ data CSVOptions = CSVOptions{
defaultCSVOptions :: CSVOptions
defaultCSVOptions = CSVOptions{
csvDelim = ','
- , csvQuote = '"'
+ , csvQuote = Just '"'
, csvKeepSpace = False
, csvEscape = Nothing }
@@ -53,18 +53,24 @@ pCSVCell :: CSVOptions -> Parser Text
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
pCSVQuotedCell :: CSVOptions -> Parser Text
-pCSVQuotedCell opts = do
- char (csvQuote opts)
- res <- many (satisfy (\c -> c /= csvQuote opts &&
- Just c /= csvEscape opts) <|> escaped opts)
- char (csvQuote opts)
- return $ T.pack res
+pCSVQuotedCell opts =
+ case csvQuote opts of
+ Nothing -> mzero
+ Just quotechar -> do
+ char quotechar
+ res <- many (satisfy (\c -> c /= quotechar &&
+ Just c /= csvEscape opts) <|> escaped opts)
+ char quotechar
+ return $ T.pack res
escaped :: CSVOptions -> Parser Char
-escaped opts = try $
+escaped opts =
case csvEscape opts of
- Nothing -> char (csvQuote opts) >> char (csvQuote opts)
- Just c -> char c >> noneOf "\r\n"
+ Nothing ->
+ case csvQuote opts of
+ Nothing -> mzero
+ Just q -> try $ char q >> char q
+ Just c -> try $ char c >> noneOf "\r\n"
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell opts = T.pack <$>
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 95f5f5b61..7abd1d024 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -52,6 +52,7 @@ module Text.Pandoc.Readers
, readFB2
, readIpynb
, readCSV
+ , readTSV
, readCslJson
, readBibTeX
, readBibLaTeX
@@ -152,6 +153,7 @@ readers = [("native" , TextReader readNative)
,("fb2" , TextReader readFB2)
,("ipynb" , TextReader readIpynb)
,("csv" , TextReader readCSV)
+ ,("tsv" , TextReader readTSV)
,("csljson" , TextReader readCslJson)
,("bibtex" , TextReader readBibTeX)
,("biblatex" , TextReader readBibLaTeX)
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 0fcf4bc35..23e0f7448 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -10,11 +10,14 @@
Stability : alpha
Portability : portable
-Conversion from CSV to a 'Pandoc' table.
+Conversion from CSV or TSV to a 'Pandoc' table.
-}
-module Text.Pandoc.Readers.CSV ( readCSV ) where
+module Text.Pandoc.Readers.CSV (
+ readCSV,
+ readTSV
+) where
import qualified Data.Text as T
-import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
+import Text.Pandoc.CSV (parseCSV, defaultCSVOptions, CSVOptions(..))
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
@@ -22,14 +25,34 @@ import Text.Pandoc.Error
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Options (ReaderOptions)
import Control.Monad.Except (throwError)
+import Data.Text (Text)
readCSV :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
-> a
-> m Pandoc
readCSV _opts s = do
- let txt = sourcesToText $ toSources s
- case parseCSV defaultCSVOptions txt of
+ readCSVWith defaultCSVOptions $ sourcesToText $ toSources s
+
+readTSV :: (PandocMonad m, ToSources a)
+ => ReaderOptions -- ^ Reader options
+ -> a
+ -> m Pandoc
+readTSV _opts s = do
+ readCSVWith tsvOpts $ sourcesToText $ toSources s
+ where
+ tsvOpts = CSVOptions{
+ csvDelim = '\t',
+ csvQuote = Nothing,
+ csvKeepSpace = False,
+ csvEscape = Nothing }
+
+readCSVWith :: PandocMonad m
+ => CSVOptions
+ -> Text
+ -> m Pandoc
+readCSVWith csvopts txt = do
+ case parseCSV csvopts txt of
Right (r:rs) -> return $ B.doc $ B.table capt
(zip aligns widths)
(TableHead nullAttr hdrs)
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index f13b70738..b87c0ab71 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -845,8 +845,8 @@ csvTableDirective top fields rawcsv = do
_ -> ','
, csvQuote = case trim <$> lookup "quote" fields of
Just (T.unpack -> [c])
- -> c
- _ -> '"'
+ -> Just c
+ _ -> Just '"'
, csvEscape = case trim <$> lookup "escape" fields of
Just (T.unpack -> [c])
-> Just c