summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2022-03-12 23:16:31 +0100
committerAlbert Krewinkel <albert@zeitkraut.de>2022-03-12 23:16:31 +0100
commitedfe34c86c595f253467304ab54061a56cf5045b (patch)
tree9ee590a8c2b0fcdb8f62ece7df0bf0464dd7fb85 /src/Text
parent699336cf5bf787b6e2766c6133852bbcff0c8264 (diff)
Document more functions in T.P.Parsing and T.P.Shared.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Parsing/GridTable.hs12
-rw-r--r--src/Text/Pandoc/Parsing/Lists.hs4
-rw-r--r--src/Text/Pandoc/Parsing/Smart.hs45
-rw-r--r--src/Text/Pandoc/Shared.hs26
4 files changed, 76 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs
index ebc12197f..74ad64d78 100644
--- a/src/Text/Pandoc/Parsing/GridTable.hs
+++ b/src/Text/Pandoc/Parsing/GridTable.hs
@@ -40,16 +40,18 @@ import qualified Text.Pandoc.Builder as B
-- line).
gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st)
=> ParserT Sources st m (mf Blocks) -- ^ Block list parser
- -> Bool -- ^ Headerless table
+ -> Bool -- ^ Headerless table
-> ParserT Sources st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
+-- | Like @'gridTableWith'@, but returns 'TableComponents' instead of a
+-- Table.
gridTableWith' :: (Monad m, Monad mf,
HasReaderOptions st, HasLastStrPosition st)
- => ParserT Sources st m (mf Blocks) -- ^ Block list parser
- -> Bool -- ^ Headerless table
+ => ParserT Sources st m (mf Blocks) -- ^ Block list parser
+ -> Bool -- ^ Headerless table
-> ParserT Sources st m (TableComponents mf)
gridTableWith' blocks headless =
tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
@@ -59,6 +61,10 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitTextByIndices (init indices) $ trimr line
+-- | Parses a grid segment, where the grid line is made up from the
+-- given char and terminated with a plus (@+@). The grid line may begin
+-- and/or end with a colon, signaling column alignment. Returns the size
+-- of the grid part and column alignment
gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
gridPart ch = do
leftColon <- option False (True <$ char ':')
diff --git a/src/Text/Pandoc/Parsing/Lists.hs b/src/Text/Pandoc/Parsing/Lists.hs
index d3f2a2e6f..09a06e440 100644
--- a/src/Text/Pandoc/Parsing/Lists.hs
+++ b/src/Text/Pandoc/Parsing/Lists.hs
@@ -56,7 +56,8 @@ import qualified Data.Map as M
import qualified Data.Text as T
-- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char)
+ => Bool -- ^ Uppercase if true
-> ParserT s st m Int
romanNumeral upperCase = do
let rchar uc = char $ if upperCase then uc else toLower uc
@@ -212,4 +213,3 @@ orderedListMarker style delim = do
TwoParens -> inTwoParens
(start, _, _) <- context num
return start
-
diff --git a/src/Text/Pandoc/Parsing/Smart.hs b/src/Text/Pandoc/Parsing/Smart.hs
index 58254d2a6..13cb18efe 100644
--- a/src/Text/Pandoc/Parsing/Smart.hs
+++ b/src/Text/Pandoc/Parsing/Smart.hs
@@ -46,6 +46,10 @@ import Text.Parsec
import qualified Text.Pandoc.Builder as B
+-- | Parses various ASCII punctuation, quotes, and apostrophe in a smart
+-- way, inferring their semantic meaning.
+--
+-- Fails unless the 'Ext_smart' extension has been enabled.
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
@@ -55,12 +59,16 @@ smartPunctuation inlineParser = do
guardEnabled Ext_smart
choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
+-- | Parses inline text in single or double quotes, assumes English
+-- quoting conventions.
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
-> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
+-- | Parses inline text in single quotes, assumes English quoting
+-- conventions.
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
@@ -72,6 +80,8 @@ singleQuoted inlineParser = do
(withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
<|> pure "\8217"
+-- | Parses inline text in double quotes; assumes English quoting
+-- conventions.
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
@@ -89,6 +99,14 @@ charOrRef cs =
guard (c `elem` cs)
return c)
+-- | Succeeds if the parser is
+--
+-- * not within single quoted text;
+-- * not directly after a word; and
+-- * looking at an opening single quote char that's not followed by a
+-- space.
+--
+-- Gobbles the quote character on success.
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
@@ -106,6 +124,16 @@ singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
+-- | Succeeds if the parser is
+--
+-- * not within a double quoted text;
+--
+-- * not directly after a word; and
+--
+-- * looking at an opening double quote char that's not followed by a
+-- space.
+--
+-- Gobbles the quote character on success.
doubleQuoteStart :: (HasLastStrPosition st,
HasQuoteContext st m,
Stream s m Char, UpdateSourcePos s Char)
@@ -116,20 +144,33 @@ doubleQuoteStart = do
try $ do charOrRef "\"\8220\147"
void $ lookAhead (satisfy (not . isSpaceChar))
+-- | Parses a closing quote character.
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m ()
doubleQuoteEnd = void (charOrRef "\"\8221\148")
-apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+-- | Parses an ASCII apostrophe (@'@) or right single quotation mark and
+-- returns a RIGHT SINGLE QUOtatiON MARK character.
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
-doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+-- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE
+-- QUOTATION MARK.
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char)
+ => ParserT s st m Inlines
doubleCloseQuote = B.str "\8221" <$ char '"'
+-- | Parses three dots as HORIZONTAL ELLIPSIS.
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
ellipses = try (string "..." >> return (B.str "\8230"))
+-- | Parses two hyphens as EN DASH and three as EM DASH.
+--
+-- If the extension @'Ext_old_dashes'@ is enabled, then two hyphens are
+-- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is
+-- followed by a digit.
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
=> ParserT s st m Inlines
dash = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5b833dd34..26c922a04 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -152,6 +152,9 @@ splitTextBy isSep t
| otherwise = let (first, rest) = T.break isSep t
in first : splitTextBy isSep (T.dropWhile isSep rest)
+-- | Split text at the given widths. Note that the break points are
+-- /not/ indices but text widths, which will be different for East Asian
+-- characters, emojis, etc.
splitTextByIndices :: [Int] -> T.Text -> [T.Text]
splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
where
@@ -160,9 +163,12 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
let (first, rest) = splitAt' x cs
in T.pack first : splitTextByRelIndices xs rest
--- Note: don't replace this with T.splitAt, which is not sensitive
+-- | Returns a pair whose first element is a prefix of @t@ and that has
+-- width @n@, and whose second is the remainder of the string.
+--
+-- Note: Do *not* replace this with 'T.splitAt', which is not sensitive
-- to character widths!
-splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' :: Int {-^ n -} -> [Char] {-^ t -} -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs) = (x:ys,zs)
@@ -175,7 +181,11 @@ ordNub l = go Set.empty l
go s (x:xs) = if x `Set.member` s then go s xs
else x : go (Set.insert x s) xs
-findM :: forall m t a. (Monad m, Foldable t) => (a -> m Bool) -> t a -> m (Maybe a)
+-- | Returns the last element in a foldable structure for that the
+-- monadic predicate holds true, and @Nothing@ if no such element
+-- exists.
+findM :: forall m t a. (Monad m, Foldable t)
+ => (a -> m Bool) -> t a -> m (Maybe a)
findM p = foldr go (pure Nothing)
where
go :: a -> m (Maybe a) -> m (Maybe a)
@@ -191,6 +201,7 @@ findM p = foldr go (pure Nothing)
inquotes :: T.Text -> T.Text
inquotes txt = T.cons '\"' (T.snoc txt '\"')
+-- | Like @'show'@, but returns a 'T.Text' instead of a 'String'.
tshow :: Show a => a -> T.Text
tshow = T.pack . show
@@ -206,6 +217,8 @@ notElemText c = T.all (/= c)
stripTrailingNewlines :: T.Text -> T.Text
stripTrailingNewlines = T.dropWhileEnd (== '\n')
+-- | Returns 'True' for an ASCII whitespace character, viz. space,
+-- carriage return, newline, and horizontal tab.
isWS :: Char -> Bool
isWS ' ' = True
isWS '\r' = True
@@ -317,6 +330,7 @@ crFilter = T.filter (/= '\r')
normalizeDate :: T.Text -> Maybe T.Text
normalizeDate = fmap T.pack . normalizeDate' . T.unpack
+-- | Like @'normalizeDate'@, but acts on 'String' instead of 'T.Text'.
normalizeDate' :: String -> Maybe String
normalizeDate' s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
@@ -382,10 +396,12 @@ removeFormatting = query go . walk (deNote . deQuote)
go LineBreak = [Space]
go _ = []
+-- | Replaces 'Note' elements with empty strings.
deNote :: Inline -> Inline
deNote (Note _) = Str ""
deNote x = x
+-- | Turns links into spans, keeping just the link text.
deLink :: Inline -> Inline
deLink (Link _ ils _) = Span nullAttr ils
deLink x = x
@@ -412,6 +428,8 @@ stringify = query go . walk fixInlines
fixInlines (q@Quoted{}) = deQuote q
fixInlines x = x
+-- | Unwrap 'Quoted' inline elements, enclosing the contents with
+-- English-style Unicode quotes instead.
deQuote :: Inline -> Inline
deQuote (Quoted SingleQuote xs) =
Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
@@ -813,7 +831,7 @@ collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
isSingleton _ = Nothing
checkPathSeperator = fmap isPathSeparator . isSingleton
--- Convert the path part of a file: URI to a regular path.
+-- | Converts the path part of a file: URI to a regular path.
-- On windows, @/c:/foo@ should be @c:/foo@.
-- On linux, @/foo@ should be @/foo@.
uriPathToPath :: T.Text -> FilePath