summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2023-01-05 21:14:02 -0800
committer John MacFarlane <jgm@berkeley.edu> 2023-01-05 21:14:02 -0800
commit 0d891afab65b6928ace302d1a110ea9303c4abfb (patch)
tree a32f0e0c624e8b5f1f56f7a66b150a273823505c
parent 7a82686adcf6efd68b32c5e471b3059be5085165 (diff)
isURI: don't require non-ASCII characters to be escaped.
Closes #8508.
-rw-r--r-- src/Text/Pandoc/URI.hs 6
-rw-r--r-- test/command/8508.md 14
2 files changed, 18 insertions, 2 deletions
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 345ada768..7addd2844 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -20,7 +20,7 @@ import qualified Network.HTTP.Types as HTTP
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
-import Data.Char (isSpace)
+import Data.Char (isSpace, isAscii)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)
urlEncode :: T.Text -> T.Text
@@ -90,7 +90,9 @@ schemes = Set.fromList
-- | Check if the string is a valid URL with an IANA or frequently used but
-- unofficial scheme (see @schemes@).
isURI :: T.Text -> Bool
-isURI = maybe False hasKnownScheme . parseURI . T.unpack
+isURI =
+ -- we URI-escape non-ASCII characters because otherwise parseURI will choke:
+ maybe False hasKnownScheme . parseURI . escapeURIString isAscii . T.unpack
where
hasKnownScheme = (`Set.member` schemes) . T.toLower .
T.filter (/= ':') . T.pack . uriScheme
diff --git a/test/command/8508.md b/test/command/8508.md
new file mode 100644
index 000000000..6a8932a1d
--- /dev/null
+++ b/test/command/8508.md
@@ -0,0 +1,14 @@
+```
+% pandoc -t man
+SEE ALSO
+========
+
+* [Milk](https://en.wikipedia.org/wiki/Milk)
+* [EBNF](https://en.wikipedia.org/wiki/Extended_Backus–Naur_form)
+^D
+.SH SEE ALSO
+.IP \[bu] 2
+Milk (https://en.wikipedia.org/wiki/Milk)
+.IP \[bu] 2
+EBNF (https://en.wikipedia.org/wiki/Extended_Backus–Naur_form)
+```