From fdfa9fca68921ea67c84749cc420ab49ad77a7fc Mon Sep 17 00:00:00 2001
From: Ruqi
Date: Fri, 13 Jan 2023 13:02:56 +0700
Subject: Refine "blending" rules for MediaWiki links

The rules for "blending" characters outside a link into the link are
described here: https://en.wikipedia.org/wiki/Help:Wikitext#Blend_link

These pose a problem for CJK languages, which generally don't have
spaces after links.

However, it turns out that the blending behavior, as implemented on
Wikipedia, is (contrary to the documentation) only for ASCII letters.
This commit implements that restriction, which fixes the problem for
CJK. (#8525)
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 8953c0a46..423758c60 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -19,7 +19,7 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
 
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isSpace)
+import Data.Char (isAscii, isDigit, isLetter, isSpace)
 import qualified Data.Foldable as F
 import Data.List (intersperse)
 import Data.Maybe (fromMaybe, maybeToList)
@@ -664,7 +664,7 @@ internalLink = try $ do
              -- [[Help:Contents|] -> "Contents"
              <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) )
   sym "]]"
-  linktrail <- B.text <$> manyChar letter
+  linktrail <- B.text <$> manyChar (satisfy (\c -> isAscii c && isLetter c))
   let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
   if "Category:" `T.isPrefixOf` pagename
      then do
-- 
cgit v1.2.3