diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 67 |
1 files changed, 39 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 388c6fa09..f60a5dd35 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -354,41 +354,41 @@ getInputTokens = do Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest) tokenize :: SourcePos -> Text -> [Tok] -tokenize = totoks +tokenize = totoks False where - totoks pos t = + totoks atIsLetter pos t = case T.uncons t of Nothing -> [] Just (c, rest) | c == '\n' -> Tok pos Newline "\n" - : totoks (setSourceColumn (incSourceLine pos 1) 1) rest + : totoks atIsLetter (setSourceColumn (incSourceLine pos 1) 1) rest | isSpaceOrTab c -> let (sps, rest') = T.span isSpaceOrTab t in Tok pos Spaces sps - : totoks (incSourceColumn pos (T.length sps)) + : totoks atIsLetter (incSourceColumn pos (T.length sps)) rest' | isAlphaNum c -> let (ws, rest') = T.span isAlphaNum t in Tok pos Word ws - : totoks (incSourceColumn pos (T.length ws)) rest' + : totoks atIsLetter (incSourceColumn pos (T.length ws)) rest' | c == '%' -> let (cs, rest') = T.break (== '\n') rest in Tok pos Comment ("%" <> cs) - : totoks (incSourceColumn pos (1 + T.length cs)) rest' + : totoks atIsLetter (incSourceColumn pos (1 + T.length cs)) rest' | c == '\\' -> case T.uncons rest of Nothing -> [Tok pos (CtrlSeq " ") "\\"] Just (d, rest') - | isLetterOrAt d -> - -- \makeatletter is common in macro defs; - -- ideally we should make tokenization sensitive - -- to \makeatletter and \makeatother, but this is - -- probably best for now - let (ws, rest'') = T.span isLetterOrAt rest + | isLetter' atIsLetter d -> + let (ws, rest'') = T.span (isLetter' atIsLetter) rest (ss, rest''') = T.span isSpaceOrTab rest'' + atIsLetter' = case ws of + "makeatletter" -> True + "makeatother" -> False + _ -> atIsLetter in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss) - : totoks (incSourceColumn pos + : totoks atIsLetter' (incSourceColumn pos (1 + T.length ws + T.length ss)) rest''' | isSpaceOrTab d || d == '\n' -> let (w1, r1) = T.span isSpaceOrTab rest @@ -401,15 +401,15 @@ tokenize = totoks in case T.uncons r3 of Just ('\n', _) -> Tok pos (CtrlSeq " ") ("\\" <> w1) - : totoks (incSourceColumn pos (T.length ws)) - r1 + : totoks atIsLetter + (incSourceColumn pos (T.length ws)) r1 _ -> Tok pos (CtrlSeq " ") ws - : totoks (incSourceColumn pos (T.length ws)) - r3 + : totoks atIsLetter + (incSourceColumn pos (T.length ws)) r3 | otherwise -> Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d]) - : totoks (incSourceColumn pos 2) rest' + : totoks atIsLetter (incSourceColumn pos 2) rest' | c == '#' -> case T.uncons rest of Just ('#', t3) -> @@ -417,18 +417,20 @@ tokenize = totoks in case safeRead t1 of Just i -> Tok pos (DeferredArg i) ("##" <> t1) - : totoks (incSourceColumn pos (2 + T.length t1)) t2 + : totoks atIsLetter + (incSourceColumn pos (2 + T.length t1)) t2 Nothing -> Tok pos Symbol "#" : Tok (incSourceColumn pos 1) Symbol "#" - : totoks (incSourceColumn pos 1) t3 + : totoks atIsLetter (incSourceColumn pos 1) t3 _ -> let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest in case safeRead t1 of Just i -> Tok pos (Arg i) ("#" <> t1) - : totoks (incSourceColumn pos (1 + T.length t1)) t2 + : totoks atIsLetter + (incSourceColumn pos (1 + T.length t1)) t2 Nothing -> Tok pos Symbol "#" - : totoks (incSourceColumn pos 1) rest + : totoks atIsLetter (incSourceColumn pos 1) rest | c == '^' -> case T.uncons rest of Just ('^', rest') -> @@ -438,26 +440,35 @@ tokenize = totoks case T.uncons rest'' of Just (e, rest''') | isLowerHex e -> Tok pos Esc2 (T.pack ['^','^',d,e]) - : totoks (incSourceColumn pos 4) rest''' + : totoks atIsLetter + (incSourceColumn pos 4) rest''' _ -> Tok pos Esc1 (T.pack ['^','^',d]) - : totoks (incSourceColumn pos 3) rest'' + : totoks atIsLetter + (incSourceColumn pos 3) rest'' | d < '\128' -> Tok pos Esc1 (T.pack ['^','^',d]) - : totoks (incSourceColumn pos 3) rest'' + : totoks atIsLetter + (incSourceColumn pos 3) rest'' _ -> Tok pos Symbol "^" : Tok (incSourceColumn pos 1) Symbol "^" : - totoks (incSourceColumn pos 2) rest' + totoks atIsLetter (incSourceColumn pos 2) rest' _ -> Tok pos Symbol "^" - : totoks (incSourceColumn pos 1) rest + : totoks atIsLetter (incSourceColumn pos 1) rest | otherwise -> - Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest + Tok pos Symbol (T.singleton c) : + totoks atIsLetter (incSourceColumn pos 1) rest isSpaceOrTab :: Char -> Bool isSpaceOrTab ' ' = True isSpaceOrTab '\t' = True isSpaceOrTab _ = False +-- First parameter is True if @ is letter +isLetter' :: Bool -> Char -> Bool +isLetter' True '@' = True +isLetter' _ c = isLetter c + isLetterOrAt :: Char -> Bool isLetterOrAt '@' = True isLetterOrAt c = isLetter c |
