summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Readers/LaTeX/Parsing.hs 67
1 files changed, 39 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 388c6fa09..f60a5dd35 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -354,41 +354,41 @@ getInputTokens = do
Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
tokenize :: SourcePos -> Text -> [Tok]
-tokenize = totoks
+tokenize = totoks False
where
- totoks pos t =
+ totoks atIsLetter pos t =
case T.uncons t of
Nothing -> []
Just (c, rest)
| c == '\n' ->
Tok pos Newline "\n"
- : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
+ : totoks atIsLetter (setSourceColumn (incSourceLine pos 1) 1) rest
| isSpaceOrTab c ->
let (sps, rest') = T.span isSpaceOrTab t
in Tok pos Spaces sps
- : totoks (incSourceColumn pos (T.length sps))
+ : totoks atIsLetter (incSourceColumn pos (T.length sps))
rest'
| isAlphaNum c ->
let (ws, rest') = T.span isAlphaNum t
in Tok pos Word ws
- : totoks (incSourceColumn pos (T.length ws)) rest'
+ : totoks atIsLetter (incSourceColumn pos (T.length ws)) rest'
| c == '%' ->
let (cs, rest') = T.break (== '\n') rest
in Tok pos Comment ("%" <> cs)
- : totoks (incSourceColumn pos (1 + T.length cs)) rest'
+ : totoks atIsLetter (incSourceColumn pos (1 + T.length cs)) rest'
| c == '\\' ->
case T.uncons rest of
Nothing -> [Tok pos (CtrlSeq " ") "\\"]
Just (d, rest')
- | isLetterOrAt d ->
- -- \makeatletter is common in macro defs;
- -- ideally we should make tokenization sensitive
- -- to \makeatletter and \makeatother, but this is
- -- probably best for now
- let (ws, rest'') = T.span isLetterOrAt rest
+ | isLetter' atIsLetter d ->
+ let (ws, rest'') = T.span (isLetter' atIsLetter) rest
(ss, rest''') = T.span isSpaceOrTab rest''
+ atIsLetter' = case ws of
+ "makeatletter" -> True
+ "makeatother" -> False
+ _ -> atIsLetter
in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
- : totoks (incSourceColumn pos
+ : totoks atIsLetter' (incSourceColumn pos
(1 + T.length ws + T.length ss)) rest'''
| isSpaceOrTab d || d == '\n' ->
let (w1, r1) = T.span isSpaceOrTab rest
@@ -401,15 +401,15 @@ tokenize = totoks
in case T.uncons r3 of
Just ('\n', _) ->
Tok pos (CtrlSeq " ") ("\\" <> w1)
- : totoks (incSourceColumn pos (T.length ws))
- r1
+ : totoks atIsLetter
+ (incSourceColumn pos (T.length ws)) r1
_ ->
Tok pos (CtrlSeq " ") ws
- : totoks (incSourceColumn pos (T.length ws))
- r3
+ : totoks atIsLetter
+ (incSourceColumn pos (T.length ws)) r3
| otherwise ->
Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
- : totoks (incSourceColumn pos 2) rest'
+ : totoks atIsLetter (incSourceColumn pos 2) rest'
| c == '#' ->
case T.uncons rest of
Just ('#', t3) ->
@@ -417,18 +417,20 @@ tokenize = totoks
in case safeRead t1 of
Just i ->
Tok pos (DeferredArg i) ("##" <> t1)
- : totoks (incSourceColumn pos (2 + T.length t1)) t2
+ : totoks atIsLetter
+ (incSourceColumn pos (2 + T.length t1)) t2
Nothing -> Tok pos Symbol "#"
: Tok (incSourceColumn pos 1) Symbol "#"
- : totoks (incSourceColumn pos 1) t3
+ : totoks atIsLetter (incSourceColumn pos 1) t3
_ ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
in case safeRead t1 of
Just i ->
Tok pos (Arg i) ("#" <> t1)
- : totoks (incSourceColumn pos (1 + T.length t1)) t2
+ : totoks atIsLetter
+ (incSourceColumn pos (1 + T.length t1)) t2
Nothing -> Tok pos Symbol "#"
- : totoks (incSourceColumn pos 1) rest
+ : totoks atIsLetter (incSourceColumn pos 1) rest
| c == '^' ->
case T.uncons rest of
Just ('^', rest') ->
@@ -438,26 +440,35 @@ tokenize = totoks
case T.uncons rest'' of
Just (e, rest''') | isLowerHex e ->
Tok pos Esc2 (T.pack ['^','^',d,e])
- : totoks (incSourceColumn pos 4) rest'''
+ : totoks atIsLetter
+ (incSourceColumn pos 4) rest'''
_ ->
Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
+ : totoks atIsLetter
+ (incSourceColumn pos 3) rest''
| d < '\128' ->
Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
+ : totoks atIsLetter
+ (incSourceColumn pos 3) rest''
_ -> Tok pos Symbol "^" :
Tok (incSourceColumn pos 1) Symbol "^" :
- totoks (incSourceColumn pos 2) rest'
+ totoks atIsLetter (incSourceColumn pos 2) rest'
_ -> Tok pos Symbol "^"
- : totoks (incSourceColumn pos 1) rest
+ : totoks atIsLetter (incSourceColumn pos 1) rest
| otherwise ->
- Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
+ Tok pos Symbol (T.singleton c) :
+ totoks atIsLetter (incSourceColumn pos 1) rest
isSpaceOrTab :: Char -> Bool
isSpaceOrTab ' ' = True
isSpaceOrTab '\t' = True
isSpaceOrTab _ = False
+-- First parameter is True if @ is letter
+isLetter' :: Bool -> Char -> Bool
+isLetter' True '@' = True
+isLetter' _ c = isLetter c
+
isLetterOrAt :: Char -> Bool
isLetterOrAt '@' = True
isLetterOrAt c = isLetter c