-
Notifications
You must be signed in to change notification settings - Fork 82
/
Copy pathAlexWrappers.hs
432 lines (351 loc) · 15.6 KB
/
AlexWrappers.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
-- -----------------------------------------------------------------------------
-- Alex wrapper code.
--
-- This code is in the PUBLIC DOMAIN; you may copy it freely and use
-- it for any purpose whatsoever.
#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING)
import Control.Applicative as App (Applicative (..))
#endif
import Data.Word (Word8)
#if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
import Data.Int (Int64)
import qualified Data.Char
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString.Internal as ByteString (w2c)
#elif defined(ALEX_STRICT_BYTESTRING)
import qualified Data.Char
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Internal as ByteString hiding (ByteString)
import qualified Data.ByteString.Unsafe as ByteString
#else
import Data.Char (ord)
import qualified Data.Bits
-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
utf8Encode :: Char -> [Word8]
utf8Encode = uncurry (:) . utf8Encode'
utf8Encode' :: Char -> (Word8, [Word8])
utf8Encode' c = case go (ord c) of
(x, xs) -> (fromIntegral x, map fromIntegral xs)
where
go oc
| oc <= 0x7f = ( oc
, [
])
| oc <= 0x7ff = ( 0xc0 + (oc `Data.Bits.shiftR` 6)
, [0x80 + oc Data.Bits..&. 0x3f
])
| oc <= 0xffff = ( 0xe0 + (oc `Data.Bits.shiftR` 12)
, [0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
])
| otherwise = ( 0xf0 + (oc `Data.Bits.shiftR` 18)
, [0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
, 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
, 0x80 + oc Data.Bits..&. 0x3f
])
#endif
type Byte = Word8
-- -----------------------------------------------------------------------------
-- The input type
#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_GSCAN)
type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
[Byte], -- pending bytes on current char
String) -- current input string
ignorePendingBytes :: AlexInput -> AlexInput
ignorePendingBytes (p,c,_ps,s) = (p,c,[],s)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_p,c,_bs,_s) = c
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
in case utf8Encode' c of
(b, bs) -> p' `seq` Just (b, (p', c, bs, s))
#endif
#if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
type AlexInput = (AlexPosn, -- current position,
Char, -- previous char
ByteString.ByteString, -- current input string
Int64) -- bytes consumed so far
ignorePendingBytes :: AlexInput -> AlexInput
ignorePendingBytes i = i -- no pending bytes when lexing bytestrings
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,c,_,_) = c
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,_,cs,n) =
case ByteString.uncons cs of
Nothing -> Nothing
Just (b, cs') ->
let c = ByteString.w2c b
p' = alexMove p c
n' = n+1
in p' `seq` cs' `seq` n' `seq` Just (b, (p', c, cs',n'))
#endif
#ifdef ALEX_BASIC_BYTESTRING
data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char, -- previous char
alexStr :: !ByteString.ByteString, -- current input string
alexBytePos :: {-# UNPACK #-} !Int64} -- bytes consumed so far
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = alexChar
alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) =
case ByteString.uncons cs of
Nothing -> Nothing
Just (c, rest) ->
Just (c, AlexInput {
alexChar = ByteString.w2c c,
alexStr = rest,
alexBytePos = n+1})
#endif
#ifdef ALEX_STRICT_BYTESTRING
data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char,
alexStr :: {-# UNPACK #-} !ByteString.ByteString,
alexBytePos :: {-# UNPACK #-} !Int}
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = alexChar
alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) =
case ByteString.uncons cs of
Nothing -> Nothing
Just (c, rest) ->
Just (c, AlexInput {
alexChar = ByteString.w2c c,
alexStr = rest,
alexBytePos = n+1})
#endif
-- -----------------------------------------------------------------------------
-- Token positions
-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of characters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.
#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN)
data AlexPosn = AlexPn !Int !Int !Int
deriving (Eq,Show)
alexStartPos :: AlexPosn
alexStartPos = AlexPn 0 1 1
alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (c+alex_tab_size-((c-1) `mod` alex_tab_size))
alexMove (AlexPn a l _) '\n' = AlexPn (a+1) (l+1) 1
alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
#endif
-- -----------------------------------------------------------------------------
-- Monad (default and with ByteString input)
#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING)
data AlexState = AlexState {
alex_pos :: !AlexPosn, -- position at current input location
#ifndef ALEX_MONAD_BYTESTRING
alex_inp :: String, -- the current input
alex_chr :: !Char, -- the character before the input
alex_bytes :: [Byte],
#else /* ALEX_MONAD_BYTESTRING */
alex_bpos:: !Int64, -- bytes consumed so far
alex_inp :: ByteString.ByteString, -- the current input
alex_chr :: !Char, -- the character before the input
#endif /* ALEX_MONAD_BYTESTRING */
alex_scd :: !Int -- the current startcode
#ifdef ALEX_MONAD_USER_STATE
, alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
#endif
}
-- Compile with -funbox-strict-fields for best results!
#ifndef ALEX_MONAD_BYTESTRING
runAlex :: String -> Alex a -> Either String a
runAlex input__ (Alex f)
= case f (AlexState {alex_bytes = [],
#else /* ALEX_MONAD_BYTESTRING */
runAlex :: ByteString.ByteString -> Alex a -> Either String a
runAlex input__ (Alex f)
= case f (AlexState {alex_bpos = 0,
#endif /* ALEX_MONAD_BYTESTRING */
alex_pos = alexStartPos,
alex_inp = input__,
alex_chr = '\n',
#ifdef ALEX_MONAD_USER_STATE
alex_ust = alexInitUserState,
#endif
alex_scd = 0}) of Left msg -> Left msg
Right ( _, a ) -> Right a
newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) }
instance Functor Alex where
fmap f a = Alex $ \s -> case unAlex a s of
Left msg -> Left msg
Right (s', a') -> Right (s', f a')
instance Applicative Alex where
pure a = Alex $ \s -> Right (s, a)
fa <*> a = Alex $ \s -> case unAlex fa s of
Left msg -> Left msg
Right (s', f) -> case unAlex a s' of
Left msg -> Left msg
Right (s'', b) -> Right (s'', f b)
instance Monad Alex where
m >>= k = Alex $ \s -> case unAlex m s of
Left msg -> Left msg
Right (s',a) -> unAlex (k a) s'
return = App.pure
alexGetInput :: Alex AlexInput
alexGetInput
#ifndef ALEX_MONAD_BYTESTRING
= Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} ->
Right (s, (pos,c,bs,inp__))
#else /* ALEX_MONAD_BYTESTRING */
= Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} ->
Right (s, (pos,c,inp__,bpos))
#endif /* ALEX_MONAD_BYTESTRING */
alexSetInput :: AlexInput -> Alex ()
#ifndef ALEX_MONAD_BYTESTRING
alexSetInput (pos,c,bs,inp__)
= Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of
#else /* ALEX_MONAD_BYTESTRING */
alexSetInput (pos,c,inp__,bpos)
= Alex $ \s -> case s{alex_pos=pos,
alex_bpos=bpos,
alex_chr=c,
alex_inp=inp__} of
#endif /* ALEX_MONAD_BYTESTRING */
state__@(AlexState{}) -> Right (state__, ())
alexError :: String -> Alex a
alexError message = Alex $ const $ Left message
alexGetStartCode :: Alex Int
alexGetStartCode = Alex $ \s@AlexState{alex_scd=sc} -> Right (s, sc)
alexSetStartCode :: Int -> Alex ()
alexSetStartCode sc = Alex $ \s -> Right (s{alex_scd=sc}, ())
#if !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE)
alexGetUserState :: Alex AlexUserState
alexGetUserState = Alex $ \s@AlexState{alex_ust=ust} -> Right (s,ust)
alexSetUserState :: AlexUserState -> Alex ()
alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ())
#endif /* !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) */
alexMonadScan = do
#ifndef ALEX_MONAD_BYTESTRING
inp__ <- alexGetInput
#else /* ALEX_MONAD_BYTESTRING */
inp__@(_,_,_,n) <- alexGetInput
#endif /* ALEX_MONAD_BYTESTRING */
sc <- alexGetStartCode
case alexScan inp__ sc of
AlexEOF -> alexEOF
AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp__' _len -> do
alexSetInput inp__'
alexMonadScan
#ifndef ALEX_MONAD_BYTESTRING
AlexToken inp__' len action -> do
#else /* ALEX_MONAD_BYTESTRING */
AlexToken inp__'@(_,_,_,n') _ action -> let len = n'-n in do
#endif /* ALEX_MONAD_BYTESTRING */
alexSetInput inp__'
action (ignorePendingBytes inp__) len
-- -----------------------------------------------------------------------------
-- Useful token actions
#ifndef ALEX_MONAD_BYTESTRING
type AlexAction result = AlexInput -> Int -> Alex result
#else /* ALEX_MONAD_BYTESTRING */
type AlexAction result = AlexInput -> Int64 -> Alex result
#endif /* ALEX_MONAD_BYTESTRING */
-- just ignore this token and scan another one
-- skip :: AlexAction result
skip _input _len = alexMonadScan
-- ignore this token, but set the start code to a new value
-- begin :: Int -> AlexAction result
begin code _input _len = do alexSetStartCode code; alexMonadScan
-- perform an action for this token, and set the start code to a new value
andBegin :: AlexAction result -> Int -> AlexAction result
(action `andBegin` code) input__ len = do
alexSetStartCode code
action input__ len
#ifndef ALEX_MONAD_BYTESTRING
token :: (AlexInput -> Int -> token) -> AlexAction token
#else /* ALEX_MONAD_BYTESTRING */
token :: (AlexInput -> Int64 -> token) -> AlexAction token
#endif /* ALEX_MONAD_BYTESTRING */
token t input__ len = return (t input__ len)
#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) */
-- -----------------------------------------------------------------------------
-- Basic wrapper
#ifdef ALEX_BASIC
type AlexInput = (Char,[Byte],String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c,_,_) = c
-- alexScanTokens :: String -> [token]
alexScanTokens str = go ('\n',[],str)
where go inp__@(_,_bs,s) =
case alexScan inp__ 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp__' _ln -> go inp__'
AlexToken inp__' len act -> act (take len s) : go inp__'
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s))
alexGetByte (_,[],[]) = Nothing
alexGetByte (_,[],(c:s)) = case utf8Encode' c of
(b, bs) -> Just (b, (c, bs, s))
#endif
-- -----------------------------------------------------------------------------
-- Basic wrapper, ByteString version
#ifdef ALEX_BASIC_BYTESTRING
-- alexScanTokens :: ByteString.ByteString -> [token]
alexScanTokens str = go (AlexInput '\n' str 0)
where go inp__ =
case alexScan inp__ 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp__' _len -> go inp__'
AlexToken inp__' _ act ->
let len = alexBytePos inp__' - alexBytePos inp__ in
act (ByteString.take len (alexStr inp__)) : go inp__'
#endif
#ifdef ALEX_STRICT_BYTESTRING
-- alexScanTokens :: ByteString.ByteString -> [token]
alexScanTokens str = go (AlexInput '\n' str 0)
where go inp__ =
case alexScan inp__ 0 of
AlexEOF -> []
AlexError _ -> error "lexical error"
AlexSkip inp__' _len -> go inp__'
AlexToken inp__' _ act ->
let len = alexBytePos inp__' - alexBytePos inp__ in
act (ByteString.take len (alexStr inp__)) : go inp__'
#endif
-- -----------------------------------------------------------------------------
-- Posn wrapper
-- Adds text positions to the basic model.
#ifdef ALEX_POSN
--alexScanTokens :: String -> [token]
alexScanTokens str0 = go (alexStartPos,'\n',[],str0)
where go inp__@(pos,_,_,str) =
case alexScan inp__ 0 of
AlexEOF -> []
AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp__' _ln -> go inp__'
AlexToken inp__' len act -> act pos (take len str) : go inp__'
#endif
-- -----------------------------------------------------------------------------
-- Posn wrapper, ByteString version
#ifdef ALEX_POSN_BYTESTRING
--alexScanTokens :: ByteString.ByteString -> [token]
alexScanTokens str0 = go (alexStartPos,'\n',str0,0)
where go inp__@(pos,_,str,n) =
case alexScan inp__ 0 of
AlexEOF -> []
AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column)
AlexSkip inp__' _len -> go inp__'
AlexToken inp__'@(_,_,_,n') _ act ->
act pos (ByteString.take (n'-n) str) : go inp__'
#endif
-- -----------------------------------------------------------------------------
-- GScan wrapper
-- For compatibility with previous versions of Alex, and because we can.
#ifdef ALEX_GSCAN
alexGScan stop__ state__ inp__ =
alex_gscan stop__ alexStartPos '\n' [] inp__ (0,state__)
alex_gscan stop__ p c bs inp__ (sc,state__) =
case alexScan (p,c,bs,inp__) sc of
AlexEOF -> stop__ p c inp__ (sc,state__)
AlexError _ -> stop__ p c inp__ (sc,state__)
AlexSkip (p',c',bs',inp__') _len ->
alex_gscan stop__ p' c' bs' inp__' (sc,state__)
AlexToken (p',c',bs',inp__') len k ->
k p c inp__ len (\scs -> alex_gscan stop__ p' c' bs' inp__' scs) (sc,state__)
#endif