forked from donya/Kulitta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCykParser.lhs
273 lines (206 loc) · 10.1 KB
/
CykParser.lhs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
> module CykParser where
> import Data.List
> import Parser
CYK Implementation
Donya Quick
Last modified: 21-July-2012
type Rule a = (a, [a])
> findProducers :: (Eq a) => [Rule a] -> [a] -> [Rule a]
> findProducers rs str = filter (\(l,r) -> r==str) rs
> cat :: [a] -> [a] -> [[a]]
> cat xs ys = [[x,y] | x<-xs, y<-ys]
> nextRow :: (Eq a) => [Rule a] -> [[[a]]] -> [[a]]
> nextRow rs rows =
> let n = length rows + 1 -- we assume terminal level is not included
> segs = map (\i -> (i,n-i)) [1..n-1]
> strs offset (i,j) = cat
> (rows !! (i-1) !! offset)
> (rows !! (j-1) !! (offset + i))
> rules offset (i,j) = concatMap (map fst. findProducers rs)
> (strs offset (i,j))
> f offset = nub $ concatMap (rules offset) segs
> in map f [0..length (rows !! 0) - n]
> mkSegs' :: Int -> Int -> [[Int]]
> mkSegs' n m = filter (\s -> sum s == n) $
> makeRange $ take m $ repeat (0,n) where
> makeRange = foldr (\(l,u) xs -> [(a:b) | a<-[l..u], b<-xs]) [[]]
> mkSegs :: Int -> Int -> [[Int]]
> mkSegs n m = filter (\s -> length s > 1) $ map (filter (>0)) $ mkSegs' n m
mkSegs must be called with:
n = the maximum length of the string in question.
m = the maximum possible number of substrings
n needs to be controlled by both the level (row+1) and
the offset. The formula should be:
n = min (length rows) (length (rows !! 0) - offset)
> toInds :: Int -> [Int] -> [(Int, Int)]
> toInds offset [] = []
> toInds offset (l:ls) =
> let row = l - 1
> col = offset
> in if l <= 0 then toInds offset ls
> else (row, col) : toInds (offset+l) ls
> toStrs :: [[[a]]] -> [(Int, Int)] -> [[a]]
> toStrs rows [] = [[]]
> toStrs rows ((i,j):cs) =
> let strs = toStrs rows cs
> theCell = if i < length rows && j < length (rows !! i)
> then rows !! i !! j
> else error ("(toStr) Bad box: ("++show i++", "++show j++")")
> in [(x:y) | x<-theCell, y<-strs]
> nextRowM m rs rows = -- m is the # of subdivisions
> let n = length rows +1 -- the "level"
> m' = min n m -- the number of substrings (or nonterms)
> segs = mkSegs n m' -- this is ok
> offsets = [0..length (rows !! 0) - n] -- all offsets
> nts o s = concatMap (map fst . findProducers rs) (toStrs rows (toInds o s))
> f o = concatMap (nts o) segs
> in map f offsets
> nextRowM2 m rs rows = -- m is the # of subdivisions
> let n = length rows +1 -- the "level"
> m' = min n m -- the number of substrings (or nonterms)
> segs = mkSegs n m' -- this is ok
> offsets = [0..length (rows !! 0) - n] -- all offsets
> nts o s = concatMap (map fst . findProducers rs) (toStrs rows (toInds o s))
> f o = concatMap (nts o) segs
> in map f offsets
> allRowsMS :: (Eq a) => Int -> [Rule a] -> [a] -> [[[a]]]
> allRowsMS m rs str = allRows' rs [fixRow rs $ firstRow' rs str] where
> allRows' rs rows = if length rows == length (head rows) then rows
> else allRows' rs (rows ++ [fixRow rs $ nextRowM m rs rows])
> allRows :: (Eq a) => [Rule a] -> [a] -> [[[a]]]
> allRows rs str = allRows' rs [firstRow rs str] where
> allRows' rs rows = if length rows == length (head rows) then rows
> else allRows' rs (rows ++ [nextRow rs rows])
> showRows :: (Show a) => [[[a]]] -> String
> showRows rs =
> let f line = concatMap g line ++ "\n"
> g bucket = show bucket ++ "\t"
> in concatMap f (reverse rs)
> printRows :: (Show a) => [[[a]]] -> IO ()
> printRows = putStr . showRows
==============
SYNONYM EXTENION
> findSynonyms :: (Eq a) => [Rule a] -> a -> [a]
> findSynonyms rules x = map fst $ filter (\(l,r) -> r==[x]) rules
> findSynRec :: (Eq a) => [Rule a] -> [a] -> [a]
> findSynRec rules syns =
> let s = nub (syns ++ concatMap (findSynonyms rules) syns)
> in if s == syns then syns else findSynRec rules s
findSynRec :: (Eq a) => [Rule a] -> a -> [a]
findSynRec rules x =
let s = findSynonyms rules x
s' = nub (s ++ concatMap (findSynonyms rules) s)
in if s == s' then s else nub $ s ++ concatMap (findSynRec rules) s'
> fixSyns :: (Eq a) => [Rule a] -> [a] -> [a]
> fixSyns rules bucket = nub (bucket ++ concatMap (findSynonyms rules) bucket)
> fixRow :: (Eq a) => [Rule a] -> [[a]] -> [[a]]
> fixRow rules row = map (findSynRec rules) row
> allRowsS :: (Eq a) => [Rule a] -> [a] -> [[[a]]]
> allRowsS rs str = allRows' rs [fixRow rs $ firstRow rs str] where
> allRows' rs rows = if length rows == length (head rows) then rows
> else allRows' rs (rows ++ [fixRow rs $ nextRow rs rows])
> firstRowOld rs cs = map (nub . map fst . findProducers rs . \a -> [a]) cs
> firstRow rs cs = map (\c -> [c]) cs
> firstRow' rs [] = []
> firstRow' rs (c:cs) =
> let fr0 = nub $ c : (map fst $ findProducers rs [c])
> in (if null fr0 then [c] else fr0) : firstRow' rs cs
==============
GENERATING ALL PARSES
allParses :: [Rule a] -> [[[a]]] -> Int -> Int [[Rule a]]
allParses rules rows i j = -- i is the row, j is the column
let f :: a -> [Rule a]
f x = filter (\(l,r) -> l==x) rules
in undefined
mkSegs :: Int -> Int -> [[Int]]
mkSegs n m = filter (\s -> length s > 1) $ map (filter (>0)) $ mkSegs' n m
mkSegs must be called with:
n = the maximum length of the string in question.
m = the maximum possible number of substrings
Given a rule that we know can be applied, pick the cells it generates.
> getCells :: (Eq a) => [[[a]]] -> Rule a -> Int -> Int -> [[(Int, Int)]]
> getCells rows (lhs, rhs) level offset =
> let n = level + 1
> m = length rhs -- number of syms to genererate
> segs = mkSegs n m -- get all possible ways to chunk the string
> inds = map (toInds offset) segs -- :: [[(Int, Int]] turn these into cells
> -- need to filter the cells now! --
> in filter (goodCells rows rhs) inds
> goodCells :: (Eq a) => [[[a]]] -> [a] -> [(Int, Int)] -> Bool
> goodCells rows [] [] = True
> goodCells rows (x:xs) ((i,j):is) =
> elem x (rows !! i !! j) && goodCells rows xs is
> appendTo :: (Eq a) => [[[a]]] -> (Int, Int) -> a -> [[[a]]]
> appendTo [] (i,j) x = []
> appendTo xs (i,j) x =
> let (preRs, theRow, postRs) = (take i xs, xs !! i, drop (i+1) xs)
> (preCs, theCell, postCs) = (take j theRow, theRow !! j, drop (j+1) theRow)
> newCell = nub (x : theCell) -- ensure no duplicates
> in preRs ++ (preCs ++ newCell : postCs) : postRs
> appendTo2 :: (Eq a) => [[[a]]] -> (Int, Int) -> [a] -> [[[a]]]
> appendTo2 [] (i,j) x = []
> appendTo2 xs (i,j) x =
> let (preRs, theRow, postRs) = (take i xs, xs !! i, drop (i+1) xs)
> (preCs, theCell, postCs) = (take j theRow, theRow !! j, drop (j+1) theRow)
> newCell = nub (x ++ theCell) -- ensure no duplicates
> in preRs ++ (preCs ++ newCell : postCs) : postRs
The parseDown1 function completes a parse from a particular cell and symbol. We
assume the symbol is a member of the cell in question.
> doAll = True
> parseDown1 :: (Eq a) => [[[a]]] -> [Rule a] -> [[[a]]] -> ((Int, Int), a) -> [[[[a]]]]
> parseDown1 rows rules newRows ((0,j),x) = [appendTo newRows (0,j) x]
> parseDown1 rows rules newRows ((i,j),x) =
> let newRows' = appendTo newRows (i,j) x -- put x in the current table
> pRules = filter (\(l,r) -> l==x && length r > 0) rules -- rules of form A->BC...N
> sRules = filter (\(l,r) -> l==x && length r == 1) rules -- rules of form A->B
> f r = getCells rows r i j -- get a rule's target cells (CAN BE >1 LIST!)
> f2 r@(lhs,rhs) = map (zipWith (\a (b,c) -> ((b,c),a)) rhs) (f r) -- group with coords
> --pCells type is [[[((Int, Int), a)]]]
> pCells = filter (\l -> not $ null l) $ map f2 pRules -- get cells according to pRules
> recCall pCell = parseDown1 rows rules newRows' pCell -- recurse on rule expansions
> pTabs = map (map (map recCall)) pCells -- gives one SET of tables per pCell
> pTabs' = map (map combineSets) pTabs -- gives one SET of tables per inner pCell list (can be >1)
> pTabs'' = concat $ concat pTabs' -- flatten lists, the parses are done
> syns = filter (/=x) $ map (\(l,r) -> head r) $ sRules -- what synonym symbols are there?
> synResults = concatMap (\a -> parseDown1 rows rules newRows' ((i,j),a)) syns -- recurse on syns
> in synResults ++ pTabs''
The parseDown function takes the cyk rows, the grammar's rules, and the start symbol.
> parseDown rows rules s =
> let n = length $ head rows
> in map (map (map reverse)) $ parseDown1 rows rules (emptyRows n) ((n-1,0),s)
xtrs = [(1,[1,1]), (1,[1,2]), (2,[2,2]), (3, [1]), (1, [1])] :: [Rule Int]
xstr = [1,1,1,1] :: [Int]
xp = allRowsMS 2 xtrs xstr
xtest1 = parseDown xp xtrs 3
> emptyRows n =
> if n <= 0 then [] else take n (repeat []) : emptyRows (n-1)
2 x
1 x x
0 x x x
0 1 2
The combineSets function takes a bunch of table sets, one set per
cell that has been parsed, and finds every combination of them.
> combineSets :: (Eq a) => [[[[[a]]]]] -> [[[[a]]]]
> combineSets [] = error "(combineSets) No sets to combine!"
> combineSets [tset] = tset
> combineSets (tset:moreSets) =
> let pairs = [(a,b) | a<-tset, b<-combineSets moreSets]
> in map (\(a,b) -> combine1 a b) pairs
> -- merges N tables
> combineN :: (Eq a) => [[[[a]]]] -> [[[a]]]
> combineN [] = error "(combineN) No tables to combine!"
> combineN [t] = t
> combineN (t:ts) = combine1 t (combineN ts)
> -- merges two tables
> combine1 :: (Eq a) => [[[a]]] -> [[[a]]] -> [[[a]]]
> combine1 tab1 tab2 =
> let n = length $ head tab1
> is = [(i,j) | i<-[0..n-1], j<-[0..n-1], j<=n-i-1]
> xs = map (\(i,j) -> tab2 !! i !! j) is
> in foldUpdate2 tab1 (zip is xs)
> foldUpdate :: (Eq a) => [[[a]]] -> [((Int, Int), a)] -> [[[a]]]
> foldUpdate table [] = table
> foldUpdate table ((c,x):xs) = foldUpdate (appendTo table c x) xs
> foldUpdate2 :: (Eq a) => [[[a]]] -> [((Int, Int), [a])] -> [[[a]]]
> foldUpdate2 table [] = table
> foldUpdate2 table ((c,x):xs) = foldUpdate2 (appendTo2 table c x) xs