-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmkdfa.jl
166 lines (138 loc) · 4.57 KB
/
mkdfa.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
###############################################################################
#
# Copyright (C) 2015 VoxForge
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################
#
# port of Julius perl script: mkdfa.pl
#
###############################################################################
# Read the text file at `path` and return a vector of `(text, lineend)` tuples,
# one per line that still has content after the line terminator and any `#`
# comment have been removed. `lineend` is "\r\n" when the line ended with a
# Windows line ending and "\n" otherwise, so writers can mirror the input style.
function _content_lines(path)
    entries = Tuple{String,String}[]
    # keep=true retains the terminator; without it "\r\n" could not be detected.
    for raw in eachline(path; keep=true)
        lineend = endswith(raw, "\r\n") ? "\r\n" : "\n"
        text = replace(chomp(raw), r"#.*" => "")
        isempty(strip(text)) || push!(entries, (text, lineend))
    end
    return entries
end

"""
    reverse_grammar(rgramfile, gramfile)

Read the grammar rules in `gramfile` and write them to `rgramfile` with the
whitespace-separated symbols on the right-hand side of each `:` in reverse order.

Blank lines and lines holding only a `#` comment are skipped. Each output line
ends with the same kind of line ending (LF or CRLF) as the input line it came
from. A line with content but no `:` separator raises an error. The number of
rules written is printed to standard output.
"""
function reverse_grammar(rgramfile, gramfile)
    rules = _content_lines(gramfile)
    open(rgramfile, "w") do rgram
        for (text, lineend) in rules
            parts = split(text, ':')
            length(parts) >= 2 ||
                error("malformed rule in $gramfile (no ':' separator): $text")
            left, right = parts[1], parts[2]
            # Split on single whitespace characters so the spacing of the
            # output stays the same as what this script has always produced.
            reversed = join(reverse(split(right, r"\s")), " ")
            write(rgram, left, ":", reversed, lineend)
        end
    end
    println("$gramfile has $(length(rules)) rules")
    println("---")
    return nothing
end

"""
    make_category_voca(vocafile, termfile, tmpvocafile)

Extract the category headers (lines starting with `%`) from `vocafile`.

For every category, in file order, one line `#<name>` is written to
`tmpvocafile` and one line `<id><TAB><name>` is written to `termfile`, where
`<id>` counts categories starting at 0. Every other non-blank, non-comment line
is counted as a word. The category and word counts are printed to standard
output.
"""
function make_category_voca(vocafile, termfile, tmpvocafile)
    entries = _content_lines(vocafile)
    categories = Tuple{String,String}[]   # (category name, line ending)
    for (text, lineend) in entries
        m = match(r"^%[ \t]*([A-Za-z0-9_]*)", text)
        m === nothing || push!(categories, (String(m.captures[1]), lineend))
    end

    open(tmpvocafile, "w") do tmpvoca
        open(termfile, "w") do term
            for (position, (category, lineend)) in enumerate(categories)
                write(tmpvoca, "#", category, lineend)
                # Term ids are zero-based.
                write(term, string(position - 1), "\t", category, lineend)
            end
        end
    end

    n1 = length(categories)
    n2 = length(entries) - n1   # blank and comment-only lines are not words
    println("$vocafile has $n1 categories and $n2 words")
    println("generated: $termfile")
    println("---")
    return nothing
end

"""
    voca2dict(vocafile, dictfile)

Convert the vocabulary in `vocafile` into dictionary lines in `dictfile`.

Each line starting with `%` advances the category id (the first category is 0).
Every other non-blank, non-comment line is split on whitespace into a word name
followed by the remaining fields and written as
`<id><TAB>[<name>]<TAB><fields joined by single spaces>`, using the same kind of
line ending as the input line. Words that appear before the first category are
written with id -1.
"""
function voca2dict(vocafile, dictfile)
    open(dictfile, "w") do dict
        newid = -1
        for (text, lineend) in _content_lines(vocafile)
            if startswith(text, "%")
                newid += 1
            else
                # split() without a delimiter ignores leading and trailing
                # whitespace, so an indented entry still yields its real name.
                fields = split(text)
                name = fields[1]
                rest = join(fields[2:end], " ")
                write(dict, "$(newid)\t[$(name)]\t$(rest)$(lineend)")
            end
        end
    end
    println("generated: $dictfile")
    return nothing
end

"""
    main(args=ARGS)

Command-line driver. `args` must hold exactly one element: the grammar prefix
(which may include a path). The files `<prefix>.grammar` and `<prefix>.voca`
must exist.

Writes `<prefix>.term`, `<prefix>.dfa` and `<prefix>.dict`. The external
programs `mkfa` and `dfa_minimize` (with an `.exe` suffix on Windows) are run
to build the DFA. Intermediate files live in a temporary directory that is
removed afterwards, and `<prefix>.dfa.tmp` is deleted even if a step fails.
"""
function main(args=ARGS)
    isempty(args) && error("mkdfa: missing grammar prefix argument")
    if length(args) > 1
        error("mkdfa: too many arguments for call from command line")
    end
    grammar_prefix = args[1] # can include path

    gramfile = "$(grammar_prefix).grammar"
    vocafile = grammar_prefix * ".voca"
    if !isfile(gramfile)
        error("can't find gramfile file: $(grammar_prefix).grammar")
    end
    if !isfile(vocafile)
        error("can't find voca file: $(grammar_prefix).voca")
    end

    mkfa = Sys.iswindows() ? "mkfa.exe" : "mkfa"
    dfa_minimize = Sys.iswindows() ? "dfa_minimize.exe" : "dfa_minimize"

    termfile = grammar_prefix * ".term"
    dfafile = grammar_prefix * ".dfa"
    dfatmp = dfafile * ".tmp"
    dictfile = "$(grammar_prefix).dict"

    # The do-block form removes the directory and everything in it on exit,
    # whether the body returns normally or throws.
    mktempdir() do workingfolder
        rgramfile = "$(workingfolder)/g$(getpid()).grammar"
        tmpvocafile = "$(workingfolder)/g$(getpid()).voca"
        headerfile = "$(workingfolder)/g$(getpid()).h"
        try
            reverse_grammar(rgramfile, gramfile)
            make_category_voca(vocafile, termfile, tmpvocafile)
            run(`$mkfa -e1 -fg $rgramfile -fv $tmpvocafile -fo $dfatmp -fh $headerfile`)
            run(`$dfa_minimize $dfatmp -o $dfafile`)
            voca2dict(vocafile, dictfile)
        finally
            # This file sits next to the grammar, outside the temp directory.
            rm(dfatmp; force=true)
        end
    end
    return nothing
end
# Script entry point: run the conversion only when at least one command-line
# argument was supplied (i.e. the file was not merely loaded without arguments).
isempty(ARGS) || main()