# tester.py
#####################################################################################################################
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # must be set before importing tensorflow to silence its logging

import csv
import pickle
import re

import numpy as np
import tensorflow as tf
import nltk
from nltk.corpus import stopwords
from keras.utils import to_categorical
from keras.models import Model, model_from_json
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Bidirectional
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
#from aion.util.spell_check import SpellCorrector

stop_words = set(stopwords.words('english'))
#####################################################################################################################
def index_to_answers(answers_file):
    """Load an index -> answer-text mapping from a two-column CSV file."""
    index_to_answer = {}
    with open(answers_file, 'r') as file:
        csvreader = csv.reader(file)
        for row1, row2 in csvreader:
            #print(str(row1) + ':' + str(row2))
            index_to_answer[int(row1)] = row2
    return index_to_answer
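# A minimal sketch of the expected answers CSV layout (the file name and row
# contents here are illustrative assumptions, not taken from this repo):
# each line is "<index>,<answer text>", e.g.
#
#   0,Growth Plus is a health drink for kids.
#   1,Take Eno after a heavy meal.
#
# index_to_answers('answers0.csv') would then return
# {0: 'Growth Plus is a health drink for kids.', 1: 'Take Eno after a heavy meal.'}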
maxLen = 20  # maximum question length, in tokens; shorter questions are zero-padded
#####################################################################################################################
# Vocabulary and embedding lookups produced at training time.
with open('word_to_index.pkl', 'rb') as fp:
    word_to_index = pickle.load(fp)
#spell_corrector = SpellCorrector(dictionary=word_to_index)
with open('index_to_word.pkl', 'rb') as fp:
    index_to_word = pickle.load(fp)
with open('word_to_vec_map.pkl', 'rb') as fp:
    word_to_vec_map = pickle.load(fp)
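# A hedged sketch of how these pickles might have been built from GloVe-style
# vectors; the file name 'glove.6B.50d.txt' and this build snippet are
# assumptions about the training pipeline, not part of this repo:
#
#   word_to_vec_map = {}
#   with open('glove.6B.50d.txt', encoding='utf-8') as f:
#       for line in f:
#           parts = line.strip().split()
#           word_to_vec_map[parts[0]] = np.array(parts[1:], dtype=np.float64)
#   word_to_index = {w: i + 1 for i, w in enumerate(sorted(word_to_vec_map))}
#   index_to_word = {i: w for w, i in word_to_index.items()}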
#####################################################################################################################
def sentences_to_indices(X, word_to_index, max_len, remove_stop):
    """Convert an array of m sentences into a (m, max_len) array of word indices.

    Out-of-vocabulary words are left as 0 (the padding index); when
    remove_stop is 1, English stop words are dropped before indexing.
    """
    m = X.shape[0]
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        X[i] = X[i].lower()
        sentence_words = nltk.TreebankWordTokenizer().tokenize(X[i])
        if remove_stop == 1:
            sentence_words = [word for word in sentence_words if word not in stop_words]
        j = 0
        print(sentence_words)
        for w in sentence_words:
            if j >= max_len:  # guard against questions longer than max_len
                break
            if w in word_to_index:
                X_indices[i, j] = word_to_index[w]
            else:
                print(w)  # report out-of-vocabulary words
                #X_indices[i,j]=0
            j = j + 1
    return X_indices
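# For example (illustrative values; the actual indices depend on word_to_index):
#   sentences_to_indices(np.array(['is eno safe']), word_to_index, maxLen, 0)
# returns a (1, 20) array whose first three entries are the indices of
# 'is', 'eno' and 'safe', zero-padded to maxLen.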
#####################################################################################################################
models = []
number_of_models = 2
for i in range(number_of_models):
    weight_name = 'model_weights' + str(i) + '.h5'
    architecture_name = 'model_architecture' + str(i) + '.json'
    with open(architecture_name, 'r') as f:
        model = model_from_json(f.read())
    model.load_weights(weight_name)
    models.append(model)
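# These files are assumed to come from a training script that saved each model
# with the standard Keras split-save pattern, roughly:
#
#   with open('model_architecture0.json', 'w') as f:
#       f.write(model.to_json())
#   model.save_weights('model_weights0.h5')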
#####################################################################################################################
def removeprodname(sentence):
    """Replace known product names in the sentence with the pronoun 'it'."""
    sentence = sentence.lower()
    sentence_words = nltk.TreebankWordTokenizer().tokenize(sentence)
    number_of_words = len(sentence_words)
    for i in range(number_of_words):
        # look ahead safely: '' when this is the last word
        next_word = sentence_words[i + 1] if i + 1 < number_of_words else ''
        if sentence_words[i] == 'growth' and next_word == 'plus':
            sentence_words[i] = 'it'
            sentence_words[i + 1] = ''
        if sentence_words[i] == 'eno':
            sentence_words[i] = 'it'
        if sentence_words[i] == 'protein' and next_word == 'plus':
            sentence_words[i] = 'it'
        if sentence_words[i] in ('crocin', 'otrivin', 'horlicks', 'brush'):
            sentence_words[i] = 'it'
        if sentence_words[i] == 'sensodyne' and next_word in ('base', 'rapid', 'repair', 'herbal'):
            sentence_words[i] = 'it'
    return ' '.join(sentence_words)
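# For example: removeprodname('Is Growth Plus safe for kids?') returns
# 'is it  safe for kids ?' (the product name is swapped for 'it', with the
# blanked second token leaving a doubled space), so the question matches the
# product-agnostic training data.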
#####################################################################################################################
def check_product(sentence):
    """Return the index of the product mentioned in the sentence, or 11 if none is found."""
    sentence = sentence.lower()
    sentence_words = nltk.TreebankWordTokenizer().tokenize(sentence)
    number_of_words = len(sentence_words)
    for i in range(number_of_words):
        # look ahead safely: '' when this is the last word
        next_word = sentence_words[i + 1] if i + 1 < number_of_words else ''
        if sentence_words[i] == 'growth' and next_word == 'plus':
            return 0
        if sentence_words[i] == 'eno':  #and next_word != 'cooling':
            return 1
        if sentence_words[i] == 'protein' and next_word == 'plus':
            return 2
        if sentence_words[i] == 'crocin':
            return 3
        if sentence_words[i] == 'otrivin':
            return 4
        if sentence_words[i] == 'horlicks':
            return 5
        if sentence_words[i] == 'brush':
            return 6
        if sentence_words[i] == 'sensodyne' and next_word == 'base':
            return 7
        if sentence_words[i] == 'sensodyne' and next_word == 'rapid':
            return 8
        if sentence_words[i] == 'sensodyne' and next_word == 'repair':
            return 9
        if sentence_words[i] == 'sensodyne' and next_word == 'herbal':
            return 10
    return 11
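# For example: check_product('does eno work instantly?') returns 1, while a
# question naming no known product, e.g. 'what are the side effects?',
# returns 11 and falls through to the generic model in the loop below.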
#####################################################################################################################
#index_to_sentence=index_to_answers('answers.csv')
while True:
    print("Type your question")
    question = input("Enter your question: ")
    product_number = check_product(question)
    print(product_number)
    question_modify = removeprodname(question)
    print(question_modify)
    if product_number <= 10:
        # A known product was mentioned: query both models, once with the
        # product name replaced and once with stop words removed.
        #answers_file='answers'+str(product_number)+'.csv'
        #index_to_answer=index_to_answers(answers_file)
        x_test_typezero = np.array([question_modify])
        x_test_typeone = np.array([question])
        X_test_indices_zero = sentences_to_indices(x_test_typezero, word_to_index, maxLen, 0)
        X_test_indices_one = sentences_to_indices(x_test_typeone, word_to_index, maxLen, 1)
        predictions = models[0].predict(X_test_indices_zero)
        index_zero = np.argmax(predictions)
        confidence_zero = predictions[:, index_zero] * 100
        predictions = models[1].predict(X_test_indices_one)
        index_one = np.argmax(predictions)
        confidence_one = predictions[:, index_one] * 100
        print('Confidence in file zero: ' + str(confidence_zero) + ' for index: ' + str(index_zero))
        print('Confidence in file one: ' + str(confidence_one) + ' for index: ' + str(index_one))
    else:
        # No product name found: fall back to the generic (type 1) model only.
        x_test = np.array([question])
        X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen, 1)
        predictions = models[1].predict(X_test_indices)
        index = np.argmax(predictions)
        confidence = predictions[:, index] * 100
        print('No catch for product name')
        print('Catch in type 1 file index: ' + str(index) + ' with confidence: ' + str(confidence))
#####################################################################################################################