-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathapp.py
162 lines (138 loc) · 4.43 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#import packages
import streamlit as st
import joblib
import pandas as pd
from os.path import dirname, join, realpath
# Page banner: title, hero image, then a short description of the app.
APP_TAGLINE = """
A simple app to predict who in Africa is most likely to have a bank account?.
"""

st.header("Financial Inclusion in Africa")
st.image("images/financial-inclusion.jpeg")
st.subheader(APP_TAGLINE)
# Choices offered by each drop-down of the input form.
# NOTE(review): these strings are passed unchanged to the one-hot encoder when
# the form is submitted; confirm their spelling and casing (e.g. "kenya")
# match the categories the encoder was fitted on.
COUNTRY_OPTIONS = ("Tanzania", "kenya", "Uganda", "Rwanda")
LOCATION_OPTIONS = ("Rural", "Urban")
YES_NO_OPTIONS = ("Yes", "No")
GENDER_OPTIONS = ("Female", "Male")
RELATIONSHIP_OPTIONS = (
    "Spouse",
    "Head of Household",
    "Other relative",
    "Child",
    "Parent",
    "Other non-relatives",
)
MARITAL_STATUS_OPTIONS = (
    "Married/Living together",
    "Widowed",
    "Single/Never Married",
    "Divorced/Seperated",
    "Dont know",
)
EDUCATION_OPTIONS = (
    "Secondary education",
    "No formal education",
    "Vocation/Specialised training",
    "Primary education",
    "Tertiary education",
    "Other/Dont know/RTA",
)
JOB_TYPE_OPTIONS = (
    "Self employed",
    "Government Dependent",
    "Formally employed Private",
    "Informally employed",
    "Formally employed Government",
    "Farming and Fishing",
    "Remittance Dependent",
    "Other Income",
    "Dont Know/Refuse to answer",
    "No Income",
)

# Build the form that collects the respondent's details. Widgets are created
# in the order they should appear on the page.
my_form = st.form(key="financial_form")

country = my_form.selectbox("select country", COUNTRY_OPTIONS)
location_type = my_form.selectbox("select location", LOCATION_OPTIONS)
year = my_form.number_input("Inter year", min_value=2000, max_value=2100)
cellphone_access = my_form.selectbox("Do you have a cellphone?", YES_NO_OPTIONS)
gender_of_respondent = my_form.selectbox("Gender", GENDER_OPTIONS)
relationship_with_head = my_form.selectbox(
    "what is your relationship with the head of the family",
    RELATIONSHIP_OPTIONS,
)
marital_status = my_form.selectbox("Your marital status", MARITAL_STATUS_OPTIONS)
education_level = my_form.selectbox("Your education level", EDUCATION_OPTIONS)
job_type = my_form.selectbox("Your job type", JOB_TYPE_OPTIONS)
household_size = my_form.number_input(
    "How many people are living in the house?",
    min_value=1,
    max_value=100,
)
age_of_respondent = my_form.number_input("Your age", min_value=18, max_value=120)

# True only on the script run triggered by pressing the button.
submit = my_form.form_submit_button(label="make prediction")
# Load the trained model, the scaler and the one-hot encoder from files stored
# next to this script.
_APP_DIR = dirname(realpath(__file__))


def _load_artifact(relative_path):
    """Return the joblib-serialised object stored at *relative_path*.

    The path is resolved against the directory containing this script, so the
    app works regardless of the current working directory.
    """
    with open(join(_APP_DIR, relative_path), "rb") as handle:
        return joblib.load(handle)


model = _load_artifact("model/lightgbm-financial-inclusion-model.pkl")
scaler = _load_artifact("preprocessing/min-max-scaler.pkl")
one_hot_encoder = _load_artifact("preprocessing/one-hot-encoder.pkl")
def preprocessing_data(data, enc, scaler):
    """Encode and scale raw form input so it can be passed to the model.

    Args:
        data: The raw input record(s); passed as-is to ``enc.transform``.
        enc: A fitted encoder exposing ``transform`` (the app passes its
            one-hot encoder).
        scaler: A fitted scaler exposing ``transform``, applied to the
            encoder's output.

    Returns:
        Whatever ``scaler.transform`` returns for the encoded data.
    """
    # NOTE: intentionally not memoised. The previous ``@st.cache`` decorator is
    # a deprecated Streamlit API and had to hash the encoder and scaler objects
    # on every call, which costs more than this single-row transform.
    encoded = enc.transform(data)
    # Presumably rescales features into [0, 1] (min-max scaler) -- the actual
    # range depends on how the supplied scaler was fitted.
    return scaler.transform(encoded)
if submit:
    # Gather the form answers into one record keyed by feature name.
    # (Named ``user_input`` so the ``input`` builtin is not shadowed.)
    user_input = {
        "country": country,
        "year": year,
        "location_type": location_type,
        "cellphone_access": cellphone_access,
        "household_size": household_size,
        "age_of_respondent": age_of_respondent,
        "gender_of_respondent": gender_of_respondent,
        "relationship_with_head": relationship_with_head,
        "marital_status": marital_status,
        "education_level": education_level,
        "job_type": job_type,
    }

    # Single-row DataFrame; index=[0] is required because every value is a scalar.
    data = pd.DataFrame(user_input, index=[0])

    # Encode and scale the record the same way for both model calls below.
    transformed_data = preprocessing_data(data=data, enc=one_hot_encoder, scaler=scaler)

    # Predicted class label for the single submitted row.
    prediction = model.predict(transformed_data)
    output = int(prediction[0])

    # Probability of the predicted class. Index the single row explicitly:
    # ``probas[:, output]`` is a 1-element array, and converting such an array
    # with ``float()`` is deprecated in recent NumPy releases.
    probas = model.predict_proba(transformed_data)
    probability = "{:.2f}".format(float(probas[0, output]))

    # Display the prediction result.
    st.header("Results")
    if output == 1:
        st.write(
            "You are most likely to have a bank account with probability of {} 😊".format(
                probability
            )
        )
    elif output == 0:
        st.write(
            "You are most likely not to have a bank account with probability of {} 😔".format(
                probability
            )
        )
# Footer: credit the author with a Markdown link to their Twitter profile.
url = "https://twitter.com/Davis_McDavid"
credit_template = "Developed with ❤️ by [Davis David](%s)"
st.write(credit_template % url)