-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmachineLearning.py
125 lines (103 loc) · 3.95 KB
/
machineLearning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0
from sklearn.metrics import accuracy_score
import pandas as pd
import os
from skimage import io
from skimage.transform import resize
import random
import numpy as np
def return_test_train(split_value):
    """Randomly assign one sample to the training or validation split.

    A uniform draw from ``random.random()`` (in ``[0.0, 1.0)``) is compared
    against ``split_value``, so ``split_value`` behaves as the approximate
    fraction of samples that end up in the training set.

    Args:
        split_value: Threshold the random draw is compared against.

    Returns:
        ``1`` (validation data) when the draw is greater than or equal to
        ``split_value``, otherwise ``0`` (training data).
    """
    is_validation = random.random() >= split_value
    return 1 if is_validation else 0
def encode_one_hot(label):
    """Map a textual outcome label to its binary class index.

    Despite the name, the result is a single 0/1 integer (matching a
    one-unit sigmoid output), not a one-hot vector.

    Args:
        label: Outcome string. ``"positive"`` maps to ``1``. Both the
            correctly spelled ``"negative"`` and the legacy misspelling
            ``"negetive"`` map to ``0``.

    Returns:
        ``1`` or ``0`` for a recognised label, ``None`` for anything else.
    """
    if label == "positive":
        return 1
    # "negetive" is the spelling this function originally matched; keep
    # accepting it so existing index files still work, but also accept the
    # correct spelling so it no longer falls through to None.
    if label in ("negative", "negetive"):
        return 0
    return None
def construct_dataset(DATA_INDEX, DATA_PATH, IMG_SIZE, test_train_split):
    """Load every profile's images and split them into train/test arrays.

    The CSV at ``DATA_INDEX`` is read with pandas; each row must provide a
    ``"profile"`` column (sub-folder name under ``DATA_PATH``) and an
    ``"outcome"`` column (label text passed to ``encode_one_hot``). Every
    file found in a profile folder is read as an image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and randomly routed to the training or the
    test set via ``return_test_train``.

    Args:
        DATA_INDEX: Path of the CSV index file.
        DATA_PATH: Directory that contains one sub-folder per profile.
        IMG_SIZE: Target height and width for every image.
        test_train_split: Threshold forwarded to ``return_test_train``.

    Returns:
        A list ``[x_train, y_train, x_test, y_test]`` of NumPy arrays.
    """
    data_index = pd.read_csv(DATA_INDEX)
    x_train, y_train = [], []
    x_test, y_test = [], []
    counter = 0
    for _, row in data_index.iterrows():
        # Construct the folder path holding this profile's images.
        image_path_folder = os.path.join(DATA_PATH, row["profile"])
        file_names = os.listdir(image_path_folder)
        # The label belongs to the profile, so compute it once per row
        # instead of once per image.
        label = encode_one_hot(row["outcome"])
        for file_name in file_names:
            image = io.imread(os.path.join(image_path_folder, file_name))
            # NOTE(review): images whose channel count differs (grayscale,
            # RGBA) would not stack into one regular array below -- confirm
            # that the dataset only contains RGB images.
            img = resize(image, (IMG_SIZE, IMG_SIZE))
            if return_test_train(test_train_split) == 0:  # training data
                x_train.append(img)
                y_train.append(label)
            else:
                x_test.append(img)
                y_test.append(label)
            counter += 1
    print(f"A total of {counter} images were processed")
    return [np.asarray(x_train), np.asarray(y_train), np.asarray(x_test), np.asarray(y_test)]
def build_model(IMG_SIZE, input_scale=255.0):
    """Build and compile a binary classifier on a frozen EfficientNetB0 base.

    The network is: input -> rescaling -> random rotation/translation
    augmentation -> ImageNet-pretrained EfficientNetB0 (frozen, no top) ->
    global average pooling -> batch normalisation -> dropout -> a single
    sigmoid unit. It is compiled with Adam (learning rate 1e-3), binary
    cross-entropy loss and the accuracy metric.

    Args:
        IMG_SIZE: Height and width of the (square, 3-channel) input images.
        input_scale: Factor the inputs are multiplied by before reaching
            the backbone. The default of 255 matches the image loaders in
            this file, which resize with ``skimage.transform.resize`` and
            therefore (with its default settings) hand over floats in
            [0, 1], whereas Keras EfficientNet expects pixels in [0, 255].
            Pass ``1.0`` if the images are already in the [0, 255] range.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    # EfficientNet carries its own normalisation layers and assumes raw
    # [0, 255] pixel values, so bring the inputs back to that range first.
    x = layers.Rescaling(input_scale, name="input_rescaling")(inputs)
    img_augmentation = Sequential(
        [
            layers.RandomRotation(factor=0.10),
            layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        ],
        name="img_augmentation",
    )
    x = img_augmentation(x)
    backbone = EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")
    # Freeze the pretrained weights; only the new head below is trained.
    backbone.trainable = False
    # Rebuild the top.
    x = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    x = layers.BatchNormalization()(x)
    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    # The output layer is hard-coded to one sigmoid unit because the
    # decision is strictly binary (there is no "sort of like" option).
    outputs = layers.Dense(1, activation="sigmoid", name="pred")(x)
    # Compile.
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return model
def train_model(x_train, y_train, IMG_SIZE, BATCH_SIZE, epochs):
    """Create the classifier under a mirrored strategy and fit it.

    Args:
        x_train: Training images passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        IMG_SIZE: Image side length forwarded to ``build_model``.
        BATCH_SIZE: Batch size used by ``fit``.
        epochs: Number of training epochs.

    Returns:
        The fitted model.
    """
    distribution = tf.distribute.MirroredStrategy()
    # The model has to be built and compiled inside the strategy scope.
    with distribution.scope():
        net = build_model(IMG_SIZE)
    net.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=epochs,
        verbose=1,
    )
    return net
def make_prediction(pictures, model):
    """Score a batch of pictures and return the mean prediction.

    Args:
        pictures: Sequence of images; converted to one array with
            ``np.asarray`` before being handed to the model.
        model: Object exposing ``predict(batch)``.

    Returns:
        The mean of everything ``model.predict`` returned.
    """
    batch = np.asarray(pictures)
    scores = model.predict(batch)
    mean_score = np.mean(scores)
    print(f"Average weight of prediction {mean_score}")
    return mean_score
def make_decision(pred_avg, thresh):
    """Turn an averaged prediction into a binary decision.

    Args:
        pred_avg: Averaged prediction value to judge.
        thresh: Value ``pred_avg`` must strictly exceed for a positive
            decision.

    Returns:
        ``1`` when ``pred_avg`` is greater than ``thresh``, otherwise ``0``.
    """
    print(f"Making a decision, accuracy of {pred_avg}, threshold set at {thresh}")
    return 1 if pred_avg > thresh else 0
def load_images_for_prediction(DATA_PATH, IMG_SIZE, profile):
    """Read and resize every image stored for one profile.

    The folder is ``<current working directory>/DATA_PATH/profile``; each
    entry returned by ``os.listdir`` is read with scikit-image and resized
    to ``IMG_SIZE`` x ``IMG_SIZE``.

    Args:
        DATA_PATH: Directory (joined onto the current working directory)
            that contains the profile folders.
        IMG_SIZE: Target height and width for every image.
        profile: Name of the profile's sub-folder.

    Returns:
        A list with one resized image per file in the folder.
    """
    profile_dir = os.path.join(os.getcwd(), DATA_PATH, profile)
    target_shape = (IMG_SIZE, IMG_SIZE)
    return [
        resize(io.imread(os.path.join(profile_dir, entry)), target_shape)
        for entry in os.listdir(profile_dir)
    ]
def return_accuracy(ytrue, ypreads):
    """Return scikit-learn's accuracy score for the given labels.

    Args:
        ytrue: Ground-truth labels.
        ypreads: Predicted labels.

    Returns:
        Whatever ``accuracy_score`` returns for the two label sequences.
    """
    score = accuracy_score(y_true=ytrue, y_pred=ypreads)
    return score