machine_learning_code_mongodb.py
import pickle
import json
import pandas as pd
import pymongo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Set up the connection to the MongoDB database
client = pymongo.MongoClient("mongodb://doitAdmin:[email protected]:27017/?authMechanism=DEFAULT&tls=false")
# Select the database and collection to read from
db = client["klaim_asuransi"]
collection = db["data_klaim"]
# Query the collection to fetch the data
data = collection.find()
# Convert the MongoDB documents into a pandas DataFrame
df = pd.DataFrame(list(data))
# Drop MongoDB's automatically added '_id' column so it is not treated as a feature
df = df.drop('_id', axis=1)
# One-hot encode the categorical features (transaction_type and card_type)
df_encoded = pd.get_dummies(
    df, columns=['dataprocess.transaction_type', 'dataprocess.card_type'], drop_first=True)
# Split the dataset into features (X) and the target variable (y)
y = df_encoded['is_fraud']
X = df_encoded.drop('is_fraud', axis=1)
# Drop additional columns that should not be used as features
X = X.drop(['dataprocess.transaction_date', 'dataprocess.merchant_id', 'dataprocess.customer_id',
            'dataprocess.location', 'dataprocess.card_number'], axis=1)
# Apply any additional preprocessing steps as needed
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
# Standardize the features (scaling): fit on the training set, then apply to the test set
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Create and train the logistic regression model
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)
# Save the trained model to disk
filename = 'finalized_model1.sav'
pickle.dump(model, open(filename, 'wb'))
# Run predictions on the test data
y_pred = model.predict(X_test)
# Evaluate the model
akurasi = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f"Accuracy: {akurasi}")
print(f"Confusion matrix:\n{conf_matrix}")
print(f"Classification report:\n{class_report}")
print(df_encoded.columns)
# Close the MongoDB connection
client.close()
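
# Usage sketch (assumption, not from the original script): reload the persisted model
# with pickle and check that it reproduces the evaluation above. Note that scoring
# genuinely new data would also require persisting the fitted StandardScaler and the
# one-hot encoding column layout, and reapplying them before prediction.
loaded_model = pickle.load(open(filename, 'rb'))
print(f"Reloaded model accuracy: {accuracy_score(y_test, loaded_model.predict(X_test))}")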