-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy path0XpK.py
105 lines (76 loc) · 2.99 KB
/
0XpK.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#AI-TECHGYM-3-10-A-1
#回帰問題と分類問題
#インポート
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
# Load the dataset from the CSV file in the current working directory.
data_added_dummies = pd.read_csv("./data_added_dummies.csv")

# Histogram of the total transaction price over every row.
price_all = data_added_dummies["取引価格(総額)"]
plt.hist(price_all)
plt.show()

# Boolean mask: rows priced strictly below 60,000,000 yen.
is_under_limit = data_added_dummies["取引価格(総額)"] < 60000000

# Histogram of the same column, restricted to the rows under the limit.
tmp_data = data_added_dummies[is_under_limit]
plt.hist(tmp_data["取引価格(総額)"])
plt.show()

# From here on, work only with the rows strictly below 60,000,000 yen.
data_added_dummies = data_added_dummies[is_under_limit]
# Simple linear regression fitted on every row of data_added_dummies:
# one explanatory column (floor area) and one target column (total price).
x_column_list = ['面積(㎡)']
y_column_list = ['取引価格(総額)']
x = data_added_dummies[x_column_list]
y = data_added_dummies[y_column_list]

# fit() returns the estimator itself, so creation and training are chained.
lr = LinearRegression().fit(x, y)

# The fitted slope and intercept can be inspected via lr.coef_ and
# lr.intercept_ when needed.
# Multiple linear regression: the total price is the target and every other
# column of data_added_dummies is used as an explanatory variable.
y = data_added_dummies["取引価格(総額)"]
x = data_added_dummies.drop(columns="取引価格(総額)")

# Build the model and train it on all rows.
lr_multi = LinearRegression()
lr_multi.fit(x, y)

# The full coefficient vector and intercept are available as lr_multi.coef_
# and lr_multi.intercept_; only the entries at positions 1, 2 and 3 are
# printed here, each next to its column name.
for position in (1, 2, 3):
    print(x.columns[position], lr_multi.coef_[position])
# Simple linear regression evaluated on a hold-out split:
# floor area is the only explanatory variable, total price is the target.
x_column_list = ['面積(㎡)']
y_column_list = ['取引価格(総額)']

# Hold out 30% of the rows as test data.
# NOTE(review): no random_state is passed, so the split (and therefore the
# scores printed below) changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data_added_dummies[x_column_list],
    data_added_dummies[y_column_list],
    test_size=0.3,
)

# Train on the training part only, then predict the held-out rows.
lr_single = LinearRegression()
lr_single.fit(X_train, y_train)
y_pred = lr_single.predict(X_test)

# Both metrics are called as (y_true, y_pred). MAE is symmetric in its two
# arguments, so the printed value is the same as with the reversed order;
# the documented order is used so both calls read the same way.
print('MAE(単回帰)', mean_absolute_error(y_test, y_pred))
print('決定係数(単回帰)', r2_score(y_test, y_pred))
# Multiple linear regression evaluated on a hold-out split: the total price
# is the target and every other column is an explanatory variable.
x = data_added_dummies.drop("取引価格(総額)", axis=1)
y = data_added_dummies["取引価格(総額)"]

# Hold out 30% of the rows as test data.
# NOTE(review): no random_state is passed, so the split (and therefore the
# scores printed below) changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Train on the training part only, then predict the held-out rows.
lr_multi2 = LinearRegression()
lr_multi2.fit(X_train, y_train)
y_pred = lr_multi2.predict(X_test)

# Both metrics are called as (y_true, y_pred). MAE is symmetric in its two
# arguments, so the printed value is the same as with the reversed order;
# the documented order is used so both calls read the same way.
print('MAE(重回帰)', mean_absolute_error(y_test, y_pred))
print('決定係数(重回帰)', r2_score(y_test, y_pred))

# Signed error per test row: prediction minus actual value, so a positive
# entry means the model over-estimated the price.
diff = y_pred - y_test

# Preview the first rows of the error.
# NOTE(review): display is not imported in this file; presumably this relies
# on the IPython/Jupyter builtin — confirm before running as a plain script.
display(diff.head())