-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdemo_synthetic.py
92 lines (83 loc) · 4.38 KB
/
demo_synthetic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from lingam import DirectLiNGAM
from lingam.utils import make_dot
from utils import interaction_matrix, cost_order_all_permutations
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from linear_oce import LinearOrderedActionExtractor
from mlp_oce import MLPOrderedActionExtractor
from forest_oce import ForestOrderedActionExtractor
def demonstration(clf='lr', n=1, costs=('TLPS',), ordering_cost='uniform', verbose=True):
    """Run the ordered-action extraction demo on a synthetic loan dataset.

    Five integer features are sampled from a small linear structural model
    (Education -> JobSkill -> Income <- WorkPerDay -> HealthStatus), a
    classifier is fitted on them, and for the first ``n`` individuals the
    classifier assigns to class 1 the extractor is queried twice per cost
    type: once without ordering and once with the ordering cost enabled.
    Everything is reported through ``print``; nothing is returned.

    The data is drawn from NumPy's global RNG, so call ``np.random.seed``
    beforehand for reproducible output.

    Args:
        clf: Which classifier to fit: ``'lr'`` (logistic regression),
            ``'mlp'`` (multi-layer perceptron) or ``'rf'`` (random forest).
        n: Maximum number of class-1 individuals to explain.
        costs: Iterable of cost-type names, each forwarded to the
            extractor's ``extract`` as ``cost_type``.
        ordering_cost: Forwarded to ``extract`` as ``ordering_cost_type``
            for the ordered run.
        verbose: When true, print the description of the synthetic setup
            (feature ranges, structural equations, interaction matrix).
            It does not influence which interaction matrix is used.

    Raises:
        ValueError: If ``clf`` is not one of the supported keys.
    """
    # Fail fast: an unknown key would otherwise leave the model unbound and
    # only blow up after the data has been generated.
    known_classifiers = ('lr', 'mlp', 'rf')
    if clf not in known_classifiers:
        raise ValueError(
            'unknown clf {!r}; expected one of {}'.format(clf, known_classifiers)
        )

    N = 1000
    # Coefficients of the structural equations (c_ij: effect of x_j on x_i).
    c_21 = 1
    c_32 = 6
    c_34 = 4
    c_54 = -0.5
    names = ['Education', 'JobSkill', 'Income', 'WorkPerDay', 'HealthStatus']

    # np.random.randint excludes the upper bound, e.g. x_1 takes values 1..4.
    x_1 = np.random.randint(1, 5, N)
    x_2 = c_21 * x_1 + np.random.randint(-1, 1, N)
    x_4 = np.random.randint(2, 6, N) * 2
    x_3 = c_32 * x_2 + c_34 * x_4 + np.random.randint(-2, 2, N)
    x_5 = c_54 * x_4 + np.random.randint(6, 13, N)
    X = np.array([x_1, x_2, x_3, x_4, x_5]).T

    # NOTE(review): this result is superseded by the ground-truth matrix
    # built below; the call is kept only so that any side effects of the
    # helper stay as they were -- confirm and drop if it has none.
    _, C = interaction_matrix(X, interaction_type='causal')

    # Label 1 when the mean-normalised Income + HealthStatus score is below 2.
    w_3, w_5 = 1.0 / x_3.mean(), 1.0 / x_5.mean()
    y = (w_3 * X[:, 2] + w_5 * X[:, 4] < 2.0).astype(int)

    # Ground-truth interaction matrix taken from the coefficients above. Row 3
    # also carries the indirect effect of x_1 on x_3 through x_2. It is used
    # regardless of `verbose` so the extracted actions never depend on a
    # printing flag.
    C = np.array([
        [0.0] * 5,
        [c_21] + [0.0] * 4,
        [c_21 * c_32, c_32, 0.0, c_34, 0.0],
        [0.0] * 5,
        [0.0] * 3 + [c_54, 0.0],
    ])

    if verbose:
        print('# Synthetic Example:')
        print('\t- x_1: Education : 1 ~ 5')
        print('\t- x_2: JobSkill : 1 ~ 10')
        print('\t- x_3: Income : 10 ~ 100')
        print('\t- x_4: WorkPerDay : 4 ~ 10')
        print('\t- x_5: HealthStatus : 1 ~ 10')
        print('# Causal Relationship:')
        print('\t- x_1 = e_1')
        print('\t- x_2 = {} * x_1 + e_2'.format(c_21))
        print('\t- x_3 = {} * x_2 + {} * x_4 + e_3'.format(c_32, c_34))
        print('\t- x_4 = e_4')
        print('\t- x_5 = {} * x_4 + e_5'.format(c_54))
        print('# Interaction Matrix: \n', C)
        # To render the causal DAG, build the direct-effect matrix and hand it
        # to lingam's make_dot:
        # B = np.array([[0.0]*5, [c_21]+[0.0]*4, [0.0,c_32,0.0,c_34,0.0], [0.0]*5, [0.0]*3+[c_54,0.0]])
        # dot = make_dot(B, labels=names)
        # dot.attr('graph', fontname='arial')
        # dot.render('demonstration_causal_dag')

    # Pick the estimator, its matching extractor class and the display label.
    if clf == 'lr':
        mdl = LogisticRegression(penalty='l2', C=1.0, fit_intercept=True, solver='liblinear', max_iter=10000)
        extractor_cls = LinearOrderedActionExtractor
        clf_label = 'LogisticRegression'
    elif clf == 'mlp':
        mdl = MLPClassifier(hidden_layer_sizes=(30,), max_iter=500, activation='relu', alpha=0.0001)
        extractor_cls = MLPOrderedActionExtractor
        clf_label = 'MultiLayerPerceptron'
    else:  # 'rf' -- the key was validated on entry
        mdl = RandomForestClassifier(n_estimators=30, max_depth=6)
        extractor_cls = ForestOrderedActionExtractor
        clf_label = 'RandomForest'

    mdl = mdl.fit(X, y)
    oce = extractor_cls(
        mdl,
        X,
        feature_names=names,
        feature_types=['I'] * 5,
        feature_constraints=['INC'] * 2 + [''] * 3,
        target_name='Loan',
        target_labels=['Accept', 'Reject'],
        interaction_matrix=C,
    )
    print('# Classifier: {}'.format(clf_label))
    print()

    # Individuals the fitted model puts in class 1 (second target label).
    denied_individual = X[mdl.predict(X) == 1]
    for i, x in enumerate(denied_individual[:n]):
        print('# {}-th Denied Individual: '.format(i + 1))
        print('\t- x_1: Education : {}'.format(x[0]))
        print('\t- x_2: JobSkill : {}'.format(x[1]))
        print('\t- x_3: Income : {}'.format(x[2]))
        print('\t- x_4: WorkPerDay : {}'.format(x[3]))
        print('\t- x_5: HealthStatus : {}'.format(x[4]))
        for cost in costs:
            print('## {} (non-order):'.format(cost))
            oa = oce.extract(x, K=5, ordering=False, post_ordering=False, cost_type=cost)
            # A result of -1 is skipped rather than printed.
            if oa != -1:
                print(oa)
            print('## {} + C_ord: '.format(cost))
            for g in (1.0,):
                oa = oce.extract(x, K=5, gamma=g, ordering=True, cost_type=cost, ordering_cost_type=ordering_cost)
                if oa != -1:
                    print(oa)
            print()
        print('---')
if __name__ == '__main__':
    # Seed NumPy's global RNG first so the demo draws the same synthetic
    # data on every run.
    np.random.seed(0)
    demonstration(clf='lr', n=1, costs=['TLPS'])