Skip to content

Commit

Permalink
Set the random state so that the results are reproducible
Browse files Browse the repository at this point in the history
  • Loading branch information
xiw315 committed Feb 6, 2020
1 parent 3adcd55 commit 85f487c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 14 deletions.
19 changes: 9 additions & 10 deletions results/analysis_result.csv
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@

method,Random Forest
Best_max_depth,[3]
Best_min_samples_split,[3]
Best_CV_Score,[0.7045454545454546]
Training_Error,[0.25909090909090904]
Best_max_depth,[4]
Best_min_samples_split,[2]
Best_CV_Score,[0.6954545454545454]
Training_Error,[0.2090909090909091]
Validation_Error,[0.3035714285714286]
method,Decision Tree
Best_max_depth,[7]
Best_min_samples_split,[3]
Best_CV_Score,[0.6545454545454545]
Training_Error,[0.1454545454545455]
Validation_Error,[0.4107142857142857]
Best_min_samples_split,[4]
Best_CV_Score,[0.6590909090909091]
Training_Error,[0.15000000000000002]
Validation_Error,[0.3571428571428571]
method,Logistic regression
C,[1]
Best_CV_Score,[0.7090909090909091]
Training_Error,[0.2727272727272727]
Validation_Error,[0.3214285714285714]
Validation_Error,[0.3214285714285714]
8 changes: 4 additions & 4 deletions src/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def main(train_data, local_path):


#fit and predict using decision tree classifier and GridSearchCV to find the best max_depth hyperparameter
model = DecisionTreeClassifier()
model = DecisionTreeClassifier(random_state= 100)
param_grid = {'max_depth': list(range(2,15)),'min_samples_split':list(range(2,5))}
CV = GridSearchCV(model, param_grid, cv = 10, refit=True)
CV.fit(X_train, y_train)
Expand All @@ -58,7 +58,7 @@ def main(train_data, local_path):


# try random forest model
forest = RandomForestClassifier(n_estimators =100)
forest = RandomForestClassifier(n_estimators =100, random_state=100)
param_grid_f = {'max_depth': list(range(2,15)),'min_samples_split':list(range(2,5))}
#'n_estimators': list(range(80,120))}

Expand All @@ -75,7 +75,7 @@ def main(train_data, local_path):


#try logistic regression
model_r = LogisticRegression(solver ='lbfgs')
model_r = LogisticRegression(solver ='lbfgs',random_state=100)
param_grid_r = {'C':[0.001, 0.01, 0.1, 1, 10, 100, 1000]}
CV_r = GridSearchCV(model_r, param_grid_r, cv = 10, refit=True)
CV_r.fit(X_train, y_train['Class'])
Expand All @@ -90,7 +90,7 @@ def main(train_data, local_path):
model_score = pd.concat([d_2, d_r])

#find out the best one is logistic regression, so we do a confusion matrix
model_better = LogisticRegression(solver ='lbfgs', C=CV_r.best_params_['C'])
model_better = LogisticRegression(solver ='lbfgs', C=CV_r.best_params_['C'],random_state=100)
model_better.fit(X_train, y_train['Class'])
y_pred = model_better.predict(X_valid)
confusion = pd.DataFrame(confusion_matrix(y_valid, y_pred))
Expand Down

0 comments on commit 85f487c

Please sign in to comment.