# Import required libraries
import numpy as np
import csv
import cv2
import sklearn
import time

from sklearn import preprocessing, metrics, cross_validation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC

# NOTE: sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20, so the next import only works on older releases.
# For scikit-learn >= 0.18 use:
# from sklearn.model_selection import train_test_split
from sklearn.cross_validation import train_test_split

# Constants
data_path = "data/"

csv_params = []
csv_labels = []

# Read the data set: in every row the last column is the class label and all
# preceding columns are the input parameters.
# newline='' is what the csv module asks for, so that the reader itself
# handles line endings (including newlines embedded in quoted fields).
with open(data_path + 'heart-statlog.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # Uncomment this line if file contains headers
    # next(csv_reader, None)
    for each_line in csv_reader:
        # csv.reader yields [] for a blank line (e.g. a trailing empty line
        # at the end of the file); each_line[-1] would raise IndexError.
        if not each_line:
            continue
        csv_params.append(each_line[:-1])
        csv_labels.append(each_line[-1])

# Turn the parsed rows and labels (plain Python lists) into numpy arrays
input_params, input_labels = np.array(csv_params), np.array(csv_labels)

# Report the dimensions of both arrays
print("Shape of input params: ", input_params.shape)  # (rows, columns)
print("Shape of input labels: ", input_labels.shape)  # (rows,)

Shape of input params:  (270, 13)
Shape of input labels:  (270,)

Headers corresponding to each column:

age
sex
chest pain type (4 values)
resting blood pressure
serum cholesterol in mg/dl
fasting blood sugar > 120 mg/dl
resting electrocardiographic results (values 0,1,2)
maximum heart rate achieved
exercise induced angina
oldpeak = ST depression induced by exercise relative to rest
the slope of the peak exercise ST segment
number of major vessels (0-3) colored by fluoroscopy
thal: 3 = normal; 6 = fixed defect; 7 = reversible defect
input_label -> present/absent

# Peek at the first record and the label that belongs to it
for caption, value in (
    ("Sample input parameters: ", input_params[0]),
    ("Corresponding input label: ", input_labels[0]),
):
    print(caption, value)

Sample input parameters:  ['70' '1' '4' '130' '322' '0' '2' '109' '0' '2.4' '2' '3' '3']
Corresponding input label:  present

# Scaling the input params by using standard scaler.
# The csv module returns every field as text, so input_params is a string
# array. Convert it to float64 explicitly instead of relying on the scaler's
# implicit conversion: that conversion emits a DataConversionWarning, and
# more recent scikit-learn releases may refuse string input altogether.
numeric_params = input_params.astype(np.float64)
X_scaler = StandardScaler().fit(numeric_params)
X_scaled = X_scaler.transform(numeric_params)
print("Scaled Output: ", X_scaled[0])

Scaled Output:  [ 1.71209356  0.6894997   0.87092765 -0.07540984  1.40221232 -0.41702883
  0.98166365 -1.75920811 -0.7012223   1.18101235  0.67641928  2.47268219
 -0.87570581]


C:\Users\AKhil\Anaconda3\envs\carnd-term1\lib\site-packages\sklearn\utils\validation.py:475: DataConversionWarning: Data with input dtype <U3 was converted to float64 by StandardScaler.
  warnings.warn(msg, DataConversionWarning)

# Hold out 20% of the samples as a test set. The seed is drawn at random on
# every run, so the partition differs from run to run.
rand_state = np.random.randint(0, 100)
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X_scaled,
    input_labels,
    test_size=0.2,
    random_state=rand_state,
)

# Feature and label counts of each partition are printed side by side
print("Length of training data: ", len(X_train), len(y_train))
print("Length of test data: ", len(X_test), len(y_test))

Length of training data:  216 216
Length of test data:  54 54

# Train a linear support-vector classifier on the training split and report
# how long fitting takes
svc = LinearSVC()
t = time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Score the classifier on the held-out test split
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# No prediction timing is done in this cell, so the timer is not restarted.

0.03 Seconds to train SVC...
Test Accuracy of SVC =  0.8889

# Two reference records with their known labels (presumably rows taken from
# the data set -- verify against the CSV):
# 57,1,4,140,192,0,0,148,0,0.4,2,0,6,absent
# 67,1,4,160,286,0,2,108,1,1.5,2,3,3,present
validation_array = np.array([57, 1, 4, 140, 192, 0, 0, 148, 0, 0.4, 2, 0, 6])
# Add a leading axis so the single record is passed as a one-row 2-D array,
# then scale it with the already-fitted scaler before predicting
transformed_input = X_scaler.transform(validation_array[np.newaxis, :])
print(svc.predict(transformed_input))

['absent']

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

README.md

README.md

Headers correspond to each column

Files

README.md

Latest commit

History

README.md

File metadata and controls

Headers correspond to each column