From e2ca50db42cd516e4eea1b70ec8605e36180c8ac Mon Sep 17 00:00:00 2001
From: Seymour Knowles-Barley
Date: Thu, 30 Jan 2014 10:56:18 -0500
Subject: [PATCH] Add random forest options for LGN and other datasets,
 progress output, and GPU deep net CNN functionality.

---
 .../predict_gpu_randomforest_2class.py        | 194 +++++
 .../predict_gpu_randomforest_3class.py        | 176 +++++
 .../predict_gpu_randomforest_cp7.py           | 206 +++++
 .../predict_gpu_randomforest_cp7_ds2.py       | 208 +++++
 .../predict_gpu_randomforest_lgn_ds2.py       | 208 +++++
 .../predict_gpu_randomforest_norm2.py         | 167 ++++
 .../predict_gpu_randomforest_test.py          | 168 ++++
 ClassifyMembranes/predict_pyv_randomforest.py | 127 +++
 ClassifyMembranes/rf_classify.html            | 664 ++++++++++++++++
 ClassifyMembranes/rf_classify_parallel.html   | 730 ++++++++++++++++++
 .../train_gpu_randomforest_2class.py          | 664 ++++++++++++++++
 .../train_gpu_randomforest_3class.py          | 666 ++++++++++++++++
 .../train_gpu_randomforest_cp7.py             | 645 ++++++++++++++++
 .../train_gpu_randomforest_cp7_ds2.py         | 658 ++++++++++++++++
 .../train_gpu_randomforest_lgn_ds2.py         | 653 ++++++++++++++++
 .../train_gpu_randomforest_norm2.py           | 673 ++++++++++++++++
 DeepNets/full_image_classify_stumpin.py       | 237 ++++++
 DeepNets/full_image_cnn.py                    | 667 ++++++++++++++++
 Segment/import_segmentations_4cube.py         |  66 ++
 19 files changed, 7777 insertions(+)
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_2class.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_3class.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_cp7.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_norm2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_test.py
 create mode 100644 ClassifyMembranes/predict_pyv_randomforest.py
 create mode 100644 ClassifyMembranes/rf_classify.html
 create mode 100644 ClassifyMembranes/rf_classify_parallel.html
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_2class.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_3class.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_cp7.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_norm2.py
 create mode 100644 DeepNets/full_image_classify_stumpin.py
 create mode 100644 DeepNets/full_image_cnn.py
 create mode 100644 Segment/import_segmentations_4cube.py

diff --git a/ClassifyMembranes/predict_gpu_randomforest_2class.py b/ClassifyMembranes/predict_gpu_randomforest_2class.py
new file mode 100644
index 0000000..f6d1e04
--- /dev/null
+++ b/ClassifyMembranes/predict_gpu_randomforest_2class.py
@@ -0,0 +1,194 @@
+############################################################
+# GPU Implementation of Random Forest Classifier - Prediction
+# v0.1
+# Seymour Knowles-Barley
+############################################################
+# Based on C code from:
+# http://code.google.com/p/randomforest-matlab/
+# License: GPLv2
+############################################################
+
+import numpy as np
+import os
+import sys
+import h5py
+import glob
+import mahotas
+
+import pycuda.autoinit
+import pycuda.driver as cu
+import pycuda.compiler as nvcc
+import pycuda.gpuarray as gpuarray
+
+gpu_randomforest_predict_source = """
+#define NODE_TERMINAL -1
+#define NODE_TOSPLIT -2
+#define NODE_INTERIOR -3
+
+__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? + treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +# forest_file = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\classifyds2\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_rhoana_features.h5' +# output_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\classifyds2\\output\\' + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_autoencoder_features.h5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_output\\' + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_output\\' + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\original' +input_image_suffix = '.tif' +input_features_suffix = '_autoencoder_features.h5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\output\\' + + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = 
file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + del d_votes + del d_features + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + for classi in range(nclass): + output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if (nclass == 2): + color_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.float32) + color_image[:,:,0] = prob_image[:,:,0] + color_image[:,:,1] = prob_image[:,:,1] + color_image[:,:,2] = prob_image[:,:,0] + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(color_image * 255)) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = color_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_3class.py b/ClassifyMembranes/predict_gpu_randomforest_3class.py new file mode 100644 index 0000000..c250c34 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_3class.py @@ -0,0 +1,176 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int 
*nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? + treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +# input_image_suffix = '_labeled.png' +# input_features_suffix = '.hdf5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2\\' + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\original' +input_image_suffix = '.tif' +input_features_suffix = '_autoencoder_features.h5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\output\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + del 
d_votes + del d_features + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_cp7.py b/ClassifyMembranes/predict_gpu_randomforest_cp7.py new file mode 100644 index 0000000..d5bb142 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_cp7.py @@ -0,0 +1,206 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' +input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features.h5' +output_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\output\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif prob_image.shape[2] == 2: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + 
out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py b/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py new file mode 100644 index 0000000..14f7b17 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py @@ -0,0 +1,208 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +downsample_factor = 2 + +forest_file = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' +input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) +output_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\output_ds{0}\\'.format(downsample_factor) + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif prob_image.shape[2] == 2: + + 
output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py b/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py new file mode 100644 index 0000000..aa59b1f --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py @@ -0,0 +1,208 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +downsample_factor = 2 + +forest_file = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\rhoana_forest_ds{0}_2class.hdf5'.format(downsample_factor) +input_image_folder = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2' +input_image_suffix = '_labeled.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) +output_folder = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\output_ds{0}\\'.format(downsample_factor) + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif 
prob_image.shape[2] == 2: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_norm2.py b/ClassifyMembranes/predict_gpu_randomforest_norm2.py new file mode 100644 index 0000000..590e871 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_norm2.py @@ -0,0 +1,167 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class_norm2.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2_norm2\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = 
h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_test.py b/ClassifyMembranes/predict_gpu_randomforest_test.py new file mode 100644 index 0000000..c604392 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_test.py @@ -0,0 +1,168 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2test\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +#ntree = 1 +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = 
h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_pyv_randomforest.py b/ClassifyMembranes/predict_pyv_randomforest.py new file mode 100644 index 0000000..2a14493 --- /dev/null +++ b/ClassifyMembranes/predict_pyv_randomforest.py @@ -0,0 +1,127 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2pyv\\' + +NODE_TERMINAL = -1 +NODE_TOSPLIT = -2 +NODE_INTERIOR = -3 + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + +treemap = model['/forest/treemap'][...] +nodestatus = model['/forest/nodestatus'][...] +xbestsplit = model['/forest/xbestsplit'][...] +bestvar = model['/forest/bestvar'][...] +nodeclass = model['/forest/nodeclass'][...] + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + npix = image_shape[0] * image_shape[1] + fshape = (nfeatures, npix) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + votes = np.zeros((npix, nclass), dtype=np.int32) + + k = np.zeros((ntree, npix), dtype=np.int32) + + alltreei = np.reshape(np.repeat(np.arange(512), npix), (ntree,npix)) + + (treei, pixi) = np.nonzero(nodestatus[alltreei, k] != NODE_TERMINAL) + + while len(treei) > 0: + m = bestvar[treei, k[treei, pixi]] - 1 + choice = 1 * (features[m, pixi] > xbestsplit[treei, k[treei, pixi]]) + #Split by a numerical predictor + k[treei, pixi] = treemap[treei * 2, k[treei, pixi] * 2 + choice] - 1 + (treei, pixi) = np.nonzero(nodestatus[alltreei, k] != NODE_TERMINAL) + print "{0} non terminal nodes.".format(len(treei)) + + #We found the terminal node: assign class label + #jts[chunki + treei] = nodeclass[treeOffset + k] + #nodex[chunki + treei] = k + 1 + cast_votes = nodeclass[alltreei, k] - 1 + for classi in range(nclass): + votes[:,classi] = np.sum(cast_votes == classi, 0) + + # Save / display results + + prob_image = np.reshape(np.float32(votes) / ntree, (image_shape[0], image_shape[1], nclass)) + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # 
output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/rf_classify.html b/ClassifyMembranes/rf_classify.html new file mode 100644 index 0000000..cc1560e --- /dev/null +++ b/ClassifyMembranes/rf_classify.html @@ -0,0 +1,664 @@ + + + + + + + + + +

+Generated by Cython 0.19.1 on Fri Aug 02 12:09:04 2013
+
+Raw output: rf_classify.c
+
+[Cython annotation view: in the original HTML each rf_classify.pyx source line
+below is interleaved with its generated C implementation; only the recoverable
+.pyx source lines are kept here.]
+
+  1: #cython: boundscheck=False
+  2: #cython: wraparound=False
+  3: 
+  4: import numpy as np
+  5: cimport numpy as np
+  6: 
+  7: cpdef rf_classify(model, float[:,:] features):
+  8: 
+  9:     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ 10:     cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
+ 11:     cdef float [:,:] xbestsplit
+ 12:     cdef float[:] pixel_features
+ 13: 
+ 14:     NODE_TERMINAL = -1
+ 15:     #NODE_TOSPLIT  = -2
+ 16:     #NODE_INTERIOR = -3
+ 17: 
+ 18:     nfeatures = features.shape[0]
+ 19:     npix = features.shape[1]
+ 20: 
+ 21:     treemap = model.treemap
+ 22:     nodestatus = model.nodestatus
+ 23:     xbestsplit = model.xbestsplit
+
+  /* "rf_classify.pyx":23
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit             # <<<<<<<<<<<<<<
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__xbestsplit); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_3.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_xbestsplit = __pyx_t_3;
+  __pyx_t_3.memview = NULL;
+  __pyx_t_3.data = NULL;
+
 24:     bestvar = model.bestvar
+
+  /* "rf_classify.pyx":24
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar             # <<<<<<<<<<<<<<
+ *     nodeclass = model.nodeclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__bestvar); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_bestvar = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 25:     nodeclass = model.nodeclass
+
+  /* "rf_classify.pyx":25
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass             # <<<<<<<<<<<<<<
+ * 
+ *     nrnodes = model.nrnodes
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodeclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodeclass = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 26: 
+
 27:     nrnodes = model.nrnodes
+
+  /* "rf_classify.pyx":27
+ *     nodeclass = model.nodeclass
+ * 
+ *     nrnodes = model.nrnodes             # <<<<<<<<<<<<<<
+ *     ntree = model.ntree
+ *     nclass = model.nclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nrnodes); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nrnodes = __pyx_t_4;
+
 28:     ntree = model.ntree
+
+  /* "rf_classify.pyx":28
+ * 
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree             # <<<<<<<<<<<<<<
+ *     nclass = model.nclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__ntree); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_ntree = __pyx_t_4;
+
 29:     nclass = model.nclass
+
+  /* "rf_classify.pyx":29
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree
+ *     nclass = model.nclass             # <<<<<<<<<<<<<<
+ * 
+ *     # Predict
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nclass = __pyx_t_4;
+
 30: 
+
 31:     # Predict
+
 32:     votes = np.zeros((npix, nclass), dtype=np.int32)
+
+  /* "rf_classify.pyx":32
+ * 
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)             # <<<<<<<<<<<<<<
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ */
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__zeros); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_npix); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_6 = PyInt_FromLong(__pyx_v_nclass); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
+  __Pyx_GIVEREF(__pyx_t_6);
+  __pyx_t_1 = 0;
+  __pyx_t_6 = 0;
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_7));
+  __Pyx_GIVEREF(((PyObject *)__pyx_t_7));
+  __pyx_t_7 = 0;
+  __pyx_t_7 = PyDict_New(); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__int32); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  if (PyDict_SetItem(__pyx_t_7, ((PyObject *)__pyx_n_s__dtype), __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_8);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_v_votes = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 33:     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+
+  /* "rf_classify.pyx":33
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)             # <<<<<<<<<<<<<<
+ * 
+ *     for pixi in range(npix):
+ */
+  __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s__zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyInt_FromLong(__pyx_v_nfeatures); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_8);
+  __Pyx_GIVEREF(__pyx_t_8);
+  __pyx_t_8 = 0;
+  __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+  __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s__float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  if (PyDict_SetItem(__pyx_t_8, ((PyObject *)__pyx_n_s__dtype), __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+  __pyx_t_9 = __Pyx_PyObject_to_MemoryviewSlice_ds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_9.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_pixel_features = __pyx_t_9;
+  __pyx_t_9.memview = NULL;
+  __pyx_t_9.data = NULL;
+
 34: 
+
 35:     for pixi in range(npix):
+
+  /* "rf_classify.pyx":35
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ *     for pixi in range(npix):             # <<<<<<<<<<<<<<
+ * 
+ *         pixel_features[...] = features[:,pixi]
+ */
+  __pyx_t_4 = __pyx_v_npix;
+  for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_4; __pyx_t_10+=1) {
+    __pyx_v_pixi = __pyx_t_10;
+
 36: 
+
 37:         pixel_features[...] = features[:,pixi]
+
+    /* "rf_classify.pyx":37
+ *     for pixi in range(npix):
+ * 
+ *         pixel_features[...] = features[:,pixi]             # <<<<<<<<<<<<<<
+ * 
+ *         for treei in range(ntree):
+ */
+    __pyx_t_12 = -1;
+    __pyx_t_11.data = __pyx_v_features.data;
+    __pyx_t_11.memview = __pyx_v_features.memview;
+    __PYX_INC_MEMVIEW(&__pyx_t_11, 0);
+    __pyx_t_11.shape[0] = __pyx_v_features.shape[0];
+__pyx_t_11.strides[0] = __pyx_v_features.strides[0];
+    __pyx_t_11.suboffsets[0] = -1;
+
+{
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_pixi;
+    Py_ssize_t __pyx_tmp_shape = __pyx_v_features.shape[1];
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_features.strides[1];
+    if (0 && (__pyx_tmp_idx < 0))
+        __pyx_tmp_idx += __pyx_tmp_shape;
+    if (0 && (__pyx_tmp_idx < 0 || __pyx_tmp_idx >= __pyx_tmp_shape)) {
+        PyErr_SetString(PyExc_IndexError, "Index out of bounds (axis 1)");
+        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+        __pyx_t_11.data += __pyx_tmp_idx * __pyx_tmp_stride;
+}
+
+if (unlikely(__pyx_memoryview_copy_contents(__pyx_t_11, __pyx_v_pixel_features, 1, 1, 0) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+
 38: 
+
 39:         for treei in range(ntree):
+
+    /* "rf_classify.pyx":39
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         for treei in range(ntree):             # <<<<<<<<<<<<<<
+ * 
+ *             k = 0
+ */
+    __pyx_t_12 = __pyx_v_ntree;
+    for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_12; __pyx_t_13+=1) {
+      __pyx_v_treei = __pyx_t_13;
+
 40: 
+
 41:             k = 0
+
+      /* "rf_classify.pyx":41
+ *         for treei in range(ntree):
+ * 
+ *             k = 0             # <<<<<<<<<<<<<<
+ *             while nodestatus[treei, k] != NODE_TERMINAL:
+ *                 m = bestvar[treei, k] - 1
+ */
+      __pyx_v_k = 0;
+
 42:             while nodestatus[treei, k] != NODE_TERMINAL:
+
+      /* "rf_classify.pyx":42
+ * 
+ *             k = 0
+ *             while nodestatus[treei, k] != NODE_TERMINAL:             # <<<<<<<<<<<<<<
+ *                 m = bestvar[treei, k] - 1
+ *                 #Split by a numerical predictor
+ */
+      while (1) {
+        __pyx_t_14 = __pyx_v_treei;
+        __pyx_t_15 = __pyx_v_k;
+        __pyx_t_16 = (((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodestatus.data + __pyx_t_14 * __pyx_v_nodestatus.strides[0]) ) + __pyx_t_15 * __pyx_v_nodestatus.strides[1]) ))) != __pyx_v_NODE_TERMINAL) != 0);
+        if (!__pyx_t_16) break;
+
 43:                 m = bestvar[treei, k] - 1
+
+        /* "rf_classify.pyx":43
+ *             k = 0
+ *             while nodestatus[treei, k] != NODE_TERMINAL:
+ *                 m = bestvar[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ */
+        __pyx_t_17 = __pyx_v_treei;
+        __pyx_t_18 = __pyx_v_k;
+        __pyx_v_m = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_bestvar.data + __pyx_t_17 * __pyx_v_bestvar.strides[0]) ) + __pyx_t_18 * __pyx_v_bestvar.strides[1]) ))) - 1);
+
 44:                 #Split by a numerical predictor
+
 45:                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+
+        /* "rf_classify.pyx":45
+ *                 m = bestvar[treei, k] - 1
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])             # <<<<<<<<<<<<<<
+ *                 k = treemap[treei * 2, k * 2 + choice] - 1
+ * 
+ */
+        __pyx_t_19 = __pyx_v_m;
+        __pyx_t_20 = __pyx_v_treei;
+        __pyx_t_21 = __pyx_v_k;
+        __pyx_v_choice = (1 * ((*((float *) ( /* dim=0 */ (__pyx_v_pixel_features.data + __pyx_t_19 * __pyx_v_pixel_features.strides[0]) ))) > (*((float *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_xbestsplit.data + __pyx_t_20 * __pyx_v_xbestsplit.strides[0]) ) + __pyx_t_21 * __pyx_v_xbestsplit.strides[1]) )))));
+
 46:                 k = treemap[treei * 2, k * 2 + choice] - 1
+
+        /* "rf_classify.pyx":46
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ *                 k = treemap[treei * 2, k * 2 + choice] - 1             # <<<<<<<<<<<<<<
+ * 
+ *             #We found the terminal node: assign class label
+ */
+        __pyx_t_22 = (__pyx_v_treei * 2);
+        __pyx_t_23 = ((__pyx_v_k * 2) + __pyx_v_choice);
+        __pyx_v_k = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_treemap.data + __pyx_t_22 * __pyx_v_treemap.strides[0]) ) + __pyx_t_23 * __pyx_v_treemap.strides[1]) ))) - 1);
+      }
+
 47: 
+
 48:             #We found the terminal node: assign class label
+
 49:             vote_class = nodeclass[treei, k] - 1
+
+      /* "rf_classify.pyx":49
+ * 
+ *             #We found the terminal node: assign class label
+ *             vote_class = nodeclass[treei, k] - 1             # <<<<<<<<<<<<<<
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ */
+      __pyx_t_24 = __pyx_v_treei;
+      __pyx_t_25 = __pyx_v_k;
+      __pyx_v_vote_class = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodeclass.data + __pyx_t_24 * __pyx_v_nodeclass.strides[0]) ) + __pyx_t_25 * __pyx_v_nodeclass.strides[1]) ))) - 1);
+
 50:             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+
+      /* "rf_classify.pyx":50
+ *             #We found the terminal node: assign class label
+ *             vote_class = nodeclass[treei, k] - 1
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1             # <<<<<<<<<<<<<<
+ * 
+ *     return votes
+ */
+      __pyx_t_26 = __pyx_v_pixi;
+      __pyx_t_27 = __pyx_v_vote_class;
+      __pyx_t_28 = __pyx_v_pixi;
+      __pyx_t_29 = __pyx_v_vote_class;
+      *((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_28 * __pyx_v_votes.strides[0]) ) + __pyx_t_29 * __pyx_v_votes.strides[1]) )) = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_26 * __pyx_v_votes.strides[0]) ) + __pyx_t_27 * __pyx_v_votes.strides[1]) ))) + 1);
+    }
+  }
+
 51: 
+
 52:     return votes
+
+  /* "rf_classify.pyx":52
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ *     return votes             # <<<<<<<<<<<<<<
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_votes, 2, (PyObject *(*)(char *)) __pyx_memview_get_int, (int (*)(char *, PyObject *)) __pyx_memview_set_int, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_3, 1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_9, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+  __Pyx_AddTraceback("rf_classify.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_treemap, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodestatus, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_bestvar, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodeclass, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_votes, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_xbestsplit, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_pixel_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_11rf_classify_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_11rf_classify_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_model = 0;
+  __Pyx_memviewslice __pyx_v_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__model,&__pyx_n_s__features,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__model)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__features)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "rf_classify") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_model = values[0];
+    __pyx_v_features = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(values[1]); if (unlikely(!__pyx_v_features.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("rf_classify.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_11rf_classify_rf_classify(__pyx_self, __pyx_v_model, __pyx_v_features);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
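For readability, the complete rf_classify.pyx can be pieced together from the numbered source lines in the annotation above; the sketch below is that reconstruction (line 1 is inferred from the parallel variant's annotation), not a new file in this patch. The repeated `- 1` offsets convert the model's 1-based node and feature indices, inherited from the randomforest-matlab layout, to 0-based indexing.

#cython: boundscheck=False
#cython: wraparound=False

import numpy as np
cimport numpy as np

cpdef rf_classify(model, float[:,:] features):

    cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
    cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
    cdef float [:,:] xbestsplit
    cdef float[:] pixel_features

    NODE_TERMINAL = -1
    #NODE_TOSPLIT  = -2
    #NODE_INTERIOR = -3

    nfeatures = features.shape[0]
    npix = features.shape[1]

    treemap = model.treemap
    nodestatus = model.nodestatus
    xbestsplit = model.xbestsplit
    bestvar = model.bestvar
    nodeclass = model.nodeclass

    nrnodes = model.nrnodes
    ntree = model.ntree
    nclass = model.nclass

    # Predict
    votes = np.zeros((npix, nclass), dtype=np.int32)
    pixel_features = np.zeros(nfeatures, dtype=np.float32)

    for pixi in range(npix):

        pixel_features[...] = features[:,pixi]

        for treei in range(ntree):

            k = 0
            while nodestatus[treei, k] != NODE_TERMINAL:
                m = bestvar[treei, k] - 1
                #Split by a numerical predictor
                choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
                k = treemap[treei * 2, k * 2 + choice] - 1

            #We found the terminal node: assign class label
            vote_class = nodeclass[treei, k] - 1
            votes[pixi,vote_class] = votes[pixi,vote_class] + 1

    return votes

Given a model object exposing the forest arrays (treemap, nodestatus, xbestsplit, bestvar, nodeclass) and scalars (nrnodes, ntree, nclass), calling votes = rf_classify(model, features) with an (nfeatures, npix) float32 feature matrix returns an (npix, nclass) int32 array of per-pixel vote counts.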
diff --git a/ClassifyMembranes/rf_classify_parallel.html b/ClassifyMembranes/rf_classify_parallel.html
new file mode 100644
index 0000000..6610e14
--- /dev/null
+++ b/ClassifyMembranes/rf_classify_parallel.html
@@ -0,0 +1,730 @@

Generated by Cython 0.19.1 on Fri Aug 02 15:04:08 2013

Raw output: rf_classify_parallel.c

 1: #cython: boundscheck=False
+
+  /* "rf_classify_parallel.pyx":1
+ * #cython: boundscheck=False             # <<<<<<<<<<<<<<
+ * #cython: wraparound=False
+ * 
+ */
+  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s____test__, ((PyObject *)__pyx_t_1)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+
 2: #cython: wraparound=False
+
 3: 
+
 4: import numpy as np
+
+  /* "rf_classify_parallel.pyx":4
+ * #cython: wraparound=False
+ * 
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * cimport numpy as np
+ * from cython.parallel cimport prange
+ */
+  __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__numpy), 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s__np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
 5: cimport numpy as np
+
 6: from cython.parallel cimport prange
+
 7: 
+
 8: cpdef rf_classify(model, float[:,:] features):
+
+/* "rf_classify_parallel.pyx":8
+ * from cython.parallel cimport prange
+ * 
+ * cpdef rf_classify(model, float[:,:] features):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ */
+
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_f_20rf_classify_parallel_rf_classify(PyObject *__pyx_v_model, __Pyx_memviewslice __pyx_v_features, CYTHON_UNUSED int __pyx_skip_dispatch) {
+  int __pyx_v_NODE_TERMINAL;
+  int __pyx_v_nfeatures;
+  int __pyx_v_npix;
+  int __pyx_v_treei;
+  int __pyx_v_k;
+  int __pyx_v_m;
+  int __pyx_v_choice;
+  int __pyx_v_vote_class;
+  CYTHON_UNUSED int __pyx_v_nrnodes;
+  CYTHON_UNUSED int __pyx_v_ntree;
+  int __pyx_v_nclass;
+  int __pyx_v_pixi;
+  __Pyx_memviewslice __pyx_v_treemap = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_nodestatus = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_bestvar = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_nodeclass = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_votes = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_xbestsplit = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_pixel_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify", 0);
+
+/* "rf_classify_parallel.pyx":8
+ * from cython.parallel cimport prange
+ * 
+ * cpdef rf_classify(model, float[:,:] features):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ */
+
+static PyObject *__pyx_pf_20rf_classify_parallel_rf_classify(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, __Pyx_memviewslice __pyx_v_features) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify", 0);
+  __Pyx_XDECREF(__pyx_r);
+  if (unlikely(!__pyx_v_features.memview)) { __Pyx_RaiseUnboundLocalError("features"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} }
+  __pyx_t_1 = __pyx_f_20rf_classify_parallel_rf_classify(__pyx_v_model, __pyx_v_features, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags));
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
 9: 
+
 10:     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+
 11:     cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
+
 12:     cdef float [:,:] xbestsplit
+
 13:     cdef float[:] pixel_features
+
 14: 
+
 15:     NODE_TERMINAL = -1
+
+  /* "rf_classify_parallel.pyx":15
+ *     cdef float[:] pixel_features
+ * 
+ *     NODE_TERMINAL = -1             # <<<<<<<<<<<<<<
+ *     #NODE_TOSPLIT  = -2
+ *     #NODE_INTERIOR = -3
+ */
+  __pyx_v_NODE_TERMINAL = -1;
+
 16:     #NODE_TOSPLIT  = -2
+
 17:     #NODE_INTERIOR = -3
+
 18: 
+
 19:     nfeatures = features.shape[0]
+
+  /* "rf_classify_parallel.pyx":19
+ *     #NODE_INTERIOR = -3
+ * 
+ *     nfeatures = features.shape[0]             # <<<<<<<<<<<<<<
+ *     npix = features.shape[1]
+ * 
+ */
+  __pyx_v_nfeatures = (__pyx_v_features.shape[0]);
+
 20:     npix = features.shape[1]
+
+  /* "rf_classify_parallel.pyx":20
+ * 
+ *     nfeatures = features.shape[0]
+ *     npix = features.shape[1]             # <<<<<<<<<<<<<<
+ * 
+ *     treemap = model.treemap
+ */
+  __pyx_v_npix = (__pyx_v_features.shape[1]);
+
 21: 
+
 22:     treemap = model.treemap
+
+  /* "rf_classify_parallel.pyx":22
+ *     npix = features.shape[1]
+ * 
+ *     treemap = model.treemap             # <<<<<<<<<<<<<<
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__treemap); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_treemap = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 23:     nodestatus = model.nodestatus
+
+  /* "rf_classify_parallel.pyx":23
+ * 
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus             # <<<<<<<<<<<<<<
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodestatus); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodestatus = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 24:     xbestsplit = model.xbestsplit
+
+  /* "rf_classify_parallel.pyx":24
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit             # <<<<<<<<<<<<<<
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__xbestsplit); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_3.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_xbestsplit = __pyx_t_3;
+  __pyx_t_3.memview = NULL;
+  __pyx_t_3.data = NULL;
+
 25:     bestvar = model.bestvar
+
+  /* "rf_classify_parallel.pyx":25
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar             # <<<<<<<<<<<<<<
+ *     nodeclass = model.nodeclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__bestvar); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_bestvar = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 26:     nodeclass = model.nodeclass
+
+  /* "rf_classify_parallel.pyx":26
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass             # <<<<<<<<<<<<<<
+ * 
+ *     nrnodes = model.nrnodes
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodeclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodeclass = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 27: 
+
 28:     nrnodes = model.nrnodes
+
+  /* "rf_classify_parallel.pyx":28
+ *     nodeclass = model.nodeclass
+ * 
+ *     nrnodes = model.nrnodes             # <<<<<<<<<<<<<<
+ *     ntree = model.ntree
+ *     nclass = model.nclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nrnodes); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nrnodes = __pyx_t_4;
+
 29:     ntree = model.ntree
+
+  /* "rf_classify_parallel.pyx":29
+ * 
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree             # <<<<<<<<<<<<<<
+ *     nclass = model.nclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__ntree); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_ntree = __pyx_t_4;
+
 30:     nclass = model.nclass
+
+  /* "rf_classify_parallel.pyx":30
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree
+ *     nclass = model.nclass             # <<<<<<<<<<<<<<
+ * 
+ *     # Predict
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nclass = __pyx_t_4;
+
 31: 
+
 32:     # Predict
+
 33:     votes = np.zeros((npix, nclass), dtype=np.int32)
+
+  /* "rf_classify_parallel.pyx":33
+ * 
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)             # <<<<<<<<<<<<<<
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ */
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__zeros); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_npix); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_6 = PyInt_FromLong(__pyx_v_nclass); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
+  __Pyx_GIVEREF(__pyx_t_6);
+  __pyx_t_1 = 0;
+  __pyx_t_6 = 0;
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_7));
+  __Pyx_GIVEREF(((PyObject *)__pyx_t_7));
+  __pyx_t_7 = 0;
+  __pyx_t_7 = PyDict_New(); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__int32); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  if (PyDict_SetItem(__pyx_t_7, ((PyObject *)__pyx_n_s__dtype), __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_8);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_v_votes = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 34:     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+
+  /* "rf_classify_parallel.pyx":34
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)             # <<<<<<<<<<<<<<
+ * 
+ *     for pixi in range(npix):
+ */
+  __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s__zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyInt_FromLong(__pyx_v_nfeatures); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_8);
+  __Pyx_GIVEREF(__pyx_t_8);
+  __pyx_t_8 = 0;
+  __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+  __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s__float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  if (PyDict_SetItem(__pyx_t_8, ((PyObject *)__pyx_n_s__dtype), __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+  __pyx_t_9 = __Pyx_PyObject_to_MemoryviewSlice_ds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_9.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_pixel_features = __pyx_t_9;
+  __pyx_t_9.memview = NULL;
+  __pyx_t_9.data = NULL;
+
 35: 
+
 36:     for pixi in range(npix):
+
+  /* "rf_classify_parallel.pyx":36
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ *     for pixi in range(npix):             # <<<<<<<<<<<<<<
+ * 
+ *         pixel_features[...] = features[:,pixi]
+ */
+  __pyx_t_4 = __pyx_v_npix;
+  for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_4; __pyx_t_10+=1) {
+    __pyx_v_pixi = __pyx_t_10;
+
 37: 
+
 38:         pixel_features[...] = features[:,pixi]
+
+    /* "rf_classify_parallel.pyx":38
+ *     for pixi in range(npix):
+ * 
+ *         pixel_features[...] = features[:,pixi]             # <<<<<<<<<<<<<<
+ * 
+ *         with nogil:
+ */
+    __pyx_t_12 = -1;
+    __pyx_t_11.data = __pyx_v_features.data;
+    __pyx_t_11.memview = __pyx_v_features.memview;
+    __PYX_INC_MEMVIEW(&__pyx_t_11, 0);
+    __pyx_t_11.shape[0] = __pyx_v_features.shape[0];
+__pyx_t_11.strides[0] = __pyx_v_features.strides[0];
+    __pyx_t_11.suboffsets[0] = -1;
+
+{
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_pixi;
+    Py_ssize_t __pyx_tmp_shape = __pyx_v_features.shape[1];
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_features.strides[1];
+    if (0 && (__pyx_tmp_idx < 0))
+        __pyx_tmp_idx += __pyx_tmp_shape;
+    if (0 && (__pyx_tmp_idx < 0 || __pyx_tmp_idx >= __pyx_tmp_shape)) {
+        PyErr_SetString(PyExc_IndexError, "Index out of bounds (axis 1)");
+        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+        __pyx_t_11.data += __pyx_tmp_idx * __pyx_tmp_stride;
+}
+
+if (unlikely(__pyx_memoryview_copy_contents(__pyx_t_11, __pyx_v_pixel_features, 1, 1, 0) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+
 39: 
+
 40:         with nogil:
+
+    /* "rf_classify_parallel.pyx":40
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         with nogil:             # <<<<<<<<<<<<<<
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ */
+    {
+        #ifdef WITH_THREAD
+        PyThreadState *_save;
+        Py_UNBLOCK_THREADS
+        #endif
+        /*try:*/ {
+
+        /* "rf_classify_parallel.pyx":40
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         with nogil:             # <<<<<<<<<<<<<<
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ */
+        /*finally:*/ {
+          #ifdef WITH_THREAD
+          Py_BLOCK_THREADS
+          #endif
+        }
+    }
+  }
+
 41: 
+
 42:             for treei in prange(ntree, schedule='static', num_threads=16):
+
+          /* "rf_classify_parallel.pyx":42
+ *         with nogil:
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):             # <<<<<<<<<<<<<<
+ * 
+ *                 k = 0
+ */
+          __pyx_t_12 = __pyx_v_ntree;
+          if (1 == 0) abort();
+          {
+              #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+                  #undef likely
+                  #undef unlikely
+                  #define likely(x)   (x)
+                  #define unlikely(x) (x)
+              #endif
+              __pyx_t_14 = (__pyx_t_12 - 0) / 1;
+              if (__pyx_t_14 > 0)
+              {
+                  #ifdef _OPENMP
+                  #pragma omp parallel
+                  #endif /* _OPENMP */
+                  {
+                      #ifdef _OPENMP
+                      #pragma omp for lastprivate(__pyx_v_k) lastprivate(__pyx_v_choice) lastprivate(__pyx_v_vote_class) lastprivate(__pyx_v_m) firstprivate(__pyx_v_treei) lastprivate(__pyx_v_treei) schedule(static) num_threads(16)
+                      #endif /* _OPENMP */
+                      for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_14; __pyx_t_13++){
+                          {
+                              __pyx_v_treei = 0 + 1 * __pyx_t_13;
+                              /* Initialize private variables to invalid values */
+                              __pyx_v_k = ((int)0xbad0bad0);
+                              __pyx_v_choice = ((int)0xbad0bad0);
+                              __pyx_v_vote_class = ((int)0xbad0bad0);
+                              __pyx_v_m = ((int)0xbad0bad0);
+
 43: 
+
 44:                 k = 0
+
+                              /* "rf_classify_parallel.pyx":44
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ * 
+ *                 k = 0             # <<<<<<<<<<<<<<
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:
+ *                     m = bestvar[treei, k] - 1
+ */
+                              __pyx_v_k = 0;
+
 45:                 while nodestatus[treei, k] != NODE_TERMINAL:
+
+                              /* "rf_classify_parallel.pyx":45
+ * 
+ *                 k = 0
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:             # <<<<<<<<<<<<<<
+ *                     m = bestvar[treei, k] - 1
+ *                     #Split by a numerical predictor
+ */
+                              while (1) {
+                                __pyx_t_15 = __pyx_v_treei;
+                                __pyx_t_16 = __pyx_v_k;
+                                __pyx_t_17 = (((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodestatus.data + __pyx_t_15 * __pyx_v_nodestatus.strides[0]) ) + __pyx_t_16 * __pyx_v_nodestatus.strides[1]) ))) != __pyx_v_NODE_TERMINAL) != 0);
+                                if (!__pyx_t_17) break;
+
 46:                     m = bestvar[treei, k] - 1
+
+                                /* "rf_classify_parallel.pyx":46
+ *                 k = 0
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:
+ *                     m = bestvar[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ */
+                                __pyx_t_18 = __pyx_v_treei;
+                                __pyx_t_19 = __pyx_v_k;
+                                __pyx_v_m = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_bestvar.data + __pyx_t_18 * __pyx_v_bestvar.strides[0]) ) + __pyx_t_19 * __pyx_v_bestvar.strides[1]) ))) - 1);
+
 47:                     #Split by a numerical predictor
+
 48:                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+
+                                /* "rf_classify_parallel.pyx":48
+ *                     m = bestvar[treei, k] - 1
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])             # <<<<<<<<<<<<<<
+ *                     k = treemap[treei * 2, k * 2 + choice] - 1
+ * 
+ */
+                                __pyx_t_20 = __pyx_v_m;
+                                __pyx_t_21 = __pyx_v_treei;
+                                __pyx_t_22 = __pyx_v_k;
+                                __pyx_v_choice = (1 * ((*((float *) ( /* dim=0 */ (__pyx_v_pixel_features.data + __pyx_t_20 * __pyx_v_pixel_features.strides[0]) ))) > (*((float *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_xbestsplit.data + __pyx_t_21 * __pyx_v_xbestsplit.strides[0]) ) + __pyx_t_22 * __pyx_v_xbestsplit.strides[1]) )))));
+
 49:                     k = treemap[treei * 2, k * 2 + choice] - 1
+
+                                /* "rf_classify_parallel.pyx":49
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ *                     k = treemap[treei * 2, k * 2 + choice] - 1             # <<<<<<<<<<<<<<
+ * 
+ *                 #We found the terminal node: assign class label
+ */
+                                __pyx_t_23 = (__pyx_v_treei * 2);
+                                __pyx_t_24 = ((__pyx_v_k * 2) + __pyx_v_choice);
+                                __pyx_v_k = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_treemap.data + __pyx_t_23 * __pyx_v_treemap.strides[0]) ) + __pyx_t_24 * __pyx_v_treemap.strides[1]) ))) - 1);
+                              }
+
 50: 
+
 51:                 #We found the terminal node: assign class label
+
 52:                 vote_class = nodeclass[treei, k] - 1
+
+                              /* "rf_classify_parallel.pyx":52
+ * 
+ *                 #We found the terminal node: assign class label
+ *                 vote_class = nodeclass[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ */
+                              __pyx_t_25 = __pyx_v_treei;
+                              __pyx_t_26 = __pyx_v_k;
+                              __pyx_v_vote_class = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodeclass.data + __pyx_t_25 * __pyx_v_nodeclass.strides[0]) ) + __pyx_t_26 * __pyx_v_nodeclass.strides[1]) ))) - 1);
+
 53:                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+
+                              /* "rf_classify_parallel.pyx":53
+ *                 #We found the terminal node: assign class label
+ *                 vote_class = nodeclass[treei, k] - 1
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1             # <<<<<<<<<<<<<<
+ * 
+ *     return votes
+ */
+                              __pyx_t_27 = __pyx_v_pixi;
+                              __pyx_t_28 = __pyx_v_vote_class;
+                              __pyx_t_29 = __pyx_v_pixi;
+                              __pyx_t_30 = __pyx_v_vote_class;
+                              *((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_29 * __pyx_v_votes.strides[0]) ) + __pyx_t_30 * __pyx_v_votes.strides[1]) )) = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_27 * __pyx_v_votes.strides[0]) ) + __pyx_t_28 * __pyx_v_votes.strides[1]) ))) + 1);
+                          }
+                      }
+                  }
+              }
+          }
+          #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+              #undef likely
+              #undef unlikely
+              #define likely(x)   __builtin_expect(!!(x), 1)
+              #define unlikely(x) __builtin_expect(!!(x), 0)
+          #endif
+        }
+
 54: 
+
 55:     return votes
+
+  /* "rf_classify_parallel.pyx":55
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ *     return votes             # <<<<<<<<<<<<<<
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_votes, 2, (PyObject *(*)(char *)) __pyx_memview_get_int, (int (*)(char *, PyObject *)) __pyx_memview_set_int, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_3, 1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_9, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_treemap, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodestatus, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_bestvar, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodeclass, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_votes, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_xbestsplit, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_pixel_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_model = 0;
+  __Pyx_memviewslice __pyx_v_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__model,&__pyx_n_s__features,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__model)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__features)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "rf_classify") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_model = values[0];
+    __pyx_v_features = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(values[1]); if (unlikely(!__pyx_v_features.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_20rf_classify_parallel_rf_classify(__pyx_self, __pyx_v_model, __pyx_v_features);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
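The generated C above accumulates one vote per tree into votes[pixi, vote_class], with class labels stored 1-based (vote_class = nodeclass[treei, k] - 1) before the final return votes. A small post-processing helper, assumed here for illustration and not part of the patch, turns those counts into per-class vote fractions and hard labels:

    import numpy as np

    def votes_to_prediction(votes):
        # votes: (npixels, nclass) counts filled by rf_classify (assumed helper)
        votes = np.asarray(votes, dtype=np.float32)
        prob = votes / votes.sum(axis=1, keepdims=True)  # fraction of trees per class
        labels = np.argmax(votes, axis=1) + 1            # class labels are 1-based
        return prob, labels
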
diff --git a/ClassifyMembranes/train_gpu_randomforest_2class.py b/ClassifyMembranes/train_gpu_randomforest_2class.py new file mode 100644 index 0000000..7340990 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_2class.py @@ -0,0 +1,664 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + 
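+  /* Reference note (comment only): sortbagbyx above is a bottom-up,
+     iterative merge sort.  The run length i doubles each pass and the two
+     index buffers (bagstart, tempbagstart) ping-pong as source and
+     destination, avoiding recursion inside a single CUDA thread.  Ties take
+     the right-hand run first, so the order of equal feature values can
+     differ from a stable CPU sort (e.g. numpy.argsort with kind='stable'),
+     though the ascending order by feature value is the same. */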
float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int 
mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? + int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. 
*/ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +#input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +#input_image_suffix = '_labeled_update.tif' +#input_features_suffix = 
'.hdf5' +#output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '_autoencoder_features.h5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cyto.png' +# input_features_suffix = 'Cyto_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cytods2.png' +# input_features_suffix = 'Cytods2_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2classds2.h5' + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '_training.png' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# #input_features_suffix = '_autoencoder_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# #input_features_suffix = '_rhoana_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\rhoana_forest_2class.h5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +class_colors = [0, 1] +#class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # 2 class + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # 3 class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] 
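+    # Reference note on the GPU split search above: findBestSplit keeps the
+    # running sums n_side = sum_c w_side[c]^2 and d_side = sum_c w_side[c]
+    # for the left and right partitions.  The update
+    #     gini_leftn += split_weight * (2*wlstart[c] + split_weight)
+    # is exactly (wl[c]+w)^2 - wl[c]^2, so no per-class rescan is needed.
+    # The score maximized at each candidate split point is
+    #     crit = gini_leftn/gini_leftd + gini_rightn/gini_rightd
+    # and gini_score reports crit - crit0, where crit0 is the same quantity
+    # computed on the unsplit parent class populations.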
+ train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, 
grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_3class.py b/ClassifyMembranes/train_gpu_randomforest_3class.py new file mode 100644 index 0000000..55a4480 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_3class.py @@ -0,0 +1,666 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy 
output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = 
splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
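+  /* For reference (illustrative code inside this comment, not compiled):
+     the arrays filled by this kernel encode each tree flat.  bestvar[k]
+     holds the 1-based split feature, xbestsplit[k] the threshold,
+     treemap[2*k] and treemap[2*k+1] the 1-based child indices, and
+     nodeclass[k] the 1-based terminal label, so one sample x is classified
+     per tree as:
+
+         k = 0;
+         while (nodestatus[k] != NODE_TERMINAL) {
+             m = bestvar[k] - 1;
+             k = (x[m] <= xbestsplit[k] ? treemap[2*k] : treemap[2*k+1]) - 1;
+         }
+         label = nodeclass[k];
+  */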
+ int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. */ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +#input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +#input_image_suffix = '_labeled_update.tif' +#input_features_suffix = '.hdf5' +#output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '_autoencoder_features.h5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cyto.png' +# input_features_suffix = 'Cyto_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cytods2.png' +# input_features_suffix = 'Cytods2_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2classds2.h5' + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '_training.png' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# #input_features_suffix = '_autoencoder_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# #input_features_suffix = '_rhoana_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\rhoana_forest_2class.h5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix 
) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +#class_colors = [0, 1] +class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # RGB (rounded) + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # Specific colors + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = 
gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() + + diff --git a/ClassifyMembranes/train_gpu_randomforest_cp7.py b/ClassifyMembranes/train_gpu_randomforest_cp7.py new file mode 100644 index 0000000..e78e27f --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_cp7.py @@ -0,0 +1,645 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; 
++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + 
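+      /* Note: ntie below implements reservoir sampling over tied split
+         points.  Whenever another candidate matches gini_critmax, it
+         replaces the incumbent with probability 1/ntie via
+         (curand(randstate) % ntie) == 0, so every tied split survives with
+         equal probability without storing the ties. */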
int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! 
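+    /* Note: one thread trains one tree.  The host side launches
+       block = (32, 1, 1) and grid = (ntree / 32 + 1, 1), so up to a full
+       block of spare threads exists; the idx < ntree guard above lets the
+       extras exit without touching memory. */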
+ sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? + int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. 
*/ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +input_image_folder1 = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data' +raw_image_suffix = '.png' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features.h5' +output_path = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = 
nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) +#files = files + sorted( glob.glob( input_image_folder2 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix) + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] 
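+            # Each key of the features HDF5 file holds one full-resolution
+            # feature image; sampling it at the labelled pixel coordinates
+            # below turns that image into one row of the training matrix.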
+ train_features[i,:] = feature[class_indices[0], class_indices[1]] + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + + f.close() + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, 
grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py b/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py new file mode 100644 index 0000000..67311cd --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py @@ -0,0 +1,658 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger 
chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + 
*msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
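+                //Layout sketch (hypothetical sizes): with nclass = 2, nsamples = {3, 2}
+                //and maxnsamples = 3, sampledata leaves random draws at bagstart[0..2]
+                //(class 1) and bagstart[3..4] (class 2); the loop below compacts them
+                //into bagstart[0..totsamples-1] and resolves each draw to a sample of
+                //x through classindex.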
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +downsample_factor = 2 + +input_image_folder1 = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data' +ds_input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\input_images_ds{0}'.format(downsample_factor) + +raw_image_suffix = '.png' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) + +output_path = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_ds{0}_cp7_2class.h5'.format(downsample_factor) + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + if downsample_factor != 1: + class_indices = (class_indices[0] / downsample_factor, class_indices[1] / downsample_factor) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix).replace(input_image_folder1, ds_input_image_folder) 
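+        # The downsampled raw image is cached in ds_input_image_folder under
+        # the same base name; if it is missing it is generated once below and
+        # reused on subsequent runs.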
+ if not os.path.exists(image_file): + full_image = mahotas.imread(file.replace(input_image_suffix, raw_image_suffix))[:,:,0] + ds_image = mahotas.imresize(full_image, 1.0 / downsample_factor) + mahotas.imsave(image_file, np.uint8(ds_image)) + full_image = None + ds_image = None + + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = 
gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py b/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py new file mode 100644 index 0000000..dc5a176 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py @@ -0,0 +1,653 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = 
start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + 
thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
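+                //baggedxstart is filled feature-major (sample s of feature f lands
+                //at s + totsamples * f), so sortbagbyx can scan one feature as a
+                //contiguous block.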
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +downsample_factor = 2 + +input_image_folder1 = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2' +input_image_folder2 = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Combined_LessMito' +ds_input_image_folder = 'D:\\dev\\datasets\\LGN1\\input_images_ds{0}'.format(downsample_factor) + +raw_image_suffix = '.tif' +input_image_suffix = '_labeled.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) + +output_path = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\rhoana_forest_ds{0}_2class.hdf5'.format(downsample_factor) + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) +#files = files + sorted( glob.glob( input_image_folder2 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + if downsample_factor != 1: + class_indices = (class_indices[0] / downsample_factor, class_indices[1] / downsample_factor) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * 
(classi + 1))) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix).replace(input_image_folder1, ds_input_image_folder).replace(input_image_folder2, ds_input_image_folder) + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = 
gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_norm2.py b/ClassifyMembranes/train_gpu_randomforest_norm2.py new file mode 100644 index 0000000..7efb7d5 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_norm2.py @@ -0,0 +1,673 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= 
end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + + /* norm 1 + float gini_crit0 = gini_n / gini_d; + */ + + float gini_crit0 = 0; + + if ( gini_d > 1 ) + { + gini_crit0 = gini_n / ( gini_d * ( gini_d - 1 ) ); + } + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + 
thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + + crit = (gini_leftn / (gini_leftd * gini_leftd) ) + (gini_rightn / (gini_rightd * gini_rightd) ); + + /* norm 2 + crit = 0; + + if ( gini_leftd > 1 ) + { + crit += gini_leftn / ( gini_leftd * ( gini_leftd - 1 ) ); + } + else + { + crit += gini_leftn; + } + + if ( gini_rightd > 1 ) + { + crit += gini_rightn / ( gini_rightd * ( gini_rightd - 1 ) ); + } + else + { + crit +=gini_rightn; + } + */ + + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
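+                //The per-class bag counts written to classpop at the end of this
+                //outer loop seed the root node's class populations, from which
+                //findBestSplit computes the parent Gini term.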
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '.hdf5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class_norm2.hdf5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + + print "Reading image {0}.".format(file) + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = 
f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + 
block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/DeepNets/full_image_classify_stumpin.py b/DeepNets/full_image_classify_stumpin.py new file mode 100644 index 0000000..91cd5a1 --- /dev/null +++ b/DeepNets/full_image_classify_stumpin.py @@ -0,0 +1,237 @@ + +import os +import sys +import time + +import numpy as np +import mahotas + +import glob +import h5py + +execfile('full_image_cnn.py') +#from full_image_cnn import * + +#param_path = 'D:/dev/Rhoana/membrane_cnn/results/resonance/' +#param_file = param_path + 'LGN1_MembraneSamples_65x65x1_mp0.50_train10000_valid2000_test2000_seed7.progress_anneal_rotmir_k[48, 48, 48, 48].h5' +#param_file = param_path + 'LGN1_MembraneSamples_95x95x1_mp0.50_train5000_valid1000_test1000_seed7.progress_anneal_rotmir_k[48, 48, 48, 48].h5' +#param_file = param_path + 'progress2/LGN1_MembraneSamples_31x31x1_mp0.50_train50000_valid5000_test5000_seed7_ds4.progress_anneal_rotmir_k[32, 32, 32]_baseLR0.001.h5' + +#param_path = 'D:/dev/Rhoana/membrane_cnn/results/PC/' +#param_file = param_path + 'LGN1_MembraneSamples_65x65x1_mp0.50_train10000_valid2000_test2000_seed7.progress_anneal_rotmir_k[32, 32, 32, 32].h5.' +#param_file = param_path + 'LGN1_MembraneSamples_31x31x1_mp0.50_train50000_valid5000_test5000_seed7_ds4b.progress_anneal_rotmir_k[32, 32, 32]_baseLR0.004_v1.h5' +#param_file = param_path + 'lenet0_membrane_epoch_25100.h5' +#param_file = param_path + '5layer_params_large_epoch_285.h5' + +#param_files = [param_file] + +param_path = 'D:/dev/Rhoana/membrane_cnn/results/stumpin/' +param_files = glob.glob(param_path + "*.h5") +param_files = [x for x in param_files if x.find('.ot.h5') == -1] + +for param_file in param_files: + + output_path = param_file.replace('.h5', '_stumpin') + if not os.path.exists(output_path): + os.makedirs(output_path) + + print 'Opening parameter file {0}.'.format(param_file) + h5file = h5py.File(param_file, 'r') + + # Construct a blank network + nlayers = h5file['/layers'][...] + iterations = h5file['/iterations'][...] + + print "Loaded {0} layer network trained up to iteration {1}.".format(nlayers, iterations) + + all_layers = [] + stride_in = 1 + + for layer in range(nlayers): + + layer_string = '/layer{0}/'.format(layer) + layer_type = h5file[layer_string + 'type'][...] + + if layer_type == 'Convolution': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + layer_maxpoolsize = h5file[layer_string + 'maxpoolsize'][...] + + new_layer = ConvolutionMaxpoolLayer( + layer_weights.shape[0], layer_weights.shape[1], layer_weights.shape[2], + stride_in, layer_maxpoolsize, W=layer_weights, b=layer_bias) + + elif layer_type == 'FullyConnected': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + layer_ksize = h5file[layer_string + 'ksize'][...] 
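+            # ninputs is recovered from the flattened weight matrix below:
+            # FC weights are stored with shape (ninputs * ksize**2, noutputs),
+            # so weights.shape[0] / ksize**2 gives the input channel count.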
+ + new_layer = FullyConnectedLayer( + layer_weights.shape[0] / (layer_ksize ** 2), layer_weights.shape[1], layer_ksize, + stride_in, W=layer_weights, b=layer_bias) + + elif layer_type == 'LogisticRegression': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + + new_layer = LogisticRegressionLayer(layer_weights.shape[0], layer_weights.shape[1], + stride_in, W=layer_weights, b=layer_bias) + + else: + raise Exception("Unknown layer type: {0}".format(layer_type)) + + print new_layer.W.shape + print 'layer {0} Wsum={1}.'.format(layer, np.sum(new_layer.W)) + + all_layers.append(new_layer) + + stride_in = new_layer.stride_out + + h5file.close() + + # Calculate network footprint and therefore pad size + footprint = 1 + for revlayer in range(1,nlayers): + layer = nlayers - revlayer - 1 + if revlayer == 1: + footprint = all_layers[layer].kernel_size + else: + footprint = footprint * all_layers[layer].maxpool_size - 1 + all_layers[layer].kernel_size + + pad_by = footprint // 2 + + #image_path='D:/dev/datasets/isbi/train-input/train-input_0000.tif' + #gold_image_path='D:/dev/datasets/isbi/train-labels/train-labels_0000.tif' + + image_path_format_string='D:/dev/datasets/LGN1/JoshProbabilities/2kSampAligned{0:04d}.tif' + gold_image_path_format_string='D:/dev/datasets/LGN1/gold/lxVastExport_8+12+13/Segmentation1-LX_8-12_export_s{0:03d}.png' + + saturation_level = 0.005 + + def normalize_image(original_image): + sorted_image = np.sort( np.uint8(original_image).ravel() ) + minval = np.float32( sorted_image[ len(sorted_image) * ( saturation_level / 2 ) ] ) + maxval = np.float32( sorted_image[ len(sorted_image) * ( 1 - saturation_level / 2 ) ] ) + norm_image = np.float32(original_image - minval) * ( 255 / (maxval - minval)) + norm_image[norm_image < 0] = 0 + norm_image[norm_image > 255] = 255 + return np.uint8(255 - norm_image) + + def open_image_and_gold(image_index, crop_from, crop_size): + path = image_path_format_string.format(image_index) + gold_path = gold_image_path_format_string.format(image_index) + + # Open raw image + image = np.float32(normalize_image(mahotas.imread(path)[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size])) + + # Open gold standard image + gold_image = mahotas.imread(gold_path)[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size] + + # Convert to ids + if len(gold_image.shape) == 3: + gold_image = (np.uint32(gold_image[:,:,0]) * 2**16 + np.uint32(gold_image[:,:,1]) * 2**8 + np.uint32(gold_image[:,:,2])).squeeze() + + return (image, gold_image) + + + def rotmir(image, mirror, rotate): + if mirror == 1: + image = image[::-1,:] + + if rotate == 1: + image = image[::-1,:].T + elif rotate == 2: + image = image[::-1,:][:,::-1] + elif rotate == 3: + image = image.T[::-1,:] + + return image + + + classify_start = 100 + classify_n = 1 #105 + + crop_from = (512, 512) + crop_size = 1024 + #crop_size = 512 + #crop_size = 256 + #crop_size = 128 + + # def output_image (data, path, index, name): + # maxdata = np.max(data) + # mindata = np.min(data) + # normdata = (data - mindata) / (maxdata - mindata) + # mahotas.imsave(path + '/{0}_{1}.tif'.format(index, name), np.uint16(normdata * 65535)) + def output_image (data, layer, index, unpad_by, image_num=0, downsample=1): + data = data[unpad_by:data.shape[0]-unpad_by,unpad_by:data.shape[1]-unpad_by] + if downsample != 1: + data = np.float32(mahotas.imresize(data, downsample)) + maxdata = np.max(data) + mindata = np.min(data) + normdata = 
(np.float32(data) - mindata) / (maxdata - mindata) + mahotas.imsave(output_path + '/{0:04d}_classify_output_layer{1}_{2}.tif'.format(image_num, layer, index), np.uint16(normdata * 65535)) + + # Main classification loop + for image_index in range(classify_start, classify_start + classify_n): + + # Normalized training + input_image, target_image = open_image_and_gold(image_index, crop_from, crop_size) + + # Direct pixel intensity training + #input_image = np.float32(255-mahotas.imread(image_path_format_string.format(image_index))[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size]) + + downsample = 1 + if param_file.find('_ds2') != -1: + downsample = 2 + input_image = np.float32(mahotas.imresize(input_image, 1.0/downsample)) + elif param_file.find('_ds4') != -1: + downsample = 4 + input_image = np.float32(mahotas.imresize(input_image, 1.0/downsample)) + + + # Random rotate / mirror + # mirror = np.random.choice(2) + # rotate = np.random.choice(4) + # input_image = rotmir(input_image, mirror, rotate) + # target_image = rotmir(input_image, mirror, rotate) + + #Pad the image borders so we get a full image output and to avoid edge effects + pad_image = np.pad(input_image, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric') + layer0_in = pad_image.reshape(1, pad_image.shape[0], pad_image.shape[1]) + + start_time = time.clock() + + #Classify image + layer_output = [] + for layeri in range(len(all_layers)): + if layeri == 0: + layer_output.append(all_layers[layeri].apply_layer(layer0_in)) + else: + layer_output.append(all_layers[layeri].apply_layer(layer_output[layeri-1])) + + end_time = time.clock() + + print('Classification complete.') + print('Classification code ran for %.2fm' % ((end_time - start_time) / 60.)) + + # # Crop to input image size + # layer3_out = layer3_out[:,pad_by:-pad_by,pad_by:-pad_by] + # output_image(input_image, output_path, image_index, 'input') + # output_image(layer3_out[1,:,:], output_path, image_index, 'output') + # output_image(target_image == 0, output_path, image_index, 'target') + + output_image(layer0_in[0,:,:], 99, 0, pad_by) + # for layeri in range(len(layer_output)): + # for i in range(layer_output[layeri].shape[0]): + # output_image(layer_output[layeri][i,:,:], layeri, i, pad_by) + # if i == 20: break + + output_image(layer_output[-1][0,:,:], len(layer_output)+1, 0, pad_by, image_index, downsample) + output_image(all_layers[-1].pre_softmax[0,:,:], len(layer_output), 0, pad_by, image_index, downsample) + + #print "Classification error before training: {0}".format(np.sum((layer3_out[1,:,:] - target_output[1,:,:])**2)) + + print 'Classification of all images complete.' diff --git a/DeepNets/full_image_cnn.py b/DeepNets/full_image_cnn.py new file mode 100644 index 0000000..c4157d3 --- /dev/null +++ b/DeepNets/full_image_cnn.py @@ -0,0 +1,667 @@ +# Library for full image cnn operations + +import numpy as np +import scipy.ndimage +from scipy.signal import convolve2d +from scipy.signal import fftconvolve +from numpy.fft import rfftn +from numpy.fft import irfftn +import mahotas +import time +import h5py + +VALID_SIZE_CROP = False + +def _centered(arr, newsize): + # Return the center newsize portion of the array. 
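+    # For illustration: _centered(np.arange(5), (3,)) -> array([1, 2, 3]).
+    # Used below to crop padded FFT convolution output back to 'same' size.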
+ newsize = np.asarray(newsize) + currsize = np.array(arr.shape) + startind = (currsize - newsize) // 2 + endind = startind + newsize + myslice = [slice(startind[k], endind[k]) for k in range(len(endind))] + return arr[tuple(myslice)] + +class ConvolutionMaxpoolLayer(object): + def __init__(self, nkernels, ninputs, kernel_size, stride_in, maxpool_size, + weight_init=0.005, W=[], b=[]): + self.ninputs = ninputs + self.nkernels = nkernels + self.kernel_size = kernel_size + self.maxpool_size = maxpool_size + self.stride_in = stride_in + self.stride_out = stride_in * maxpool_size + self.prev_conv_size = 0 + + if W == []: + self.W = (np.float32(np.random.random((nkernels, ninputs, kernel_size, kernel_size))) - 0.5) * weight_init * 2 + else: + self.W = W + + if b == []: + self.b = np.zeros((nkernels), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs) + + if VALID_SIZE_CROP: + # valid size output + output_size = (input_image.shape[1] - self.kernel_size + 1, input_image.shape[2] - self.kernel_size + 1) + else: + # same size output + output_size = (input_image.shape[1], input_image.shape[2]) + + output = np.zeros((self.nkernels, output_size[0], output_size[1]), dtype=np.float32) + self.switches = np.zeros((self.nkernels, output_size[0], output_size[1]), dtype=np.uint32) + + #options for + #scipy convolution? + #fft convolution? + #cuda convolution? + + # Retain precalculated fft / size for efficient repeat calculations + + for stridex in range(self.stride_in): + for stridey in range(self.stride_in): + + same_fft_size = True + + for filteri in range(self.nkernels): + + # Apply convolution + + if VALID_SIZE_CROP: + stride_shape = ( + len(np.arange(stridex, input_image.shape[1] - self.kernel_size + 1, self.stride_in)), + len(np.arange(stridey, input_image.shape[2] - self.kernel_size + 1, self.stride_in))) + else: + stride_shape = ( + len(np.arange(stridex, input_image.shape[1], self.stride_in)), + len(np.arange(stridey, input_image.shape[2], self.stride_in))) + + #conv_result = np.zeros(((output_size[0] + stridex) / self.stride_in, (output_size[1] + stridey) / self.stride_in), dtype=np.float32) + conv_result = np.zeros((stride_shape[0], stride_shape[1]), dtype=np.float32) + + for channeli in range(self.ninputs): + + # Space domain convolution + # conv_result = conv_result + convolve2d( + # input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze(), + # self.W[filteri,channeli,:,:].squeeze(), + # mode='same') + # #mode='valid') + + # FFT convolution + #conv_result = conv_result + fftconvolve( + # input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze(), + # self.W[filteri,channeli,:,:].squeeze(), + # mode='same') + + # FFT convolution (cache filter transformations) + convolve_image = input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze() + conv_size = (self.kernel_size + convolve_image.shape[0] - 1, self.kernel_size + convolve_image.shape[1] - 1) + + fsize = 2 ** np.ceil(np.log2(conv_size)).astype(int) + fslice = tuple([slice(0, int(sz)) for sz in conv_size]) + + if same_fft_size and conv_size == self.prev_conv_size: + fft_result = irfftn(rfftn(convolve_image, fsize) * self.Wfft[filteri,channeli,:,:], fsize)[fslice].copy() + else: + if same_fft_size: + self.Wfft = np.zeros((self.nkernels, self.ninputs, fsize[0], fsize[1]//2+1), np.complex64) + same_fft_size = False + self.prev_conv_size = conv_size + + filter_fft = 
rfftn(self.W[filteri,channeli,:,:].squeeze(), fsize) + fft_result = irfftn(rfftn(convolve_image, fsize) * filter_fft, fsize)[fslice].copy() + + self.Wfft[filteri,channeli,:,:] = filter_fft + + conv_result += _centered(fft_result.real, conv_result.shape) + + # if mode == "full": + # return ret + # elif mode == "same": + # return _centered(ret, s1) + # elif mode == "valid": + # return _centered(ret, abs(s1 - s2) + 1) + + # Apply maxpool (record switches) + + fullx = conv_result.shape[0] + fully = conv_result.shape[1] + splitx = (fullx + 1) / self.maxpool_size + splity = (fully + 1) / self.maxpool_size + + striderangex = np.arange(0, fullx-1, self.maxpool_size) + striderangey = np.arange(0, fully-1, self.maxpool_size) + + for poolx in range(self.maxpool_size): + for pooly in range(self.maxpool_size): + + maxpool = np.ones((splitx, splity, self.maxpool_size ** 2), dtype=np.float32) * -np.inf + + offset_i = 0 + for offset_x in range(self.maxpool_size): + for offset_y in range(self.maxpool_size): + pool_non_padded = conv_result[poolx + offset_x::self.maxpool_size, pooly + offset_y::self.maxpool_size] + maxpool[0:pool_non_padded.shape[0],0:pool_non_padded.shape[1],offset_i] = pool_non_padded + offset_i = offset_i + 1 + + max_indices = np.argmax(maxpool, axis=2) + maxpool = np.amax(maxpool, axis=2) + + # Tanh and bias + maxpool = np.tanh(maxpool + self.b[filteri]) + + # truncate if necessary + if poolx > 0 and fullx % self.maxpool_size >= poolx: + maxpool = maxpool[:-1,:] + max_indices = max_indices[:-1,:] + if pooly > 0 and fully % self.maxpool_size >= pooly: + maxpool = maxpool[:,:-1] + max_indices = max_indices[:,:-1] + + output[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] = maxpool + self.switches[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] = max_indices + + if filteri == 0: + self.conv_result = conv_result + + print "CONV Layer: Done pool {0}, of {1}.".format(stridex * self.stride_in + stridey + 1, self.stride_in ** 2) + + return output + + def backpropogate_error(self, input_image, output, output_error, learning_rate): + # df / dx * error + error_bp = (1 - output**2) * output_error + + error_in = np.zeros(input_image.shape, dtype=np.float32) + gradW = np.zeros(self.W.shape, dtype=np.float32) + + crop_switches = _centered(self.switches, output.shape) + + for stridex in range(self.stride_in): + for stridey in range(self.stride_in): + + input_pool = input_image[:,stridex::self.stride_in,stridey::self.stride_in] + error_in_pool = np.zeros(input_pool.shape, dtype=np.float32) + + nc, nx, ny = input_pool.shape + + for filteri in range(self.nkernels): + + conv_error = np.zeros((nx, ny), dtype=np.float32) + + # reverse maxpool step based on saved switch values + for poolx in range(self.maxpool_size): + for pooly in range(self.maxpool_size): + + error_bp_pool = error_bp[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] + switches_pool = crop_switches[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] + + # Unpool into conv_error + offset_i = 0 + for offset_x in range(self.maxpool_size): + for offset_y in range(self.maxpool_size): + + wnx = (conv_error.shape[0] - poolx - offset_x + 1) // self.maxpool_size + wny = (conv_error.shape[1] - pooly - offset_y + 1) // self.maxpool_size + + #conv_error[poolx + offset_x::self.maxpool_size, pooly + offset_y::self.maxpool_size] += 
error_bp_pool[:wnx,:wny] * (switches_pool[:wnx,:wny] == offset_i) + + indices = np.nonzero(switches_pool[:wnx,:wny] == offset_i) + conv_error[indices[0]*self.maxpool_size+poolx+offset_x,indices[1]*self.maxpool_size+pooly+offset_y] += error_bp_pool[indices[0],indices[1]] + + valid = np.nonzero(conv_error) + #print 'Found {0} pool winners from {1}.'.format(len(valid[0]), nx*ny) + + for (lx,ly) in zip(valid[0],valid[1]): + + layer_temp = np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + window_non_padded = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + layer_temp[:,:window_non_padded.shape[1], :window_non_padded.shape[2]] = window_non_padded + + # Add to gradient + gradW[filteri,:,:,:] += conv_error[lx, ly] * layer_temp + + # Add to error in + limitx = lx + window_non_padded.shape[1] + limity = ly + window_non_padded.shape[2] + node_error = self.W[filteri,:,:,:] * conv_error[lx, ly] + error_in_pool[:,lx:limitx,ly:limity] += node_error[:,:limitx-lx,:limity-ly] + + error_in[:,stridex::self.stride_in,stridey::self.stride_in] = error_in_pool + + print 'CONV Backprop: Done pool {0} of {1}'.format(stridex * self.stride_in + stridey + 1, self.stride_in ** 2) + + # Normalize by the number of training examples + #ntrain = output_error.shape[0] * output_error.shape[1] + #gradW = gradW / ntrain + #gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain + gradb = np.sum(np.sum(error_bp, axis=2), axis=1) + + # print error_bp.shape + # print error_in.shape + # print gradW.shape + # print gradb.shape + + self.W = self.W - learning_rate * gradW + self.b = self.b - learning_rate * gradb + + return error_in + + + +class FullyConnectedLayer(object): + def __init__(self, ninputs, noutputs, kernel_size, stride, weight_init=0.005, W=[], b=[]): + self.ninputs = ninputs + self.noutputs = noutputs + self.kernel_size = kernel_size + self.stride_in = stride + self.stride_out = stride + + if W == []: + self.W = (np.float32(np.random.random((ninputs * kernel_size ** 2, noutputs))) - 0.5) * weight_init * 2 + else: + self.W = W + + if b ==[]: + self.b = np.zeros((noutputs), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs ) + + if VALID_SIZE_CROP: + # valid size output + output_size = (input_image.shape[1] - self.kernel_size + 1, input_image.shape[2] - self.kernel_size + 1) + else: + # same size output + output_size = (input_image.shape[1], input_image.shape[2]) + + output = np.zeros((self.noutputs, output_size[0], output_size[1]), dtype=np.float32) + + # Apply dot product for each image window in each pool + for poolx in range(self.stride_in): + for pooly in range(self.stride_in): + + fullx = input_image.shape[1] + fully = input_image.shape[2] + + poolfrange = np.arange(self.ninputs) + poolxrange = np.arange(poolx, fullx, self.stride_in) + poolyrange = np.arange(pooly, fully, self.stride_in) + + layer_pool = input_image[np.ix_(poolfrange, poolxrange, poolyrange)] + + if VALID_SIZE_CROP: + startx = 0 + endx = layer_pool.shape[1] - self.kernel_size + 1 + starty = 0 + endy = layer_pool.shape[2] - self.kernel_size + 1 + else: + startx = -((self.kernel_size + 1) / 2) + 1 + endx = startx + layer_pool.shape[1] + starty = -((self.kernel_size + 1) / 2) + 1 + endy = starty + layer_pool.shape[2] + + #print (startx, endx) + #print (starty, endy) + + for lx in range(startx, endx): + for ly in range(starty, endy): + + basex = np.max([lx,0]) + basey = np.max([ly,0]) + + layer_temp 
= np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + window_non_padded = layer_pool[:,basex:lx+self.kernel_size, basey:ly+self.kernel_size] + + xfrom = np.max([-lx,0]) + yfrom = np.max([-ly,0]) + + layer_temp[:,xfrom:xfrom+window_non_padded.shape[1], yfrom:yfrom+window_non_padded.shape[2]] = window_non_padded + + layer_temp = np.tanh(np.dot(layer_temp.flatten(), self.W) + self.b) + output[:, poolx + self.stride_in * (lx - startx), pooly + self.stride_in * (ly - starty)] = layer_temp + + print 'FC Layer: Done pool {0} of {1}'.format(poolx * self.stride_in + pooly + 1, self.stride_in ** 2) + + return output + + def backpropogate_error(self, input_image, output, output_error, learning_rate): + + # df / dx * error + error_bp = (1 - output**2) * output_error + + error_in = np.zeros(input_image.shape, dtype=np.float32) + gradW = np.zeros(self.W.shape, dtype=np.float32) + #ntrain = 0 + + for poolx in range(self.stride_in): + for pooly in range(self.stride_in): + + error_bp_pool = error_bp[:,poolx::self.stride_in,pooly::self.stride_in] + input_pool = input_image[:,poolx::self.stride_in,pooly::self.stride_in] + error_in_pool = np.zeros(input_pool.shape, dtype=np.float32) + + nerr, nx, ny = error_bp_pool.shape + + # Only train on full windows + for lx in range(nx-self.kernel_size+1): + for ly in range(ny-self.kernel_size+1): + + layer_temp = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + + # zero-padded + #layer_temp = np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + #window_non_padded = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + #layer_temp[:,:window_non_padded.shape[1], :window_non_padded.shape[2]] = window_non_padded + + # dE / dW = input * error (summed over kernel neighbourhood) + gradW += np.dot(error_bp_pool[:,lx,ly].reshape(self.noutputs,1), layer_temp.reshape(1,layer_temp.size)).T + + # error_in (important to calculate this before weights are updated) + error_in_pool[:,lx:lx+self.kernel_size,ly:ly+self.kernel_size] += np.dot(self.W, error_bp_pool[:,lx,ly]).reshape(self.ninputs, self.kernel_size, self.kernel_size) + + error_in[:,poolx::self.stride_in,pooly::self.stride_in] = error_in_pool + #ntrain += (nx-self.kernel_size) * (ny-self.kernel_size) + + print 'FC Backprop: Done pool {0} of {1}'.format(poolx * self.stride_in + pooly + 1, self.stride_in ** 2) + + # Normalize by the number of training examples + #gradW = gradW / ntrain + #gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain + gradb = np.sum(np.sum(error_bp, axis=2), axis=1) + + # print error_bp.shape + # print error_in.shape + # print gradW.shape + # print gradb.shape + + self.W = self.W - learning_rate * gradW + self.b = self.b - learning_rate * gradb + + return error_in + + + +class LogisticRegressionLayer(object): + def __init__(self, ninputs, noutputs, stride, W=[], b=[]): + self.ninputs = ninputs + self.noutputs = noutputs + self.stride_in = stride + self.stride_out = stride + + if W == []: + self.W = np.zeros((ninputs, noutputs), dtype=np.float32) + else: + self.W = W + + if b ==[]: + self.b = np.zeros((noutputs), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs) + output = np.zeros((self.noutputs, input_image.shape[1], input_image.shape[2]), dtype=np.float32) + + # Apply dot procuct for each pixel + for lx in range(input_image.shape[1]): + for ly in range(input_image.shape[2]): + output[:,lx,ly] = 
np.dot(input_image[:,lx,ly], self.W) + self.b
+
+        self.pre_softmax = output
+
+        #Apply softmax (use noutputs rather than a hard-coded class count)
+        maxes = np.amax(output, axis=0)
+        maxes = np.tile(maxes, (self.noutputs,1,1))
+        e = np.exp(output - maxes)
+        output = e / np.sum(e, axis=0)
+
+        print 'LR Layer: Complete.'
+
+        return output
+
+    def backpropogate_error(self, input_image, output, target_output, learning_rate):
+
+        nerr, nx, ny = target_output.shape
+
+        # df / dx * error
+        error_bp = output * (1 - output) * (output - target_output)
+
+        error_in = np.zeros(input_image.shape, dtype=np.float32)
+        gradW = np.zeros(self.W.shape, dtype=np.float32)
+
+        for lx in range(nx):
+            for ly in range(ny):
+
+                # dE / dW = input * error
+                #gradW += np.tile(error_bp[:,lx,ly], (self.ninputs, 1)) * np.tile(input_image[:,lx,ly], (self.noutputs, 1)).T
+                gradW += np.dot(error_bp[:,lx,ly].reshape(self.noutputs,1), input_image[:,lx,ly].reshape(1,self.ninputs)).T
+
+                # error_in (important to calculate this before weights are updated)
+                error_in[:,lx,ly] += np.dot(self.W, error_bp[:,lx,ly])
+
+        # Normalize by the number of training examples
+        ntrain = nx * ny
+        #gradW = gradW / ntrain
+        gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain
+        #gradb = np.sum(np.sum(error_bp, axis=2), axis=1)
+        print error_bp.shape
+        print 'gradb={0}'.format(gradb)
+
+        # print error_bp.shape
+        # print error_in.shape
+        # print gradW.shape
+        # print gradb.shape
+
+        self.W = self.W - learning_rate * gradW
+        self.b = self.b - learning_rate * gradb
+
+        print 'LR Backprop: Complete.'
+
+        return error_in
+
+class DeepNetwork(object):
+    def __init__(self, all_layers, best_offset, best_sigma, downsample, pad_by, stumpin=False):
+        self.all_layers = all_layers
+        self.best_offset = best_offset
+        self.best_sigma = best_sigma
+        self.downsample = downsample
+        self.pad_by = pad_by
+        self.stumpin = stumpin
+
+        assert np.max(np.abs(self.best_offset)) <= self.pad_by
+
+
+    def apply_net(self, input_image, perform_downsample=True, perform_pad=True, perform_upsample=True, perform_blur=True, perform_offset=True):
+
+        if perform_pad:
+            input_image = np.pad(input_image, ((self.pad_by, self.pad_by), (self.pad_by, self.pad_by)), 'symmetric')
+
+        if perform_downsample:
+            input_image = np.float32(mahotas.imresize(input_image, 1.0/self.downsample))
+
+        layer_temp = input_image.reshape(1, input_image.shape[0], input_image.shape[1])
+
+        for layeri in range(len(self.all_layers)):
+            layer_temp = self.all_layers[layeri].apply_layer(layer_temp)
+
+        output_image = layer_temp[0,:,:]
+
+        if perform_upsample:
+            output_image = np.float32(mahotas.imresize(output_image, self.downsample))
+
+        if perform_blur:
+            output_image = scipy.ndimage.filters.gaussian_filter(output_image, self.best_sigma)
+
+        if perform_offset:
+            #Translate
+            output_image = np.roll(output_image, self.best_offset[0], axis=0)
+            output_image = np.roll(output_image, self.best_offset[1], axis=1)
+
+        # Crop to valid size
+        output_image = output_image[self.pad_by:-self.pad_by,self.pad_by:-self.pad_by]
+
+        return output_image
+
+class ComboDeepNetwork(object):
+    def __init__(self, filename):
+
+        combo_h5 = h5py.File(filename, 'r')
+
+        self.nnets = combo_h5['/nets'][...]
+        self.all_nets = []
+
+        for net_i in range(self.nnets):
+            net_string = '/net{0}'.format(net_i)
+
+            best_offset = combo_h5[net_string + '/best_offset'][...]
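+            # best_offset is a per-network (row, col) translation; it is
+            # applied via np.roll in DeepNetwork.apply_net when perform_offset
+            # is enabled, and must not exceed pad_by (asserted in
+            # DeepNetwork.__init__).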
+ best_sigma = float(combo_h5[net_string + '/best_sigma'][...]) + downsample = float(combo_h5[net_string + '/downsample_factor'][...]) + nlayers = int(combo_h5[net_string + '/layers'][...]) + stumpin = net_string + '/stumpin' in combo_h5 + + print 'Network {0} has {1} layers.'.format(net_i, nlayers) + #print stumpin + + all_layers = [] + stride_in = 1 + + for layer_i in range(nlayers): + + layer_string = net_string + '/layer{0}/'.format(layer_i) + layer_type = combo_h5[layer_string + 'type'][...] + + if layer_type == 'Convolution': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + layer_maxpoolsize = combo_h5[layer_string + 'maxpoolsize'][...] + + new_layer = ConvolutionMaxpoolLayer( + layer_weights.shape[0], layer_weights.shape[1], layer_weights.shape[2], + stride_in, layer_maxpoolsize, W=layer_weights, b=layer_bias) + + elif layer_type == 'FullyConnected': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + layer_ksize = combo_h5[layer_string + 'ksize'][...] + + new_layer = FullyConnectedLayer( + layer_weights.shape[0] / (layer_ksize ** 2), layer_weights.shape[1], layer_ksize, + stride_in, W=layer_weights, b=layer_bias) + + elif layer_type == 'LogisticRegression': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + + new_layer = LogisticRegressionLayer(layer_weights.shape[0], layer_weights.shape[1], + stride_in, W=layer_weights, b=layer_bias) + + else: + raise Exception("Unknown layer type: {0}".format(layer_type)) + + all_layers.append(new_layer) + + stride_in = new_layer.stride_out + + # Calculate network footprint and therefore pad size + footprint = 1 + for revlayer in range(1,nlayers): + layer = nlayers - revlayer - 1 + if revlayer == 1: + footprint = all_layers[layer].kernel_size + else: + footprint = footprint * all_layers[layer].maxpool_size - 1 + all_layers[layer].kernel_size + + pad_by = int(downsample * (footprint // 2)) + + new_network = DeepNetwork(all_layers, best_offset, best_sigma, downsample, pad_by, stumpin) + + self.all_nets.append(new_network) + + def apply_combo_net(self, input_image, block_size=400, stump_input=None, return_parts=False): + + average_image = np.zeros(input_image.shape, dtype=np.float32) + + parts = [] + + prev_downsample = 0 + prev_pad_by = 0 + + start_time = time.clock() + + for net_i in range(self.nnets): + + net_input = stump_input if self.all_nets[net_i].stumpin else input_image + + downsample = self.all_nets[net_i].downsample + pad_by = self.all_nets[net_i].pad_by + + # Downsample and pad + if prev_downsample != downsample or prev_pad_by != pad_by: + preprocessed_image = np.pad(net_input, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric') + preprocessed_image = np.float32(mahotas.imresize(preprocessed_image, 1.0 / downsample)) + + halo = int((pad_by + downsample - 1) / downsample) + + # Compute in blocks (small edges) + block_x = range(halo, preprocessed_image.shape[0], block_size) + block_y = range(halo, preprocessed_image.shape[1], block_size) + + # (full edges) + # block_x = range(halo, preprocessed_image.shape[0] - block_size + 1, block_size) + # block_y = range(halo, preprocessed_image.shape[1] - block_size + 1, block_size) + # if preprocessed_image.shape[0] % block_size > 0: + # block_x.append(max(halo, preprocessed_image.shape[0] - block_size - halo)) + # if preprocessed_image.shape[1] % block_size > 0: + # block_y.append(max(halo, 
preprocessed_image.shape[1] - block_size - halo)) + + blocki = 0 + nblocks = len(block_x) * len(block_y) + + output_image = np.zeros(input_image.shape, dtype=np.float32) + + for from_x in block_x: + for from_y in block_y: + + # Crop out a padded input block + block = preprocessed_image[from_x-halo:from_x+block_size+halo, from_y-halo:from_y+block_size+halo] + + # Apply network + output_block = self.all_nets[net_i].apply_net(block, perform_downsample=False, perform_pad=False) + + # Output block is not padded + to_x = (from_x - halo) * downsample + to_y = (from_y - halo) * downsample + output_image[to_x:to_x + output_block.shape[0], to_y:to_y + output_block.shape[1]] = output_block + + blocki += 1 + print 'Block {0} of {1} complete.'.format(blocki, nblocks) + + average_image += output_image + + if return_parts: + parts.append(output_image) + + print 'Net {0} of {1} complete.'.format(net_i + 1, self.nnets) + + average_image /= self.nnets + + end_time = time.clock() + + print('Classification complete.') + print('Classification code ran for %.2fm' % ((end_time - start_time) / 60.)) + + return (average_image, parts) if return_parts else average_image diff --git a/Segment/import_segmentations_4cube.py b/Segment/import_segmentations_4cube.py new file mode 100644 index 0000000..db62b9d --- /dev/null +++ b/Segment/import_segmentations_4cube.py @@ -0,0 +1,66 @@ +import sys +import numpy as np +import scipy +import scipy.io +import scipy.ndimage +import mahotas +import math +import h5py +import time +import pymaxflow +import timer +import os +import glob + +try: + input_paths = ['D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000001_Z=1_Y=1_X=1_minZ=1_maxZ=20_minY=1_maxY=576_minX=1_maxX=576_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000002_Z=1_Y=1_X=2_minZ=1_maxZ=20_minY=1_maxY=576_minX=449_maxX=1024_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000003_Z=1_Y=2_X=1_minZ=1_maxZ=20_minY=449_maxY=1024_minX=1_maxX=576_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000004_Z=1_Y=2_X=2_minZ=1_maxZ=20_minY=449_maxY=1024_minX=449_maxX=1024_dwnSmp=1\pre_segs_kaynig'] + + imshape = [1024, 1024] + + input_areas = [[0, 576, 0, 576], + [0, 576, 448, 1024], + [448, 1024, 0, 576], + [448, 1024, 448, 1024]] + + output_path = 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\segs.h5' + + for pi in range(len(input_paths)): + + input_search_string = input_paths[pi] + '/*' + seg_dirs = sorted( glob.glob( input_search_string ) ) + print 'Found {0} segmentation directories'.format(len(seg_dirs)) + + for di in range(len(seg_dirs)): + imagedir = seg_dirs[di] + segmentation_files = sorted( glob.glob( imagedir + '/*.png' ) ) + print 'Found {0} segmentations in directory {1}.'.format(len(segmentation_files), di) + + if pi == 0 and di == 0: + out_hdf5 = h5py.File(output_path, 'w') + segmentations = out_hdf5.create_dataset('segmentations', + (imshape[0], imshape[1], len(segmentation_files), len(seg_dirs)), + dtype=np.bool, + chunks=(256, 256, 1, 1), + compression='gzip') + + + for fi in range(len(segmentation_files)): + + seg = mahotas.imread(segmentation_files[fi]) == 0 + + segmentations[input_areas[pi][0]:input_areas[pi][1], input_areas[pi][2]:input_areas[pi][3], fi, di] = seg + + + 
# Debug visualization: requires pylab (e.g. `from pylab import *`),
+    # which is not imported above, so keep these commented out:
+    #figure(figsize=(20,20))
+    #imshow(segmentations[:, :, 10, 10], cmap=cm.gray)
+
+    out_hdf5.close()
+    print "Success"
+
+except Exception as e:
+    print e
+    raise
+
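+# Usage sketch (illustrative only, not called by this script): the combined
+# volume written above can be read back one slice at a time, e.g.:
+#
+#   with h5py.File(output_path, 'r') as f:
+#       segs = f['segmentations']    # bool dataset, (y, x, n_segs, n_dirs)
+#       seg0 = segs[:, :, 0, 0]      # lazy read of a single segmentation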