From e2ca50db42cd516e4eea1b70ec8605e36180c8ac Mon Sep 17 00:00:00 2001
From: Seymour Knowles-Barley
Date: Thu, 30 Jan 2014 10:56:18 -0500
Subject: [PATCH] Add random forest options for LGN and other datasets,
 progress output, and GPU deep net CNN functionality.

---
 .../predict_gpu_randomforest_2class.py        | 194 +++++
 .../predict_gpu_randomforest_3class.py        | 176 +++++
 .../predict_gpu_randomforest_cp7.py           | 206 +++++
 .../predict_gpu_randomforest_cp7_ds2.py       | 208 +++++
 .../predict_gpu_randomforest_lgn_ds2.py       | 208 +++++
 .../predict_gpu_randomforest_norm2.py         | 167 ++++
 .../predict_gpu_randomforest_test.py          | 168 ++++
 ClassifyMembranes/predict_pyv_randomforest.py | 127 +++
 ClassifyMembranes/rf_classify.html            | 664 ++++++++++++++++
 ClassifyMembranes/rf_classify_parallel.html   | 730 ++++++++++++++++++
 .../train_gpu_randomforest_2class.py          | 664 ++++++++++++++++
 .../train_gpu_randomforest_3class.py          | 666 ++++++++++++++++
 .../train_gpu_randomforest_cp7.py             | 645 ++++++++++++++++
 .../train_gpu_randomforest_cp7_ds2.py         | 658 ++++++++++++++++
 .../train_gpu_randomforest_lgn_ds2.py         | 653 ++++++++++++++++
 .../train_gpu_randomforest_norm2.py           | 673 ++++++++++++++++
 DeepNets/full_image_classify_stumpin.py       | 237 ++++++
 DeepNets/full_image_cnn.py                    | 667 ++++++++++++++++
 Segment/import_segmentations_4cube.py         |  66 ++
 19 files changed, 7777 insertions(+)
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_2class.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_3class.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_cp7.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_norm2.py
 create mode 100644 ClassifyMembranes/predict_gpu_randomforest_test.py
 create mode 100644 ClassifyMembranes/predict_pyv_randomforest.py
 create mode 100644 ClassifyMembranes/rf_classify.html
 create mode 100644 ClassifyMembranes/rf_classify_parallel.html
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_2class.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_3class.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_cp7.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py
 create mode 100644 ClassifyMembranes/train_gpu_randomforest_norm2.py
 create mode 100644 DeepNets/full_image_classify_stumpin.py
 create mode 100644 DeepNets/full_image_cnn.py
 create mode 100644 Segment/import_segmentations_4cube.py

diff --git a/ClassifyMembranes/predict_gpu_randomforest_2class.py b/ClassifyMembranes/predict_gpu_randomforest_2class.py
new file mode 100644
index 0000000..f6d1e04
--- /dev/null
+++ b/ClassifyMembranes/predict_gpu_randomforest_2class.py
@@ -0,0 +1,194 @@
+############################################################
+# GPU Implementation of Random Forest Classifier - Prediction
+# v0.1
+# Seymour Knowles-Barley
+############################################################
+# Based on C code from:
+# http://code.google.com/p/randomforest-matlab/
+# License: GPLv2
+############################################################
+
+import numpy as np
+import os
+import sys
+import h5py
+import glob
+import mahotas
+
+import pycuda.autoinit
+import pycuda.driver as cu
+import pycuda.compiler as nvcc
+import pycuda.gpuarray as gpuarray
+
+gpu_randomforest_predict_source = """
+#define NODE_TERMINAL -1
+#define NODE_TOSPLIT -2
+#define NODE_INTERIOR -3
+
+__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? + treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +# forest_file = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\classifyds2\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_rhoana_features.h5' +# output_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\classifyds2\\output\\' + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_autoencoder_features.h5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_output\\' + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '.tif' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_output\\' + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\original' +input_image_suffix = '.tif' +input_features_suffix = '_autoencoder_features.h5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\output\\' + + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = 
file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + del d_votes + del d_features + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + for classi in range(nclass): + output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if (nclass == 2): + color_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.float32) + color_image[:,:,0] = prob_image[:,:,0] + color_image[:,:,1] = prob_image[:,:,1] + color_image[:,:,2] = prob_image[:,:,0] + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(color_image * 255)) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = color_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_3class.py b/ClassifyMembranes/predict_gpu_randomforest_3class.py new file mode 100644 index 0000000..c250c34 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_3class.py @@ -0,0 +1,176 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int 
*nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? + treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +# forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +# input_image_suffix = '_labeled.png' +# input_features_suffix = '.hdf5' +# output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2\\' + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\original' +input_image_suffix = '.tif' +input_features_suffix = '_autoencoder_features.h5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\output\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + del 
d_votes + del d_features + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_cp7.py b/ClassifyMembranes/predict_gpu_randomforest_cp7.py new file mode 100644 index 0000000..d5bb142 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_cp7.py @@ -0,0 +1,206 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' +input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features.h5' +output_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\output\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif prob_image.shape[2] == 2: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + 
out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py b/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py new file mode 100644 index 0000000..14f7b17 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_cp7_ds2.py @@ -0,0 +1,208 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +downsample_factor = 2 + +forest_file = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' +input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) +output_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\output_ds{0}\\'.format(downsample_factor) + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif prob_image.shape[2] == 2: + + 
output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py b/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py new file mode 100644 index 0000000..aa59b1f --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_lgn_ds2.py @@ -0,0 +1,208 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +downsample_factor = 2 + +forest_file = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\rhoana_forest_ds{0}_2class.hdf5'.format(downsample_factor) +input_image_folder = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2' +input_image_suffix = '_labeled.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) +output_folder = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\output_ds{0}\\'.format(downsample_factor) + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + if prob_image.shape[2] == 3: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + elif 
prob_image.shape[2] == 2: + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + + out_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + out_image[:,:,0] = prob_image[:,:,0] * 255 + out_image[:,:,1] = prob_image[:,:,1] * 255 + out_image[:,:,2] = prob_image[:,:,0] * 255 + mahotas.imsave(output_image_file, out_image) + + win_0 = prob_image[:,:,0] > prob_image[:,:,1] + win_1 = np.logical_not(win_0) + + win_image = np.zeros((prob_image.shape[0], prob_image.shape[1], 3), dtype=np.uint8) + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_0 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, win_image) + + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image[:,:,1], + chunks = (64,64), + compression = 'gzip') + + original_out = out_hdf5.create_dataset('original', + data = f['original'][...], + chunks = (64,64), + compression = 'gzip') + + for i,k in enumerate(f.keys()): + if k.startswith('membrane_19_3_'): + membrane_out = out_hdf5.create_dataset(k, + data = f[k][...], + chunks = (64,64), + compression = 'gzip') + + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + f.close() + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_norm2.py b/ClassifyMembranes/predict_gpu_randomforest_norm2.py new file mode 100644 index 0000000..590e871 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_norm2.py @@ -0,0 +1,167 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class_norm2.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2_norm2\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = 
h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_gpu_randomforest_test.py b/ClassifyMembranes/predict_gpu_randomforest_test.py new file mode 100644 index 0000000..c604392 --- /dev/null +++ b/ClassifyMembranes/predict_gpu_randomforest_test.py @@ -0,0 +1,168 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_predict_source = """ +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__global__ void predictKernel(const float *x, int n, int mdim, const int *treemap, + const int *nodestatus, const float *xbestsplit, + const int *bestvar, const int *nodeclass, + int nclass, + int ntree, int *countts, int maxTreeSize) + //int *jts, + //int *nodex, +{ + int idx = threadIdx.x + blockDim.x * (blockIdx.x + gridDim.x * blockIdx.y); + + //Make sure we don't overrun + if (idx < n) { + int m, k, treei, treeOffset1, treeOffset2; + + //Repeat for each tree - this way only one thread writes to any point in the vote output array + + for (treei = 0; treei < ntree; ++treei) { + //for (treei = 0; treei < ntree; ++treei) { + treeOffset1 = treei*maxTreeSize; + treeOffset2 = treei*2*maxTreeSize; + k = 0; + + while (nodestatus[treeOffset1 + k] != NODE_TERMINAL) { + m = bestvar[treeOffset1 + k] - 1; + //Split by a numerical predictor + k = (x[idx + n * m] <= xbestsplit[treeOffset1 + k]) ? 
+ treemap[treeOffset2 + k * 2] - 1 : treemap[treeOffset2 + 1 + k * 2] - 1; + } + //We found the terminal node: assign class label + //jts[chunki + treei] = nodeclass[treeOffset + k]; + //nodex[chunki + treei] = k + 1; + countts[idx * nclass + nodeclass[treeOffset1 + k] - 1] += 1; + } + } + +} +""" + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2test\\' + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + + +# Prep the gpu function +gpu_predict = nvcc.SourceModule(gpu_randomforest_predict_source).get_function('predictKernel') + +d_treemap = gpuarray.to_gpu(model['/forest/treemap'][...]) +d_nodestatus = gpuarray.to_gpu(model['/forest/nodestatus'][...]) +d_xbestsplit = gpuarray.to_gpu(model['/forest/xbestsplit'][...]) +d_bestvar = gpuarray.to_gpu(model['/forest/bestvar'][...]) +d_nodeclass = gpuarray.to_gpu(model['/forest/nodeclass'][...]) + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +#ntree = 1 +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + fshape = (nfeatures, image_shape[0] * image_shape[1]) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + out_votes = np.zeros((image_shape[0], image_shape[1], nclass), dtype=np.int32) + d_votes = gpuarray.to_gpu(out_votes) + + d_features = gpuarray.to_gpu(features) + + block = (64, 1, 1) + grid = (1024, int(fshape[1] / block[0] / 1024 + 1)) + + gpu_predict(d_features, np.int32(fshape[1]), np.int32(fshape[0]), + d_treemap, d_nodestatus, d_xbestsplit, d_bestvar, d_nodeclass, + np.int32(nclass), np.int32(ntree), d_votes, np.int32(nrnodes), + grid=grid, block=block) + + + # Save / display results + + votes = d_votes.get() + + prob_image = np.float32(votes) / ntree + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = 
h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/predict_pyv_randomforest.py b/ClassifyMembranes/predict_pyv_randomforest.py new file mode 100644 index 0000000..2a14493 --- /dev/null +++ b/ClassifyMembranes/predict_pyv_randomforest.py @@ -0,0 +1,127 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import os +import sys +import h5py +import glob +import mahotas + +forest_file = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\all' +input_image_suffix = '_labeled.png' +input_features_suffix = '.hdf5' +output_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\output2pyv\\' + +NODE_TERMINAL = -1 +NODE_TOSPLIT = -2 +NODE_INTERIOR = -3 + +# Load the forest settings + +model = h5py.File(forest_file, 'r') + +treemap = model['/forest/treemap'][...] +nodestatus = model['/forest/nodestatus'][...] +xbestsplit = model['/forest/xbestsplit'][...] +bestvar = model['/forest/bestvar'][...] +nodeclass = model['/forest/nodeclass'][...] + +nrnodes = model['/forest/nrnodes'][...]; +ntree = model['/forest/ntree'][...]; +nclass = model['/forest/nclass'][...]; + + +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +print 'Found {0} images to classify.'.format(len(files)) + +for file in files: + features_file = file.replace(input_image_suffix, input_features_suffix) + + # Load the features + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + image_shape = f[f.keys()[0]].shape + npix = image_shape[0] * image_shape[1] + fshape = (nfeatures, npix) + features = np.zeros(fshape, dtype=np.float32) + + for i,k in enumerate(f.keys()): + features[i,:] = f[k][...].ravel() + + + # Predict + + votes = np.zeros((npix, nclass), dtype=np.int32) + + k = np.zeros((ntree, npix), dtype=np.int32) + + alltreei = np.reshape(np.repeat(np.arange(512), npix), (ntree,npix)) + + (treei, pixi) = np.nonzero(nodestatus[alltreei, k] != NODE_TERMINAL) + + while len(treei) > 0: + m = bestvar[treei, k[treei, pixi]] - 1 + choice = 1 * (features[m, pixi] > xbestsplit[treei, k[treei, pixi]]) + #Split by a numerical predictor + k[treei, pixi] = treemap[treei * 2, k[treei, pixi] * 2 + choice] - 1 + (treei, pixi) = np.nonzero(nodestatus[alltreei, k] != NODE_TERMINAL) + print "{0} non terminal nodes.".format(len(treei)) + + #We found the terminal node: assign class label + #jts[chunki + treei] = nodeclass[treeOffset + k] + #nodex[chunki + treei] = k + 1 + cast_votes = nodeclass[alltreei, k] - 1 + for classi in range(nclass): + votes[:,classi] = np.sum(cast_votes == classi, 0) + + # Save / display results + + prob_image = np.reshape(np.float32(votes) / ntree, (image_shape[0], image_shape[1], nclass)) + + output_image_basename = file.replace(input_image_folder, output_folder) + + # for classi in range(nclass): + # 
output_image_file = output_image_basename.replace(input_image_suffix, '_class{0}.png'.format(classi + 1)) + # mahotas.imsave(output_image_file, np.uint8(prob_image[:,:,classi] * 255)) + + output_image_file = output_image_basename.replace(input_image_suffix, '_allclass.png') + mahotas.imsave(output_image_file, np.uint8(prob_image * 255)) + + win_0 = np.logical_and(prob_image[:,:,0] > prob_image[:,:,1], prob_image[:,:,0] > prob_image[:,:,2]) + win_2 = np.logical_and(prob_image[:,:,2] > prob_image[:,:,0], prob_image[:,:,2] > prob_image[:,:,1]) + win_1 = np.logical_not(np.logical_or(win_0, win_2)) + + win_image = prob_image + win_image[:,:,0] = win_0 * 255 + win_image[:,:,1] = win_1 * 255 + win_image[:,:,2] = win_2 * 255 + + output_image_file = output_image_basename.replace(input_image_suffix, '_winclass.png') + mahotas.imsave(output_image_file, np.uint8(win_image)) + + output_path = output_image_basename.replace(input_image_suffix, '_probabilities.hdf5'); + temp_path = output_path + '_tmp' + out_hdf5 = h5py.File(temp_path, 'w') + # copy the probabilities for future use + probs_out = out_hdf5.create_dataset('probabilities', + data = prob_image, + chunks = (64,64,1), + compression = 'gzip') + out_hdf5.close() + + if os.path.exists(output_path): + os.unlink(output_path) + os.rename(temp_path, output_path) + + print '{0} done.'.format(file) diff --git a/ClassifyMembranes/rf_classify.html b/ClassifyMembranes/rf_classify.html new file mode 100644 index 0000000..cc1560e --- /dev/null +++ b/ClassifyMembranes/rf_classify.html @@ -0,0 +1,664 @@ + + + + + + + + + +

+Generated by Cython 0.19.1 on Fri Aug 02 12:09:04 2013
+
+Raw output: rf_classify.c
+
+[Cython annotation view: in the original HTML each rf_classify.pyx source line
+below is interleaved with its generated C implementation; only the recoverable
+.pyx source lines are kept here.]
+
+  1: #cython: boundscheck=False
+  2: #cython: wraparound=False
+  3: 
+  4: import numpy as np
+  5: cimport numpy as np
+  6: 
+  7: cpdef rf_classify(model, float[:,:] features):
+  8: 
+  9:     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ 10:     cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
+ 11:     cdef float [:,:] xbestsplit
+ 12:     cdef float[:] pixel_features
+ 13: 
+ 14:     NODE_TERMINAL = -1
+ 15:     #NODE_TOSPLIT  = -2
+ 16:     #NODE_INTERIOR = -3
+ 17: 
+ 18:     nfeatures = features.shape[0]
+ 19:     npix = features.shape[1]
+ 20: 
+ 21:     treemap = model.treemap
+ 22:     nodestatus = model.nodestatus
+ 23:     xbestsplit = model.xbestsplit
+
+  /* "rf_classify.pyx":23
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit             # <<<<<<<<<<<<<<
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__xbestsplit); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_3.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_xbestsplit = __pyx_t_3;
+  __pyx_t_3.memview = NULL;
+  __pyx_t_3.data = NULL;
+
 24:     bestvar = model.bestvar
+
+  /* "rf_classify.pyx":24
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar             # <<<<<<<<<<<<<<
+ *     nodeclass = model.nodeclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__bestvar); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_bestvar = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 25:     nodeclass = model.nodeclass
+
+  /* "rf_classify.pyx":25
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass             # <<<<<<<<<<<<<<
+ * 
+ *     nrnodes = model.nrnodes
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodeclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodeclass = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 26: 
+
 27:     nrnodes = model.nrnodes
+
+  /* "rf_classify.pyx":27
+ *     nodeclass = model.nodeclass
+ * 
+ *     nrnodes = model.nrnodes             # <<<<<<<<<<<<<<
+ *     ntree = model.ntree
+ *     nclass = model.nclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nrnodes); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nrnodes = __pyx_t_4;
+
 28:     ntree = model.ntree
+
+  /* "rf_classify.pyx":28
+ * 
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree             # <<<<<<<<<<<<<<
+ *     nclass = model.nclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__ntree); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_ntree = __pyx_t_4;
+
 29:     nclass = model.nclass
+
+  /* "rf_classify.pyx":29
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree
+ *     nclass = model.nclass             # <<<<<<<<<<<<<<
+ * 
+ *     # Predict
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nclass = __pyx_t_4;
+
 30: 
+
 31:     # Predict
+
 32:     votes = np.zeros((npix, nclass), dtype=np.int32)
+
+  /* "rf_classify.pyx":32
+ * 
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)             # <<<<<<<<<<<<<<
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ */
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__zeros); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_npix); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_6 = PyInt_FromLong(__pyx_v_nclass); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
+  __Pyx_GIVEREF(__pyx_t_6);
+  __pyx_t_1 = 0;
+  __pyx_t_6 = 0;
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_7));
+  __Pyx_GIVEREF(((PyObject *)__pyx_t_7));
+  __pyx_t_7 = 0;
+  __pyx_t_7 = PyDict_New(); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__int32); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  if (PyDict_SetItem(__pyx_t_7, ((PyObject *)__pyx_n_s__dtype), __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_8);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_v_votes = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 33:     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+
+  /* "rf_classify.pyx":33
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)             # <<<<<<<<<<<<<<
+ * 
+ *     for pixi in range(npix):
+ */
+  __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s__zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyInt_FromLong(__pyx_v_nfeatures); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_8);
+  __Pyx_GIVEREF(__pyx_t_8);
+  __pyx_t_8 = 0;
+  __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+  __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s__float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  if (PyDict_SetItem(__pyx_t_8, ((PyObject *)__pyx_n_s__dtype), __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+  __pyx_t_9 = __Pyx_PyObject_to_MemoryviewSlice_ds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_9.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_pixel_features = __pyx_t_9;
+  __pyx_t_9.memview = NULL;
+  __pyx_t_9.data = NULL;
+
 34: 
+
 35:     for pixi in range(npix):
+
+  /* "rf_classify.pyx":35
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ *     for pixi in range(npix):             # <<<<<<<<<<<<<<
+ * 
+ *         pixel_features[...] = features[:,pixi]
+ */
+  __pyx_t_4 = __pyx_v_npix;
+  for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_4; __pyx_t_10+=1) {
+    __pyx_v_pixi = __pyx_t_10;
+
 36: 
+
 37:         pixel_features[...] = features[:,pixi]
+
+    /* "rf_classify.pyx":37
+ *     for pixi in range(npix):
+ * 
+ *         pixel_features[...] = features[:,pixi]             # <<<<<<<<<<<<<<
+ * 
+ *         for treei in range(ntree):
+ */
+    __pyx_t_12 = -1;
+    __pyx_t_11.data = __pyx_v_features.data;
+    __pyx_t_11.memview = __pyx_v_features.memview;
+    __PYX_INC_MEMVIEW(&__pyx_t_11, 0);
+    __pyx_t_11.shape[0] = __pyx_v_features.shape[0];
+__pyx_t_11.strides[0] = __pyx_v_features.strides[0];
+    __pyx_t_11.suboffsets[0] = -1;
+
+{
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_pixi;
+    Py_ssize_t __pyx_tmp_shape = __pyx_v_features.shape[1];
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_features.strides[1];
+    if (0 && (__pyx_tmp_idx < 0))
+        __pyx_tmp_idx += __pyx_tmp_shape;
+    if (0 && (__pyx_tmp_idx < 0 || __pyx_tmp_idx >= __pyx_tmp_shape)) {
+        PyErr_SetString(PyExc_IndexError, "Index out of bounds (axis 1)");
+        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+        __pyx_t_11.data += __pyx_tmp_idx * __pyx_tmp_stride;
+}
+
+if (unlikely(__pyx_memoryview_copy_contents(__pyx_t_11, __pyx_v_pixel_features, 1, 1, 0) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+
 38: 
+
 39:         for treei in range(ntree):
+
+    /* "rf_classify.pyx":39
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         for treei in range(ntree):             # <<<<<<<<<<<<<<
+ * 
+ *             k = 0
+ */
+    __pyx_t_12 = __pyx_v_ntree;
+    for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_12; __pyx_t_13+=1) {
+      __pyx_v_treei = __pyx_t_13;
+
 40: 
+
 41:             k = 0
+
+      /* "rf_classify.pyx":41
+ *         for treei in range(ntree):
+ * 
+ *             k = 0             # <<<<<<<<<<<<<<
+ *             while nodestatus[treei, k] != NODE_TERMINAL:
+ *                 m = bestvar[treei, k] - 1
+ */
+      __pyx_v_k = 0;
+
 42:             while nodestatus[treei, k] != NODE_TERMINAL:
+
+      /* "rf_classify.pyx":42
+ * 
+ *             k = 0
+ *             while nodestatus[treei, k] != NODE_TERMINAL:             # <<<<<<<<<<<<<<
+ *                 m = bestvar[treei, k] - 1
+ *                 #Split by a numerical predictor
+ */
+      while (1) {
+        __pyx_t_14 = __pyx_v_treei;
+        __pyx_t_15 = __pyx_v_k;
+        __pyx_t_16 = (((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodestatus.data + __pyx_t_14 * __pyx_v_nodestatus.strides[0]) ) + __pyx_t_15 * __pyx_v_nodestatus.strides[1]) ))) != __pyx_v_NODE_TERMINAL) != 0);
+        if (!__pyx_t_16) break;
+
 43:                 m = bestvar[treei, k] - 1
+
+        /* "rf_classify.pyx":43
+ *             k = 0
+ *             while nodestatus[treei, k] != NODE_TERMINAL:
+ *                 m = bestvar[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ */
+        __pyx_t_17 = __pyx_v_treei;
+        __pyx_t_18 = __pyx_v_k;
+        __pyx_v_m = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_bestvar.data + __pyx_t_17 * __pyx_v_bestvar.strides[0]) ) + __pyx_t_18 * __pyx_v_bestvar.strides[1]) ))) - 1);
+
 44:                 #Split by a numerical predictor
+
 45:                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+
+        /* "rf_classify.pyx":45
+ *                 m = bestvar[treei, k] - 1
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])             # <<<<<<<<<<<<<<
+ *                 k = treemap[treei * 2, k * 2 + choice] - 1
+ * 
+ */
+        __pyx_t_19 = __pyx_v_m;
+        __pyx_t_20 = __pyx_v_treei;
+        __pyx_t_21 = __pyx_v_k;
+        __pyx_v_choice = (1 * ((*((float *) ( /* dim=0 */ (__pyx_v_pixel_features.data + __pyx_t_19 * __pyx_v_pixel_features.strides[0]) ))) > (*((float *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_xbestsplit.data + __pyx_t_20 * __pyx_v_xbestsplit.strides[0]) ) + __pyx_t_21 * __pyx_v_xbestsplit.strides[1]) )))));
+
 46:                 k = treemap[treei * 2, k * 2 + choice] - 1
+
+        /* "rf_classify.pyx":46
+ *                 #Split by a numerical predictor
+ *                 choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ *                 k = treemap[treei * 2, k * 2 + choice] - 1             # <<<<<<<<<<<<<<
+ * 
+ *             #We found the terminal node: assign class label
+ */
+        __pyx_t_22 = (__pyx_v_treei * 2);
+        __pyx_t_23 = ((__pyx_v_k * 2) + __pyx_v_choice);
+        __pyx_v_k = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_treemap.data + __pyx_t_22 * __pyx_v_treemap.strides[0]) ) + __pyx_t_23 * __pyx_v_treemap.strides[1]) ))) - 1);
+      }
+
 47: 
+
 48:             #We found the terminal node: assign class label
+
 49:             vote_class = nodeclass[treei, k] - 1
+
+      /* "rf_classify.pyx":49
+ * 
+ *             #We found the terminal node: assign class label
+ *             vote_class = nodeclass[treei, k] - 1             # <<<<<<<<<<<<<<
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ */
+      __pyx_t_24 = __pyx_v_treei;
+      __pyx_t_25 = __pyx_v_k;
+      __pyx_v_vote_class = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodeclass.data + __pyx_t_24 * __pyx_v_nodeclass.strides[0]) ) + __pyx_t_25 * __pyx_v_nodeclass.strides[1]) ))) - 1);
+
 50:             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+
+      /* "rf_classify.pyx":50
+ *             #We found the terminal node: assign class label
+ *             vote_class = nodeclass[treei, k] - 1
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1             # <<<<<<<<<<<<<<
+ * 
+ *     return votes
+ */
+      __pyx_t_26 = __pyx_v_pixi;
+      __pyx_t_27 = __pyx_v_vote_class;
+      __pyx_t_28 = __pyx_v_pixi;
+      __pyx_t_29 = __pyx_v_vote_class;
+      *((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_28 * __pyx_v_votes.strides[0]) ) + __pyx_t_29 * __pyx_v_votes.strides[1]) )) = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_26 * __pyx_v_votes.strides[0]) ) + __pyx_t_27 * __pyx_v_votes.strides[1]) ))) + 1);
+    }
+  }
+
 51: 
+
 52:     return votes
+
+  /* "rf_classify.pyx":52
+ *             votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ *     return votes             # <<<<<<<<<<<<<<
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_votes, 2, (PyObject *(*)(char *)) __pyx_memview_get_int, (int (*)(char *, PyObject *)) __pyx_memview_set_int, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_3, 1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_9, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+  __Pyx_AddTraceback("rf_classify.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_treemap, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodestatus, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_bestvar, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodeclass, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_votes, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_xbestsplit, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_pixel_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_11rf_classify_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_11rf_classify_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_model = 0;
+  __Pyx_memviewslice __pyx_v_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__model,&__pyx_n_s__features,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__model)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__features)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "rf_classify") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_model = values[0];
+    __pyx_v_features = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(values[1]); if (unlikely(!__pyx_v_features.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("rf_classify.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_11rf_classify_rf_classify(__pyx_self, __pyx_v_model, __pyx_v_features);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
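For readability, the complete rf_classify.pyx can be pieced together from the numbered source lines in the annotation above; the sketch below is that reconstruction (line 1 is inferred from the parallel variant's annotation), not a new file in this patch. The repeated `- 1` offsets convert the model's 1-based node and feature indices, inherited from the randomforest-matlab layout, to 0-based indexing.

#cython: boundscheck=False
#cython: wraparound=False

import numpy as np
cimport numpy as np

cpdef rf_classify(model, float[:,:] features):

    cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
    cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
    cdef float [:,:] xbestsplit
    cdef float[:] pixel_features

    NODE_TERMINAL = -1
    #NODE_TOSPLIT  = -2
    #NODE_INTERIOR = -3

    nfeatures = features.shape[0]
    npix = features.shape[1]

    treemap = model.treemap
    nodestatus = model.nodestatus
    xbestsplit = model.xbestsplit
    bestvar = model.bestvar
    nodeclass = model.nodeclass

    nrnodes = model.nrnodes
    ntree = model.ntree
    nclass = model.nclass

    # Predict
    votes = np.zeros((npix, nclass), dtype=np.int32)
    pixel_features = np.zeros(nfeatures, dtype=np.float32)

    for pixi in range(npix):

        pixel_features[...] = features[:,pixi]

        for treei in range(ntree):

            k = 0
            while nodestatus[treei, k] != NODE_TERMINAL:
                m = bestvar[treei, k] - 1
                #Split by a numerical predictor
                choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
                k = treemap[treei * 2, k * 2 + choice] - 1

            #We found the terminal node: assign class label
            vote_class = nodeclass[treei, k] - 1
            votes[pixi,vote_class] = votes[pixi,vote_class] + 1

    return votes

Given a model object exposing the forest arrays (treemap, nodestatus, xbestsplit, bestvar, nodeclass) and scalars (nrnodes, ntree, nclass), calling votes = rf_classify(model, features) with an (nfeatures, npix) float32 feature matrix returns an (npix, nclass) int32 array of per-pixel vote counts.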
diff --git a/ClassifyMembranes/rf_classify_parallel.html b/ClassifyMembranes/rf_classify_parallel.html
new file mode 100644
index 0000000..6610e14
--- /dev/null
+++ b/ClassifyMembranes/rf_classify_parallel.html
@@ -0,0 +1,730 @@

Generated by Cython 0.19.1 on Fri Aug 02 15:04:08 2013

Raw output: rf_classify_parallel.c

 1: #cython: boundscheck=False
+
+  /* "rf_classify_parallel.pyx":1
+ * #cython: boundscheck=False             # <<<<<<<<<<<<<<
+ * #cython: wraparound=False
+ * 
+ */
+  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s____test__, ((PyObject *)__pyx_t_1)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+
 2: #cython: wraparound=False
+
 3: 
+
 4: import numpy as np
+
+  /* "rf_classify_parallel.pyx":4
+ * #cython: wraparound=False
+ * 
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * cimport numpy as np
+ * from cython.parallel cimport prange
+ */
+  __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__numpy), 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  if (PyDict_SetItem(__pyx_d, __pyx_n_s__np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
 5: cimport numpy as np
+
 6: from cython.parallel cimport prange
+
 7: 
+
 8: cpdef rf_classify(model, float[:,:] features):
+
+/* "rf_classify_parallel.pyx":8
+ * from cython.parallel cimport prange
+ * 
+ * cpdef rf_classify(model, float[:,:] features):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ */
+
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_f_20rf_classify_parallel_rf_classify(PyObject *__pyx_v_model, __Pyx_memviewslice __pyx_v_features, CYTHON_UNUSED int __pyx_skip_dispatch) {
+  int __pyx_v_NODE_TERMINAL;
+  int __pyx_v_nfeatures;
+  int __pyx_v_npix;
+  int __pyx_v_treei;
+  int __pyx_v_k;
+  int __pyx_v_m;
+  int __pyx_v_choice;
+  int __pyx_v_vote_class;
+  CYTHON_UNUSED int __pyx_v_nrnodes;
+  CYTHON_UNUSED int __pyx_v_ntree;
+  int __pyx_v_nclass;
+  int __pyx_v_pixi;
+  __Pyx_memviewslice __pyx_v_treemap = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_nodestatus = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_bestvar = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_nodeclass = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_votes = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_xbestsplit = { 0, 0, { 0 }, { 0 }, { 0 } };
+  __Pyx_memviewslice __pyx_v_pixel_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify", 0);
+
+/* "rf_classify_parallel.pyx":8
+ * from cython.parallel cimport prange
+ * 
+ * cpdef rf_classify(model, float[:,:] features):             # <<<<<<<<<<<<<<
+ * 
+ *     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+ */
+
+static PyObject *__pyx_pf_20rf_classify_parallel_rf_classify(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, __Pyx_memviewslice __pyx_v_features) {
+  PyObject *__pyx_r = NULL;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify", 0);
+  __Pyx_XDECREF(__pyx_r);
+  if (unlikely(!__pyx_v_features.memview)) { __Pyx_RaiseUnboundLocalError("features"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} }
+  __pyx_t_1 = __pyx_f_20rf_classify_parallel_rf_classify(__pyx_v_model, __pyx_v_features, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = NULL;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+  int __pyx_r;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
+  __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags));
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
 9: 
+
 10:     cdef int NODE_TERMINAL, nfeatures, npix, treei, k, m, choice, vote_class, nrnodes, ntree, nclass, pixi
+
 11:     cdef int [:,:] treemap, nodestatus, bestvar, nodeclass, votes
+
 12:     cdef float [:,:] xbestsplit
+
 13:     cdef float[:] pixel_features
+
 14: 
+
 15:     NODE_TERMINAL = -1
+
+  /* "rf_classify_parallel.pyx":15
+ *     cdef float[:] pixel_features
+ * 
+ *     NODE_TERMINAL = -1             # <<<<<<<<<<<<<<
+ *     #NODE_TOSPLIT  = -2
+ *     #NODE_INTERIOR = -3
+ */
+  __pyx_v_NODE_TERMINAL = -1;
+
 16:     #NODE_TOSPLIT  = -2
+
 17:     #NODE_INTERIOR = -3
+
 18: 
+
 19:     nfeatures = features.shape[0]
+
+  /* "rf_classify_parallel.pyx":19
+ *     #NODE_INTERIOR = -3
+ * 
+ *     nfeatures = features.shape[0]             # <<<<<<<<<<<<<<
+ *     npix = features.shape[1]
+ * 
+ */
+  __pyx_v_nfeatures = (__pyx_v_features.shape[0]);
+
 20:     npix = features.shape[1]
+
+  /* "rf_classify_parallel.pyx":20
+ * 
+ *     nfeatures = features.shape[0]
+ *     npix = features.shape[1]             # <<<<<<<<<<<<<<
+ * 
+ *     treemap = model.treemap
+ */
+  __pyx_v_npix = (__pyx_v_features.shape[1]);
+
 21: 
+
 22:     treemap = model.treemap
+
+  /* "rf_classify_parallel.pyx":22
+ *     npix = features.shape[1]
+ * 
+ *     treemap = model.treemap             # <<<<<<<<<<<<<<
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__treemap); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_treemap = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 23:     nodestatus = model.nodestatus
+
+  /* "rf_classify_parallel.pyx":23
+ * 
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus             # <<<<<<<<<<<<<<
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodestatus); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodestatus = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 24:     xbestsplit = model.xbestsplit
+
+  /* "rf_classify_parallel.pyx":24
+ *     treemap = model.treemap
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit             # <<<<<<<<<<<<<<
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__xbestsplit); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_3 = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_3.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_xbestsplit = __pyx_t_3;
+  __pyx_t_3.memview = NULL;
+  __pyx_t_3.data = NULL;
+
 25:     bestvar = model.bestvar
+
+  /* "rf_classify_parallel.pyx":25
+ *     nodestatus = model.nodestatus
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar             # <<<<<<<<<<<<<<
+ *     nodeclass = model.nodeclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__bestvar); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_bestvar = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 26:     nodeclass = model.nodeclass
+
+  /* "rf_classify_parallel.pyx":26
+ *     xbestsplit = model.xbestsplit
+ *     bestvar = model.bestvar
+ *     nodeclass = model.nodeclass             # <<<<<<<<<<<<<<
+ * 
+ *     nrnodes = model.nrnodes
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nodeclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_1);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nodeclass = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 27: 
+
 28:     nrnodes = model.nrnodes
+
+  /* "rf_classify_parallel.pyx":28
+ *     nodeclass = model.nodeclass
+ * 
+ *     nrnodes = model.nrnodes             # <<<<<<<<<<<<<<
+ *     ntree = model.ntree
+ *     nclass = model.nclass
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nrnodes); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nrnodes = __pyx_t_4;
+
 29:     ntree = model.ntree
+
+  /* "rf_classify_parallel.pyx":29
+ * 
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree             # <<<<<<<<<<<<<<
+ *     nclass = model.nclass
+ * 
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__ntree); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_ntree = __pyx_t_4;
+
 30:     nclass = model.nclass
+
+  /* "rf_classify_parallel.pyx":30
+ *     nrnodes = model.nrnodes
+ *     ntree = model.ntree
+ *     nclass = model.nclass             # <<<<<<<<<<<<<<
+ * 
+ *     # Predict
+ */
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s__nclass); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_nclass = __pyx_t_4;
+
 31: 
+
 32:     # Predict
+
 33:     votes = np.zeros((npix, nclass), dtype=np.int32)
+
+  /* "rf_classify_parallel.pyx":33
+ * 
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)             # <<<<<<<<<<<<<<
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ */
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__zeros); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyInt_FromLong(__pyx_v_npix); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_6 = PyInt_FromLong(__pyx_v_nclass); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1);
+  __Pyx_GIVEREF(__pyx_t_1);
+  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
+  __Pyx_GIVEREF(__pyx_t_6);
+  __pyx_t_1 = 0;
+  __pyx_t_6 = 0;
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_7));
+  __Pyx_GIVEREF(((PyObject *)__pyx_t_7));
+  __pyx_t_7 = 0;
+  __pyx_t_7 = PyDict_New(); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s__int32); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  if (PyDict_SetItem(__pyx_t_7, ((PyObject *)__pyx_n_s__dtype), __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_7)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+  __pyx_t_2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_int(__pyx_t_8);
+  if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_v_votes = __pyx_t_2;
+  __pyx_t_2.memview = NULL;
+  __pyx_t_2.data = NULL;
+
 34:     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+
+  /* "rf_classify_parallel.pyx":34
+ *     # Predict
+ *     votes = np.zeros((npix, nclass), dtype=np.int32)
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)             # <<<<<<<<<<<<<<
+ * 
+ *     for pixi in range(npix):
+ */
+  __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s__zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_7);
+  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+  __pyx_t_8 = PyInt_FromLong(__pyx_v_nfeatures); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_8);
+  __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_6);
+  PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_8);
+  __Pyx_GIVEREF(__pyx_t_8);
+  __pyx_t_8 = 0;
+  __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+  __pyx_t_5 = __Pyx_GetModuleGlobalName(__pyx_n_s__np); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_5);
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s__float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+  if (PyDict_SetItem(__pyx_t_8, ((PyObject *)__pyx_n_s__dtype), __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_t_1 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_6), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+  __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+  __pyx_t_9 = __Pyx_PyObject_to_MemoryviewSlice_ds_float(__pyx_t_1);
+  if (unlikely(!__pyx_t_9.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+  __pyx_v_pixel_features = __pyx_t_9;
+  __pyx_t_9.memview = NULL;
+  __pyx_t_9.data = NULL;
+
 35: 
+
 36:     for pixi in range(npix):
+
+  /* "rf_classify_parallel.pyx":36
+ *     pixel_features = np.zeros(nfeatures, dtype=np.float32)
+ * 
+ *     for pixi in range(npix):             # <<<<<<<<<<<<<<
+ * 
+ *         pixel_features[...] = features[:,pixi]
+ */
+  __pyx_t_4 = __pyx_v_npix;
+  for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_4; __pyx_t_10+=1) {
+    __pyx_v_pixi = __pyx_t_10;
+
 37: 
+
 38:         pixel_features[...] = features[:,pixi]
+
+    /* "rf_classify_parallel.pyx":38
+ *     for pixi in range(npix):
+ * 
+ *         pixel_features[...] = features[:,pixi]             # <<<<<<<<<<<<<<
+ * 
+ *         with nogil:
+ */
+    __pyx_t_12 = -1;
+    __pyx_t_11.data = __pyx_v_features.data;
+    __pyx_t_11.memview = __pyx_v_features.memview;
+    __PYX_INC_MEMVIEW(&__pyx_t_11, 0);
+    __pyx_t_11.shape[0] = __pyx_v_features.shape[0];
+__pyx_t_11.strides[0] = __pyx_v_features.strides[0];
+    __pyx_t_11.suboffsets[0] = -1;
+
+{
+    Py_ssize_t __pyx_tmp_idx = __pyx_v_pixi;
+    Py_ssize_t __pyx_tmp_shape = __pyx_v_features.shape[1];
+    Py_ssize_t __pyx_tmp_stride = __pyx_v_features.strides[1];
+    if (0 && (__pyx_tmp_idx < 0))
+        __pyx_tmp_idx += __pyx_tmp_shape;
+    if (0 && (__pyx_tmp_idx < 0 || __pyx_tmp_idx >= __pyx_tmp_shape)) {
+        PyErr_SetString(PyExc_IndexError, "Index out of bounds (axis 1)");
+        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    }
+        __pyx_t_11.data += __pyx_tmp_idx * __pyx_tmp_stride;
+}
+
+if (unlikely(__pyx_memoryview_copy_contents(__pyx_t_11, __pyx_v_pixel_features, 1, 1, 0) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+    __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+
 39: 
+
 40:         with nogil:
+
+    /* "rf_classify_parallel.pyx":40
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         with nogil:             # <<<<<<<<<<<<<<
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ */
+    {
+        #ifdef WITH_THREAD
+        PyThreadState *_save;
+        Py_UNBLOCK_THREADS
+        #endif
+        /*try:*/ {
+
+        /* "rf_classify_parallel.pyx":40
+ *         pixel_features[...] = features[:,pixi]
+ * 
+ *         with nogil:             # <<<<<<<<<<<<<<
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ */
+        /*finally:*/ {
+          #ifdef WITH_THREAD
+          Py_BLOCK_THREADS
+          #endif
+        }
+    }
+  }
+
 41: 
+
 42:             for treei in prange(ntree, schedule='static', num_threads=16):
+
+          /* "rf_classify_parallel.pyx":42
+ *         with nogil:
+ * 
+ *             for treei in prange(ntree, schedule='static', num_threads=16):             # <<<<<<<<<<<<<<
+ * 
+ *                 k = 0
+ */
+          __pyx_t_12 = __pyx_v_ntree;
+          if (1 == 0) abort();
+          {
+              #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+                  #undef likely
+                  #undef unlikely
+                  #define likely(x)   (x)
+                  #define unlikely(x) (x)
+              #endif
+              __pyx_t_14 = (__pyx_t_12 - 0) / 1;
+              if (__pyx_t_14 > 0)
+              {
+                  #ifdef _OPENMP
+                  #pragma omp parallel
+                  #endif /* _OPENMP */
+                  {
+                      #ifdef _OPENMP
+                      #pragma omp for lastprivate(__pyx_v_k) lastprivate(__pyx_v_choice) lastprivate(__pyx_v_vote_class) lastprivate(__pyx_v_m) firstprivate(__pyx_v_treei) lastprivate(__pyx_v_treei) schedule(static) num_threads(16)
+                      #endif /* _OPENMP */
+                      for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_14; __pyx_t_13++){
+                          {
+                              __pyx_v_treei = 0 + 1 * __pyx_t_13;
+                              /* Initialize private variables to invalid values */
+                              __pyx_v_k = ((int)0xbad0bad0);
+                              __pyx_v_choice = ((int)0xbad0bad0);
+                              __pyx_v_vote_class = ((int)0xbad0bad0);
+                              __pyx_v_m = ((int)0xbad0bad0);
+
 43: 
+
 44:                 k = 0
+
+                              /* "rf_classify_parallel.pyx":44
+ *             for treei in prange(ntree, schedule='static', num_threads=16):
+ * 
+ *                 k = 0             # <<<<<<<<<<<<<<
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:
+ *                     m = bestvar[treei, k] - 1
+ */
+                              __pyx_v_k = 0;
+
 45:                 while nodestatus[treei, k] != NODE_TERMINAL:
+
+                              /* "rf_classify_parallel.pyx":45
+ * 
+ *                 k = 0
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:             # <<<<<<<<<<<<<<
+ *                     m = bestvar[treei, k] - 1
+ *                     #Split by a numerical predictor
+ */
+                              while (1) {
+                                __pyx_t_15 = __pyx_v_treei;
+                                __pyx_t_16 = __pyx_v_k;
+                                __pyx_t_17 = (((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodestatus.data + __pyx_t_15 * __pyx_v_nodestatus.strides[0]) ) + __pyx_t_16 * __pyx_v_nodestatus.strides[1]) ))) != __pyx_v_NODE_TERMINAL) != 0);
+                                if (!__pyx_t_17) break;
+
 46:                     m = bestvar[treei, k] - 1
+
+                                /* "rf_classify_parallel.pyx":46
+ *                 k = 0
+ *                 while nodestatus[treei, k] != NODE_TERMINAL:
+ *                     m = bestvar[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ */
+                                __pyx_t_18 = __pyx_v_treei;
+                                __pyx_t_19 = __pyx_v_k;
+                                __pyx_v_m = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_bestvar.data + __pyx_t_18 * __pyx_v_bestvar.strides[0]) ) + __pyx_t_19 * __pyx_v_bestvar.strides[1]) ))) - 1);
+
 47:                     #Split by a numerical predictor
+
 48:                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+
+                                /* "rf_classify_parallel.pyx":48
+ *                     m = bestvar[treei, k] - 1
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])             # <<<<<<<<<<<<<<
+ *                     k = treemap[treei * 2, k * 2 + choice] - 1
+ * 
+ */
+                                __pyx_t_20 = __pyx_v_m;
+                                __pyx_t_21 = __pyx_v_treei;
+                                __pyx_t_22 = __pyx_v_k;
+                                __pyx_v_choice = (1 * ((*((float *) ( /* dim=0 */ (__pyx_v_pixel_features.data + __pyx_t_20 * __pyx_v_pixel_features.strides[0]) ))) > (*((float *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_xbestsplit.data + __pyx_t_21 * __pyx_v_xbestsplit.strides[0]) ) + __pyx_t_22 * __pyx_v_xbestsplit.strides[1]) )))));
+
 49:                     k = treemap[treei * 2, k * 2 + choice] - 1
+
+                                /* "rf_classify_parallel.pyx":49
+ *                     #Split by a numerical predictor
+ *                     choice = 1 * (pixel_features[m] > xbestsplit[treei, k])
+ *                     k = treemap[treei * 2, k * 2 + choice] - 1             # <<<<<<<<<<<<<<
+ * 
+ *                 #We found the terminal node: assign class label
+ */
+                                __pyx_t_23 = (__pyx_v_treei * 2);
+                                __pyx_t_24 = ((__pyx_v_k * 2) + __pyx_v_choice);
+                                __pyx_v_k = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_treemap.data + __pyx_t_23 * __pyx_v_treemap.strides[0]) ) + __pyx_t_24 * __pyx_v_treemap.strides[1]) ))) - 1);
+                              }
+
 50: 
+
 51:                 #We found the terminal node: assign class label
+
 52:                 vote_class = nodeclass[treei, k] - 1
+
+                              /* "rf_classify_parallel.pyx":52
+ * 
+ *                 #We found the terminal node: assign class label
+ *                 vote_class = nodeclass[treei, k] - 1             # <<<<<<<<<<<<<<
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ */
+                              __pyx_t_25 = __pyx_v_treei;
+                              __pyx_t_26 = __pyx_v_k;
+                              __pyx_v_vote_class = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_nodeclass.data + __pyx_t_25 * __pyx_v_nodeclass.strides[0]) ) + __pyx_t_26 * __pyx_v_nodeclass.strides[1]) ))) - 1);
+
 53:                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+
+                              /* "rf_classify_parallel.pyx":53
+ *                 #We found the terminal node: assign class label
+ *                 vote_class = nodeclass[treei, k] - 1
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1             # <<<<<<<<<<<<<<
+ * 
+ *     return votes
+ */
+                              __pyx_t_27 = __pyx_v_pixi;
+                              __pyx_t_28 = __pyx_v_vote_class;
+                              __pyx_t_29 = __pyx_v_pixi;
+                              __pyx_t_30 = __pyx_v_vote_class;
+                              *((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_29 * __pyx_v_votes.strides[0]) ) + __pyx_t_30 * __pyx_v_votes.strides[1]) )) = ((*((int *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_votes.data + __pyx_t_27 * __pyx_v_votes.strides[0]) ) + __pyx_t_28 * __pyx_v_votes.strides[1]) ))) + 1);
+                          }
+                      }
+                  }
+              }
+          }
+          #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
+              #undef likely
+              #undef unlikely
+              #define likely(x)   __builtin_expect(!!(x), 1)
+              #define unlikely(x) __builtin_expect(!!(x), 0)
+          #endif
+        }
+
 54: 
+
 55:     return votes
+
+  /* "rf_classify_parallel.pyx":55
+ *                 votes[pixi,vote_class] = votes[pixi,vote_class] + 1
+ * 
+ *     return votes             # <<<<<<<<<<<<<<
+ */
+  __Pyx_XDECREF(__pyx_r);
+  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_votes, 2, (PyObject *(*)(char *)) __pyx_memview_get_int, (int (*)(char *, PyObject *)) __pyx_memview_set_int, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __Pyx_GOTREF(__pyx_t_1);
+  __pyx_r = __pyx_t_1;
+  __pyx_t_1 = 0;
+  goto __pyx_L0;
+
+  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+  goto __pyx_L0;
+  __pyx_L1_error:;
+  __Pyx_XDECREF(__pyx_t_1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_3, 1);
+  __Pyx_XDECREF(__pyx_t_5);
+  __Pyx_XDECREF(__pyx_t_6);
+  __Pyx_XDECREF(__pyx_t_7);
+  __Pyx_XDECREF(__pyx_t_8);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_9, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_t_11, 1);
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __pyx_r = 0;
+  __pyx_L0:;
+  __PYX_XDEC_MEMVIEW(&__pyx_v_treemap, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodestatus, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_bestvar, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_nodeclass, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_votes, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_xbestsplit, 1);
+  __PYX_XDEC_MEMVIEW(&__pyx_v_pixel_features, 1);
+  __Pyx_XGIVEREF(__pyx_r);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static PyObject *__pyx_pw_20rf_classify_parallel_1rf_classify(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+  PyObject *__pyx_v_model = 0;
+  __Pyx_memviewslice __pyx_v_features = { 0, 0, { 0 }, { 0 }, { 0 } };
+  PyObject *__pyx_r = 0;
+  int __pyx_lineno = 0;
+  const char *__pyx_filename = NULL;
+  int __pyx_clineno = 0;
+  __Pyx_RefNannyDeclarations
+  __Pyx_RefNannySetupContext("rf_classify (wrapper)", 0);
+  {
+    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__model,&__pyx_n_s__features,0};
+    PyObject* values[2] = {0,0};
+    if (unlikely(__pyx_kwds)) {
+      Py_ssize_t kw_args;
+      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+      switch (pos_args) {
+        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+        case  0: break;
+        default: goto __pyx_L5_argtuple_error;
+      }
+      kw_args = PyDict_Size(__pyx_kwds);
+      switch (pos_args) {
+        case  0:
+        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__model)) != 0)) kw_args--;
+        else goto __pyx_L5_argtuple_error;
+        case  1:
+        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__features)) != 0)) kw_args--;
+        else {
+          __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+        }
+      }
+      if (unlikely(kw_args > 0)) {
+        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "rf_classify") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+      }
+    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+      goto __pyx_L5_argtuple_error;
+    } else {
+      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+    }
+    __pyx_v_model = values[0];
+    __pyx_v_features = __Pyx_PyObject_to_MemoryviewSlice_dsds_float(values[1]); if (unlikely(!__pyx_v_features.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  }
+  goto __pyx_L4_argument_unpacking_done;
+  __pyx_L5_argtuple_error:;
+  __Pyx_RaiseArgtupleInvalid("rf_classify", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  __pyx_L3_error:;
+  __Pyx_AddTraceback("rf_classify_parallel.rf_classify", __pyx_clineno, __pyx_lineno, __pyx_filename);
+  __Pyx_RefNannyFinishContext();
+  return NULL;
+  __pyx_L4_argument_unpacking_done:;
+  __pyx_r = __pyx_pf_20rf_classify_parallel_rf_classify(__pyx_self, __pyx_v_model, __pyx_v_features);
+  __Pyx_RefNannyFinishContext();
+  return __pyx_r;
+}
+
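The generated C above accumulates one vote per tree into votes[pixi, vote_class], with class labels stored 1-based (vote_class = nodeclass[treei, k] - 1) before the final return votes. A small post-processing helper, assumed here for illustration and not part of the patch, turns those counts into per-class vote fractions and hard labels:

    import numpy as np

    def votes_to_prediction(votes):
        # votes: (npixels, nclass) counts filled by rf_classify (assumed helper)
        votes = np.asarray(votes, dtype=np.float32)
        prob = votes / votes.sum(axis=1, keepdims=True)  # fraction of trees per class
        labels = np.argmax(votes, axis=1) + 1            # class labels are 1-based
        return prob, labels
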
diff --git a/ClassifyMembranes/train_gpu_randomforest_2class.py b/ClassifyMembranes/train_gpu_randomforest_2class.py new file mode 100644 index 0000000..7340990 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_2class.py @@ -0,0 +1,664 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + 
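+  /* Reference note (comment only): sortbagbyx above is a bottom-up,
+     iterative merge sort.  The run length i doubles each pass and the two
+     index buffers (bagstart, tempbagstart) ping-pong as source and
+     destination, avoiding recursion inside a single CUDA thread.  Ties take
+     the right-hand run first, so the order of equal feature values can
+     differ from a stable CPU sort (e.g. numpy.argsort with kind='stable'),
+     though the ascending order by feature value is the same. */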
float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int 
mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? + int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. 
*/ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +#input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +#input_image_suffix = '_labeled_update.tif' +#input_features_suffix = 
'.hdf5' +#output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '_autoencoder_features.h5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cyto.png' +# input_features_suffix = 'Cyto_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cytods2.png' +# input_features_suffix = 'Cytods2_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2classds2.h5' + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '_training.png' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# #input_features_suffix = '_autoencoder_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# #input_features_suffix = '_rhoana_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\rhoana_forest_2class.h5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +class_colors = [0, 1] +#class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # 2 class + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # 3 class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] 
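+    # Reference note on the GPU split search above: findBestSplit keeps the
+    # running sums n_side = sum_c w_side[c]^2 and d_side = sum_c w_side[c]
+    # for the left and right partitions.  The update
+    #     gini_leftn += split_weight * (2*wlstart[c] + split_weight)
+    # is exactly (wl[c]+w)^2 - wl[c]^2, so no per-class rescan is needed.
+    # The score maximized at each candidate split point is
+    #     crit = gini_leftn/gini_leftd + gini_rightn/gini_rightd
+    # and gini_score reports crit - crit0, where crit0 is the same quantity
+    # computed on the unsplit parent class populations.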
+ train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, 
grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_3class.py b/ClassifyMembranes/train_gpu_randomforest_3class.py new file mode 100644 index 0000000..55a4480 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_3class.py @@ -0,0 +1,666 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy 
output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = 
splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
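+  /* For reference (illustrative code inside this comment, not compiled):
+     the arrays filled by this kernel encode each tree flat.  bestvar[k]
+     holds the 1-based split feature, xbestsplit[k] the threshold,
+     treemap[2*k] and treemap[2*k+1] the 1-based child indices, and
+     nodeclass[k] the 1-based terminal label, so one sample x is classified
+     per tree as:
+
+         k = 0;
+         while (nodestatus[k] != NODE_TERMINAL) {
+             m = bestvar[k] - 1;
+             k = (x[m] <= xbestsplit[k] ? treemap[2*k] : treemap[2*k+1]) - 1;
+         }
+         label = nodeclass[k];
+  */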
+ int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. */ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +#input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +#input_image_suffix = '_labeled_update.tif' +#input_features_suffix = '.hdf5' +#output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '_autoencoder_features.h5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2_auto1\\auto1_forest_3class.hdf5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cyto.png' +# input_features_suffix = 'Cyto_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2class.h5' + +# input_image_folder = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\' +# input_image_suffix = 'Cytods2.png' +# input_features_suffix = 'Cytods2_rhoana_features.h5' +# output_path = 'D:\\dev\\datasets\\LGN1\\autoSeg\\IlasticExportViews\\joshm\\rhoana_forest_2classds2.h5' + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\' +# input_image_suffix = '_training.png' +# input_features_suffix = '_autoencoder_nosig_features.h5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_nosig_forest_2class.h5' +# #input_features_suffix = '_autoencoder_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\autoencoder_forest_2class.h5' +# #input_features_suffix = '_rhoana_features.h5' +# #output_path = 'D:\\dev\\Rhoana\\classifierTraining\\autoencoder_test\\rhoana_forest_2class.h5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix 
) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +#class_colors = [0, 1] +class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # RGB (rounded) + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # Specific colors + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = 
gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() + + diff --git a/ClassifyMembranes/train_gpu_randomforest_cp7.py b/ClassifyMembranes/train_gpu_randomforest_cp7.py new file mode 100644 index 0000000..e78e27f --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_cp7.py @@ -0,0 +1,645 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; 
++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + 
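+      /* Note: ntie below implements reservoir sampling over tied split
+         points.  Whenever another candidate matches gini_critmax, it
+         replaces the incumbent with probability 1/ntie via
+         (curand(randstate) % ntie) == 0, so every tied split survives with
+         equal probability without storing the ties. */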
int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! 
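+    /* Note: one thread trains one tree.  The host side launches
+       block = (32, 1, 1) and grid = (ntree / 32 + 1, 1), so up to a full
+       block of spare threads exists; the idx < ntree guard above lets the
+       extras exit without touching memory. */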
+ sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? + int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset]; + int dimindex; + for (dimindex = 0; dimindex < mdim; ++dimindex){ + baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex]; + } + baggedclassstart[j + bioffset] = classes[xindex]; + bagstart[k] = j + bioffset; + ++k; + } + cioffset += samplefrom[i]; + bioffset += nsamples[i]; + classpop[i + idx * nclass * maxTreeSize] = nsamples[i]; + } + + //Wipe other values + for (;k < nclass * maxnsamples; ++k) { + bagstart[k] = -1; + } + + int ndstart, ndend, ndendl; + int msplit, best_split_index; + float best_split, gini_score; + + //Repeat findbestsplit until the tree is complete + int ncur = 0; + int treeoffset1 = idx * maxTreeSize; + int treeOffset2 = idx * 2 * maxTreeSize; + nodestart[treeoffset1] = 0; + nodepop[treeoffset1] = totsamples; + nodestatus[treeoffset1] = NODE_TOSPLIT; + + for (k = 0; k < maxTreeSize-2; ++k) { + //Check for end of tree + if (k > ncur || ncur >= maxTreeSize - 2) break; + //Skip nodes we don't need to split + if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue; + + /* initialize for next call to findbestsplit */ + ndstart = nodestart[treeoffset1 + k]; + ndend = ndstart + nodepop[treeoffset1 + k] - 1; + bool isTerminal = false; + gini_score = 0.0; + best_split_index = -1; + + findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl, + &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights, + &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart); + + if (isTerminal) { + /* Node is terminal: Mark it as such and move on to the next. 
*/ + nodestatus[k] = NODE_TERMINAL; + //bestvar[treeoffset1 + k] = 0; + //xbestsplit[treeoffset1 + k] = 0; + continue; + } + + // this is a split node - prepare for next round + bestvar[treeoffset1 + k] = msplit + 1; + //bestgini[treeoffset1 + k] = gini_score; + xbestsplit[treeoffset1 + k] = best_split; + //nbestsplit[treeoffset1 + k] = best_split_index; + nodestatus[treeoffset1 + k] = NODE_INTERIOR; + //varUsed[msplit - 1] = 1; + //tgini[msplit - 1] += decsplit; + + int leftk = ncur + 1; + int rightk = ncur + 2; + nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1; + nodepop[treeoffset1 + rightk] = ndend - ndendl; + nodestart[treeoffset1 + leftk] = ndstart; + nodestart[treeoffset1 + rightk] = ndendl + 1; + + // Check for terminal node conditions + nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + leftk] <= nodeStopSize) { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + + nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT; + if (nodepop[treeoffset1 + rightk] <= nodeStopSize) { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + + //Calculate class populations + int nodeclass = 0; + int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize; + int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize; + for (i = ndstart; i <= ndendl; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_left] += classweights[nodeclass]; + } + for (i = ndendl+1; i <= ndend; ++i) { + nodeclass = baggedclassstart[bagstart[i]]-1; + classpop[nodeclass + ctreestart_right] += classweights[nodeclass]; + } + + for(i = 0; i < nclass; ++i) + { + if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk]) + { + nodestatus[treeoffset1 + leftk] = NODE_TERMINAL; + } + if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +input_image_folder1 = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data' +raw_image_suffix = '.png' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features.h5' +output_path = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_cp7_2class.h5' + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = 
nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) +#files = files + sorted( glob.glob( input_image_folder2 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix) + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] 
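+            # Each key of the features HDF5 file holds one full-resolution
+            # feature image; sampling it at the labelled pixel coordinates
+            # below turns that image into one row of the training matrix.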
+ train_features[i,:] = feature[class_indices[0], class_indices[1]] + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + + f.close() + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, 
grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py b/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py new file mode 100644 index 0000000..67311cd --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_cp7_ds2.py @@ -0,0 +1,658 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger 
chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + 
*msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
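+                //Layout sketch (hypothetical sizes): with nclass = 2, nsamples = {3, 2}
+                //and maxnsamples = 3, sampledata leaves random draws at bagstart[0..2]
+                //(class 1) and bagstart[3..4] (class 2); the loop below compacts them
+                //into bagstart[0..totsamples-1] and resolves each draw to a sample of
+                //x through classindex.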
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +downsample_factor = 2 + +input_image_folder1 = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data' +ds_input_image_folder = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\input_images_ds{0}'.format(downsample_factor) + +raw_image_suffix = '.png' +input_image_suffix = '_train.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) + +output_path = 'D:\\dev\\datasets\\Cerebellum\\Classifiers\\P7_Data\\rhoana_forest_ds{0}_cp7_2class.h5'.format(downsample_factor) + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + if downsample_factor != 1: + class_indices = (class_indices[0] / downsample_factor, class_indices[1] / downsample_factor) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix).replace(input_image_folder1, ds_input_image_folder) 
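+        # The downsampled raw image is cached in ds_input_image_folder under
+        # the same base name; if it is missing it is generated once below and
+        # reused on subsequent runs.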
+ if not os.path.exists(image_file): + full_image = mahotas.imread(file.replace(input_image_suffix, raw_image_suffix))[:,:,0] + ds_image = mahotas.imresize(full_image, 1.0 / downsample_factor) + mahotas.imsave(image_file, np.uint8(ds_image)) + full_image = None + ds_image = None + + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = 
gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py b/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py new file mode 100644 index 0000000..dc5a176 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_lgn_ds2.py @@ -0,0 +1,653 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas +import subprocess +import os + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = 
start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + float gini_crit0 = gini_n / gini_d; + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + 
thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + crit = (gini_leftn / gini_leftd) + (gini_rightn / gini_rightd); + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
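+                //baggedxstart is filled feature-major (sample s of feature f lands
+                //at s + totsamples * f), so sortbagbyx can scan one feature as a
+                //contiguous block.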
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +downsample_factor = 2 + +input_image_folder1 = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2' +input_image_folder2 = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Combined_LessMito' +ds_input_image_folder = 'D:\\dev\\datasets\\LGN1\\input_images_ds{0}'.format(downsample_factor) + +raw_image_suffix = '.tif' +input_image_suffix = '_labeled.png' +input_features_suffix = '_rhoana_features_ds{0}.h5'.format(downsample_factor) + +output_path = 'D:\\dev\\datasets\\LGN1\\RandomForestTraining\\Josh2\\rhoana_forest_ds{0}_2class.hdf5'.format(downsample_factor) + +features_prog = 'D:\\dev\\Rhoana\\rhoana\\ClassifyMembranes\\x64\\Release\\compute_features.exe' + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder1 + '\\*' + input_image_suffix ) ) +#files = files + sorted( glob.glob( input_image_folder2 + '\\*' + input_image_suffix ) ) + +#2 Class +class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +# class_colors = [0, 1] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + # training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + class_indices = np.nonzero(np.logical_and( + training_image[:,:,0] == this_color[0], + training_image[:,:,1] == this_color[1], + training_image[:,:,2] == this_color[2])) + + if downsample_factor != 1: + class_indices = (class_indices[0] / downsample_factor, class_indices[1] / downsample_factor) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * 
(classi + 1))) + + # Load the features + image_file = file.replace(input_image_suffix, raw_image_suffix).replace(input_image_folder1, ds_input_image_folder).replace(input_image_folder2, ds_input_image_folder) + features_file = file.replace(input_image_suffix, input_features_suffix) + + if not os.path.exists(features_file): + print "Computing features:", features_prog, image_file, features_file + subprocess.check_call([features_prog, image_file, features_file], env=os.environ) + + f = h5py.File(features_file, 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = 
gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/ClassifyMembranes/train_gpu_randomforest_norm2.py b/ClassifyMembranes/train_gpu_randomforest_norm2.py new file mode 100644 index 0000000..7efb7d5 --- /dev/null +++ b/ClassifyMembranes/train_gpu_randomforest_norm2.py @@ -0,0 +1,673 @@ +############################################################ +# GPU Implementation of Random Forest Classifier - Training +# v0.1 +# Seymour Knowles-Barley +############################################################ +# Based on c code from: +# http://code.google.com/p/randomforest-matlab/ +# License: GPLv2 +############################################################ + +import numpy as np +import sys +import h5py +import glob +import mahotas + +import pycuda.autoinit +import pycuda.driver as cu +import pycuda.compiler as nvcc +import pycuda.gpuarray as gpuarray + +gpu_randomforest_train_source = """ +#include "curand_kernel.h" + +#define NODE_TERMINAL -1 +#define NODE_TOSPLIT -2 +#define NODE_INTERIOR -3 + +__device__ void movedata() { +} + +__device__ void sampledata(const int nclass, const int* nsamples, const int* samplefrom, + const int maxnsamples, int* bagstart, curandState_t *randstate) +{ + //Select random samples + int iclass, isamp; + for (iclass=0; iclass < nclass; ++iclass) { + for (isamp=0; isamp < nsamples[iclass]; ++isamp) { + bagstart[isamp + iclass*maxnsamples] = curand(randstate) % samplefrom[iclass]; + } + } +} + +__device__ void sortbagbyx( + const float *baggedxstart, int totsamples, int mdim, int featurei, int *bagstart, int ndstart, int ndend, int *tempbagstart) +{ + //Sort elements of bagstart (from ndstart to ndend) according to x values + //Write results into bagstart + int length = ndend-ndstart+1; + if (length == 1) + { + return; + } + int xstart = featurei * totsamples; + int *inbag = bagstart; + int *outbag = tempbagstart; + + //For-loop merge sort + int i = 1; + int start1, start2, end1, end2, p1, p2, output; + while (i < length) + { + + for (start1 = ndstart; start1 <= ndend; start1 += i*2) + { + end1 = start1 + i - 1; + start2 = start1 + i; + end2 = start2 + i - 1; + p1 = start1; p2 = start2; + output = start1; + while (p1 <= 
end1 && p1 <= ndend && p2 <= end2 && p2 <= ndend && output <= ndend) + { + if (baggedxstart[xstart + inbag[p1]] < baggedxstart[xstart + inbag[p2]]) + { + outbag[output] = inbag[p1]; + ++p1; + } + else + { + outbag[output] = inbag[p2]; + ++p2; + } + ++output; + } + while (p1 <= end1 && p1 <= ndend) + { + outbag[output] = inbag[p1]; + ++p1; + ++output; + } + while (p2 <= end2 && p2 <= ndend) + { + outbag[output] = inbag[p2]; + ++p2; + ++output; + } + } + + //swap for next run + if (inbag == bagstart) + { + inbag = tempbagstart; + outbag = bagstart; + } + else + { + inbag = bagstart; + outbag = tempbagstart; + } + + //Loop again with larger chunks + i *= 2; + + } + + //Copy output to bagstart (if necessary) + if (inbag == tempbagstart) + { + for (p1 = ndstart; p1 <= ndend; ++p1) + { + bagstart[p1] = tempbagstart[p1]; + } + } + +} + +__device__ void findBestSplit( + const float *baggedxstart, const int *baggedclassstart, int mdim, int nclass, int *bagstart, + int totsamples, int k, int ndstart, int ndend, int *ndendl, + int *msplit, float *gini_score, float *best_split, int *best_split_index, bool *isTerminal, + int mtry, int idx, int maxTreeSize, int *classpop, float* classweights, + curandState_t *randstate, + int *wlstart, int *wrstart, int *dimtempstart, int *tempbagstart) +{ + //Compute initial values of numerator and denominator of Gini + float gini_n = 0.0; + float gini_d = 0.0; + float gini_rightn, gini_rightd, gini_leftn, gini_leftd; + int ctreestart = k * nclass + nclass * idx * maxTreeSize; + int i; + for (i = 0; i < nclass; ++i) + { + gini_n += classpop[i + ctreestart] * classpop[i + ctreestart]; + gini_d += classpop[i + ctreestart]; + } + + /* norm 1 + float gini_crit0 = gini_n / gini_d; + */ + + float gini_crit0 = 0; + + if ( gini_d > 1 ) + { + gini_crit0 = gini_n / ( gini_d * ( gini_d - 1 ) ); + } + + //start main loop through variables to find best split + float gini_critmax = -1.0e25; + float crit; + int trynum, featurei; + int maxfeature = mdim; + + for (i = 0; i < mdim; ++i) + { + dimtempstart[i] = i; + } + + *msplit = -1; + + //for (trynum = 0; trynum < 1; ++trynum) + for (trynum = 0; trynum < mtry && trynum < mdim; ++trynum) + { + //Choose a random feature + i = curand(randstate) % maxfeature; + featurei = dimtempstart[i]; + dimtempstart[i] = dimtempstart[maxfeature-1]; + dimtempstart[maxfeature-1] = featurei; + --maxfeature; + + //Sort according to this feature + sortbagbyx(baggedxstart, totsamples, mdim, featurei, bagstart, ndstart, ndend, tempbagstart); + + //Split on numerical predictor featurei + gini_rightn = gini_n; + gini_rightd = gini_d; + gini_leftn = 0; + gini_leftd = 0; + for (i = 0; i < nclass; ++i) + { + wrstart[i] = classpop[i + ctreestart]; + wlstart[i] = 0; + } + int splitpoint; + int splitxi; + float split_weight, thisx, nextx; + int split_class; + int ntie = 1; + //Loop through all possible split points + for (splitpoint = ndstart; splitpoint <= ndend-1; ++splitpoint) + { + //Get split details + splitxi = bagstart[splitpoint]; + //Determine class based on index and nsamples vector + split_class = baggedclassstart[splitxi]-1; + split_weight = classweights[split_class]; + //Update neumerator and demominator + gini_leftn += split_weight * (2 * wlstart[split_class] + split_weight); + gini_rightn += split_weight * (-2 * wrstart[split_class] + split_weight); + gini_leftd += split_weight; + gini_rightd -= split_weight; + wlstart[split_class] += split_weight; + wrstart[split_class] -= split_weight; + + //Check if the next value is the same (no point splitting) + 
thisx = baggedxstart[splitxi + totsamples * featurei]; + nextx = baggedxstart[bagstart[splitpoint+1] + totsamples * featurei]; + if (thisx != nextx) + { + //Check if either node is empty (or very small to allow for float errors) + if (gini_rightd > 1.0e-5 && gini_leftd > 1.0e-5) + { + //Check the split + + crit = (gini_leftn / (gini_leftd * gini_leftd) ) + (gini_rightn / (gini_rightd * gini_rightd) ); + + /* norm 2 + crit = 0; + + if ( gini_leftd > 1 ) + { + crit += gini_leftn / ( gini_leftd * ( gini_leftd - 1 ) ); + } + else + { + crit += gini_leftn; + } + + if ( gini_rightd > 1 ) + { + crit += gini_rightn / ( gini_rightd * ( gini_rightd - 1 ) ); + } + else + { + crit +=gini_rightn; + } + */ + + if (crit > gini_critmax) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + ntie = 1; + } + else if (crit == gini_critmax) + { + ++ntie; + //Break ties at random + if ((curand(randstate) % ntie) == 0) + { + *best_split = (thisx + nextx) / 2; + *best_split_index = splitpoint; + gini_critmax = crit; + *msplit = featurei; + *ndendl = splitpoint; + } + } + } + } + } // end splitpoint for + } // end trynum for + + if (gini_critmax < -1.0e10 || *msplit == -1) + { + //We could not find a suitable split - mark as a terminal node + *isTerminal = true; + } + else if (*msplit != featurei) + { + //Resort for msplit (if necessary) + sortbagbyx(baggedxstart, totsamples, mdim, *msplit, bagstart, ndstart, ndend, tempbagstart); + } + *gini_score = gini_critmax - gini_crit0; + +} + +extern "C" __global__ void trainKernel( + const float *x, int n, int mdim, int nclass, + const int *classes, const int *classindex, + const int *nsamples, const int *samplefrom, + int maxnsamples, + unsigned long long seed, unsigned long long sequencestart, + int ntree, int maxTreeSize, int mtry, int nodeStopSize, + int *treemap, int *nodestatus, float *xbestsplit, + int *bestvar, int *nodeclass, int *ndbigtree, + int *nodestart, int *nodepop, + int *classpop, float *classweights, + int *weight_left, int *weight_right, + int *dimtemp, int *bagspace, int *tempbag, float *baggedx, int *baggedclass) +{ +// Optional arguments for debug (place after xbestsplit): int *nbestsplit, float *bestgini, + int idx = threadIdx.x + blockDim.x * blockIdx.x; + + //Make sure we don't overrun + if (idx < ntree) { + //Init random number generators (one for each thread) + curandState_t state; + curand_init(seed, sequencestart + idx, 0, &state); + + int i,j,k,cioffset,bioffset; + + int totsamples = 0; + for (i = 0; i < nclass; ++i){ + totsamples += nsamples[i]; + } + + //Choose random samples for all classes + int *bagstart = bagspace + idx * nclass * maxnsamples; + int *tempbagstart = tempbag + idx * nclass * maxnsamples; + float *baggedxstart = baggedx + idx * mdim * totsamples; + int *baggedclassstart = baggedclass + idx * totsamples; + //TODO: offset weightleft, weightright and dimtemp ! + sampledata(nclass, nsamples, samplefrom, maxnsamples, bagstart, &state); + + //Remove gaps and index into x (instead of into class) + k = 0; + cioffset = 0; + bioffset = 0; + for (i = 0; i < nclass; ++i){ + for (j = 0; j < nsamples[i]; ++j) { + //Move memory into local block? 
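+                //The per-class bag counts written to classpop at the end of this
+                //outer loop seed the root node's class populations, from which
+                //findBestSplit computes the parent Gini term.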
+                int xindex = classindex[bagstart[j + i * maxnsamples] + cioffset];
+                int dimindex;
+                for (dimindex = 0; dimindex < mdim; ++dimindex){
+                    baggedxstart[j + bioffset + totsamples * dimindex] = x[xindex + n * dimindex];
+                }
+                baggedclassstart[j + bioffset] = classes[xindex];
+                bagstart[k] = j + bioffset;
+                ++k;
+            }
+            cioffset += samplefrom[i];
+            bioffset += nsamples[i];
+            classpop[i + idx * nclass * maxTreeSize] = nsamples[i];
+        }
+
+        //Wipe other values
+        for (;k < nclass * maxnsamples; ++k) {
+            bagstart[k] = -1;
+        }
+
+        int ndstart, ndend, ndendl;
+        int msplit, best_split_index;
+        float best_split, gini_score;
+
+        //Repeat findbestsplit until the tree is complete
+        int ncur = 0;
+        int treeoffset1 = idx * maxTreeSize;
+        int treeOffset2 = idx * 2 * maxTreeSize;
+        nodestart[treeoffset1] = 0;
+        nodepop[treeoffset1] = totsamples;
+        nodestatus[treeoffset1] = NODE_TOSPLIT;
+
+        for (k = 0; k < maxTreeSize-2; ++k) {
+            //Check for end of tree
+            if (k > ncur || ncur >= maxTreeSize - 2) break;
+            //Skip nodes we don't need to split
+            if (nodestatus[treeoffset1+k] != NODE_TOSPLIT) continue;
+
+            /* initialize for next call to findbestsplit */
+            ndstart = nodestart[treeoffset1 + k];
+            ndend = ndstart + nodepop[treeoffset1 + k] - 1;
+            bool isTerminal = false;
+            gini_score = 0.0;
+            best_split_index = -1;
+
+            findBestSplit(baggedxstart, baggedclassstart, mdim, nclass, bagstart, totsamples, k, ndstart, ndend, &ndendl,
+                &msplit, &gini_score, &best_split, &best_split_index, &isTerminal, mtry, idx, maxTreeSize, classpop, classweights,
+                &state, weight_left + nclass * idx, weight_right + nclass * idx, dimtemp + mdim * idx, tempbagstart);
+
+            if (isTerminal) {
+                /* Node is terminal: Mark it as such and move on to the next. */
+                nodestatus[treeoffset1 + k] = NODE_TERMINAL;
+                //bestvar[treeoffset1 + k] = 0;
+                //xbestsplit[treeoffset1 + k] = 0;
+                continue;
+            }
+
+            // this is a split node - prepare for next round
+            bestvar[treeoffset1 + k] = msplit + 1;
+            //bestgini[treeoffset1 + k] = gini_score;
+            xbestsplit[treeoffset1 + k] = best_split;
+            //nbestsplit[treeoffset1 + k] = best_split_index;
+            nodestatus[treeoffset1 + k] = NODE_INTERIOR;
+            //varUsed[msplit - 1] = 1;
+            //tgini[msplit - 1] += decsplit;
+
+            int leftk = ncur + 1;
+            int rightk = ncur + 2;
+            nodepop[treeoffset1 + leftk] = ndendl - ndstart + 1;
+            nodepop[treeoffset1 + rightk] = ndend - ndendl;
+            nodestart[treeoffset1 + leftk] = ndstart;
+            nodestart[treeoffset1 + rightk] = ndendl + 1;
+
+            // Check for terminal node conditions
+            nodestatus[treeoffset1 + leftk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + leftk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+            }
+
+            nodestatus[treeoffset1 + rightk] = NODE_TOSPLIT;
+            if (nodepop[treeoffset1 + rightk] <= nodeStopSize) {
+                nodestatus[treeoffset1 + rightk] = NODE_TERMINAL;
+            }
+
+            //Calculate class populations
+            int nodeclass = 0;
+            int ctreestart_left = leftk * nclass + idx * nclass * maxTreeSize;
+            int ctreestart_right = rightk * nclass + idx * nclass * maxTreeSize;
+            for (i = ndstart; i <= ndendl; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_left] += classweights[nodeclass];
+            }
+            for (i = ndendl+1; i <= ndend; ++i) {
+                nodeclass = baggedclassstart[bagstart[i]]-1;
+                classpop[nodeclass + ctreestart_right] += classweights[nodeclass];
+            }
+
+            for(i = 0; i < nclass; ++i)
+            {
+                if (classpop[i + ctreestart_left] == nodepop[treeoffset1 + leftk])
+                {
+                    nodestatus[treeoffset1 + leftk] = NODE_TERMINAL;
+                }
+                if (classpop[i + ctreestart_right] == nodepop[treeoffset1 + 
rightk]) + { + nodestatus[treeoffset1 + rightk] = NODE_TERMINAL; + } + } + + //Update treemap offset (indexed from 1 rather than 0) + treemap[treeOffset2 + k*2] = ncur + 2; + treemap[treeOffset2 + 1 + k*2] = ncur + 3; + ncur += 2; + + } + + //Tidy up + //TODO: Check results - should not be necessary to go up to maxTreeSize + ndbigtree[idx] = ncur+1; + //ndbigtree[idx] = maxTreeSize; + for(k = maxTreeSize-1; k >= 0; --k) + { + //if (nodestatus[treeoffset1 + k] == 0) + // --ndbigtree[idx]; + if (nodestatus[treeoffset1 + k] == NODE_TOSPLIT) + nodestatus[treeoffset1 + k] = NODE_TERMINAL; + } + + //Calculate prediction for terminal nodes + for (k = 0; k < maxTreeSize; ++k) + { + treeoffset1 = idx * maxTreeSize; + if (nodestatus[treeoffset1 + k] == NODE_TERMINAL) + { + int toppop = 0; + int ntie = 1; + for (i = 0; i < nclass; ++i) + { + int ctreeoffset = k * nclass + idx * nclass * maxTreeSize; + if (classpop[i + ctreeoffset] > toppop) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + //Break ties at random + if (classpop[i + ctreeoffset] == toppop) + { + ++ntie; + if ((curand(&state) % ntie) == 0) + { + nodeclass[treeoffset1 + k] = i+1; + toppop = classpop[i + ctreeoffset]; + } + } + } + } + } + + //ndbigtree[idx] = idx; + + } + +} +""" + +# input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\' +# input_image_suffix = '_train.png' +# input_features_suffix = '_rhoanafeatures.hdf5' +# output_path = 'D:\\dev\\Rhoana\\classifierTraining\\membraneDetectionECSx4ds2\\rhoana_forest.hdf5' + +input_image_folder = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\' +input_image_suffix = '_labeled_update.tif' +input_features_suffix = '.hdf5' +output_path = 'D:\\dev\\Rhoana\\classifierTraining\\Miketraining\\training2\\rhoana_forest_3class_norm2.hdf5' + + +# Prep the gpu function +gpu_train = nvcc.SourceModule(gpu_randomforest_train_source, no_extern_c=True).get_function('trainKernel') + +# Load training data +files = sorted( glob.glob( input_image_folder + '\\*' + input_image_suffix ) ) + +# 2 Class +#class_colors = [[255,0,0], [0,255,0]] +#class_colors = [[255,85,255], [255,255,0]] + +# 3 Class +#class_colors = [[255,0,0], [0,255,0], [0,0,255]] +#class_colors = [[255,85,255], [255,255,0], [0,255,255]] + +class_colors = [0, 1, 2] + +nclass = len(class_colors) + +training_x = np.zeros((0,0), dtype=np.float32) +training_y = np.zeros((0,1), dtype=np.int32) + +print 'Found {0} training images.'.format(len(files)) + +# Loop through all images +for file in files: + + print "Reading image {0}.".format(file) + training_image = mahotas.imread(file) + + for classi in range(nclass): + + this_color = class_colors[classi] + + # Find pixels for this class + class_indices = np.nonzero(np.logical_and( + training_image[:,:,this_color] > training_image[:,:,(this_color + 1) % 3], + training_image[:,:,this_color] > training_image[:,:,(this_color + 2) % 3])) + + # class_indices = np.nonzero(np.logical_and( + # training_image[:,:,0] == this_color[0], + # training_image[:,:,1] == this_color[1], + # training_image[:,:,2] == this_color[2])) + + # Add features to x and classes to y + + training_y = np.concatenate((training_y, np.ones((len(class_indices[0]), 1), dtype=np.int32) * (classi + 1))) + + # Load the features + f = h5py.File(file.replace(input_image_suffix, input_features_suffix), 'r') + + nfeatures = len(f.keys()) + train_features = np.zeros((nfeatures, len(class_indices[0])), dtype=np.float32) + + for i,k in enumerate(f.keys()): + feature = 
f[k][...] + train_features[i,:] = feature[class_indices[0], class_indices[1]] + + f.close() + + if training_x.size > 0: + training_x = np.concatenate((training_x, train_features), axis=1) + else: + training_x = train_features + +for classi in range(nclass): + print 'Class {0}: {1} training pixels.'.format(classi, np.sum(training_y == classi + 1)) + +# Train on GPU +ntree = np.int32(512) +mtry = np.int32(np.floor(np.sqrt(training_x.shape[0]))) +#nsamples = np.ones((1,nclass), dtype=np.int32) * (training_x.shape[1] / nclass) +nsamples = np.ones((1,nclass), dtype=np.int32) * 1000 +classweights = np.ones((1,nclass), dtype=np.float32) + +# Sanity check +assert(training_x.shape[1] == training_y.shape[0]) + +# Random number seeds +seed = np.int64(42) +sequencestart = np.int64(43) + +samplefrom = np.zeros((nclass), dtype=np.int32) +maxTreeSize = np.int32(2 * np.sum(nsamples) + 1) +nodeStopSize = np.int32(1) + +for classi in range(nclass): + samplefrom[classi] = np.sum(training_y == (classi + 1)) + +maxnsamples = np.max(nsamples) +classindex = -1 * np.ones((np.max(samplefrom) * nclass), dtype=np.int32) + +cioffset = 0 +for classi in range(nclass): + classindex[cioffset:cioffset + samplefrom[classi]] = np.nonzero(training_y == (classi + 1))[0] + cioffset = cioffset + samplefrom[classi] + +bagmem = -1 * np.ones((ntree, maxnsamples * nclass), dtype=np.int32) +d_bagspace = gpuarray.to_gpu(bagmem) +d_tempbag = gpuarray.to_gpu(bagmem) +bagmem = None + +d_treemap = gpuarray.zeros((long(ntree * 2), long(maxTreeSize)), np.int32) +d_nodestatus = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_xbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +#d_nbestsplit = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +#d_bestgini = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.float32) +d_bestvar = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodeclass = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_ndbigtree = gpuarray.zeros((long(ntree), 1), np.int32) +d_nodestart = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_nodepop = gpuarray.zeros((long(ntree), long(maxTreeSize)), np.int32) +d_classpop = gpuarray.zeros((long(ntree), long(maxTreeSize*nclass)), np.int32) +d_classweights = gpuarray.to_gpu(classweights) +d_weight_left = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_weight_right = gpuarray.zeros((long(ntree), long(nclass)), np.int32) +d_dimtemp = gpuarray.zeros((long(ntree), long(training_x.shape[0])), np.int32) + +d_baggedx = gpuarray.zeros((long(np.sum(nsamples)*training_x.shape[0]), long(ntree)), np.float32) +d_baggedclass = gpuarray.zeros((long(ntree), long(np.sum(nsamples))), np.int32) + +d_training_x = gpuarray.to_gpu(training_x) +d_training_y = gpuarray.to_gpu(training_y) +d_classindex = gpuarray.to_gpu(classindex) +d_nsamples = gpuarray.to_gpu(nsamples) +d_samplefrom = gpuarray.to_gpu(samplefrom) + +threadsPerBlock = 32 +block = (32, 1, 1) +grid = (int(ntree / block[0] + 1), 1) + +gpu_train(d_training_x, np.int32(training_x.shape[1]), np.int32(training_x.shape[0]), np.int32(nclass), + d_training_y, d_classindex, d_nsamples, d_samplefrom, + np.int32(maxnsamples), seed, sequencestart, np.int32(ntree), np.int32(maxTreeSize), np.int32(mtry), np.int32(nodeStopSize), + d_treemap, d_nodestatus, d_xbestsplit, + d_bestvar, d_nodeclass, d_ndbigtree, + d_nodestart, d_nodepop, + d_classpop, d_classweights, + d_weight_left, d_weight_right, + d_dimtemp, d_bagspace, d_tempbag, d_baggedx, d_baggedclass, + 
block=block, grid=grid) + +treemap = d_treemap.get() +nodestatus = d_nodestatus.get() +xbestsplit = d_xbestsplit.get() +bestvar = d_bestvar.get() +nodeclass = d_nodeclass.get() +ndbigtree = d_ndbigtree.get() + +# Save results +out_hdf5 = h5py.File(output_path, 'w') +out_hdf5['/forest/treemap'] = treemap +out_hdf5['/forest/nodestatus'] = nodestatus +out_hdf5['/forest/xbestsplit'] = xbestsplit +out_hdf5['/forest/bestvar'] = bestvar +out_hdf5['/forest/nodeclass'] = nodeclass +out_hdf5['/forest/ndbigtree'] = ndbigtree + +out_hdf5['/forest/nrnodes'] = maxTreeSize +out_hdf5['/forest/ntree'] = ntree +out_hdf5['/forest/nclass'] = nclass +out_hdf5['/forest/classweights'] = classweights +out_hdf5['/forest/mtry'] = mtry + +out_hdf5.close() diff --git a/DeepNets/full_image_classify_stumpin.py b/DeepNets/full_image_classify_stumpin.py new file mode 100644 index 0000000..91cd5a1 --- /dev/null +++ b/DeepNets/full_image_classify_stumpin.py @@ -0,0 +1,237 @@ + +import os +import sys +import time + +import numpy as np +import mahotas + +import glob +import h5py + +execfile('full_image_cnn.py') +#from full_image_cnn import * + +#param_path = 'D:/dev/Rhoana/membrane_cnn/results/resonance/' +#param_file = param_path + 'LGN1_MembraneSamples_65x65x1_mp0.50_train10000_valid2000_test2000_seed7.progress_anneal_rotmir_k[48, 48, 48, 48].h5' +#param_file = param_path + 'LGN1_MembraneSamples_95x95x1_mp0.50_train5000_valid1000_test1000_seed7.progress_anneal_rotmir_k[48, 48, 48, 48].h5' +#param_file = param_path + 'progress2/LGN1_MembraneSamples_31x31x1_mp0.50_train50000_valid5000_test5000_seed7_ds4.progress_anneal_rotmir_k[32, 32, 32]_baseLR0.001.h5' + +#param_path = 'D:/dev/Rhoana/membrane_cnn/results/PC/' +#param_file = param_path + 'LGN1_MembraneSamples_65x65x1_mp0.50_train10000_valid2000_test2000_seed7.progress_anneal_rotmir_k[32, 32, 32, 32].h5.' +#param_file = param_path + 'LGN1_MembraneSamples_31x31x1_mp0.50_train50000_valid5000_test5000_seed7_ds4b.progress_anneal_rotmir_k[32, 32, 32]_baseLR0.004_v1.h5' +#param_file = param_path + 'lenet0_membrane_epoch_25100.h5' +#param_file = param_path + '5layer_params_large_epoch_285.h5' + +#param_files = [param_file] + +param_path = 'D:/dev/Rhoana/membrane_cnn/results/stumpin/' +param_files = glob.glob(param_path + "*.h5") +param_files = [x for x in param_files if x.find('.ot.h5') == -1] + +for param_file in param_files: + + output_path = param_file.replace('.h5', '_stumpin') + if not os.path.exists(output_path): + os.makedirs(output_path) + + print 'Opening parameter file {0}.'.format(param_file) + h5file = h5py.File(param_file, 'r') + + # Construct a blank network + nlayers = h5file['/layers'][...] + iterations = h5file['/iterations'][...] + + print "Loaded {0} layer network trained up to iteration {1}.".format(nlayers, iterations) + + all_layers = [] + stride_in = 1 + + for layer in range(nlayers): + + layer_string = '/layer{0}/'.format(layer) + layer_type = h5file[layer_string + 'type'][...] + + if layer_type == 'Convolution': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + layer_maxpoolsize = h5file[layer_string + 'maxpoolsize'][...] + + new_layer = ConvolutionMaxpoolLayer( + layer_weights.shape[0], layer_weights.shape[1], layer_weights.shape[2], + stride_in, layer_maxpoolsize, W=layer_weights, b=layer_bias) + + elif layer_type == 'FullyConnected': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + layer_ksize = h5file[layer_string + 'ksize'][...] 
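+            # ninputs is recovered from the flattened weight matrix below:
+            # FC weights are stored with shape (ninputs * ksize**2, noutputs),
+            # so weights.shape[0] / ksize**2 gives the input channel count.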
+ + new_layer = FullyConnectedLayer( + layer_weights.shape[0] / (layer_ksize ** 2), layer_weights.shape[1], layer_ksize, + stride_in, W=layer_weights, b=layer_bias) + + elif layer_type == 'LogisticRegression': + + layer_weights = h5file[layer_string + 'weights'][...] + layer_bias = h5file[layer_string + 'bias'][...] + + new_layer = LogisticRegressionLayer(layer_weights.shape[0], layer_weights.shape[1], + stride_in, W=layer_weights, b=layer_bias) + + else: + raise Exception("Unknown layer type: {0}".format(layer_type)) + + print new_layer.W.shape + print 'layer {0} Wsum={1}.'.format(layer, np.sum(new_layer.W)) + + all_layers.append(new_layer) + + stride_in = new_layer.stride_out + + h5file.close() + + # Calculate network footprint and therefore pad size + footprint = 1 + for revlayer in range(1,nlayers): + layer = nlayers - revlayer - 1 + if revlayer == 1: + footprint = all_layers[layer].kernel_size + else: + footprint = footprint * all_layers[layer].maxpool_size - 1 + all_layers[layer].kernel_size + + pad_by = footprint // 2 + + #image_path='D:/dev/datasets/isbi/train-input/train-input_0000.tif' + #gold_image_path='D:/dev/datasets/isbi/train-labels/train-labels_0000.tif' + + image_path_format_string='D:/dev/datasets/LGN1/JoshProbabilities/2kSampAligned{0:04d}.tif' + gold_image_path_format_string='D:/dev/datasets/LGN1/gold/lxVastExport_8+12+13/Segmentation1-LX_8-12_export_s{0:03d}.png' + + saturation_level = 0.005 + + def normalize_image(original_image): + sorted_image = np.sort( np.uint8(original_image).ravel() ) + minval = np.float32( sorted_image[ len(sorted_image) * ( saturation_level / 2 ) ] ) + maxval = np.float32( sorted_image[ len(sorted_image) * ( 1 - saturation_level / 2 ) ] ) + norm_image = np.float32(original_image - minval) * ( 255 / (maxval - minval)) + norm_image[norm_image < 0] = 0 + norm_image[norm_image > 255] = 255 + return np.uint8(255 - norm_image) + + def open_image_and_gold(image_index, crop_from, crop_size): + path = image_path_format_string.format(image_index) + gold_path = gold_image_path_format_string.format(image_index) + + # Open raw image + image = np.float32(normalize_image(mahotas.imread(path)[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size])) + + # Open gold standard image + gold_image = mahotas.imread(gold_path)[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size] + + # Convert to ids + if len(gold_image.shape) == 3: + gold_image = (np.uint32(gold_image[:,:,0]) * 2**16 + np.uint32(gold_image[:,:,1]) * 2**8 + np.uint32(gold_image[:,:,2])).squeeze() + + return (image, gold_image) + + + def rotmir(image, mirror, rotate): + if mirror == 1: + image = image[::-1,:] + + if rotate == 1: + image = image[::-1,:].T + elif rotate == 2: + image = image[::-1,:][:,::-1] + elif rotate == 3: + image = image.T[::-1,:] + + return image + + + classify_start = 100 + classify_n = 1 #105 + + crop_from = (512, 512) + crop_size = 1024 + #crop_size = 512 + #crop_size = 256 + #crop_size = 128 + + # def output_image (data, path, index, name): + # maxdata = np.max(data) + # mindata = np.min(data) + # normdata = (data - mindata) / (maxdata - mindata) + # mahotas.imsave(path + '/{0}_{1}.tif'.format(index, name), np.uint16(normdata * 65535)) + def output_image (data, layer, index, unpad_by, image_num=0, downsample=1): + data = data[unpad_by:data.shape[0]-unpad_by,unpad_by:data.shape[1]-unpad_by] + if downsample != 1: + data = np.float32(mahotas.imresize(data, downsample)) + maxdata = np.max(data) + mindata = np.min(data) + normdata = 
(np.float32(data) - mindata) / (maxdata - mindata) + mahotas.imsave(output_path + '/{0:04d}_classify_output_layer{1}_{2}.tif'.format(image_num, layer, index), np.uint16(normdata * 65535)) + + # Main classification loop + for image_index in range(classify_start, classify_start + classify_n): + + # Normalized training + input_image, target_image = open_image_and_gold(image_index, crop_from, crop_size) + + # Direct pixel intensity training + #input_image = np.float32(255-mahotas.imread(image_path_format_string.format(image_index))[crop_from[0]:crop_from[0]+crop_size,crop_from[1]:crop_from[1]+crop_size]) + + downsample = 1 + if param_file.find('_ds2') != -1: + downsample = 2 + input_image = np.float32(mahotas.imresize(input_image, 1.0/downsample)) + elif param_file.find('_ds4') != -1: + downsample = 4 + input_image = np.float32(mahotas.imresize(input_image, 1.0/downsample)) + + + # Random rotate / mirror + # mirror = np.random.choice(2) + # rotate = np.random.choice(4) + # input_image = rotmir(input_image, mirror, rotate) + # target_image = rotmir(input_image, mirror, rotate) + + #Pad the image borders so we get a full image output and to avoid edge effects + pad_image = np.pad(input_image, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric') + layer0_in = pad_image.reshape(1, pad_image.shape[0], pad_image.shape[1]) + + start_time = time.clock() + + #Classify image + layer_output = [] + for layeri in range(len(all_layers)): + if layeri == 0: + layer_output.append(all_layers[layeri].apply_layer(layer0_in)) + else: + layer_output.append(all_layers[layeri].apply_layer(layer_output[layeri-1])) + + end_time = time.clock() + + print('Classification complete.') + print('Classification code ran for %.2fm' % ((end_time - start_time) / 60.)) + + # # Crop to input image size + # layer3_out = layer3_out[:,pad_by:-pad_by,pad_by:-pad_by] + # output_image(input_image, output_path, image_index, 'input') + # output_image(layer3_out[1,:,:], output_path, image_index, 'output') + # output_image(target_image == 0, output_path, image_index, 'target') + + output_image(layer0_in[0,:,:], 99, 0, pad_by) + # for layeri in range(len(layer_output)): + # for i in range(layer_output[layeri].shape[0]): + # output_image(layer_output[layeri][i,:,:], layeri, i, pad_by) + # if i == 20: break + + output_image(layer_output[-1][0,:,:], len(layer_output)+1, 0, pad_by, image_index, downsample) + output_image(all_layers[-1].pre_softmax[0,:,:], len(layer_output), 0, pad_by, image_index, downsample) + + #print "Classification error before training: {0}".format(np.sum((layer3_out[1,:,:] - target_output[1,:,:])**2)) + + print 'Classification of all images complete.' diff --git a/DeepNets/full_image_cnn.py b/DeepNets/full_image_cnn.py new file mode 100644 index 0000000..c4157d3 --- /dev/null +++ b/DeepNets/full_image_cnn.py @@ -0,0 +1,667 @@ +# Library for full image cnn operations + +import numpy as np +import scipy.ndimage +from scipy.signal import convolve2d +from scipy.signal import fftconvolve +from numpy.fft import rfftn +from numpy.fft import irfftn +import mahotas +import time +import h5py + +VALID_SIZE_CROP = False + +def _centered(arr, newsize): + # Return the center newsize portion of the array. 
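+    # For illustration: _centered(np.arange(5), (3,)) -> array([1, 2, 3]).
+    # Used below to crop padded FFT convolution output back to 'same' size.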
+ newsize = np.asarray(newsize) + currsize = np.array(arr.shape) + startind = (currsize - newsize) // 2 + endind = startind + newsize + myslice = [slice(startind[k], endind[k]) for k in range(len(endind))] + return arr[tuple(myslice)] + +class ConvolutionMaxpoolLayer(object): + def __init__(self, nkernels, ninputs, kernel_size, stride_in, maxpool_size, + weight_init=0.005, W=[], b=[]): + self.ninputs = ninputs + self.nkernels = nkernels + self.kernel_size = kernel_size + self.maxpool_size = maxpool_size + self.stride_in = stride_in + self.stride_out = stride_in * maxpool_size + self.prev_conv_size = 0 + + if W == []: + self.W = (np.float32(np.random.random((nkernels, ninputs, kernel_size, kernel_size))) - 0.5) * weight_init * 2 + else: + self.W = W + + if b == []: + self.b = np.zeros((nkernels), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs) + + if VALID_SIZE_CROP: + # valid size output + output_size = (input_image.shape[1] - self.kernel_size + 1, input_image.shape[2] - self.kernel_size + 1) + else: + # same size output + output_size = (input_image.shape[1], input_image.shape[2]) + + output = np.zeros((self.nkernels, output_size[0], output_size[1]), dtype=np.float32) + self.switches = np.zeros((self.nkernels, output_size[0], output_size[1]), dtype=np.uint32) + + #options for + #scipy convolution? + #fft convolution? + #cuda convolution? + + # Retain precalculated fft / size for efficient repeat calculations + + for stridex in range(self.stride_in): + for stridey in range(self.stride_in): + + same_fft_size = True + + for filteri in range(self.nkernels): + + # Apply convolution + + if VALID_SIZE_CROP: + stride_shape = ( + len(np.arange(stridex, input_image.shape[1] - self.kernel_size + 1, self.stride_in)), + len(np.arange(stridey, input_image.shape[2] - self.kernel_size + 1, self.stride_in))) + else: + stride_shape = ( + len(np.arange(stridex, input_image.shape[1], self.stride_in)), + len(np.arange(stridey, input_image.shape[2], self.stride_in))) + + #conv_result = np.zeros(((output_size[0] + stridex) / self.stride_in, (output_size[1] + stridey) / self.stride_in), dtype=np.float32) + conv_result = np.zeros((stride_shape[0], stride_shape[1]), dtype=np.float32) + + for channeli in range(self.ninputs): + + # Space domain convolution + # conv_result = conv_result + convolve2d( + # input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze(), + # self.W[filteri,channeli,:,:].squeeze(), + # mode='same') + # #mode='valid') + + # FFT convolution + #conv_result = conv_result + fftconvolve( + # input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze(), + # self.W[filteri,channeli,:,:].squeeze(), + # mode='same') + + # FFT convolution (cache filter transformations) + convolve_image = input_image[channeli, stridex::self.stride_in, stridey::self.stride_in].squeeze() + conv_size = (self.kernel_size + convolve_image.shape[0] - 1, self.kernel_size + convolve_image.shape[1] - 1) + + fsize = 2 ** np.ceil(np.log2(conv_size)).astype(int) + fslice = tuple([slice(0, int(sz)) for sz in conv_size]) + + if same_fft_size and conv_size == self.prev_conv_size: + fft_result = irfftn(rfftn(convolve_image, fsize) * self.Wfft[filteri,channeli,:,:], fsize)[fslice].copy() + else: + if same_fft_size: + self.Wfft = np.zeros((self.nkernels, self.ninputs, fsize[0], fsize[1]//2+1), np.complex64) + same_fft_size = False + self.prev_conv_size = conv_size + + filter_fft = 
rfftn(self.W[filteri,channeli,:,:].squeeze(), fsize) + fft_result = irfftn(rfftn(convolve_image, fsize) * filter_fft, fsize)[fslice].copy() + + self.Wfft[filteri,channeli,:,:] = filter_fft + + conv_result += _centered(fft_result.real, conv_result.shape) + + # if mode == "full": + # return ret + # elif mode == "same": + # return _centered(ret, s1) + # elif mode == "valid": + # return _centered(ret, abs(s1 - s2) + 1) + + # Apply maxpool (record switches) + + fullx = conv_result.shape[0] + fully = conv_result.shape[1] + splitx = (fullx + 1) / self.maxpool_size + splity = (fully + 1) / self.maxpool_size + + striderangex = np.arange(0, fullx-1, self.maxpool_size) + striderangey = np.arange(0, fully-1, self.maxpool_size) + + for poolx in range(self.maxpool_size): + for pooly in range(self.maxpool_size): + + maxpool = np.ones((splitx, splity, self.maxpool_size ** 2), dtype=np.float32) * -np.inf + + offset_i = 0 + for offset_x in range(self.maxpool_size): + for offset_y in range(self.maxpool_size): + pool_non_padded = conv_result[poolx + offset_x::self.maxpool_size, pooly + offset_y::self.maxpool_size] + maxpool[0:pool_non_padded.shape[0],0:pool_non_padded.shape[1],offset_i] = pool_non_padded + offset_i = offset_i + 1 + + max_indices = np.argmax(maxpool, axis=2) + maxpool = np.amax(maxpool, axis=2) + + # Tanh and bias + maxpool = np.tanh(maxpool + self.b[filteri]) + + # truncate if necessary + if poolx > 0 and fullx % self.maxpool_size >= poolx: + maxpool = maxpool[:-1,:] + max_indices = max_indices[:-1,:] + if pooly > 0 and fully % self.maxpool_size >= pooly: + maxpool = maxpool[:,:-1] + max_indices = max_indices[:,:-1] + + output[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] = maxpool + self.switches[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] = max_indices + + if filteri == 0: + self.conv_result = conv_result + + print "CONV Layer: Done pool {0}, of {1}.".format(stridex * self.stride_in + stridey + 1, self.stride_in ** 2) + + return output + + def backpropogate_error(self, input_image, output, output_error, learning_rate): + # df / dx * error + error_bp = (1 - output**2) * output_error + + error_in = np.zeros(input_image.shape, dtype=np.float32) + gradW = np.zeros(self.W.shape, dtype=np.float32) + + crop_switches = _centered(self.switches, output.shape) + + for stridex in range(self.stride_in): + for stridey in range(self.stride_in): + + input_pool = input_image[:,stridex::self.stride_in,stridey::self.stride_in] + error_in_pool = np.zeros(input_pool.shape, dtype=np.float32) + + nc, nx, ny = input_pool.shape + + for filteri in range(self.nkernels): + + conv_error = np.zeros((nx, ny), dtype=np.float32) + + # reverse maxpool step based on saved switch values + for poolx in range(self.maxpool_size): + for pooly in range(self.maxpool_size): + + error_bp_pool = error_bp[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] + switches_pool = crop_switches[filteri,stridex+poolx*self.stride_in::self.stride_out,stridey+pooly*self.stride_in::self.stride_out] + + # Unpool into conv_error + offset_i = 0 + for offset_x in range(self.maxpool_size): + for offset_y in range(self.maxpool_size): + + wnx = (conv_error.shape[0] - poolx - offset_x + 1) // self.maxpool_size + wny = (conv_error.shape[1] - pooly - offset_y + 1) // self.maxpool_size + + #conv_error[poolx + offset_x::self.maxpool_size, pooly + offset_y::self.maxpool_size] += 
error_bp_pool[:wnx,:wny] * (switches_pool[:wnx,:wny] == offset_i) + + indices = np.nonzero(switches_pool[:wnx,:wny] == offset_i) + conv_error[indices[0]*self.maxpool_size+poolx+offset_x,indices[1]*self.maxpool_size+pooly+offset_y] += error_bp_pool[indices[0],indices[1]] + + valid = np.nonzero(conv_error) + #print 'Found {0} pool winners from {1}.'.format(len(valid[0]), nx*ny) + + for (lx,ly) in zip(valid[0],valid[1]): + + layer_temp = np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + window_non_padded = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + layer_temp[:,:window_non_padded.shape[1], :window_non_padded.shape[2]] = window_non_padded + + # Add to gradient + gradW[filteri,:,:,:] += conv_error[lx, ly] * layer_temp + + # Add to error in + limitx = lx + window_non_padded.shape[1] + limity = ly + window_non_padded.shape[2] + node_error = self.W[filteri,:,:,:] * conv_error[lx, ly] + error_in_pool[:,lx:limitx,ly:limity] += node_error[:,:limitx-lx,:limity-ly] + + error_in[:,stridex::self.stride_in,stridey::self.stride_in] = error_in_pool + + print 'CONV Backprop: Done pool {0} of {1}'.format(stridex * self.stride_in + stridey + 1, self.stride_in ** 2) + + # Normalize by the number of training examples + #ntrain = output_error.shape[0] * output_error.shape[1] + #gradW = gradW / ntrain + #gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain + gradb = np.sum(np.sum(error_bp, axis=2), axis=1) + + # print error_bp.shape + # print error_in.shape + # print gradW.shape + # print gradb.shape + + self.W = self.W - learning_rate * gradW + self.b = self.b - learning_rate * gradb + + return error_in + + + +class FullyConnectedLayer(object): + def __init__(self, ninputs, noutputs, kernel_size, stride, weight_init=0.005, W=[], b=[]): + self.ninputs = ninputs + self.noutputs = noutputs + self.kernel_size = kernel_size + self.stride_in = stride + self.stride_out = stride + + if W == []: + self.W = (np.float32(np.random.random((ninputs * kernel_size ** 2, noutputs))) - 0.5) * weight_init * 2 + else: + self.W = W + + if b ==[]: + self.b = np.zeros((noutputs), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs ) + + if VALID_SIZE_CROP: + # valid size output + output_size = (input_image.shape[1] - self.kernel_size + 1, input_image.shape[2] - self.kernel_size + 1) + else: + # same size output + output_size = (input_image.shape[1], input_image.shape[2]) + + output = np.zeros((self.noutputs, output_size[0], output_size[1]), dtype=np.float32) + + # Apply dot product for each image window in each pool + for poolx in range(self.stride_in): + for pooly in range(self.stride_in): + + fullx = input_image.shape[1] + fully = input_image.shape[2] + + poolfrange = np.arange(self.ninputs) + poolxrange = np.arange(poolx, fullx, self.stride_in) + poolyrange = np.arange(pooly, fully, self.stride_in) + + layer_pool = input_image[np.ix_(poolfrange, poolxrange, poolyrange)] + + if VALID_SIZE_CROP: + startx = 0 + endx = layer_pool.shape[1] - self.kernel_size + 1 + starty = 0 + endy = layer_pool.shape[2] - self.kernel_size + 1 + else: + startx = -((self.kernel_size + 1) / 2) + 1 + endx = startx + layer_pool.shape[1] + starty = -((self.kernel_size + 1) / 2) + 1 + endy = starty + layer_pool.shape[2] + + #print (startx, endx) + #print (starty, endy) + + for lx in range(startx, endx): + for ly in range(starty, endy): + + basex = np.max([lx,0]) + basey = np.max([ly,0]) + + layer_temp 
= np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + window_non_padded = layer_pool[:,basex:lx+self.kernel_size, basey:ly+self.kernel_size] + + xfrom = np.max([-lx,0]) + yfrom = np.max([-ly,0]) + + layer_temp[:,xfrom:xfrom+window_non_padded.shape[1], yfrom:yfrom+window_non_padded.shape[2]] = window_non_padded + + layer_temp = np.tanh(np.dot(layer_temp.flatten(), self.W) + self.b) + output[:, poolx + self.stride_in * (lx - startx), pooly + self.stride_in * (ly - starty)] = layer_temp + + print 'FC Layer: Done pool {0} of {1}'.format(poolx * self.stride_in + pooly + 1, self.stride_in ** 2) + + return output + + def backpropogate_error(self, input_image, output, output_error, learning_rate): + + # df / dx * error + error_bp = (1 - output**2) * output_error + + error_in = np.zeros(input_image.shape, dtype=np.float32) + gradW = np.zeros(self.W.shape, dtype=np.float32) + #ntrain = 0 + + for poolx in range(self.stride_in): + for pooly in range(self.stride_in): + + error_bp_pool = error_bp[:,poolx::self.stride_in,pooly::self.stride_in] + input_pool = input_image[:,poolx::self.stride_in,pooly::self.stride_in] + error_in_pool = np.zeros(input_pool.shape, dtype=np.float32) + + nerr, nx, ny = error_bp_pool.shape + + # Only train on full windows + for lx in range(nx-self.kernel_size+1): + for ly in range(ny-self.kernel_size+1): + + layer_temp = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + + # zero-padded + #layer_temp = np.zeros((self.ninputs, self.kernel_size, self.kernel_size), dtype=np.float32) + #window_non_padded = input_pool[:,lx:lx+self.kernel_size, ly:ly+self.kernel_size] + #layer_temp[:,:window_non_padded.shape[1], :window_non_padded.shape[2]] = window_non_padded + + # dE / dW = input * error (summed over kernel neighbourhood) + gradW += np.dot(error_bp_pool[:,lx,ly].reshape(self.noutputs,1), layer_temp.reshape(1,layer_temp.size)).T + + # error_in (important to calculate this before weights are updated) + error_in_pool[:,lx:lx+self.kernel_size,ly:ly+self.kernel_size] += np.dot(self.W, error_bp_pool[:,lx,ly]).reshape(self.ninputs, self.kernel_size, self.kernel_size) + + error_in[:,poolx::self.stride_in,pooly::self.stride_in] = error_in_pool + #ntrain += (nx-self.kernel_size) * (ny-self.kernel_size) + + print 'FC Backprop: Done pool {0} of {1}'.format(poolx * self.stride_in + pooly + 1, self.stride_in ** 2) + + # Normalize by the number of training examples + #gradW = gradW / ntrain + #gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain + gradb = np.sum(np.sum(error_bp, axis=2), axis=1) + + # print error_bp.shape + # print error_in.shape + # print gradW.shape + # print gradb.shape + + self.W = self.W - learning_rate * gradW + self.b = self.b - learning_rate * gradb + + return error_in + + + +class LogisticRegressionLayer(object): + def __init__(self, ninputs, noutputs, stride, W=[], b=[]): + self.ninputs = ninputs + self.noutputs = noutputs + self.stride_in = stride + self.stride_out = stride + + if W == []: + self.W = np.zeros((ninputs, noutputs), dtype=np.float32) + else: + self.W = W + + if b ==[]: + self.b = np.zeros((noutputs), dtype=np.float32) + else: + self.b = b + + def apply_layer(self, input_image): + # Calculate feed-forward result + assert(input_image.shape[0] == self.ninputs) + output = np.zeros((self.noutputs, input_image.shape[1], input_image.shape[2]), dtype=np.float32) + + # Apply dot procuct for each pixel + for lx in range(input_image.shape[1]): + for ly in range(input_image.shape[2]): + output[:,lx,ly] = 
np.dot(input_image[:,lx,ly], self.W) + self.b
+
+        self.pre_softmax = output
+
+        #Apply softmax (use noutputs rather than a hard-coded class count)
+        maxes = np.amax(output, axis=0)
+        maxes = np.tile(maxes, (self.noutputs,1,1))
+        e = np.exp(output - maxes)
+        output = e / np.sum(e, axis=0)
+
+        print 'LR Layer: Complete.'
+
+        return output
+
+    def backpropogate_error(self, input_image, output, target_output, learning_rate):
+
+        nerr, nx, ny = target_output.shape
+
+        # df / dx * error
+        error_bp = output * (1 - output) * (output - target_output)
+
+        error_in = np.zeros(input_image.shape, dtype=np.float32)
+        gradW = np.zeros(self.W.shape, dtype=np.float32)
+
+        for lx in range(nx):
+            for ly in range(ny):
+
+                # dE / dW = input * error
+                #gradW += np.tile(error_bp[:,lx,ly], (self.ninputs, 1)) * np.tile(input_image[:,lx,ly], (self.noutputs, 1)).T
+                gradW += np.dot(error_bp[:,lx,ly].reshape(self.noutputs,1), input_image[:,lx,ly].reshape(1,self.ninputs)).T
+
+                # error_in (important to calculate this before weights are updated)
+                error_in[:,lx,ly] += np.dot(self.W, error_bp[:,lx,ly])
+
+        # Normalize by the number of training examples
+        ntrain = nx * ny
+        #gradW = gradW / ntrain
+        gradb = np.sum(np.sum(error_bp, axis=2), axis=1) / ntrain
+        #gradb = np.sum(np.sum(error_bp, axis=2), axis=1)
+        print error_bp.shape
+        print 'gradb={0}'.format(gradb)
+
+        # print error_bp.shape
+        # print error_in.shape
+        # print gradW.shape
+        # print gradb.shape
+
+        self.W = self.W - learning_rate * gradW
+        self.b = self.b - learning_rate * gradb
+
+        print 'LR Backprop: Complete.'
+
+        return error_in
+
+class DeepNetwork(object):
+    def __init__(self, all_layers, best_offset, best_sigma, downsample, pad_by, stumpin=False):
+        self.all_layers = all_layers
+        self.best_offset = best_offset
+        self.best_sigma = best_sigma
+        self.downsample = downsample
+        self.pad_by = pad_by
+        self.stumpin = stumpin
+
+        assert np.max(np.abs(self.best_offset)) <= self.pad_by
+
+
+    def apply_net(self, input_image, perform_downsample=True, perform_pad=True, perform_upsample=True, perform_blur=True, perform_offset=True):
+
+        if perform_pad:
+            input_image = np.pad(input_image, ((self.pad_by, self.pad_by), (self.pad_by, self.pad_by)), 'symmetric')
+
+        if perform_downsample:
+            input_image = np.float32(mahotas.imresize(input_image, 1.0/self.downsample))
+
+        layer_temp = input_image.reshape(1, input_image.shape[0], input_image.shape[1])
+
+        for layeri in range(len(self.all_layers)):
+            layer_temp = self.all_layers[layeri].apply_layer(layer_temp)
+
+        output_image = layer_temp[0,:,:]
+
+        if perform_upsample:
+            output_image = np.float32(mahotas.imresize(output_image, self.downsample))
+
+        if perform_blur:
+            output_image = scipy.ndimage.filters.gaussian_filter(output_image, self.best_sigma)
+
+        if perform_offset:
+            #Translate
+            output_image = np.roll(output_image, self.best_offset[0], axis=0)
+            output_image = np.roll(output_image, self.best_offset[1], axis=1)
+
+        # Crop to valid size
+        output_image = output_image[self.pad_by:-self.pad_by,self.pad_by:-self.pad_by]
+
+        return output_image
+
+class ComboDeepNetwork(object):
+    def __init__(self, filename):
+
+        combo_h5 = h5py.File(filename, 'r')
+
+        self.nnets = combo_h5['/nets'][...]
+        self.all_nets = []
+
+        for net_i in range(self.nnets):
+            net_string = '/net{0}'.format(net_i)
+
+            best_offset = combo_h5[net_string + '/best_offset'][...]
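+            # best_offset is a per-network (row, col) translation; it is
+            # applied via np.roll in DeepNetwork.apply_net when perform_offset
+            # is enabled, and must not exceed pad_by (asserted in
+            # DeepNetwork.__init__).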
+ best_sigma = float(combo_h5[net_string + '/best_sigma'][...]) + downsample = float(combo_h5[net_string + '/downsample_factor'][...]) + nlayers = int(combo_h5[net_string + '/layers'][...]) + stumpin = net_string + '/stumpin' in combo_h5 + + print 'Network {0} has {1} layers.'.format(net_i, nlayers) + #print stumpin + + all_layers = [] + stride_in = 1 + + for layer_i in range(nlayers): + + layer_string = net_string + '/layer{0}/'.format(layer_i) + layer_type = combo_h5[layer_string + 'type'][...] + + if layer_type == 'Convolution': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + layer_maxpoolsize = combo_h5[layer_string + 'maxpoolsize'][...] + + new_layer = ConvolutionMaxpoolLayer( + layer_weights.shape[0], layer_weights.shape[1], layer_weights.shape[2], + stride_in, layer_maxpoolsize, W=layer_weights, b=layer_bias) + + elif layer_type == 'FullyConnected': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + layer_ksize = combo_h5[layer_string + 'ksize'][...] + + new_layer = FullyConnectedLayer( + layer_weights.shape[0] / (layer_ksize ** 2), layer_weights.shape[1], layer_ksize, + stride_in, W=layer_weights, b=layer_bias) + + elif layer_type == 'LogisticRegression': + + layer_weights = combo_h5[layer_string + 'weights'][...] + layer_bias = combo_h5[layer_string + 'bias'][...] + + new_layer = LogisticRegressionLayer(layer_weights.shape[0], layer_weights.shape[1], + stride_in, W=layer_weights, b=layer_bias) + + else: + raise Exception("Unknown layer type: {0}".format(layer_type)) + + all_layers.append(new_layer) + + stride_in = new_layer.stride_out + + # Calculate network footprint and therefore pad size + footprint = 1 + for revlayer in range(1,nlayers): + layer = nlayers - revlayer - 1 + if revlayer == 1: + footprint = all_layers[layer].kernel_size + else: + footprint = footprint * all_layers[layer].maxpool_size - 1 + all_layers[layer].kernel_size + + pad_by = int(downsample * (footprint // 2)) + + new_network = DeepNetwork(all_layers, best_offset, best_sigma, downsample, pad_by, stumpin) + + self.all_nets.append(new_network) + + def apply_combo_net(self, input_image, block_size=400, stump_input=None, return_parts=False): + + average_image = np.zeros(input_image.shape, dtype=np.float32) + + parts = [] + + prev_downsample = 0 + prev_pad_by = 0 + + start_time = time.clock() + + for net_i in range(self.nnets): + + net_input = stump_input if self.all_nets[net_i].stumpin else input_image + + downsample = self.all_nets[net_i].downsample + pad_by = self.all_nets[net_i].pad_by + + # Downsample and pad + if prev_downsample != downsample or prev_pad_by != pad_by: + preprocessed_image = np.pad(net_input, ((pad_by, pad_by), (pad_by, pad_by)), 'symmetric') + preprocessed_image = np.float32(mahotas.imresize(preprocessed_image, 1.0 / downsample)) + + halo = int((pad_by + downsample - 1) / downsample) + + # Compute in blocks (small edges) + block_x = range(halo, preprocessed_image.shape[0], block_size) + block_y = range(halo, preprocessed_image.shape[1], block_size) + + # (full edges) + # block_x = range(halo, preprocessed_image.shape[0] - block_size + 1, block_size) + # block_y = range(halo, preprocessed_image.shape[1] - block_size + 1, block_size) + # if preprocessed_image.shape[0] % block_size > 0: + # block_x.append(max(halo, preprocessed_image.shape[0] - block_size - halo)) + # if preprocessed_image.shape[1] % block_size > 0: + # block_y.append(max(halo, 
preprocessed_image.shape[1] - block_size - halo)) + + blocki = 0 + nblocks = len(block_x) * len(block_y) + + output_image = np.zeros(input_image.shape, dtype=np.float32) + + for from_x in block_x: + for from_y in block_y: + + # Crop out a padded input block + block = preprocessed_image[from_x-halo:from_x+block_size+halo, from_y-halo:from_y+block_size+halo] + + # Apply network + output_block = self.all_nets[net_i].apply_net(block, perform_downsample=False, perform_pad=False) + + # Output block is not padded + to_x = (from_x - halo) * downsample + to_y = (from_y - halo) * downsample + output_image[to_x:to_x + output_block.shape[0], to_y:to_y + output_block.shape[1]] = output_block + + blocki += 1 + print 'Block {0} of {1} complete.'.format(blocki, nblocks) + + average_image += output_image + + if return_parts: + parts.append(output_image) + + print 'Net {0} of {1} complete.'.format(net_i + 1, self.nnets) + + average_image /= self.nnets + + end_time = time.clock() + + print('Classification complete.') + print('Classification code ran for %.2fm' % ((end_time - start_time) / 60.)) + + return (average_image, parts) if return_parts else average_image diff --git a/Segment/import_segmentations_4cube.py b/Segment/import_segmentations_4cube.py new file mode 100644 index 0000000..db62b9d --- /dev/null +++ b/Segment/import_segmentations_4cube.py @@ -0,0 +1,66 @@ +import sys +import numpy as np +import scipy +import scipy.io +import scipy.ndimage +import mahotas +import math +import h5py +import time +import pymaxflow +import timer +import os +import glob + +try: + input_paths = ['D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000001_Z=1_Y=1_X=1_minZ=1_maxZ=20_minY=1_maxY=576_minX=1_maxX=576_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000002_Z=1_Y=1_X=2_minZ=1_maxZ=20_minY=1_maxY=576_minX=449_maxX=1024_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000003_Z=1_Y=2_X=1_minZ=1_maxZ=20_minY=449_maxY=1024_minX=1_maxX=576_dwnSmp=1\pre_segs_kaynig', + 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\cubeId=000004_Z=1_Y=2_X=2_minZ=1_maxZ=20_minY=449_maxY=1024_minX=449_maxX=1024_dwnSmp=1\pre_segs_kaynig'] + + imshape = [1024, 1024] + + input_areas = [[0, 576, 0, 576], + [0, 576, 448, 1024], + [448, 1024, 0, 576], + [448, 1024, 448, 1024]] + + output_path = 'D:\dev\datasets\conn\ecs\ecs20_crop1kds2\diced_xy=512_z=32_xyOv=128_zOv=12_dwnSmp=1\cubes\segs.h5' + + for pi in range(len(input_paths)): + + input_search_string = input_paths[pi] + '/*' + seg_dirs = sorted( glob.glob( input_search_string ) ) + print 'Found {0} segmentation directories'.format(len(seg_dirs)) + + for di in range(len(seg_dirs)): + imagedir = seg_dirs[di] + segmentation_files = sorted( glob.glob( imagedir + '/*.png' ) ) + print 'Found {0} segmentations in directory {1}.'.format(len(segmentation_files), di) + + if pi == 0 and di == 0: + out_hdf5 = h5py.File(output_path, 'w') + segmentations = out_hdf5.create_dataset('segmentations', + (imshape[0], imshape[1], len(segmentation_files), len(seg_dirs)), + dtype=np.bool, + chunks=(256, 256, 1, 1), + compression='gzip') + + + for fi in range(len(segmentation_files)): + + seg = mahotas.imread(segmentation_files[fi]) == 0 + + segmentations[input_areas[pi][0]:input_areas[pi][1], input_areas[pi][2]:input_areas[pi][3], fi, di] = seg + + + 
# Debug visualization: requires pylab (e.g. `from pylab import *`),
+    # which is not imported above, so keep these commented out:
+    #figure(figsize=(20,20))
+    #imshow(segmentations[:, :, 10, 10], cmap=cm.gray)
+
+    out_hdf5.close()
+    print "Success"
+
+except Exception as e:
+    print e
+    raise
+
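+# Usage sketch (illustrative only, not called by this script): the combined
+# volume written above can be read back one slice at a time, e.g.:
+#
+#   with h5py.File(output_path, 'r') as f:
+#       segs = f['segmentations']    # bool dataset, (y, x, n_segs, n_dirs)
+#       seg0 = segs[:, :, 0, 0]      # lazy read of a single segmentation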