Skip to content

Commit

Permalink
Merge pull request #1787 from lixun910/master
Browse files Browse the repository at this point in the history
GeoDa 1.12.1.183
  • Loading branch information
lixun910 authored Dec 19, 2018
2 parents 6031681 + a5d7e89 commit b253ca4
Show file tree
Hide file tree
Showing 61 changed files with 3,255 additions and 1,350 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
*.gal
*.pyc

deps/

swig/*.gwt
swig/_geoda.so
swig/geoda.pyc
Expand Down Expand Up @@ -98,3 +100,4 @@ BuildTools/macosx/temp1/boost_1_57_0/b2
*.plist
.vs/slnx.sqlite
BuildTools/temp/CLAPACK-3.1.1-VisualStudio/BLAS/blas.vcproj
*.sqlite-journal
28 changes: 25 additions & 3 deletions Algorithms/DataUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class DataUtils {
{
double d =0,tmp=0;
for (size_t i =0; i<size; i++ ) {
tmp = (x1[i] - x2[i]) * weight[i];
d += tmp * tmp;
tmp = (x1[i] - x2[i]);
d += tmp * tmp * weight[i];
}
return d;
}
Expand Down Expand Up @@ -283,7 +283,29 @@ class DataUtils {

return copy;
}


// Builds a dense, symmetric rows-by-rows matrix of pairwise distances
// between the rows of `data`.
//
// data    : array of `rows` pointers, each handed to `dist` as one observation
// weight  : forwarded unchanged to `dist` as its last argument
// rows    : number of observations; also the side length of the result
// columns : forwarded to `dist` as the element count
// dist    : callback whose return value is square-rooted before being stored
//
// Returns a newly allocated array of `rows` row pointers, each pointing to a
// newly allocated array of `rows` doubles. Nothing in this function frees it.
static double** ComputeFullDistMatrix(double** data, double* weight,
                                      int rows, int columns,
                                      double dist(double*, double*, size_t, double*))
{
    double** result = new double*[rows];
    for (int r = 0; r < rows; ++r) {
        result[r] = new double[rows];
    }

    for (int r = 0; r < rows; ++r) {
        // the diagonal is always zero; dist is never invoked for it
        result[r][r] = 0;
        for (int c = r + 1; c < rows; ++c) {
            // evaluate each unordered pair once, then mirror the value
            const double d = sqrt(dist(data[r], data[c], columns, weight));
            result[r][c] = d;
            result[c][r] = d;
        }
    }
    return result;
}

// upper triangular part of a symmetric matrix
static double* getPairWiseDistance(double** matrix, double* weight, int n, int k, double dist(double* , double* , size_t, double*))
{
Expand Down
62 changes: 40 additions & 22 deletions Algorithms/hdbscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,51 @@ bool EdgeLess1(SimpleEdge* a, SimpleEdge* b)
// HDBSCAN
//
////////////////////////////////////////////////////////////////////////////////
HDBScan::HDBScan(int min_cluster_size, int min_samples, double alpha, int _cluster_selection_method, bool _allow_single_cluster, int rows, int cols, double** _distances, double** _data, const vector<bool>& _undefs
//,GalElement* w, double* _controls, double _control_thres
)

vector<double> HDBScan::ComputeCoreDistance(double** input_data, int n_pts,
int n_dim, int min_samples,
char dist)
{
int cluster_selection_method = _cluster_selection_method;
bool allow_single_cluster = _allow_single_cluster;
bool match_reference_implementation = false;

// Core distances
core_dist.resize(rows);

int k = min_samples;
int dim = cols;
vector<double> core_d;
core_d.resize(n_pts);

double eps = 0; // error bound
int nPts = rows;

ANNkd_tree* kdTree = new ANNkd_tree(_data, nPts, dim);
if (dist == 'e') ANN_DIST_TYPE = 2; // euclidean
else if (dist == 'b') ANN_DIST_TYPE = 1; // manhattan

// since KNN search will always return the query point itself, so add 1
// to make sure returning min_samples number of results
//min_samples = min_samples + 1;

ANNidxArray nnIdx = new ANNidx[k];
ANNdistArray dists = new ANNdist[k];
for (int i=0; i<nPts; i++) {
kdTree->annkSearch(_data[i], k, nnIdx, dists, eps);
core_dist[i] = sqrt(dists[k-1]);
ANNkd_tree* kdTree = new ANNkd_tree(input_data, n_pts, n_dim);
ANNidxArray nnIdx = new ANNidx[min_samples];
ANNdistArray dists = new ANNdist[min_samples];
for (size_t i=0; i<n_pts; ++i) {
kdTree->annkSearch(input_data[i], min_samples, nnIdx, dists, eps);
core_d[i] = sqrt(dists[min_samples-1]);
}
delete[] nnIdx;
delete[] dists;
delete kdTree;

return core_d;
}

HDBScan::HDBScan(int min_cluster_size, int min_samples, double alpha,
int _cluster_selection_method, bool _allow_single_cluster,
int rows, int cols, double** _distances,
vector<double> _core_dist,
const vector<bool>& _undefs)
{
int cluster_selection_method = _cluster_selection_method;
bool allow_single_cluster = _allow_single_cluster;
bool match_reference_implementation = false;

// Core distances
core_dist = _core_dist;

// MST
mst_linkage_core_vector(dim, core_dist, _distances, alpha);
mst_linkage_core_vector(cols, core_dist, _distances, alpha);
std::sort(mst_edges.begin(), mst_edges.end(), EdgeLess1);

// Extract the HDBSCAN hierarchy as a dendrogram from mst
Expand Down Expand Up @@ -741,7 +756,10 @@ vector<int> HDBScan::recurse_leaf_dfs(vector<CondensedTree*>& cluster_tree, int
}
}

void HDBScan::mst_linkage_core_vector(int num_features, vector<double>& core_distances, double** dist_metric, double alpha)
void HDBScan::mst_linkage_core_vector(int num_features,
vector<double>& core_distances,
double** dist_metric,
double alpha)
{
int dim = core_distances.size();

Expand Down
38 changes: 23 additions & 15 deletions Algorithms/hdbscan.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,36 +212,43 @@ namespace GeoDaClustering {
bool allow_single_cluster,
int rows, int cols,
double** _distances,
double** data,
vector<double> _core_dist,
const vector<bool>& undefs
//GalElement * w,
//double* controls,
//double control_thres
);
virtual ~HDBScan();


static vector<double> ComputeCoreDistance(double** input_data, int n_pts,
int n_dim, int min_samples,
char dist);

vector<vector<int> > GetRegions();

vector<double> outlier_scores(vector<CondensedTree*>& tree);

boost::unordered_map<int, double> compute_stability(vector<CondensedTree*>& condensed_tree);
boost::unordered_map<int, double> compute_stability(
vector<CondensedTree*>& condensed_tree);

void condense_tree(double** hierarchy, int N, int min_cluster_size=10);

vector<double> max_lambdas(vector<CondensedTree*>& tree);

vector<int> do_labelling(vector<CondensedTree*>& tree, set<int>& clusters,
boost::unordered_map<int, int>& cluster_label_map,
bool allow_single_cluster = false,
bool match_reference_implementation = false);
vector<int> do_labelling(vector<CondensedTree*>& tree,
set<int>& clusters,
boost::unordered_map<int, int>& cluster_label_map,
bool allow_single_cluster = false,
bool match_reference_implementation = false);

vector<double> get_probabilities(vector<CondensedTree*>& tree,
boost::unordered_map<int, int>& reverse_cluster_map,
vector<int>& labels);
boost::unordered_map<int, int>& reverse_cluster_map,
vector<int>& labels);

vector<double> get_stability_scores(vector<int>& labels, set<int>& clusters,
boost::unordered_map<int, double>& stability,
double max_lambda);
vector<double> get_stability_scores(vector<int>& labels,
set<int>& clusters,
boost::unordered_map<int, double>& stability,
double max_lambda);

void get_clusters(vector<CondensedTree*>& tree,
boost::unordered_map<int, double>& stability,
Expand All @@ -253,12 +260,13 @@ namespace GeoDaClustering {
bool match_reference_implementation=false);

void mst_linkage_core_vector(int num_features,
vector<double>& core_distances,
double** dist_metric, double alpha);
vector<double>& core_distances,
double** dist_metric, double alpha);

vector<int> get_cluster_tree_leaves(vector<CondensedTree*>& cluster_tree);

vector<int> recurse_leaf_dfs(vector<CondensedTree*>& cluster_tree, int current_node);
vector<int> recurse_leaf_dfs(vector<CondensedTree*>& cluster_tree,
int current_node);

vector<int> bfs_from_hierarchy(double** hierarchy, int dim, int bfs_root)
{
Expand Down
23 changes: 16 additions & 7 deletions Algorithms/spectral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,13 @@ void Spectral::affinity_matrix()

void Spectral::generate_kernel_matrix()
{
//If you have an affinity matrix, such as a distance matrix, for which 0 means identical elements, and high values means very dissimilar elements, it can be transformed in a similarity matrix that is well suited for the algorithm by applying the Gaussian (RBF, heat) kernel:
//np.exp(- X ** 2 / (2. * delta ** 2))
//delta = X.maxCoeff() - X.minCoeff();
// If you have an affinity matrix, such as a distance matrix,
// for which 0 means identical elements and high values mean very
// dissimilar elements, it can be transformed into a similarity matrix
// that is well suited for the algorithm by applying
// the Gaussian (RBF, heat) kernel:
// np.exp(- X ** 2 / (2. * delta ** 2))
// delta = X.maxCoeff() - X.minCoeff();

// Fill kernel matrix
K.resize(X.rows(),X.rows());
Expand Down Expand Up @@ -200,7 +204,8 @@ void Spectral::fast_eigendecomposition()
}
}

void Spectral::eigendecomposition(){
void Spectral::eigendecomposition()
{

//Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> edecomp(K, true);

Expand Down Expand Up @@ -260,8 +265,12 @@ void Spectral::cluster(int affinity_type)
generate_knn_matrix();
}

if (power_iter>0) fast_eigendecomposition();
else eigendecomposition();
if (power_iter>0) {
fast_eigendecomposition();
} else {
// try other method than eigen3, e.g. Intel MKL
eigendecomposition();
}
kmeans();
}

Expand All @@ -273,7 +282,7 @@ void Spectral::kmeans()
int transpose = 0; // row wise
int* clusterid = new int[rows];
double* weight = new double[columns];
for (int j=0; j<columns; j++){ weight[j] = 1;}
for (int j=0; j<columns; j++) weight[j] = 1;

// init input_data[rows][cols]
double** input_data = new double*[rows];
Expand Down
16 changes: 13 additions & 3 deletions Algorithms/spectral.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@ using namespace std;
class Spectral{

public:
Spectral() : centers(2), kernel_type(1), normalise(1), max_iters(1000), sigma(0.001), constant(1.0), order(2.0), method('a'), dist('e'), npass(10), n_maxiter(300) {}
explicit Spectral(MatrixXd& d) : centers(2), kernel_type(1), normalise(1), max_iters(1000), sigma(0.001), constant(1.0), order(2.0), method('a'), dist('e'), npass(10), n_maxiter(300) {X = d;}
Spectral() : centers(2), kernel_type(1), normalise(1), max_iters(1000),
sigma(0.001), constant(1.0), order(2.0), method('a'), dist('e'),
npass(10), n_maxiter(300) {}

explicit Spectral(MatrixXd& d) : centers(2), kernel_type(1), normalise(1),
max_iters(1000), sigma(0.001), constant(1.0), order(2.0), method('a'),
dist('e'), npass(10), n_maxiter(300) {X = d;}


void set_data(double** input_data, int nrows, int ncols);
Expand All @@ -38,7 +43,12 @@ class Spectral{
void set_order(const double i){order = i;};
void set_max_iters(const unsigned int i){max_iters = i;};
void set_power_iters(const unsigned int i){power_iter = i;};


// Overrides for the dist / method / npass / n_maxiter members, which the
// constructors initialise to 'e', 'a', 10 and 300 respectively.
// NOTE(review): presumably these are read by Spectral::kmeans() -- confirm
// there which character codes and ranges are accepted.
void set_kmeans_dist(char d) { dist = d;};        // replaces dist
void set_kmeans_method(char m) { method = m;};    // replaces method
void set_kmeans_npass(int n) { npass = n; };      // replaces npass
void set_kmeans_maxiter(int n) { n_maxiter = n;}; // replaces n_maxiter

void cluster(int affinity_type=0);
const std::vector<wxInt64> &get_assignments() const {return assignments;};

Expand Down
2 changes: 1 addition & 1 deletion BuildTools/macosx/GeoDa.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@
DDD593C612E9F90000F7A7C4 /* GalWeight.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GalWeight.cpp; sourceTree = "<group>"; };
DDD593C812E9F90C00F7A7C4 /* GwtWeight.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GwtWeight.h; sourceTree = "<group>"; };
DDD593C912E9F90C00F7A7C4 /* GwtWeight.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GwtWeight.cpp; sourceTree = "<group>"; };
DDDBF284163AD1D50070610C /* ConditionalMapView.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConditionalMapView.cpp; sourceTree = "<group>"; };
DDDBF284163AD1D50070610C /* ConditionalMapView.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConditionalMapView.cpp; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.cpp; };
DDDBF285163AD1D50070610C /* ConditionalMapView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConditionalMapView.h; sourceTree = "<group>"; };
DDDBF299163AD2BF0070610C /* ConditionalScatterPlotView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ConditionalScatterPlotView.h; sourceTree = "<group>"; };
DDDBF29A163AD2BF0070610C /* ConditionalScatterPlotView.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ConditionalScatterPlotView.cpp; sourceTree = "<group>"; };
Expand Down
15 changes: 9 additions & 6 deletions BuildTools/windows/GeoDa.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</LinkIncremental>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug2017|x64'">
<EnableManagedIncrementalBuild>true</EnableManagedIncrementalBuild>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
Expand Down Expand Up @@ -189,7 +192,7 @@
<AdditionalIncludeDirectories>temp\wxWidgets-3.1.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<Link>
<AdditionalDependencies>opencl.lib;zlibstat.lib;gdal_i.lib;libcurl.lib;libboost_date_time-vc100-mt-gd-1_57.lib;libboost_thread-vc100-mt-gd-1_57.lib;BLAS.lib;clapack.lib;libf2c.lib;json_spirit_lib.lib;sqlite3_i.lib;GlU32.lib;OpenGL32.lib;wxmsw31ud.lib;wxmsw31ud_gl.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;wxregexud.lib;wxexpatd.lib;wsock32.lib;comctl32.lib;winmm.lib;rpcrt4.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>opencl.lib;zlibstat.lib;gdal_i.lib;libcurl.lib;libboost_date_time-vc100-mt-gd-1_57.lib;libboost_thread-vc100-mt-gd-1_57.lib;BLAS.lib;clapack.lib;libf2c.lib;json_spirit_lib.lib;GlU32.lib;OpenGL32.lib;wxmsw31ud.lib;wxmsw31ud_gl.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;wxregexud.lib;wxexpatd.lib;wsock32.lib;comctl32.lib;winmm.lib;rpcrt4.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>C:\Intel\OpenCL\sdk\lib\x64;dep\zlib\lib;C:\OSGeo4W\lib;temp\wxWidgets-3.1.0\lib\vc_x64_dll;temp\CLAPACK-3.1.1-VisualStudio\LIB\x64;temp\boost_1_57_0\stage\lib;temp\json_spirit_v4.08\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<IgnoreAllDefaultLibraries>
</IgnoreAllDefaultLibraries>
Expand All @@ -205,8 +208,8 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug2017|x64'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>C:\OSGeo4W\include;temp\boost_1_57_0;temp\wxWidgets-3.1.0\include;temp\wxWidgets-3.1.0\lib\vc_x64_dll\mswud;temp\json_spirit_v4.08;temp\eigen3;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;DEBUG;_DEBUG;_WINDOWS;__WXMSW__;__WXDEBUG__;WXUSINGDLL;UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>..\..\deps\OpenCL\include;..\..\deps\zlib\include;..\..\deps\boost_1_57_0\include;..\..\deps\wxWidgets-3.1.0\include;..\..\deps\wxWidgets-3.1.0\lib\vc_x64_dll\mswud;..\..\deps\json_spirit_v4.08\include;..\..\deps\eigen3;..\..\deps\gdal-1.9.0\include;..\..\deps\curl-7.46.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;DEBUG;_WINDOWS;__WXMSW__;__WXDEBUG__;WXUSINGDLL;UNICODE;_UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>false</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
Expand All @@ -222,11 +225,11 @@
<ResourceCompile>
<PreprocessorDefinitions>__WXMSW__;_UNICODE;_WINDOWS;NOPCH;WXUSINGDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture>
<AdditionalIncludeDirectories>temp\wxWidgets-3.1.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\..\deps\wxWidgets-3.1.0\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<Link>
<AdditionalDependencies>opencl.lib;zlibstat.lib;gdal_i.lib;libcurl.lib;libboost_date_time-vc140-mt-gd-1_57.lib;libboost_thread-vc140-mt-gd-1_57.lib;BLAS.lib;clapack.lib;libf2c.lib;json_spirit_lib.lib;sqlite3_i.lib;GlU32.lib;OpenGL32.lib;wxmsw31ud.lib;wxmsw31ud_gl.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;wxregexud.lib;wxexpatd.lib;wsock32.lib;comctl32.lib;winmm.lib;rpcrt4.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>C:\Intel\OpenCL\sdk\lib\x64;dep\zlib\lib;C:\OSGeo4W\lib;temp\wxWidgets-3.1.0\lib\vc_x64_dll;temp\CLAPACK-3.1.1-VisualStudio\LIB\x64;temp\boost_1_57_0\stage\lib;temp\json_spirit_v4.08\Debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opencl.lib;zlibstat.lib;gdal_i.lib;libcurl.lib;libboost_date_time-vc140-mt-gd-1_57.lib;libboost_thread-vc140-mt-gd-1_57.lib;libboost_system-vc140-mt-gd-1_57.lib;BLAS.lib;clapack.lib;libf2c.lib;json_spirit_libd.lib;wxmsw31ud.lib;wxmsw31ud_gl.lib;wxtiffd.lib;wxjpegd.lib;wxpngd.lib;wxzlibd.lib;wxregexud.lib;wxexpatd.lib;wxscintillad.lib;wsock32.lib;comctl32.lib;winmm.lib;rpcrt4.lib;GlU32.lib;OpenGL32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\deps\zlib\lib;..\..\deps\boost_1_57_0\lib\x64;..\..\deps\wxWidgets-3.1.0\lib\vc_x64_dll;..\..\deps\OpenCL\lib\x64;..\..\deps\CLAPACK-3.1.1\lib\x64;..\..\deps\gdal-1.9.0\lib\x64;..\..\deps\json_spirit_v4.08\lib\x64;..\..\deps\curl-7.46.0\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<IgnoreAllDefaultLibraries>
</IgnoreAllDefaultLibraries>
<IgnoreSpecificDefaultLibraries>%(IgnoreSpecificDefaultLibraries)</IgnoreSpecificDefaultLibraries>
Expand Down
Loading

0 comments on commit b253ca4

Please sign in to comment.