-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlgorithm3.py
135 lines (115 loc) · 3.68 KB
/
Algorithm3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# -*- coding: utf-8 -*-
# @Time : 2018/11/22 19:46
# @Author : xieyunshen
# @Email : [email protected]
# @File : Algorithm3.py
# @Software: PyCharm
# @ModifyTime:
# Unweight Bregman Consensus Clustering With Constraints
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
import numpy as np
def generate_CCM(partitions):
    """Build one cluster aggregation (co-association) matrix per partition.

    For a partition holding n labels, entry (i, j) of its matrix is 1.0 when
    positions i and j carry the same label and 0.0 otherwise.

    :param partitions: iterable of label sequences, one sequence per partition
    :return: list of (n, n) float numpy arrays, in the order of ``partitions``
    """
    M_list = []
    for part in partitions:
        labels = np.asarray(part)
        # Comparing a column view against a row view evaluates every (i, j)
        # pair in one vectorized step instead of a Python-level double loop.
        same_cluster = labels[:, None] == labels[None, :]
        M_list.append(same_cluster.astype(float))
    return M_list
def UBCCC(partitions, M, C):
    """Unweighted Bregman Consensus Clustering with Constraints.

    Averages the co-association matrices of all partitions, then overwrites
    the entries of constrained pairs: must-link pairs get the maximum of the
    averaged matrix and cannot-link pairs get its minimum. Both extremes are
    taken before any constraint is written. Cannot-link constraints are
    applied last, so they win when a pair appears in both sets.

    :param partitions: iterable of label sequences, one per partition
    :param M: must-link pairs; (xp, xq) in M indicates that xp and xq belong
        to the same cluster. May be empty or None.
    :param C: cannot-link pairs; (xp, xq) in C denotes that xp and xq belong
        to different clusters. May be empty or None.
    :return: the (n, n) consensus matrix with the constraints applied
    :raises ValueError: if ``partitions`` is empty
    """
    M_list = generate_CCM(partitions)
    if not M_list:
        # Without this guard the failure would surface as an opaque
        # IndexError (or a division by zero) further down.
        raise ValueError('UBCCC needs at least one partition')

    # Element-wise mean over the m co-association matrices.
    optimal_M = np.sum(M_list, axis=0) / len(M_list)
    s_max = np.max(optimal_M)
    s_min = np.min(optimal_M)

    # Constraints are written symmetrically so the matrix stays symmetric.
    for cell in (M if M is not None else ()):
        optimal_M[cell[0], cell[1]] = s_max
        optimal_M[cell[1], cell[0]] = s_max
    for cell in (C if C is not None else ()):
        optimal_M[cell[0], cell[1]] = s_min
        optimal_M[cell[1], cell[0]] = s_min
    return optimal_M
# Build the M and C sets: M holds must-link constraints, C holds cannot-link constraints
def generate_M_C(file, num_samples=20, max_index=125, label_column='Death'):
    """Sample row indices and derive pairwise constraints from their labels.

    Draws ``num_samples`` random row indices (duplicates collapse, so fewer
    distinct rows may be used) and, for every ordered pair of distinct
    sampled rows, records the pair as must-link when the two labels are
    equal and as cannot-link otherwise. Each unordered pair therefore
    appears twice, once as [i, j] and once as [j, i].

    :param file: path of a comma-separated file with a header row
    :param num_samples: how many random indices to draw
    :param max_index: exclusive upper bound for the drawn indices; it is
        clamped to the number of rows so a short file cannot cause an
        out-of-range lookup
    :param label_column: name of the column holding the labels
    :return: tuple ``(M, C)`` of lists of [i, j] index pairs
    """
    data = pd.read_csv(file, delimiter=',', encoding='utf-8', header=0)
    labels = list(data[label_column])

    upper = min(max_index, len(labels))
    if upper <= 0:
        # Nothing to sample from; randint would reject an empty range.
        return [], []

    M = []
    C = []
    nums = set(np.random.randint(0, upper, num_samples))
    for i in nums:
        for j in nums:
            if i == j:
                continue
            if labels[i] == labels[j]:
                M.append([i, j])
            else:
                C.append([i, j])
    return M, C
# Draw the heatmap
def generate_heatmap(M,title):
    """Render ``M`` as a heatmap, display it, and save it as a JPEG.

    :param M: matrix-like data handed to ``sns.heatmap``
    :param title: figure title; also the base name of the saved image,
        written to ``./<title>.jpg``
    """
    fig, axes = plt.subplots(figsize=(10, 10))
    # Eight-step palette built by seaborn from "black", reversed.
    palette = sns.light_palette("black", reverse=True, n_colors=8)
    sns.heatmap(M, cbar=False, cmap=palette)
    axes.set_title(title)
    plt.show()
    target = './' + title + '.jpg'
    fig.savefig(target)
def get_data(file):
    """Load a CSV file and return its columns, except the last, as lists.

    Values are rounded to 6 decimal places on load. The result is organised
    column-wise: each inner list holds all row values of one column.

    :param file: path of a comma-separated file with a header row
    :return: list of per-column value lists, in file order, with the final
        column left out
    """
    data = pd.read_csv(file, delimiter=',', encoding='utf-8', header=0).round(6)
    # Plain column access replaces the ``DataFrame.ix`` indexer, which no
    # longer exists in current pandas releases.
    # NOTE(review): the last column is skipped, as in the original loop
    # bound; the reason is not visible here -- confirm against the data.
    kept_columns = list(data.columns)[:-1]
    return [list(data[name]) for name in kept_columns]
def generate_cluster(cluster_data,n_clusters=3):
    """Cluster ``cluster_data`` with K-means and return the label per row.

    :param cluster_data: array-like converted with ``np.array``; each row of
        the resulting array is one sample for K-means
    :param n_clusters: number of clusters to fit
    :return: list with the cluster label assigned to each row
    """
    samples = np.array(cluster_data)
    # random_state is pinned to 0, as in every call of this helper.
    model = KMeans(n_clusters=n_clusters, random_state=0)
    model.fit(samples)
    return list(model.labels_)
# For each K in the given range, run K-means clustering to produce the partitions.
def generate_partitions(file,s,e):
    """Produce one K-means partition for every cluster count in [s, e].

    :param file: path of the CSV file passed to ``get_data``
    :param s: smallest number of clusters (inclusive)
    :param e: largest number of clusters (inclusive)
    :return: list of label lists, ordered by increasing cluster count
    """
    # The file is loaded once and the same data is reused for every K.
    features = get_data(file)
    return [generate_cluster(features, k) for k in range(s, e + 1)]
if __name__=='__main__':
    # Base partitions: K-means on the gene data for every K from 2 to 10.
    file_path = '../../data/GBM/Gene.csv'
    partitions = generate_partitions(file_path, 2, 10)

    # Must-link / cannot-link pairs derived from the survival labels.
    survival_path = '../../data/GBM/Survival.csv'
    M, C = generate_M_C(survival_path)

    # Constrained consensus matrix, shown and saved as a heatmap.
    output_M = UBCCC(partitions, M, C)
    generate_heatmap(output_M,'UBCCC_1')