Skip to content

Commit

Permalink
Merge pull request biolab#82 from pavlin-policar/louvain-sorted-clusters
Browse files Browse the repository at this point in the history
OWLouvain: Sort cluster names by number of instances
  • Loading branch information
mstrazar authored Feb 7, 2018
2 parents 3a3e5b4 + 8ff50b4 commit abdd64e
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 2 deletions.
41 changes: 41 additions & 0 deletions orangecontrib/single_cell/tests/test_owlouvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import numpy as np

from Orange.data import Table, Domain
from Orange.widgets.tests.base import WidgetTest
from orangecontrib.single_cell.widgets.owlouvainclustering import \
OWLouvainClustering


# Deterministic tests: seed NumPy's global RNG when this module is imported so
# that the np.random.shuffle call in the test below yields the same order on
# every run.
np.random.seed(42)


class TestOWLouvain(WidgetTest):
    """Widget-level tests for ``OWLouvainClustering``."""

    def setUp(self):
        """Create a fresh widget with auto-commit switched off."""
        settings = {'auto_commit': False}
        self.widget = self.create_widget(
            OWLouvainClustering, stored_settings=settings
        )

    def tearDown(self):
        """Dispose of the widget, then run the base-class teardown."""
        self.widget.onDeleteWidget()
        super().tearDown()

    def test_clusters_ordered_by_size(self):
        """Cluster indices should be ordered by decreasing instance count."""
        # Four groups of identical points with strictly decreasing sizes, so
        # the expected size ranking of the clusters is unambiguous.
        groups = [
            ([0, 0], 20),
            ([1, 0], 15),
            ([0, 1], 10),
            ([1, 1], 5),
        ]
        data = np.vstack([np.array([point] * size) for point, size in groups])
        # Remove any order dependence in the data (row order should not
        # affect the result anyway).
        np.random.shuffle(data)

        domain = Domain.from_numpy(X=data)
        table = Table.from_numpy(domain=domain, X=data)
        self.send_signal(self.widget.Inputs.data, table)
        # Set the neighbourhood size, then force a commit since auto-commit
        # is disabled in setUp.
        self.widget.k_neighbours = 4
        self.widget.commit(force=True)
        output = self.get_output(self.widget.Outputs.annotated_data, wait=1000)

        # Count instances per cluster index; the counts must already be in
        # non-increasing order.
        cluster_column = output.get_column_view('Cluster')[0]
        cluster_sizes = np.bincount(cluster_column.astype(int))
        expected = sorted(cluster_sizes, reverse=True)
        np.testing.assert_equal(cluster_sizes, expected)
10 changes: 8 additions & 2 deletions orangecontrib/single_cell/widgets/owlouvainclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,20 @@ def commit(self, force=False):

def _send_data(self):
domain = self.data.domain
# Compute the frequency of each cluster index
counts = np.bincount(self.partition)
indices = np.argsort(counts)[::-1]
index_map = {n: o for n, o in zip(indices, range(len(indices)))}
new_partition = list(map(index_map.get, self.partition))

cluster_var = DiscreteVariable(
get_next_name(domain, 'Cluster'),
values=['C%d' % (i + 1) for i, _ in enumerate(np.unique(self.partition))]
values=['C%d' % (i + 1) for i, _ in enumerate(np.unique(new_partition))]
)

new_domain = add_columns(domain, metas=[cluster_var])
new_table = self.data.transform(new_domain)
new_table.get_column_view(cluster_var)[0][:] = self.partition
new_table.get_column_view(cluster_var)[0][:] = new_partition
self.Outputs.annotated_data.send(new_table)

if Graph is not None:
Expand Down

0 comments on commit abdd64e

Please sign in to comment.