Skip to content

Commit

Permalink
added arguments in python tests
Browse files Browse the repository at this point in the history
  • Loading branch information
FlytxtRnD committed May 5, 2015
1 parent 21eb84c commit 4d4e695
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
4 changes: 2 additions & 2 deletions python/pyspark/mllib/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class KMeansModel(Saveable, Loader):
>>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
>>> model = KMeans.train(
... sc.parallelize(data), 2, maxIterations=10, runs=30, initializationMode="random",
- ... initializationSteps=5, epsilon=1e-4, seed=None)
+ ... seed=None, initializationSteps=5, epsilon=1e-4)
>>> model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0]))
True
>>> model.predict(array([8.0, 9.0])) == model.predict(array([9.0, 8.0]))
Expand All @@ -58,7 +58,7 @@ class KMeansModel(Saveable, Loader):
... SparseVector(3, {2: 1.1})
... ]
>>> model = KMeans.train(sc.parallelize(sparse_data), 2, initializationMode="k-means||",
- ... initializationSteps=5, epsilon=1e-4, seed=None)
+ ... seed=None, initializationSteps=5, epsilon=1e-4)
>>> model.predict(array([0., 1., 0.])) == model.predict(array([0, 1.1, 0.]))
True
>>> model.predict(array([0., 0., 1.])) == model.predict(array([0, 0, 1.1]))
Expand Down
9 changes: 6 additions & 3 deletions python/pyspark/mllib/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ def test_kmeans(self):
[1.1, 0],
[1.2, 0],
]
- clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||")
+ clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||",
+ initializationSteps=7, epsilon=1e-4)
self.assertEquals(clusters.predict(data[0]), clusters.predict(data[1]))
self.assertEquals(clusters.predict(data[2]), clusters.predict(data[3]))

Expand All @@ -243,9 +244,11 @@ def test_kmeans_deterministic(self):
Y = range(0, 100, 10)
data = [[x, y] for x, y in zip(X, Y)]
clusters1 = KMeans.train(self.sc.parallelize(data),
- 3, initializationMode="k-means||", seed=42)
+ 3, initializationMode="k-means||",
+ seed=42, initializationSteps=7, epsilon=1e-4)
clusters2 = KMeans.train(self.sc.parallelize(data),
- 3, initializationMode="k-means||", seed=42)
+ 3, initializationMode="k-means||",
+ seed=42, initializationSteps=7, epsilon=1e-4)
centers1 = clusters1.centers
centers2 = clusters2.centers
for c1, c2 in zip(centers1, centers2):
Expand Down

0 comments on commit 4d4e695

Please sign in to comment.