diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 22cead99973d1..38f353c1c29a6 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -385,6 +385,7 @@ def takeSample(self, withReplacement, num, seed=None): # See: scala/spark/RDD.scala rand = Random(seed) while len(samples) < total: + #TODO add log warning when more than one iteration was run samples = self.sample(withReplacement, fraction, rand.randint(0, sys.maxint)).collect() sampler = RDDSampler(withReplacement, fraction, rand.randint(0, sys.maxint))