Update comment to better reflect what we're doing
ash211 committed Sep 3, 2014
1 parent 09a27f7 commit e5f7e4d
Showing 2 changed files with 5 additions and 6 deletions.

core/src/main/scala/org/apache/spark/rdd/RDD.scala (2 additions & 3 deletions)

@@ -1064,9 +1064,8 @@ abstract class RDD[T: ClassTag](
       // greater than totalParts because we actually cap it at totalParts in runJob.
       var numPartsToTry = 1
       if (partsScanned > 0) {
-        // If we didn't find any rows after the first iteration, just try all partitions next.
-        // Otherwise, interpolate the number of partitions we need to try, but overestimate it
-        // by 50%.
+        // If we didn't find any rows after the previous iteration, double and retry. Otherwise,
+        // interpolate the number of partitions we need to try, but overestimate it by 50%.
         if (buf.size == 0) {
           numPartsToTry = partsScanned * 2
         } else {

python/pyspark/rdd.py (3 additions & 3 deletions)

@@ -1031,9 +1031,9 @@ def take(self, num):
             # we actually cap it at totalParts in runJob.
             numPartsToTry = 1
             if partsScanned > 0:
-                # If we didn't find any rows after the first iteration, just
-                # try all partitions next. Otherwise, interpolate the number
-                # of partitions we need to try, but overestimate it by 50%.
+                # If we didn't find any rows after the previous iteration,
+                # double and retry. Otherwise, interpolate the number of
+                # partitions we need to try, but overestimate it by 50%.
                 if len(items) == 0:
                     numPartsToTry = partsScanned * 2
                 else:
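
Both hunks document the same growth heuristic in take(): scan one partition on the first pass; if a pass finds no rows, double the number of partitions scanned (rather than jumping straight to all partitions, as the old comment claimed); otherwise interpolate from the observed rows-per-partition rate, overestimating by 50% to avoid an extra pass. Below is a minimal standalone sketch of that heuristic; the function next_parts_to_try and its signature are hypothetical illustrations, not Spark's actual API:

    def next_parts_to_try(parts_scanned, rows_found, rows_wanted, total_parts):
        """Estimate how many partitions the next take() pass should scan.

        Hypothetical sketch of the heuristic the updated comments describe;
        not Spark's actual API.
        """
        num_parts_to_try = 1  # first pass: scan a single partition
        if parts_scanned > 0:
            if rows_found == 0:
                # No rows found yet: double the scan and retry.
                num_parts_to_try = parts_scanned * 2
            else:
                # Interpolate from the observed rows-per-partition rate,
                # overestimating by 50%.
                num_parts_to_try = int(1.5 * rows_wanted * parts_scanned / rows_found)
        # In Spark the uncapped value is passed along and runJob caps it at
        # the total partition count, per the unchanged comment above.
        return min(num_parts_to_try, total_parts)

For example, finding 2 of 10 wanted rows after scanning 4 partitions yields int(1.5 * 10 * 4 / 2) = 30 partitions for the next pass, subject to the cap.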
