diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 9419c3f5989be..7dcfbf741c4f1 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -678,7 +678,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
* @param partitioner partitioner of the resulting RDD.
*/
def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): JavaPairRDD[K, Long] =
@@ -694,7 +694,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
* @param numPartitions number of partitions of the resulting RDD.
*/
def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): JavaPairRDD[K, Long] = {
@@ -709,7 +709,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
*/
def countApproxDistinctByKey(relativeSD: Double): JavaPairRDD[K, Long] = {
fromRDD(rdd.countApproxDistinctByKey(relativeSD))
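For context, a minimal usage sketch of the per-key variant, written against the underlying Scala pair-RDD API that this Java wrapper delegates to; the SparkContext `sc` and the sample data are assumptions for illustration only.

  // Hypothetical usage sketch; assumes an existing SparkContext `sc`.
  // (Older Spark versions also need `import org.apache.spark.SparkContext._`
  // to bring the pair-RDD implicits into scope.)
  import org.apache.spark.rdd.RDD

  val visits: RDD[(String, String)] = sc.parallelize(Seq(
    ("site1", "user1"), ("site1", "user2"), ("site2", "user1")))

  // Approximate number of distinct users per site, within roughly 5% relative error.
  val distinctPerKey: RDD[(String, Long)] = visits.countApproxDistinctByKey(0.05)
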
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 2741532732c27..330569a8d8837 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -565,7 +565,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
*/
def countApproxDistinct(relativeSD: Double): Long = rdd.countApproxDistinct(relativeSD)
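A minimal sketch of the non-keyed call, shown on the Scala RDD that this Java wrapper forwards to; `sc` is an assumed SparkContext.

  // Hypothetical usage sketch; assumes an existing SparkContext `sc`.
  val ids = sc.parallelize(1 to 100000)

  // relativeSD = 0.05 is the documented default (about 5% relative error).
  val approxDefault: Long = ids.countApproxDistinct(0.05)

  // Tighter accuracy needs more memory per counter; relativeSD must stay above 0.000017.
  val approxTight: Long = ids.countApproxDistinct(0.001)
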
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index d459815ae7cbe..f2ce3cbd47f93 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -225,16 +225,16 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* would trigger sparse representation of registers, which may reduce the memory consumption
* and increase accuracy when the cardinality is small.
*
- *@param p The precision value for the normal set.
- * `p` must be a value between 4 and `sp` (32 max).
+ * @param p The precision value for the normal set.
+ * `p` must be a value between 4 and `sp` if `sp` is not zero (32 max).
* @param sp The precision value for the sparse set, between 0 and 32.
* If `sp` equals 0, the sparse representation is skipped.
* @param partitioner Partitioner to use for the resulting RDD.
*/
@Experimental
def countApproxDistinctByKey(p: Int, sp: Int, partitioner: Partitioner): RDD[(K, Long)] = {
- require(p >= 4, s"p ($p) should be >= 4")
- require(sp <= 32, s"sp ($sp) should be <= 32")
+ require(p >= 4, s"p ($p) must be >= 4")
+ require(sp <= 32, s"sp ($sp) must be <= 32")
require(sp == 0 || p <= sp, s"p ($p) cannot be greater than sp ($sp)")
val createHLL = (v: V) => {
val hll = new HyperLogLogPlus(p, sp)
@@ -261,11 +261,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
* @param partitioner partitioner of the resulting RDD
*/
def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = {
- require(relativeSD > 0.000017, s"accuracy ($relativeSD) should be greater than 0.000017")
+ require(relativeSD > 0.000017, s"accuracy ($relativeSD) must be greater than 0.000017")
val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt
assert(p <= 32)
countApproxDistinctByKey(if (p < 4) 4 else p, 0, partitioner)
@@ -279,7 +279,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
* @param numPartitions number of partitions of the resulting RDD
*/
def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = {
@@ -294,7 +294,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
- * It should be greater than 0.000017.
+ * It must be greater than 0.000017.
*/
def countApproxDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = {
countApproxDistinctByKey(relativeSD, defaultPartitioner(self))
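The 0.000017 lower bound enforced above follows from the precision calculation in countApproxDistinctByKey(relativeSD, partitioner): p = ceil(2 * log2(1.054 / relativeSD)) must not exceed 32. A standalone sketch of that arithmetic (no Spark needed; the helper name is made up for illustration):

  // Reproduces the precision formula used above.
  def precisionFor(relativeSD: Double): Int =
    math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt

  precisionFor(0.05)      // 9   -- the documented default accuracy
  precisionFor(0.01)      // 14
  precisionFor(0.000017)  // 32  -- still satisfies assert(p <= 32)
  precisionFor(0.000016)  // 33  -- would fail the assertion, hence the documented bound
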
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 58375b9b07c32..585b2f76afa65 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -929,9 +929,9 @@ abstract class RDD[T: ClassTag](
* and increase accuracy when the cardinality is small.
*
* @param p The precision value for the normal set.
- * p must be a value between 4 and sp.
+ * `p` must be a value between 4 and `sp` if `sp` is not zero (32 max).
* @param sp The precision value for the sparse set, between 0 and 32.
- * If sp equals 0, the sparse representation is skipped.
+ * If `sp` equals 0, the sparse representation is skipped.
*/
@Experimental
def countApproxDistinct(p: Int, sp: Int): Long = {
@@ -958,6 +958,7 @@ abstract class RDD[T: ClassTag](
* here.
*
* @param relativeSD Relative accuracy. Smaller values create counters that require more space.
+ * It must be greater than 0.000017.
*/
def countApproxDistinct(relativeSD: Double = 0.05): Long = {
val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt
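For completeness, a hedged sketch of the experimental (p, sp) overload documented above, following the constraints spelled out in the updated comments (p >= 4, sp <= 32, and p <= sp whenever sp is nonzero); `sc` and the sample data are assumptions.

  // Hypothetical usage sketch; assumes an existing SparkContext `sc`.
  val events = sc.parallelize(Seq("a", "b", "a", "c", "b", "d"))

  // sp == 0 skips the sparse representation entirely (dense registers only).
  val denseOnly: Long = events.countApproxDistinct(12, 0)

  // With a sparse set, p must not exceed sp and sp must not exceed 32.
  val withSparse: Long = events.countApproxDistinct(12, 20)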