diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index f3e64989ed564..20dbb4994f156 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -19,18 +19,13 @@
 >>> from pyspark.context import SparkContext
 >>> sc = SparkContext('local', 'test')
 >>> b = sc.broadcast([1, 2, 3, 4, 5])
->>> b.value
-[1, 2, 3, 4, 5]
-
->>> from pyspark.broadcast import _broadcastRegistry
->>> _broadcastRegistry[b.bid] = b
->>> from cPickle import dumps, loads
->>> loads(dumps(b)).value
-[1, 2, 3, 4, 5]
-
 >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect()
 [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
 
+>>> b = sc.broadcast([1, 2, 3, 4, 5], keep=True)
+>>> b.value
+[1, 2, 3, 4, 5]
+
 >>> large_broadcast = sc.broadcast(list(range(10000)))
 """
 # Holds broadcasted data received from Java, keyed by its id.
@@ -66,3 +61,8 @@ def __init__(self, bid, value, java_broadcast=None, pickle_registry=None):
     def __reduce__(self):
         self._pickle_registry.add(self)
         return (_from_id, (self.bid, ))
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()