From 906d1800517c8231138577cd285c8f2df6372ad3 Mon Sep 17 00:00:00 2001
From: Ahir Reddy
Date: Mon, 7 Apr 2014 23:36:53 -0700
Subject: [PATCH] added todo explaining cost of creating Row object in python

---
 python/pyspark/rdd.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 16c7a3ba49224..c090f970be40a 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1419,6 +1419,9 @@ def registerAsTable(self, name):

     def toPython(self):
         jrdd = self._jschema_rdd.javaToPython()
+        # TODO: This is inefficient, we should construct the Python Row object
+        # in Java land in the javaToPython function. May require a custom
+        # pickle serializer in Pyrolite
         return RDD(jrdd, self._sc, self._sc.serializer).map(lambda d: Row(d))

 def _test():