[SPARK-6661] Python type errors should print type, not object #5361

Closed (wants to merge 1 commit)
2 changes: 1 addition & 1 deletion python/pyspark/accumulators.py
@@ -83,7 +83,7 @@
>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
- Exception:...
+ TypeError:...
"""

import sys
2 changes: 1 addition & 1 deletion python/pyspark/context.py
@@ -671,7 +671,7 @@ def accumulator(self, value, accum_param=None):
elif isinstance(value, complex):
accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM
else:
- raise Exception("No default accumulator param for type %s" % type(value))
+ raise TypeError("No default accumulator param for type %s" % type(value))
SparkContext._next_accum_id += 1
return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)

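As a rough illustration of the new behavior (a sketch that assumes a running PySpark shell with a SparkContext named sc; exact message formatting varies by Python version):

    # int, float and complex values pick a default AccumulatorParam automatically.
    acc = sc.accumulator(0)
    acc += 5
    print(acc.value)                 # 5

    # A value with no default param (e.g. a list) now fails with TypeError
    # instead of a bare Exception, so callers can catch it precisely.
    try:
        sc.accumulator([1.0, 2.0, 3.0])
    except TypeError as e:
        print(e)                     # No default accumulator param for type <type 'list'>
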
2 changes: 1 addition & 1 deletion python/pyspark/ml/param/__init__.py
@@ -30,7 +30,7 @@ class Param(object):

def __init__(self, parent, name, doc):
if not isinstance(parent, Params):
raise ValueError("Parent must be a Params but got type %s." % type(parent).__name__)
raise TypeError("Parent must be a Params but got type %s." % type(parent))
self.parent = parent
self.name = str(name)
self.doc = str(doc)
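A minimal sketch of the tightened constructor check (the string parent is just an illustrative bad input; message text abbreviated):

    from pyspark.ml.param import Param

    try:
        Param("not a Params instance", "maxIter", "max number of iterations")
    except TypeError as e:
        print(e)   # Parent must be a Params but got type <type 'str'>.
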
4 changes: 2 additions & 2 deletions python/pyspark/ml/pipeline.py
@@ -131,8 +131,8 @@ def fit(self, dataset, params={}):
stages = paramMap[self.stages]
for stage in stages:
if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)):
- raise ValueError(
-     "Cannot recognize a pipeline stage of type %s." % type(stage).__name__)
+ raise TypeError(
+     "Cannot recognize a pipeline stage of type %s." % type(stage))
indexOfLastEstimator = -1
for i, stage in enumerate(stages):
if isinstance(stage, Estimator):
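Roughly how a misconfigured pipeline now fails; training_df is an assumed, pre-existing DataFrame and is never touched because the stage check runs first:

    from pyspark.ml import Pipeline

    pipeline = Pipeline(stages=["oops, not a stage"])
    try:
        model = pipeline.fit(training_df)
    except TypeError as e:
        print(e)   # Cannot recognize a pipeline stage of type <type 'str'>.
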
4 changes: 2 additions & 2 deletions python/pyspark/mllib/linalg.py
@@ -145,7 +145,7 @@ def serialize(self, obj):
values = [float(v) for v in obj]
return (1, None, None, values)
else:
raise ValueError("cannot serialize %r of type %r" % (obj, type(obj)))
raise TypeError("cannot serialize %r of type %r" % (obj, type(obj)))

def deserialize(self, datum):
assert len(datum) == 4, \
@@ -561,7 +561,7 @@ def __getitem__(self, index):
inds = self.indices
vals = self.values
if not isinstance(index, int):
- raise ValueError(
+ raise TypeError(
"Indices must be of type integer, got type %s" % type(index))
if index < 0:
index += self.size
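A short sketch of the SparseVector indexing path (constructor arguments are illustrative):

    from pyspark.mllib.linalg import SparseVector

    sv = SparseVector(4, {1: 1.0, 3: 5.5})
    sv[3]          # 5.5, integer indexing still works
    sv[-1]         # 5.5, negative indices too
    try:
        sv["3"]    # non-integer index
    except TypeError as e:
        print(e)   # Indices must be of type integer, got type <type 'str'>
    # An out-of-range integer index such as sv[7] still raises ValueError.
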
2 changes: 1 addition & 1 deletion python/pyspark/mllib/regression.py
@@ -170,7 +170,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
from pyspark.mllib.classification import LogisticRegressionModel
first = data.first()
if not isinstance(first, LabeledPoint):
raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
if initial_weights is None:
initial_weights = [0.0] * len(data.first().features)
if (modelClass == LogisticRegressionModel):
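A sketch of the training-side check (assumes a live SparkContext sc; the tuple RDD is deliberately malformed input):

    from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD

    good = sc.parallelize([LabeledPoint(1.0, [1.0, 2.0]),
                           LabeledPoint(0.0, [3.0, 4.0])])
    bad = sc.parallelize([(1.0, [1.0, 2.0])])   # plain tuples, not LabeledPoint

    model = LinearRegressionWithSGD.train(good)   # fine
    try:
        LinearRegressionWithSGD.train(bad)
    except TypeError as e:
        print(e)   # data should be an RDD of LabeledPoint, but got <type 'tuple'>
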
6 changes: 4 additions & 2 deletions python/pyspark/mllib/tests.py
@@ -135,8 +135,10 @@ def test_sparse_vector_indexing(self):
self.assertEquals(sv[-1], 2)
self.assertEquals(sv[-2], 0)
self.assertEquals(sv[-4], 0)
- for ind in [4, -5, 7.8]:
+ for ind in [4, -5]:
self.assertRaises(ValueError, sv.__getitem__, ind)
+ for ind in [7.8, '1']:
+     self.assertRaises(TypeError, sv.__getitem__, ind)

def test_matrix_indexing(self):
mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
@@ -450,7 +452,7 @@ def test_infer_schema(self):
elif isinstance(v, DenseVector):
self.assertEqual(v, self.dv1)
else:
raise ValueError("expecting a vector but got %r of type %r" % (v, type(v)))
raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))


@unittest.skipIf(not _have_scipy, "SciPy not installed")
12 changes: 6 additions & 6 deletions python/pyspark/sql/_types.py
@@ -562,8 +562,8 @@ def _infer_type(obj):
else:
try:
return _infer_schema(obj)
- except ValueError:
-     raise ValueError("not supported type: %s" % type(obj))
+ except TypeError:
+     raise TypeError("not supported type: %s" % type(obj))


def _infer_schema(row):
@@ -584,7 +584,7 @@ def _infer_schema(row):
items = sorted(row.__dict__.items())

else:
raise ValueError("Can not infer schema for type: %s" % type(row))
raise TypeError("Can not infer schema for type: %s" % type(row))

fields = [StructField(k, _infer_type(v), True) for k, v in items]
return StructType(fields)
@@ -696,7 +696,7 @@ def _merge_type(a, b):
return a
elif type(a) is not type(b):
# TODO: type cast (such as int -> long)
raise TypeError("Can not merge type %s and %s" % (a, b))
raise TypeError("Can not merge type %s and %s" % (type(a), type(b)))

# same type
if isinstance(a, StructType):
@@ -773,7 +773,7 @@ def convert_struct(obj):
elif hasattr(obj, "__dict__"): # object
d = obj.__dict__
else:
raise ValueError("Unexpected obj: %s" % obj)
raise TypeError("Unexpected obj type: %s" % type(obj))

if convert_fields:
return tuple([conv(d.get(name)) for name, conv in zip(names, converters)])
@@ -912,7 +912,7 @@ def _infer_schema_type(obj, dataType):
return StructType(fields)

else:
raise ValueError("Unexpected dataType: %s" % dataType)
raise TypeError("Unexpected dataType: %s" % type(dataType))


_acceptable_types = {
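These helpers are private, but the effect is visible through schema inference; a rough sketch (import path assumed, output abbreviated):

    from pyspark.sql.types import LongType, StringType, _infer_type, _merge_type

    _infer_type(1)             # LongType
    _infer_type({"a": 1})      # MapType(StringType, LongType, ...)
    try:
        _infer_type(object())  # nothing we know how to map
    except TypeError as e:
        print(e)               # not supported type: <type 'object'>

    try:
        _merge_type(LongType(), StringType())
    except TypeError as e:
        print(e)               # Can not merge type ...LongType... and ...StringType...
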
8 changes: 4 additions & 4 deletions python/pyspark/sql/context.py
@@ -208,7 +208,7 @@ def applySchema(self, rdd, schema):
raise TypeError("Cannot apply schema to DataFrame")

if not isinstance(schema, StructType):
raise TypeError("schema should be StructType, but got %s" % schema)
raise TypeError("schema should be StructType, but got %s" % type(schema))

return self.createDataFrame(rdd, schema)

@@ -281,7 +281,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
# data could be list, tuple, generator ...
rdd = self._sc.parallelize(data)
except Exception:
raise ValueError("cannot create an RDD from type: %s" % type(data))
raise TypeError("cannot create an RDD from type: %s" % type(data))
else:
rdd = data

@@ -293,8 +293,8 @@
if isinstance(schema, (list, tuple)):
first = rdd.first()
if not isinstance(first, (list, tuple)):
raise ValueError("each row in `rdd` should be list or tuple, "
"but got %r" % type(first))
raise TypeError("each row in `rdd` should be list or tuple, "
"but got %r" % type(first))
row_cls = Row(*schema)
schema = self._inferSchema(rdd.map(lambda r: row_cls(*r)), samplingRatio)

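Roughly how the createDataFrame checks now surface (assumes a SQLContext named sqlContext and a live sc; error text abbreviated):

    rows = sc.parallelize([{"name": "Alice", "age": 1}])    # rows are dicts
    try:
        sqlContext.createDataFrame(rows, ["name", "age"])   # schema given as column names
    except TypeError as e:
        print(e)   # each row in `rdd` should be list or tuple, but got <type 'dict'>

    try:
        sqlContext.createDataFrame(42)                      # cannot be turned into an RDD
    except TypeError as e:
        print(e)   # cannot create an RDD from type: <type 'int'>
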
2 changes: 1 addition & 1 deletion python/pyspark/sql/dataframe.py
@@ -608,7 +608,7 @@ def __getitem__(self, item):
jc = self._jdf.apply(self.columns[item])
return Column(jc)
else:
raise TypeError("unexpected type: %s" % type(item))
raise TypeError("unexpected item type: %s" % type(item))

def __getattr__(self, name):
"""Returns the :class:`Column` denoted by ``name``.
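Finally, a sketch of DataFrame indexing with the reworded error (df is assumed to be an existing DataFrame with an age column):

    df["age"]        # Column, selected by name
    df[0]            # Column, selected by ordinal position
    df[df.age > 1]   # filtered DataFrame
    try:
        df[1.5]      # neither a name, an index, a Column, nor a list
    except TypeError as e:
        print(e)     # unexpected item type: <type 'float'>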