diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 7020397f3b064..0e07dfabfeaab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -102,6 +102,8 @@ object PCA extends DefaultParamsReadable[PCA] { * Model fitted by [[PCA]]. * * @param pc A principal components Matrix. Each column is one principal component. + * @param explainedVariance A vector of proportions of variance explained by + * each principal component. */ @Experimental class PCAModel private[ml] ( diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 141ec3492aa94..1fa0eab384e7b 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1987,6 +1987,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): >>> model = pca.fit(df) >>> model.transform(df).collect()[0].pca_features DenseVector([1.648..., -4.013...]) + >>> model.explainedVariance + DenseVector([0.794..., 0.205...]) .. versionadded:: 1.5.0 """ @@ -2052,6 +2054,15 @@ def pc(self): """ return self._call_java("pc") + @property + @since("2.0.0") + def explainedVariance(self): + """ + Returns a vector of proportions of variance explained by each + principal component, read via `_call_java("explainedVariance")`. + """ + return self._call_java("explainedVariance") + @inherit_doc class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):