From 19cc505718281fb7b24750b0bb08b43eb7c9841e Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:28:30 +0100 Subject: [PATCH 01/15] Update ml.base --- third_party/3/pyspark/ml/base.pyi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/3/pyspark/ml/base.pyi b/third_party/3/pyspark/ml/base.pyi index 16edf396..099f2e9b 100644 --- a/third_party/3/pyspark/ml/base.pyi +++ b/third_party/3/pyspark/ml/base.pyi @@ -1,8 +1,7 @@ -# Stubs for pyspark.ml.base (Python 3.5) - import abc from typing import overload from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Tuple, Type, TypeVar +from pyspark.ml._typing import M import _thread @@ -43,3 +42,5 @@ class UnaryTransformer(HasInputCol, HasOutputCol, Transformer): def outputDataType(self) -> DataType: ... def validateInputType(self, inputType: DataType) -> None: ... def transformSchema(self, schema: StructType) -> StructType: ... + def setInputCol(self: M, value: str) -> M: ... + def setOutputCol(self: M, value: str) -> M: ... From 31671984e9d8c8a08e756daa52b7d7765a377123 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:35:39 +0100 Subject: [PATCH 02/15] Update ml.classification.LinearSVC --- third_party/3/pyspark/ml/classification.pyi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 8bfef4f4..e8cc81e7 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -39,8 +39,17 @@ class _LinearSVCParams(_JavaClassifierParams, HasRegParam, HasMaxIter, HasFitInt class LinearSVC(JavaClassifier[LinearSVCModel], _LinearSVCParams, JavaMLWritable, JavaMLReadable[LinearSVC]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> LinearSVC: ... + def setMaxIter(self, value: int) -> LinearSVC: ... + def setRegParam(self, value: float) -> LinearSVC: ... + def setTol(self, value: float) -> LinearSVC: ... + def setFitIntercept(self, value: bool) -> LinearSVC: ... + def setStandardization(self, value: bool) -> LinearSVC: ... + def setThreshold(self, value: float) -> LinearSVC: ... + def setWeightCol(self, value: str) -> LinearSVC: ... + def setAggregationDepth(self, value: int) -> LinearSVC: ... class LinearSVCModel(JavaClassificationModel[Vector], _LinearSVCParams, JavaMLWritable, JavaMLReadable[LinearSVCModel]): + def setThreshold(self, value: float) -> LinearSVCModel: ... @property def coefficients(self) -> Vector: ... 
@property From 46b5048555ec4c0dfcb5eed789828c7a84f78a51 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:42:08 +0100 Subject: [PATCH 03/15] Update ml.classification.LogisticRegression --- third_party/3/pyspark/ml/classification.pyi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index e8cc81e7..a679a311 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -80,6 +80,14 @@ class LogisticRegression(JavaProbabilisticClassifier[LogisticRegressionModel], _ def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... + def setMaxIter(self, value: int) -> LogisticRegression: ... + def setRegParam(self, value: float) -> LogisticRegression: ... + def setTol(self, value: float) -> LogisticRegression: ... + def setElasticNetParam(self, value: float) -> LogisticRegression: ... + def setFitIntercept(self, value: bool) -> LogisticRegression: ... + def setStandardization(self, value: bool) -> LogisticRegression: ... + def setWeightCol(self, value: str) -> LogisticRegression: ... + def setAggregationDepth(self, value: int) -> LogisticRegression: ... class LogisticRegressionModel(JavaProbabilisticClassificationModel[Vector], _LogisticRegressionParams, JavaMLWritable, JavaMLReadable[LogisticRegressionModel], HasTrainingSummary[LogisticRegressionTrainingSummary]): @property From cfe9623113a165ddd1d2362fbc8b50f321d05e1d Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:45:22 +0100 Subject: [PATCH 04/15] Update ml.classification.DecisionTreeClassifier --- third_party/3/pyspark/ml/classification.pyi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index a679a311..7155bf9e 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -173,6 +173,9 @@ class DecisionTreeClassifier(JavaProbabilisticClassifier[DecisionTreeClassificat def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ... def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... def setImpurity(self, value: str) -> DecisionTreeClassifier: ... + def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ... + def setSeed(self, value: int) -> DecisionTreeClassifier: ... + def setWeightCol(self, value: str) -> DecisionTreeClassifier: ... class DecisionTreeClassificationModel(_DecisionTreeModel, JavaProbabilisticClassificationModel[Vector], _DecisionTreeClassifierParams, JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]): @property From 26a3e8ff76506606fd852cf4af276cc44442a852 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:46:28 +0100 Subject: [PATCH 05/15] Update ml.classification.RandomForestClassifier --- third_party/3/pyspark/ml/classification.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 7155bf9e..8ef91920 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -196,6 +196,8 @@ class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificat def setNumTrees(self, value: int) -> RandomForestClassifier: ... 
def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... + def setSeed(self, value: int) -> RandomForestClassifier: ... + def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... class RandomForestClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): @property From 074d0f73e8af86d39c5100d2d7f9c45f9546c9f0 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:49:32 +0100 Subject: [PATCH 06/15] Update ml.classification.GBTClassifier --- third_party/3/pyspark/ml/classification.pyi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 8ef91920..28f1a20a 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -197,7 +197,7 @@ class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificat def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... def setSeed(self, value: int) -> RandomForestClassifier: ... - def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... + def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... class RandomForestClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): @property @@ -224,6 +224,10 @@ class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], _GBTCla def setSubsamplingRate(self, value: float) -> GBTClassifier: ... def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... + def setMaxIter(self, value: int) -> GBTClassifier: ... + def setCheckpointInterval(self, value: int) -> GBTClassifier: ... + def setSeed(self, value: int) -> GBTClassifier: ... + def setStepSize(self, value: float) -> GBTClassifier: ... class GBTClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _GBTClassifierParams, JavaMLWritable, JavaMLReadable[GBTClassificationModel]): @property From f105d139e9ed9038c084dd9ee6844293841d6c0c Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:50:46 +0100 Subject: [PATCH 07/15] Update ml.classification.NaiveBayes --- third_party/3/pyspark/ml/classification.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 28f1a20a..a566c340 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -247,6 +247,7 @@ class NaiveBayes(JavaProbabilisticClassifier[NaiveBayesModel], _NaiveBayesParams def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., smoothing: float = ..., modelType: str = ..., thresholds: Optional[List[float]] = ..., weightCol: Optional[str] = ...) -> NaiveBayes: ... def setSmoothing(self, value: float) -> NaiveBayes: ... def setModelType(self, value: str) -> NaiveBayes: ... + def setWeightCol(self, value: str) -> NaiveBayes: ... 
class NaiveBayesModel(JavaProbabilisticClassificationModel[Vector], _NaiveBayesParams, JavaMLWritable, JavaMLReadable[NaiveBayesModel]): @property From 5ad7d60fbb00179de127ac983247b3104f8a312f Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 12 Jan 2020 23:58:18 +0100 Subject: [PATCH 08/15] Update ml.classification.MultilayerPerceptronClassifier --- third_party/3/pyspark/ml/classification.pyi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index a566c340..d0e796c2 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -270,8 +270,12 @@ class MultilayerPerceptronClassifier(JavaProbabilisticClassifier[MultilayerPerce def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., tol: float = ..., seed: Optional[int] = ..., layers: Optional[List[int]] = ..., blockSize: int = ..., stepSize: float = ..., solver: str = ..., initialWeights: Optional[Vector] = ..., probabilityCol: str = ..., rawPredictionCol: str = ...) -> MultilayerPerceptronClassifier: ... def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ... def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ... - def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ... + def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ... + def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ... + def setTol(self, value: float) -> MultilayerPerceptronClassifier: ... + def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... + def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ... class MultilayerPerceptronClassificationModel(JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassificationModel]): @property From 3ac009161846a40560ef975bddade4a625c61b2d Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 00:02:44 +0100 Subject: [PATCH 09/15] Update ml.classification.OneVsRest --- third_party/3/pyspark/ml/classification.pyi | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index d0e796c2..68d7063d 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -291,10 +291,18 @@ class OneVsRest(Estimator[OneVsRestModel], _OneVsRestParams, HasParallelism, Jav def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., rawPredictionCol: str = ..., classifier: Optional[Estimator[M]] = ..., weightCol: Optional[str] = ..., parallelism: int = ...) -> None: ... def setParams(self, *, featuresCol: Optional[str] = ..., labelCol: Optional[str] = ..., predictionCol: Optional[str] = ..., rawPredictionCol: str = ..., classifier: Optional[Estimator[M]] = ..., weightCol: Optional[str] = ..., parallelism: int = ...) -> OneVsRest: ... def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... + def setLabelCol(self, value: str) -> OneVsRest: ... + def setFeaturesCol(self, value: str) -> OneVsRest: ... + def setPredictionCol(self, value: str) -> OneVsRest: ... + def setRawPredictionCol(self, value: str) -> OneVsRest: ... + def setWeightCol(self, value: str) -> OneVsRest: ... 
+ def setParallelism(self, value: int) -> OneVsRest: ... def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable): models: List[Transformer] def __init__(self, models: List[Transformer]) -> None: ... - def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... + def setFeaturesCol(self, value: str) -> OneVsRestModel: ... + def setPredictionCol(self, value: str) -> OneVsRestModel: ... + def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... From 17b45570f65df6a98b044defbf867fd673239ac4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 16:55:29 +0100 Subject: [PATCH 10/15] Update ml.clustering.GaussianMixture annotations --- third_party/3/pyspark/ml/clustering.pyi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index f3b33277..a239e92e 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -32,6 +32,9 @@ class _GaussianMixtureParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC def getK(self) -> int: ... class GaussianMixtureModel(JavaModel, _GaussianMixtureParams, JavaMLWritable, JavaMLReadable[GaussianMixtureModel], HasTrainingSummary[GaussianMixtureSummary]): + def setFeaturesCol(self, value: str) -> GaussianMixtureModel: ... + def setPredictionCol(self, value: str) -> GaussianMixtureModel: ... + def setProbabilityCol(self, value: str) -> GaussianMixtureModel: ... @property def weights(self) -> List[float]: ... @property @@ -45,6 +48,14 @@ class GaussianMixture(JavaEstimator[GaussianMixtureModel], _GaussianMixtureParam def __init__(self, *, featuresCol: str = ..., predictionCol: str = ..., k: int = ..., probabilityCol: str = ..., tol: float = ..., maxIter: int = ..., seed: Optional[int] = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., predictionCol: str = ..., k: int = ..., probabilityCol: str = ..., tol: float = ..., maxIter: int = ..., seed: Optional[int] = ...) -> GaussianMixture: ... def setK(self, value: int) -> GaussianMixture: ... + def setMaxIter(self, value: int) -> GaussianMixture: ... + def setFeaturesCol(self, value: str) -> GaussianMixture: ... + def setPredictionCol(self, value: str) -> GaussianMixture: ... + def setProbabilityCol(self, value: str) -> GaussianMixture: ... + def setWeightCol(self, value: str) -> GaussianMixture: ... + def setSeed(self, value: int) -> GaussianMixture: ... + def setTol(self, value: float) -> GaussianMixture: ... + def setAggregationDepth(self, value: int) -> GaussianMixture: ... class GaussianMixtureSummary(ClusteringSummary): @property From 1be1ed436c4ed680b39be5483690bf2d2c0e071d Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 16:59:05 +0100 Subject: [PATCH 11/15] Update ml.clustering.KMeans annotations --- third_party/3/pyspark/ml/clustering.pyi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index a239e92e..ac808a36 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -77,6 +77,8 @@ class _KMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasTo def getInitSteps(self) -> int: ... 
class KMeansModel(JavaModel, _KMeansParams, GeneralJavaMLWritable, JavaMLReadable[KMeansModel], HasTrainingSummary[KMeansSummary]): + def setFeaturesCol(self, value: str) -> KMeansModel: ... + def setPredictionCol(self, value: str) -> KMeansModel: ... def clusterCenters(self) -> List[ndarray]: ... @property def summary(self) -> KMeansSummary: ... @@ -89,6 +91,12 @@ class KMeans(JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLRe def setInitMode(self, value: str) -> KMeans: ... def setInitSteps(self, value: int) -> KMeans: ... def setDistanceMeasure(self, value: str) -> KMeans: ... + def setMaxIter(self, value: int) -> KMeans: ... + def setFeaturesCol(self, value: str) -> KMeans: ... + def setPredictionCol(self, value: str) -> KMeans: ... + def setSeed(self, value: int) -> KMeans: ... + def setTol(self, value: float) -> KMeans: ... + def setWeightCol(self, value: str) -> KMeans: ... class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasDistanceMeasure): k: Param[int] From 074b23a10ea0ce409122fa02c2cbe1117be2ff03 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 17:01:08 +0100 Subject: [PATCH 12/15] Update ml.clustering.BisectingKMeans annotations --- third_party/3/pyspark/ml/clustering.pyi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index ac808a36..b862a93c 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -105,6 +105,8 @@ class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC def getMinDivisibleClusterSize(self) -> float: ... class BisectingKMeansModel(JavaModel, _BisectingKMeansParams, JavaMLWritable, JavaMLReadable[BisectingKMeansModel], HasTrainingSummary[BisectingKMeansSummary]): + def setFeaturesCol(self, value: str) -> BisectingKMeansModel: ... + def setPredictionCol(self, value: str) -> BisectingKMeansModel: ... def clusterCenters(self) -> List[ndarray]: ... def computeCost(self, dataset: DataFrame) -> float: ... @property @@ -117,6 +119,10 @@ class BisectingKMeans(JavaEstimator[BisectingKMeansModel], _BisectingKMeansParam def setK(self, value: int) -> BisectingKMeans: ... def setMinDivisibleClusterSize(self, value: float) -> BisectingKMeans: ... def setDistanceMeasure(self, value: str) -> BisectingKMeans: ... + def setMaxIter(self, value: int) -> BisectingKMeans: ... + def setFeaturesCol(self, value: str) -> BisectingKMeans: ... + def setPredictionCol(self, value: str) -> BisectingKMeans: ... + def setSeed(self, value: int) -> BisectingKMeans: ... class BisectingKMeansSummary(ClusteringSummary): @property From d8546189cabe4a18f2a66da79dad5dff8194e693 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 17:26:47 +0100 Subject: [PATCH 13/15] Update ml.clustering.LDA annotations --- third_party/3/pyspark/ml/clustering.pyi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index b862a93c..00c8ea16 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -151,6 +151,9 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): def getKeepLastCheckpoint(self) -> bool: ... class LDAModel(JavaModel, _LDAParams): + def setFeaturesCol(self, value: str) -> LDAModel: ... + def setSeed(self, value: int) -> LDAModel: ... + def setTopicDistributionCol(self, value: str) -> LDAModel: ... 
def isDistributed(self) -> bool: ... def vocabSize(self) -> int: ... def topicsMatrix(self) -> Matrix: ... @@ -170,6 +173,9 @@ class LocalLDAModel(LDAModel, JavaMLReadable[LocalLDAModel], JavaMLWritable): .. class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritable): def __init__(self, *, featuresCol: str = ..., maxIter: int = ..., seed: Optional[int] = ..., checkpointInterval: int = ..., k: int = ..., optimizer: str = ..., learningOffset: float = ..., learningDecay: float = ..., subsamplingRate: float = ..., optimizeDocConcentration: bool = ..., docConcentration: Optional[List[float]] = ..., topicConcentration: Optional[float] = ..., topicDistributionCol: str = ..., keepLastCheckpoint: bool = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., maxIter: int = ..., seed: Optional[int] = ..., checkpointInterval: int = ..., k: int = ..., optimizer: str = ..., learningOffset: float = ..., learningDecay: float = ..., subsamplingRate: float = ..., optimizeDocConcentration: bool = ..., docConcentration: Optional[List[float]] = ..., topicConcentration: Optional[float] = ..., topicDistributionCol: str = ..., keepLastCheckpoint: bool = ...) -> LDA: ... + def setCheckpointInterval(self, value: int) -> LDA: ... + def setSeed(self, value: int) -> LDA: ... + def setK(self, value: int) -> LDA: ... def setOptimizer(self, value: str) -> LDA: ... def setLearningOffset(self, value: float) -> LDA: ... def setLearningDecay(self, value: float) -> LDA: ... @@ -179,6 +185,8 @@ class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritab def setTopicConcentration(self, value: float) -> LDA: ... def setTopicDistributionCol(self, value: str) -> LDA: ... def setKeepLastCheckpoint(self, value: bool) -> LDA: ... + def setMaxIter(self, value: int) -> LDA: ... + def setFeaturesCol(self, value: str) -> LDA: ... class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): k: Param[int] From 88cad157c5f5b2f3a4651653307e1cdbafff65e1 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 17:35:11 +0100 Subject: [PATCH 14/15] Update ml.clustering.PowerIterationClustering annotations --- third_party/3/pyspark/ml/clustering.pyi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/3/pyspark/ml/clustering.pyi b/third_party/3/pyspark/ml/clustering.pyi index 00c8ea16..2710d58d 100644 --- a/third_party/3/pyspark/ml/clustering.pyi +++ b/third_party/3/pyspark/ml/clustering.pyi @@ -205,4 +205,6 @@ class PowerIterationClustering(_PowerIterationClusteringParams, JavaParams, Java def setInitMode(self, value: str) -> PowerIterationClustering: ... def setSrcCol(self, value: str) -> str: ... def setDstCol(self, value: str) -> PowerIterationClustering: ... + def setMaxIter(self, value: int) -> PowerIterationClustering: ... + def setWeightCol(self, value: str) -> PowerIterationClustering: ... def assignClusters(self, dataset: DataFrame) -> DataFrame: ... 
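
Note on the clustering annotations above: each estimator and model setter is declared to return its concrete class (KMeans, KMeansModel, GaussianMixture, ...), so fluent configuration chains keep their precise types instead of degrading to a bare Params/JavaModel. A minimal usage sketch follows; it assumes pyspark is installed with these stubs on the mypy path, and the SparkSession plus the toy "features" DataFrame are illustrative only, not part of the patch:

    from pyspark.ml.clustering import KMeans
    from pyspark.ml.linalg import Vectors
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Toy data: two well-separated points in a single "features" vector column.
    df = spark.createDataFrame(
        [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([8.0, 8.0]),)],
        ["features"],
    )

    # Each setter below is annotated to return KMeans, so the chain stays typed.
    kmeans = (
        KMeans(k=2)
        .setMaxIter(10)
        .setSeed(1)
        .setTol(1e-4)
        .setFeaturesCol("features")
        .setPredictionCol("cluster")
    )

    model = kmeans.fit(df)                      # typed as KMeansModel
    model = model.setPredictionCol("cluster")   # still KMeansModel under these stubs

    # kmeans.setMaxIter("10")  # mypy would flag this: expected int, got str
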
From 689450236861612e5dcf2ad8764d47e545dc9c1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 13 Jan 2020 19:32:43 +0100 Subject: [PATCH 15/15] Update ml.evaluation annotations --- third_party/3/pyspark/ml/evaluation.pyi | 35 +++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/third_party/3/pyspark/ml/evaluation.pyi b/third_party/3/pyspark/ml/evaluation.pyi index e3ba13b2..38a68849 100644 --- a/third_party/3/pyspark/ml/evaluation.pyi +++ b/third_party/3/pyspark/ml/evaluation.pyi @@ -7,7 +7,7 @@ from pyspark.ml._typing import ParamMap, BinaryClassificationEvaluatorMetricType from pyspark.ml.wrapper import JavaParams from pyspark.ml.param import Param, Params -from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasRawPredictionCol, HasWeightCol +from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasWeightCol from pyspark.ml.util import JavaMLReadable, JavaMLWritable class Evaluator(Params): @@ -25,9 +25,12 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction def __init__(self, *, rawPredictionCol: str = ..., labelCol: str = ..., metricName: BinaryClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., numBins: int = ...) -> None: ... def setMetricName(self, value: BinaryClassificationEvaluatorMetricType) -> BinaryClassificationEvaluator: ... def getMetricName(self) -> BinaryClassificationEvaluatorMetricType: ... - def setParams(self, *, rawPredictionCol: str = ..., labelCol: str = ..., metricName: BinaryClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., numBins: int = ...) -> BinaryClassificationEvaluator: ... def setNumBins(self, value: int) -> BinaryClassificationEvaluator: ... def getNumBins(self) -> int: ... + def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... +def setParams(self, *, rawPredictionCol: str = ..., labelCol: str = ..., metricName: BinaryClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., numBins: int = ...) -> BinaryClassificationEvaluator: ... class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLReadable[RegressionEvaluator], JavaMLWritable): metricName: Param[RegressionEvaluatorMetricType] @@ -35,22 +38,32 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeigh def __init__(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: RegressionEvaluatorMetricType = ..., weightCol: Optional[str] = ..., throughOrigin: bool = ...) -> None: ... def setMetricName(self, value: RegressionEvaluatorMetricType) -> RegressionEvaluator: ... def getMetricName(self) -> RegressionEvaluatorMetricType: ... - def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: RegressionEvaluatorMetricType = ..., weightCol: Optional[str] = ..., throughOrigin: bool = ...) -> RegressionEvaluator: ... def setThroughOrigin(self, value: bool) -> RegressionEvaluator: ... def getThroughOrigin(self) -> bool: ... + def setLabelCol(self, value: str) -> RegressionEvaluator: ... + def setPredictionCol(self, value: str) -> RegressionEvaluator: ... + def setWeightCol(self, value: str) -> RegressionEvaluator: ... 
+ def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: RegressionEvaluatorMetricType = ..., weightCol: Optional[str] = ..., throughOrigin: bool = ...) -> RegressionEvaluator: ... -class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLReadable[MulticlassClassificationEvaluator], JavaMLWritable): +class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, HasWeightCol, HasProbabilityCol, JavaMLReadable[MulticlassClassificationEvaluator], JavaMLWritable): metricName: Param[MulticlassClassificationEvaluatorMetricType] metricLabel: Param[float] beta: Param[float] - def __init__(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: MulticlassClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., metricLabel: float = ..., beta: float = ...) -> None: ... + eps: Param[float] + def __init__(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: MulticlassClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., metricLabel: float = ..., beta: float = ..., probabilityCol: str = ..., eps: float = ...) -> None: ... def setMetricName(self, value: MulticlassClassificationEvaluatorMetricType) -> MulticlassClassificationEvaluator: ... def getMetricName(self) -> MulticlassClassificationEvaluatorMetricType: ... def setMetricLabel(self, value: float) -> MulticlassClassificationEvaluator: ... def getMetricLabel(self) -> float: ... def setBeta(self, value: float) -> MulticlassClassificationEvaluator: ... def getBeta(self) -> float: ... - def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: MulticlassClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., metricLabel: float = ..., beta: float = ...) -> MulticlassClassificationEvaluator: ... + def setEps(self, value: float) -> MulticlassClassificationEvaluator: ... + def getEps(self) -> float: ... + def setLabelCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setProbabilityCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setWeightCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: MulticlassClassificationEvaluatorMetricType = ..., weightCol: Optional[str] = ..., metricLabel: float = ..., beta: float = ..., probabilityCol: str = ..., eps: float = ...) -> MulticlassClassificationEvaluator: ... class MultilabelClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReadable[MultilabelClassificationEvaluator], JavaMLWritable): metricName: Param[MultilabelClassificationEvaluatorMetricType] @@ -60,17 +73,21 @@ class MultilabelClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio def getMetricName(self) -> MultilabelClassificationEvaluatorMetricType: ... def setMetricLabel(self, value: float) -> MultilabelClassificationEvaluator: ... def getMetricLabel(self) -> float: ... + def setLabelCol(self, value: str) -> MultilabelClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MultilabelClassificationEvaluator: ... def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: MultilabelClassificationEvaluatorMetricType = ..., metricLabel: float = ...) -> MultilabelClassificationEvaluator: ... 
class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol, JavaMLReadable[ClusteringEvaluator], JavaMLWritable): metricName: Param[ClusteringEvaluatorMetricType] distanceMeasure: Param[str] def __init__(self, *, predictionCol: str = ..., featuresCol: str = ..., metricName: ClusteringEvaluatorMetricType = ..., distanceMeasure: str = ...) -> None: ... + def setParams(self, *, predictionCol: str = ..., featuresCol: str = ..., metricName: ClusteringEvaluatorMetricType = ..., distanceMeasure: str = ...) -> ClusteringEvaluator: ... def setMetricName(self, value: ClusteringEvaluatorMetricType) -> ClusteringEvaluator: ... def getMetricName(self) -> ClusteringEvaluatorMetricType: ... - def setParams(self, *, predictionCol: str = ..., featuresCol: str = ..., metricName: ClusteringEvaluatorMetricType = ..., distanceMeasure: str = ...) -> MulticlassClassificationEvaluator: ... - def setDistanceMeasure(self, value: str) -> MulticlassClassificationEvaluator: ... + def setDistanceMeasure(self, value: str) -> ClusteringEvaluator: ... def getDistanceMeasure(self) -> str: ... + def setFeaturesCol(self, value: str) -> ClusteringEvaluator: ... + def setPredictionCol(self, value: str) -> ClusteringEvaluator: ... class RankingEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReadable[RankingEvaluator], JavaMLWritable): metricName: Param[RankingEvaluatorMetricType] @@ -80,4 +97,6 @@ class RankingEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol, JavaMLReada def getMetricName(self) -> RankingEvaluatorMetricType: ... def setK(self, value: int) -> RankingEvaluator: ... def getK(self) -> int: ... + def setLabelCol(self, value: str) -> RankingEvaluator: ... + def setPredictionCol(self, value: str) -> RankingEvaluator: ... def setParams(self, *, predictionCol: str = ..., labelCol: str = ..., metricName: RankingEvaluatorMetricType = ..., k: int = ...) -> RankingEvaluator: ...
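
Note on the evaluator annotations above: the column setters now return the concrete evaluator class, and metricName keeps its dedicated alias (e.g. MulticlassClassificationEvaluatorMetricType), so mismatched evaluator types are caught at type-check time, and misspelled metric names are as well if the aliases are Literal unions. A small sketch of the intended usage; it assumes pyspark with these stubs, and the SparkSession plus the toy predictions DataFrame are illustrative only:

    from pyspark.ml.evaluation import MulticlassClassificationEvaluator
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Toy scored output: (prediction, label) pairs as doubles.
    scored = spark.createDataFrame(
        [(0.0, 0.0), (1.0, 1.0), (1.0, 0.0)],
        ["prediction", "label"],
    )

    # setPredictionCol/setLabelCol return MulticlassClassificationEvaluator,
    # so the whole chain keeps its concrete type.
    evaluator = (
        MulticlassClassificationEvaluator(metricName="f1")
        .setPredictionCol("prediction")
        .setLabelCol("label")
        .setMetricLabel(1.0)
        .setBeta(0.5)
    )
    print(evaluator.evaluate(scored))

    # evaluator.setMetricName("f2")  # flagged if the metric alias is a Literal union
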