From 55c426e1bece0c9ea28359698dd865742af3f9db Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 2 Oct 2019 16:34:57 +0200 Subject: [PATCH 1/3] Reflect changes introduced with SPARK-28985 --- third_party/3/pyspark/ml/_typing.py | 4 +- third_party/3/pyspark/ml/classification.pyi | 55 ++++++++++++++------- third_party/3/pyspark/ml/regression.pyi | 34 ++++++------- third_party/3/pyspark/ml/util.pyi | 4 -- third_party/3/pyspark/ml/wrapper.pyi | 20 ++++++-- 5 files changed, 73 insertions(+), 44 deletions(-) diff --git a/third_party/3/pyspark/ml/_typing.py b/third_party/3/pyspark/ml/_typing.py index 678cb4bd..4297fcb1 100644 --- a/third_party/3/pyspark/ml/_typing.py +++ b/third_party/3/pyspark/ml/_typing.py @@ -1,8 +1,9 @@ from typing import Any, Dict, TypeVar, Union -import pyspark.ml.param import pyspark.ml.base +import pyspark.ml.param import pyspark.ml.util +import pyspark.ml.wrapper ParamMap = Dict[pyspark.ml.param.Param, Any] PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer] @@ -10,3 +11,4 @@ T = TypeVar("T") P = TypeVar("P", bound=pyspark.ml.param.Params) M = TypeVar("M", bound=pyspark.ml.base.Transformer) +JM = TypeVar("JM", bound=pyspark.ml.wrapper.JavaTransformer) diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 672bfb49..09acc8ec 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -1,32 +1,48 @@ # Stubs for pyspark.ml.classification (Python 3) +import abc from typing import Any, Dict, List, Optional, TypeVar -from pyspark.ml._typing import M, P, ParamMap +from pyspark.ml._typing import JM, M, P, T, ParamMap from pyspark.ml.base import Estimator, Model, Transformer from pyspark.ml.linalg import Matrix, Vector from pyspark.ml.param.shared import * from pyspark.ml.regression import DecisionTreeModel, DecisionTreeParams, DecisionTreeRegressionModel, GBTParams, HasVarianceImpurity, RandomForestParams, TreeEnsembleModel 
from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.ml.wrapper import JavaWrapper +from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaPredictorParams, JavaWrapper, JavaTransformer from pyspark.sql.dataframe import DataFrame -class JavaClassificationModel(JavaPredictionModel): +class JavaClassifierParams(HasRawPredictionCol, JavaPredictorParams): ... + +class JavaClassifier(JavaPredictor[JM], JavaClassifierParams, metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + +class JavaClassificationModel(JavaPredictionModel[T], JavaClassifierParams): + def setRawPredictionCol(self: P, value: str) -> P: ... @property def numClasses(self) -> int: ... -class LinearSVC(JavaEstimator[LinearSVCModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization, HasThreshold, HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[LinearSVC]): +class JavaProbabilisticClassifierParams(HasProbabilityCol, HasThresholds, JavaClassifierParams): ... + +class JavaProbabilisticClassifier(JavaClassifier[JM], JavaProbabilisticClassifierParams, metaclass=abc.ABCMeta): + def setProbabilityCol(self: P, value: str) -> P: ... + def setThresholds(self: P, value: List[float]) -> P: ... + +class JavaProbabilisticClassificationModel(JavaClassificationModel[T], JavaProbabilisticClassifierParams): + def setProbabilityCol(self: P, value: str) -> P: ... + def setThresholds(self: P, value: List[float]) -> P: ... 
+ +class LinearSVC(JavaClassifier[LinearSVCModel], HasMaxIter, HasRegParam, HasTol, HasFitIntercept, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable[LinearSVC]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> LinearSVC: ... -class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[LinearSVCModel]): +class LinearSVCModel(JavaClassificationModel[Vector], JavaMLWritable, JavaMLReadable[LinearSVCModel]): @property def coefficients(self) -> Vector: ... @property def intercept(self) -> float: ... 
-class LogisticRegression(JavaEstimator[LogisticRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[LogisticRegression]): +class LogisticRegression(JavaProbabilisticClassifier[LogisticRegressionModel], HasMaxIter, HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds, HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[LogisticRegression]): threshold: Param[float] family: Param[str] lowerBoundsOnCoefficients: Param[Matrix] @@ -50,7 +66,7 @@ class LogisticRegression(JavaEstimator[LogisticRegressionModel], HasFeaturesCol, def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... def getUpperBoundsOnIntercepts(self) -> Vector: ... -class LogisticRegressionModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[LogisticRegressionModel], HasTrainingSummary): +class LogisticRegressionModel(JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[LogisticRegressionModel], HasTrainingSummary[LogisticRegressionTrainingSummary]): @property def coefficients(self) -> Vector: ... @property @@ -127,7 +143,7 @@ class TreeClassifierParams: def __init__(self) -> None: ... def getImpurity(self) -> str: ... 
-class DecisionTreeClassifier(JavaEstimator[DecisionTreeClassificationModel], HasFeaturesCol, HasLabelCol, HasWeightCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, DecisionTreeParams, TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeClassifier]): +class DecisionTreeClassifier(JavaProbabilisticClassifier[DecisionTreeClassificationModel], HasWeightCol, DecisionTreeParams, TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeClassifier]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> DecisionTreeClassifier: ... def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ... @@ -138,11 +154,11 @@ class DecisionTreeClassifier(JavaEstimator[DecisionTreeClassificationModel], Has def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... def setImpurity(self, value: str) -> DecisionTreeClassifier: ... 
-class DecisionTreeClassificationModel(DecisionTreeModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]): +class DecisionTreeClassificationModel(DecisionTreeModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]): @property def featureImportances(self) -> Vector: ... -class RandomForestClassifier(JavaEstimator[RandomForestClassificationModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, HasRawPredictionCol, HasProbabilityCol, RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestClassifier]): +class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificationModel], HasSeed, RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestClassifier]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., leafCol: str = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., seed: Optional[int] = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., subsamplingRate: float = ..., leafCol: str = ...) -> RandomForestClassifier: ... def setMaxDepth(self, value: int) -> RandomForestClassifier: ... 
@@ -156,7 +172,7 @@ class RandomForestClassifier(JavaEstimator[RandomForestClassificationModel], Has def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... -class RandomForestClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): +class RandomForestClassificationModel(TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): @property def featureImportances(self) -> Vector: ... @property @@ -167,7 +183,7 @@ class GBTClassifierParams(GBTParams, HasVarianceImpurity): lossType: Param[str] def getLossType(self) -> str: ... -class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, GBTClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTClassifier]): +class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], GBTClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTClassifier]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> None: ... 
def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> GBTClassifier: ... def setMaxDepth(self, value: int) -> GBTClassifier: ... @@ -182,14 +198,14 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... -class GBTClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[GBTClassificationModel]): +class GBTClassificationModel(TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[GBTClassificationModel]): @property def featureImportances(self) -> Vector: ... @property def trees(self) -> List[DecisionTreeRegressionModel]: ... def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: ... -class NaiveBayes(JavaEstimator[NaiveBayesModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable[NaiveBayes]): +class NaiveBayes(JavaProbabilisticClassifier[NaiveBayesModel], HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable[NaiveBayes]): smoothing: Param[float] modelType: Param[str] def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., smoothing: float = ..., modelType: str = ..., thresholds: Optional[List[float]] = ..., weightCol: Optional[str] = ...) -> None: ... 
@@ -199,13 +215,13 @@ class NaiveBayes(JavaEstimator[NaiveBayesModel], HasFeaturesCol, HasLabelCol, Ha def setModelType(self, value: str) -> NaiveBayes: ... def getModelType(self) -> str: ... -class NaiveBayesModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[NaiveBayesModel]): +class NaiveBayesModel(JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[NaiveBayesModel]): @property def pi(self) -> Vector: ... @property def theta(self) -> Matrix: ... -class MultilayerPerceptronClassifier(JavaEstimator[MultilayerPerceptronClassificationModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed, HasStepSize, HasSolver, JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassifier], HasProbabilityCol, HasRawPredictionCol): +class MultilayerPerceptronClassifier(JavaProbabilisticClassifier[MultilayerPerceptronClassificationModel], HasMaxIter, HasTol, HasSeed, HasStepSize, HasSolver, JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassifier]): layers: Param[List[int]] blockSize: Param[int] solver: Param[str] @@ -221,13 +237,13 @@ class MultilayerPerceptronClassifier(JavaEstimator[MultilayerPerceptronClassific def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ... def getInitialWeights(self) -> Vector: ... -class MultilayerPerceptronClassificationModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassificationModel]): +class MultilayerPerceptronClassificationModel(JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassificationModel]): @property def layers(self) -> List[int]: ... @property def weights(self) -> Vector: ... -class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasWeightCol, HasPredictionCol, HasRawPredictionCol): +class OneVsRestParams(JavaClassifierParams, HasWeightCol): classifier: Param[Estimator] def getClassifier(self) -> Estimator[M]: ... 
@@ -240,4 +256,5 @@ class OneVsRest(Estimator[OneVsRestModel], OneVsRestParams, HasParallelism, Java class OneVsRestModel(Model, OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable): models: List[Transformer] def __init__(self, models: List[Transformer]) -> None: ... + def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi index 964ddda4..6e8191d9 100644 --- a/third_party/3/pyspark/ml/regression.pyi +++ b/third_party/3/pyspark/ml/regression.pyi @@ -1,19 +1,19 @@ # Stubs for pyspark.ml.regression (Python 3) from typing import Any, List, Optional, Sequence -from pyspark.ml._typing import P +from pyspark.ml._typing import P, T from pyspark.ml.param.shared import * from pyspark.ml.linalg import Vector from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaWrapper +from pyspark.ml.wrapper import JavaModel, JavaPredictionModel, JavaPredictor, JavaWrapper from pyspark.sql.dataframe import DataFrame -class LinearRegression(JavaEstimator[LinearRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[LinearRegression]): +class LinearRegression(JavaPredictor[LinearRegressionModel], HasMaxIter, HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, HasLoss, JavaMLWritable, JavaMLReadable[LinearRegression]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., elasticNetParam: float = ..., tol: float = ..., fitIntercept: bool = ..., standardization: bool = ..., solver: str = ..., weightCol: Optional[str] = ..., aggregationDepth: int = 
...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., elasticNetParam: float = ..., tol: float = ..., fitIntercept: bool = ..., standardization: bool = ..., solver: str = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> LinearRegression: ... -class LinearRegressionModel(JavaModel, JavaPredictionModel, GeneralJavaMLWritable, JavaMLReadable[LinearRegressionModel], HasTrainingSummary[LinearRegressionSummary]): +class LinearRegressionModel(JavaPredictionModel[Vector], GeneralJavaMLWritable, JavaMLReadable[LinearRegressionModel], HasTrainingSummary[LinearRegressionSummary]): @property def coefficients(self) -> Vector: ... @property @@ -64,7 +64,7 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary): @property def totalIterations(self) -> int: ... -class IsotonicRegression(JavaEstimator[IsotonicRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, JavaMLWritable, JavaMLReadable[IsotonicRegression]): +class IsotonicRegression(JavaPredictor[IsotonicRegressionModel], HasWeightCol, JavaMLWritable, JavaMLReadable[IsotonicRegression]): isotonic: Param[bool] featureIndex: Param[int] def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., weightCol: Optional[str] = ..., isotonic: bool = ..., featureIndex: int = ...) -> None: ... @@ -74,7 +74,7 @@ class IsotonicRegression(JavaEstimator[IsotonicRegressionModel], HasFeaturesCol, def setFeatureIndex(self, value: int) -> IsotonicRegression: ... def getFeatureIndex(self) -> int: ... -class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable[IsotonicRegressionModel]): +class IsotonicRegressionModel(JavaPredictionModel, JavaMLWritable, JavaMLReadable[IsotonicRegressionModel]): @property def boundaries(self) -> Vector: ... 
@property @@ -135,7 +135,7 @@ class GBTRegressorParams(GBTParams, TreeRegressorParams): lossType: Param[str] def getLossType(self) -> str: ... -class DecisionTreeRegressor(JavaEstimator[DecisionTreeRegressionModel], HasFeaturesCol, HasLabelCol, HasWeightCol, HasPredictionCol, DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeRegressor], HasVarianceCol): +class DecisionTreeRegressor(JavaPredictor[DecisionTreeRegressionModel], HasWeightCol, DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeRegressor], HasVarianceCol): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> DecisionTreeRegressor: ... def setMaxDepth(self, value: int) -> DecisionTreeRegressor: ... @@ -146,7 +146,7 @@ class DecisionTreeRegressor(JavaEstimator[DecisionTreeRegressionModel], HasFeatu def setCacheNodeIds(self, value: bool) -> DecisionTreeRegressor: ... def setImpurity(self, value: str) -> DecisionTreeRegressor: ... -class DecisionTreeModel(JavaModel, JavaPredictionModel): +class DecisionTreeModel(JavaPredictionModel[T]): @property def numNodes(self) -> int: ... 
@property @@ -166,22 +166,22 @@ class TreeEnsembleModel(JavaModel): @property def toDebugString(self) -> str: ... -class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable[DecisionTreeRegressionModel]): +class DecisionTreeRegressionModel(DecisionTreeModel[T], JavaMLWritable, JavaMLReadable[DecisionTreeRegressionModel]): @property def featureImportances(self) -> Vector: ... -class RandomForestRegressor(JavaEstimator[RandomForestRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestRegressor]): +class RandomForestRegressor(JavaPredictor[RandomForestRegressionModel], HasSeed, RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestRegressor]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ...) -> RandomForestRegressor: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestRegressor: ... 
-class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable[RandomForestRegressionModel]): +class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel[Vector], JavaMLWritable, JavaMLReadable[RandomForestRegressionModel]): @property def trees(self) -> Sequence[DecisionTreeRegressionModel]: ... @property def featureImportances(self) -> Vector: ... -class GBTRegressor(JavaEstimator[GBTRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, GBTRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTRegressor]): +class GBTRegressor(JavaPredictor[GBTRegressionModel], GBTRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTRegressor]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impurity: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impuriy: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> GBTRegressor: ... def setMaxDepth(self, value: int) -> GBTRegressor: ... 
@@ -196,14 +196,14 @@ class GBTRegressor(JavaEstimator[GBTRegressionModel], HasFeaturesCol, HasLabelCo def setFeatureSubsetStrategy(self, value: str) -> GBTRegressor: ... def setValidationIndicatorCol(self, value: str) -> GBTRegressor: ... -class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable[GBTRegressionModel]): +class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel[Vector], JavaMLWritable, JavaMLReadable[GBTRegressionModel]): @property def featureImportances(self) -> Vector: ... @property def trees(self) -> Sequence[DecisionTreeRegressionModel]: ... def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: ... -class AFTSurvivalRegression(JavaEstimator[AFTSurvivalRegressionModel], HasFeaturesCol, HasLabelCol, HasPredictionCol, HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[AFTSurvivalRegression]): +class AFTSurvivalRegression(JavaPredictor[AFTSurvivalRegressionModel], HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[AFTSurvivalRegression]): censorCol: Param[str] quantileProbabilities: Param[List[float]] quantilesCol: Param[str] @@ -216,7 +216,7 @@ class AFTSurvivalRegression(JavaEstimator[AFTSurvivalRegressionModel], HasFeatur def setQuantilesCol(self, value: str) -> AFTSurvivalRegression: ... def getQuantilesCol(self) -> str: ... -class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable[AFTSurvivalRegressionModel]): +class AFTSurvivalRegressionModel(JavaPredictionModel, JavaMLWritable, JavaMLReadable[AFTSurvivalRegressionModel]): @property def coefficients(self) -> Vector: ... @property @@ -226,7 +226,7 @@ class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable[AFTSu def predictQuantiles(self, features: Vector) -> Vector: ... def predict(self, features: Vector) -> float: ... 
-class GeneralizedLinearRegression(JavaEstimator[GeneralizedLinearRegressionModel], HasLabelCol, HasFeaturesCol, HasPredictionCol, HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol, HasSolver, JavaMLWritable, JavaMLReadable[GeneralizedLinearRegression]): +class GeneralizedLinearRegression(JavaPredictor[GeneralizedLinearRegressionModel], HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol, HasSolver, JavaMLWritable, JavaMLReadable[GeneralizedLinearRegression]): family: Param[str] link: Param[str] linkPredictionCol: Param[str] @@ -249,7 +249,7 @@ class GeneralizedLinearRegression(JavaEstimator[GeneralizedLinearRegressionModel def setOffsetCol(self, value: str) -> GeneralizedLinearRegression: ... def getOffsetCol(self) -> str: ... -class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable[GeneralizedLinearRegressionModel], HasTrainingSummary): +class GeneralizedLinearRegressionModel(JavaPredictionModel[Vector], JavaMLWritable, JavaMLReadable[GeneralizedLinearRegressionModel], HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary]): @property def coefficients(self) -> Vector: ... @property diff --git a/third_party/3/pyspark/ml/util.pyi b/third_party/3/pyspark/ml/util.pyi index deeb4b51..0b9be63c 100644 --- a/third_party/3/pyspark/ml/util.pyi +++ b/third_party/3/pyspark/ml/util.pyi @@ -77,10 +77,6 @@ class JavaMLReadable(MLReadable[R]): @classmethod def read(cls) -> JavaMLReader[R]: ... -class JavaPredictionModel: - @property - def numFeatures(self) -> int: ... - class DefaultParamsWritable(MLWritable): def write(self) -> MLWriter: ... 
diff --git a/third_party/3/pyspark/ml/wrapper.pyi b/third_party/3/pyspark/ml/wrapper.pyi index 30d539a2..4b416a48 100644 --- a/third_party/3/pyspark/ml/wrapper.pyi +++ b/third_party/3/pyspark/ml/wrapper.pyi @@ -1,12 +1,12 @@ # Stubs for pyspark.ml.wrapper (Python 3) import abc -from typing import Any, Optional, Type, TypeVar +from typing import Any, Generic, Optional, Type, TypeVar +from pyspark.ml._typing import P, T, JM from pyspark.ml import Estimator, Model, Transformer from pyspark.ml.param import Params - -JM = TypeVar("JM", bound=JavaTransformer) +from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol xrange = range @@ -27,3 +27,17 @@ class JavaTransformer(JavaParams, Transformer): class JavaModel(JavaTransformer, Model): __metaclass__: Type[abc.ABCMeta] = ... def __init__(self, java_model: Optional[Any] = ...) -> None: ... + +class JavaPredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): ... + +class JavaPredictor(JavaEstimator[JM], JavaPredictorParams, metaclass=abc.ABCMeta): + def setLabelCol(self: P, value: str) -> P: ... + def setFeaturesCol(self: P, value: str) -> P: ... + def setPredictionCol(self: P, value: str) -> P: ... + +class JavaPredictionModel(Generic[T], JavaModel, JavaPredictorParams): + def setFeaturesCol(self: P, value: str) -> P: ... + def setPredictionCol(self: P, value: str) -> P: ... + @property + def numFeatures(self) -> int: ... + def predict(self, value: T) -> float: ... 
From 210417c28f0beb78de72ff899909cf7a2e8302eb Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 5 Oct 2019 03:12:46 +0200 Subject: [PATCH 2/3] Add _IsotonicRegressionBase --- third_party/3/pyspark/ml/regression.pyi | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi index 6e8191d9..3531267b 100644 --- a/third_party/3/pyspark/ml/regression.pyi +++ b/third_party/3/pyspark/ml/regression.pyi @@ -6,7 +6,7 @@ from pyspark.ml._typing import P, T from pyspark.ml.param.shared import * from pyspark.ml.linalg import Vector from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaModel, JavaPredictionModel, JavaPredictor, JavaWrapper +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaPredictionModel, JavaPredictor, JavaWrapper from pyspark.sql.dataframe import DataFrame class LinearRegression(JavaPredictor[LinearRegressionModel], HasMaxIter, HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, HasLoss, JavaMLWritable, JavaMLReadable[LinearRegression]): @@ -64,17 +64,19 @@ class LinearRegressionTrainingSummary(LinearRegressionSummary): @property def totalIterations(self) -> int: ... -class IsotonicRegression(JavaPredictor[IsotonicRegressionModel], HasWeightCol, JavaMLWritable, JavaMLReadable[IsotonicRegression]): +class _IsotonicRegressionBase(HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol): isotonic: Param[bool] featureIndex: Param[int] + def getIsotonic(self) -> bool: ... + def getFeatureIndex(self) -> int: ... + +class IsotonicRegression(JavaEstimator[IsotonicRegressionModel], _IsotonicRegressionBase, HasWeightCol, JavaMLWritable, JavaMLReadable[IsotonicRegression]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., weightCol: Optional[str] = ..., isotonic: bool = ..., featureIndex: int = ...) -> None: ... 
def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., weightCol: Optional[str] = ..., isotonic: bool = ..., featureIndex: int = ...) -> IsotonicRegression: ... def setIsotonic(self, value: bool) -> IsotonicRegression: ... - def getIsotonic(self) -> bool: ... def setFeatureIndex(self, value: int) -> IsotonicRegression: ... - def getFeatureIndex(self) -> int: ... -class IsotonicRegressionModel(JavaPredictionModel, JavaMLWritable, JavaMLReadable[IsotonicRegressionModel]): +class IsotonicRegressionModel(JavaModel, _IsotonicRegressionBase, JavaMLWritable, JavaMLReadable[IsotonicRegressionModel]): @property def boundaries(self) -> Vector: ... @property From 99c5b011d9de26cd368037f695567f6e7e118ee8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 5 Oct 2019 03:20:35 +0200 Subject: [PATCH 3/3] Add _AFTSurvivalRegressionParams --- third_party/3/pyspark/ml/regression.pyi | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi index 3531267b..b68f6135 100644 --- a/third_party/3/pyspark/ml/regression.pyi +++ b/third_party/3/pyspark/ml/regression.pyi @@ -205,20 +205,22 @@ class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel[Vector], JavaMLW def trees(self) -> Sequence[DecisionTreeRegressionModel]: ... def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: ... -class AFTSurvivalRegression(JavaPredictor[AFTSurvivalRegressionModel], HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth, JavaMLWritable, JavaMLReadable[AFTSurvivalRegression]): +class _AFTSurvivalRegressionParams(HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasTol, HasFitIntercept, HasAggregationDepth): censorCol: Param[str] quantileProbabilities: Param[List[float]] quantilesCol: Param[str] + def getCensorCol(self) -> str: ... + def getQuantileProbabilities(self) -> List[float]: ... + def getQuantilesCol(self) -> str: ... 
+ +class AFTSurvivalRegression(JavaEstimator[AFTSurvivalRegressionModel], _AFTSurvivalRegressionParams, JavaMLWritable, JavaMLReadable[AFTSurvivalRegression]): def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., fitIntercept: bool = ..., maxIter: int = ..., tol: float = ..., censorCol: str = ..., quantileProbabilities: List[float] = ..., quantilesCol: Optional[str] = ..., aggregationDepth: int = ...) -> None: ... def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., fitIntercept: bool = ..., maxIter: int = ..., tol: float = ..., censorCol: str = ..., quantileProbabilities: List[float] = ..., quantilesCol: Optional[str] = ..., aggregationDepth: int = ...) -> AFTSurvivalRegression: ... def setCensorCol(self, value: str) -> AFTSurvivalRegression: ... - def getCensorCol(self) -> str: ... def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegression: ... - def getQuantileProbabilities(self) -> List[float]: ... def setQuantilesCol(self, value: str) -> AFTSurvivalRegression: ... - def getQuantilesCol(self) -> str: ... -class AFTSurvivalRegressionModel(JavaPredictionModel, JavaMLWritable, JavaMLReadable[AFTSurvivalRegressionModel]): +class AFTSurvivalRegressionModel(JavaModel, _AFTSurvivalRegressionParams, JavaMLWritable, JavaMLReadable[AFTSurvivalRegressionModel]): @property def coefficients(self) -> Vector: ... @property