This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

SPARK-29093, part 1 #298

Merged
merged 15 commits into from Jan 13, 2020
5 changes: 3 additions & 2 deletions third_party/3/pyspark/ml/base.pyi
@@ -1,8 +1,7 @@
# Stubs for pyspark.ml.base (Python 3.5)

import abc
from typing import overload
from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Tuple, Type, TypeVar
from pyspark.ml._typing import M

import _thread

@@ -43,3 +42,5 @@ class UnaryTransformer(HasInputCol, HasOutputCol, Transformer):
def outputDataType(self) -> DataType: ...
def validateInputType(self, inputType: DataType) -> None: ...
def transformSchema(self, schema: StructType) -> StructType: ...
def setInputCol(self: M, value: str) -> M: ...
def setOutputCol(self: M, value: str) -> M: ...
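
The base.pyi change types setInputCol/setOutputCol on UnaryTransformer with the M TypeVar, so chained setter calls keep the concrete subclass type. Below is a minimal sketch of what that buys under mypy; it assumes pyspark 3.0+ at runtime, and the Upper transformer and column names are illustrative, not part of this PR.

from pyspark.ml.base import UnaryTransformer
from pyspark.sql.types import DataType, StringType

class Upper(UnaryTransformer):
    # Hypothetical example transformer that upper-cases a string column.
    def createTransformFunc(self):
        return lambda s: s.upper()
    def outputDataType(self) -> DataType:
        return StringType()
    def validateInputType(self, inputType: DataType) -> None:
        pass

# Because the setters are annotated "(self: M, value: str) -> M", the chained
# expression is inferred as Upper rather than UnaryTransformer, so this
# annotation type-checks.
t: Upper = Upper().setInputCol("text").setOutputCol("text_upper")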
43 changes: 41 additions & 2 deletions third_party/3/pyspark/ml/classification.pyi
@@ -39,8 +39,17 @@ class _LinearSVCParams(_JavaClassifierParams, HasRegParam, HasMaxIter, HasFitInt
class LinearSVC(JavaClassifier[LinearSVCModel], _LinearSVCParams, JavaMLWritable, JavaMLReadable[LinearSVC]):
def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> None: ...
def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., regParam: float = ..., tol: float = ..., rawPredictionCol: str = ..., fitIntercept: bool = ..., standardization: bool = ..., threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ...) -> LinearSVC: ...
def setMaxIter(self, value: int) -> LinearSVC: ...
def setRegParam(self, value: float) -> LinearSVC: ...
def setTol(self, value: float) -> LinearSVC: ...
def setFitIntercept(self, value: bool) -> LinearSVC: ...
def setStandardization(self, value: bool) -> LinearSVC: ...
def setThreshold(self, value: float) -> LinearSVC: ...
def setWeightCol(self, value: str) -> LinearSVC: ...
def setAggregationDepth(self, value: int) -> LinearSVC: ...

class LinearSVCModel(JavaClassificationModel[Vector], _LinearSVCParams, JavaMLWritable, JavaMLReadable[LinearSVCModel]):
def setThreshold(self, value: float) -> LinearSVCModel: ...
@property
def coefficients(self) -> Vector: ...
@property
@@ -71,6 +80,14 @@ class LogisticRegression(JavaProbabilisticClassifier[LogisticRegressionModel], _
def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ...
def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ...
def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ...
def setMaxIter(self, value: int) -> LogisticRegression: ...
def setRegParam(self, value: float) -> LogisticRegression: ...
def setTol(self, value: float) -> LogisticRegression: ...
def setElasticNetParam(self, value: float) -> LogisticRegression: ...
def setFitIntercept(self, value: bool) -> LogisticRegression: ...
def setStandardization(self, value: bool) -> LogisticRegression: ...
def setWeightCol(self, value: str) -> LogisticRegression: ...
def setAggregationDepth(self, value: int) -> LogisticRegression: ...

class LogisticRegressionModel(JavaProbabilisticClassificationModel[Vector], _LogisticRegressionParams, JavaMLWritable, JavaMLReadable[LogisticRegressionModel], HasTrainingSummary[LogisticRegressionTrainingSummary]):
@property
@@ -156,6 +173,9 @@ class DecisionTreeClassifier(JavaProbabilisticClassifier[DecisionTreeClassificat
def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ...
def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ...
def setImpurity(self, value: str) -> DecisionTreeClassifier: ...
def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ...
def setSeed(self, value: int) -> DecisionTreeClassifier: ...
def setWeightCol(self, value: str) -> DecisionTreeClassifier: ...

class DecisionTreeClassificationModel(_DecisionTreeModel, JavaProbabilisticClassificationModel[Vector], _DecisionTreeClassifierParams, JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]):
@property
@@ -176,6 +196,8 @@ class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificat
def setNumTrees(self, value: int) -> RandomForestClassifier: ...
def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ...
def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ...
def setSeed(self, value: int) -> RandomForestClassifier: ...
def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ...

class RandomForestClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]):
@property
@@ -202,6 +224,10 @@ class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], _GBTCla
def setSubsamplingRate(self, value: float) -> GBTClassifier: ...
def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ...
def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ...
def setMaxIter(self, value: int) -> GBTClassifier: ...
def setCheckpointInterval(self, value: int) -> GBTClassifier: ...
def setSeed(self, value: int) -> GBTClassifier: ...
def setStepSize(self, value: float) -> GBTClassifier: ...

class GBTClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _GBTClassifierParams, JavaMLWritable, JavaMLReadable[GBTClassificationModel]):
@property
@@ -221,6 +247,7 @@ class NaiveBayes(JavaProbabilisticClassifier[NaiveBayesModel], _NaiveBayesParams
def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., smoothing: float = ..., modelType: str = ..., thresholds: Optional[List[float]] = ..., weightCol: Optional[str] = ...) -> NaiveBayes: ...
def setSmoothing(self, value: float) -> NaiveBayes: ...
def setModelType(self, value: str) -> NaiveBayes: ...
def setWeightCol(self, value: str) -> NaiveBayes: ...

class NaiveBayesModel(JavaProbabilisticClassificationModel[Vector], _NaiveBayesParams, JavaMLWritable, JavaMLReadable[NaiveBayesModel]):
@property
@@ -243,8 +270,12 @@ class MultilayerPerceptronClassifier(JavaProbabilisticClassifier[MultilayerPerce
def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxIter: int = ..., tol: float = ..., seed: Optional[int] = ..., layers: Optional[List[int]] = ..., blockSize: int = ..., stepSize: float = ..., solver: str = ..., initialWeights: Optional[Vector] = ..., probabilityCol: str = ..., rawPredictionCol: str = ...) -> MultilayerPerceptronClassifier: ...
def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ...
def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ...
def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ...
def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ...
def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ...
def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ...
def setTol(self, value: float) -> MultilayerPerceptronClassifier: ...
def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ...
def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ...

class MultilayerPerceptronClassificationModel(JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[MultilayerPerceptronClassificationModel]):
@property
@@ -260,10 +291,18 @@ class OneVsRest(Estimator[OneVsRestModel], _OneVsRestParams, HasParallelism, Jav
def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., rawPredictionCol: str = ..., classifier: Optional[Estimator[M]] = ..., weightCol: Optional[str] = ..., parallelism: int = ...) -> None: ...
def setParams(self, *, featuresCol: Optional[str] = ..., labelCol: Optional[str] = ..., predictionCol: Optional[str] = ..., rawPredictionCol: str = ..., classifier: Optional[Estimator[M]] = ..., weightCol: Optional[str] = ..., parallelism: int = ...) -> OneVsRest: ...
def setClassifier(self, value: Estimator[M]) -> OneVsRest: ...
def setLabelCol(self, value: str) -> OneVsRest: ...
def setFeaturesCol(self, value: str) -> OneVsRest: ...
def setPredictionCol(self, value: str) -> OneVsRest: ...
def setRawPredictionCol(self, value: str) -> OneVsRest: ...
def setWeightCol(self, value: str) -> OneVsRest: ...
def setParallelism(self, value: int) -> OneVsRest: ...
def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ...

class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable):
models: List[Transformer]
def __init__(self, models: List[Transformer]) -> None: ...
def setClassifier(self, value: Estimator[M]) -> OneVsRest: ...
def setFeaturesCol(self, value: str) -> OneVsRestModel: ...
def setPredictionCol(self, value: str) -> OneVsRestModel: ...
def setRawPredictionCol(self, value: str) -> OneVsRestModel: ...
def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ...
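
With the classification.pyi additions, the explicit setters on the estimators (and model setters such as LinearSVCModel.setThreshold) are visible to type checkers with their proper return types. A small usage sketch follows, assuming pyspark 3.0+ where these setters exist at runtime; the SparkSession settings and column names are illustrative.

from pyspark.sql import SparkSession
from pyspark.ml.classification import LinearSVC

spark = SparkSession.builder.master("local[1]").appName("stub-demo").getOrCreate()

# Each setter is annotated to return LinearSVC, so the chained expression keeps
# its type and the annotation below is verified by mypy.
svc: LinearSVC = (
    LinearSVC(featuresCol="features", labelCol="label")
    .setMaxIter(50)
    .setRegParam(0.01)
    .setStandardization(True)
)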
35 changes: 35 additions & 0 deletions third_party/3/pyspark/ml/clustering.pyi
@@ -32,6 +32,9 @@ class _GaussianMixtureParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC
def getK(self) -> int: ...

class GaussianMixtureModel(JavaModel, _GaussianMixtureParams, JavaMLWritable, JavaMLReadable[GaussianMixtureModel], HasTrainingSummary[GaussianMixtureSummary]):
def setFeaturesCol(self, value: str) -> GaussianMixtureModel: ...
def setPredictionCol(self, value: str) -> GaussianMixtureModel: ...
def setProbabilityCol(self, value: str) -> GaussianMixtureModel: ...
@property
def weights(self) -> List[float]: ...
@property
@@ -45,6 +48,14 @@ class GaussianMixture(JavaEstimator[GaussianMixtureModel], _GaussianMixtureParam
def __init__(self, *, featuresCol: str = ..., predictionCol: str = ..., k: int = ..., probabilityCol: str = ..., tol: float = ..., maxIter: int = ..., seed: Optional[int] = ...) -> None: ...
def setParams(self, *, featuresCol: str = ..., predictionCol: str = ..., k: int = ..., probabilityCol: str = ..., tol: float = ..., maxIter: int = ..., seed: Optional[int] = ...) -> GaussianMixture: ...
def setK(self, value: int) -> GaussianMixture: ...
def setMaxIter(self, value: int) -> GaussianMixture: ...
def setFeaturesCol(self, value: str) -> GaussianMixture: ...
def setPredictionCol(self, value: str) -> GaussianMixture: ...
def setProbabilityCol(self, value: str) -> GaussianMixture: ...
def setWeightCol(self, value: str) -> GaussianMixture: ...
def setSeed(self, value: int) -> GaussianMixture: ...
def setTol(self, value: float) -> GaussianMixture: ...
def setAggregationDepth(self, value: int) -> GaussianMixture: ...

class GaussianMixtureSummary(ClusteringSummary):
@property
@@ -66,6 +77,8 @@ class _KMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasTo
def getInitSteps(self) -> int: ...

class KMeansModel(JavaModel, _KMeansParams, GeneralJavaMLWritable, JavaMLReadable[KMeansModel], HasTrainingSummary[KMeansSummary]):
def setFeaturesCol(self, value: str) -> KMeansModel: ...
def setPredictionCol(self, value: str) -> KMeansModel: ...
def clusterCenters(self) -> List[ndarray]: ...
@property
def summary(self) -> KMeansSummary: ...
@@ -78,6 +91,12 @@ class KMeans(JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLRe
def setInitMode(self, value: str) -> KMeans: ...
def setInitSteps(self, value: int) -> KMeans: ...
def setDistanceMeasure(self, value: str) -> KMeans: ...
def setMaxIter(self, value: int) -> KMeans: ...
def setFeaturesCol(self, value: str) -> KMeans: ...
def setPredictionCol(self, value: str) -> KMeans: ...
def setSeed(self, value: int) -> KMeans: ...
def setTol(self, value: float) -> KMeans: ...
def setWeightCol(self, value: str) -> KMeans: ...

class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasDistanceMeasure):
k: Param[int]
@@ -86,6 +105,8 @@ class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC
def getMinDivisibleClusterSize(self) -> float: ...

class BisectingKMeansModel(JavaModel, _BisectingKMeansParams, JavaMLWritable, JavaMLReadable[BisectingKMeansModel], HasTrainingSummary[BisectingKMeansSummary]):
def setFeaturesCol(self, value: str) -> BisectingKMeansModel: ...
def setPredictionCol(self, value: str) -> BisectingKMeansModel: ...
def clusterCenters(self) -> List[ndarray]: ...
def computeCost(self, dataset: DataFrame) -> float: ...
@property
@@ -98,6 +119,10 @@ class BisectingKMeans(JavaEstimator[BisectingKMeansModel], _BisectingKMeansParam
def setK(self, value: int) -> BisectingKMeans: ...
def setMinDivisibleClusterSize(self, value: float) -> BisectingKMeans: ...
def setDistanceMeasure(self, value: str) -> BisectingKMeans: ...
def setMaxIter(self, value: int) -> BisectingKMeans: ...
def setFeaturesCol(self, value: str) -> BisectingKMeans: ...
def setPredictionCol(self, value: str) -> BisectingKMeans: ...
def setSeed(self, value: int) -> BisectingKMeans: ...

class BisectingKMeansSummary(ClusteringSummary):
@property
@@ -126,6 +151,9 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval):
def getKeepLastCheckpoint(self) -> bool: ...

class LDAModel(JavaModel, _LDAParams):
def setFeaturesCol(self, value: str) -> LDAModel: ...
def setSeed(self, value: int) -> LDAModel: ...
def setTopicDistributionCol(self, value: str) -> LDAModel: ...
def isDistributed(self) -> bool: ...
def vocabSize(self) -> int: ...
def topicsMatrix(self) -> Matrix: ...
@@ -145,6 +173,9 @@ class LocalLDAModel(LDAModel, JavaMLReadable[LocalLDAModel], JavaMLWritable): ..
class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritable):
def __init__(self, *, featuresCol: str = ..., maxIter: int = ..., seed: Optional[int] = ..., checkpointInterval: int = ..., k: int = ..., optimizer: str = ..., learningOffset: float = ..., learningDecay: float = ..., subsamplingRate: float = ..., optimizeDocConcentration: bool = ..., docConcentration: Optional[List[float]] = ..., topicConcentration: Optional[float] = ..., topicDistributionCol: str = ..., keepLastCheckpoint: bool = ...) -> None: ...
def setParams(self, *, featuresCol: str = ..., maxIter: int = ..., seed: Optional[int] = ..., checkpointInterval: int = ..., k: int = ..., optimizer: str = ..., learningOffset: float = ..., learningDecay: float = ..., subsamplingRate: float = ..., optimizeDocConcentration: bool = ..., docConcentration: Optional[List[float]] = ..., topicConcentration: Optional[float] = ..., topicDistributionCol: str = ..., keepLastCheckpoint: bool = ...) -> LDA: ...
def setCheckpointInterval(self, value: int) -> LDA: ...
def setSeed(self, value: int) -> LDA: ...
def setK(self, value: int) -> LDA: ...
def setOptimizer(self, value: str) -> LDA: ...
def setLearningOffset(self, value: float) -> LDA: ...
def setLearningDecay(self, value: float) -> LDA: ...
@@ -154,6 +185,8 @@ class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritab
def setTopicConcentration(self, value: float) -> LDA: ...
def setTopicDistributionCol(self, value: str) -> LDA: ...
def setKeepLastCheckpoint(self, value: bool) -> LDA: ...
def setMaxIter(self, value: int) -> LDA: ...
def setFeaturesCol(self, value: str) -> LDA: ...

class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol):
k: Param[int]
@@ -172,4 +205,6 @@ class PowerIterationClustering(_PowerIterationClusteringParams, JavaParams, Java
def setInitMode(self, value: str) -> PowerIterationClustering: ...
def setSrcCol(self, value: str) -> PowerIterationClustering: ...
def setDstCol(self, value: str) -> PowerIterationClustering: ...
def setMaxIter(self, value: int) -> PowerIterationClustering: ...
def setWeightCol(self, value: str) -> PowerIterationClustering: ...
def assignClusters(self, dataset: DataFrame) -> DataFrame: ...
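
The clustering.pyi additions follow the same pattern for the clustering estimators and models. A brief sketch, again assuming pyspark 3.0+ at runtime; the SparkSession settings and the feature column name are illustrative.

from pyspark.sql import SparkSession
from pyspark.ml.clustering import KMeans

spark = SparkSession.builder.master("local[1]").appName("stub-demo").getOrCreate()

# The new annotations let the chained configuration keep the KMeans type.
kmeans: KMeans = (
    KMeans(k=3)
    .setFeaturesCol("features")
    .setMaxIter(20)
    .setSeed(1)
)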