Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standardize code style using Black #1284

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion databricks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
#

# https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
__path__ = __import__('pkgutil').extend_path(__path__, __name__) # type: ignore
__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore
20 changes: 9 additions & 11 deletions databricks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
from databricks.koalas import utils


# Spark options passed to the session that the doctests and unit tests share.
shared_conf = {}
shared_conf["spark.sql.shuffle.partitions"] = "4"
# Initialize Spark session that should be used in doctests or unittests.
# Delta requires Spark 2.4.2+. See
# https://github.com/delta-io/delta#compatibility-with-apache-spark-versions.
Expand All @@ -48,7 +46,7 @@
session = utils.default_session(shared_conf)


@pytest.fixture(scope='session', autouse=True)
@pytest.fixture(scope="session", autouse=True)
def session_termination():
yield
# Share one session across all the tests. Repeating starting and stopping sessions and contexts
Expand All @@ -58,46 +56,46 @@ def session_termination():

@pytest.fixture(autouse=True)
def add_ks(doctest_namespace):
    """Make the koalas package available to doctests under the name ``ks``."""
    doctest_namespace.update({"ks": koalas})


@pytest.fixture(autouse=True)
def add_pd(doctest_namespace):
    """Make pandas available to doctests under the name ``pd``.

    When the PANDAS_VERSION environment variable is set, the installed pandas
    version must match it exactly.
    """
    expected_version = os.getenv("PANDAS_VERSION", None)
    if expected_version is not None:
        assert pd.__version__ == expected_version
    doctest_namespace["pd"] = pd


@pytest.fixture(autouse=True)
def add_pa(doctest_namespace):
    """Make pyarrow available to doctests under the name ``pa``.

    When the PYARROW_VERSION environment variable is set, the installed pyarrow
    version must match it exactly.
    """
    expected_version = os.getenv("PYARROW_VERSION", None)
    if expected_version is not None:
        assert pa.__version__ == expected_version
    doctest_namespace["pa"] = pa


@pytest.fixture(autouse=True)
def add_np(doctest_namespace):
    """Make numpy available to doctests under the name ``np``."""
    doctest_namespace.update({"np": numpy})


@pytest.fixture(autouse=True)
def add_path(doctest_namespace):
    """Give each test a fresh temporary directory, exposed to doctests as ``path``.

    The directory is removed during fixture teardown, right after the test
    finishes, rather than through an ``atexit`` hook. A long test run therefore
    does not accumulate one leftover directory and one exit callback per test.
    """
    path = tempfile.mkdtemp()
    doctest_namespace["path"] = path
    try:
        yield
    finally:
        # Best-effort cleanup: a directory that cannot be removed must not fail the test.
        shutil.rmtree(path, ignore_errors=True)


@pytest.fixture(autouse=True)
def add_db(doctest_namespace):
    """Create a uniquely named database and expose its name to doctests as ``db``.

    A ``DROP DATABASE ... CASCADE`` for the database is registered to run at
    interpreter exit.
    """
    # ``UUID.hex`` is the 32-character form without dashes.
    db_name = "db%s" % uuid.uuid4().hex
    session.sql("CREATE DATABASE %s" % db_name)

    def _drop_database():
        session.sql("DROP DATABASE IF EXISTS %s CASCADE" % db_name)

    atexit.register(_drop_database)
    doctest_namespace["db"] = db_name


@pytest.fixture(autouse=os.getenv("KOALAS_USAGE_LOGGER", None) is not None)
def add_caplog(caplog):
    """Hold the koalas usage logger at INFO level, via ``caplog``, while a test runs.

    The fixture is applied automatically only when the KOALAS_USAGE_LOGGER
    environment variable is set.
    """
    info_level = caplog.at_level(logging.INFO, logger="databricks.koalas.usage_logger")
    with info_level:
        yield


Expand Down
71 changes: 53 additions & 18 deletions databricks/koalas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,33 @@

def assert_pyspark_version():
    """Check that pyspark can be imported and warn when it is older than 2.4.

    The version is compared numerically on its leading ``major.minor`` part.
    A pyspark module without a ``__version__`` attribute, or with a version
    string that does not start with a number, is reported as too old as well.

    Raises
    ------
    ImportError
        If pyspark cannot be imported.
    """
    import logging
    import re

    def release_numbers(version):
        """Return the leading ``(major[, minor])`` integers of ``version``, or ``()``."""
        match = re.match(r"\s*(\d+)(?:\.(\d+))?", str(version))
        if match is None:
            return ()
        return tuple(int(number) for number in match.groups() if number is not None)

    try:
        import pyspark
    except ImportError:
        raise ImportError(
            "Unable to import pyspark - consider doing a pip install with [spark] "
            "extra to install pyspark with pip"
        )

    # Default to None so that a module lacking ``__version__`` reaches the
    # "<unknown version>" warning below instead of raising AttributeError.
    pyspark_ver = getattr(pyspark, "__version__", None)

    # Compare numbers, not strings: as plain strings both "2.10" and "10.0"
    # sort before "2.4" and would trigger a spurious warning.
    if pyspark_ver is None or release_numbers(pyspark_ver) < (2, 4):
        logging.warning(
            'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
                pyspark_ver if pyspark_ver is not None else "<unknown version>"
            )
        )


assert_pyspark_version()

import pyspark
import pyarrow

if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and \
LooseVersion(pyspark.__version__) < LooseVersion("3.0"):
if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and LooseVersion(
pyspark.__version__
) < LooseVersion("3.0"):
# This is required to support PyArrow 0.15 in PySpark versions lower than 3.0.
# See SPARK-29367.
os.environ["ARROW_PRE_0_15_IPC_FORMAT"] = "1"
Expand All @@ -53,10 +59,31 @@ def assert_pyspark_version():
from databricks.koalas.config import get_option, set_option, reset_option, options
from databricks.koalas.groupby import NamedAgg

# Names exported by ``from databricks.koalas import *``.
__all__ = [
    "read_csv",
    "read_parquet",
    "to_datetime",
    "from_pandas",
    "get_dummies",
    "DataFrame",
    "Series",
    "Index",
    "MultiIndex",
    "pandas_wraps",
    "sql",
    "range",
    "concat",
    "melt",
    "get_option",
    "set_option",
    "reset_option",
    "read_sql_table",
    "read_sql_query",
    "read_sql",
    "options",
    "option_context",
    "NamedAgg",
]


def _auto_patch():
Expand All @@ -68,21 +95,29 @@ def _auto_patch():
if logger_module is not None:
try:
from databricks.koalas import usage_logging

usage_logging.attach(logger_module)
except Exception as e:
from pyspark.util import _exception_message
logger = logging.getLogger('databricks.koalas.usage_logger')
logger.warning('Tried to attach usage logger `{}`, but an exception was raised: {}'
.format(logger_module, _exception_message(e)))

logger = logging.getLogger("databricks.koalas.usage_logger")
logger.warning(
"Tried to attach usage logger `{}`, but an exception was raised: {}".format(
logger_module, _exception_message(e)
)
)

# Autopatching is on by default.
x = os.getenv("SPARK_KOALAS_AUTOPATCH", "true")
if x.lower() in ("true", "1", "enabled"):
logger = logging.getLogger('spark')
logger.info("Patching spark automatically. You can disable it by setting "
"SPARK_KOALAS_AUTOPATCH=false in your environment")
logger = logging.getLogger("spark")
logger.info(
"Patching spark automatically. You can disable it by setting "
"SPARK_KOALAS_AUTOPATCH=false in your environment"
)

from pyspark.sql import dataframe as df

df.DataFrame.to_koalas = DataFrame.to_koalas


Expand Down
Loading