-
Notifications
You must be signed in to change notification settings - Fork 121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add to_string
method to SparkLikeExprDateTimeNamespace
#1842
base: main
Are you sure you want to change the base?
Changes from 2 commits
5c4dd91
6f16584
1f9e9f7
baf84f0
5b96b0c
93a45d5
a476ba6
7e8eee5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -15,6 +15,56 @@ class SparkLikeExprDateTimeNamespace: | |||||
def __init__(self: Self, expr: SparkLikeExpr) -> None:
    # Keep a reference to the compliant expression this namespace wraps;
    # every dt.* method delegates back to it via `_from_call`.
    self._compliant_expr = expr
|
||||||
def to_string(self: Self, format: str) -> SparkLikeExpr:  # noqa: A002
    """Format datetime values as strings according to *format*.

    Supports a subset of strftime directives. The ISO-week directives
    (%G, %V, %u) and fractional seconds (%f / %.f) have no direct
    Spark datetime-pattern equivalent, so they are built by hand.

    Arguments:
        format: strftime-style format string.

    Returns:
        A new SparkLikeExpr producing string-typed columns.
    """

    def _iso_day_of_week(_input: Column) -> Column:
        # Spark's dayofweek is 1=Sunday..7=Saturday; ISO wants
        # 1=Monday..7=Sunday, so shift everything down and move Sunday to 7.
        day = F.dayofweek(_input)
        return F.when(day == 1, 7).otherwise(day - 1)

    def _iso_week_year(_input: Column) -> Column:
        # ISO week-based year (%G): dates falling in ISO week 52/53 at the
        # start of January belong to the previous year, and dates in ISO
        # week 1 at the end of December belong to the next year.
        # Computed arithmetically because Spark 3's default
        # spark.sql.legacy.timeParserPolicy rejects the week-year pattern
        # "YYYY" — this avoids needing the LEGACY parser config.
        year = F.year(_input)
        week = F.weekofyear(_input)
        month = F.month(_input)
        return (
            F.when((month == 1) & (week >= 52), year - 1)
            .when((month == 12) & (week == 1), year + 1)
            .otherwise(year)
            .cast("string")
        )

    def _format_iso_week_with_day(_input: Column) -> Column:
        """Format datetime as ISO week string with day (%G-W%V-%u)."""
        year = _iso_week_year(_input)
        week = F.lpad(F.weekofyear(_input).cast("string"), 2, "0")
        day = _iso_day_of_week(_input)
        return F.concat(year, F.lit("-W"), week, F.lit("-"), day.cast("string"))

    def _format_iso_week(_input: Column) -> Column:
        """Format datetime as ISO week string (%G-W%V)."""
        year = _iso_week_year(_input)
        week = F.lpad(F.weekofyear(_input).cast("string"), 2, "0")
        return F.concat(year, F.lit("-W"), week)

    def _format_iso_datetime(_input: Column) -> Column:
        """Format datetime as ISO datetime with microsecond precision."""
        date_part = F.date_format(_input, "yyyy-MM-dd")
        time_part = F.date_format(_input, "HH:mm:ss")
        # unix_micros gives microseconds since the epoch; the modulo keeps
        # only the sub-second component, zero-padded to 6 digits.
        micros = F.unix_micros(_input) % 1_000_000
        micros_str = F.lpad(micros.cast("string"), 6, "0")
        return F.concat(date_part, F.lit("T"), time_part, F.lit("."), micros_str)

    def _to_string(_input: Column) -> Column:
        # Special formats that have no direct Spark pattern equivalent.
        if format == "%G-W%V":
            return _format_iso_week(_input)
        if format == "%G-W%V-%u":
            return _format_iso_week_with_day(_input)
        if format in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S%.f"):
            return _format_iso_datetime(_input)

        # Remaining strftime directives map one-to-one onto Spark
        # datetime patterns.
        java_fmt = (
            format.replace("%Y", "yyyy")
            .replace("%m", "MM")
            .replace("%d", "dd")
            .replace("%H", "HH")
            .replace("%M", "mm")
            .replace("%S", "ss")
        )
        return F.date_format(_input, java_fmt)

    return self._compliant_expr._from_call(
        _to_string,
        "to_string",
        returns_scalar=self._compliant_expr._returns_scalar,
    )
|
||||||
def date(self: Self) -> SparkLikeExpr: | ||||||
return self._compliant_expr._from_call( | ||||||
F.to_date, | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,6 +159,7 @@ def pyspark_lazy_constructor() -> Callable[[Any], IntoFrame]: # pragma: no cove | |
.config("spark.sql.shuffle.partitions", "2") | ||
# common timezone for all tests environments | ||
.config("spark.sql.session.timeZone", "UTC") | ||
.config("spark.sql.legacy.timeParserPolicy", "LEGACY") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what does this do? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I faced the date migration issue from spark 2.0 to 3.0 (basically to match the behaviour datetime parsing of spark versions <3). I refered this issue here https://stackoverflow.com/questions/62602720/string-to-date-migration-from-spark-2-0-to-3-0-gives-fail-to-recognize-eee-mmm There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of setting this, I suggest using a valid pattern for Spark 3.0: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html When we remove the config we get the error:
we need to substitute |
||
.getOrCreate() | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.