Skip to content

Commit

Permalink
Fix semantic issues in cast function (#280)
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE authored May 26, 2023
1 parent 23c0569 commit 97d2829
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 15 deletions.
17 changes: 17 additions & 0 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,16 @@ TEST_F(CastExprTest, date) {

setCastIntByTruncate(true);
testCast<std::string, Date>("date", input, result);

// Wrong date format case.
std::vector<std::optional<std::string>> inputWrongFormat{
"1970-01/01", "2023/05/10", "2023-/05-/10", "20150318"};
std::vector<std::optional<Date>> nullResult{
std::nullopt, std::nullopt, std::nullopt, std::nullopt};
testCast<std::string, Date>(
"date", inputWrongFormat, nullResult, false, true);
testCast<std::string, Date>(
"date", inputWrongFormat, nullResult, true, false);
}

TEST_F(CastExprTest, invalidDate) {
Expand Down Expand Up @@ -552,6 +562,13 @@ TEST_F(CastExprTest, allowDecimal) {
"int", {"-.", "0.0", "125.5", "-128.3"}, {0, 0, 125, -128}, false, true);
}

TEST_F(CastExprTest, sparkSemantic) {
// Allow decimal.
setCastIntAllowDecimalAndByTruncate(true);
testCast<float, bool>(
"bool", {0.5, -0.5, 1, 0}, {true, true, true, false}, false, true);
}

constexpr vector_size_t kVectorSize = 1'000;

TEST_F(CastExprTest, mapCast) {
Expand Down
20 changes: 12 additions & 8 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,11 +547,13 @@ TEST_F(JodaDateTimeFormatterTest, parseYear) {
EXPECT_THROW(parseJoda("++100", "y"), VeloxUserError);

// Probe the year range
EXPECT_THROW(parseJoda("-292275056", "y"), VeloxUserError);
EXPECT_THROW(parseJoda("292278995", "y"), VeloxUserError);
EXPECT_EQ(
util::fromTimestampString("292278994-01-01"),
parseJoda("292278994", "y").timestamp);
// Temporarily removed for adapting to spark semantic (not allowed year digits
// larger than 7).
// EXPECT_THROW(parseJoda("-292275056", "y"), VeloxUserError);
// EXPECT_THROW(parseJoda("292278995", "y"), VeloxUserError);
// EXPECT_EQ(
// util::fromTimestampString("292278994-01-01"),
// parseJoda("292278994", "y").timestamp);
}

TEST_F(JodaDateTimeFormatterTest, parseWeekYear) {
Expand Down Expand Up @@ -626,9 +628,11 @@ TEST_F(JodaDateTimeFormatterTest, parseWeekYear) {

TEST_F(JodaDateTimeFormatterTest, parseCenturyOfEra) {
// Probe century range
EXPECT_EQ(
util::fromTimestampString("292278900-01-01 00:00:00"),
parseJoda("2922789", "CCCCCCC").timestamp);
// Temporarily removed for adapting to spark semantic (not allowed year digits
// larger than 7).
// EXPECT_EQ(
// util::fromTimestampString("292278900-01-01 00:00:00"),
// parseJoda("2922789", "CCCCCCC").timestamp);
EXPECT_EQ(
util::fromTimestampString("00-01-01 00:00:00"),
parseJoda("0", "C").timestamp);
Expand Down
29 changes: 24 additions & 5 deletions velox/type/Conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,9 @@ struct Converter<
if (v > LimitType::maxLimit()) {
return LimitType::max();
}
if (v < LimitType::minLimit()) {
// bool type's min is 0, but spark expects true for casting negative float
// data.
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
return LimitType::min();
}
return LimitType::cast(v);
Expand All @@ -321,7 +323,9 @@ struct Converter<
if (v > LimitType::maxLimit()) {
return LimitType::max();
}
if (v < LimitType::minLimit()) {
// bool type's min is 0, but spark expects true for casting negative float
// data.
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
return LimitType::min();
}
return LimitType::cast(v);
Expand Down Expand Up @@ -596,15 +600,30 @@ struct Converter<TypeKind::DATE, void, TRUNCATE, ALLOW_DECIMAL> {
}

static T cast(folly::StringPiece v, bool& nullOutput) {
return fromDateString(v.data(), v.size());
try {
return fromDateString(v.data(), v.size());
} catch (const VeloxUserError& ve) {
nullOutput = true;
return (T)0;
}
}

static T cast(const StringView& v, bool& nullOutput) {
return fromDateString(v.data(), v.size());
try {
return fromDateString(v.data(), v.size());
} catch (const VeloxUserError& ve) {
nullOutput = true;
return (T)0;
}
}

static T cast(const std::string& v, bool& nullOutput) {
return fromDateString(v.data(), v.size());
try {
return fromDateString(v.data(), v.size());
} catch (const VeloxUserError& ve) {
nullOutput = true;
return (T)0;
}
}

static T cast(const Timestamp& t, bool& nullOutput) {
Expand Down
8 changes: 7 additions & 1 deletion velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,17 @@ bool tryParseDateString(
if (!characterIsDigit(buf[pos])) {
return false;
}
int yearSegStart = pos;
// First parse the year.
for (; pos < len && characterIsDigit(buf[pos]); pos++) {
year = checkedPlus((buf[pos] - '0'), checkedMultiply(year, 10));
if (year > kMaxYear) {
break;
}
// Align with spark, year digits should not be greater than 7.
if (pos - yearSegStart + 1 > 7) {
return false;
}
}
if (yearneg) {
year = checkedNegate(year);
Expand All @@ -203,7 +208,8 @@ bool tryParseDateString(

// Fetch the separator.
sep = buf[pos++];
if (sep != ' ' && sep != '-' && sep != '/' && sep != '\\') {
// For spark, "/" separtor is not supported.
if (sep != ' ' && sep != '-' && sep != '\\') {
// Invalid separator.
return false;
}
Expand Down
3 changes: 2 additions & 1 deletion velox/type/tests/TimestampConversionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ TEST(DateTimeUtilTest, fromDateString) {
EXPECT_EQ(-719162, fromDateString(" \t \n 00001-1-1 \n"));

// Different separators.
EXPECT_EQ(-719162, fromDateString("1/1/1"));
// Illegal date format for spark.
// EXPECT_EQ(-719162, fromDateString("1/1/1"));
EXPECT_EQ(-719162, fromDateString("1 1 1"));
EXPECT_EQ(-719162, fromDateString("1\\1\\1"));

Expand Down

0 comments on commit 97d2829

Please sign in to comment.