Skip to content

Commit

Permalink
Folder: type, vector
Browse files Browse the repository at this point in the history
Related PRs:

Support more data types for read filter facebookincubator#139
Fix cast double to decimal facebookincubator#179
Fix casting from string to decimal facebookincubator#281
Support cast decimal to int facebookincubator#177
Fix null on overflow and multiply as spark precision and support cast varchar to decimal facebookincubator#169
Disable tokenizing the path by dot facebookincubator#109
Serialize and deserialize RowVector facebookincubator#250
Support datetime pattern in spark facebookincubator#94
  • Loading branch information
zhejiangxiaomai committed Jul 4, 2023
1 parent 6ae5844 commit e672fb1
Show file tree
Hide file tree
Showing 21 changed files with 1,447 additions and 59 deletions.
144 changes: 131 additions & 13 deletions velox/type/Conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,32 @@
#include <string>
#include <type_traits>
#include "velox/common/base/Exceptions.h"
#include "velox/type/DecimalUtil.h"
#include "velox/type/TimestampConversion.h"
#include "velox/type/Type.h"

namespace facebook::velox::util {

template <TypeKind KIND, typename = void, bool TRUNCATE = false>
/// Primary Converter template. It is the fallback used for any KIND that has
/// no matching specialization; both overloads only report the conversion as
/// unsupported through VELOX_UNSUPPORTED and never compute a value.
template <
    TypeKind KIND,
    typename = void,
    bool TRUNCATE = false,
    bool ALLOW_DECIMAL = false>
struct Converter {
  /// Single-argument form. The input is ignored; the message names only the
  /// target type.
  template <typename From>
  static typename TypeTraits<KIND>::NativeType cast(From /* val */) {
    VELOX_UNSUPPORTED(
        "Conversion to {} is not supported", TypeTraits<KIND>::name);
  }

  /// Three-argument form. All arguments are ignored; the message names the
  /// source type (via CppToType<From>) and the target type.
  template <typename From>
  static typename TypeTraits<KIND>::NativeType cast(
      From /* val */,
      bool& /* nullOutput */,
      const TypePtr& /* toType */) {
    VELOX_UNSUPPORTED(
        "Conversion of {} to {} is not supported",
        CppToType<From>::name,
        TypeTraits<KIND>::name);
  }
};

template <>
Expand Down Expand Up @@ -65,15 +79,16 @@ struct Converter<TypeKind::BOOLEAN> {
}
};

template <TypeKind KIND, bool TRUNCATE>
template <TypeKind KIND, bool TRUNCATE, bool ALLOW_DECIMAL>
struct Converter<
KIND,
std::enable_if_t<
KIND == TypeKind::BOOLEAN || KIND == TypeKind::TINYINT ||
KIND == TypeKind::SMALLINT || KIND == TypeKind::INTEGER ||
KIND == TypeKind::BIGINT || KIND == TypeKind::HUGEINT,
void>,
TRUNCATE> {
TRUNCATE,
ALLOW_DECIMAL> {
using T = typename TypeTraits<KIND>::NativeType;

template <typename From>
Expand All @@ -82,7 +97,41 @@ struct Converter<
"Conversion to {} is not supported", TypeTraits<KIND>::name);
}

static T convertStringToInt(const folly::StringPiece v) {
// Generic (value, type) overload for source types that have no dedicated
// overload in this converter. Not implemented: it only invokes VELOX_NYI.
// `From` must be a template parameter of this method, since the enclosing
// specialization does not declare it.
template <typename From>
static T cast(const From& v, const TypePtr& toType) {
  VELOX_NYI();
}

// Casts a long decimal value `d` (described by `fromType`) to T.
// The value is first rescaled to scale 0 with the precision unchanged, then
// handed to the single-argument cast overload.
// NOTE(review): the two trailing `false` flags are passed through to
// DecimalUtil::rescaleWithRoundUp as-is; their meaning is not visible here.
static T cast(const int128_t& d, const TypePtr& fromType) {
  const auto& longDecimal = fromType->asLongDecimal();
  auto integralPart = DecimalUtil::rescaleWithRoundUp<int128_t, int128_t>(
      d,
      longDecimal.precision(),
      longDecimal.scale(),
      longDecimal.precision(),
      0,
      false,
      false);
  return cast(integralPart.value());
}

// Casts a short decimal value `d` (described by `fromType`) to T.
// The value is first rescaled to scale 0 with the precision unchanged, then
// handed to the single-argument cast overload, mirroring the long-decimal
// overload.
// NOTE(review): the two trailing `false` flags are passed through to
// DecimalUtil::rescaleWithRoundUp as-is; their meaning is not visible here.
static T cast(const int64_t& d, const TypePtr& fromType) {
  const auto& decimalType = fromType->asShortDecimal();
  auto scale0Decimal = DecimalUtil::rescaleWithRoundUp<int64_t, int64_t>(
      d,
      decimalType.precision(),
      decimalType.scale(),
      decimalType.precision(),
      0,
      false,
      false);
  return cast(scale0Decimal.value());
}

static T convertStringToInt(
const folly::StringPiece& v,
const bool allowDecimal) {
// Handle the boolean target case first because it is in this scope
if constexpr (std::is_same_v<T, bool>) {
return folly::to<T>(v);
Expand All @@ -106,6 +155,10 @@ struct Converter<
}
if (negative) {
for (; index < len; index++) {
// Allow decimal and ignore the fractional part.
if (v[index] == '.' && allowDecimal) {
break;
}
if (!std::isdigit(v[index])) {
VELOX_USER_FAIL("Encountered a non-digit character");
}
Expand All @@ -117,6 +170,9 @@ struct Converter<
}
} else {
for (; index < len; index++) {
if (v[index] == '.' && allowDecimal) {
break;
}
if (!std::isdigit(v[index])) {
VELOX_USER_FAIL("Encountered a non-digit character");
}
Expand All @@ -135,7 +191,7 @@ struct Converter<
static T cast(folly::StringPiece v) {
try {
if constexpr (TRUNCATE) {
return convertStringToInt(v);
return convertStringToInt(v, ALLOW_DECIMAL);
} else {
return folly::to<T>(v);
}
Expand All @@ -147,7 +203,7 @@ struct Converter<
static T cast(const StringView& v) {
try {
if constexpr (TRUNCATE) {
return convertStringToInt(folly::StringPiece(v));
return convertStringToInt(folly::StringPiece(v), ALLOW_DECIMAL);
} else {
return folly::to<T>(folly::StringPiece(v));
}
Expand All @@ -159,7 +215,7 @@ struct Converter<
static T cast(const std::string& v) {
try {
if constexpr (TRUNCATE) {
return convertStringToInt(v);
return convertStringToInt(v, ALLOW_DECIMAL);
} else {
return folly::to<T>(v);
}
Expand Down Expand Up @@ -223,6 +279,11 @@ struct Converter<
} else if (v < LimitType::minLimit()) {
return LimitType::min();
}
// bool type's min is 0, but spark expects true for casting negative float
// data.
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
return LimitType::min();
}
return LimitType::cast(v);
} else {
if (std::isnan(v)) {
Expand All @@ -247,6 +308,11 @@ struct Converter<
} else if (v < LimitType::minLimit()) {
return LimitType::min();
}
// bool type's min is 0, but spark expects true for casting negative float
// data.
if (!std::is_same_v<T, bool> && v < LimitType::minLimit()) {
return LimitType::min();
}
return LimitType::cast(v);
} else {
if (std::isnan(v)) {
Expand Down Expand Up @@ -287,13 +353,22 @@ struct Converter<
return folly::to<T>(v);
}
}

// Converts a 128-bit integer to T with a plain value conversion.
// The TRUNCATE and non-TRUNCATE paths perform the same conversion, so a
// single static_cast covers both. `nullOutput` is never written.
static T cast(const int128_t& v, bool& /* nullOutput */) {
  return static_cast<T>(v);
}
};

template <TypeKind KIND, bool TRUNCATE>
template <TypeKind KIND, bool TRUNCATE, bool ALLOW_DECIMAL>
struct Converter<
KIND,
std::enable_if_t<KIND == TypeKind::REAL || KIND == TypeKind::DOUBLE, void>,
TRUNCATE> {
TRUNCATE,
ALLOW_DECIMAL> {
using T = typename TypeTraits<KIND>::NativeType;

template <typename From>
Expand All @@ -305,6 +380,20 @@ struct Converter<
}
}

// Generic (value, type) overload for source types that have no dedicated
// overload in this converter. Not implemented: it only invokes VELOX_NYI.
// `From` must be a template parameter of this method, since the enclosing
// specialization does not declare it.
template <typename From>
static T cast(const From& v, const TypePtr& toType) {
  VELOX_NYI();
}

// Converts the short-decimal value `v` to T by delegating to
// DecimalUtil::toDoubleValue with the scale taken from `fromType`.
static T cast(const int64_t& v, const TypePtr& fromType) {
  // Bind by const reference: only the scale is read, so the decimal type
  // object does not need to be copied.
  const auto& decimalType = fromType->asShortDecimal();
  return DecimalUtil::toDoubleValue(v, decimalType.scale());
}

// Converts the long-decimal value `v` to T by delegating to
// DecimalUtil::toDoubleValue with the scale taken from `fromType`.
static T cast(const int128_t& v, const TypePtr& fromType) {
  // Bind by const reference: only the scale is read, so the decimal type
  // object does not need to be copied.
  const auto& decimalType = fromType->asLongDecimal();
  return DecimalUtil::toDoubleValue(v, decimalType.scale());
}

static T cast(folly::StringPiece v) {
return cast<folly::StringPiece>(v);
}
Expand Down Expand Up @@ -361,6 +450,11 @@ struct Converter<
VELOX_UNSUPPORTED(
"Conversion of Timestamp to Real or Double is not supported");
}

// int128_t input with a null-output flag is not a supported source for
// REAL/DOUBLE; both arguments are ignored and the call is reported through
// VELOX_UNSUPPORTED.
static T cast(const int128_t& /* d */, bool& /* nullOutput */) {
  VELOX_UNSUPPORTED(
      "Conversion of int128_t to Real or Double is not supported");
}
};

template <bool TRUNCATE>
Expand All @@ -372,8 +466,21 @@ struct Converter<TypeKind::VARBINARY, void, TRUNCATE> {
}
};

template <bool TRUNCATE>
struct Converter<TypeKind::VARCHAR, void, TRUNCATE> {
template <bool TRUNCATE, bool ALLOW_DECIMAL>
struct Converter<TypeKind::VARCHAR, void, TRUNCATE, ALLOW_DECIMAL> {
// Generic (value, source type) overload to VARCHAR for inputs without a
// dedicated overload. Not implemented: it only invokes VELOX_NYI.
template <typename T>
static std::string cast(const T& /* v */, const TypePtr& /* fromType */) {
  VELOX_NYI();
}

// Renders the int64_t value `v` as a string by delegating to
// DecimalUtil::toString together with its source type `fromType`.
// NOTE(review): presumably `v` is a short-decimal value, mirroring the
// int128_t overload below — confirm against callers.
static std::string cast(const int64_t& v, const TypePtr& fromType) {
return DecimalUtil::toString(v, fromType);
}

// Renders the int128_t value `v` as a string by delegating to
// DecimalUtil::toString together with its source type `fromType`.
// NOTE(review): presumably `v` is a long-decimal value — confirm against
// callers.
static std::string cast(const int128_t& v, const TypePtr& fromType) {
return DecimalUtil::toString(v, fromType);
}

template <typename T>
static std::string cast(const T& val) {
if constexpr (
Expand Down Expand Up @@ -402,6 +509,11 @@ template <>
struct Converter<TypeKind::TIMESTAMP> {
using T = typename TypeTraits<TypeKind::TIMESTAMP>::NativeType;

// Generic (value, type) overload to TIMESTAMP. Not implemented: both
// arguments are ignored and the call only invokes VELOX_NYI.
template <typename From>
static T cast(const From& /* v */, const TypePtr& /* toType */) {
  VELOX_NYI();
}

template <typename From>
static T cast(const From& /* v */) {
VELOX_UNSUPPORTED("Conversion to Timestamp is not supported");
Expand All @@ -427,9 +539,15 @@ struct Converter<TypeKind::TIMESTAMP> {
};

// Allow conversions from string to DATE type.
template <bool TRUNCATE>
struct Converter<TypeKind::DATE, void, TRUNCATE> {
template <bool TRUNCATE, bool ALLOW_DECIMAL>
struct Converter<TypeKind::DATE, void, TRUNCATE, ALLOW_DECIMAL> {
using T = typename TypeTraits<TypeKind::DATE>::NativeType;

// Generic (value, null flag, type) overload to DATE. Not implemented: all
// arguments are ignored, `nullOutput` is never written, and the call only
// invokes VELOX_NYI.
template <typename From>
static T cast(
    const From& /* v */,
    bool& /* nullOutput */,
    const TypePtr& /* toType */) {
  VELOX_NYI();
}

template <typename From>
static T cast(const From& /* v */) {
VELOX_UNSUPPORTED("Conversion to Date is not supported");
Expand Down
Loading

0 comments on commit e672fb1

Please sign in to comment.