Skip to content

Commit

Permalink
Add rank() and dense_rank() window function in spark sql
Browse files Browse the repository at this point in the history
  • Loading branch information
JkSelf committed Aug 28, 2023
1 parent f943531 commit cdd649d
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 15 deletions.
11 changes: 10 additions & 1 deletion velox/docs/functions/spark/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,13 @@ Rank functions

.. spark:function:: row_number() -> integer
Returns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition.
Returns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition.

.. spark:function:: rank() -> integer
Returns the rank of a value in a group of values. The rank is one plus the number of rows preceding the row that are not peers of the row. Thus, tie values in the ordering will produce gaps in the sequence. The ranking is performed for each window partition.

.. spark:function:: dense_rank() -> integer
Returns the rank of a value in a group of values. This is similar to rank(), except that tie values do not produce gaps in the sequence.

2 changes: 1 addition & 1 deletion velox/functions/lib/window/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_library(velox_functions_window NthValue.cpp RowNumber.cpp)
add_library(velox_functions_window NthValue.cpp Rank.cpp RowNumber.cpp)

target_link_libraries(velox_functions_window velox_buffer velox_exec
Folly::folly)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "velox/expression/FunctionSignature.h"
#include "velox/vector/FlatVector.h"

namespace facebook::velox::window::prestosql {
namespace facebook::velox::functions::window {

// Types of rank functions.
enum class RankType {
Expand Down Expand Up @@ -112,14 +112,20 @@ void registerRankInternal(
});
}

void registerRank(const std::string& name) {
// Registers the rank() window function under `name`. Instantiates the shared
// registration helper with RankType::kRank and an int64_t result, passing
// "bigint" as the return-type string. RegistrationFunctions.h documents this
// as the Presto variant.
void registerRankBigint(const std::string& name) {
registerRankInternal<RankType::kRank, int64_t>(name, "bigint");
}
void registerDenseRank(const std::string& name) {
// Registers the rank() window function under `name`. Instantiates the shared
// registration helper with RankType::kRank and an int32_t result, passing
// "integer" as the return-type string. RegistrationFunctions.h documents this
// as the Spark variant.
void registerRankInteger(const std::string& name) {
registerRankInternal<RankType::kRank, int32_t>(name, "integer");
}
// Registers the dense_rank() window function under `name`. Instantiates the
// shared registration helper with RankType::kDenseRank and an int64_t result,
// passing "bigint" as the return-type string. RegistrationFunctions.h
// documents this as the Presto variant.
void registerDenseRankBigint(const std::string& name) {
registerRankInternal<RankType::kDenseRank, int64_t>(name, "bigint");
}
// Registers the dense_rank() window function under `name`. Instantiates the
// shared registration helper with RankType::kDenseRank and an int32_t result,
// passing "integer" as the return-type string. RegistrationFunctions.h
// documents this as the Spark variant.
void registerDenseRankInteger(const std::string& name) {
registerRankInternal<RankType::kDenseRank, int32_t>(name, "integer");
}
// Registers the percent_rank() window function under `name`. Instantiates the
// shared registration helper with RankType::kPercentRank and a double result,
// passing "double" as the return-type string. Only this one variant is
// defined here; there are no separate bigint/integer flavours.
void registerPercentRank(const std::string& name) {
registerRankInternal<RankType::kPercentRank, double>(name, "double");
}

} // namespace facebook::velox::window::prestosql
} // namespace facebook::velox::functions::window
20 changes: 20 additions & 0 deletions velox/functions/lib/window/RegistrationFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,24 @@ void registerRowNumberBigint(const std::string& name);
// Register the Spark function row_number() with the integer data type
// for the return value.
void registerRowNumberInteger(const std::string& name);

// Register the Presto function rank() with the bigint data type
// for the return value.
void registerRankBigint(const std::string& name);

// Register the Spark function rank() with the integer data type
// for the return value.
void registerRankInteger(const std::string& name);

// Register the Presto function dense_rank() with the bigint data type
// for the return value.
void registerDenseRankBigint(const std::string& name);

// Register the Spark function dense_rank() with the integer data type
// for the return value.
void registerDenseRankInteger(const std::string& name);

// Register the function percent_rank() both for Presto and Spark.
void registerPercentRank(const std::string& name);

} // namespace facebook::velox::functions::window
2 changes: 1 addition & 1 deletion velox/functions/prestosql/window/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ if(${VELOX_BUILD_TESTING})
endif()

add_library(velox_window CumeDist.cpp FirstLastValue.cpp LeadLag.cpp Ntile.cpp
Rank.cpp WindowFunctionsRegistration.cpp)
WindowFunctionsRegistration.cpp)

target_link_libraries(velox_window velox_buffer velox_exec
velox_functions_window Folly::folly)
10 changes: 3 additions & 7 deletions velox/functions/prestosql/window/WindowFunctionsRegistration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ namespace facebook::velox::window {

namespace prestosql {

extern void registerRowNumber(const std::string& name);
extern void registerRank(const std::string& name);
extern void registerDenseRank(const std::string& name);
extern void registerPercentRank(const std::string& name);
extern void registerCumeDist(const std::string& name);
extern void registerNtile(const std::string& name);
extern void registerFirstValue(const std::string& name);
Expand All @@ -33,9 +29,9 @@ extern void registerLead(const std::string& name);

void registerAllWindowFunctions(const std::string& prefix) {
functions::window::registerRowNumberBigint(prefix + "row_number");
registerRank(prefix + "rank");
registerDenseRank(prefix + "dense_rank");
registerPercentRank(prefix + "percent_rank");
functions::window::registerRankBigint(prefix + "rank");
functions::window::registerDenseRankBigint(prefix + "dense_rank");
functions::window::registerPercentRank(prefix + "percent_rank");
registerCumeDist(prefix + "cume_dist");
registerNtile(prefix + "ntile");
functions::window::registerNthValueBigint(prefix + "nth_value");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace facebook::velox::functions::window::sparksql {
// Registers the Spark SQL window functions, each under `prefix` followed by
// its SQL name: nth_value, row_number, rank and dense_rank.
//
// All four calls use the *Integer registration variants from
// functions::window. For row_number, rank and dense_rank,
// RegistrationFunctions.h documents these as the Spark flavours whose return
// value is an integer.
// NOTE(review): what "Integer" selects for nth_value is not visible here;
// confirm against its declaration.
void registerWindowFunctions(const std::string& prefix) {
functions::window::registerNthValueInteger(prefix + "nth_value");
functions::window::registerRowNumberInteger(prefix + "row_number");
functions::window::registerRankInteger(prefix + "rank");
functions::window::registerDenseRankInteger(prefix + "dense_rank");
}

} // namespace facebook::velox::functions::window::sparksql
4 changes: 3 additions & 1 deletion velox/functions/sparksql/window/tests/SparkWindowTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ namespace {

static const std::vector<std::string> kSparkWindowFunctions = {
std::string("nth_value(c0, 1)"),
std::string("row_number()")};
std::string("row_number()"),
std::string("rank()"),
std::string("dense_rank()")};

struct SparkWindowTestParam {
const std::string function;
Expand Down

0 comments on commit cdd649d

Please sign in to comment.