From a1a631be2f319b128afada1f012a2cc485b709f0 Mon Sep 17 00:00:00 2001
From: "joey.ljy"
Date: Wed, 8 Mar 2023 19:51:54 +0800
Subject: [PATCH 1/2] agg enable corr covar_pop and covar_samp

---
 velox/substrait/SubstraitToVeloxPlanValidator.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/velox/substrait/SubstraitToVeloxPlanValidator.cpp b/velox/substrait/SubstraitToVeloxPlanValidator.cpp
index fc57d2e400a3..94f0c2a9e436 100644
--- a/velox/substrait/SubstraitToVeloxPlanValidator.cpp
+++ b/velox/substrait/SubstraitToVeloxPlanValidator.cpp
@@ -789,7 +789,10 @@ bool SubstraitToVeloxPlanValidator::validate(
       "var_samp",
       "var_pop",
       "bitwise_and_agg",
-      "bitwise_or_agg"};
+      "bitwise_or_agg",
+      "corr",
+      "covar_pop",
+      "covar_samp"};
   for (const auto& funcSpec : funcSpecs) {
     auto funcName = subParser_->getSubFunctionName(funcSpec);
     if (supportedFuncs.find(funcName) == supportedFuncs.end()) {

From 4f319c604c13ff582165ed059ad08ceed16517cf Mon Sep 17 00:00:00 2001
From: "joey.ljy"
Date: Wed, 29 Mar 2023 11:49:54 +0800
Subject: [PATCH 2/2] modify the calculation order

---
 .../functions/prestosql/aggregates/CovarianceAggregates.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/velox/functions/prestosql/aggregates/CovarianceAggregates.cpp b/velox/functions/prestosql/aggregates/CovarianceAggregates.cpp
index 00a7c7e8a6a6..f652a4c4b15f 100644
--- a/velox/functions/prestosql/aggregates/CovarianceAggregates.cpp
+++ b/velox/functions/prestosql/aggregates/CovarianceAggregates.cpp
@@ -229,9 +229,8 @@ struct CorrResultAccessor {
   }
 
   static double result(const CorrAccumulator& accumulator) {
-    double stddevX = std::sqrt(accumulator.m2X());
-    double stddevY = std::sqrt(accumulator.m2Y());
-    return accumulator.c2() / stddevX / stddevY;
+    // Need to modify the calculation order to maintain the same accuracy as spark
+    return accumulator.c2() / std::sqrt(accumulator.m2X() * accumulator.m2Y());
   }
 };
 