From 18d30f9e795e3b430d62f5b6d7a670a60727244e Mon Sep 17 00:00:00 2001 From: Ben Plessinger Date: Tue, 7 Feb 2017 09:17:48 -0500 Subject: [PATCH 1/2] cleanup --- classes/DB/Aggregator.php | 200 +++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 101 deletions(-) diff --git a/classes/DB/Aggregator.php b/classes/DB/Aggregator.php index 46e8b84ee0..207fc9c07e 100644 --- a/classes/DB/Aggregator.php +++ b/classes/DB/Aggregator.php @@ -7,111 +7,109 @@ * @author: Amin Ghadersohi 8/1/2013 * */ - + class Aggregator extends Loggable { - static $__initialized; - - /** - * (Optional) The name of the realm associated with this aggregator. - * - * @var string|null - */ - protected $realmName = null; + private static $__initialized; - public function __construct() - { - } - - /** - * Update the filter lists associated with this aggregator's realm. - * - * If $realmName has not been set for this aggregator, this will do nothing. - */ - public function updateFilters() - { - if (empty($this->realmName)) { - return; - } + /** + * (Optional) The name of the realm associated with this aggregator. + * + * @var string|null + */ + protected $realmName = null; - $filterListBuilder = new FilterListBuilder(); - $filterListBuilder->buildRealmLists($this->realmName); - } + public function __construct() + { + } - /* - * Replaces all the occurrences of : in $statement with key/value pairs in $params = array('' => 'value',...); - */ - protected function bindParams($statement, $params) - { - foreach($params as $param_key => $param_value) - { - $statement = str_replace(':'.$param_key,$param_value,$statement);//todo: use regex such that prefixes of param names dont get replaced. - } - return $statement; - } - - /* - * Writes the query, after binding the value of all bound params, to outfile. - */ - protected function dumpQuery($outfile, $select_statement, $select_params) - { - file_put_contents($outfile, $this->bindParams($select_statement,$select_params)); - } - - /* - * Returns a sql case statement distributes a stat that was recorded between $s1 and $e1 - * linearly between $s2 and $e2 where duration between $s2 and $e2 is $max because $max is not always $e2 - $s2 - */ - protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $e2) - { - return "case when ($s1 between $s2 and $e2 and - $e1 between $s2 and $e2 ) - then $stat - when ($s1 < $s2 and - $e1 between $s2 and $e2 ) - then $stat*($e1 - $s2 ) / ($e1 - $s1) - when ($s1 between $s2 and $e2 and - $e1 > $e2 ) - then $stat*( $e2 - $s1) / ($e1 - $s1) - when ($s1 < $s2 and - $e1 > $e2 ) - then $stat*( $max ) / ($e1 - $s1) - else $stat - end"; - } + /** + * Update the filter lists associated with this aggregator's realm. + * + * If $realmName has not been set for this aggregator, this will do nothing. + */ + public function updateFilters() + { + if (empty($this->realmName)) { + return; + } - /* - * Returns a sql case statement distributes a stat that was recorded between $s1 and $e1 - * linearly between $s2 and $e2 where duration between $s2 and $e2 is $max because $max is not always $e2 - $s2 - */ - protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, $s1, $e1, $s2, $e2) - { - return "case when ($s1 between $s2 and $e2 and - $e1 between $s2 and $e2 ) - then $stat - when ($s1 < $s2 and - $e1 between $s2 and $e2 ) - then CAST( $stat*($e1 - $s2 ) / ($e1 - $s1) AS $dtype ) - when ($s1 between $s2 and $e2 and - $e1 > $e2 ) - then CAST( $stat*( $e2 - $s1) / ($e1 - $s1) AS $dtype ) - when ($s1 < $s2 and - $e1 > $e2 ) - then CAST( $stat*( $max ) / ($e1 - $s1) AS $dtype ) - else $stat - end"; - } - - /* - * Returns a SQL case statement given condition/then/else as strings - */ - protected function getIf($condition, $then, $else) - { - return "case when $condition - then $then - else $else - end"; - } -} //Aggregator + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->buildRealmLists($this->realmName); + } + + /* + * Replaces all the occurrences of : in $statement with key/value pairs in $params = array('' => 'value',...); + */ + protected function bindParams($statement, $params) + { + foreach($params as $param_key => $param_value) + { + $statement = str_replace(':'.$param_key, $param_value, $statement);//todo: use regex such that prefixes of param names dont get replaced. + } + return $statement; + } + + /* + * Writes the query, after binding the value of all bound params, to outfile. + */ + protected function dumpQuery($outfile, $select_statement, $select_params) + { + file_put_contents($outfile, $this->bindParams($select_statement, $select_params)); + } -?> + /* + * Returns a sql case statement distributes a stat that was recorded between $s1 and $e1 + * linearly between $s2 and $e2 where duration between $s2 and $e2 is $max because $max is not always $e2 - $s2 + */ + protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $e2) + { + return "case when ($s1 between $s2 and $e2 and + $e1 between $s2 and $e2 ) + then $stat + when ($s1 < $s2 and + $e1 between $s2 and $e2 ) + then $stat*($e1 - $s2 ) / ($e1 - $s1) + when ($s1 between $s2 and $e2 and + $e1 > $e2 ) + then $stat*( $e2 - $s1) / ($e1 - $s1) + when ($s1 < $s2 and + $e1 > $e2 ) + then $stat*( $max ) / ($e1 - $s1) + else $stat + end"; + } + + /* + * Returns a sql case statement distributes a stat that was recorded between $s1 and $e1 + * linearly between $s2 and $e2 where duration between $s2 and $e2 is $max because $max is not always $e2 - $s2 + */ + protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, $s1, $e1, $s2, $e2) + { + return "case when ($s1 between $s2 and $e2 and + $e1 between $s2 and $e2 ) + then $stat + when ($s1 < $s2 and + $e1 between $s2 and $e2 ) + then CAST( $stat*($e1 - $s2 ) / ($e1 - $s1) AS $dtype ) + when ($s1 between $s2 and $e2 and + $e1 > $e2 ) + then CAST( $stat*( $e2 - $s1) / ($e1 - $s1) AS $dtype ) + when ($s1 < $s2 and + $e1 > $e2 ) + then CAST( $stat*( $max ) / ($e1 - $s1) AS $dtype ) + else $stat + end"; + } + + /* + * Returns a SQL case statement given condition/then/else as strings + */ + protected function getIf($condition, $then, $else) + { + return "case when $condition + then $then + else $else + end"; + } +} //Aggregator From 7d3bd8c2dea8f7ca4f34184c2a6f2eb38cd5ff30 Mon Sep 17 00:00:00 2001 From: Ben Plessinger Date: Tue, 7 Feb 2017 09:21:59 -0500 Subject: [PATCH 2/2] Update Aggregators for off by one --- classes/DB/Aggregator.php | 70 ++++++++++--------- .../statistic_ratio_as_datatype_case.sql | 36 ++++++---- .../etl/etl_macros.d/statistic_ratio_case.sql | 28 +++++--- 3 files changed, 82 insertions(+), 52 deletions(-) diff --git a/classes/DB/Aggregator.php b/classes/DB/Aggregator.php index 207fc9c07e..8353089f2e 100644 --- a/classes/DB/Aggregator.php +++ b/classes/DB/Aggregator.php @@ -64,20 +64,22 @@ protected function dumpQuery($outfile, $select_statement, $select_params) */ protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $e2) { - return "case when ($s1 between $s2 and $e2 and - $e1 between $s2 and $e2 ) - then $stat - when ($s1 < $s2 and - $e1 between $s2 and $e2 ) - then $stat*($e1 - $s2 ) / ($e1 - $s1) - when ($s1 between $s2 and $e2 and - $e1 > $e2 ) - then $stat*( $e2 - $s1) / ($e1 - $s1) - when ($s1 < $s2 and - $e1 > $e2 ) - then $stat*( $max ) / ($e1 - $s1) - else $stat - end"; + return " + CASE + WHEN ($s1 BETWEEN $s2 AND $e2 AND + $e1 BETWEEN $s2 AND $e2 ) + THEN $stat + WHEN ($s1 < $s2 AND + $e1 BETWEEN $s2 AND $e2 ) + THEN $stat * ($e1 - $s2 + 1) / ($e1 - $s1 + 1 ) + WHEN ($s1 BETWEEN $s2 AND $e2 AND + $e1 > $e2 ) + THEN $stat * ( $e2 - $s1 + 1 ) / ($e1 - $s1 + 1 ) + WHEN ($s1 < $s2 AND + $e1 > $e2 ) + THEN $stat * ( $max ) / ($e1 - $s1 + 1 ) + ELSE $stat + END"; } /* @@ -86,20 +88,22 @@ protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $ */ protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, $s1, $e1, $s2, $e2) { - return "case when ($s1 between $s2 and $e2 and - $e1 between $s2 and $e2 ) - then $stat - when ($s1 < $s2 and - $e1 between $s2 and $e2 ) - then CAST( $stat*($e1 - $s2 ) / ($e1 - $s1) AS $dtype ) - when ($s1 between $s2 and $e2 and - $e1 > $e2 ) - then CAST( $stat*( $e2 - $s1) / ($e1 - $s1) AS $dtype ) - when ($s1 < $s2 and - $e1 > $e2 ) - then CAST( $stat*( $max ) / ($e1 - $s1) AS $dtype ) - else $stat - end"; + return " + CASE + WHEN ($s1 BETWEEN $s2 AND $e2 AND + $e1 BETWEEN $s2 AND $e2 ) + THEN $stat + WHEN ($s1 < $s2 AND + $e1 BETWEEN $s2 AND $e2 ) + THEN CAST( $stat * ($e1 - $s2 + 1) / ($e1 - $s1 + 1) AS $dtype ) + WHEN ($s1 BETWEEN $s2 AND $e2 AND + $e1 > $e2 ) + THEN CAST( $stat * ( $e2 - $s1 + 1) / ($e1 - $s1 + 1) AS $dtype ) + WHEN ($s1 < $s2 AND + $e1 > $e2 ) + THEN CAST( $stat * ( $max ) / ($e1 - $s1 + 1) AS $dtype ) + ELSE $stat + END"; } /* @@ -107,9 +111,11 @@ protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, */ protected function getIf($condition, $then, $else) { - return "case when $condition - then $then - else $else - end"; + return " + CASE + WHEN $condition + THEN $then + ELSE $else + END"; } } //Aggregator diff --git a/configuration/etl/etl_macros.d/statistic_ratio_as_datatype_case.sql b/configuration/etl/etl_macros.d/statistic_ratio_as_datatype_case.sql index 86de527e62..b4fab21b60 100644 --- a/configuration/etl/etl_macros.d/statistic_ratio_as_datatype_case.sql +++ b/configuration/etl/etl_macros.d/statistic_ratio_as_datatype_case.sql @@ -11,7 +11,7 @@ -- -- 2) Source time period overlaps a portion of destination time period. Return (stat * fraction of overlap) -- --- Ss ----- Se OR Ss ----- Se +-- Ss ----- Se OR Ss ----- Se -- Ds ---------- De Ds ---------- De -- -- 3) Source includes destination. Return (stat * max) where max may not be De - Ds. @@ -30,15 +30,27 @@ -- @param $dest_end_ts End time of the destination period -- -------------------------------------------------------------------------------- CAST( -CASE - WHEN (${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}) AND (${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} ) - THEN ${statistic} - WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} ) - THEN ${statistic} * (${src_end_ts} - ${dest_start_ts}) / (${src_end_ts} - ${src_start_ts}) - WHEN ( ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} AND ${src_end_ts} > ${dest_end_ts} ) - THEN ${statistic} * ((${dest_end_ts} + 1) - ${src_start_ts}) / (${src_end_ts} - ${src_start_ts}) - WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} > ${dest_end_ts} ) - THEN ${statistic} * ${max} / (${src_end_ts} - ${src_start_ts}) - ELSE ${statistic} -END + CASE + WHEN ( + ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + ) + THEN ${statistic} + WHEN ( + ${src_start_ts} < ${dest_start_ts} + AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + ) + THEN ${statistic} * ( ${src_end_ts} - ${dest_start_ts} + 1 ) / ( ${src_end_ts} - ${src_start_ts} + 1 ) + WHEN ( + ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + AND ${src_end_ts} > ${dest_end_ts} + ) + THEN ${statistic} * ( ${dest_end_ts} - ${src_start_ts} + 1 ) / (${src_end_ts} - ${src_start_ts} + 1 ) + WHEN ( + ${src_start_ts} < ${dest_start_ts} + AND ${src_end_ts} > ${dest_end_ts} + ) + THEN ${statistic} * ${max} / ( ${src_end_ts} - ${src_start_ts} + 1 ) + ELSE ${statistic} + END AS ${data_type}) diff --git a/configuration/etl/etl_macros.d/statistic_ratio_case.sql b/configuration/etl/etl_macros.d/statistic_ratio_case.sql index 2b764e0aa3..a21fac1442 100644 --- a/configuration/etl/etl_macros.d/statistic_ratio_case.sql +++ b/configuration/etl/etl_macros.d/statistic_ratio_case.sql @@ -10,7 +10,7 @@ -- -- 2) Source time period overlaps a portion of destination time period. Return (stat * fraction of overlap) -- --- Ss ----- Se OR Ss ----- Se +-- Ss ----- Se OR Ss ----- Se -- Ds ---------- De Ds ---------- De -- -- 3) Source includes destination. Return (stat * max) where max may not be De - Ds. @@ -28,13 +28,25 @@ -- @param $dest_end_ts End time of the destination period -- -------------------------------------------------------------------------------- CASE - WHEN (${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}) AND (${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} ) + WHEN ( + ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + ) THEN ${statistic} - WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} ) - THEN ${statistic} * (${src_end_ts} - ${dest_start_ts}) / (${src_end_ts} - ${src_start_ts}) - WHEN ( ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} AND ${src_end_ts} > ${dest_end_ts} ) - THEN ${statistic} * ((${dest_end_ts} + 1) - ${src_start_ts}) / (${src_end_ts} - ${src_start_ts}) - WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} > ${dest_end_ts} ) - THEN ${statistic} * ${max} / (${src_end_ts} - ${src_start_ts}) + WHEN ( + ${src_start_ts} < ${dest_start_ts} + AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + ) + THEN ${statistic} * ( ${src_end_ts} - ${dest_start_ts} + 1 ) / ( ${src_end_ts} - ${src_start_ts} + 1 ) + WHEN ( + ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} + AND ${src_end_ts} > ${dest_end_ts} + ) + THEN ${statistic} * ( ${dest_end_ts} - ${src_start_ts} + 1 ) / (${src_end_ts} - ${src_start_ts} + 1 ) + WHEN ( + ${src_start_ts} < ${dest_start_ts} + AND ${src_end_ts} > ${dest_end_ts} + ) + THEN ${statistic} * ${max} / ( ${src_end_ts} - ${src_start_ts} + 1 ) ELSE ${statistic} END