Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update getDistributionSQLCaseStatement #47

Merged
merged 2 commits into from
Feb 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 105 additions & 101 deletions classes/DB/Aggregator.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,111 +7,115 @@
* @author: Amin Ghadersohi 8/1/2013
*
*/

class Aggregator extends Loggable
{
static $__initialized;

/**
* (Optional) The name of the realm associated with this aggregator.
*
* @var string|null
*/
protected $realmName = null;
private static $__initialized;

/**
 * Create an aggregator.
 *
 * The constructor takes no arguments and performs no initialization.
 */
public function __construct()
{
    // Intentionally empty.
}

/**
* Update the filter lists associated with this aggregator's realm.
*
* If $realmName has not been set for this aggregator, this will do nothing.
*/
public function updateFilters()
{
if (empty($this->realmName)) {
return;
}
/**
* (Optional) The name of the realm associated with this aggregator.
*
* @var string|null
*/
protected $realmName = null;

$filterListBuilder = new FilterListBuilder();
$filterListBuilder->buildRealmLists($this->realmName);
}
/**
 * Create an aggregator.
 *
 * The constructor takes no arguments and performs no initialization.
 */
public function __construct()
{
    // Intentionally empty.
}

/**
 * Substitute named placeholders in a statement with their values.
 *
 * Each key of $params is searched for in $statement as ":<key>" and replaced
 * with the corresponding value. A placeholder only matches when it is not
 * immediately followed by a letter, digit or underscore, so binding "start"
 * leaves ":start_date" untouched for its own key.
 *
 * Values are inserted verbatim; no quoting or escaping is applied.
 *
 * @param string $statement Text containing ":<key>" placeholders.
 * @param array  $params    Map of placeholder name (without the colon) to replacement value.
 *
 * @return string The statement with every matching placeholder replaced.
 */
protected function bindParams($statement, $params)
{
    foreach ($params as $param_key => $param_value) {
        $pattern = '/:' . preg_quote((string) $param_key, '/') . '(?![A-Za-z0-9_])/';
        $replacement = (string) $param_value;

        // A callback is used so that "$1" or backslashes in the value are inserted literally.
        $bound = preg_replace_callback(
            $pattern,
            function () use ($replacement) {
                return $replacement;
            },
            $statement
        );

        // preg_replace_callback() returns null on a PCRE error; keep the statement unchanged in that case.
        if ($bound !== null) {
            $statement = $bound;
        }
    }

    return $statement;
}

/**
 * Write a query to a file with its bound parameters substituted in.
 *
 * @param string $outfile          Path of the file to write.
 * @param string $select_statement Query text containing ":<key>" placeholders.
 * @param array  $select_params    Placeholder name => value map passed to bindParams().
 */
protected function dumpQuery($outfile, $select_statement, $select_params)
{
    $boundQuery = $this->bindParams($select_statement, $select_params);
    file_put_contents($outfile, $boundQuery);
}

/**
 * Build a SQL CASE expression that distributes a statistic recorded over a
 * source range [$s1, $e1] linearly onto a destination range [$s2, $e2].
 *
 * All arguments are interpolated into the returned text as SQL expressions.
 * Both ranges are treated as inclusive, so a range from a to b has length
 * (b - a + 1). The branches are:
 *   - source entirely inside destination: the full statistic;
 *   - source starts before the destination and ends inside it: the share
 *     from $s2 to $e1;
 *   - source starts inside the destination and ends after it: the share
 *     from $s1 to $e2;
 *   - source covers the whole destination: $max / source length, where $max
 *     is the destination duration (not always $e2 - $s2);
 *   - anything else: the full statistic.
 *
 * @param string $stat SQL expression for the statistic.
 * @param string $max  SQL expression for the destination duration.
 * @param string $s1   SQL expression for the source start.
 * @param string $e1   SQL expression for the source end.
 * @param string $s2   SQL expression for the destination start.
 * @param string $e2   SQL expression for the destination end.
 *
 * @return string SQL CASE expression.
 */
protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $e2)
{
    // Inclusive length of the source range; used as the divisor of every partial branch.
    $sourceLength = "($e1 - $s1 + 1)";

    $lines = array(
        "case when ($s1 between $s2 and $e2 and",
        "$e1 between $s2 and $e2 )",
        "then $stat",
        "when ($s1 < $s2 and",
        "$e1 between $s2 and $e2 )",
        "then $stat*($e1 - $s2 + 1) / $sourceLength",
        "when ($s1 between $s2 and $e2 and",
        "$e1 > $e2 )",
        "then $stat*( $e2 - $s1 + 1) / $sourceLength",
        "when ($s1 < $s2 and",
        "$e1 > $e2 )",
        "then $stat*( $max ) / $sourceLength",
        "else $stat",
        "end",
    );

    return implode("\n", $lines);
}
/**
* Update the filter lists associated with this aggregator's realm.
*
* If $realmName has not been set for this aggregator, this will do nothing.
*/
public function updateFilters()
{
if (empty($this->realmName)) {
return;
}

/**
 * Build a SQL CASE expression that distributes a statistic recorded over a
 * source range [$s1, $e1] linearly onto a destination range [$s2, $e2],
 * casting every partial result to $dtype.
 *
 * All arguments are interpolated into the returned text as SQL expressions.
 * Both ranges are treated as inclusive, so a range from a to b has length
 * (b - a + 1). Branches that return the whole statistic are not cast.
 *
 * @param string $stat  SQL expression for the statistic.
 * @param string $dtype SQL data type used in the CAST of partial results.
 * @param string $max   SQL expression for the destination duration (not always $e2 - $s2).
 * @param string $s1    SQL expression for the source start.
 * @param string $e1    SQL expression for the source end.
 * @param string $s2    SQL expression for the destination start.
 * @param string $e2    SQL expression for the destination end.
 *
 * @return string SQL CASE expression.
 */
protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, $s1, $e1, $s2, $e2)
{
    // Inclusive length of the source range; used as the divisor of every partial branch.
    $sourceLength = "($e1 - $s1 + 1)";

    $lines = array(
        "case when ($s1 between $s2 and $e2 and",
        "$e1 between $s2 and $e2 )",
        "then $stat",
        "when ($s1 < $s2 and",
        "$e1 between $s2 and $e2 )",
        "then CAST( $stat*($e1 - $s2 + 1) / $sourceLength AS $dtype )",
        "when ($s1 between $s2 and $e2 and",
        "$e1 > $e2 )",
        "then CAST( $stat*( $e2 - $s1 + 1) / $sourceLength AS $dtype )",
        "when ($s1 < $s2 and",
        "$e1 > $e2 )",
        "then CAST( $stat*( $max ) / $sourceLength AS $dtype )",
        "else $stat",
        "end",
    );

    return implode("\n", $lines);
}

/**
 * Build a two-way SQL CASE expression.
 *
 * @param string $condition SQL boolean expression tested by WHEN.
 * @param string $then      SQL expression returned when the condition holds.
 * @param string $else      SQL expression returned otherwise.
 *
 * @return string SQL CASE expression.
 */
protected function getIf($condition, $then, $else)
{
    $clauses = array(
        'case when ' . $condition,
        'then ' . $then,
        'else ' . $else,
        'end',
    );

    return implode("\n", $clauses);
}
} //Aggregator
$filterListBuilder = new FilterListBuilder();
$filterListBuilder->buildRealmLists($this->realmName);
}

/**
 * Substitute named placeholders in a statement with their values.
 *
 * Each key of $params is searched for in $statement as ":<key>" and replaced
 * with the corresponding value. A placeholder only matches when it is not
 * immediately followed by a letter, digit or underscore, so binding "start"
 * leaves ":start_date" untouched for its own key.
 *
 * Values are inserted verbatim; no quoting or escaping is applied.
 *
 * @param string $statement Text containing ":<key>" placeholders.
 * @param array  $params    Map of placeholder name (without the colon) to replacement value.
 *
 * @return string The statement with every matching placeholder replaced.
 */
protected function bindParams($statement, $params)
{
    foreach ($params as $param_key => $param_value) {
        $pattern = '/:' . preg_quote((string) $param_key, '/') . '(?![A-Za-z0-9_])/';
        $replacement = (string) $param_value;

        // A callback is used so that "$1" or backslashes in the value are inserted literally.
        $bound = preg_replace_callback(
            $pattern,
            function () use ($replacement) {
                return $replacement;
            },
            $statement
        );

        // preg_replace_callback() returns null on a PCRE error; keep the statement unchanged in that case.
        if ($bound !== null) {
            $statement = $bound;
        }
    }

    return $statement;
}

/**
 * Write a query to a file with its bound parameters substituted in.
 *
 * @param string $outfile          Path of the file to write.
 * @param string $select_statement Query text containing ":<key>" placeholders.
 * @param array  $select_params    Placeholder name => value map passed to bindParams().
 */
protected function dumpQuery($outfile, $select_statement, $select_params)
{
    $boundQuery = $this->bindParams($select_statement, $select_params);
    file_put_contents($outfile, $boundQuery);
}

?>
/**
 * Build a SQL CASE expression that distributes a statistic recorded over a
 * source range [$s1, $e1] linearly onto a destination range [$s2, $e2].
 *
 * All arguments are interpolated into the returned text as SQL expressions.
 * Range lengths are computed inclusively (end - start + 1). $max stands for
 * the destination duration, which is not always $e2 - $s2.
 *
 * @param string $stat SQL expression for the statistic.
 * @param string $max  SQL expression for the destination duration.
 * @param string $s1   SQL expression for the source start.
 * @param string $e1   SQL expression for the source end.
 * @param string $s2   SQL expression for the destination start.
 * @param string $e2   SQL expression for the destination end.
 *
 * @return string SQL CASE expression, beginning with a newline.
 */
protected function getDistributionSQLCaseStatement($stat, $max, $s1, $e1, $s2, $e2)
{
    $sqlLines = array(
        // The returned text starts with a line break before the CASE keyword.
        '',
        'CASE',
        // Source lies entirely inside the destination.
        "WHEN ($s1 BETWEEN $s2 AND $e2 AND",
        "$e1 BETWEEN $s2 AND $e2 )",
        "THEN $stat",
        // Source starts before the destination and ends inside it.
        "WHEN ($s1 < $s2 AND",
        "$e1 BETWEEN $s2 AND $e2 )",
        "THEN $stat * ($e1 - $s2 + 1) / ($e1 - $s1 + 1 )",
        // Source starts inside the destination and ends after it.
        "WHEN ($s1 BETWEEN $s2 AND $e2 AND",
        "$e1 > $e2 )",
        "THEN $stat * ( $e2 - $s1 + 1 ) / ($e1 - $s1 + 1 )",
        // Source covers the whole destination.
        "WHEN ($s1 < $s2 AND",
        "$e1 > $e2 )",
        "THEN $stat * ( $max ) / ($e1 - $s1 + 1 )",
        "ELSE $stat",
        'END',
    );

    return implode("\n", $sqlLines);
}

/**
 * Build a SQL CASE expression that distributes a statistic recorded over a
 * source range [$s1, $e1] linearly onto a destination range [$s2, $e2],
 * casting every partial result to $dtype.
 *
 * All arguments are interpolated into the returned text as SQL expressions.
 * Range lengths are computed inclusively (end - start + 1). Branches that
 * return the whole statistic are not cast.
 *
 * @param string $stat  SQL expression for the statistic.
 * @param string $dtype SQL data type used in the CAST of partial results.
 * @param string $max   SQL expression for the destination duration (not always $e2 - $s2).
 * @param string $s1    SQL expression for the source start.
 * @param string $e1    SQL expression for the source end.
 * @param string $s2    SQL expression for the destination start.
 * @param string $e2    SQL expression for the destination end.
 *
 * @return string SQL CASE expression, beginning with a newline.
 */
protected function getDistributionSQLCaseStatementWithDtype($stat, $dtype, $max, $s1, $e1, $s2, $e2)
{
    $sqlLines = array(
        // The returned text starts with a line break before the CASE keyword.
        '',
        'CASE',
        // Source lies entirely inside the destination.
        "WHEN ($s1 BETWEEN $s2 AND $e2 AND",
        "$e1 BETWEEN $s2 AND $e2 )",
        "THEN $stat",
        // Source starts before the destination and ends inside it.
        "WHEN ($s1 < $s2 AND",
        "$e1 BETWEEN $s2 AND $e2 )",
        "THEN CAST( $stat * ($e1 - $s2 + 1) / ($e1 - $s1 + 1) AS $dtype )",
        // Source starts inside the destination and ends after it.
        "WHEN ($s1 BETWEEN $s2 AND $e2 AND",
        "$e1 > $e2 )",
        "THEN CAST( $stat * ( $e2 - $s1 + 1) / ($e1 - $s1 + 1) AS $dtype )",
        // Source covers the whole destination.
        "WHEN ($s1 < $s2 AND",
        "$e1 > $e2 )",
        "THEN CAST( $stat * ( $max ) / ($e1 - $s1 + 1) AS $dtype )",
        "ELSE $stat",
        'END',
    );

    return implode("\n", $sqlLines);
}

/**
 * Build a two-way SQL CASE expression.
 *
 * @param string $condition SQL boolean expression tested by WHEN.
 * @param string $then      SQL expression returned when the condition holds.
 * @param string $else      SQL expression returned otherwise.
 *
 * @return string SQL CASE expression, beginning with a newline.
 */
protected function getIf($condition, $then, $else)
{
    $clauses = array(
        // The returned text starts with a line break before the CASE keyword.
        '',
        'CASE',
        'WHEN ' . $condition,
        'THEN ' . $then,
        'ELSE ' . $else,
        'END',
    );

    return implode("\n", $clauses);
}
} //Aggregator
36 changes: 24 additions & 12 deletions configuration/etl/etl_macros.d/statistic_ratio_as_datatype_case.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
--
-- 2) Source time period overlaps a portion of destination time period. Return (stat * fraction of overlap)
--
-- Ss ----- Se OR Ss ----- Se
-- Ss ----- Se OR Ss ----- Se
-- Ds ---------- De Ds ---------- De
--
-- 3) Source includes destination. Return (stat * max / length of source period) where max may not be De - Ds.
Expand All @@ -30,15 +30,27 @@
-- @param $dest_end_ts End time of the destination period
-- --------------------------------------------------------------------------------
CAST(
CASE
WHEN (${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}) AND (${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} )
THEN ${statistic}
WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} )
THEN ${statistic} * (${src_end_ts} - ${dest_start_ts}) / (${src_end_ts} - ${src_start_ts})
WHEN ( ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} AND ${src_end_ts} > ${dest_end_ts} )
THEN ${statistic} * ((${dest_end_ts} + 1) - ${src_start_ts}) / (${src_end_ts} - ${src_start_ts})
WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} > ${dest_end_ts} )
THEN ${statistic} * ${max} / (${src_end_ts} - ${src_start_ts})
ELSE ${statistic}
END
CASE
WHEN (
${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
)
THEN ${statistic}
WHEN (
${src_start_ts} < ${dest_start_ts}
AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
)
THEN ${statistic} * ( ${src_end_ts} - ${dest_start_ts} + 1 ) / ( ${src_end_ts} - ${src_start_ts} + 1 )
WHEN (
${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
AND ${src_end_ts} > ${dest_end_ts}
)
THEN ${statistic} * ( ${dest_end_ts} - ${src_start_ts} + 1 ) / (${src_end_ts} - ${src_start_ts} + 1 )
WHEN (
${src_start_ts} < ${dest_start_ts}
AND ${src_end_ts} > ${dest_end_ts}
)
THEN ${statistic} * ${max} / ( ${src_end_ts} - ${src_start_ts} + 1 )
ELSE ${statistic}
END
AS ${data_type})
28 changes: 20 additions & 8 deletions configuration/etl/etl_macros.d/statistic_ratio_case.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
--
-- 2) Source time period overlaps a portion of destination time period. Return (stat * fraction of overlap)
--
-- Ss ----- Se OR Ss ----- Se
-- Ss ----- Se OR Ss ----- Se
-- Ds ---------- De Ds ---------- De
--
-- 3) Source includes destination. Return (stat * max / length of source period) where max may not be De - Ds.
Expand All @@ -28,13 +28,25 @@
-- @param $dest_end_ts End time of the destination period
-- --------------------------------------------------------------------------------
CASE
WHEN (${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}) AND (${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} )
WHEN (
${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
)
THEN ${statistic}
WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} )
THEN ${statistic} * (${src_end_ts} - ${dest_start_ts}) / (${src_end_ts} - ${src_start_ts})
WHEN ( ${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts} AND ${src_end_ts} > ${dest_end_ts} )
THEN ${statistic} * ((${dest_end_ts} + 1) - ${src_start_ts}) / (${src_end_ts} - ${src_start_ts})
WHEN ( ${src_start_ts} < ${dest_start_ts} AND ${src_end_ts} > ${dest_end_ts} )
THEN ${statistic} * ${max} / (${src_end_ts} - ${src_start_ts})
WHEN (
${src_start_ts} < ${dest_start_ts}
AND ${src_end_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
)
THEN ${statistic} * ( ${src_end_ts} - ${dest_start_ts} + 1 ) / ( ${src_end_ts} - ${src_start_ts} + 1 )
WHEN (
${src_start_ts} BETWEEN ${dest_start_ts} AND ${dest_end_ts}
AND ${src_end_ts} > ${dest_end_ts}
)
THEN ${statistic} * ( ${dest_end_ts} - ${src_start_ts} + 1 ) / (${src_end_ts} - ${src_start_ts} + 1 )
WHEN (
${src_start_ts} < ${dest_start_ts}
AND ${src_end_ts} > ${dest_end_ts}
)
THEN ${statistic} * ${max} / ( ${src_end_ts} - ${src_start_ts} + 1 )
ELSE ${statistic}
END