Skip to content

Commit

Permalink
user metrics
Browse files (browse the repository at this point in the history)
  • Loading branch information
jay-dhanwant-yral committed Aug 28, 2024
1 parent c227000 commit b72e3a8
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
32 changes: 18 additions & 14 deletions global_video_stats/ds__global_video_stats.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,8 @@ def create_initial_query():
STDDEV(user_normalized_share_perc) AS global_stddev_user_normalized_shares,
AVG(user_normalized_watch_percentage_perc) AS global_avg_user_normalized_watch_percentage,
STDDEV(user_normalized_watch_percentage_perc) AS global_stddev_user_normalized_watch_percentage,
SUM(total_impressions) AS total_impressions
SUM(total_impressions) AS total_impressions,
MAX(last_update_timestamp) AS last_update_timestamp
FROM
`hot-or-not-feed-intelligence.yral_ds.video_statistics`;
"""
Expand All @@ -51,43 +52,46 @@ def create_incremental_query():
STDDEV(user_normalized_share_perc) AS global_stddev_user_normalized_shares,
AVG(user_normalized_watch_percentage_perc) AS global_avg_user_normalized_watch_percentage,
STDDEV(user_normalized_watch_percentage_perc) AS global_stddev_user_normalized_watch_percentage,
SUM(total_impressions) AS total_impressions
SUM(total_impressions) AS total_impressions,
MAX(last_update_timestamp) AS last_update_timestamp
FROM
`hot-or-not-feed-intelligence.yral_ds.video_statistics`
WHERE
last_update_timestamp > (SELECT MAX(last_update_timestamp) FROM `hot-or-not-feed-intelligence.yral_ds.global_video_stats`)
AND total_impressions IS NOT NULL
) S
ON TRUE -- Always match to update the global stats
WHEN MATCHED THEN
UPDATE SET
T.global_avg_user_normalized_likes = (T.global_avg_user_normalized_likes * T.total_impressions + S.global_avg_user_normalized_likes * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.global_stddev_user_normalized_likes = SQRT(
T.global_avg_user_normalized_likes = IFNULL((T.global_avg_user_normalized_likes * T.total_impressions + S.global_avg_user_normalized_likes * S.total_impressions) / (T.total_impressions + S.total_impressions), T.global_avg_user_normalized_likes),
T.global_stddev_user_normalized_likes = IFNULL(SQRT(
(
(T.total_impressions - 1) * POW(T.global_stddev_user_normalized_likes, 2) +
(S.total_impressions - 1) * POW(S.global_stddev_user_normalized_likes, 2) +
(T.total_impressions * S.total_impressions / (T.total_impressions + S.total_impressions)) * POW(T.global_avg_user_normalized_likes - S.global_avg_user_normalized_likes, 2)
) / (T.total_impressions + S.total_impressions - 1)
),
T.global_avg_user_normalized_shares = (T.global_avg_user_normalized_shares * T.total_impressions + S.global_avg_user_normalized_shares * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.global_stddev_user_normalized_shares = SQRT(
), T.global_stddev_user_normalized_likes),
T.global_avg_user_normalized_shares = IFNULL((T.global_avg_user_normalized_shares * T.total_impressions + S.global_avg_user_normalized_shares * S.total_impressions) / (T.total_impressions + S.total_impressions), T.global_avg_user_normalized_shares),
T.global_stddev_user_normalized_shares = IFNULL(SQRT(
(
(T.total_impressions - 1) * POW(T.global_stddev_user_normalized_shares, 2) +
(S.total_impressions - 1) * POW(S.global_stddev_user_normalized_shares, 2) +
(T.total_impressions * S.total_impressions / (T.total_impressions + S.total_impressions)) * POW(T.global_avg_user_normalized_shares - S.global_avg_user_normalized_shares, 2)
) / (T.total_impressions + S.total_impressions - 1)
),
T.global_avg_user_normalized_watch_percentage = (T.global_avg_user_normalized_watch_percentage * T.total_impressions + S.global_avg_user_normalized_watch_percentage * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.global_stddev_user_normalized_watch_percentage = SQRT(
), T.global_stddev_user_normalized_shares),
T.global_avg_user_normalized_watch_percentage = IFNULL((T.global_avg_user_normalized_watch_percentage * T.total_impressions + S.global_avg_user_normalized_watch_percentage * S.total_impressions) / (T.total_impressions + S.total_impressions), T.global_avg_user_normalized_watch_percentage),
T.global_stddev_user_normalized_watch_percentage = IFNULL(SQRT(
(
(T.total_impressions - 1) * POW(T.global_stddev_user_normalized_watch_percentage, 2) +
(S.total_impressions - 1) * POW(S.global_stddev_user_normalized_watch_percentage, 2) +
(T.total_impressions * S.total_impressions / (T.total_impressions + S.total_impressions)) * POW(T.global_avg_user_normalized_watch_percentage - S.global_avg_user_normalized_watch_percentage, 2)
) / (T.total_impressions + S.total_impressions - 1)
),
T.total_impressions = T.total_impressions + S.total_impressions
), T.global_stddev_user_normalized_watch_percentage),
T.total_impressions = IFNULL(T.total_impressions, 0) + IFNULL(S.total_impressions, 0),
T.last_update_timestamp = IFNULL(S.last_update_timestamp, T.last_update_timestamp)
WHEN NOT MATCHED THEN
INSERT (global_avg_user_normalized_likes, global_stddev_user_normalized_likes, global_avg_user_normalized_shares, global_stddev_user_normalized_shares, global_avg_user_normalized_watch_percentage, global_stddev_user_normalized_watch_percentage, total_impressions)
VALUES (S.global_avg_user_normalized_likes, S.global_stddev_user_normalized_likes, S.global_avg_user_normalized_shares, S.global_stddev_user_normalized_shares, S.global_avg_user_normalized_watch_percentage, S.global_stddev_user_normalized_watch_percentage, S.total_impressions);
INSERT (global_avg_user_normalized_likes, global_stddev_user_normalized_likes, global_avg_user_normalized_shares, global_stddev_user_normalized_shares, global_avg_user_normalized_watch_percentage, global_stddev_user_normalized_watch_percentage, total_impressions, last_update_timestamp)
VALUES (S.global_avg_user_normalized_likes, S.global_stddev_user_normalized_likes, S.global_avg_user_normalized_shares, S.global_stddev_user_normalized_shares, S.global_avg_user_normalized_watch_percentage, S.global_stddev_user_normalized_watch_percentage, S.total_impressions, S.last_update_timestamp);
"""

def run_query():
Expand Down
18 changes: 9 additions & 9 deletions normalized_video_statistics/ds__normalized_video_statistics.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -86,9 +86,9 @@ def create_incremental_query():
normalized_stats AS (
SELECT
vs.video_id,
(vs.user_normalized_like_perc - gs.global_avg_user_normalized_likes) / gs.global_stddev_user_normalized_likes AS normalized_like_perc,
(vs.user_normalized_share_perc - gs.global_avg_user_normalized_shares) / gs.global_stddev_user_normalized_shares AS normalized_share_perc,
(vs.user_normalized_watch_percentage_perc - gs.global_avg_user_normalized_watch_percentage) / gs.global_stddev_user_normalized_watch_percentage AS normalized_watch_perc,
(vs.user_normalized_like_perc - gs.global_avg_user_normalized_likes) / NULLIF(gs.global_stddev_user_normalized_likes, 0) AS normalized_like_perc,
(vs.user_normalized_share_perc - gs.global_avg_user_normalized_shares) / NULLIF(gs.global_stddev_user_normalized_shares, 0) AS normalized_share_perc,
(vs.user_normalized_watch_percentage_perc - gs.global_avg_user_normalized_watch_percentage) / NULLIF(gs.global_stddev_user_normalized_watch_percentage, 0) AS normalized_watch_perc,
user_normalized_like_perc as like_percentage_un,
user_normalized_share_perc as share_percentage_un,
user_normalized_watch_percentage_perc as watch_percentage_un,
Expand All @@ -111,9 +111,9 @@ def create_incremental_query():
like_percentage_un,
share_percentage_un,
watch_percentage_un,
normalized_like_perc,
normalized_share_perc,
normalized_watch_perc,
IFNULL(normalized_like_perc, 0) AS normalized_like_perc,
IFNULL(normalized_share_perc, 0) AS normalized_share_perc,
IFNULL(normalized_watch_perc, 0) AS normalized_watch_perc,
total_impressions,
last_update_timestamp,
global_avg_user_normalized_likes,
Expand All @@ -131,9 +131,9 @@ def create_incremental_query():
T.like_percentage_un = (T.like_percentage_un * T.total_impressions + S.like_percentage_un * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.share_percentage_un = (T.share_percentage_un * T.total_impressions + S.share_percentage_un * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.watch_percentage_un = (T.watch_percentage_un * T.total_impressions + S.watch_percentage_un * S.total_impressions) / (T.total_impressions + S.total_impressions),
T.normalized_like_perc = (T.like_percentage_un - S.global_avg_user_normalized_likes) / S.global_stddev_user_normalized_likes,
T.normalized_share_perc = (T.share_percentage_un - S.global_avg_user_normalized_shares) / S.global_stddev_user_normalized_shares,
T.normalized_watch_perc = (T.watch_percentage_un - S.global_avg_user_normalized_watch_percentage) / S.global_stddev_user_normalized_watch_percentage,
T.normalized_like_perc = IFNULL((T.like_percentage_un - S.global_avg_user_normalized_likes) / NULLIF(S.global_stddev_user_normalized_likes, 0), 0),
T.normalized_share_perc = IFNULL((T.share_percentage_un - S.global_avg_user_normalized_shares) / NULLIF(S.global_stddev_user_normalized_shares, 0), 0),
T.normalized_watch_perc = IFNULL((T.watch_percentage_un - S.global_avg_user_normalized_watch_percentage) / NULLIF(S.global_stddev_user_normalized_watch_percentage, 0), 0),
T.total_impressions = T.total_impressions + S.total_impressions,
T.last_update_timestamp = S.last_update_timestamp
WHEN NOT MATCHED THEN
Expand Down
6 changes: 3 additions & 3 deletions user_video_relation/ds__user_video_relation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,7 +49,7 @@ def create_initial_query():
analytics_335143420.test_events_analytics -- base analytics table -- change this if the table name changes
WHERE
event = 'video_duration_watched'
AND CAST(JSON_EXTRACT_SCALAR(params, '$.percentage_watched') AS FLOAT64) <= 100
AND CAST(JSON_EXTRACT_SCALAR(params, '$.percentage_watched') AS FLOAT64) <= 100 -- there is some issue if this is greater than 100
GROUP BY
user_id, video_id
),
Expand Down Expand Up @@ -100,7 +100,7 @@ def create_initial_query():
ON
vw.user_id = vs.user_id
AND vw.video_id = vs.video_id
order by last_watched_timestamp desc;
order by last_watched_timestamp desc; -- unit tests -- per video id & per user id
"""

def create_incremental_query(last_timestamp):
Expand All @@ -118,7 +118,7 @@ def create_incremental_query(last_timestamp):
WHERE
event = 'video_duration_watched'
AND timestamp > '{last_timestamp}'
AND CAST(JSON_EXTRACT_SCALAR(params, '$.percentage_watched') AS FLOAT64) <= 100
AND CAST(JSON_EXTRACT_SCALAR(params, '$.percentage_watched') AS FLOAT64) <= 100 -- there is some issue if this is greater than 100
GROUP BY
user_id, video_id
),
Expand Down

0 comments on commit b72e3a8

Please sign in to comment.