From be2bde5d143c699ca7d041dd1aedca949ea6aea4 Mon Sep 17 00:00:00 2001 From: jay-dhanwant Date: Wed, 14 Aug 2024 22:36:36 +0530 Subject: [PATCH] popularity tweak --- .../ds__global_popular_videos_l7d.py | 42 +++++++++++++++--- .../ds__global_popular_videos_l90d.py | 30 ++++++++++--- .../ds__local_popular_videos_l7d.py | 31 ++++++++++--- .../ds__local_popular_videos_l90d.py | 43 ++++++++++++++++--- 4 files changed, 124 insertions(+), 22 deletions(-) diff --git a/global_popular_videos_l7d/ds__global_popular_videos_l7d.py b/global_popular_videos_l7d/ds__global_popular_videos_l7d.py index cc91d78..fcc6121 100644 --- a/global_popular_videos_l7d/ds__global_popular_videos_l7d.py +++ b/global_popular_videos_l7d/ds__global_popular_videos_l7d.py @@ -45,16 +45,48 @@ def send_alert_to_google_chat(): STDDEV(watch_perc) OVER() AS stddev_watch_perc FROM stats +), +stats_with_mean_std AS ( + SELECT + video_id, + like_perc, + watch_perc, + AVG(like_perc) OVER() AS mean_like_perc, + STDDEV(like_perc) OVER() AS stddev_like_perc, + AVG(watch_perc) OVER() AS mean_watch_perc, + STDDEV(watch_perc) OVER() AS stddev_watch_perc + FROM + stats +), +normalized_stats AS ( + SELECT + video_id, + region, + (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, + (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc + FROM + stats_with_mean_std +), +offset_stats AS ( + SELECT + video_id, + region, + normalized_like_perc, + normalized_watch_perc, + LEAST(normalized_like_perc, normalized_watch_perc) AS min_normalized_perc + FROM + normalized_stats ) SELECT video_id, - (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, - (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc, - 2 / (1 / (3*(like_perc - mean_like_perc + 1e-9) / (stddev_like_perc + 1e-9)) + 1 / ((watch_perc - mean_watch_perc + 1e-9) / (stddev_watch_perc + 1e-9))) AS global_popularity_score + region, + normalized_like_perc, + normalized_watch_perc, + 2 / (1 / (normalized_like_perc - min_normalized_perc + 1 + 1e-9) + 1 / (normalized_watch_perc - min_normalized_perc + 1 + 1e-9)) AS global_popularity_score FROM - stats_with_mean_std + offset_stats ORDER BY - global_popularity_score DESC + region DESC, global_popularity_score DESC """ def create_global_popular_videos_l7d(): diff --git a/global_popular_videos_l90d/ds__global_popular_videos_l90d.py b/global_popular_videos_l90d/ds__global_popular_videos_l90d.py index 5b2242b..f44cec3 100644 --- a/global_popular_videos_l90d/ds__global_popular_videos_l90d.py +++ b/global_popular_videos_l90d/ds__global_popular_videos_l90d.py @@ -44,16 +44,36 @@ def send_alert_to_google_chat(): STDDEV(watch_perc) OVER() AS stddev_watch_perc FROM stats +), +normalized_stats AS ( + SELECT + video_id, + region, + (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, + (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc + FROM + stats_with_mean_std +), +offset_stats AS ( + SELECT + video_id, + region, + normalized_like_perc, + normalized_watch_perc, + LEAST(normalized_like_perc, normalized_watch_perc) AS min_normalized_perc + FROM + normalized_stats ) SELECT video_id, - (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, - (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc, - 2 / (1 / ((like_perc - mean_like_perc + 1e-9) / (stddev_like_perc + 1e-9)) + 1 / ((watch_perc - mean_watch_perc + 1e-9) / (stddev_watch_perc + 1e-9))) AS global_popularity_score + region, + normalized_like_perc, + normalized_watch_perc, + 2 / (1 / (normalized_like_perc - min_normalized_perc + 1 + 1e-9) + 1 / (normalized_watch_perc - min_normalized_perc + 1 + 1e-9)) AS global_popularity_score FROM - stats_with_mean_std + offset_stats ORDER BY - global_popularity_score DESC + region DESC, global_popularity_score DESC """ def create_global_popular_videos_l90d(): diff --git a/local_popular_videos_l7d/ds__local_popular_videos_l7d.py b/local_popular_videos_l7d/ds__local_popular_videos_l7d.py index 3e21857..45e6dad 100644 --- a/local_popular_videos_l7d/ds__local_popular_videos_l7d.py +++ b/local_popular_videos_l7d/ds__local_popular_videos_l7d.py @@ -58,15 +58,34 @@ def send_alert_to_google_chat(): COALESCE(NULLIF(STDDEV(watch_perc) OVER(PARTITION BY region), 0), 100) AS stddev_watch_perc FROM stats - ) + ), +normalized_stats AS ( + SELECT + video_id, + region, + (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, + (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc + FROM + stats_with_mean_std +), +offset_stats AS ( + SELECT + video_id, + region, + normalized_like_perc, + normalized_watch_perc, + LEAST(normalized_like_perc, normalized_watch_perc) AS min_normalized_perc + FROM + normalized_stats +) SELECT - video_id, + video_id, region, - (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, - (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc, - 2 / (1 / ((like_perc - mean_like_perc + 1e-9) / (stddev_like_perc + 1e-9)) + 1 / ((watch_perc - mean_watch_perc + 1e-9) / (stddev_watch_perc + 1e-9))) AS local_popularity_score + normalized_like_perc, + normalized_watch_perc, + 2 / (1 / (normalized_like_perc - min_normalized_perc + 1 + 1e-9) + 1 / (normalized_watch_perc - min_normalized_perc + 1 + 1e-9)) AS local_popularity_score FROM - stats_with_mean_std + offset_stats ORDER BY region DESC, local_popularity_score DESC """ diff --git a/local_popular_videos_l90d/ds__local_popular_videos_l90d.py b/local_popular_videos_l90d/ds__local_popular_videos_l90d.py index 66428e7..ef3e366 100644 --- a/local_popular_videos_l90d/ds__local_popular_videos_l90d.py +++ b/local_popular_videos_l90d/ds__local_popular_videos_l90d.py @@ -58,15 +58,46 @@ def send_alert_to_google_chat(): COALESCE(NULLIF(STDDEV(watch_perc) OVER(PARTITION BY region), 0), 100) AS stddev_watch_perc FROM stats - ) + ), +stats_with_mean_std AS ( + SELECT + video_id, + like_perc, + watch_perc, + AVG(like_perc) OVER() AS mean_like_perc, + STDDEV(like_perc) OVER() AS stddev_like_perc, + AVG(watch_perc) OVER() AS mean_watch_perc, + STDDEV(watch_perc) OVER() AS stddev_watch_perc + FROM + stats +), +normalized_stats AS ( + SELECT + video_id, + region, + (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, + (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc + FROM + stats_with_mean_std +), +offset_stats AS ( + SELECT + video_id, + region, + normalized_like_perc, + normalized_watch_perc, + LEAST(normalized_like_perc, normalized_watch_perc) AS min_normalized_perc + FROM + normalized_stats +) SELECT - video_id, + video_id, region, - (like_perc - mean_like_perc) / stddev_like_perc AS normalized_like_perc, - (watch_perc - mean_watch_perc) / stddev_watch_perc AS normalized_watch_perc, - 2 / (1 / ((like_perc - mean_like_perc + 1e-9) / (stddev_like_perc + 1e-9)) + 1 / ((watch_perc - mean_watch_perc + 1e-9) / (stddev_watch_perc + 1e-9))) AS local_popularity_score + normalized_like_perc, + normalized_watch_perc, + 2 / (1 / (normalized_like_perc - min_normalized_perc + 1 + 1e-9) + 1 / (normalized_watch_perc - min_normalized_perc + 1 + 1e-9)) AS local_popularity_score FROM - stats_with_mean_std + offset_stats ORDER BY region DESC, local_popularity_score DESC """