From 583b1c57266ad5163a91dc68a263ba7634e20bab Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Wed, 5 Feb 2025 13:47:19 +0000 Subject: [PATCH] fix(trends): Fixed trends first time user matching event (#28325) --- .../insights/trends/aggregation_operations.py | 4 +- .../trends/test/test_trends_query_runner.py | 120 ++++++++++++++++++ .../insights/trends/trends_query_builder.py | 2 - .../insights/utils/aggregations.py | 5 +- 4 files changed, 125 insertions(+), 6 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index c42674adbd8a3..296fd1be6cc70 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -118,7 +118,7 @@ def is_active_users_math(self): return self.series.math in ["weekly_active", "monthly_active"] def is_first_time_ever_math(self): - return self.series.math == "first_time_for_user" + return self.series.math in {"first_time_for_user", "first_matching_event_for_user"} def is_first_matching_event(self): return self.series.math == "first_matching_event_for_user" @@ -465,7 +465,6 @@ def get_first_time_math_query_orchestrator( events_where_clause: ast.Expr, sample_value: ast.RatioExpr, event_name_filter: ast.Expr | None = None, - is_first_matching_event: bool = False, ): date_placeholders = self.query_date_range.to_placeholders() date_from = parse_expr( @@ -479,6 +478,7 @@ def get_first_time_math_query_orchestrator( events_query = ast.SelectQuery(select=[]) parent_select = self._first_time_parent_query(events_query) + is_first_matching_event = self.is_first_matching_event() class QueryOrchestrator: events_query_builder: FirstTimeForUserEventsQueryAlternator diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 396fcaeed5f5f..e246d9e5ef43a 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -5022,6 +5022,13 @@ def test_trends_aggregation_first_matching_event_for_user(self): distinct_ids=["p1"], properties={}, ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-06T12:00:00Z", + properties={"$browser": "Firefox"}, + ) _create_event( team=self.team, event="$pageview", @@ -5069,3 +5076,116 @@ def test_trends_aggregation_first_matching_event_for_user(self): assert len(response.results) == 1 assert response.results[0]["count"] == 1 assert response.results[0]["data"] == [0, 0, 1, 0] + + def test_trends_aggregation_first_matching_event_for_user_with_breakdown(self): + _create_person( + team=self.team, + distinct_ids=["p1"], + properties={}, + ) + _create_person( + team=self.team, + distinct_ids=["p2"], + properties={}, + ) + + # these events outside date range should be ignored + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-06T12:00:00Z", + properties={"$browser": "Chrome"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-06T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-06T13:00:00Z", + properties={"$browser": "Firefox"}, + ) + + # only this event for p1 should be included + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-08T12:00:00Z", + properties={"$browser": "Chrome"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-09T12:00:00Z", + properties={"$browser": "Chrome"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-10T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + + # only this event for p2 should be included + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-10T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="p1", + timestamp="2020-01-11T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="p2", + timestamp="2020-01-11T12:00:00Z", + properties={"$browser": "Firefox"}, + ) + + flush_persons_and_events() + + response = self._run_trends_query( + "2020-01-08", + "2020-01-11", + IntervalType.DAY, + [ + EventsNode( + event="$pageview", + math=BaseMathType.FIRST_MATCHING_EVENT_FOR_USER, + ) + ], + TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH), + BreakdownFilter(breakdown_type=BreakdownType.EVENT, breakdown="$browser"), + ) + + # one for each breakdown value + assert len(response.results) == 2 + + # chrome + assert response.results[0]["breakdown_value"] == "Chrome" + assert response.results[0]["count"] == 1 + # match on 8th (p1) for first day in time range + assert response.results[0]["data"] == [1, 0, 0, 0] + + # firefox + assert response.results[1]["breakdown_value"] == "Firefox" + assert response.results[1]["count"] == 1 + # match on 10th (p2) for third day in time range + assert response.results[1]["data"] == [0, 0, 1, 0] diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 1afdc8aee4a2f..78478e3216b2f 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -296,13 +296,11 @@ def _get_events_subquery( # Just complex series aggregation elif self._aggregation_operation.requires_query_orchestration() and ( self._aggregation_operation.is_first_time_ever_math() - or self._aggregation_operation.is_first_matching_event() ): return self._aggregation_operation.get_first_time_math_query_orchestrator( events_where_clause=events_filter, sample_value=self._sample_value(), event_name_filter=self._event_or_action_where_expr(), - is_first_matching_event=self._aggregation_operation.is_first_matching_event(), ).build() elif self._aggregation_operation.requires_query_orchestration(): return self._aggregation_operation.get_actors_query_orchestrator( diff --git a/posthog/hogql_queries/insights/utils/aggregations.py b/posthog/hogql_queries/insights/utils/aggregations.py index bdf30f527d921..3c453c79c4616 100644 --- a/posthog/hogql_queries/insights/utils/aggregations.py +++ b/posthog/hogql_queries/insights/utils/aggregations.py @@ -81,9 +81,10 @@ def _select_expr(self, date_from: ast.Expr, filters: ast.Expr | None = None, is_ aggregation_filters = date_from if filters is None else ast.And(exprs=[date_from, filters]) min_timestamp_expr = ( ast.Call(name="min", args=[ast.Field(chain=["timestamp"])]) - if not is_first_matching_event or filters is None - else ast.Call(name="minIf", args=[ast.Field(chain=["timestamp"]), filters]) + if not is_first_matching_event + else ast.Call(name="minIf", args=[ast.Field(chain=["timestamp"]), aggregation_filters]) ) + return [ ast.Alias( alias="min_timestamp",