Skip to content

Commit

Permalink
Page Scroll Goals (#5029)
Browse files Browse the repository at this point in the history
* migration: add scroll_threshold to goals

* update goal schema

* setup simple UI for creating scroll goals

* add ability to filter and breakdown scroll goals

* fix goals form tests

* add valiation for page path exists

* move todo comments to expression.ex

* move tests

* make it clear that scroll_threshold is optional

* avoid calling Plausible.Goal.type() too many times

* do not consider 255 scroll depth a conversion

* migration: add scroll_threshold to goals

* do not drop the old index yet

* More efficient goals join again

* Refactor: move goals stats code explicitly under Stats.Goals module

* Move code under Plausible.Stats.Goals

* 254 -> 100

* add scroll_threshold field to goal schema + new unique constraint

* adjust test to test what it claims to

* mix format

* add migration

* consider imported query unsupported when page scroll goal filter

* add missing tests

* pattern match imported argument

* silence credo

* Update lib/plausible/stats/sql/expression.ex

Co-authored-by: Karl-Aksel Puulmann <[email protected]>

* use site_imports populated in test setup

---------

Co-authored-by: Karl-Aksel Puulmann <[email protected]>
Co-authored-by: Karl-Aksel Puulmann <[email protected]>
  • Loading branch information
3 people authored Feb 4, 2025
1 parent 15d78bd commit 4d05245
Show file tree
Hide file tree
Showing 19 changed files with 1,066 additions and 91 deletions.
30 changes: 26 additions & 4 deletions lib/plausible/goal/schema.ex
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,19 @@ defmodule Plausible.Goal do
|> cast_assoc(:site)
|> update_leading_slash()
|> validate_event_name_and_page_path()
|> validate_page_path_for_scroll_goal()
|> maybe_put_display_name()
|> unique_constraint(:event_name, name: :goals_event_name_unique)
|> unique_constraint(:page_path, name: :goals_page_path_unique)
|> unique_constraint([:page_path, :scroll_threshold],
name: :goals_page_path_and_scroll_threshold_unique
)
|> unique_constraint(:display_name, name: :goals_site_id_display_name_index)
|> validate_length(:event_name, max: @max_event_name_length)
|> validate_number(:scroll_threshold,
greater_than_or_equal_to: -1,
less_than_or_equal_to: 100,
message: "Should be -1 (missing) or in range [0, 100]"
)
|> check_constraint(:event_name,
name: :check_event_name_or_page_path,
message: "cannot co-exist with page_path"
Expand All @@ -58,9 +63,14 @@ defmodule Plausible.Goal do
goal.display_name
end

@spec type(t()) :: :event | :page
def type(%{event_name: event_name}) when is_binary(event_name), do: :event
def type(%{page_path: page_path}) when is_binary(page_path), do: :page
@spec type(t()) :: :event | :scroll | :page
def type(goal) do
cond do
is_binary(goal.event_name) -> :event
is_binary(goal.page_path) && goal.scroll_threshold > -1 -> :scroll
is_binary(goal.page_path) -> :page
end
end

defp update_leading_slash(changeset) do
case get_field(changeset, :page_path) do
Expand Down Expand Up @@ -90,6 +100,18 @@ defmodule Plausible.Goal do
end
end

defp validate_page_path_for_scroll_goal(changeset) do
scroll_threshold = get_field(changeset, :scroll_threshold)
page_path = get_field(changeset, :page_path)

if scroll_threshold > -1 and is_nil(page_path) do
changeset
|> add_error(:scroll_threshold, "page_path field missing for page scroll goal")
else
changeset
end
end

defp validate_page_path(changeset) do
value = get_field(changeset, :page_path)

Expand Down
2 changes: 1 addition & 1 deletion lib/plausible/stats/filters/query_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ defmodule Plausible.Stats.Filters.QueryParser do

def preload_goals_and_revenue(site, metrics, filters, dimensions) do
preloaded_goals =
Plausible.Goals.Filters.preload_needed_goals(site, dimensions, filters)
Plausible.Stats.Goals.preload_needed_goals(site, dimensions, filters)

{revenue_warning, revenue_currencies} =
preload_revenue(site, preloaded_goals, metrics, dimensions)
Expand Down
14 changes: 0 additions & 14 deletions lib/plausible/stats/filters/utils.ex
Original file line number Diff line number Diff line change
@@ -1,20 +1,6 @@
defmodule Plausible.Stats.Filters.Utils do
@moduledoc false

def split_goals(goals) do
Enum.split_with(goals, fn goal -> Plausible.Goal.type(goal) == :event end)
end

def split_goals_query_expressions(goals) do
{event_goals, pageview_goals} = split_goals(goals)
events = Enum.map(event_goals, fn goal -> goal.event_name end)

page_regexes =
Enum.map(pageview_goals, fn goal -> page_regex(goal.page_path) end)

{events, page_regexes}
end

def page_regex(expr) do
escaped =
expr
Expand Down
103 changes: 81 additions & 22 deletions lib/plausible/goals/filters.ex → lib/plausible/stats/goals.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
defmodule Plausible.Goals.Filters do
@moduledoc false
defmodule Plausible.Stats.Goals do
@moduledoc """
Stats code related to filtering and grouping by goals.
"""

import Ecto.Query
import Plausible.Stats.Filters.Utils, only: [page_regex: 1]
Expand Down Expand Up @@ -59,6 +61,49 @@ defmodule Plausible.Goals.Filters do
end)
end

@type goal_join_data() :: %{
indices: [non_neg_integer()],
types: [String.t()],
event_names_imports: [String.t()],
event_names_by_type: [String.t()],
page_regexes: [String.t()],
scroll_thresholds: [non_neg_integer()]
}

@doc """
Returns data needed to perform a GROUP BY on goals in an ecto query.
"""
@spec goal_join_data(Plausible.Stats.Query.t()) :: goal_join_data()
def goal_join_data(query) do
goals = query.preloaded_goals.matching_toplevel_filters

%{
indices: Enum.with_index(goals, 1) |> Enum.map(fn {_goal, idx} -> idx end),
types: Enum.map(goals, &to_string(Plausible.Goal.type(&1))),
# :TRICKY: This will contain "" for non-event goals
event_names_imports: Enum.map(goals, &to_string(&1.event_name)),
event_names_by_type:
Enum.map(goals, fn goal ->
case Plausible.Goal.type(goal) do
:event -> goal.event_name
:page -> "pageview"
:scroll -> "pageleave"
end
end),
# :TRICKY: event goals are considered to match everything for the sake of efficient queries in query_builder.ex
# See also Plausible.Stats.SQL.Expression#event_goal_join
page_regexes:
Enum.map(goals, fn goal ->
case Plausible.Goal.type(goal) do
:event -> ".?"
:page -> Filters.Utils.page_regex(goal.page_path)
:scroll -> Filters.Utils.page_regex(goal.page_path)
end
end),
scroll_thresholds: Enum.map(goals, & &1.scroll_threshold)
}
end

defp filter_preloaded(goals, filter, clause) do
Enum.filter(goals, fn goal -> matches?(goal, filter, clause) end)
end
Expand Down Expand Up @@ -106,33 +151,47 @@ defmodule Plausible.Goals.Filters do

defp build_condition(filtered_goals, imported?) do
Enum.reduce(filtered_goals, false, fn goal, dynamic_statement ->
case goal do
nil ->
dynamic([e], ^dynamic_statement)

%Plausible.Goal{event_name: event_name} when is_binary(event_name) ->
dynamic([e], e.name == ^event_name or ^dynamic_statement)

%Plausible.Goal{page_path: page_path} when is_binary(page_path) ->
dynamic([e], ^page_filter_condition(page_path, imported?) or ^dynamic_statement)
if is_nil(goal) do
dynamic([e], ^dynamic_statement)
else
type = Plausible.Goal.type(goal)
dynamic([e], ^goal_condition(type, goal, imported?) or ^dynamic_statement)
end
end)
end

defp page_filter_condition(page_path, imported?) do
db_field = page_path_db_field(imported?)
defp goal_condition(:event, goal, _) do
dynamic([e], e.name == ^goal.event_name)
end

page_condition =
if String.contains?(page_path, "*") do
dynamic([e], fragment("match(?, ?)", field(e, ^db_field), ^page_regex(page_path)))
else
dynamic([e], field(e, ^db_field) == ^page_path)
end
defp goal_condition(:scroll, goal, false = _imported?) do
pathname_condition = page_path_condition(goal.page_path, _imported? = false)
name_condition = dynamic([e], e.name == "pageleave")

scroll_condition =
dynamic([e], e.scroll_depth <= 100 and e.scroll_depth >= ^goal.scroll_threshold)

dynamic([e], ^pathname_condition and ^name_condition and ^scroll_condition)
end

defp goal_condition(:page, goal, true = _imported?) do
page_path_condition(goal.page_path, _imported? = true)
end

defp goal_condition(:page, goal, false = _imported?) do
name_condition = dynamic([e], e.name == "pageview")
pathname_condition = page_path_condition(goal.page_path, _imported? = false)

dynamic([e], ^pathname_condition and ^name_condition)
end

defp page_path_condition(page_path, imported?) do
db_field = page_path_db_field(imported?)

if imported? do
dynamic([e], ^page_condition)
if String.contains?(page_path, "*") do
dynamic([e], fragment("match(?, ?)", field(e, ^db_field), ^page_regex(page_path)))
else
dynamic([e], ^page_condition and e.name == "pageview")
dynamic([e], field(e, ^db_field) == ^page_path)
end
end

Expand Down
1 change: 1 addition & 0 deletions lib/plausible/stats/imported/base.ex
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ defmodule Plausible.Stats.Imported.Base do
|> Enum.map(fn
:event -> "imported_custom_events"
:page -> "imported_pages"
:scroll -> nil
end)

case Enum.uniq(table_candidates ++ filter_goal_table_candidates) do
Expand Down
26 changes: 19 additions & 7 deletions lib/plausible/stats/imported/imported.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ defmodule Plausible.Stats.Imported do
import Ecto.Query
import Plausible.Stats.Imported.SQL.Expression

alias Plausible.Stats.Filters
alias Plausible.Stats.Imported
alias Plausible.Stats.Query
alias Plausible.Stats.SQL.QueryBuilder
Expand Down Expand Up @@ -239,16 +238,20 @@ defmodule Plausible.Stats.Imported do
end

def merge_imported(q, site, %Query{dimensions: ["event:goal"]} = query, metrics) do
{events, page_regexes} =
Filters.Utils.split_goals_query_expressions(query.preloaded_goals.matching_toplevel_filters)
goal_join_data = Plausible.Stats.Goals.goal_join_data(query)

Imported.Base.decide_tables(query)
|> Enum.map(fn
"imported_custom_events" ->
Imported.Base.query_imported("imported_custom_events", site, query)
|> where([i], i.visitors > 0)
|> select_merge_as([i], %{
dim0: fragment("-indexOf(?, ?)", type(^events, {:array, :string}), i.name)
dim0:
fragment(
"indexOf(?, ?)",
type(^goal_join_data.event_names_imports, {:array, :string}),
i.name
)
})
|> select_imported_metrics(metrics)
|> group_by([], selected_as(:dim0))
Expand All @@ -260,15 +263,24 @@ defmodule Plausible.Stats.Imported do
|> where(
[i],
fragment(
"notEmpty(multiMatchAllIndices(?, ?) as indices)",
"""
notEmpty(
arrayFilter(
goal_idx -> ?[goal_idx] = 'page' AND match(?, ?[goal_idx]),
?
) as indices
)
""",
type(^goal_join_data.types, {:array, :string}),
i.page,
type(^page_regexes, {:array, :string})
type(^goal_join_data.page_regexes, {:array, :string}),
type(^goal_join_data.indices, {:array, :integer})
)
)
|> join(:inner, [_i], index in fragment("indices"), hints: "ARRAY", on: true)
|> group_by([_i, index], index)
|> select_merge_as([_i, index], %{
dim0: type(fragment("?", index), :integer)
dim0: fragment("CAST(?, 'UInt64')", index)
})
|> select_imported_metrics(metrics)
end)
Expand Down
2 changes: 1 addition & 1 deletion lib/plausible/stats/imported/sql/where_builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ defmodule Plausible.Stats.Imported.SQL.WhereBuilder do
db_field = Plausible.Stats.Filters.without_prefix(dimension)

if db_field == :goal do
Plausible.Goals.Filters.add_filter(query, filter, imported?: true)
Plausible.Stats.Goals.add_filter(query, filter, imported?: true)
else
mapped_db_field = Map.get(@db_field_mappings, db_field, db_field)

Expand Down
11 changes: 3 additions & 8 deletions lib/plausible/stats/query_runner.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ defmodule Plausible.Stats.QueryRunner do
QueryOptimizer,
QueryResult,
Legacy,
Filters,
SQL,
Util,
Time
Expand Down Expand Up @@ -137,15 +136,11 @@ defmodule Plausible.Stats.QueryRunner do
end

defp dimension_label("event:goal", entry, query) do
{events, paths} = Filters.Utils.split_goals(query.preloaded_goals.matching_toplevel_filters)

goal_index = Map.get(entry, Util.shortname(query, "event:goal"))

# Closely coupled logic with SQL.Expression.event_goal_join/2
cond do
goal_index < 0 -> Enum.at(events, -goal_index - 1) |> Plausible.Goal.display_name()
goal_index > 0 -> Enum.at(paths, goal_index - 1) |> Plausible.Goal.display_name()
end
query.preloaded_goals.matching_toplevel_filters
|> Enum.at(goal_index - 1)
|> Plausible.Goal.display_name()
end

defp dimension_label("time:" <> _ = time_dimension, entry, query) do
Expand Down
23 changes: 16 additions & 7 deletions lib/plausible/stats/sql/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ defmodule Plausible.Stats.SQL.Expression do
})
end

# TODO: make it possible to query events from pageleave events (total conversions for page scroll goals)
def event_metric(:events) do
wrap_alias([e], %{
events: fragment("toUInt64(round(countIf(? != 'pageleave') * any(_sample_factor)))", e.name)
Expand Down Expand Up @@ -327,19 +328,27 @@ defmodule Plausible.Stats.SQL.Expression do
def session_metric(:conversion_rate, _query), do: %{}
def session_metric(:group_conversion_rate, _query), do: %{}

defmacro event_goal_join(events, page_regexes) do
defmacro event_goal_join(goal_join_data) do
quote do
fragment(
"""
arrayPushFront(
CAST(multiMatchAllIndices(?, ?) AS Array(Int64)),
-indexOf(?, ?)
arrayIntersect(
multiMatchAllIndices(?, ?),
arrayMap(
(expected_name, threshold, index) -> if(expected_name = ? and ? between threshold and 100, index, -1),
?,
?,
?
)
)
""",
e.pathname,
type(^unquote(page_regexes), {:array, :string}),
type(^unquote(events), {:array, :string}),
e.name
type(^unquote(goal_join_data).page_regexes, {:array, :string}),
e.name,
e.scroll_depth,
type(^unquote(goal_join_data).event_names_by_type, {:array, :string}),
type(^unquote(goal_join_data).scroll_thresholds, {:array, :integer}),
type(^unquote(goal_join_data).indices, {:array, :integer})
)
end
end
Expand Down
12 changes: 5 additions & 7 deletions lib/plausible/stats/sql/query_builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
import Plausible.Stats.Imported
import Plausible.Stats.Util

alias Plausible.Stats.{Filters, Query, QueryOptimizer, TableDecider, SQL}
alias Plausible.Stats.{Query, QueryOptimizer, TableDecider, SQL}
alias Plausible.Stats.SQL.Expression

require Plausible.Stats.SQL.Expression
Expand Down Expand Up @@ -138,19 +138,17 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
Enum.reduce(query.dimensions, q, &dimension_group_by(&2, table, query, &1))
end

defp dimension_group_by(q, _table, query, "event:goal" = dimension) do
{events, page_regexes} =
Filters.Utils.split_goals_query_expressions(query.preloaded_goals.matching_toplevel_filters)
defp dimension_group_by(q, :events, query, "event:goal" = dimension) do
goal_join_data = Plausible.Stats.Goals.goal_join_data(query)

from(e in q,
join: goal in Expression.event_goal_join(events, page_regexes),
join: goal in Expression.event_goal_join(goal_join_data),
hints: "ARRAY",
on: true,
select_merge: %{
^shortname(query, dimension) => fragment("?", goal)
},
group_by: goal,
where: goal != 0 and (e.name == "pageview" or goal < 0)
group_by: goal
)
end

Expand Down
Loading

0 comments on commit 4d05245

Please sign in to comment.