Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ruslandoga committed Mar 11, 2024
1 parent 0353680 commit e421301
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 89 deletions.
99 changes: 12 additions & 87 deletions lib/plausible/exports.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,90 +5,6 @@ defmodule Plausible.Exports do

import Ecto.Query

# TODO time_on_page
# TODO sampling
# TODO export_visitors_q only can use just sessions_v2?

# @sample to_string(2_000_000)

# Development helper that runs an export -> import round trip for the
# hard-coded "imported.com" site:
#   1. clean:  drop previous import records/rows and local leftovers,
#   2. export: stream an archive of CSV exports into ./Plausible.zip,
#   3. import: unzip it, upload every file to the "imports" S3 bucket and
#              start a CSV import from those uploads.
# Takes no arguments; all names, paths and URLs below are hard-coded.
def export_import do
### clean

# Target site of the round trip; its id scopes every delete/upload below.
imported_com = Plausible.Repo.get_by!(Plausible.Site, domain: "imported.com")
# Remove the site's existing rows from the "site_imports" table.
Plausible.Repo.delete_all(where("site_imports", site_id: ^imported_com.id))

# Delete this site's rows from every table listed by Plausible.Imported.tables().
Enum.each(Plausible.Imported.tables(), fn table ->
Plausible.ImportDeletionRepo.query!(
"ALTER TABLE {$0:Identifier} DELETE WHERE site_id = {$1:UInt64}",
[table, imported_com.id],
# NOTE(review): mutations_sync: 1 presumably makes the delete mutation
# synchronous — confirm against the ClickHouse settings docs.
settings: [mutations_sync: 1]
)
end)

Plausible.Sites.clear_stats_start_date!(imported_com)

# Remove the "Plausible" directory left by a previous unzip: files first,
# then the (now empty) directory itself.
if File.exists?("Plausible") do
File.ls!("Plausible")
|> Enum.each(fn file ->
File.rm!("Plausible/" <> file)
end)

File.rmdir!("Plausible")
end

# Non-bang File.rm/1 with the result ignored: a missing archive is not an error.
File.rm("Plausible.zip")

### export

# The export runs inside a Task that is awaited below with no timeout.
task =
Task.async(fn ->
# Start a separate Ch connection from the ClickhouseRepo config, with
# :pool_size overridden to 1.
{:ok, ch} =
Plausible.ClickhouseRepo.config()
|> Keyword.replace!(:pool_size, 1)
|> Ch.start_link()

DBConnection.run(
ch,
fn conn ->
conn
# stream_archive/3 and export_queries/2 are defined outside this view;
# 37 is labelled `_plausible_io`, presumably the site id to export —
# TODO confirm.
|> stream_archive(
export_queries(_plausible_io = 37, extname: ".csv"),
format: "CSVWithNames"
)
# Write the streamed archive to ./Plausible.zip.
|> Stream.into(File.stream!("Plausible.zip"))
|> Stream.run()
end,
timeout: :infinity
)

# Print the archive size in MB (bytes / 1_000_000, rounded to 2 decimals).
IO.inspect(Float.round(File.stat!("Plausible.zip").size / 1_000_000, 2),
label: "Plausible.zip size (MB)"
)
end)

Task.await(task, :infinity)

### import

# Extract the archive into ./Plausible; `files` holds the extracted paths as
# charlists.
{:ok, files} = :zip.unzip(~c"Plausible.zip", cwd: ~c"Plausible")

# Upload every extracted file to the "imports" bucket under "<site_id>/<file>"
# and build the upload descriptors passed to the importer.
uploads =
# The clause head strips the "Plausible/" prefix; `file` stays a charlist,
# and a path without that prefix fails the match.
Enum.map(files, fn ~c"Plausible/" ++ file ->
key = "#{imported_com.id}/#{file}"
ExAws.request!(ExAws.S3.put_object("imports", key, File.read!("Plausible/#{file}")))
# Hard-coded S3 endpoint host/port.
%{"filename" => file, "s3_url" => "http://172.17.0.1:6000/imports/" <> key}
end)

# User passed to the importer, looked up by a hard-coded email.
user = Plausible.Repo.get_by!(Plausible.Auth.User, email: "[email protected]")

# Start the CSV import; start_date and end_date are both set to 1970-01-01.
{:ok, _job} =
Plausible.Imported.CSVImporter.new_import(imported_com, user,
start_date: ~D[1970-01-01],
end_date: ~D[1970-01-01],
uploads: uploads
)
end

@doc """
Builds Ecto queries to export data from `events_v2` and `sessions_v2`
tables into the format of `imported_*` tables for a website.
Expand Down Expand Up @@ -142,7 +58,6 @@ defmodule Plausible.Exports do
end

defmacrop bounces(t) do
# TODO multiply by sample_factor?
quote do
selected_as(
fragment("greatest(sum(?*?),0)", unquote(t).sign, unquote(t).is_bounce),
Expand All @@ -155,6 +70,7 @@ defmodule Plausible.Exports do
def export_visitors_q(site_id) do
visitors_sessions_q =
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: selected_as(:date),
Expand All @@ -163,12 +79,13 @@ defmodule Plausible.Exports do
bounces: bounces(s),
visits: visits(s),
visit_duration: visit_duration(s)
# TODO
# NOTE: can we use just sessions_v2 table in this query? sum(pageviews) and visitors(s)?
# visitors: visitors(s)
}

visitors_events_q =
from e in "events_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: e.site_id == ^site_id,
group_by: selected_as(:date),
Expand Down Expand Up @@ -204,6 +121,7 @@ defmodule Plausible.Exports do
@spec export_sources_q(pos_integer) :: Ecto.Query.t()
def export_sources_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [
Expand Down Expand Up @@ -233,6 +151,7 @@ defmodule Plausible.Exports do
def export_pages_q(site_id) do
window_q =
from e in "events_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: e.site_id == ^site_id,
select: %{
Expand Down Expand Up @@ -262,7 +181,7 @@ defmodule Plausible.Exports do
fragment("toUInt64(round(countIf(?='pageview')*any(_sample_factor)))", e.name),
:pageviews
),
# TODO are exits pageviews or any events?
# NOTE: are exits pageviews or any events?
selected_as(
fragment("toUInt64(round(countIf(?=0)*any(_sample_factor)))", e.next_timestamp),
:exits
Expand All @@ -277,6 +196,7 @@ defmodule Plausible.Exports do
@spec export_entry_pages_q(pos_integer) :: Ecto.Query.t()
def export_entry_pages_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [selected_as(:date), s.entry_page],
Expand All @@ -297,6 +217,7 @@ defmodule Plausible.Exports do
@spec export_exit_pages_q(pos_integer) :: Ecto.Query.t()
def export_exit_pages_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [selected_as(:date), s.exit_page],
Expand All @@ -315,6 +236,7 @@ defmodule Plausible.Exports do
@spec export_locations_q(pos_integer) :: Ecto.Query.t()
def export_locations_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
where: s.city_geoname_id != 0 and s.country_code != "\0\0" and s.country_code != "ZZ",
Expand All @@ -335,6 +257,7 @@ defmodule Plausible.Exports do
@spec export_devices_q(pos_integer) :: Ecto.Query.t()
def export_devices_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [selected_as(:date), s.screen_size],
Expand All @@ -352,6 +275,7 @@ defmodule Plausible.Exports do
@spec export_browsers_q(pos_integer) :: Ecto.Query.t()
def export_browsers_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [selected_as(:date), s.browser],
Expand All @@ -369,6 +293,7 @@ defmodule Plausible.Exports do
@spec export_operating_systems_q(pos_integer) :: Ecto.Query.t()
def export_operating_systems_q(site_id) do
from s in "sessions_v2",
# NOTE: no sampling is used right now
# hints: ["SAMPLE", unsafe_fragment(^@sample)],
where: s.site_id == ^site_id,
group_by: [selected_as(:date), s.operating_system],
Expand Down
1 change: 0 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ defmodule Plausible.MixProject do
{:bypass, "~> 2.1", only: [:dev, :test, :small_test]},
{:cachex, "~> 3.4"},
{:ecto_ch, "~> 0.3"},
{:ch, "~> 0.2.5-rc.0"},
{:cloak, "~> 1.1"},
{:cloak_ecto, "~> 1.2"},
{:combination, "~> 0.0.3"},
Expand Down
2 changes: 1 addition & 1 deletion mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"cachex": {:hex, :cachex, "3.6.0", "14a1bfbeee060dd9bec25a5b6f4e4691e3670ebda28c8ba2884b12fe30b36bf8", [:mix], [{:eternal, "~> 1.2", [hex: :eternal, repo: "hexpm", optional: false]}, {:jumper, "~> 1.0", [hex: :jumper, repo: "hexpm", optional: false]}, {:sleeplocks, "~> 1.1", [hex: :sleeplocks, repo: "hexpm", optional: false]}, {:unsafe, "~> 1.0", [hex: :unsafe, repo: "hexpm", optional: false]}], "hexpm", "ebf24e373883bc8e0c8d894a63bbe102ae13d918f790121f5cfe6e485cc8e2e2"},
"castore": {:hex, :castore, "1.0.5", "9eeebb394cc9a0f3ae56b813459f990abb0a3dedee1be6b27fdb50301930502f", [:mix], [], "hexpm", "8d7c597c3e4a64c395980882d4bca3cebb8d74197c590dc272cfd3b6a6310578"},
"certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"},
"ch": {:hex, :ch, "0.2.5-rc.0", "c9a6faed74b6ffefec83cafff49ee0bdc38fb34a6b021b8e0a4819e1cb7b9b80", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "ca7b7a1c79d6b61ad42db16d1857a6bc93051d443b25dc620b9c997778d19525"},
"ch": {:hex, :ch, "0.2.5", "b8d70689951bd14c8c8791dc72cdc957ba489ceae723e79cf1a91d95b6b855ae", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "97de104c8f513a23c6d673da37741f68ae743f6cdb654b96a728d382e2fba4de"},
"chatterbox": {:hex, :ts_chatterbox, "0.15.1", "5cac4d15dd7ad61fc3c4415ce4826fc563d4643dee897a558ec4ea0b1c835c9c", [:rebar3], [{:hpack, "~> 0.3.0", [hex: :hpack_erl, repo: "hexpm", optional: false]}], "hexpm", "4f75b91451338bc0da5f52f3480fa6ef6e3a2aeecfc33686d6b3d0a0948f31aa"},
"cldr_utils": {:hex, :cldr_utils, "2.24.2", "364fa30be55d328e704629568d431eb74cd2f085752b27f8025520b566352859", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.5", [hex: :certifi, repo: "hexpm", optional: true]}, {:decimal, "~> 1.9 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "3362b838836a9f0fa309de09a7127e36e67310e797d556db92f71b548832c7cf"},
"cloak": {:hex, :cloak, "1.1.2", "7e0006c2b0b98d976d4f559080fabefd81f0e0a50a3c4b621f85ceeb563e80bb", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "940d5ac4fcd51b252930fd112e319ea5ae6ab540b722f3ca60a85666759b9585"},
Expand Down

0 comments on commit e421301

Please sign in to comment.