Skip to content

Commit

Permalink
Merge pull request #29 from dbecorp/hercules-packages
Browse files Browse the repository at this point in the history
Hercules packages
  • Loading branch information
jakthom authored Oct 23, 2024
2 parents 41da523 + 2f569fe commit 9c89546
Show file tree
Hide file tree
Showing 16 changed files with 323 additions and 175 deletions.
2 changes: 1 addition & 1 deletion .VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.3
0.6.0
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,19 @@ globalLabels:
- env: dev
```

### Packages

Hercules includes a YAML-based package loader, so extensions, macros, sources, and metrics can be logically grouped into packages and distributed together.

Starter packages can be found in the [hercules-packages](/hercules-packages/) directory.

**Example package registration**

```yaml
packages:
- location: hercules-packages/snowflake-performance.yml
```


### Embedded Analytics

Expand Down
40 changes: 35 additions & 5 deletions cmd/hercules/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/dbecorp/hercules/pkg/config"
"github.com/dbecorp/hercules/pkg/flock"
herculespackage "github.com/dbecorp/hercules/pkg/herculesPackage"
metrics "github.com/dbecorp/hercules/pkg/metrics"
"github.com/dbecorp/hercules/pkg/middleware"
"github.com/prometheus/client_golang/prometheus"
Expand All @@ -26,6 +27,7 @@ var VERSION string
// Hercules holds the runtime state of the application: its configuration, the
// database handle and connection, the loaded packages, and the metric registry.
type Hercules struct {
// config is the application configuration; packages, sources, metrics and
// instance labels are all read from it.
config config.Config
// db is the database handle returned by flock.InitializeDB.
db *sql.DB
// packages is the list of loaded packages, assigned by loadPackages.
packages []herculespackage.Package
// conn is the connection returned alongside db by flock.InitializeDB; it is
// the connection packages are initialized with.
conn *sql.Conn
// metricRegistry is assigned by initializeRegistry.
metricRegistry *metrics.MetricRegistry
}
Expand All @@ -48,21 +50,49 @@ func (d *Hercules) initializeFlock() {
d.db, d.conn = flock.InitializeDB(d.config)
}

func (d *Hercules) initializeSources() {
for _, source := range d.config.Sources {
source.InitializeWithConnection(d.conn)
// loadPackages resolves every package referenced in the configuration and
// stores the result in d.packages.
//
// A package that cannot be resolved is logged and skipped: the value returned
// alongside a non-nil error is not meaningful, so it must not be added to the
// list of packages that are later initialized and merged into the registry.
//
// The extensions, macros, sources and metrics declared directly in the
// configuration are appended last as a synthetic "core" package, so the rest
// of the application can treat all configuration uniformly as packages.
func (d *Hercules) loadPackages() {
	// One extra slot for the synthetic "core" package appended below.
	pkgs := make([]herculespackage.Package, 0, len(d.config.Packages)+1)
	for _, pkgConfig := range d.config.Packages {
		pkg, err := pkgConfig.GetPackage()
		if err != nil {
			log.Error().Err(err).Msg("could not get package")
			continue
		}
		pkgs = append(pkgs, pkg)
	}
	// Represent core configuration via a package
	pkgs = append(pkgs, herculespackage.Package{
		Name:       "core",
		Version:    "1.0.0",
		Extensions: d.config.Extensions,
		Macros:     d.config.Macros,
		Sources:    d.config.Sources,
		Metrics:    d.config.Metrics,
	})
	d.packages = pkgs
}

// initializePackages walks d.packages in order and initializes each package
// with the shared database connection held in d.conn.
func (d *Hercules) initializePackages() {
	for _, pkg := range d.packages {
		pkg.InitializeWithConnection(d.conn)
	}
}

// initializeRegistry builds the metric registry from the metric definitions of
// every loaded package, labelled with the configured instance labels.
//
// The registry is constructed exactly once, from the merged definitions.
// Metrics declared directly in the configuration are not passed separately:
// loadPackages already exposes them through the "core" package, so they are
// picked up by the merge below.
func (d *Hercules) initializeRegistry() {
	// Merge metric definitions from all packages
	metricDefinitions := metrics.MetricDefinitions{}
	for _, pkg := range d.packages {
		metricDefinitions.Merge(pkg.Metrics)
	}
	d.metricRegistry = metrics.NewMetricRegistry(metricDefinitions, d.config.InstanceLabels())
}

// Initialize prepares Hercules for use. The steps run in a fixed order because
// each depends on the previous one:
//
//  1. configure            - load the configuration
//  2. initializeFlock      - open the database and connection
//  3. loadPackages         - resolve configured packages plus the "core" package
//  4. initializePackages   - initialize every package with the connection
//  5. initializeRegistry   - build the metric registry from all packages
//
// Sources are not initialized separately here: the sources declared in the
// configuration are carried by the "core" package built in loadPackages, so a
// standalone source-initialization step would process them a second time.
func (d *Hercules) Initialize() {
	log.Debug().Msg("initializing Hercules")
	d.configure()
	d.initializeFlock()
	d.loadPackages()
	d.initializePackages()
	d.initializeRegistry()
	log.Debug().Interface("config", d.config).Msg("running with config")
}
Expand Down
18 changes: 18 additions & 0 deletions hercules-packages/example-nyc-taxi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Example package: loads one month of NYC yellow-taxi trip data from a remote
# parquet file and exposes a gauge of total fares per pickup location.
name: nyc-taxi
version: 1.0.0

sources:
  # NOTE(review): the source name says "june" but the URL points at the
  # 2024-07 data file. The name is kept unchanged because it is the table name
  # queries refer to; confirm which month is intended and align the two.
  - name: nyc_yellow_taxi_june_2024
    type: parquet
    source: https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-07.parquet
    materialize: true
    refreshIntervalSeconds: 100

metrics:
  gauge:
    - name: nyc_pickup_location_fare_total
      # Help text names the month of the file actually loaded (2024-07).
      help: Total NYC fares for the month of July by pickup location
      enabled: true
      # Columns are aliased `labels` / `value`, matching the convention used by
      # the other packages' metric queries.
      sql: select struct_pack(pickupLocation := PULocationID::text) as labels, sum(fare_amount) as value from nyc_yellow_taxi_june_2024 group by 1
      labels:
        - pickupLocation
2 changes: 2 additions & 0 deletions hercules-packages/example-tpch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Placeholder package: declares only a name and version. No extensions,
# macros, sources, or metrics are defined yet.
name: tpch
version: 1.0.0
120 changes: 120 additions & 0 deletions hercules-packages/snowflake-performance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Snowflake performance package: materializes a query-history parquet file and
# derives gauges, a histogram, a summary, and a counter from it.
#
# Every metric query returns a struct of label values aliased `labels` and a
# numeric column aliased `value`.
name: snowflake-performance
version: 1.0.0

extensions:
  core:
    - name: inet

  community:
    - name: chsql # Clickhouse macros and functions

macros:
  - sql: create or replace macro one() AS (SELECT 1);

sources:
  - name: snowflake_query_history
    type: parquet
    source: assets/snowflake_query_history.parquet
    materialize: true
    refreshIntervalSeconds: 5

metrics:
  gauge:
    - name: query_status_count
      help: Queries executed and their associated status, by user and warehouse
      enabled: true
      sql: from snowflake_query_history select struct_pack(user := user_name, warehouse := warehouse_name, status := lower(execution_status)) as labels, count(*) as value group by 1;
      labels:
        - user
        - warehouse
        - status

    # NOTE(review): the query has no date filter, so "this week" only holds if
    # the source file itself is limited to the current week - confirm.
    - name: queries_this_week_total
      help: Queries this week total, by user and warehouse
      enabled: true
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, count(*) as value from snowflake_query_history group by 1;
      labels:
        - user
        - warehouse

    # Snowflake reports TOTAL_ELAPSED_TIME in milliseconds; divide by 1000 so
    # the value matches the `_seconds` unit in the metric name.
    - name: avg_query_duration_seconds
      help: The average query duration for a particular user, using a particular warehouse
      enabled: true
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(TOTAL_ELAPSED_TIME) / 1000.0 as value from snowflake_query_history group by 1;
      labels:
        - user
        - warehouse

    # Help text describes the actual grouping (user and query type); the query
    # does not break results down by table.
    - name: table_operations_count
      help: The number of operations by user and query type
      enabled: true
      sql: select struct_pack(user := user_name, query_type := query_type) as labels, count(*) as value from snowflake_query_history group by 1;
      labels:
        - user
        - query_type

    - name: avg_virtual_warehouse_spill_to_local_storage_bytes
      help: The average bytes spilled to disk for queries on a specific warehouse
      enabled: true
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(BYTES_SPILLED_TO_LOCAL_STORAGE) as value from snowflake_query_history group by 1;
      labels:
        - user
        - warehouse

    - name: avg_virtual_warehouse_spill_to_remote_storage_bytes
      help: The average bytes spilled to remote disk for queries on a specific warehouse
      enabled: true
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(BYTES_SPILLED_TO_REMOTE_STORAGE) as value from snowflake_query_history group by 1;
      labels:
        - user
        - warehouse

  histogram:
    # Converted from milliseconds to seconds so observations line up with the
    # second-denominated buckets below.
    - name: query_duration_seconds
      help: Histogram of query duration seconds
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, total_elapsed_time / 1000.0 as value from snowflake_query_history;
      labels:
        - user
        - warehouse
      buckets:
        - 0.1
        - 0.5
        - 1
        - 2
        - 4
        - 8
        - 16
        - 32
        - 64
        - 128
        - 256
        - 512
        - 1024
        - 2048
        - 4096
        - 8192
        - 16384
        - 32768

  summary:
    # Converted from milliseconds to seconds to match the metric name.
    - name: virtual_warehouse_query_duration_seconds
      help: Summary of query duration seconds
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, total_elapsed_time / 1000.0 as value from snowflake_query_history;
      labels:
        - user
        - warehouse
      # Quantile objectives, listed in ascending order.
      objectives:
        - 0.001
        - 0.01
        - 0.05
        - 0.5
        - 0.9
        - 0.99

  counter:
    - name: queries_executed_count
      help: The count of queries executed by user and warehouse
      sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, 1 as value from snowflake_query_history;
      labels:
        - user
        - warehouse
2 changes: 2 additions & 0 deletions hercules-packages/snowflake-security.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Placeholder package: declares only a name and version. No extensions,
# macros, sources, or metrics are defined yet.
name: snowflake-security
version: 1.0.0
Loading

0 comments on commit 9c89546

Please sign in to comment.