Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ai: add ai-stream-status endpoint & client #198

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions ai/clickhouse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package ai

import (
"context"
"crypto/tls"
"fmt"
"math"
"strings"

"github.com/ClickHouse/clickhouse-go/v2"
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
"github.com/Masterminds/squirrel"
)

// maxClickhouseResultRows is the row cap for ClickHouse result sets. The
// status-events query builder requests one row more than this value
// (Limit(maxClickhouseResultRows + 1)).
const maxClickhouseResultRows = 1000

// AIStreamStatusEventRow is one per-stream result row of the AI stream status
// aggregation query. Each field is mapped to a result column by its `ch` tag.
type AIStreamStatusEventRow struct {
	// StreamID is the stream the aggregates belong to (column stream_id).
	StreamID string `ch:"stream_id"`
	// AvgInputFPS is the averaged input frame rate (column avg_input_fps).
	AvgInputFPS float64 `ch:"avg_input_fps"`
	// AvgOutputFPS is the averaged output frame rate (column avg_output_fps).
	AvgOutputFPS float64 `ch:"avg_output_fps"`
	// ErrorCount is the error counter (column error_count).
	ErrorCount uint64 `ch:"error_count"`
	// Errors lists the error messages (column errors).
	Errors []string `ch:"errors"`
	// TotalRestarts is the restart counter (column total_restarts).
	TotalRestarts uint64 `ch:"total_restarts"`
	// RestartLogs lists the restart log entries (column restart_logs).
	RestartLogs []string `ch:"restart_logs"`
}

// Clickhouse is the query seam used by Client. ClickhouseClient is the real
// implementation; tests substitute a mock.
type Clickhouse interface {
	// QueryAIStreamStatusEvents returns the aggregated status rows selected by spec.
	QueryAIStreamStatusEvents(ctx context.Context, spec QuerySpec) ([]AIStreamStatusEventRow, error)
}

// ClickhouseOptions carries the connection settings consumed by
// NewClickhouseConn.
type ClickhouseOptions struct {
	// Addr holds one or more server addresses, comma-separated.
	Addr string
	// User is the username used to authenticate.
	User string
	// Password is the password used to authenticate.
	Password string
	// Database is the database to connect to.
	Database string
}

// ClickhouseClient runs the package's queries over a clickhouse-go driver
// connection.
type ClickhouseClient struct {
	// conn is the driver connection every query is issued on.
	conn driver.Conn
}

// NewClickhouseConn builds a ClickhouseClient from opts by calling
// clickhouse.Open. It returns the error from clickhouse.Open unchanged.
func NewClickhouseConn(opts ClickhouseOptions) (*ClickhouseClient, error) {
conn, err := clickhouse.Open(&clickhouse.Options{
// opts.Addr may list several addresses separated by commas.
Addr: strings.Split(opts.Addr, ","),
Auth: clickhouse.Auth{
Database: opts.Database,
Username: opts.User,
Password: opts.Password,
},
// NOTE(review): a non-nil default TLS config is always supplied, so the
// connection is presumably always TLS — confirm against clickhouse-go.
TLS: &tls.Config{},
})
if err != nil {
return nil, err
}
return &ClickhouseClient{conn: conn}, nil

Check warning on line 55 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L42-L55

Added lines #L42 - L55 were not covered by tests
}

// QueryAIStreamStatusEvents builds the status-events query for spec, runs it
// on the client's connection and returns the scanned rows with NaN averages
// replaced by zero. A query-building failure is wrapped; an error from Select
// is returned as is.
func (c *ClickhouseClient) QueryAIStreamStatusEvents(ctx context.Context, spec QuerySpec) ([]AIStreamStatusEventRow, error) {
sql, args, err := buildAIStreamStatusEventsQuery(spec)
if err != nil {
return nil, fmt.Errorf("error building AI stream status events query: %w", err)
}
var res []AIStreamStatusEventRow
err = c.conn.Select(ctx, &res, sql, args...)
if err != nil {
return nil, err
}
// Normalize NaN averages before handing the rows to callers.
res = replaceNaN(res)
return res, nil

Check warning on line 69 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L58-L69

Added lines #L58 - L69 were not covered by tests
}

func buildAIStreamStatusEventsQuery(spec QuerySpec) (string, []interface{}, error) {
query := squirrel.Select(
"stream_id",
"avg(input_fps) as avg_input_fps",
"avg(output_fps) as avg_output_fps",
"countIf(last_error != '') as error_count",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not an entirely correct aggregation, the right way would be aggregating the error events. How hard would that be? I expected the data pipeline itself to be doing that kind of aggregation though, so we're not scanning the table at query time.

"arrayFilter(x -> x != '', groupUniqArray(last_error)) as errors",
"sum(restart_count) as total_restarts",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also incorrect. restart_count is already cumulative. This should be a max on the simplest solution. Should work.

"arrayFilter(x -> x != '', groupUniqArray(last_restart_logs)) as restart_logs").
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe only keep the last one as well? I think it would be confusing the merge+uniq these logs.

From("stream_status").
GroupBy("stream_id").
Limit(maxClickhouseResultRows + 1)

if spec.Filter.StreamID != "" {
query = query.Where("stream_id = ?", spec.Filter.StreamID)
}

Check warning on line 87 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L72-L87

Added lines #L72 - L87 were not covered by tests

if spec.From != nil {
query = query.Where("timestamp_ts > ?", spec.From)
}

Check warning on line 91 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L89-L91

Added lines #L89 - L91 were not covered by tests

if spec.To != nil {
query = query.Where("timestamp_ts < ?", spec.To)
}

Check warning on line 95 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L93-L95

Added lines #L93 - L95 were not covered by tests

sql, args, err := query.ToSql()
if err != nil {
return "", nil, err
}

Check warning on line 100 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L97-L100

Added lines #L97 - L100 were not covered by tests

return sql, args, nil

Check warning on line 102 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L102

Added line #L102 was not covered by tests
}

// replaceNaN returns a copy of rows in which any NaN AvgInputFPS or
// AvgOutputFPS has been replaced with 0. The input slice is left unmodified.
// The result is nil when rows is empty.
// NOTE(review): presumably needed because encoding/json cannot marshal NaN —
// confirm the motivation.
func replaceNaN(rows []AIStreamStatusEventRow) []AIStreamStatusEventRow {
var res []AIStreamStatusEventRow
// r is a per-iteration copy, so the assignments below do not touch rows.
for _, r := range rows {
if math.IsNaN(r.AvgInputFPS) {
r.AvgInputFPS = 0.0
}
if math.IsNaN(r.AvgOutputFPS) {
r.AvgOutputFPS = 0.0
}
res = append(res, r)

Check warning on line 114 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L105-L114

Added lines #L105 - L114 were not covered by tests
}
return res

Check warning on line 116 in ai/clickhouse.go

View check run for this annotation

Codecov / codecov/patch

ai/clickhouse.go#L116

Added line #L116 was not covered by tests
}
79 changes: 79 additions & 0 deletions ai/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package ai

import (
"context"
"errors"
"fmt"

livepeer "github.com/livepeer/go-api-client"
"github.com/livepeer/livepeer-data/pkg/data"
promClient "github.com/prometheus/client_golang/api"
)

// ErrAssetNotFound is a sentinel error for a missing asset.
// NOTE(review): nothing in the visible code returns or checks it — confirm it
// is still needed in this package.
var ErrAssetNotFound = errors.New("asset not found")

// StreamStatus is the JSON-facing form of one aggregated stream status row.
// It carries the same values as AIStreamStatusEventRow, with the two FPS
// averages wrapped in data.Nullable. JSON keys are camelCase.
type StreamStatus struct {
StreamID string `json:"streamId"`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use snake_case like the other AI APIs?

AvgInputFPS data.Nullable[float64] `json:"avgInputFps"`
AvgOutputFPS data.Nullable[float64] `json:"avgOutputFps"`
ErrorCount uint64 `json:"errorCount"`
Errors []string `json:"errors"`
TotalRestarts uint64 `json:"totalRestarts"`
RestartLogs []string `json:"restartLogs"`
}

// ClientOptions groups the settings accepted by NewClient. ClickhouseOptions
// is embedded, so its fields are promoted onto ClientOptions.
type ClientOptions struct {
	// Prometheus is the Prometheus API client configuration.
	Prometheus promClient.Config
	// Livepeer configures the Livepeer API client.
	Livepeer livepeer.ClientOptions
	// ClickhouseOptions configures the ClickHouse connection.
	ClickhouseOptions
}

// Client answers AI stream status queries using a ClickHouse backend.
type Client struct {
	// opts are the options the client was created with.
	opts ClientOptions
	// lp is the Livepeer API client.
	lp *livepeer.Client
	// clickhouse is the backend that QueryAIStreamStatusEvents delegates to.
	clickhouse Clickhouse
}

// NewClient creates a Client from opts: it builds the Livepeer API client from
// opts.Livepeer and the ClickHouse client from opts.ClickhouseOptions.
// opts.Prometheus is stored with the options but not otherwise used here.
// A failure to create the ClickHouse client is returned wrapped.
func NewClient(opts ClientOptions) (*Client, error) {
lp := livepeer.NewAPIClient(opts.Livepeer)

clickhouse, err := NewClickhouseConn(opts.ClickhouseOptions)
if err != nil {
return nil, fmt.Errorf("error creating clickhouse client: %w", err)
}

Check warning on line 43 in ai/client.go

View check run for this annotation

Codecov / codecov/patch

ai/client.go#L37-L43

Added lines #L37 - L43 were not covered by tests

// Positional literal: the order must match the Client field order
// (opts, lp, clickhouse).
return &Client{opts, lp, clickhouse}, nil

Check warning on line 45 in ai/client.go

View check run for this annotation

Codecov / codecov/patch

ai/client.go#L45

Added line #L45 was not covered by tests
}

// QueryAIStreamStatusEvents fetches the aggregated status rows for spec from
// the ClickHouse backend and converts them to StreamStatus values. An error
// from the backend is returned unchanged.
func (c *Client) QueryAIStreamStatusEvents(ctx context.Context, spec QuerySpec) ([]StreamStatus, error) {
rows, err := c.clickhouse.QueryAIStreamStatusEvents(ctx, spec)
if err != nil {
return nil, err
}

Check warning on line 52 in ai/client.go

View check run for this annotation

Codecov / codecov/patch

ai/client.go#L51-L52

Added lines #L51 - L52 were not covered by tests
metrics := aiStreamStatusEventsToStreamStatuses(rows, spec)
return metrics, nil
}

// aiStreamStatusEventsToStreamStatuses converts each ClickHouse row into its
// StreamStatus counterpart, preserving order. The two FPS averages are wrapped
// with data.WrapNullable; every other field is copied as is. The returned
// slice is always non-nil and has the same length as rows. spec is accepted
// but not consulted.
func aiStreamStatusEventsToStreamStatuses(rows []AIStreamStatusEventRow, spec QuerySpec) []StreamStatus {
	converted := make([]StreamStatus, 0, len(rows))
	for idx := range rows {
		src := rows[idx]
		status := StreamStatus{
			StreamID:      src.StreamID,
			AvgInputFPS:   data.WrapNullable(src.AvgInputFPS),
			AvgOutputFPS:  data.WrapNullable(src.AvgOutputFPS),
			ErrorCount:    src.ErrorCount,
			Errors:        src.Errors,
			TotalRestarts: src.TotalRestarts,
			RestartLogs:   src.RestartLogs,
		}
		converted = append(converted, status)
	}
	return converted
}

// toFloat64Ptr wraps f in a data.Nullable[float64] by calling
// data.ToNullable(f, true, asked).
// NOTE(review): the meaning of the two boolean arguments is defined by
// data.ToNullable and is not visible here — confirm before relying on it.
// No caller of this helper appears in the visible code.
func toFloat64Ptr(f float64, asked bool) data.Nullable[float64] {
return data.ToNullable(f, true, asked)

Check warning on line 74 in ai/client.go

View check run for this annotation

Codecov / codecov/patch

ai/client.go#L73-L74

Added lines #L73 - L74 were not covered by tests
}

// toStringPtr wraps s in a data.Nullable[string] by calling
// data.ToNullable(s, true, asked).
// NOTE(review): the meaning of the two boolean arguments is defined by
// data.ToNullable and is not visible here — confirm before relying on it.
// No caller of this helper appears in the visible code.
func toStringPtr(s string, asked bool) data.Nullable[string] {
return data.ToNullable(s, true, asked)

Check warning on line 78 in ai/client.go

View check run for this annotation

Codecov / codecov/patch

ai/client.go#L77-L78

Added lines #L77 - L78 were not covered by tests
}
100 changes: 100 additions & 0 deletions ai/client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package ai

import (
"context"
"encoding/json"
"testing"

"github.com/stretchr/testify/require"
)

// MockClickhouseClient is a test double for the Clickhouse interface that
// serves a fixed set of rows.
type MockClickhouseClient struct {
	// rows is returned verbatim by QueryAIStreamStatusEvents.
	rows []AIStreamStatusEventRow
}

// QueryAIStreamStatusEvents ignores ctx and spec and returns the rows the mock
// was configured with, never an error.
func (m MockClickhouseClient) QueryAIStreamStatusEvents(ctx context.Context, spec QuerySpec) ([]AIStreamStatusEventRow, error) {
	canned := m.rows
	return canned, nil
}

func TestQueryAIStreamStatusEvents(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

require := require.New(t)

tests := []struct {
name string
spec QuerySpec
rows []AIStreamStatusEventRow
expJson string
}{
{
name: "basic query with no errors",
rows: []AIStreamStatusEventRow{
{
StreamID: "stream-1",
AvgInputFPS: 30.0,
AvgOutputFPS: 25.0,
ErrorCount: 0,
Errors: []string{},
TotalRestarts: 1,
RestartLogs: []string{"restart-log-1"},
},
},
expJson: `
[
{
"streamId": "stream-1",
"avgInputFps": 30.0,
"avgOutputFps": 25.0,
"errorCount": 0,
"errors": [],
"totalRestarts": 1,
"restartLogs": ["restart-log-1"]
}
]
`,
},
{
name: "query with errors",
rows: []AIStreamStatusEventRow{
{
StreamID: "stream-2",
AvgInputFPS: 20.0,
AvgOutputFPS: 15.0,
ErrorCount: 2,
Errors: []string{"error-1", "error-2"},
TotalRestarts: 3,
RestartLogs: []string{"restart-log-2", "restart-log-3"},
},
},
expJson: `
[
{
"streamId": "stream-2",
"avgInputFps": 20.0,
"avgOutputFps": 15.0,
"errorCount": 2,
"errors": ["error-1", "error-2"],
"totalRestarts": 3,
"restartLogs": ["restart-log-2", "restart-log-3"]
}
]
`,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// given
mockClickhouse := MockClickhouseClient{rows: tt.rows}
client := Client{clickhouse: &mockClickhouse}

// when
res, err := client.QueryAIStreamStatusEvents(context.Background(), tt.spec)

// then
require.NoError(err)
jsonRes, err := json.Marshal(res)
require.NoError(err)
require.JSONEq(tt.expJson, string(jsonRes))
})
}
}
24 changes: 24 additions & 0 deletions ai/query_spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai

import (
"time"
)

// QueryFilter holds the optional filters of a QuerySpec.
type QueryFilter struct {
	// StreamID restricts results to a single stream; the query builder skips
	// the filter when it is empty.
	StreamID string
}

// QuerySpec describes a status query: an optional time window plus filters.
type QuerySpec struct {
	// From is the lower time bound; nil means no lower bound.
	From *time.Time
	// To is the upper time bound; nil means no upper bound.
	To *time.Time
	// Filter holds the remaining optional filters.
	Filter QueryFilter
}

// NewQuerySpec returns a QuerySpec with the given time bounds and a filter on
// streamID. The arguments are stored as given, without validation; from and to
// may be nil.
func NewQuerySpec(streamID string, from, to *time.Time) QuerySpec {
return QuerySpec{
From: from,
To: to,
Filter: QueryFilter{
StreamID: streamID,
},
}

Check warning on line 23 in ai/query_spec.go

View check run for this annotation

Codecov / codecov/patch

ai/query_spec.go#L16-L23

Added lines #L16 - L23 were not covered by tests
}
Loading
Loading