diff --git a/config/config.go b/config/config.go
index 19c17178fae..1a176601520 100644
--- a/config/config.go
+++ b/config/config.go
@@ -83,6 +83,16 @@ type Configuration struct {
 	//When true, new bid id will be generated in seatbid[].bid[].ext.prebid.bidid and used in event urls instead
 	GenerateBidID bool   `mapstructure:"generate_bid_id"`
 	TrackerURL    string `mapstructure:"tracker_url"`
+
+	VendorListScheduler VendorListScheduler `mapstructure:"vendor_list_scheduler"`
+}
+
+// VendorListScheduler holds the settings of the GDPR vendor list scheduler.
+// Interval and Timeout are passed to time.ParseDuration, e.g. "1h" or "30s".
+type VendorListScheduler struct {
+	Enabled  bool   `mapstructure:"enabled"`
+	Interval string `mapstructure:"interval"`
+	Timeout  string `mapstructure:"timeout"`
 }
 
 const MIN_COOKIE_SIZE_BYTES = 500
diff --git a/gdpr/vendorlist-fetching.go b/gdpr/vendorlist-fetching.go
index 24489e73265..04fbaa659bf 100644
--- a/gdpr/vendorlist-fetching.go
+++ b/gdpr/vendorlist-fetching.go
@@ -20,6 +20,11 @@ import (
 
 type saveVendors func(uint16, api.VendorList)
 
+// cacheSave and cacheLoad expose the vendor list cache to the vendor list
+// scheduler. They are assigned by newVendorListFetcher.
+var cacheSave func(vendorListVersion uint16, list api.VendorList)
+var cacheLoad func(vendorListVersion uint16) api.VendorList
+
 // This file provides the vendorlist-fetching function for Prebid Server.
 //
 // For more info, see https://github.com/prebid/prebid-server/issues/504
@@ -27,7 +32,7 @@ type saveVendors func(uint16, api.VendorList)
 // Nothing in this file is exported. Public APIs can be found in gdpr.go
 
 func newVendorListFetcher(initCtx context.Context, cfg config.GDPR, client *http.Client, urlMaker func(uint16) string) func(ctx context.Context, id uint16) (vendorlist.VendorList, error) {
-	cacheSave, cacheLoad := newVendorListCache()
+	cacheSave, cacheLoad = newVendorListCache()
 
 	preloadContext, cancel := context.WithTimeout(initCtx, cfg.Timeouts.InitTimeout())
 	defer cancel()
diff --git a/gdpr/vendorlist-scheduler.go b/gdpr/vendorlist-scheduler.go
new file mode 100644
index 00000000000..234fa943540
--- /dev/null
+++ b/gdpr/vendorlist-scheduler.go
@@ -0,0 +1,199 @@
+package gdpr
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/golang/glog"
+)
+
+// vendorListScheduler periodically re-runs the vendor list preload so that
+// list versions published after startup end up in the vendor list cache.
+type vendorListScheduler struct {
+	// mu guards ticker, done, isRunning, isStarted and lastRun, which are
+	// shared between the polling goroutine and callers of Start/Stop.
+	mu sync.Mutex
+
+	ticker    *time.Ticker
+	interval  time.Duration
+	done      chan bool
+	isRunning bool
+	isStarted bool
+	lastRun   time.Time
+
+	httpClient *http.Client
+	timeout    time.Duration
+}
+
+// Only a single instance must be created; instanceMu guards its lazy creation.
+var (
+	_instance  *vendorListScheduler
+	instanceMu sync.Mutex
+)
+
+// GetVendorListScheduler returns the process-wide scheduler, creating it on the
+// first successful call. Once an instance exists it is returned as-is and the
+// arguments are ignored.
+//
+// interval and timeout are time.ParseDuration strings and must be positive;
+// httpClient must not be nil. A failed call creates nothing, so a later call
+// with valid arguments can still succeed.
+func GetVendorListScheduler(interval, timeout string, httpClient *http.Client) (*vendorListScheduler, error) {
+	instanceMu.Lock()
+	defer instanceMu.Unlock()
+
+	if _instance != nil {
+		return _instance, nil
+	}
+
+	intervalDuration, err := time.ParseDuration(interval)
+	if err != nil {
+		return nil, errors.New("error parsing vendor list scheduler interval: " + err.Error())
+	}
+	// time.NewTicker panics on a non-positive duration, so reject it here
+	// instead of crashing later in Start.
+	if intervalDuration <= 0 {
+		return nil, errors.New("vendor list scheduler interval must be positive: " + interval)
+	}
+
+	timeoutDuration, err := time.ParseDuration(timeout)
+	if err != nil {
+		return nil, errors.New("error parsing vendor list scheduler timeout: " + err.Error())
+	}
+	// A non-positive timeout produces a context that is already expired, so
+	// no run could ever download anything.
+	if timeoutDuration <= 0 {
+		return nil, errors.New("vendor list scheduler timeout must be positive: " + timeout)
+	}
+
+	if httpClient == nil {
+		return nil, errors.New("http-client can not be nil")
+	}
+
+	_instance = &vendorListScheduler{
+		interval:   intervalDuration,
+		httpClient: httpClient,
+		timeout:    timeoutDuration,
+	}
+
+	return _instance, nil
+}
+
+// Start launches the polling goroutine. It is a no-op on a nil receiver or when
+// the scheduler is already started.
+func (scheduler *vendorListScheduler) Start() {
+	if scheduler == nil {
+		return
+	}
+
+	scheduler.mu.Lock()
+	defer scheduler.mu.Unlock()
+
+	if scheduler.isStarted {
+		return
+	}
+
+	// Every goroutine gets its own ticker and done channel, so a Stop/Start
+	// sequence can never signal the wrong goroutine.
+	ticker := time.NewTicker(scheduler.interval)
+	done := make(chan bool)
+
+	scheduler.ticker = ticker
+	scheduler.done = done
+	scheduler.isStarted = true
+
+	go scheduler.poll(ticker, done)
+}
+
+// poll refreshes the cache on every tick until a value is received on done.
+func (scheduler *vendorListScheduler) poll(ticker *time.Ticker, done <-chan bool) {
+	for {
+		select {
+		case <-done:
+			return
+		case t := <-ticker.C:
+			scheduler.mu.Lock()
+			scheduler.isRunning = true
+			scheduler.mu.Unlock()
+
+			glog.Info("Running vendor list scheduler at ", t)
+			scheduler.runLoadCache()
+
+			scheduler.mu.Lock()
+			scheduler.lastRun = t
+			scheduler.isRunning = false
+			scheduler.mu.Unlock()
+		}
+	}
+}
+
+// Stop halts the ticker and blocks until the polling goroutine has received the
+// stop signal, i.e. until a refresh that is in flight has finished. It is a
+// no-op on a nil receiver or when the scheduler is not started.
+func (scheduler *vendorListScheduler) Stop() {
+	if scheduler == nil {
+		return
+	}
+
+	scheduler.mu.Lock()
+	if !scheduler.isStarted {
+		scheduler.mu.Unlock()
+		return
+	}
+	ticker, done := scheduler.ticker, scheduler.done
+	// Reset the state here rather than in the goroutine, so that it is already
+	// in place when Stop returns.
+	scheduler.ticker = nil
+	scheduler.isStarted = false
+	scheduler.mu.Unlock()
+
+	ticker.Stop()
+	// Send without holding mu: the goroutine needs it to finish a refresh.
+	done <- true
+}
+
+// runLoadCache saves the latest vendor list and then downloads every version
+// from the latest down to 2 that the cache does not hold yet. All requests of
+// one run share a single deadline of scheduler.timeout.
+func (scheduler *vendorListScheduler) runLoadCache() {
+	if scheduler == nil {
+		return
+	}
+
+	// cacheSave and cacheLoad are only assigned by newVendorListFetcher; calling
+	// them before that would be a nil function call.
+	if cacheSave == nil || cacheLoad == nil {
+		glog.Error("vendor list cache is not initialized, skipping vendor list scheduler run")
+		return
+	}
+
+	preloadContext, cancel := context.WithTimeout(context.Background(), scheduler.timeout)
+	defer cancel()
+
+	latestVersion := saveOne(preloadContext, scheduler.httpClient, vendorListURLMaker(0), cacheSave)
+
+	// The GVL for TCF2 has no vendors defined in its first version. It's very unlikely to be used, so don't preload it.
+	firstVersionToLoad := uint16(2)
+
+	for i := latestVersion; i >= firstVersionToLoad; i-- {
+		// The deadline covers the whole run; stop instead of issuing further
+		// requests with an expired context.
+		if preloadContext.Err() != nil {
+			glog.Warning("vendor list scheduler run timed out before all versions were loaded")
+			return
+		}
+
+		// Check if version is present in the cache
+		if list := cacheLoad(i); list != nil {
+			continue
+		}
+
+		url := vendorListURLMaker(i)
+		// Pass the URL as an argument so it is never interpreted as a format string.
+		glog.Infof("Downloading: %s", url)
+		saveOne(preloadContext, scheduler.httpClient, url, cacheSave)
+	}
+}
diff --git a/gdpr/vendorlist-scheduler_test.go b/gdpr/vendorlist-scheduler_test.go
new file mode 100644
index 00000000000..eb06bf88bef
--- /dev/null
+++ b/gdpr/vendorlist-scheduler_test.go
@@ -0,0 +1,198 @@
+package gdpr
+
+import (
+	"context"
+	"net/http"
+	"testing"
+	"time"
+
+	"github.com/prebid/go-gdpr/api"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGetVendorListScheduler(t *testing.T) {
+	type args struct {
+		interval   string
+		timeout    string
+		httpClient *http.Client
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    *vendorListScheduler
+		wantErr bool
+	}{
+		{
+			name: "Test singleton",
+			args: args{
+				interval:   "1m",
+				timeout:    "1s",
+				httpClient: http.DefaultClient,
+			},
+			want:    GetExpectedVendorListScheduler("1m", "1s", http.DefaultClient),
+			wantErr: false,
+		},
+		{
+			name: "Test singleton again",
+			args: args{
+				interval:   "2m",
+				timeout:    "2s",
+				httpClient: http.DefaultClient,
+			},
+			want:    GetExpectedVendorListScheduler("2m", "2s", http.DefaultClient),
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := GetVendorListScheduler(tt.args.interval, tt.args.timeout, tt.args.httpClient)
+			if got != tt.want {
+				t.Errorf("GetVendorListScheduler() got = %v, want %v", got, tt.want)
+			}
+			if (err != nil) != tt.wantErr {
+				t.Errorf("GetVendorListScheduler() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+		})
+	}
+}
+
+func TestGetVendorListSchedulerInvalidArgs(t *testing.T) {
+	// Drop the singleton for the duration of the test so the arguments get validated.
+	saved := _instance
+	_instance = nil
+	defer func() { _instance = saved }()
+
+	tests := []struct {
+		name       string
+		interval   string
+		timeout    string
+		httpClient *http.Client
+	}{
+		{name: "malformed interval", interval: "abc", timeout: "1s", httpClient: http.DefaultClient},
+		{name: "zero interval", interval: "0s", timeout: "1s", httpClient: http.DefaultClient},
+		{name: "malformed timeout", interval: "1m", timeout: "abc", httpClient: http.DefaultClient},
+		{name: "negative timeout", interval: "1m", timeout: "-1s", httpClient: http.DefaultClient},
+		{name: "nil http client", interval: "1m", timeout: "1s", httpClient: nil},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := GetVendorListScheduler(tt.interval, tt.timeout, tt.httpClient)
+			assert.Error(t, err, "invalid arguments should be rejected")
+			assert.Nil(t, got, "no scheduler should be returned on error")
+			assert.Nil(t, _instance, "a failed call should not create the singleton")
+		})
+	}
+}
+
+func GetExpectedVendorListScheduler(interval string, timeout string, httpClient *http.Client) *vendorListScheduler {
+	s, _ := GetVendorListScheduler(interval, timeout, httpClient)
+	return s
+}
+
+func Test_vendorListScheduler_Start(t *testing.T) {
+	type fields struct {
+		scheduler *vendorListScheduler
+	}
+	tests := []struct {
+		name   string
+		fields fields
+	}{
+		{
+			name: "Start test",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			scheduler, err := GetVendorListScheduler("1m", "30s", http.DefaultClient)
+			assert.Nil(t, err, "error should be nil")
+			assert.NotNil(t, scheduler, "scheduler instance should not be nil")
+
+			scheduler.Start()
+
+			assert.NotNil(t, scheduler.ticker, "ticker should not be nil")
+			assert.True(t, scheduler.isStarted, "isStarted should be true")
+
+			scheduler.Stop()
+		})
+	}
+}
+
+func Test_vendorListScheduler_Stop(t *testing.T) {
+	type fields struct {
+		scheduler *vendorListScheduler
+	}
+	tests := []struct {
+		name   string
+		fields fields
+	}{
+		{
+			name: "Stop test",
+		},
+		{
+			name: "Calling stop again",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			scheduler, err := GetVendorListScheduler("1m", "30s", http.DefaultClient)
+			assert.Nil(t, err, "error should be nil")
+			assert.NotNil(t, scheduler, "scheduler instance should not be nil")
+
+			scheduler.Start()
+			scheduler.Stop()
+
+			assert.Nil(t, scheduler.ticker, "ticker should be nil")
+			assert.False(t, scheduler.isStarted, "isStarted should be false")
+		})
+	}
+}
+
+func Test_vendorListScheduler_runLoadCache(t *testing.T) {
+	type fields struct {
+		scheduler *vendorListScheduler
+	}
+	tests := []struct {
+		name   string
+		fields fields
+	}{
+		{
+			name: "runLoadCache caches all files",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var err error
+			tt.fields.scheduler, err = GetVendorListScheduler("5m", "5m", http.DefaultClient)
+			assert.Nil(t, err, "error should be nil")
+			assert.False(t, tt.fields.scheduler.isStarted, "VendorListScheduler should not be already running")
+
+			tt.fields.scheduler.timeout = 2 * time.Minute
+
+			mockCacheSave := func(uint16, api.VendorList) {}
+			latestVersion := saveOne(context.Background(), http.DefaultClient, vendorListURLMaker(0), mockCacheSave)
+
+			cacheSave, cacheLoad = newVendorListCache()
+			tt.fields.scheduler.runLoadCache()
+
+			firstVersionToLoad := uint16(2)
+			for i := latestVersion; i >= firstVersionToLoad; i-- {
+				list := cacheLoad(i)
+				assert.NotNil(t, list, "vendor-list file should be present in cache")
+			}
+		})
+	}
+}
+
+func Benchmark_vendorListScheduler_runLoadCache(b *testing.B) {
+	scheduler, err := GetVendorListScheduler("1m", "30m", http.DefaultClient)
+	assert.Nil(b, err, "")
+	assert.NotNil(b, scheduler, "")
+
+	scheduler.timeout = 2 * time.Minute
+
+	for n := 0; n < b.N; n++ {
+		cacheSave, cacheLoad = newVendorListCache()
+		scheduler.runLoadCache()
+	}
+}
diff --git a/router/router.go b/router/router.go
index 58bdee057f6..441dace8e78 100644
--- a/router/router.go
+++ b/router/router.go
@@ -320,6 +320,14 @@ func New(cfg *config.Configuration, rateConvertor *currency.RateConverter) (r *R
 	gvlVendorIDs := bidderInfos.ToGVLVendorIDMap()
 	g_gdprPerms = gdpr.NewPermissions(context.Background(), cfg.GDPR, gvlVendorIDs, generalHttpClient)
 
+	if cfg.VendorListScheduler.Enabled {
+		vendorListScheduler, err := gdpr.GetVendorListScheduler(cfg.VendorListScheduler.Interval, cfg.VendorListScheduler.Timeout, generalHttpClient)
+		if err != nil {
+			glog.Fatal(err)
+		}
+		vendorListScheduler.Start()
+	}
+
 	exchanges = newExchangeMap(cfg)
 	g_cacheClient = pbc.NewClient(cacheHttpClient, &cfg.CacheURL, &cfg.ExtCacheURL, g_metrics)
 