From 2d434bbd92b2d6bbf83e457ee8c715c0be93211f Mon Sep 17 00:00:00 2001
From: Henrique Dias
Date: Tue, 13 Feb 2024 14:36:10 +0100
Subject: [PATCH] feat: periodic gc

---
 docs/environment-variables.md | 17 ++++++++++++-
 gc.go                         |  1 +
 gc_run.go                     | 39 +++++++++++++++++++++++++++++
 gc_windows.go                 | 14 +++++++++++
 main.go                       | 46 ++++++++++++++++++++++++++++++++++-
 setup.go                      | 12 +++++++++
 6 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 gc_run.go
 create mode 100644 gc_windows.go

diff --git a/docs/environment-variables.md b/docs/environment-variables.md
index 6e2d9d7..dcbe284 100644
--- a/docs/environment-variables.md
+++ b/docs/environment-variables.md
@@ -23,13 +23,28 @@ Comma-separated list of path gateway hostnames. For example, passing `ipfs.io` w
 
 Default: `127.0.0.1`
 
-
 ### `RAINBOW_SUBDOMAIN_GATEWAY_DOMAINS`
 
 Comma-separated list of [subdomain gateway](https://specs.ipfs.tech/http-gateways/subdomain-gateway/) domains. For example, passing `dweb.link` will enable handler for standard [subdomain gateway](https://specs.ipfs.tech/http-gateways/subdomain-gateway/) requests with the `Host` header set to `*.ipfs.dweb.link` and `*.ipns.dweb.link`.
 
 Default: `localhost`
 
+### `RAINBOW_GC_INTERVAL`
+
+The interval at which the garbage collector will be called. This is given as a string that corresponds to the duration of the interval. Set to `0` to disable.
+
+This functionality does not work on Windows.
+
+Default: `60m`
+
+### `RAINBOW_GC_THRESHOLD`
+
+The threshold of how much free space one wants to always have available on disk, given as a fraction of the total disk size (between `0` and `1`). This is used with the periodic garbage collector.
+
+When the periodic GC runs, it checks for the total and available space on disk. If the available space meets the threshold, the GC is not called. Otherwise, the GC is asked to remove as many bytes as necessary so that the threshold of available space on disk is met.
+
+Default: `0.3` (always keep 30% of the disk available)
+
 ### `KUBO_RPC_URL`
 
 Default: `127.0.0.1:5001` (see `DefaultKuboRPC`)
diff --git a/gc.go b/gc.go
index 8c5a21f..84661b6 100644
--- a/gc.go
+++ b/gc.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"context"
+
 	badger4 "github.com/ipfs/go-ds-badger4"
 )
 
diff --git a/gc_run.go b/gc_run.go
new file mode 100644
index 0000000..f3153cc
--- /dev/null
+++ b/gc_run.go
@@ -0,0 +1,39 @@
+//go:build !windows
+
+package main
+
+import (
+	"context"
+	"syscall"
+)
+
+// periodicGC checks the free space of the filesystem holding nd.dataDir and,
+// when it has dropped below threshold (a fraction of the total filesystem
+// size, e.g. 0.3 for 30%), asks nd.GC to reclaim the missing bytes. It is a
+// no-op when the free space already meets the threshold.
+func (nd *Node) periodicGC(ctx context.Context, threshold float64) error {
+	var stat syscall.Statfs_t
+
+	err := syscall.Statfs(nd.dataDir, &stat)
+	if err != nil {
+		return err
+	}
+
+	totalBytes := uint64(stat.Blocks) * uint64(stat.Bsize)
+	availableBytes := stat.Bfree * uint64(stat.Bsize)
+
+	// Minimum number of bytes that must stay free: threshold is a fraction
+	// of the total filesystem size.
+	minFreeBytes := uint64((float64(totalBytes) * threshold))
+
+	goLog.Infow("filesystem data collected", "total_bytes", totalBytes, "available_bytes", availableBytes, "min_free_bytes", minFreeBytes)
+
+	// If there's enough free space, do nothing. This check must come before
+	// the subtraction below: both operands are unsigned, so subtracting a
+	// larger availableBytes would wrap around to a huge value.
+	if availableBytes >= minFreeBytes {
+		return nil
+	}
+
+	return nd.GC(ctx, int64(minFreeBytes-availableBytes))
+}
diff --git a/gc_windows.go b/gc_windows.go
new file mode 100644
index 0000000..c1edda6
--- /dev/null
+++ b/gc_windows.go
@@ -0,0 +1,14 @@
+//go:build windows
+
+package main
+
+import (
+	"context"
+	"errors"
+)
+
+// periodicGC is the Windows stub of the periodic garbage collector: free-space
+// detection is not implemented on this platform, so it always returns an error.
+func (nd *Node) periodicGC(ctx context.Context, threshold float64) error {
+	return errors.New("feature not implemented on windows")
+}
diff --git a/main.go b/main.go
index e1fb2a4..63dbe7a 100644
--- a/main.go
+++ b/main.go
@@ -116,7 +116,18 @@ Generate an identity seed and launch a gateway:
 			EnvVars: []string{"RAINBOW_CTL_LISTEN_ADDRESS"},
 			Usage:   "Listen address for the management api and metrics",
 		},
-
+		&cli.DurationFlag{
+			Name:    "gc-interval",
+			Value:   time.Minute * 60,
+			EnvVars: []string{"RAINBOW_GC_INTERVAL"},
+			Usage:   "The interval between automatic GC runs. Set 0 to disable.",
+		},
+		&cli.Float64Flag{
+			Name:    "gc-threshold",
+			Value:   0.3,
+			EnvVars: []string{"RAINBOW_GC_THRESHOLD"},
+			Usage:   "Fraction (0-1) of the total disk space that must be kept free.",
+		},
 		&cli.IntFlag{
 			Name:    "connmgr-low",
 			Value:   100,
@@ -281,6 +292,8 @@ share the same seed as long as the indexes are different.
 			DHTSharedHost:           cctx.Bool("dht-shared-host"),
 			DenylistSubs:            getCommaSeparatedList(cctx.String("denylists")),
 			Peering:                 peeringAddrs,
+			GCInterval:              cctx.Duration("gc-interval"),
+			GCThreshold:             cctx.Float64("gc-threshold"),
 		}
 
 		goLog.Debugf("Rainbow config: %+v", cfg)
@@ -359,6 +372,31 @@ share the same seed as long as the indexes are different.
 			}
 		}()
 
+		var gcTicker *time.Ticker
+		var gcTickerDone chan bool
+
+		if cfg.GCInterval > 0 {
+			gcTicker = time.NewTicker(cfg.GCInterval)
+			gcTickerDone = make(chan bool)
+			wg.Add(1)
+
+			go func() {
+				defer wg.Done()
+
+				for {
+					select {
+					case <-gcTickerDone:
+						return
+					case <-gcTicker.C:
+						err := gnd.periodicGC(cctx.Context, cfg.GCThreshold)
+						if err != nil {
+							goLog.Errorf("error when running periodic gc: %v", err)
+						}
+					}
+				}
+			}()
+		}
+
 		sddaemon.SdNotify(false, sddaemon.SdNotifyReady)
 		signal.Notify(
 			quit,
@@ -371,6 +409,12 @@ share the same seed as long as the indexes are different.
 		goLog.Info("Closing servers...")
 		go gatewaySrv.Close()
 		go apiSrv.Close()
+
+		if gcTicker != nil {
+			gcTicker.Stop()
+			close(gcTickerDone)
+		}
+
 		for _, sub := range gnd.denylistSubs {
 			sub.Stop()
 		}
diff --git a/setup.go b/setup.go
index c3ecb0f..294030d 100644
--- a/setup.go
+++ b/setup.go
@@ -65,6 +65,7 @@ type Node struct {
 	vs   routing.ValueStore
 	host host.Host
 
+	dataDir    string
 	datastore  datastore.Batching
 	blockstore blockstore.Blockstore
 	bsClient   *bsclient.Client
@@ -103,9 +104,19 @@ type Config struct {
 
 	DenylistSubs []string
 	Peering      []peer.AddrInfo
+
+	GCInterval  time.Duration
+	GCThreshold float64
 }
 
 func Setup(ctx context.Context, cfg Config, key crypto.PrivKey, dnsCache *cachedDNS) (*Node, error) {
+	var err error
+
+	cfg.DataDir, err = filepath.Abs(cfg.DataDir)
+	if err != nil {
+		return nil, err
+	}
+
 	ds, err := setupDatastore(cfg)
 	if err != nil {
 		return nil, err
@@ -350,6 +361,7 @@ func Setup(ctx context.Context, cfg Config, key crypto.PrivKey, dnsCache *cached
 	return &Node{
 		host:                 h,
 		blockstore:           blkst,
+		dataDir:              cfg.DataDir,
 		datastore:            ds,
 		bsClient:             bswap,
 		ns:                   ns,