Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promtail: (and also fluent-bit) change the max batch size to 1MB #2710

Merged
merged 2 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions cmd/docker-driver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ const (

var (
defaultClientConfig = client.Config{
BatchWait: 1 * time.Second,
BatchSize: 100 * 1024,
BatchWait: client.BatchWait,
BatchSize: client.BatchSize,
BackoffConfig: cortex_util.BackoffConfig{
MinBackoff: 100 * time.Millisecond,
MaxBackoff: 10 * time.Second,
MaxRetries: 10,
MinBackoff: client.MinBackoff,
MaxBackoff: client.MaxBackoff,
MaxRetries: client.MaxRetries,
},
Timeout: 10 * time.Second,
Timeout: client.Timeout,
}
)

Expand Down Expand Up @@ -242,8 +242,8 @@ func parseConfig(logCtx logger.Info) (*config, error) {

// other labels coming from docker labels or env selected by user labels, labels-regex, env, env-regex config.
attrs, err := logCtx.ExtraAttributes(func(label string) string {
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
return strings.ReplaceAll(strings.ReplaceAll(label, "-", "_"), ".", "_")
})
if err != nil {
return nil, err
}
Expand Down
34 changes: 22 additions & 12 deletions pkg/promtail/client/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ import (
lokiflag "github.com/grafana/loki/pkg/util/flagext"
)

// NOTE: the helm charts for promtail and fluent-bit also define defaults for these values; please update them to match if you make changes here.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty sure we could find a way at some point to have a global default.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helm makes this difficult: we specify the values in the config so they can be overridden, but I think leaving them empty would be an error. I don't have the time to dig deeper into this right now, so I added this comment as "better than nothing".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, makes total sense.

// Default values for the client Config. They are used as the flag defaults in
// RegisterFlagsWithPrefix and as the baseline values in UnmarshalYAML.
const (
// BatchWait is the default maximum wait period before sending a batch.
BatchWait = 1 * time.Second
// BatchSize is the default maximum batch size, in bytes, to accrue before
// sending (1024 * 1024 = 1048576).
BatchSize int = 1024 * 1024
// MinBackoff is the default initial backoff time between retries.
MinBackoff = 500 * time.Millisecond
// MaxBackoff is the default maximum backoff time between retries.
MaxBackoff = 5 * time.Minute
// MaxRetries is the default maximum number of retries when sending batches.
MaxRetries int = 10
// Timeout is the default maximum time to wait for the server to respond to
// a request.
Timeout = 10 * time.Second
)

// Config describes configuration for a HTTP pusher client.
type Config struct {
URL flagext.URLValue
Expand All @@ -33,13 +43,13 @@ type Config struct {
// prefix. If prefix is a non-empty string, prefix should end with a period.
func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.Var(&c.URL, prefix+"client.url", "URL of log server")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", 1*time.Second, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", 1024*1024, "Maximum batch size to accrue before sending. ")
f.DurationVar(&c.BatchWait, prefix+"client.batch-wait", BatchWait, "Maximum wait period before sending batch.")
f.IntVar(&c.BatchSize, prefix+"client.batch-size-bytes", BatchSize, "Maximum batch size to accrue before sending. ")
// Default backoff schedule: 0.5s, 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(4.267m) For a total time of 511.5s(8.5m) before logs are lost
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", 10, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", 500*time.Millisecond, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", 5*time.Minute, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", 10*time.Second, "Maximum time to wait for server to respond to a request")
f.IntVar(&c.BackoffConfig.MaxRetries, prefix+"client.max-retries", MaxRetries, "Maximum number of retires when sending batches.")
f.DurationVar(&c.BackoffConfig.MinBackoff, prefix+"client.min-backoff", MinBackoff, "Initial backoff time between retries.")
f.DurationVar(&c.BackoffConfig.MaxBackoff, prefix+"client.max-backoff", MaxBackoff, "Maximum backoff time between retries.")
f.DurationVar(&c.Timeout, prefix+"client.timeout", Timeout, "Maximum time to wait for server to respond to a request")
f.Var(&c.ExternalLabels, prefix+"client.external-labels", "list of external labels to add to each log (e.g: --client.external-labels=lb1=v1,lb2=v2)")

f.StringVar(&c.TenantID, prefix+"client.tenant-id", "", "Tenant ID to use when pushing logs to Loki.")
Expand All @@ -61,13 +71,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
// force sane defaults.
cfg = raw{
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
}
}

Expand Down
12 changes: 6 additions & 6 deletions pkg/promtail/client/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ func Test_Config(t *testing.T) {
URL: u,
},
BackoffConfig: util.BackoffConfig{
MaxBackoff: 5 * time.Minute,
MaxRetries: 10,
MinBackoff: 500 * time.Millisecond,
MaxBackoff: MaxBackoff,
MaxRetries: MaxRetries,
MinBackoff: MinBackoff,
},
BatchSize: 100 * 1024,
BatchWait: 1 * time.Second,
Timeout: 10 * time.Second,
BatchSize: BatchSize,
BatchWait: BatchWait,
Timeout: Timeout,
},
},
{
Expand Down
2 changes: 1 addition & 1 deletion production/helm/fluent-bit/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: fluent-bit
version: 0.3.0
version: 0.3.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Uses fluent-bit Loki go plugin for gathering logs and sending them to Loki"
Expand Down
2 changes: 1 addition & 1 deletion production/helm/fluent-bit/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ config:
port: 2020
tenantID: '""'
batchWait: 1
batchSize: 10240
batchSize: 1048576
loglevel: warn
lineFormat: json
k8sLoggingParser: "Off"
Expand Down
2 changes: 1 addition & 1 deletion production/helm/loki-stack/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: loki-stack
version: 0.41.0
version: 0.41.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Loki: like Prometheus, but for logs."
Expand Down
2 changes: 1 addition & 1 deletion production/helm/promtail/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: "v1"
name: promtail
version: 0.25.0
version: 0.25.1
appVersion: v1.6.0
kubeVersion: "^1.10.0-0"
description: "Responsible for gathering logs and sending them to Loki"
Expand Down
8 changes: 4 additions & 4 deletions production/helm/promtail/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,18 +159,18 @@ config:
# Maximum wait period before sending batch
batchwait: 1s
# Maximum batch size to accrue before sending, unit is byte
batchsize: 102400
batchsize: 1048576

# Maximum time to wait for server to respond to a request
timeout: 10s

backoff_config:
# Initial backoff time between retries
min_period: 100ms
min_period: 500ms
# Maximum backoff time between retries
max_period: 5s
max_period: 5m
# Maximum number of retries when sending batches, 0 means infinite retries
max_retries: 20
max_retries: 10

# The labels to add to any time series or alerts when communicating with loki
external_labels: {}
Expand Down