diff --git a/CHANGELOG.md b/CHANGELOG.md index 1177b6f866ac7..61afe0f8c9258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ * [7880](https://github.com/grafana/loki/pull/7880) **sandeepsukhani**: consider range and offset in queries while looking for schema config for query sharding. * [7937](https://github.com/grafana/loki/pull/7937) **ssncferreira**: Deprecate CLI flag `-ruler.wal-cleaer.period` and replace it with `-ruler.wal-cleaner.period`. * [7906](https://github.com/grafana/loki/pull/7906) **kavirajk**: Add API endpoint that formats LogQL expressions and support new `fmt` subcommand in `logcli` to format LogQL query. +* [7966](https://github.com/grafana/loki/pull/7966) **sandeepsukhani**: Fix query-frontend request load balancing when using k8s service. ##### Changes diff --git a/docs/sources/upgrading/_index.md b/docs/sources/upgrading/_index.md index 31f9c459326ad..15e5769df43be 100644 --- a/docs/sources/upgrading/_index.md +++ b/docs/sources/upgrading/_index.md @@ -57,6 +57,29 @@ ruler: period: 5s ``` +### Querier + +#### query-frontend k8s headless service changed to load-balanced service + +*Note:* This is relevant only if you are using [jsonnet for deploying Loki in Kubernetes](https://grafana.com/docs/loki/latest/installation/tanka/). + +The `query-frontend` k8s service was previously headless and was used for two purposes: +* Distributing the Loki query requests amongst all the available Query Frontend pods. +* Discovering the IPs of the Query Frontend pods so that Queriers can connect to them as workers. + +The problem here is that a headless service does not support load balancing and leaves it up to the client to balance the load. +Additionally, a load-balanced service does not let us discover the IPs of the underlying pods. + +To meet both these requirements, we have made the following changes: +* Changed the existing `query-frontend` k8s service from headless to load-balanced to have a fair load distribution on all the Query Frontend instances. 
+* Added a `query-frontend-headless` service so that Queriers can discover the Query Frontend pod IPs and connect to them as workers. + +If you are deploying Loki with Query Scheduler by setting [query_scheduler_enabled](https://github.com/grafana/loki/blob/cc4ab7487ab3cd3b07c63601b074101b0324083b/production/ksonnet/loki/config.libsonnet#L18) config to `true`, then there is nothing to do here for this change. +If you are not using Query Scheduler, then to avoid any issues on the Read path until the rollout finishes, we recommend following the steps below: +* Create only the `query-frontend-headless` service without applying any changes to the `query-frontend` service. +* Roll out the changes to `queriers`. +* Roll out the rest of the changes. + ## 2.7.0 ### Loki diff --git a/production/ksonnet/loki/query-frontend.libsonnet b/production/ksonnet/loki/query-frontend.libsonnet index 081c5acb89250..2694715880d2f 100644 --- a/production/ksonnet/loki/query-frontend.libsonnet +++ b/production/ksonnet/loki/query-frontend.libsonnet @@ -42,7 +42,11 @@ local k = import 'ksonnet-util/kausal.libsonnet'; local service = k.core.v1.service, - query_frontend_service: + // A headless service for discovering IPs of each query-frontend pod. + // It leaves it up to the client to do any load-balancing of requests, + // so if the intention is to use the k8s service for load balancing, + // it is advised to use the below `query-frontend` service instead. + query_frontend_headless_service: $.util.grpclbServiceFor($.query_frontend_deployment) + // Make sure that query frontend worker, running in the querier, do resolve // each query-frontend pod IP and NOT the service IP. To make it, we do NOT @@ -52,6 +56,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; // Query frontend will not become ready until at least one querier connects // which creates a chicken and egg scenario if we don't publish the // query-frontend address before it's ready. 
- service.mixin.spec.withPublishNotReadyAddresses(true), + service.mixin.spec.withPublishNotReadyAddresses(true) + + service.mixin.metadata.withName('query-frontend-headless'), + query_frontend_service: + k.util.serviceFor($.query_frontend_deployment, $._config.service_ignored_labels), } diff --git a/production/ksonnet/loki/query-scheduler.libsonnet b/production/ksonnet/loki/query-scheduler.libsonnet index bb63b08c12172..1c221940d6281 100644 --- a/production/ksonnet/loki/query-scheduler.libsonnet +++ b/production/ksonnet/loki/query-scheduler.libsonnet @@ -20,7 +20,7 @@ local k = import 'ksonnet-util/kausal.libsonnet'; max_outstanding_per_tenant: max_outstanding, }, frontend_worker+: { - frontend_address: 'query-frontend.%s.svc.cluster.local.:9095' % $._config.namespace, + frontend_address: 'query-frontend-headless.%s.svc.cluster.local.:9095' % $._config.namespace, }, }, },