Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(observability): remove more deprecated internal metrics #17542

Merged
merged 3 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions src/internal_events/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ pub struct LargeEventDroppedError {

impl InternalEvent for LargeEventDroppedError {
fn emit(self) {
let reason = "Event larger than batch max_bytes.";
error!(
message = "Event larger than batch max_bytes.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not relevant to this PR, but it does strike me that maybe component_errors_total would also be a good candidate for a shared internal event type that could assert all of the required fields are present.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, fair point.

There are still a lot of custom error metrics that are marked as deprecated in favor of component_errors_total... the work to clean those up would probably be a good spot to explore moving this stuff into a single, shared event type.

message = reason,
batch_max_bytes = %self.max_length,
length = %self.length,
error_type = error_type::CONDITION_FAILED,
Expand All @@ -28,14 +29,6 @@ impl InternalEvent for LargeEventDroppedError {
"error_type" => error_type::CONDITION_FAILED,
"stage" => error_stage::SENDING,
);
emit!(ComponentEventsDropped::<UNINTENTIONAL> {
count: 1,
reason: "Event larger than batch max_bytes."
});
// deprecated
counter!(
"events_discarded_total", 1,
"reason" => "oversized",
);
emit!(ComponentEventsDropped::<UNINTENTIONAL> { count: 1, reason });
}
}
2 changes: 0 additions & 2 deletions src/internal_events/dedupe.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use crate::emit;
use metrics::counter;
use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL};

#[derive(Debug)]
Expand All @@ -13,6 +12,5 @@ impl InternalEvent for DedupeEventsDropped {
count: self.count,
reason: "Events have been found in cache for deduplication.",
});
counter!("events_discarded_total", self.count as u64); // Deprecated
}
}
3 changes: 0 additions & 3 deletions src/internal_events/filter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use metrics::{register_counter, Counter};
use vector_common::internal_event::{ComponentEventsDropped, Count, Registered, INTENTIONAL};

use crate::register;
Expand All @@ -9,11 +8,9 @@ vector_common::registered_event! (
= register!(ComponentEventsDropped::<INTENTIONAL>::from(
"Events matched filter condition."
)),
events_discarded: Counter = register_counter!("events_discarded_total"),
}

fn emit(&self, data: Count) {
self.events_dropped.emit(data);
self.events_discarded.increment(data.0 as u64);
}
);
2 changes: 0 additions & 2 deletions src/internal_events/kafka.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ impl InternalEvent for KafkaReadError {
"error_type" => error_type::READER_FAILED,
"stage" => error_stage::RECEIVING,
);
// deprecated
counter!("events_failed_total", 1);
}
}

Expand Down
53 changes: 36 additions & 17 deletions src/internal_events/loki.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,58 @@
use crate::emit;
use metrics::counter;
use vector_common::internal_event::{error_stage, error_type};
use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL};

#[derive(Debug)]
pub struct LokiEventUnlabeled;
pub struct LokiEventUnlabeledError;

impl InternalEvent for LokiEventUnlabeled {
impl InternalEvent for LokiEventUnlabeledError {
fn emit(self) {
// Deprecated
counter!("processing_errors_total", 1,
"error_type" => "unlabeled_event");
error!(
message = "Event had no labels. Adding default `agent` label.",
error_code = "unlabeled_event",
error_type = error_type::CONDITION_FAILED,
stage = error_stage::PROCESSING,
internal_log_rate_limit = true,
);

counter!(
"component_errors_total", 1,
"error_code" => "unlabeled_event",
"error_type" => error_type::CONDITION_FAILED,
"stage" => error_stage::PROCESSING,
);
}
}

#[derive(Debug)]
pub struct LokiOutOfOrderEventDropped {
pub struct LokiOutOfOrderEventDroppedError {
pub count: usize,
}

impl InternalEvent for LokiOutOfOrderEventDropped {
impl InternalEvent for LokiOutOfOrderEventDroppedError {
fn emit(self) {
let reason = "Dropping out-of-order event(s).";

error!(
message = reason,
error_code = "out_of_order",
error_type = error_type::CONDITION_FAILED,
stage = error_stage::PROCESSING,
internal_log_rate_limit = true,
);

emit!(ComponentEventsDropped::<INTENTIONAL> {
count: self.count,
reason: "out_of_order",
reason,
});

// Deprecated
counter!("events_discarded_total", self.count as u64,
"reason" => "out_of_order");
counter!("processing_errors_total", 1,
"error_type" => "out_of_order");
counter!(
"component_errors_total", 1,
"error_code" => "out_of_order",
"error_type" => error_type::CONDITION_FAILED,
"stage" => error_stage::PROCESSING,
);
}
}

Expand All @@ -47,9 +70,5 @@ impl InternalEvent for LokiOutOfOrderEventRewritten {
internal_log_rate_limit = true,
);
counter!("rewritten_timestamp_events_total", self.count as u64);

// Deprecated
counter!("processing_errors_total", 1,
"error_type" => "out_of_order");
}
}
8 changes: 0 additions & 8 deletions src/internal_events/lua.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ impl InternalEvent for LuaScriptError {
count: 1,
reason: "Error in lua script.",
});
// deprecated
counter!("processing_errors_total", 1);
}
}

Expand All @@ -70,12 +68,6 @@ impl InternalEvent for LuaBuildError {
"error_type" => error_type::SCRIPT_FAILED,
"stage" => error_stage:: PROCESSING,
);
emit!(ComponentEventsDropped::<UNINTENTIONAL> {
count: 1,
reason: "Error in lua build.",
});
// deprecated
counter!("processing_errors_total", 1);

emit!(ComponentEventsDropped::<UNINTENTIONAL> { count: 1, reason })
}
Expand Down
2 changes: 0 additions & 2 deletions src/internal_events/metric_to_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ impl InternalEvent for MetricToLogSerializeError {
"error_type" => error_type::ENCODER_FAILED,
"stage" => error_stage::PROCESSING,
);
// deprecated
counter!("processing_errors_total", 1, "error_type" => "failed_serialize");

emit!(ComponentEventsDropped::<UNINTENTIONAL> { count: 1, reason })
}
Expand Down
6 changes: 0 additions & 6 deletions src/internal_events/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ impl InternalEvent for ParserMatchError<'_> {
"error_type" => error_type::CONDITION_FAILED,
"stage" => error_stage::PROCESSING,
);
// deprecated
counter!("processing_errors_total", 1, "error_type" => "failed_match");
}
}

Expand Down Expand Up @@ -75,8 +73,6 @@ impl<const DROP_EVENT: bool> InternalEvent for ParserMissingFieldError<'_, DROP_
"stage" => error_stage::PROCESSING,
"field" => self.field.to_string(),
);
// deprecated
counter!("processing_errors_total", 1, "error_type" => "missing_field");

if DROP_EVENT {
emit!(ComponentEventsDropped::<UNINTENTIONAL> { count: 1, reason });
Expand Down Expand Up @@ -108,8 +104,6 @@ impl<'a> InternalEvent for ParserConversionError<'a> {
"stage" => error_stage::PROCESSING,
"name" => self.name.to_string(),
);
// deprecated
counter!("processing_errors_total", 1, "error_type" => "type_conversion_failed");
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/internal_events/remap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ impl InternalEvent for RemapMappingError {
reason: "Mapping failed with event.",
});
}
// deprecated
counter!("processing_errors_total", 1);
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/internal_events/sample.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
use crate::emit;
use metrics::counter;
use vector_core::internal_event::{ComponentEventsDropped, InternalEvent, INTENTIONAL};

#[derive(Debug)]
pub struct SampleEventDiscarded;

impl InternalEvent for SampleEventDiscarded {
fn emit(self) {
counter!("events_discarded_total", 1); // Deprecated.
emit!(ComponentEventsDropped::<INTENTIONAL> {
count: 1,
reason: "Sample discarded."
Expand Down
2 changes: 0 additions & 2 deletions src/internal_events/sematext_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ impl<'a> InternalEvent for SematextMetricsInvalidMetricError<'a> {
"error_type" => error_type::ENCODER_FAILED,
"stage" => error_stage::PROCESSING,
);
// deprecated
counter!("processing_errors_total", 1);

emit!(ComponentEventsDropped::<UNINTENTIONAL> { count: 1, reason });
}
Expand Down
2 changes: 0 additions & 2 deletions src/internal_events/statsd_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ impl<'a> InternalEvent for StatsdInvalidMetricError<'a> {
"error_type" => error_type::ENCODER_FAILED,
"stage" => error_stage::PROCESSING,
);
// deprecated
counter!("processing_errors_total", 1);

emit!(ComponentEventsDropped::<UNINTENTIONAL> { reason, count: 1 });
}
Expand Down
7 changes: 0 additions & 7 deletions src/internal_events/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,11 @@ impl<'a> InternalEvent for TemplateRenderingError<'a> {
"stage" => error_stage::PROCESSING,
);

// deprecated
counter!("processing_errors_total", 1,
"error_type" => "render_error");

if self.drop_event {
emit!(ComponentEventsDropped::<UNINTENTIONAL> {
count: 1,
reason: "Failed to render template.",
});

// deprecated
counter!("events_discarded_total", 1);
}
}
}
10 changes: 9 additions & 1 deletion src/internal_events/throttle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ pub(crate) struct ThrottleEventDiscarded {

impl InternalEvent for ThrottleEventDiscarded {
fn emit(self) {
debug!(message = "Rate limit exceeded.", key = ?self.key); // Deprecated.
// TODO: Technically, the Component Specification states that the discarded events metric
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yeah, this is a good question. I wonder how much harm there is in allowing additional tags as long as the required tags are there.

// must _only_ have the `intentional` tag, in addition to the core tags like
// `component_kind`, etc, and nothing else.
//
// That doesn't give us the leeway to specify which throttle bucket the events are being
// discarded for... but including the key/bucket as a tag does seem useful and so I wonder
// if we should change the specification wording? Sort of a similar situation to the
// `error_code` tag for the component errors metric, where it's meant to be optional and
// only specified when relevant.
Comment on lines +12 to +20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, the spec should be loosened there.

counter!(
"events_discarded_total", 1,
"key" => self.key,
Expand Down
6 changes: 3 additions & 3 deletions src/sinks/loki/sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use crate::{
codecs::{Encoder, Transformer},
http::{get_http_scheme_from_uri, HttpClient},
internal_events::{
LokiEventUnlabeled, LokiOutOfOrderEventDropped, LokiOutOfOrderEventRewritten,
LokiEventUnlabeledError, LokiOutOfOrderEventDroppedError, LokiOutOfOrderEventRewritten,
SinkRequestBuildError, TemplateRenderingError,
},
sinks::util::{
Expand Down Expand Up @@ -288,7 +288,7 @@ impl EventEncoder {
// `{agent="vector"}` label. This can happen if the only
// label is a templatable one but the event doesn't match.
if labels.is_empty() {
emit!(LokiEventUnlabeled);
emit!(LokiEventUnlabeledError);
labels = vec![("agent".to_string(), "vector".to_string())]
}

Expand Down Expand Up @@ -486,7 +486,7 @@ impl LokiSink {
}
Some((partition, result))
} else {
emit!(LokiOutOfOrderEventDropped { count: batch.len() });
emit!(LokiOutOfOrderEventDroppedError { count: batch.len() });
None
}
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
date: "2023-07-05"
title: "0.31 Upgrade Guide"
description: "An upgrade guide that addresses breaking changes in 0.31.0"
authors: ["tobz"]
release: "0.31.0"
hide_on_release_notes: false
badges:
type: breaking change
---

Vector's 0.31.0 release includes **breaking changes**:

1. [Removal of various deprecated internal metrics](#deprecated-internal-metrics)

We cover them below to help you upgrade quickly:

## Upgrade guide

### Breaking changes

#### Removal of various deprecated internal metrics {#deprecated-internal-metrics}

Over the course of many of the previous releases, we've been working to deprecate the usage of older
internal metrics as we worked towards implementing full support for the [Component
Specification](https://github.com/vectordotdev/vector/blob/master/docs/specs/component.md), which
dictates the basic metrics that all components, or all components of a specific type, are expected
to emit.

We've made enough progress on this work that we've gone ahead and removed many of the deprecated
metrics in this release. Below is a list of all the metrics we've removed:

- `events_in_total` (superseded by `component_received_events_total`)
- `events_out_total` (superseded by `component_sent_events_total`)
- `processed_bytes_total` (superseded by either `component_received_bytes_total` or
`component_sent_bytes_total`, more below)
- `processed_events_total` (superseded by either `component_received_events_total` or
`component_sent_events_total`, more below)
- `processing_errors_total` (superseded by `component_errors_total`)
- `events_failed_total` (superseded by `component_errors_total`)

Most of the removals have straightforward replacements, but the `processed_`-prefixed metrics
involve a small amount of logic. For **sources**, `processed_bytes_total` is superseded by
`component_received_bytes_total`, and `processed_events_total` is superseded by
`component_received_events_total`. For **sinks**, `processed_bytes_total` is superseded by
`component_sent_bytes_total`, and `processed_events_total` is superseded by
`component_sent_events_total`.

Note that a small number of components still emit some of these metrics, as they provide
additional tags and information that are disallowed by the Component Specification. These metrics
will be removed in a future version once we can rectify those discrepancies, but you should treat
them as removed as of this release: you cannot depend on them still existing.
Comment on lines +48 to +51
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this may change if we elect to update the spec.

15 changes: 10 additions & 5 deletions website/cue/reference/components/sinks.cue
Original file line number Diff line number Diff line change
Expand Up @@ -652,16 +652,21 @@ components: sinks: [Name=string]: {
}

telemetry: metrics: {
component_received_events_count: components.sources.internal_metrics.output.metrics.component_received_events_count
component_received_events_total: components.sources.internal_metrics.output.metrics.component_received_events_total
component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total
utilization: components.sources.internal_metrics.output.metrics.utilization
buffer_byte_size: components.sources.internal_metrics.output.metrics.buffer_byte_size
buffer_discarded_events_total: components.sources.internal_metrics.output.metrics.buffer_discarded_events_total
buffer_events: components.sources.internal_metrics.output.metrics.buffer_events
buffer_received_events_total: components.sources.internal_metrics.output.metrics.buffer_received_events_total
buffer_received_event_bytes_total: components.sources.internal_metrics.output.metrics.buffer_received_event_bytes_total
buffer_sent_events_total: components.sources.internal_metrics.output.metrics.buffer_sent_events_total
buffer_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.buffer_sent_event_bytes_total
buffer_discarded_events_total: components.sources.internal_metrics.output.metrics.buffer_discarded_events_total
component_discarded_events_total: components.sources.internal_metrics.output.metrics.component_discarded_events_total
component_errors_total: components.sources.internal_metrics.output.metrics.component_errors_total
component_received_events_count: components.sources.internal_metrics.output.metrics.component_received_events_count
component_received_events_total: components.sources.internal_metrics.output.metrics.component_received_events_total
component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total
component_sent_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_bytes_total
component_sent_events_total: components.sources.internal_metrics.output.metrics.component_sent_events_total
component_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total
utilization: components.sources.internal_metrics.output.metrics.utilization
}
}
5 changes: 0 additions & 5 deletions website/cue/reference/components/sinks/amqp.cue
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,4 @@ components: sinks: amqp: {
}

how_it_works: components._amqp.how_it_works

telemetry: metrics: {
events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total
processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total
}
}
Loading