-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[azure] [app_insights] Group metrics by dimensions (segments) and timestamp #36634
Conversation
This pull request doesn't have a |
This pull request does not have a backport label.
To fixup this pull request, you need to add the backport labels for the needed
|
Here's a diff patch as well which I'd like to suggest. Please ignore the unrelated ones if you want to. As I was reviewing the whole data{,_test}.go files to understand the logic properly I started improving the other parts as well. diff --git a/x-pack/metricbeat/module/azure/app_insights/client_test.go b/x-pack/metricbeat/module/azure/app_insights/client_test.go
index 4763c28da2..b003263969 100644
--- a/x-pack/metricbeat/module/azure/app_insights/client_test.go
+++ b/x-pack/metricbeat/module/azure/app_insights/client_test.go
@@ -13,17 +13,15 @@ import (
"github.com/stretchr/testify/mock"
)
-var (
- config = Config{
- ApplicationId: "",
- ApiKey: "",
- Metrics: []Metric{
- {
- ID: []string{"requests/count"},
- },
+var config = Config{
+ ApplicationId: "",
+ ApiKey: "",
+ Metrics: []Metric{
+ {
+ ID: []string{"requests/count"},
},
- }
-)
+ },
+}
func TestClient(t *testing.T) {
t.Run("return error not valid query", func(t *testing.T) {
diff --git a/x-pack/metricbeat/module/azure/app_insights/data.go b/x-pack/metricbeat/module/azure/app_insights/data.go
index 49a94ec5cd..9d95c7fa1b 100644
--- a/x-pack/metricbeat/module/azure/app_insights/data.go
+++ b/x-pack/metricbeat/module/azure/app_insights/data.go
@@ -123,21 +123,16 @@ type metricTimeKey struct {
End time.Time
}
-func newMetricTimeKey(
- start time.Time,
- end time.Time,
-) metricTimeKey {
- return metricTimeKey{
- Start: start,
- End: end,
- }
+func newMetricTimeKey(start, end time.Time) metricTimeKey {
+ return metricTimeKey{Start: start, End: end}
}
-func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId string, namespace string) []mb.Event {
+func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId, namespace string) []mb.Event {
var events []mb.Event
if metricValues.Value == nil {
return events
}
+
groupedAddProp := make(map[string][]MetricValue)
mValues := mapMetricValues(metricValues)
@@ -160,7 +155,15 @@ func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId s
groupedByTime := groupMetricsByTime(group)
for ts, group := range groupedByTime {
- events = append(events, createGroupEvent(group, ts, applicationId, namespace))
+ // NOTE: Wrapping if the line exceeds 80 chars
+ events = append(events,
+ createGroupEvent(
+ group,
+ ts,
+ applicationId,
+ namespace,
+ ),
+ )
}
}
return events
@@ -168,12 +171,17 @@ func EventsMapping(metricValues insights.ListMetricsResultsItem, applicationId s
// groupMetricsByTime groups metrics by their start and end times truncated to the second.
func groupMetricsByTime(metrics []MetricValue) map[metricTimeKey][]MetricValue {
- result := make(map[metricTimeKey][]MetricValue)
+ // Preallocating with a safe number. Used len(metricsetName)/2 instead of
+ // len(metricsetName) because there could be duplicates.
+ result := make(map[metricTimeKey][]MetricValue, len(metricsetName)/2)
for _, metric := range metrics {
// The start and end times are truncated to the nearest second.
// This is done to ensure that metrics that fall within the same second are grouped together, even if their actual times are slightly different.
- timeKey := newMetricTimeKey(metric.Start.Time.Truncate(time.Second), metric.End.Time.Truncate(time.Second))
+ timeKey := newMetricTimeKey(
+ metric.Start.Time.Truncate(time.Second),
+ metric.End.Time.Truncate(time.Second),
+ )
result[timeKey] = append(result[timeKey], metric)
}
@@ -235,18 +243,18 @@ func groupMetricsByDimension(metrics []MetricValue) map[string][]MetricValue {
return keys
}
-// getSortedKeys returns a string of sorted keys.
-// The keys are sorted in alphabetical order.
+// getSortedKeys returns a string where keys are sorted in lexicographical order
+// and concatenated.
func getSortedKeys(m map[string]string) string {
keys := make([]string, 0, len(m))
for k, v := range m {
- keys = append(keys, fmt.Sprintf("%s%s", k, v))
+ keys = append(keys, k+v)
}
sort.Strings(keys)
return strings.Join(keys, "")
}
-func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, applicationId string, namespace string) mb.Event {
+func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, applicationId, namespace string) mb.Event {
metricList := mapstr.M{}
if metricTime.Start.IsZero() || metricTime.End.IsZero() {
@@ -264,9 +272,7 @@ func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, appli
}
event := mb.Event{
- ModuleFields: mapstr.M{
- "application_id": applicationId,
- },
+ ModuleFields: mapstr.M{"application_id": applicationId},
MetricSetFields: mapstr.M{
"start_date": metricTime.Start,
"end_date": metricTime.End,
@@ -300,16 +306,11 @@ func createGroupEvent(metricValue []MetricValue, metricTime metricTimeKey, appli
return event
}
-func createEvent(start *date.Time, end *date.Time, applicationId string, namespace string, metricList mapstr.M) mb.Event {
+func createEvent(start, end *date.Time, applicationId, namespace string, metricList mapstr.M) mb.Event {
event := mb.Event{
- ModuleFields: mapstr.M{
- "application_id": applicationId,
- },
- MetricSetFields: mapstr.M{
- "start_date": start,
- "end_date": end,
- },
- Timestamp: end.Time,
+ ModuleFields: mapstr.M{"application_id": applicationId},
+ MetricSetFields: mapstr.M{"start_date": start, "end_date": end},
+ Timestamp: end.Time,
}
event.RootFields = mapstr.M{}
event.RootFields.Put("cloud.provider", "azure")
@@ -323,7 +324,7 @@ func createEvent(start *date.Time, end *date.Time, applicationId string, namespa
return event
}
-func createNoSegEvent(values []MetricValue, applicationId string, namespace string) mb.Event {
+func createNoSegEvent(values []MetricValue, applicationId, namespace string) mb.Event {
metricList := mapstr.M{}
for _, value := range values {
for key, metric := range value.Value {
@@ -334,7 +335,6 @@ func createNoSegEvent(values []MetricValue, applicationId string, namespace stri
return mb.Event{}
}
return createEvent(values[0].Start, values[0].End, applicationId, namespace, metricList)
-
}
func getAdditionalPropMetric(addProp map[string]interface{}) map[string]interface{} {
@@ -344,7 +344,7 @@ func getAdditionalPropMetric(addProp map[string]interface{}) map[string]interfac
case map[string]interface{}:
for subKey, subVal := range v {
if subVal != nil {
- metricNames[cleanMetricNames(fmt.Sprintf("%s.%s", key, subKey))] = subVal
+ metricNames[cleanMetricNames(key+"."+subKey)] = subVal
}
}
default:
@@ -355,14 +355,15 @@ func getAdditionalPropMetric(addProp map[string]interface{}) map[string]interfac
}
func cleanMetricNames(metric string) string {
+ // Tip: You can also do this.
+ // r := strings.NewReplacer("/", "_", " ", "_")
+ // metric = r.Replace(metric)
metric = strings.Replace(metric, "/", "_", -1)
metric = strings.Replace(metric, " ", "_", -1)
metric = azure.ReplaceUpperCase(metric)
obj := strings.Split(metric, ".")
- for index := range obj {
- // in some cases a trailing "_" is found
- obj[index] = strings.TrimPrefix(obj[index], "_")
- obj[index] = strings.TrimSuffix(obj[index], "_")
+ for i := range obj {
+ obj[i] = strings.Trim(obj[i], "_") // remove leading and trailing "_", if any
}
metric = strings.ToLower(strings.Join(obj, "_"))
aggsRegex := regexp.MustCompile(aggsRegex)
diff --git a/x-pack/metricbeat/module/azure/app_insights/data_test.go b/x-pack/metricbeat/module/azure/app_insights/data_test.go
index b4b797ecf0..ce6f595ff3 100644
--- a/x-pack/metricbeat/module/azure/app_insights/data_test.go
+++ b/x-pack/metricbeat/module/azure/app_insights/data_test.go
@@ -17,11 +17,7 @@ import (
"github.com/elastic/elastic-agent-libs/mapstr"
)
-func newMetricsTest(
- timestamp1 *date.Time,
- timestamp2 *date.Time,
- timestamp3 *date.Time,
-) []MetricValue {
+func newMetricsTest(timestamp1, timestamp2, timestamp3 *date.Time) []MetricValue {
return []MetricValue{
{
SegmentName: map[string]string{},
@@ -327,7 +323,7 @@ func TestGroupMetrics(t *testing.T) {
func TestEventMapping(t *testing.T) {
startDate := date.Time{}
id := "123"
- var info = insights.MetricsResultInfo{
+ info := insights.MetricsResultInfo{
AdditionalProperties: map[string]interface{}{
"requests/count": map[string]interface{}{"sum": 12},
"requests/failed": map[string]interface{}{"sum": 10},
@@ -335,7 +331,7 @@ func TestEventMapping(t *testing.T) {
Start: &startDate,
End: &startDate,
}
- var metricResult = insights.MetricsResult{
+ metricResult := insights.MetricsResult{
Value: &info,
}
metrics := []insights.MetricsResultsItem{
@@ -345,7 +341,7 @@ func TestEventMapping(t *testing.T) {
Body: &metricResult,
},
}
- var result = insights.ListMetricsResultsItem{
+ result := insights.ListMetricsResultsItem{
Value: &metrics,
}
applicationId := "abc"
@@ -364,7 +360,6 @@ func TestEventMapping(t *testing.T) {
assert.Equal(t, val4, applicationId)
}
-
}
func TestEventMappingGrouping(t *testing.T) { |
@shmsr Thanks for the review! I would address only code changes from this PR and maybe make a new one for the entire metricset. |
Here's another diff patch:
diff --git a/x-pack/metricbeat/module/azure/app_insights/data.go b/x-pack/metricbeat/module/azure/app_insights/data.go
index 9ab2a577cb..b79bc1e2ba 100644
--- a/x-pack/metricbeat/module/azure/app_insights/data.go
+++ b/x-pack/metricbeat/module/azure/app_insights/data.go
@@ -169,7 +169,9 @@ func groupMetricsByTime(metrics []MetricValue) map[metricTimeKey][]MetricValue {
for _, metric := range metrics {
// The start and end times are truncated to the nearest second.
- // This is done to ensure that metrics that fall within the same second are grouped together, even if their actual times are slightly different.
+ // This is done to ensure that metrics that fall within the same
+ // second are grouped together, even if their actual times are
+ // slightly different.
timeKey := newMetricTimeKey(
metric.Start.Time.Truncate(time.Second),
metric.End.Time.Truncate(time.Second),
@@ -182,10 +184,13 @@ func groupMetricsByTime(metrics []MetricValue) map[metricTimeKey][]MetricValue {
// groupMetricsByDimension groups the given metrics by their dimension keys.
func groupMetricsByDimension(metrics []MetricValue) map[string][]MetricValue {
- keys := make(map[string][]MetricValue)
- var firstStart, firstEnd *date.Time
+ var (
+ keys = make(map[string][]MetricValue)
+ firstStart, firstEnd *date.Time
+ helper func(metrics []MetricValue)
+ )
- var helper func(metrics []MetricValue)
+ // Review comment: Can you add some more comments to this helper func?
helper = func(metrics []MetricValue) {
for _, metric := range metrics {
dimensionKey := getSortedKeys(metric.SegmentName)
@@ -222,9 +227,7 @@ func groupMetricsByDimension(metrics []MetricValue) map[string][]MetricValue {
}
} else if dimensionKey != "" {
m := metric
- m.Start = firstStart
- m.End = firstEnd
-
+ m.Start, m.End = firstStart, firstEnd
keys[dimensionKey] = append(keys[dimensionKey], m)
}
}
diff --git a/x-pack/metricbeat/module/azure/app_insights/data_test.go b/x-pack/metricbeat/module/azure/app_insights/data_test.go
index eb4d546c4a..040c4386c2 100644
--- a/x-pack/metricbeat/module/azure/app_insights/data_test.go
+++ b/x-pack/metricbeat/module/azure/app_insights/data_test.go
@@ -17,99 +17,62 @@ import (
"github.com/elastic/elastic-agent-libs/mapstr"
)
-func newMetricsTest(timestamp1, timestamp2, timestamp3 *date.Time) []MetricValue {
- return []MetricValue{
+func newMetricsTest(ts ...*date.Time) []MetricValue {
+ type values struct {
+ SegmentName map[string]string
+ Value map[string]interface{}
+ T *date.Time
+ }
+
+ const numOfMetricValue = 3
+
+ if numOfMetricValue != len(ts) {
+ panic("number of arguments to newMetricsTest is not correct")
+ }
+
+ vals := [numOfMetricValue]values{
{
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{
- "request_url_host": "",
- },
- Value: map[string]interface{}{
- "users_count.unique": 44,
- },
- Segments: nil,
- Interval: "",
- Start: nil,
- End: nil,
- },
- },
- Interval: "",
- Start: nil,
- End: nil,
- },
- },
- Interval: "P5M",
- Start: timestamp1,
- End: timestamp1,
+ SegmentName: map[string]string{"request_url_host": ""},
+ Value: map[string]interface{}{"users_count.unique": 44},
+ T: ts[0],
},
{
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{
- "request_url_host": "",
- },
- Value: map[string]interface{}{
- "sessions_count.unique": 44,
- },
- Segments: nil,
- Interval: "",
- Start: nil,
- End: nil,
- },
- },
- Interval: "",
- Start: nil,
- End: nil,
- },
- },
- Interval: "P5M",
- Start: timestamp2,
- End: timestamp2,
+ SegmentName: map[string]string{"request_url_host": ""},
+ Value: map[string]interface{}{"sessions_count.unique": 44},
+ T: ts[1],
},
{
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{},
- Value: map[string]interface{}{},
- Segments: []MetricValue{
- {
- SegmentName: map[string]string{
- "request_url_host": "localhost",
- },
- Value: map[string]interface{}{
- "sessions_count.unique": 44,
+ SegmentName: map[string]string{"request_url_host": "localhost"},
+ Value: map[string]interface{}{"sessions_count.unique": 44},
+ T: ts[2],
+ },
+ }
+
+ mv := make([]MetricValue, 0, 3)
+ for i := range vals {
+ mv = append(mv,
+ MetricValue{
+ SegmentName: map[string]string{},
+ Value: map[string]interface{}{},
+ Segments: []MetricValue{
+ {
+ SegmentName: map[string]string{},
+ Value: map[string]interface{}{},
+ Segments: []MetricValue{
+ {
+ SegmentName: vals[i].SegmentName,
+ Value: vals[i].Value,
},
- Segments: nil,
- Interval: "",
- Start: nil,
- End: nil,
},
},
- Interval: "",
- Start: nil,
- End: nil,
},
+ Interval: "P5M",
+ Start: vals[i].T, End: vals[i].T,
},
- Interval: "P5M",
- Start: timestamp3,
- End: timestamp3,
- },
+ )
}
+
+ return mv
}
func TestGroupMetrics(t *testing.T) {
@@ -128,10 +91,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"users_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp1,
- End: timestamp1,
+ Start: timestamp1,
+ End: timestamp1,
},
{
SegmentName: map[string]string{
@@ -140,10 +101,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp2,
- End: timestamp2,
+ Start: timestamp2,
+ End: timestamp2,
},
}
@@ -155,10 +114,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp3,
- End: timestamp3,
+ Start: timestamp3,
+ End: timestamp3,
},
}
@@ -205,10 +162,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"users_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp1,
- End: timestamp1,
+ Start: timestamp1,
+ End: timestamp1,
},
{
SegmentName: map[string]string{
@@ -217,10 +172,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp2,
- End: timestamp2,
+ Start: timestamp2,
+ End: timestamp2,
},
}
@@ -232,10 +185,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp3,
- End: timestamp3,
+ Start: timestamp3,
+ End: timestamp3,
},
}
@@ -247,10 +198,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"users_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp1,
- End: timestamp1,
+ Start: timestamp1,
+ End: timestamp1,
},
}
@@ -262,10 +211,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp2,
- End: timestamp2,
+ Start: timestamp2,
+ End: timestamp2,
},
}
@@ -277,10 +224,8 @@ func TestGroupMetrics(t *testing.T) {
Value: map[string]interface{}{
"sessions_count.unique": 44,
},
- Segments: nil,
- Interval: "",
- Start: timestamp3,
- End: timestamp3,
+ Start: timestamp3,
+ End: timestamp3,
},
}
Other than this, logic looks good to me. If you want, you can directly apply the git patch. |
@elastic/integrations could someone take a look please? |
This pull request is now in conflicts. Could you fix it? 🙏
|
if metricTime.Start.IsZero() || metricTime.End.IsZero() { | ||
return mb.Event{} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Under which circumstances can this happen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just a safety check. Normally, the child segments don't have their own start or end times; they rely on the parent segments for that info. This only double-checks that the time info is present - a missing start or end time should never happen.
Example:
MetricsResult (Parent)
│
│ ├── Start: 2023-01-01 08:00
│ └── End: 2023-01-01 10:00
│
└─── Segments: MetricsSegmentInfo (First-level Child)
│
│ ├── Start: 2023-01-01 08:00
│ └── End: 2023-01-01 10:00
│
└─── Segments: []MetricsSegmentInfo (Second-level Children)
│
├── Segment 1:
│ │
│ ├── AdditionalProperties: {"browserTiming/urlHost": "localhost"}
│ │ (No specific Start/End time here)
│ │
│ └─── Segments: []MetricsSegmentInfo (Third-level Children)
│ │
│ └─── Child Segment:
│ │
│ └── AdditionalProperties: {"browserTiming/urlPath": "/test", "browserTimings/networkDuration": {"avg": 1.5}}
│ (No specific Start/End time here)
│
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would be a great comment to explain what the code is trying to accomplish!
groupedByDimensions := groupMetricsByDimension(mValues) | ||
|
||
for _, group := range groupedByDimensions { | ||
groupedByTime := groupMetricsByTime(group) | ||
|
||
for ts, group := range groupedByTime { | ||
events = append( | ||
events, | ||
createGroupEvent(group, ts, applicationId, namespace), | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we grouping by dimensions and later by time instead of grouping by dimension+time like we did in the GCP metrics?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see any blocker in this PR.
However, some parts are obscure and would become clearer by renaming variables/functions or adding comments that explain WHY the code is doing something.
All of this was pre-existing in this PR, but since we're spending time understanding and changing this code, we should take this as an opportunity to make the code more manageable for future maintainers.
add more comments
💚 Build Succeeded
Expand to view the summary
Build stats
❕ Flaky test report: No test was executed to be analysed. 🤖 GitHub comments: Expand to view the GitHub comments
To re-run your PR in the CI, just comment with:
|
@gpop63, the latest changes look good and the code is easier to read — 👍 |
…estamp (elastic#36634) * add grouping * fix golangci errors * address comments and minor fixes * add changelog entry * Address review comments * Address review comments * simplify grouping logic add more comments * update tests --------- Co-authored-by: subham sarkar <[email protected]>
Proposed commit message
Currently,
app_insights
creates an event for each metric, even though they might share the same dimensions. This change aims to group together the metrics that share the same dimension (segment) values.
Checklist
CHANGELOG.next.asciidoc
or CHANGELOG-developer.next.asciidoc
.Author's Checklist
How to test this PR locally
Related issues
Use cases
Screenshots
Logs