Merge pull request #601 from go-faster/feat/remove-metrics-normalization

feat: remove metrics mapping
go-faster · Jan 26, 2025 · e9a3e00 · e9a3e00
2 parents edccab9 + c82572a
commit e9a3e00
Show file tree

Hide file tree

Showing 14 changed files with 77 additions and 162 deletions.
diff --git a/go.mod b/go.mod
@@ -118,7 +118,7 @@ require (
 	github.com/docker/go-metrics v0.0.1 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/ebitengine/purego v0.8.1 // indirect
-	github.com/edsrzf/mmap-go v1.1.0 // indirect
+	github.com/edsrzf/mmap-go v1.2.0 // indirect
 	github.com/elastic/go-grok v0.3.1 // indirect
 	github.com/elastic/lunes v0.1.0 // indirect
 	github.com/expr-lang/expr v1.16.9 // indirect

diff --git a/go.sum b/go.sum
@@ -172,8 +172,8 @@ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+m
 github.com/dvsekhvalnov/jose2go v0.0.0-20170216131308-f21a8cedbbae/go.mod h1:7BvyPhdbLxMXIYTFPLsyJRFMsKmOZnQmzh6Gb+uquuM=
 github.com/ebitengine/purego v0.8.1 h1:sdRKd6plj7KYW33EH5As6YKfe8m9zbN9JMrOjNVF/BE=
 github.com/ebitengine/purego v0.8.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
-github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ=
-github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q=
+github.com/edsrzf/mmap-go v1.2.0 h1:hXLYlkbaPzt1SaQk+anYwKSRNhufIDCchSPkUD6dD84=
+github.com/edsrzf/mmap-go v1.2.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q=
 github.com/elastic/go-grok v0.3.1 h1:WEhUxe2KrwycMnlvMimJXvzRa7DoByJB4PVUIE1ZD/U=
 github.com/elastic/go-grok v0.3.1/go.mod h1:n38ls8ZgOboZRgKcjMY8eFeZFMmcL9n2lP0iHhIDk64=
 github.com/elastic/lunes v0.1.0 h1:amRtLPjwkWtzDF/RKzcEPMvSsSseLDLW+bnhfNSLRe4=

diff --git a/integration/prome2e/common_test.go b/integration/prome2e/common_test.go
@@ -405,6 +405,7 @@ func runTest(
 	t.Run("Series", func(t *testing.T) {
 		testName := func(name string) func(t *testing.T) {
 			return func(t *testing.T) {
+				t.Helper()
 				a := require.New(t)
 
 				r, err := c.GetSeries(ctx, promapi.GetSeriesParams{
@@ -423,7 +424,9 @@ func runTest(
 		t.Run("PointByName", testName(`prometheus_http_requests_total`))
 		t.Run("HistogramByName", testName(`prometheus_http_request_duration_seconds_count`))
 		t.Run("SummaryByName", testName(`go_gc_duration_seconds`))
-		t.Run("PointByMappedName", testName(`process_runtime_go_gc_count`))
+
+		// TODO(ernado): re-enable when the parser supports dots?
+		// t.Run("PointByMappedName", testName(`process.runtime.go.gc.count`))
 
 		t.Run("OneMatcher", func(t *testing.T) {
 			a := require.New(t)

diff --git a/integration/prome2e/prom_e2e.go b/integration/prome2e/prom_e2e.go
@@ -10,8 +10,6 @@ import (
 	"github.com/prometheus/prometheus/promql/parser"
 	"go.opentelemetry.io/collector/pdata/pcommon"
 	"go.opentelemetry.io/collector/pdata/pmetric"
-
-	"github.com/go-faster/oteldb/internal/otelstorage"
 )
 
 // BatchSet is a set of batches.
@@ -218,11 +216,10 @@ func (s *BatchSet) addSeries(name string, res, scope, attrs pcommon.Map) {
 			switch t := v.Type(); t {
 			case pcommon.ValueTypeMap, pcommon.ValueTypeSlice:
 			default:
-				key := otelstorage.KeyToLabel(k)
 				val := v.AsString()
 
-				s.addLabel(key, val)
-				lb[key] = val
+				s.addLabel(k, val)
+				lb[k] = val
 			}
 			return true
 		})
@@ -244,14 +241,10 @@ func (s *BatchSet) addLabel(label, val string) {
 	if s.Labels == nil {
 		s.Labels = map[string]map[string]struct{}{}
 	}
-	label = otelstorage.KeyToLabel(label)
 	m := s.Labels[label]
 	if m == nil {
 		m = map[string]struct{}{}
 		s.Labels[label] = m
 	}
-	if label == labels.MetricName {
-		val = otelstorage.KeyToLabel(val)
-	}
 	m[val] = struct{}{}
 }
diff --git a/internal/chstorage/_golden/schema.metrics_exemplars.sql b/internal/chstorage/_golden/schema.metrics_exemplars.sql
@@ -1,7 +1,6 @@
 CREATE TABLE IF NOT EXISTS `metrics_exemplars`
 (
 	`name`                LowCardinality(String),
-	`name_normalized`     LowCardinality(String),
 	`timestamp`           DateTime64(9)          CODEC(Delta, ZSTD(1)),
 	`filtered_attributes` String,
 	`exemplar_timestamp`  DateTime64(9)          CODEC(Delta, ZSTD(1)),
@@ -20,4 +19,4 @@ CREATE TABLE IF NOT EXISTS `metrics_exemplars`
 )
 ENGINE = MergeTree
 PARTITION BY toYYYYMMDD(timestamp)
-ORDER BY (`name_normalized`, `resource`, `attribute`, `timestamp`)
+ORDER BY (`name`, `resource`, `attribute`, `timestamp`)
diff --git a/internal/chstorage/_golden/schema.metrics_exp_histograms.sql b/internal/chstorage/_golden/schema.metrics_exp_histograms.sql
@@ -1,7 +1,6 @@
 CREATE TABLE IF NOT EXISTS `metrics_exp_histograms`
 (
 	`name`                                 LowCardinality(String) CODEC(ZSTD(1)),
-	`name_normalized`                      LowCardinality(String),
 	`timestamp`                            DateTime64(9)          CODEC(Delta, ZSTD(1)),
 	`exp_histogram_count`                  UInt64,
 	`exp_histogram_sum`                    Nullable(Float64),

diff --git a/internal/chstorage/_golden/schema.metrics_labels.sql b/internal/chstorage/_golden/schema.metrics_labels.sql
@@ -1,10 +1,8 @@
 CREATE TABLE IF NOT EXISTS `metrics_labels`
 (
-	`name`             LowCardinality(String) COMMENT 'original name, i.e. foo.bar',
-	`name_normalized`  LowCardinality(String) COMMENT 'normalized name, foo_bar',
-	`value`            String,
-	`value_normalized` String,
-	`scope`            Enum8('NONE' = 0, 'RESOURCE' = 1, 'INSTRUMENTATION' = 2, 'ATTRIBUTE' = 4)
+	`name`  LowCardinality(String),
+	`value` String,
+	`scope` Enum8('NONE' = 0, 'RESOURCE' = 1, 'INSTRUMENTATION' = 2, 'ATTRIBUTE' = 4)
 )
 ENGINE = ReplacingMergeTree
-ORDER BY (`name_normalized`, `value`, `scope`)
+ORDER BY (`name`, `value`, `scope`)
diff --git a/internal/chstorage/_golden/schema.metrics_points.sql b/internal/chstorage/_golden/schema.metrics_points.sql
@@ -1,9 +1,8 @@
 CREATE TABLE IF NOT EXISTS `metrics_points`
 (
-	`name`            LowCardinality(String) CODEC(ZSTD(1)),
-	`name_normalized` LowCardinality(String),
-	`timestamp`       DateTime64(9)          CODEC(Delta, ZSTD(1)),
-	`mapping`         Enum8(
+	`name`      LowCardinality(String) CODEC(ZSTD(1)),
+	`timestamp` DateTime64(9)          CODEC(Delta, ZSTD(1)),
+	`mapping`   Enum8(
 		'NO_MAPPING' = 0,
 		'HISTOGRAM_COUNT' = 1,
 		'HISTOGRAM_SUM' = 2,
@@ -14,22 +13,22 @@ CREATE TABLE IF NOT EXISTS `metrics_points`
 		'SUMMARY_SUM' = 7,
 		'SUMMARY_QUANTILE' = 8
 		) CODEC(T64, ZSTD(1)),
-	`value`           Float64                CODEC(Gorilla, ZSTD(1)),
-	`flags`           UInt8                  CODEC(T64, ZSTD(1)),
+	`value`     Float64                CODEC(Gorilla, ZSTD(1)),
+	`flags`     UInt8                  CODEC(T64, ZSTD(1)),
 	-- attribute attributes
-	`attribute`       LowCardinality(String),
+	`attribute` LowCardinality(String),
 	-- end
 	-- resource attributes
-	`resource`        LowCardinality(String),
+	`resource`  LowCardinality(String),
 	-- end
 	-- scope attributes
-	`scope`           LowCardinality(String),
+	`scope`     LowCardinality(String),
 	-- end
 
 	INDEX `idx_ts` timestamp TYPE minmax GRANULARITY 8192
 )
 ENGINE = MergeTree
 PARTITION BY toYYYYMMDD(timestamp)
-ORDER BY (`name_normalized`, `mapping`, `resource`, `attribute`, `timestamp`)
-PRIMARY KEY (`name_normalized`, `mapping`, `resource`, `attribute`)
+ORDER BY (`name`, `mapping`, `resource`, `attribute`, `timestamp`)
+PRIMARY KEY (`name`, `mapping`, `resource`, `attribute`)
 TTL toDateTime(`timestamp`) + toIntervalSecond(259200)
diff --git a/internal/chstorage/attributes.go b/internal/chstorage/attributes.go
@@ -445,7 +445,7 @@ func appendDDL[T any](col []ddl.Column, m map[string]proto.ColumnOf[T]) []ddl.Co
 
 func attrsToLabels(m otelstorage.Attrs, to map[string]string) {
 	m.AsMap().Range(func(k string, v pcommon.Value) bool {
-		to[otelstorage.KeyToLabel(k)] = v.Str()
+		to[k] = v.Str()
 		return true
 	})
 }
diff --git a/internal/chstorage/columns_metrics.go b/internal/chstorage/columns_metrics.go
@@ -8,9 +8,8 @@ import (
 )
 
 type pointColumns struct {
-	name           *proto.ColLowCardinality[string]
-	nameNormalized *proto.ColLowCardinality[string]
-	timestamp      *proto.ColDateTime64
+	name      *proto.ColLowCardinality[string]
+	timestamp *proto.ColDateTime64
 
 	mapping proto.ColEnum8
 	value   proto.ColFloat64
@@ -23,9 +22,8 @@ type pointColumns struct {
 
 func newPointColumns() *pointColumns {
 	return &pointColumns{
-		name:           new(proto.ColStr).LowCardinality(),
-		nameNormalized: new(proto.ColStr).LowCardinality(),
-		timestamp:      new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
+		name:      new(proto.ColStr).LowCardinality(),
+		timestamp: new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
 
 		attributes: NewAttributes(colAttrs),
 		scope:      NewAttributes(colScope),
@@ -37,7 +35,6 @@ func (c *pointColumns) Columns() Columns {
 	return MergeColumns(
 		Columns{
 			{Name: "name", Data: c.name},
-			{Name: "name_normalized", Data: c.nameNormalized},
 			{Name: "timestamp", Data: c.timestamp},
 
 			{Name: "mapping", Data: proto.Wrap(&c.mapping, metricMappingDDL)},
@@ -59,8 +56,8 @@ func (c *pointColumns) DDL() ddl.Table {
 	table := ddl.Table{
 		Engine:      "MergeTree",
 		PartitionBy: "toYYYYMMDD(timestamp)",
-		PrimaryKey:  []string{"name_normalized", "mapping", "resource", "attribute"},
-		OrderBy:     []string{"name_normalized", "mapping", "resource", "attribute", "timestamp"},
+		PrimaryKey:  []string{"name", "mapping", "resource", "attribute"},
+		OrderBy:     []string{"name", "mapping", "resource", "attribute", "timestamp"},
 		TTL:         ddl.TTL{Field: "timestamp"},
 		Indexes: []ddl.Index{
 			{
@@ -76,10 +73,6 @@ func (c *pointColumns) DDL() ddl.Table {
 				Type:  c.name.Type(),
 				Codec: "ZSTD(1)",
 			},
-			{
-				Name: "name_normalized",
-				Type: c.nameNormalized.Type(),
-			},
 			{
 				Name:  "timestamp",
 				Type:  c.timestamp.Type(),
@@ -111,9 +104,8 @@ func (c *pointColumns) DDL() ddl.Table {
 }
 
 type expHistogramColumns struct {
-	name           *proto.ColLowCardinality[string]
-	nameNormalized *proto.ColLowCardinality[string]
-	timestamp      *proto.ColDateTime64
+	name      *proto.ColLowCardinality[string]
+	timestamp *proto.ColDateTime64
 
 	count                proto.ColUInt64
 	sum                  *proto.ColNullable[float64]
@@ -134,9 +126,8 @@ type expHistogramColumns struct {
 
 func newExpHistogramColumns() *expHistogramColumns {
 	return &expHistogramColumns{
-		name:           new(proto.ColStr).LowCardinality(),
-		nameNormalized: new(proto.ColStr).LowCardinality(),
-		timestamp:      new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
+		name:      new(proto.ColStr).LowCardinality(),
+		timestamp: new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
 
 		sum:                  new(proto.ColFloat64).Nullable(),
 		min:                  new(proto.ColFloat64).Nullable(),
@@ -154,7 +145,6 @@ func (c *expHistogramColumns) Columns() Columns {
 	return MergeColumns(
 		Columns{
 			{Name: "name", Data: c.name},
-			{Name: "name_normalized", Data: c.nameNormalized},
 			{Name: "timestamp", Data: c.timestamp},
 
 			{Name: "exp_histogram_count", Data: &c.count},
@@ -191,10 +181,6 @@ func (c *expHistogramColumns) DDL() ddl.Table {
 				Type:  c.name.Type(),
 				Codec: "ZSTD(1)",
 			},
-			{
-				Name: "name_normalized",
-				Type: c.nameNormalized.Type(),
-			},
 			{
 				Name:  "timestamp",
 				Type:  c.timestamp.Type(),
@@ -256,29 +242,21 @@ func (c *expHistogramColumns) DDL() ddl.Table {
 }
 
 type labelsColumns struct {
-	name           *proto.ColLowCardinality[string]
-	nameNormalized *proto.ColLowCardinality[string]
-
-	value           proto.ColStr
-	valueNormalized proto.ColStr
-
+	name  *proto.ColLowCardinality[string]
+	value proto.ColStr
 	scope proto.ColEnum8
 }
 
 func newLabelsColumns() *labelsColumns {
 	return &labelsColumns{
-		name:           new(proto.ColStr).LowCardinality(),
-		nameNormalized: new(proto.ColStr).LowCardinality(),
+		name: new(proto.ColStr).LowCardinality(),
 	}
 }
 
 func (c *labelsColumns) Columns() Columns {
 	return Columns{
 		{Name: "name", Data: c.name},
-		{Name: "name_normalized", Data: c.nameNormalized},
 		{Name: "value", Data: &c.value},
-		{Name: "value_normalized", Data: &c.valueNormalized},
-
 		{Name: "scope", Data: proto.Wrap(&c.scope, metricLabelScopeDDL)},
 	}
 }
@@ -289,26 +267,16 @@ func (c *labelsColumns) ChsqlResult() []chsql.ResultColumn { return c.Columns().
 func (c *labelsColumns) DDL() ddl.Table {
 	return ddl.Table{
 		Engine:  "ReplacingMergeTree",
-		OrderBy: []string{"name_normalized", "value", "scope"},
+		OrderBy: []string{"name", "value", "scope"},
 		Columns: []ddl.Column{
 			{
-				Name:    "name",
-				Type:    c.name.Type(),
-				Comment: "original name, i.e. foo.bar",
-			},
-			{
-				Name:    "name_normalized",
-				Type:    c.nameNormalized.Type(),
-				Comment: "normalized name, foo_bar",
+				Name: "name",
+				Type: c.name.Type(),
 			},
 			{
 				Name: "value",
 				Type: c.value.Type(),
 			},
-			{
-				Name: "value_normalized",
-				Type: c.valueNormalized.Type(),
-			},
 			{
 				Name: "scope",
 				Type: c.scope.Type().Sub(metricLabelScopeDDL),
@@ -318,9 +286,8 @@ func (c *labelsColumns) DDL() ddl.Table {
 }
 
 type exemplarColumns struct {
-	name           *proto.ColLowCardinality[string]
-	nameNormalized *proto.ColLowCardinality[string]
-	timestamp      *proto.ColDateTime64
+	name      *proto.ColLowCardinality[string]
+	timestamp *proto.ColDateTime64
 
 	filteredAttributes proto.ColBytes
 	exemplarTimestamp  *proto.ColDateTime64
@@ -336,7 +303,6 @@ type exemplarColumns struct {
 func newExemplarColumns() *exemplarColumns {
 	return &exemplarColumns{
 		name:              new(proto.ColStr).LowCardinality(),
-		nameNormalized:    new(proto.ColStr).LowCardinality(),
 		timestamp:         new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
 		exemplarTimestamp: new(proto.ColDateTime64).WithPrecision(proto.PrecisionNano),
 		attributes:        NewAttributes(colAttrs),
@@ -349,7 +315,6 @@ func (c *exemplarColumns) Columns() Columns {
 	return MergeColumns(
 		Columns{
 			{Name: "name", Data: c.name},
-			{Name: "name_normalized", Data: c.nameNormalized},
 			{Name: "timestamp", Data: c.timestamp},
 
 			{Name: "filtered_attributes", Data: &c.filteredAttributes},
@@ -372,16 +337,12 @@ func (c *exemplarColumns) DDL() ddl.Table {
 	table := ddl.Table{
 		Engine:      "MergeTree",
 		PartitionBy: "toYYYYMMDD(timestamp)",
-		OrderBy:     []string{"name_normalized", "resource", "attribute", "timestamp"},
+		OrderBy:     []string{"name", "resource", "attribute", "timestamp"},
 		Columns: []ddl.Column{
 			{
 				Name: "name",
 				Type: c.name.Type(),
 			},
-			{
-				Name: "name_normalized",
-				Type: c.nameNormalized.Type(),
-			},
 			{
 				Name:  "timestamp",
 				Type:  c.timestamp.Type(),