From b887c1a277c5060af7234cf4404bd626cfd88c3a Mon Sep 17 00:00:00 2001 From: Riccardo Biraghi Date: Wed, 13 Jun 2018 17:45:27 +0100 Subject: [PATCH 01/12] Fix ExecContainer executing commands twice --- test/docker/docker_api_test.go | 8 ++++--- test/docker/docker_api_test_util.go | 36 +++++++++++++++-------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/test/docker/docker_api_test.go b/test/docker/docker_api_test.go index e93bfec9..7f828008 100644 --- a/test/docker/docker_api_test.go +++ b/test/docker/docker_api_test.go @@ -329,9 +329,9 @@ func TestVolumeUnmount(t *testing.T) { defer cleanContainer(t, cli, ctr.ID) waitForReady(t, cli, ctr.ID) // Unmount the volume as root - rc, _ := execContainer(t, cli, ctr.ID, "root", []string{"umount", "-l", "-f", "/mnt/mqm"}) + rc, out := execContainer(t, cli, ctr.ID, "root", []string{"umount", "-l", "-f", "/mnt/mqm"}) if rc != 0 { - t.Fatalf("Expected umount to work with rc=0, got %v", rc) + t.Fatalf("Expected umount to work with rc=0, got %v. Output was: %s", rc, out) } time.Sleep(3 * time.Second) rc, _ = execContainer(t, cli, ctr.ID, "mqm", []string{"chkmqhealthy"}) @@ -366,7 +366,9 @@ func TestZombies(t *testing.T) { // will be adopted by PID 1, and should then be reaped when they die. _, out := execContainer(t, cli, id, "mqm", []string{"pkill", "--signal", "kill", "-c", "amqzxma0"}) if out == "0" { - t.Fatalf("Expected pkill to kill a process, got %v", out) + t.Log("Failed to kill process 'amqzxma0'") + _, out := execContainer(t, cli, id, "root", []string{"ps", "-lA"}) + t.Fatalf("Here is a list of currently running processes:\n%s", out) } time.Sleep(3 * time.Second) _, out = execContainer(t, cli, id, "mqm", []string{"bash", "-c", "ps -lA | grep '^. 
Z'"}) diff --git a/test/docker/docker_api_test_util.go b/test/docker/docker_api_test_util.go index dc3a9894..5d712129 100644 --- a/test/docker/docker_api_test_util.go +++ b/test/docker/docker_api_test_util.go @@ -328,7 +328,6 @@ func waitForContainer(t *testing.T, cli *client.Client, ID string, timeout int64 // execContainer runs a command in a running container, and returns the exit code and output func execContainer(t *testing.T, cli *client.Client, ID string, user string, cmd []string) (int, string) { -rerun: config := types.ExecConfig{ User: user, Privileged: false, @@ -357,30 +356,33 @@ rerun: } // Wait for the command to finish var exitcode int + var outputStr string for { inspect, err := cli.ContainerExecInspect(context.Background(), resp.ID) if err != nil { t.Fatal(err) } - if !inspect.Running { - exitcode = inspect.ExitCode - break + if inspect.Running { + continue + } + + exitcode = inspect.ExitCode + buf := new(bytes.Buffer) + // Each output line has a header, which needs to be removed + _, err = stdcopy.StdCopy(buf, buf, hijack.Reader) + if err != nil { + t.Fatal(err) } - } - buf := new(bytes.Buffer) - // Each output line has a header, which needs to be removed - _, err = stdcopy.StdCopy(buf, buf, hijack.Reader) - if err != nil { - t.Fatal(err) - } - outputStr := strings.TrimSpace(buf.String()) + outputStr = strings.TrimSpace(buf.String()) - // Before we go let's just double check it did actually run because sometimes we get a "Exec command already running error" - alreadyRunningErr := regexp.MustCompile("Error: Exec command .* is already running") - if alreadyRunningErr.MatchString(outputStr) { - time.Sleep(1 * time.Second) - goto rerun + // Before we go let's just double check it did actually finish running + // because sometimes we get a "Exec command already running error" + alreadyRunningErr := regexp.MustCompile("Error: Exec command .* is already running") + if alreadyRunningErr.MatchString(outputStr) { + continue + } + break } return exitcode, 
outputStr From 16e244427bf5ece26add1d1685c6dcaa6f23726d Mon Sep 17 00:00:00 2001 From: Riccardo Biraghi Date: Thu, 14 Jun 2018 11:54:35 +0100 Subject: [PATCH 02/12] Fix timing in execContainer --- test/docker/docker_api_test_util.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/test/docker/docker_api_test_util.go b/test/docker/docker_api_test_util.go index a2ad493b..fd879916 100644 --- a/test/docker/docker_api_test_util.go +++ b/test/docker/docker_api_test_util.go @@ -27,7 +27,6 @@ import ( "os" "os/exec" "path/filepath" - "regexp" "runtime" "strconv" "strings" @@ -347,7 +346,9 @@ func execContainer(t *testing.T, cli *client.Client, ID string, user string, cmd if err != nil { t.Fatal(err) } - cli.ContainerExecStart(context.Background(), resp.ID, types.ExecStartCheck{ + defer hijack.Close() + time.Sleep(time.Millisecond * 10) + err = cli.ContainerExecStart(context.Background(), resp.ID, types.ExecStartCheck{ Detach: false, Tty: false, }) @@ -376,12 +377,17 @@ func execContainer(t *testing.T, cli *client.Client, ID string, user string, cmd outputStr = strings.TrimSpace(buf.String()) - // Before we go let's just double check it did actually finish running - // because sometimes we get a "Exec command already running error" - alreadyRunningErr := regexp.MustCompile("Error: Exec command .* is already running") - if alreadyRunningErr.MatchString(outputStr) { - continue - } + /* Commented out on 14/06/2018 as it might not be needed after adding + * pause between ContainerExecAttach and ContainerExecStart. + * TODO If intermittent failures do not occur, remove and refactor. 
+ * + * // Before we go let's just double check it did actually finish running + * // because sometimes we get a "Exec command already running error" + * alreadyRunningErr := regexp.MustCompile("Error: Exec command .* is already running") + * if alreadyRunningErr.MatchString(outputStr) { + * continue + * } + */ break } From d4d5e09a02d8fa593737464d4034e5d588e8d44a Mon Sep 17 00:00:00 2001 From: Riccardo Biraghi Date: Thu, 14 Jun 2018 12:41:06 +0100 Subject: [PATCH 03/12] Add waitForReady to TestCorrectLicense --- test/docker/docker_api_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/docker/docker_api_test.go b/test/docker/docker_api_test.go index 7f828008..0299556b 100644 --- a/test/docker/docker_api_test.go +++ b/test/docker/docker_api_test.go @@ -637,6 +637,7 @@ func TestCorrectLicense(t *testing.T) { } id := runContainer(t, cli, &containerConfig) defer cleanContainer(t, cli, id) + waitForReady(t, cli, id) rc, license := execContainer(t, cli, id, "mqm", []string{"dspmqver", "-f", "8192", "-b"}) if rc != 0 { From 616cb179d627d91d92fbf235cf8401f569165798 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Thu, 14 Jun 2018 12:49:24 +0100 Subject: [PATCH 04/12] Doc changes for metrics --- README.md | 1 + docs/internals.md | 16 +++++++++++++++- docs/usage.md | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 69902447..a3de6982 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Note that in order to use the image, it is necessary to accept the terms of the - **LANG** - Set this to the language you would like the license to be printed in. - **MQ_QMGR_NAME** - Set this to the name you want your Queue Manager to be created with. - **LOG_FORMAT** - Set this to change the format of the logs which are printed on the container's stdout. Set to "json" to use JSON format (JSON object per line); set to "basic" to use a simple human-readable format. Defaults to "basic". 
+- **MQ_ENABLE_METRICS** - Set this to `true` to generate Prometheus metrics for your Queue Manager. See the [default developer configuration docs](docs/developer-config.md) for the extra environment variables supported by the MQ Advanced for Developers image. diff --git a/docs/internals.md b/docs/internals.md index a39fb26f..f98f7e6e 100644 --- a/docs/internals.md +++ b/docs/internals.md @@ -24,6 +24,7 @@ The `runmqserver` command has the following responsibilities: - Works as PID 1, so is responsible for [reaping zombie processes](https://blog.phusion.nl/2015/01/20/docker-and-the-pid-1-zombie-reaping-problem/) * Creating and starting a queue manager * Configuring the queue manager, by running any MQSC scripts found under `/etc/mqm` +* Starting Prometheus metrics generation for the queue manager (if enabled) * Indicates to the `chkmqready` command that configuration is complete, and that normal readiness checking can happen. This is done by writing a file into `/run/runmqserver` In addition, for MQ Advanced for Developers only, the web server is started. @@ -35,4 +36,17 @@ The `runmqdevserver` command is added to the MQ Advanced for Developers image on 2. Generates MQSC files to put in `/etc/mqm`, based on a template, which is updated with values based on supplied environment variables. 3. If requested, it creates TLS key stores under `/run/runmqdevserver`, and configures MQ and the web server to use them -A special version of `runmqserver` is used in the developer image, which performs extra actions like starting the web server. This is built using the `mqdev` [build constraint](https://golang.org/pkg/go/build/#hdr-Build_Constraints). \ No newline at end of file +A special version of `runmqserver` is used in the developer image, which performs extra actions like starting the web server. This is built using the `mqdev` [build constraint](https://golang.org/pkg/go/build/#hdr-Build_Constraints). 
+ +## Prometheus metrics +[Prometheus](https://prometheus.io) metrics are generated for the queue manager as follows: + +1. A connection is established with the queue manager +2. Metrics are discovered by subscribing to topics that provide metadata on metric classes, types and elements +3. Subscriptions are then created for each topic that provides this metric data +4. Metrics are initialised using Prometheus names mapped from their element descriptions +5. The metrics are then registered with the Prometheus registry as Prometheus Gauges +6. Publications are processed on a periodic basis to retrieve the metric data +7. An HTTP server is set up to listen for requests from Prometheus on `/metrics` port `9157` +8. Prometheus requests are handled by updating the Prometheus Gauges with the latest metric data +9. These updated Prometheus Gauges are then collected by the Prometheus registry diff --git a/docs/usage.md b/docs/usage.md index 4614338a..007a9a2f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,6 +37,21 @@ docker run \ The Docker image always uses `/mnt/mqm` for MQ data, which is correctly linked for you under `/var/mqm` at runtime. This is to handle problems with file permissions on some platforms. +## Running with the default configuration and Prometheus metrics enabled +You can run a queue manager with [Prometheus](https://prometheus.io) metrics enabled. 
The following command will generate Prometheus metrics for your queue manager on `/metrics` port `9157`: + +``` +docker run \ + --env LICENSE=accept \ + --env MQ_QMGR_NAME=QM1 \ + --env MQ_ENABLE_METRICS=true \ + --publish 1414:1414 \ + --publish 9443:9443 \ + --publish 9157:9157 \ + --detach \ + ibmcom/mq +``` + ## Customizing the queue manager configuration You can customize the configuration in several ways: From 2e0aad03187785f30746b3013a7bb4344cad1b9f Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Mon, 18 Jun 2018 13:24:12 +0100 Subject: [PATCH 05/12] Update metric names and allow skipping of specific metrics --- internal/metrics/mapping.go | 203 ++++++++++++++++-------------- internal/metrics/mapping_test.go | 4 +- internal/metrics/update.go | 34 ++--- test/docker/mqmetric_test_util.go | 162 ++++++++++++------------ 4 files changed, 211 insertions(+), 192 deletions(-) diff --git a/internal/metrics/mapping.go b/internal/metrics/mapping.go index 6edaf2e7..439fb894 100644 --- a/internal/metrics/mapping.go +++ b/internal/metrics/mapping.go @@ -17,103 +17,116 @@ limitations under the License. 
// Package metrics contains code to provide metrics for the queue manager package metrics +type metricLookup struct { + name string + enabled bool +} + // generateMetricNamesMap generates metric names mapped from their description -func generateMetricNamesMap() map[string]string { +func generateMetricNamesMap() map[string]metricLookup { - metricNamesMap := map[string]string{ - "CPU/SystemSummary/CPU load - five minute average": "cpu_load_five_minute_average_percentage", - "CPU/SystemSummary/CPU load - fifteen minute average": "cpu_load_fifteen_minute_average_percentage", - "CPU/SystemSummary/RAM free percentage": "ram_free_percentage", - "CPU/SystemSummary/RAM total bytes": "ram_total_bytes", - "CPU/SystemSummary/User CPU time percentage": "user_cpu_time_percentage", - "CPU/SystemSummary/System CPU time percentage": "system_cpu_time_percentage", - "CPU/SystemSummary/CPU load - one minute average": "cpu_load_one_minute_average_percentage", - "CPU/QMgrSummary/System CPU time - percentage estimate for queue manager": "system_cpu_time_estimate_for_queue_manager_percentage", - "CPU/QMgrSummary/RAM total bytes - estimate for queue manager": "ram_total_estimate_for_queue_manager_bytes", - "CPU/QMgrSummary/User CPU time - percentage estimate for queue manager": "user_cpu_time_estimate_for_queue_manager_percentage", - "DISK/SystemSummary/MQ trace file system - bytes in use": "mq_trace_file_system_in_use_bytes", - "DISK/SystemSummary/MQ trace file system - free space": "mq_trace_file_system_free_space_percentage", - "DISK/SystemSummary/MQ errors file system - bytes in use": "mq_errors_file_system_in_use_bytes", - "DISK/SystemSummary/MQ errors file system - free space": "mq_errors_file_system_free_space_percentage", - "DISK/SystemSummary/MQ FDC file count": "mq_fdc_file_count", - "DISK/QMgrSummary/Queue Manager file system - bytes in use": "queue_manager_file_system_in_use_bytes", - "DISK/QMgrSummary/Queue Manager file system - free space": 
"queue_manager_file_system_free_space_percentage", - "DISK/Log/Log - bytes occupied by reusable extents": "log_occupied_by_reusable_extents_bytes", - "DISK/Log/Log - write size": "log_write_size_bytes", - "DISK/Log/Log - bytes in use": "log_in_use_bytes", - "DISK/Log/Log - logical bytes written": "log_logical_written_bytes", - "DISK/Log/Log - write latency": "log_write_latency_seconds", - "DISK/Log/Log - bytes required for media recovery": "log_required_for_media_recovery_bytes", - "DISK/Log/Log - current primary space in use": "log_current_primary_space_in_use_percentage", - "DISK/Log/Log - workload primary space utilization": "log_workload_primary_space_utilization_percentage", - "DISK/Log/Log - bytes occupied by extents waiting to be archived": "log_occupied_by_extents_waiting_to_be_archived_bytes", - "DISK/Log/Log - bytes max": "log_max_bytes", - "DISK/Log/Log file system - bytes in use": "log_file_system_in_use_bytes", - "DISK/Log/Log file system - bytes max": "log_file_system_max_bytes", - "DISK/Log/Log - physical bytes written": "log_physical_written_bytes", - "STATMQI/SUBSCRIBE/Create durable subscription count": "create_durable_subscription_count", - "STATMQI/SUBSCRIBE/Resume durable subscription count": "resume_durable_subscription_count", - "STATMQI/SUBSCRIBE/Create non-durable subscription count": "create_non_durable_subscription_count", - "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": "failed_create_alter_resume_subscription_count", - "STATMQI/SUBSCRIBE/Subscription delete failure count": "subscription_delete_failure_count", - "STATMQI/SUBSCRIBE/MQSUBRQ count": "mqsubrq_count", - "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": "failed_mqsubrq_count", - "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": "durable_subscriber_high_water_mark_count", - "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": "non_durable_subscriber_high_water_mark_count", - "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": 
"durable_subscriber_low_water_mark_count", - "STATMQI/SUBSCRIBE/Delete non-durable subscription count": "delete_non_durable_subscription_count", - "STATMQI/SUBSCRIBE/Alter durable subscription count": "alter_durable_subscription_count", - "STATMQI/SUBSCRIBE/Delete durable subscription count": "delete_durable_subscription_count", - "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": "non_durable_subscriber_low_water_mark_count", - "STATMQI/PUBLISH/Interval total topic bytes put": "interval_total_topic_put_bytes", - "STATMQI/PUBLISH/Published to subscribers - message count": "published_to_subscribers_message_count", - "STATMQI/PUBLISH/Published to subscribers - byte count": "published_to_subscribers_bytes", - "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": "non_persistent_topic_mqput_mqput1_count", - "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": "persistent_topic_mqput_mqput1_count", - "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": "failed_topic_mqput_mqput1_count", - "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": "topic_mqput_mqput1_interval_count", - "STATMQI/CONNDISC/MQCONN/MQCONNX count": "mqconn_mqconnx_count", - "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": "failed_mqconn_mqconnx_count", - "STATMQI/CONNDISC/Concurrent connections - high water mark": "concurrent_connections_high_water_mark_count", - "STATMQI/CONNDISC/MQDISC count": "mqdisc_count", - "STATMQI/OPENCLOSE/MQOPEN count": "mqopen_count", - "STATMQI/OPENCLOSE/Failed MQOPEN count": "failed_mqopen_count", - "STATMQI/OPENCLOSE/MQCLOSE count": "mqclose_count", - "STATMQI/OPENCLOSE/Failed MQCLOSE count": "failed_mqclose_count", - "STATMQI/INQSET/MQINQ count": "mqinq_count", - "STATMQI/INQSET/Failed MQINQ count": "failed_mqinq_count", - "STATMQI/INQSET/MQSET count": "mqset_count", - "STATMQI/INQSET/Failed MQSET count": "failed_mqset_count", - "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": "interval_total_mqput_mqput1_bytes", - "STATMQI/PUT/Persistent 
message MQPUT count": "persistent_message_mqput_count", - "STATMQI/PUT/Failed MQPUT count": "failed_mqput_count", - "STATMQI/PUT/Non-persistent message MQPUT1 count": "non_persistent_message_mqput1_count", - "STATMQI/PUT/Persistent message MQPUT1 count": "persistent_message_mqput1_count", - "STATMQI/PUT/Failed MQPUT1 count": "failed_mqput1_count", - "STATMQI/PUT/Put non-persistent messages - byte count": "put_non_persistent_messages_bytes", - "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": "interval_total_mqput_mqput1_count", - "STATMQI/PUT/Put persistent messages - byte count": "put_persistent_messages_bytes", - "STATMQI/PUT/MQSTAT count": "mqstat_count", - "STATMQI/PUT/Non-persistent message MQPUT count": "non_persistent_message_mqput_count", - "STATMQI/GET/Interval total destructive get- count": "interval_total_destructive_get_count", - "STATMQI/GET/MQCTL count": "mqctl_count", - "STATMQI/GET/Failed MQGET - count": "failed_mqget_count", - "STATMQI/GET/Got non-persistent messages - byte count": "got_non_persistent_messages_bytes", - "STATMQI/GET/Persistent message browse - count": "persistent_message_browse_count", - "STATMQI/GET/Expired message count": "expired_message_count", - "STATMQI/GET/Purged queue count": "purged_queue_count", - "STATMQI/GET/Interval total destructive get - byte count": "interval_total_destructive_get_bytes", - "STATMQI/GET/Non-persistent message destructive get - count": "non_persistent_message_destructive_get_count", - "STATMQI/GET/Got persistent messages - byte count": "got_persistent_messages_bytes", - "STATMQI/GET/Non-persistent message browse - count": "non_persistent_message_browse_count", - "STATMQI/GET/Failed browse count": "failed_browse_count", - "STATMQI/GET/Persistent message destructive get - count": "persistent_message_destructive_get_count", - "STATMQI/GET/Non-persistent message browse - byte count": "non_persistent_message_browse_bytes", - "STATMQI/GET/Persistent message browse - byte count": 
"persistent_message_browse_bytes", - "STATMQI/GET/MQCB count": "mqcb_count", - "STATMQI/GET/Failed MQCB count": "failed_mqcb_count", - "STATMQI/SYNCPOINT/Commit count": "commit_count", - "STATMQI/SYNCPOINT/Rollback count": "rollback_count", + metricNamesMap := map[string]metricLookup{ + "CPU/SystemSummary/CPU load - one minute average": setMetricName("cpu_load_one_minute_average_percentage", true), + "CPU/SystemSummary/CPU load - five minute average": setMetricName("cpu_load_five_minute_average_percentage", true), + "CPU/SystemSummary/CPU load - fifteen minute average": setMetricName("cpu_load_fifteen_minute_average_percentage", true), + "CPU/SystemSummary/System CPU time percentage": setMetricName("system_cpu_time_percentage", true), + "CPU/SystemSummary/User CPU time percentage": setMetricName("user_cpu_time_percentage", true), + "CPU/SystemSummary/RAM free percentage": setMetricName("ram_free_percentage", true), + "CPU/SystemSummary/RAM total bytes": setMetricName("system_ram_size_bytes", true), + "CPU/QMgrSummary/System CPU time - percentage estimate for queue manager": setMetricName("system_cpu_time_estimate_for_queue_manager_percentage", true), + "CPU/QMgrSummary/User CPU time - percentage estimate for queue manager": setMetricName("user_cpu_time_estimate_for_queue_manager_percentage", true), + "CPU/QMgrSummary/RAM total bytes - estimate for queue manager": setMetricName("ram_usage_estimate_for_queue_manager_bytes", true), + "DISK/SystemSummary/MQ trace file system - free space": setMetricName("trace_file_system_free_space_percentage", true), + "DISK/SystemSummary/MQ trace file system - bytes in use": setMetricName("trace_file_system_in_use_bytes", true), + "DISK/SystemSummary/MQ errors file system - free space": setMetricName("errors_file_system_free_space_percentage", true), + "DISK/SystemSummary/MQ errors file system - bytes in use": setMetricName("errors_file_system_in_use_bytes", true), + "DISK/SystemSummary/MQ FDC file count": setMetricName("fdc_files", 
true), + "DISK/QMgrSummary/Queue Manager file system - free space": setMetricName("queue_manager_file_system_free_space_percentage", true), + "DISK/QMgrSummary/Queue Manager file system - bytes in use": setMetricName("queue_manager_file_system_in_use_bytes", true), + "DISK/Log/Log - logical bytes written": setMetricName("log_logical_written_bytes_interval_total", true), + "DISK/Log/Log - physical bytes written": setMetricName("log_physical_written_bytes_interval_total", true), + "DISK/Log/Log - current primary space in use": setMetricName("log_primary_space_in_use_percentage", true), + "DISK/Log/Log - workload primary space utilization": setMetricName("log_workload_primary_space_utilization_percentage", true), + "DISK/Log/Log - write latency": setMetricName("log_write_latency_seconds", true), + "DISK/Log/Log - bytes max": setMetricName("log_max_bytes", true), + "DISK/Log/Log - write size": setMetricName("log_write_size_bytes", true), + "DISK/Log/Log - bytes in use": setMetricName("log_in_use_bytes", true), + "DISK/Log/Log file system - bytes max": setMetricName("log_file_system_max_bytes", true), + "DISK/Log/Log file system - bytes in use": setMetricName("log_file_system_in_use_bytes", true), + "DISK/Log/Log - bytes occupied by reusable extents": setMetricName("log_occupied_by_reusable_extents_bytes", true), + "DISK/Log/Log - bytes occupied by extents waiting to be archived": setMetricName("log_occupied_by_extents_waiting_to_be_archived_bytes", true), + "DISK/Log/Log - bytes required for media recovery": setMetricName("log_required_for_media_recovery_bytes", true), + "STATMQI/SUBSCRIBE/Create durable subscription count": setMetricName("durable_subscription_create_interval_total", true), + "STATMQI/SUBSCRIBE/Alter durable subscription count": setMetricName("durable_subscription_alter_interval_total", true), + "STATMQI/SUBSCRIBE/Resume durable subscription count": setMetricName("durable_subscription_resume_interval_total", true), + "STATMQI/SUBSCRIBE/Delete durable 
subscription count": setMetricName("durable_subscription_delete_interval_total", true), + "STATMQI/SUBSCRIBE/Create non-durable subscription count": setMetricName("non_durable_subscription_create_interval_total", true), + "STATMQI/SUBSCRIBE/Delete non-durable subscription count": setMetricName("non_durable_subscription_delete_interval_total", true), + "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": setMetricName("failed_subscription_create_alter_resume_interval_total", true), + "STATMQI/SUBSCRIBE/Subscription delete failure count": setMetricName("failed_subscription_delete_interval_total", true), + "STATMQI/SUBSCRIBE/MQSUBRQ count": setMetricName("mqsubrq_interval_total", true), + "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": setMetricName("failed_mqsubrq_interval_total", true), + "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": setMetricName("durable_subscriber_high_water_mark", true), + "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": setMetricName("durable_subscriber_low_water_mark", true), + "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": setMetricName("non_durable_subscriber_high_water_mark", true), + "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": setMetricName("non_durable_subscriber_low_water_mark", true), + "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": setMetricName("topic_mqput_mqput1_interval_total", true), + "STATMQI/PUBLISH/Interval total topic bytes put": setMetricName("topic_put_bytes_interval_total", true), + "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": setMetricName("failed_topic_mqput_mqput1_interval_total", true), + "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": setMetricName("persistent_topic_mqput_mqput1_interval_total", true), + "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": setMetricName("non_persistent_topic_mqput_mqput1_interval_total", true), + "STATMQI/PUBLISH/Published to subscribers - message count": 
setMetricName("published_to_subscribers_message_interval_total", true), + "STATMQI/PUBLISH/Published to subscribers - byte count": setMetricName("published_to_subscribers_bytes_interval_total", true), + "STATMQI/CONNDISC/MQCONN/MQCONNX count": setMetricName("mqconn_mqconnx_interval_total", true), + "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": setMetricName("failed_mqconn_mqconnx_interval_total", true), + "STATMQI/CONNDISC/MQDISC count": setMetricName("mqdisc_interval_total", true), + "STATMQI/CONNDISC/Concurrent connections - high water mark": setMetricName("concurrent_connections_high_water_mark", true), + "STATMQI/OPENCLOSE/MQOPEN count": setMetricName("mqopen_interval_total", true), + "STATMQI/OPENCLOSE/Failed MQOPEN count": setMetricName("failed_mqopen_interval_total", true), + "STATMQI/OPENCLOSE/MQCLOSE count": setMetricName("mqclose_interval_total", true), + "STATMQI/OPENCLOSE/Failed MQCLOSE count": setMetricName("failed_mqclose_interval_total", true), + "STATMQI/INQSET/MQINQ count": setMetricName("mqinq_interval_total", true), + "STATMQI/INQSET/Failed MQINQ count": setMetricName("failed_mqinq_interval_total", true), + "STATMQI/INQSET/MQSET count": setMetricName("mqset_interval_total", true), + "STATMQI/INQSET/Failed MQSET count": setMetricName("failed_mqset_interval_total", true), + "STATMQI/PUT/Persistent message MQPUT count": setMetricName("persistent_message_mqput_interval_total", true), + "STATMQI/PUT/Persistent message MQPUT1 count": setMetricName("persistent_message_mqput1_interval_total", true), + "STATMQI/PUT/Put persistent messages - byte count": setMetricName("persistent_message_put_bytes_interval_total", true), + "STATMQI/PUT/Non-persistent message MQPUT count": setMetricName("non_persistent_message_mqput_interval_total", true), + "STATMQI/PUT/Non-persistent message MQPUT1 count": setMetricName("non_persistent_message_mqput1_interval_total", true), + "STATMQI/PUT/Put non-persistent messages - byte count": 
setMetricName("non_persistent_message_put_bytes_interval_total", true), + "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": setMetricName("mqput_mqput1_interval_total", true), + "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": setMetricName("mqput_mqput1_bytes_interval_total", true), + "STATMQI/PUT/Failed MQPUT count": setMetricName("failed_mqput_interval_total", true), + "STATMQI/PUT/Failed MQPUT1 count": setMetricName("failed_mqput1_interval_total", true), + "STATMQI/PUT/MQSTAT count": setMetricName("mqstat_interval_total", true), + "STATMQI/GET/Persistent message destructive get - count": setMetricName("persistent_message_destructive_get_interval_total", true), + "STATMQI/GET/Persistent message browse - count": setMetricName("persistent_message_browse_interval_total", true), + "STATMQI/GET/Got persistent messages - byte count": setMetricName("persistent_message_get_bytes_interval_total", true), + "STATMQI/GET/Persistent message browse - byte count": setMetricName("persistent_message_browse_bytes_interval_total", true), + "STATMQI/GET/Non-persistent message destructive get - count": setMetricName("non_persistent_message_destructive_get_interval_total", true), + "STATMQI/GET/Non-persistent message browse - count": setMetricName("non_persistent_message_browse_interval_total", true), + "STATMQI/GET/Got non-persistent messages - byte count": setMetricName("non_persistent_message_get_bytes_interval_total", true), + "STATMQI/GET/Non-persistent message browse - byte count": setMetricName("non_persistent_message_browse_bytes_interval_total", true), + "STATMQI/GET/Interval total destructive get- count": setMetricName("destructive_get_interval_total", true), + "STATMQI/GET/Interval total destructive get - byte count": setMetricName("destructive_get_bytes_interval_total", true), + "STATMQI/GET/Failed MQGET - count": setMetricName("failed_mqget_interval_total", true), + "STATMQI/GET/Failed browse count": setMetricName("failed_browse_interval_total", true), + 
"STATMQI/GET/MQCTL count": setMetricName("mqctl_interval_total", true), + "STATMQI/GET/Expired message count": setMetricName("expired_message_interval_total", true), + "STATMQI/GET/Purged queue count": setMetricName("purged_queue_interval_total", true), + "STATMQI/GET/MQCB count": setMetricName("mqcb_interval_total", true), + "STATMQI/GET/Failed MQCB count": setMetricName("failed_mqcb_interval_total", true), + "STATMQI/SYNCPOINT/Commit count": setMetricName("commit_interval_total", true), + "STATMQI/SYNCPOINT/Rollback count": setMetricName("rollback_interval_total", true), } return metricNamesMap } + +// setMetricName sets the metric name & specifies if the metric is enabled +func setMetricName(name string, enabled bool) metricLookup { + return metricLookup{ + name: name, + enabled: enabled, + } +} diff --git a/internal/metrics/mapping_test.go b/internal/metrics/mapping_test.go index 1c4cafab..5cb707fc 100644 --- a/internal/metrics/mapping_test.go +++ b/internal/metrics/mapping_test.go @@ -30,8 +30,8 @@ func TestGenerateMetricNamesMap(t *testing.T) { if !ok { t.Errorf("No metric name mapping found for %s", testKey1) } else { - if actual != testElement1Name { - t.Errorf("Expected metric name=%s; actual %s", testElement1Name, actual) + if actual.name != testElement1Name { + t.Errorf("Expected metric name=%s; actual %s", testElement1Name, actual.name) } } } diff --git a/internal/metrics/update.go b/internal/metrics/update.go index 0a443d2e..b8429b50 100644 --- a/internal/metrics/update.go +++ b/internal/metrics/update.go @@ -143,23 +143,29 @@ func initialiseMetrics(log *logger.Logger) (map[string]*metricData, error) { key := makeKey(metricElement) // Get metric name from mapping - if metricName, found := metricNamesMap[key]; found { - - // Set metric details - metric := metricData{ - name: metricName, - description: metricElement.Description, - } - - // Add metric - if _, exists := metrics[key]; !exists { - metrics[key] = &metric + if metricLookup, found := 
metricNamesMap[key]; found { + + // Check if metric is enabled + if metricLookup.enabled { + + // Set metric details + metric := metricData{ + name: metricLookup.name, + description: metricElement.Description, + } + + // Add metric + if _, exists := metrics[key]; !exists { + metrics[key] = &metric + } else { + log.Errorf("Metrics Error: Found duplicate metric key [%s]", key) + validMetrics = false + } } else { - log.Errorf("Metrics Error: Found duplicate metric key %s", key) - validMetrics = false + log.Debugf("Metrics: Skipping metric, metric is not enabled for key [%s]", key) } } else { - log.Errorf("Metrics Error: Skipping metric, unexpected key %s", key) + log.Errorf("Metrics Error: Skipping metric, unexpected key [%s]", key) validMetrics = false } } diff --git a/test/docker/mqmetric_test_util.go b/test/docker/mqmetric_test_util.go index 9a1af05f..cfe42a22 100644 --- a/test/docker/mqmetric_test_util.go +++ b/test/docker/mqmetric_test_util.go @@ -164,96 +164,96 @@ func metricNames() []string { // - log_required_for_media_recovery_bytes names := []string{ + "cpu_load_one_minute_average_percentage", "cpu_load_five_minute_average_percentage", "cpu_load_fifteen_minute_average_percentage", - "ram_free_percentage", - "ram_total_bytes", - "user_cpu_time_percentage", "system_cpu_time_percentage", - "cpu_load_one_minute_average_percentage", + "user_cpu_time_percentage", + "ram_free_percentage", + "system_ram_size_bytes", "system_cpu_time_estimate_for_queue_manager_percentage", - "ram_total_estimate_for_queue_manager_bytes", "user_cpu_time_estimate_for_queue_manager_percentage", - "mq_trace_file_system_in_use_bytes", - "mq_trace_file_system_free_space_percentage", - "mq_errors_file_system_in_use_bytes", - "mq_errors_file_system_free_space_percentage", - "mq_fdc_file_count", - "queue_manager_file_system_in_use_bytes", + "ram_usage_estimate_for_queue_manager_bytes", + "trace_file_system_free_space_percentage", + "trace_file_system_in_use_bytes", + 
"errors_file_system_free_space_percentage", + "errors_file_system_in_use_bytes", + "fdc_files", "queue_manager_file_system_free_space_percentage", - "log_write_size_bytes", - "log_in_use_bytes", - "log_logical_written_bytes", - "log_write_latency_seconds", - "log_current_primary_space_in_use_percentage", + "queue_manager_file_system_in_use_bytes", + "log_logical_written_bytes_interval_total", + "log_physical_written_bytes_interval_total", + "log_primary_space_in_use_percentage", "log_workload_primary_space_utilization_percentage", + "log_write_latency_seconds", "log_max_bytes", - "log_file_system_in_use_bytes", + "log_write_size_bytes", + "log_in_use_bytes", "log_file_system_max_bytes", - "log_physical_written_bytes", - "create_durable_subscription_count", - "resume_durable_subscription_count", - "create_non_durable_subscription_count", - "failed_create_alter_resume_subscription_count", - "subscription_delete_failure_count", - "mqsubrq_count", - "failed_mqsubrq_count", - "durable_subscriber_high_water_mark_count", - "non_durable_subscriber_high_water_mark_count", - "durable_subscriber_low_water_mark_count", - "delete_non_durable_subscription_count", - "alter_durable_subscription_count", - "delete_durable_subscription_count", - "non_durable_subscriber_low_water_mark_count", - "interval_total_topic_put_bytes", - "published_to_subscribers_message_count", - "published_to_subscribers_bytes", - "non_persistent_topic_mqput_mqput1_count", - "persistent_topic_mqput_mqput1_count", - "failed_topic_mqput_mqput1_count", - "topic_mqput_mqput1_interval_count", - "mqconn_mqconnx_count", - "failed_mqconn_mqconnx_count", - "concurrent_connections_high_water_mark_count", - "mqdisc_count", - "mqopen_count", - "failed_mqopen_count", - "mqclose_count", - "failed_mqclose_count", - "mqinq_count", - "failed_mqinq_count", - "mqset_count", - "failed_mqset_count", - "interval_total_mqput_mqput1_bytes", - "persistent_message_mqput_count", - "failed_mqput_count", - 
"non_persistent_message_mqput1_count", - "persistent_message_mqput1_count", - "failed_mqput1_count", - "put_non_persistent_messages_bytes", - "interval_total_mqput_mqput1_count", - "put_persistent_messages_bytes", - "mqstat_count", - "non_persistent_message_mqput_count", - "interval_total_destructive_get_count", - "mqctl_count", - "failed_mqget_count", - "got_non_persistent_messages_bytes", - "persistent_message_browse_count", - "expired_message_count", - "purged_queue_count", - "interval_total_destructive_get_bytes", - "non_persistent_message_destructive_get_count", - "got_persistent_messages_bytes", - "non_persistent_message_browse_count", - "failed_browse_count", - "persistent_message_destructive_get_count", - "non_persistent_message_browse_bytes", - "persistent_message_browse_bytes", - "mqcb_count", - "failed_mqcb_count", - "commit_count", - "rollback_count", + "log_file_system_in_use_bytes", + "durable_subscription_create_interval_total", + "durable_subscription_alter_interval_total", + "durable_subscription_resume_interval_total", + "durable_subscription_delete_interval_total", + "non_durable_subscription_create_interval_total", + "non_durable_subscription_delete_interval_total", + "failed_subscription_create_alter_resume_interval_total", + "failed_subscription_delete_interval_total", + "mqsubrq_interval_total", + "failed_mqsubrq_interval_total", + "durable_subscriber_high_water_mark", + "durable_subscriber_low_water_mark", + "non_durable_subscriber_high_water_mark", + "non_durable_subscriber_low_water_mark", + "topic_mqput_mqput1_interval_total", + "topic_put_bytes_interval_total", + "failed_topic_mqput_mqput1_interval_total", + "persistent_topic_mqput_mqput1_interval_total", + "non_persistent_topic_mqput_mqput1_interval_total", + "published_to_subscribers_message_interval_total", + "published_to_subscribers_bytes_interval_total", + "mqconn_mqconnx_interval_total", + "failed_mqconn_mqconnx_interval_total", + "mqdisc_interval_total", + 
"concurrent_connections_high_water_mark", + "mqopen_interval_total", + "failed_mqopen_interval_total", + "mqclose_interval_total", + "failed_mqclose_interval_total", + "mqinq_interval_total", + "failed_mqinq_interval_total", + "mqset_interval_total", + "failed_mqset_interval_total", + "persistent_message_mqput_interval_total", + "persistent_message_mqput1_interval_total", + "persistent_message_put_bytes_interval_total", + "non_persistent_message_mqput_interval_total", + "non_persistent_message_mqput1_interval_total", + "non_persistent_message_put_bytes_interval_total", + "mqput_mqput1_interval_total", + "mqput_mqput1_bytes_interval_total", + "failed_mqput_interval_total", + "failed_mqput1_interval_total", + "mqstat_interval_total", + "persistent_message_destructive_get_interval_total", + "persistent_message_browse_interval_total", + "persistent_message_get_bytes_interval_total", + "persistent_message_browse_bytes_interval_total", + "non_persistent_message_destructive_get_interval_total", + "non_persistent_message_browse_interval_total", + "non_persistent_message_get_bytes_interval_total", + "non_persistent_message_browse_bytes_interval_total", + "destructive_get_interval_total", + "destructive_get_bytes_interval_total", + "failed_mqget_interval_total", + "failed_browse_interval_total", + "mqctl_interval_total", + "expired_message_interval_total", + "purged_queue_interval_total", + "mqcb_interval_total", + "failed_mqcb_interval_total", + "commit_interval_total", + "rollback_interval_total", } return names } From a1ae6b3a9732d538fd7fbc935f2aa1d37d11f237 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Mon, 18 Jun 2018 14:28:46 +0100 Subject: [PATCH 06/12] Improve metrics name mapping code --- internal/metrics/mapping.go | 194 +++++++++++++++++------------------- 1 file changed, 93 insertions(+), 101 deletions(-) diff --git a/internal/metrics/mapping.go b/internal/metrics/mapping.go index 439fb894..646a9395 100644 --- a/internal/metrics/mapping.go +++ 
b/internal/metrics/mapping.go @@ -26,107 +26,99 @@ type metricLookup struct { func generateMetricNamesMap() map[string]metricLookup { metricNamesMap := map[string]metricLookup{ - "CPU/SystemSummary/CPU load - one minute average": setMetricName("cpu_load_one_minute_average_percentage", true), - "CPU/SystemSummary/CPU load - five minute average": setMetricName("cpu_load_five_minute_average_percentage", true), - "CPU/SystemSummary/CPU load - fifteen minute average": setMetricName("cpu_load_fifteen_minute_average_percentage", true), - "CPU/SystemSummary/System CPU time percentage": setMetricName("system_cpu_time_percentage", true), - "CPU/SystemSummary/User CPU time percentage": setMetricName("user_cpu_time_percentage", true), - "CPU/SystemSummary/RAM free percentage": setMetricName("ram_free_percentage", true), - "CPU/SystemSummary/RAM total bytes": setMetricName("system_ram_size_bytes", true), - "CPU/QMgrSummary/System CPU time - percentage estimate for queue manager": setMetricName("system_cpu_time_estimate_for_queue_manager_percentage", true), - "CPU/QMgrSummary/User CPU time - percentage estimate for queue manager": setMetricName("user_cpu_time_estimate_for_queue_manager_percentage", true), - "CPU/QMgrSummary/RAM total bytes - estimate for queue manager": setMetricName("ram_usage_estimate_for_queue_manager_bytes", true), - "DISK/SystemSummary/MQ trace file system - free space": setMetricName("trace_file_system_free_space_percentage", true), - "DISK/SystemSummary/MQ trace file system - bytes in use": setMetricName("trace_file_system_in_use_bytes", true), - "DISK/SystemSummary/MQ errors file system - free space": setMetricName("errors_file_system_free_space_percentage", true), - "DISK/SystemSummary/MQ errors file system - bytes in use": setMetricName("errors_file_system_in_use_bytes", true), - "DISK/SystemSummary/MQ FDC file count": setMetricName("fdc_files", true), - "DISK/QMgrSummary/Queue Manager file system - free space": 
setMetricName("queue_manager_file_system_free_space_percentage", true), - "DISK/QMgrSummary/Queue Manager file system - bytes in use": setMetricName("queue_manager_file_system_in_use_bytes", true), - "DISK/Log/Log - logical bytes written": setMetricName("log_logical_written_bytes_interval_total", true), - "DISK/Log/Log - physical bytes written": setMetricName("log_physical_written_bytes_interval_total", true), - "DISK/Log/Log - current primary space in use": setMetricName("log_primary_space_in_use_percentage", true), - "DISK/Log/Log - workload primary space utilization": setMetricName("log_workload_primary_space_utilization_percentage", true), - "DISK/Log/Log - write latency": setMetricName("log_write_latency_seconds", true), - "DISK/Log/Log - bytes max": setMetricName("log_max_bytes", true), - "DISK/Log/Log - write size": setMetricName("log_write_size_bytes", true), - "DISK/Log/Log - bytes in use": setMetricName("log_in_use_bytes", true), - "DISK/Log/Log file system - bytes max": setMetricName("log_file_system_max_bytes", true), - "DISK/Log/Log file system - bytes in use": setMetricName("log_file_system_in_use_bytes", true), - "DISK/Log/Log - bytes occupied by reusable extents": setMetricName("log_occupied_by_reusable_extents_bytes", true), - "DISK/Log/Log - bytes occupied by extents waiting to be archived": setMetricName("log_occupied_by_extents_waiting_to_be_archived_bytes", true), - "DISK/Log/Log - bytes required for media recovery": setMetricName("log_required_for_media_recovery_bytes", true), - "STATMQI/SUBSCRIBE/Create durable subscription count": setMetricName("durable_subscription_create_interval_total", true), - "STATMQI/SUBSCRIBE/Alter durable subscription count": setMetricName("durable_subscription_alter_interval_total", true), - "STATMQI/SUBSCRIBE/Resume durable subscription count": setMetricName("durable_subscription_resume_interval_total", true), - "STATMQI/SUBSCRIBE/Delete durable subscription count": 
setMetricName("durable_subscription_delete_interval_total", true), - "STATMQI/SUBSCRIBE/Create non-durable subscription count": setMetricName("non_durable_subscription_create_interval_total", true), - "STATMQI/SUBSCRIBE/Delete non-durable subscription count": setMetricName("non_durable_subscription_delete_interval_total", true), - "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": setMetricName("failed_subscription_create_alter_resume_interval_total", true), - "STATMQI/SUBSCRIBE/Subscription delete failure count": setMetricName("failed_subscription_delete_interval_total", true), - "STATMQI/SUBSCRIBE/MQSUBRQ count": setMetricName("mqsubrq_interval_total", true), - "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": setMetricName("failed_mqsubrq_interval_total", true), - "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": setMetricName("durable_subscriber_high_water_mark", true), - "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": setMetricName("durable_subscriber_low_water_mark", true), - "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": setMetricName("non_durable_subscriber_high_water_mark", true), - "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": setMetricName("non_durable_subscriber_low_water_mark", true), - "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": setMetricName("topic_mqput_mqput1_interval_total", true), - "STATMQI/PUBLISH/Interval total topic bytes put": setMetricName("topic_put_bytes_interval_total", true), - "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": setMetricName("failed_topic_mqput_mqput1_interval_total", true), - "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": setMetricName("persistent_topic_mqput_mqput1_interval_total", true), - "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": setMetricName("non_persistent_topic_mqput_mqput1_interval_total", true), - "STATMQI/PUBLISH/Published to subscribers - message count": 
setMetricName("published_to_subscribers_message_interval_total", true), - "STATMQI/PUBLISH/Published to subscribers - byte count": setMetricName("published_to_subscribers_bytes_interval_total", true), - "STATMQI/CONNDISC/MQCONN/MQCONNX count": setMetricName("mqconn_mqconnx_interval_total", true), - "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": setMetricName("failed_mqconn_mqconnx_interval_total", true), - "STATMQI/CONNDISC/MQDISC count": setMetricName("mqdisc_interval_total", true), - "STATMQI/CONNDISC/Concurrent connections - high water mark": setMetricName("concurrent_connections_high_water_mark", true), - "STATMQI/OPENCLOSE/MQOPEN count": setMetricName("mqopen_interval_total", true), - "STATMQI/OPENCLOSE/Failed MQOPEN count": setMetricName("failed_mqopen_interval_total", true), - "STATMQI/OPENCLOSE/MQCLOSE count": setMetricName("mqclose_interval_total", true), - "STATMQI/OPENCLOSE/Failed MQCLOSE count": setMetricName("failed_mqclose_interval_total", true), - "STATMQI/INQSET/MQINQ count": setMetricName("mqinq_interval_total", true), - "STATMQI/INQSET/Failed MQINQ count": setMetricName("failed_mqinq_interval_total", true), - "STATMQI/INQSET/MQSET count": setMetricName("mqset_interval_total", true), - "STATMQI/INQSET/Failed MQSET count": setMetricName("failed_mqset_interval_total", true), - "STATMQI/PUT/Persistent message MQPUT count": setMetricName("persistent_message_mqput_interval_total", true), - "STATMQI/PUT/Persistent message MQPUT1 count": setMetricName("persistent_message_mqput1_interval_total", true), - "STATMQI/PUT/Put persistent messages - byte count": setMetricName("persistent_message_put_bytes_interval_total", true), - "STATMQI/PUT/Non-persistent message MQPUT count": setMetricName("non_persistent_message_mqput_interval_total", true), - "STATMQI/PUT/Non-persistent message MQPUT1 count": setMetricName("non_persistent_message_mqput1_interval_total", true), - "STATMQI/PUT/Put non-persistent messages - byte count": 
setMetricName("non_persistent_message_put_bytes_interval_total", true), - "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": setMetricName("mqput_mqput1_interval_total", true), - "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": setMetricName("mqput_mqput1_bytes_interval_total", true), - "STATMQI/PUT/Failed MQPUT count": setMetricName("failed_mqput_interval_total", true), - "STATMQI/PUT/Failed MQPUT1 count": setMetricName("failed_mqput1_interval_total", true), - "STATMQI/PUT/MQSTAT count": setMetricName("mqstat_interval_total", true), - "STATMQI/GET/Persistent message destructive get - count": setMetricName("persistent_message_destructive_get_interval_total", true), - "STATMQI/GET/Persistent message browse - count": setMetricName("persistent_message_browse_interval_total", true), - "STATMQI/GET/Got persistent messages - byte count": setMetricName("persistent_message_get_bytes_interval_total", true), - "STATMQI/GET/Persistent message browse - byte count": setMetricName("persistent_message_browse_bytes_interval_total", true), - "STATMQI/GET/Non-persistent message destructive get - count": setMetricName("non_persistent_message_destructive_get_interval_total", true), - "STATMQI/GET/Non-persistent message browse - count": setMetricName("non_persistent_message_browse_interval_total", true), - "STATMQI/GET/Got non-persistent messages - byte count": setMetricName("non_persistent_message_get_bytes_interval_total", true), - "STATMQI/GET/Non-persistent message browse - byte count": setMetricName("non_persistent_message_browse_bytes_interval_total", true), - "STATMQI/GET/Interval total destructive get- count": setMetricName("destructive_get_interval_total", true), - "STATMQI/GET/Interval total destructive get - byte count": setMetricName("destructive_get_bytes_interval_total", true), - "STATMQI/GET/Failed MQGET - count": setMetricName("failed_mqget_interval_total", true), - "STATMQI/GET/Failed browse count": setMetricName("failed_browse_interval_total", true), - 
"STATMQI/GET/MQCTL count": setMetricName("mqctl_interval_total", true), - "STATMQI/GET/Expired message count": setMetricName("expired_message_interval_total", true), - "STATMQI/GET/Purged queue count": setMetricName("purged_queue_interval_total", true), - "STATMQI/GET/MQCB count": setMetricName("mqcb_interval_total", true), - "STATMQI/GET/Failed MQCB count": setMetricName("failed_mqcb_interval_total", true), - "STATMQI/SYNCPOINT/Commit count": setMetricName("commit_interval_total", true), - "STATMQI/SYNCPOINT/Rollback count": setMetricName("rollback_interval_total", true), + "CPU/SystemSummary/CPU load - one minute average": metricLookup{"cpu_load_one_minute_average_percentage", true}, + "CPU/SystemSummary/CPU load - five minute average": metricLookup{"cpu_load_five_minute_average_percentage", true}, + "CPU/SystemSummary/CPU load - fifteen minute average": metricLookup{"cpu_load_fifteen_minute_average_percentage", true}, + "CPU/SystemSummary/System CPU time percentage": metricLookup{"system_cpu_time_percentage", true}, + "CPU/SystemSummary/User CPU time percentage": metricLookup{"user_cpu_time_percentage", true}, + "CPU/SystemSummary/RAM free percentage": metricLookup{"ram_free_percentage", true}, + "CPU/SystemSummary/RAM total bytes": metricLookup{"system_ram_size_bytes", true}, + "CPU/QMgrSummary/System CPU time - percentage estimate for queue manager": metricLookup{"system_cpu_time_estimate_for_queue_manager_percentage", true}, + "CPU/QMgrSummary/User CPU time - percentage estimate for queue manager": metricLookup{"user_cpu_time_estimate_for_queue_manager_percentage", true}, + "CPU/QMgrSummary/RAM total bytes - estimate for queue manager": metricLookup{"ram_usage_estimate_for_queue_manager_bytes", true}, + "DISK/SystemSummary/MQ trace file system - free space": metricLookup{"trace_file_system_free_space_percentage", true}, + "DISK/SystemSummary/MQ trace file system - bytes in use": metricLookup{"trace_file_system_in_use_bytes", true}, + "DISK/SystemSummary/MQ 
errors file system - free space": metricLookup{"errors_file_system_free_space_percentage", true}, + "DISK/SystemSummary/MQ errors file system - bytes in use": metricLookup{"errors_file_system_in_use_bytes", true}, + "DISK/SystemSummary/MQ FDC file count": metricLookup{"fdc_files", true}, + "DISK/QMgrSummary/Queue Manager file system - free space": metricLookup{"queue_manager_file_system_free_space_percentage", true}, + "DISK/QMgrSummary/Queue Manager file system - bytes in use": metricLookup{"queue_manager_file_system_in_use_bytes", true}, + "DISK/Log/Log - logical bytes written": metricLookup{"log_logical_written_bytes_interval_total", true}, + "DISK/Log/Log - physical bytes written": metricLookup{"log_physical_written_bytes_interval_total", true}, + "DISK/Log/Log - current primary space in use": metricLookup{"log_primary_space_in_use_percentage", true}, + "DISK/Log/Log - workload primary space utilization": metricLookup{"log_workload_primary_space_utilization_percentage", true}, + "DISK/Log/Log - write latency": metricLookup{"log_write_latency_seconds", true}, + "DISK/Log/Log - bytes max": metricLookup{"log_max_bytes", true}, + "DISK/Log/Log - write size": metricLookup{"log_write_size_bytes", true}, + "DISK/Log/Log - bytes in use": metricLookup{"log_in_use_bytes", true}, + "DISK/Log/Log file system - bytes max": metricLookup{"log_file_system_max_bytes", true}, + "DISK/Log/Log file system - bytes in use": metricLookup{"log_file_system_in_use_bytes", true}, + "DISK/Log/Log - bytes occupied by reusable extents": metricLookup{"log_occupied_by_reusable_extents_bytes", true}, + "DISK/Log/Log - bytes occupied by extents waiting to be archived": metricLookup{"log_occupied_by_extents_waiting_to_be_archived_bytes", true}, + "DISK/Log/Log - bytes required for media recovery": metricLookup{"log_required_for_media_recovery_bytes", true}, + "STATMQI/SUBSCRIBE/Create durable subscription count": metricLookup{"durable_subscription_create_interval_total", true}, + 
"STATMQI/SUBSCRIBE/Alter durable subscription count": metricLookup{"durable_subscription_alter_interval_total", true}, + "STATMQI/SUBSCRIBE/Resume durable subscription count": metricLookup{"durable_subscription_resume_interval_total", true}, + "STATMQI/SUBSCRIBE/Delete durable subscription count": metricLookup{"durable_subscription_delete_interval_total", true}, + "STATMQI/SUBSCRIBE/Create non-durable subscription count": metricLookup{"non_durable_subscription_create_interval_total", true}, + "STATMQI/SUBSCRIBE/Delete non-durable subscription count": metricLookup{"non_durable_subscription_delete_interval_total", true}, + "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": metricLookup{"failed_subscription_create_alter_resume_interval_total", true}, + "STATMQI/SUBSCRIBE/Subscription delete failure count": metricLookup{"failed_subscription_delete_interval_total", true}, + "STATMQI/SUBSCRIBE/MQSUBRQ count": metricLookup{"mqsubrq_interval_total", true}, + "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": metricLookup{"failed_mqsubrq_interval_total", true}, + "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": metricLookup{"durable_subscriber_high_water_mark", true}, + "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": metricLookup{"durable_subscriber_low_water_mark", true}, + "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": metricLookup{"non_durable_subscriber_high_water_mark", true}, + "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": metricLookup{"non_durable_subscriber_low_water_mark", true}, + "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": metricLookup{"topic_mqput_mqput1_interval_total", true}, + "STATMQI/PUBLISH/Interval total topic bytes put": metricLookup{"topic_put_bytes_interval_total", true}, + "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": metricLookup{"failed_topic_mqput_mqput1_interval_total", true}, + "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": 
metricLookup{"persistent_topic_mqput_mqput1_interval_total", true}, + "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": metricLookup{"non_persistent_topic_mqput_mqput1_interval_total", true}, + "STATMQI/PUBLISH/Published to subscribers - message count": metricLookup{"published_to_subscribers_message_interval_total", true}, + "STATMQI/PUBLISH/Published to subscribers - byte count": metricLookup{"published_to_subscribers_bytes_interval_total", true}, + "STATMQI/CONNDISC/MQCONN/MQCONNX count": metricLookup{"mqconn_mqconnx_interval_total", true}, + "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": metricLookup{"failed_mqconn_mqconnx_interval_total", true}, + "STATMQI/CONNDISC/MQDISC count": metricLookup{"mqdisc_interval_total", true}, + "STATMQI/CONNDISC/Concurrent connections - high water mark": metricLookup{"concurrent_connections_high_water_mark", true}, + "STATMQI/OPENCLOSE/MQOPEN count": metricLookup{"mqopen_interval_total", true}, + "STATMQI/OPENCLOSE/Failed MQOPEN count": metricLookup{"failed_mqopen_interval_total", true}, + "STATMQI/OPENCLOSE/MQCLOSE count": metricLookup{"mqclose_interval_total", true}, + "STATMQI/OPENCLOSE/Failed MQCLOSE count": metricLookup{"failed_mqclose_interval_total", true}, + "STATMQI/INQSET/MQINQ count": metricLookup{"mqinq_interval_total", true}, + "STATMQI/INQSET/Failed MQINQ count": metricLookup{"failed_mqinq_interval_total", true}, + "STATMQI/INQSET/MQSET count": metricLookup{"mqset_interval_total", true}, + "STATMQI/INQSET/Failed MQSET count": metricLookup{"failed_mqset_interval_total", true}, + "STATMQI/PUT/Persistent message MQPUT count": metricLookup{"persistent_message_mqput_interval_total", true}, + "STATMQI/PUT/Persistent message MQPUT1 count": metricLookup{"persistent_message_mqput1_interval_total", true}, + "STATMQI/PUT/Put persistent messages - byte count": metricLookup{"persistent_message_put_bytes_interval_total", true}, + "STATMQI/PUT/Non-persistent message MQPUT count": 
metricLookup{"non_persistent_message_mqput_interval_total", true}, + "STATMQI/PUT/Non-persistent message MQPUT1 count": metricLookup{"non_persistent_message_mqput1_interval_total", true}, + "STATMQI/PUT/Put non-persistent messages - byte count": metricLookup{"non_persistent_message_put_bytes_interval_total", true}, + "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": metricLookup{"mqput_mqput1_interval_total", true}, + "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": metricLookup{"mqput_mqput1_bytes_interval_total", true}, + "STATMQI/PUT/Failed MQPUT count": metricLookup{"failed_mqput_interval_total", true}, + "STATMQI/PUT/Failed MQPUT1 count": metricLookup{"failed_mqput1_interval_total", true}, + "STATMQI/PUT/MQSTAT count": metricLookup{"mqstat_interval_total", true}, + "STATMQI/GET/Persistent message destructive get - count": metricLookup{"persistent_message_destructive_get_interval_total", true}, + "STATMQI/GET/Persistent message browse - count": metricLookup{"persistent_message_browse_interval_total", true}, + "STATMQI/GET/Got persistent messages - byte count": metricLookup{"persistent_message_get_bytes_interval_total", true}, + "STATMQI/GET/Persistent message browse - byte count": metricLookup{"persistent_message_browse_bytes_interval_total", true}, + "STATMQI/GET/Non-persistent message destructive get - count": metricLookup{"non_persistent_message_destructive_get_interval_total", true}, + "STATMQI/GET/Non-persistent message browse - count": metricLookup{"non_persistent_message_browse_interval_total", true}, + "STATMQI/GET/Got non-persistent messages - byte count": metricLookup{"non_persistent_message_get_bytes_interval_total", true}, + "STATMQI/GET/Non-persistent message browse - byte count": metricLookup{"non_persistent_message_browse_bytes_interval_total", true}, + "STATMQI/GET/Interval total destructive get- count": metricLookup{"destructive_get_interval_total", true}, + "STATMQI/GET/Interval total destructive get - byte count": 
metricLookup{"destructive_get_bytes_interval_total", true}, + "STATMQI/GET/Failed MQGET - count": metricLookup{"failed_mqget_interval_total", true}, + "STATMQI/GET/Failed browse count": metricLookup{"failed_browse_interval_total", true}, + "STATMQI/GET/MQCTL count": metricLookup{"mqctl_interval_total", true}, + "STATMQI/GET/Expired message count": metricLookup{"expired_message_interval_total", true}, + "STATMQI/GET/Purged queue count": metricLookup{"purged_queue_interval_total", true}, + "STATMQI/GET/MQCB count": metricLookup{"mqcb_interval_total", true}, + "STATMQI/GET/Failed MQCB count": metricLookup{"failed_mqcb_interval_total", true}, + "STATMQI/SYNCPOINT/Commit count": metricLookup{"commit_interval_total", true}, + "STATMQI/SYNCPOINT/Rollback count": metricLookup{"rollback_interval_total", true}, } return metricNamesMap } - -// setMetricName sets the metric name & specifies if the metric is enabled -func setMetricName(name string, enabled bool) metricLookup { - return metricLookup{ - name: name, - enabled: enabled, - } -} From 0f9cd3f204d1a06b6bfda08d355f4901e52bbbda Mon Sep 17 00:00:00 2001 From: Riccardo Biraghi Date: Tue, 19 Jun 2018 15:27:11 +0100 Subject: [PATCH 07/12] Add xfs filesystem and unknown fs handling --- cmd/runmqserver/mqconfig_linux.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/runmqserver/mqconfig_linux.go b/cmd/runmqserver/mqconfig_linux.go index bf01efdb..45d9efd2 100644 --- a/cmd/runmqserver/mqconfig_linux.go +++ b/cmd/runmqserver/mqconfig_linux.go @@ -33,6 +33,7 @@ var fsTypes = map[int64]string{ 0x9123683e: "btrfs", 0x01021994: "tmpfs", 0x794c7630: "overlayfs", + 0x58465342: "xfs", } func checkFS(path string) error { @@ -43,7 +44,11 @@ func checkFS(path string) error { return nil } // Use a type conversion to make type an int64. On s390x it's a uint32.
- t := fsTypes[int64(statfs.Type)] + t, ok := fsTypes[int64(statfs.Type)] + if !ok { + log.Printf("WARNING: detected %v has unknown filesystem type %x", path, statfs.Type) + return nil + } switch t { case "aufs", "overlayfs", "tmpfs": return fmt.Errorf("%v uses unsupported filesystem type: %v", path, t) From 0b9a9186cf1c71142a5c2fbde099684186defc32 Mon Sep 17 00:00:00 2001 From: Riccardo Biraghi Date: Wed, 20 Jun 2018 10:02:36 +0100 Subject: [PATCH 08/12] Extend fs type list for linux --- cmd/runmqserver/mqconfig_linux.go | 65 +++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/cmd/runmqserver/mqconfig_linux.go b/cmd/runmqserver/mqconfig_linux.go index 45d9efd2..2a07aa43 100644 --- a/cmd/runmqserver/mqconfig_linux.go +++ b/cmd/runmqserver/mqconfig_linux.go @@ -34,6 +34,71 @@ var fsTypes = map[int64]string{ 0x01021994: "tmpfs", 0x794c7630: "overlayfs", 0x58465342: "xfs", + // less popular codes + 0xadf5: "adfs", + 0xadff: "affs", + 0x5346414F: "afs", + 0x0187: "autofs", + 0x73757245: "coda", + 0x28cd3d45: "cramfs", + 0x453dcd28: "cramfs", + 0x64626720: "debugfs", + 0x73636673: "securityfs", + 0xf97cff8c: "selinux", + 0x43415d53: "smack", + 0x858458f6: "ramfs", + 0x958458f6: "hugetlbfs", + 0x73717368: "squashfs", + 0xf15f: "ecryptfs", + 0x414A53: "efs", + 0xabba1974: "xenfs", + 0x3434: "nilfs", + 0xF2F52010: "f2fs", + 0xf995e849: "hpfs", + 0x9660: "isofs", + 0x72b6: "jffs2", + 0x6165676C: "pstorefs", + 0xde5e81e4: "efivarfs", + 0x00c0ffee: "hostfs", + 0x137F: "minix_14", // minix v1 fs, 14 char names + 0x138F: "minix_30", // minix v1 fs, 30 char names + 0x2468: "minix2_14", // minix v2 fs, 14 char names + 0x2478: "minix2_30", // minix v2 fs, 30 char names + 0x4d5a: "minix3_60", // minix v3 fs, 60 char names + 0x4d44: "msdos", + 0x564c: "ncp", + 0x7461636f: "ocfs2", + 0x9fa1: "openprom", + 0x002f: "qnx4", + 0x68191122: "qnx6", + 0x6B414653: "afs_fs", + 0x52654973: "reiserfs", + 0x517B: "smb", + 0x27e0eb: "cgroup", + 0x63677270: "cgroup2", + 
0x7655821: "rdtgroup", + 0x57AC6E9D: "stack_end", + 0x74726163: "tracefs", + 0x01021997: "v9fs", + 0x62646576: "bdevfs", + 0x64646178: "daxfs", + 0x42494e4d: "binfmtfs", + 0x1cd1: "devpts", + 0xBAD1DEA: "futexfs", + 0x50495045: "pipefs", + 0x9fa0: "proc", + 0x534F434B: "sockfs", + 0x62656572: "sysfs", + 0x9fa2: "usbdevice", + 0x11307854: "mtd_inode", + 0x09041934: "anon_inode", + 0x73727279: "btrfs", + 0x6e736673: "nsfs", + 0xcafe4a11: "bpf", + 0x5a3c69f0: "aafs", + 0x15013346: "udf", + 0x13661366: "balloon_kvm", + 0x58295829: "zsmalloc", } func checkFS(path string) error { From 74659dbf59f99e258fa08e3417dba57bcafa0293 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Wed, 20 Jun 2018 14:46:27 +0100 Subject: [PATCH 09/12] Change delta type metrics to counters --- internal/metrics/exporter.go | 132 ++++++++++++++++++++++-------- internal/metrics/exporter_test.go | 80 ++++++++++++++++-- internal/metrics/mapping.go | 120 +++++++++++++-------------- internal/metrics/update.go | 9 ++ test/docker/mqmetric_test_util.go | 120 +++++++++++++-------------- 5 files changed, 302 insertions(+), 159 deletions(-) diff --git a/internal/metrics/exporter.go b/internal/metrics/exporter.go index 241c97e1..9c06cd9f 100644 --- a/internal/metrics/exporter.go +++ b/internal/metrics/exporter.go @@ -33,6 +33,7 @@ const ( type exporter struct { qmName string gaugeMap map[string]*prometheus.GaugeVec + counterMap map[string]*prometheus.CounterVec firstCollect bool log *logger.Logger } @@ -41,6 +42,7 @@ func newExporter(qmName string, log *logger.Logger) *exporter { return &exporter{ qmName: qmName, gaugeMap: make(map[string]*prometheus.GaugeVec), + counterMap: make(map[string]*prometheus.CounterVec), firstCollect: true, log: log, } @@ -54,12 +56,22 @@ func (e *exporter) Describe(ch chan<- *prometheus.Desc) { for key, metric := range response { - // Allocate a Prometheus Gauge for each available metric - gaugeVec := createGaugeVec(metric.name, metric.description, metric.objectType) - 
e.gaugeMap[key] = gaugeVec + if metric.isDelta { + // For delta type metrics - allocate a Prometheus Counter + counterVec := createCounterVec(metric.name, metric.description, metric.objectType) + e.counterMap[key] = counterVec - // Describe metric - gaugeVec.Describe(ch) + // Describe metric + counterVec.Describe(ch) + + } else { + // For non-delta type metrics - allocate a Prometheus Gauge + gaugeVec := createGaugeVec(metric.name, metric.description, metric.objectType) + e.gaugeMap[key] = gaugeVec + + // Describe metric + gaugeVec.Describe(ch) + } } } @@ -71,32 +83,61 @@ func (e *exporter) Collect(ch chan<- prometheus.Metric) { for key, metric := range response { - // Reset Prometheus Gauge - gaugeVec := e.gaugeMap[key] - gaugeVec.Reset() - - // Populate Prometheus Gauge with metric values - // - Skip on first collect to avoid build-up of accumulated values - if !e.firstCollect { - for label, value := range metric.values { - var err error - var gauge prometheus.Gauge - - if label == qmgrLabelValue { - gauge, err = gaugeVec.GetMetricWithLabelValues(e.qmName) - } else { - gauge, err = gaugeVec.GetMetricWithLabelValues(label, e.qmName) + if metric.isDelta { + // For delta type metrics - update their Prometheus Counter + counterVec := e.counterMap[key] + + // Populate Prometheus Counter with metric values + // - Skip on first collect to avoid build-up of accumulated values + if !e.firstCollect { + for label, value := range metric.values { + var err error + var counter prometheus.Counter + + if label == qmgrLabelValue { + counter, err = counterVec.GetMetricWithLabelValues(e.qmName) + } else { + counter, err = counterVec.GetMetricWithLabelValues(label, e.qmName) + } + if err == nil { + counter.Add(value) + } else { + e.log.Errorf("Metrics Error: %s", err.Error()) + } } - if err == nil { - gauge.Set(value) - } else { - e.log.Errorf("Metrics Error: %s", err.Error()) + } + + // Collect metric + counterVec.Collect(ch) + + } else { + // For non-delta type metrics - reset 
their Prometheus Gauge + gaugeVec := e.gaugeMap[key] + gaugeVec.Reset() + + // Populate Prometheus Gauge with metric values + // - Skip on first collect to avoid build-up of accumulated values + if !e.firstCollect { + for label, value := range metric.values { + var err error + var gauge prometheus.Gauge + + if label == qmgrLabelValue { + gauge, err = gaugeVec.GetMetricWithLabelValues(e.qmName) + } else { + gauge, err = gaugeVec.GetMetricWithLabelValues(label, e.qmName) + } + if err == nil { + gauge.Set(value) + } else { + e.log.Errorf("Metrics Error: %s", err.Error()) + } } } - } - // Collect metric - gaugeVec.Collect(ch) + // Collect metric + gaugeVec.Collect(ch) + } } if e.firstCollect { @@ -104,16 +145,26 @@ func (e *exporter) Collect(ch chan<- prometheus.Metric) { } } +// createCounterVec returns a Prometheus CounterVec populated with metric details +func createCounterVec(name, description string, objectType bool) *prometheus.CounterVec { + + prefix, labels := getVecDetails(objectType) + + counterVec := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Name: prefix + "_" + name, + Help: description, + }, + labels, + ) + return counterVec +} + // createGaugeVec returns a Prometheus GaugeVec populated with metric details func createGaugeVec(name, description string, objectType bool) *prometheus.GaugeVec { - prefix := qmgrPrefix - labels := []string{qmgrLabel} - - if objectType { - prefix = objectPrefix - labels = []string{objectLabel, qmgrLabel} - } + prefix, labels := getVecDetails(objectType) gaugeVec := prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -125,3 +176,16 @@ func createGaugeVec(name, description string, objectType bool) *prometheus.Gauge ) return gaugeVec } + +// getVecDetails returns the required prefix and labels for a metric +func getVecDetails(objectType bool) (prefix string, labels []string) { + + prefix = qmgrPrefix + labels = []string{qmgrLabel} + + if objectType { + prefix = objectPrefix + labels = 
[]string{objectLabel, qmgrLabel} + } + return prefix, labels +} diff --git a/internal/metrics/exporter_test.go b/internal/metrics/exporter_test.go index e8db3539..7281a129 100644 --- a/internal/metrics/exporter_test.go +++ b/internal/metrics/exporter_test.go @@ -19,11 +19,21 @@ import ( "testing" "time" + "github.com/ibm-messaging/mq-golang/ibmmq" + "github.com/ibm-messaging/mq-golang/mqmetric" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" ) -func TestDescribe(t *testing.T) { +func TestDescribe_Counter(t *testing.T) { + testDescribe(t, true) +} + +func TestDescribe_Gauge(t *testing.T) { + testDescribe(t, false) +} + +func testDescribe(t *testing.T, isDelta bool) { teardownTestCase := setupTestCase(false) defer teardownTestCase() @@ -40,6 +50,9 @@ func TestDescribe(t *testing.T) { t.Errorf("Received unexpected collect request") } + if isDelta { + mqmetric.Metrics.Classes[0].Types[0].Elements[0].Datatype = ibmmq.MQIAMO_MONITOR_DELTA + } metrics, _ := initialiseMetrics(log) responseChannel <- metrics @@ -55,14 +68,26 @@ func TestDescribe(t *testing.T) { } } -func TestCollect(t *testing.T) { +func TestCollect_Counter(t *testing.T) { + testCollect(t, true) +} + +func TestCollect_Gauge(t *testing.T) { + testCollect(t, false) +} + +func testCollect(t *testing.T, isDelta bool) { teardownTestCase := setupTestCase(false) defer teardownTestCase() log := getTestLogger() exporter := newExporter("qmName", log) - exporter.gaugeMap[testKey1] = createGaugeVec(testElement1Name, testElement1Description, false) + if isDelta { + exporter.counterMap[testKey1] = createCounterVec(testElement1Name, testElement1Description, false) + } else { + exporter.gaugeMap[testKey1] = createGaugeVec(testElement1Name, testElement1Description, false) + } for i := 1; i <= 3; i++ { @@ -78,20 +103,33 @@ func TestCollect(t *testing.T) { } populateTestMetrics(i, false) + if isDelta { + mqmetric.Metrics.Classes[0].Types[0].Elements[0].Datatype = 
ibmmq.MQIAMO_MONITOR_DELTA + } metrics, _ := initialiseMetrics(log) updateMetrics(metrics) responseChannel <- metrics select { case <-ch: + var actual float64 prometheusMetric := dto.Metric{} - exporter.gaugeMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric) - actual := prometheusMetric.GetGauge().GetValue() + if isDelta { + exporter.counterMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric) + actual = prometheusMetric.GetCounter().GetValue() + } else { + exporter.gaugeMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric) + actual = prometheusMetric.GetGauge().GetValue() + } if i == 1 { if actual != float64(0) { t.Errorf("Expected values to be zero on first collect; actual %f", actual) } + } else if isDelta && i != 2 { + if actual != float64(i+(i-1)) { + t.Errorf("Expected value=%f; actual %f", float64(i+(i-1)), actual) + } } else if actual != float64(i) { t.Errorf("Expected value=%f; actual %f", float64(i), actual) } @@ -101,6 +139,38 @@ func TestCollect(t *testing.T) { } } +func TestCreateCounterVec(t *testing.T) { + + ch := make(chan *prometheus.Desc) + counterVec := createCounterVec("MetricName", "MetricDescription", false) + go func() { + counterVec.Describe(ch) + }() + description := <-ch + + expected := "Desc{fqName: \"ibmmq_qmgr_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [qmgr]}" + actual := description.String() + if actual != expected { + t.Errorf("Expected value=%s; actual %s", expected, actual) + } +} + +func TestCreateCounterVec_ObjectLabel(t *testing.T) { + + ch := make(chan *prometheus.Desc) + counterVec := createCounterVec("MetricName", "MetricDescription", true) + go func() { + counterVec.Describe(ch) + }() + description := <-ch + + expected := "Desc{fqName: \"ibmmq_object_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [object qmgr]}" + actual := description.String() + if actual != expected { + t.Errorf("Expected value=%s; actual %s", expected, 
actual) + } +} + func TestCreateGaugeVec(t *testing.T) { ch := make(chan *prometheus.Desc) diff --git a/internal/metrics/mapping.go b/internal/metrics/mapping.go index 646a9395..8c6ff8a5 100644 --- a/internal/metrics/mapping.go +++ b/internal/metrics/mapping.go @@ -43,8 +43,8 @@ func generateMetricNamesMap() map[string]metricLookup { "DISK/SystemSummary/MQ FDC file count": metricLookup{"fdc_files", true}, "DISK/QMgrSummary/Queue Manager file system - free space": metricLookup{"queue_manager_file_system_free_space_percentage", true}, "DISK/QMgrSummary/Queue Manager file system - bytes in use": metricLookup{"queue_manager_file_system_in_use_bytes", true}, - "DISK/Log/Log - logical bytes written": metricLookup{"log_logical_written_bytes_interval_total", true}, - "DISK/Log/Log - physical bytes written": metricLookup{"log_physical_written_bytes_interval_total", true}, + "DISK/Log/Log - logical bytes written": metricLookup{"log_logical_written_bytes_total", true}, + "DISK/Log/Log - physical bytes written": metricLookup{"log_physical_written_bytes_total", true}, "DISK/Log/Log - current primary space in use": metricLookup{"log_primary_space_in_use_percentage", true}, "DISK/Log/Log - workload primary space utilization": metricLookup{"log_workload_primary_space_utilization_percentage", true}, "DISK/Log/Log - write latency": metricLookup{"log_write_latency_seconds", true}, @@ -56,69 +56,69 @@ func generateMetricNamesMap() map[string]metricLookup { "DISK/Log/Log - bytes occupied by reusable extents": metricLookup{"log_occupied_by_reusable_extents_bytes", true}, "DISK/Log/Log - bytes occupied by extents waiting to be archived": metricLookup{"log_occupied_by_extents_waiting_to_be_archived_bytes", true}, "DISK/Log/Log - bytes required for media recovery": metricLookup{"log_required_for_media_recovery_bytes", true}, - "STATMQI/SUBSCRIBE/Create durable subscription count": metricLookup{"durable_subscription_create_interval_total", true}, - "STATMQI/SUBSCRIBE/Alter durable 
subscription count": metricLookup{"durable_subscription_alter_interval_total", true}, - "STATMQI/SUBSCRIBE/Resume durable subscription count": metricLookup{"durable_subscription_resume_interval_total", true}, - "STATMQI/SUBSCRIBE/Delete durable subscription count": metricLookup{"durable_subscription_delete_interval_total", true}, - "STATMQI/SUBSCRIBE/Create non-durable subscription count": metricLookup{"non_durable_subscription_create_interval_total", true}, - "STATMQI/SUBSCRIBE/Delete non-durable subscription count": metricLookup{"non_durable_subscription_delete_interval_total", true}, - "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": metricLookup{"failed_subscription_create_alter_resume_interval_total", true}, - "STATMQI/SUBSCRIBE/Subscription delete failure count": metricLookup{"failed_subscription_delete_interval_total", true}, - "STATMQI/SUBSCRIBE/MQSUBRQ count": metricLookup{"mqsubrq_interval_total", true}, - "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": metricLookup{"failed_mqsubrq_interval_total", true}, + "STATMQI/SUBSCRIBE/Create durable subscription count": metricLookup{"durable_subscription_create_total", true}, + "STATMQI/SUBSCRIBE/Alter durable subscription count": metricLookup{"durable_subscription_alter_total", true}, + "STATMQI/SUBSCRIBE/Resume durable subscription count": metricLookup{"durable_subscription_resume_total", true}, + "STATMQI/SUBSCRIBE/Delete durable subscription count": metricLookup{"durable_subscription_delete_total", true}, + "STATMQI/SUBSCRIBE/Create non-durable subscription count": metricLookup{"non_durable_subscription_create_total", true}, + "STATMQI/SUBSCRIBE/Delete non-durable subscription count": metricLookup{"non_durable_subscription_delete_total", true}, + "STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": metricLookup{"failed_subscription_create_alter_resume_total", true}, + "STATMQI/SUBSCRIBE/Subscription delete failure count": metricLookup{"failed_subscription_delete_total", true}, + 
"STATMQI/SUBSCRIBE/MQSUBRQ count": metricLookup{"mqsubrq_total", true}, + "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": metricLookup{"failed_mqsubrq_total", true}, "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": metricLookup{"durable_subscriber_high_water_mark", true}, "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": metricLookup{"durable_subscriber_low_water_mark", true}, "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": metricLookup{"non_durable_subscriber_high_water_mark", true}, "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": metricLookup{"non_durable_subscriber_low_water_mark", true}, - "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": metricLookup{"topic_mqput_mqput1_interval_total", true}, - "STATMQI/PUBLISH/Interval total topic bytes put": metricLookup{"topic_put_bytes_interval_total", true}, - "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": metricLookup{"failed_topic_mqput_mqput1_interval_total", true}, - "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": metricLookup{"persistent_topic_mqput_mqput1_interval_total", true}, - "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": metricLookup{"non_persistent_topic_mqput_mqput1_interval_total", true}, - "STATMQI/PUBLISH/Published to subscribers - message count": metricLookup{"published_to_subscribers_message_interval_total", true}, - "STATMQI/PUBLISH/Published to subscribers - byte count": metricLookup{"published_to_subscribers_bytes_interval_total", true}, - "STATMQI/CONNDISC/MQCONN/MQCONNX count": metricLookup{"mqconn_mqconnx_interval_total", true}, - "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": metricLookup{"failed_mqconn_mqconnx_interval_total", true}, - "STATMQI/CONNDISC/MQDISC count": metricLookup{"mqdisc_interval_total", true}, + "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": metricLookup{"topic_mqput_mqput1_total", true}, + "STATMQI/PUBLISH/Interval total topic bytes put": metricLookup{"topic_put_bytes_total", true}, + 
"STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": metricLookup{"failed_topic_mqput_mqput1_total", true}, + "STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": metricLookup{"persistent_topic_mqput_mqput1_total", true}, + "STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": metricLookup{"non_persistent_topic_mqput_mqput1_total", true}, + "STATMQI/PUBLISH/Published to subscribers - message count": metricLookup{"published_to_subscribers_message_total", true}, + "STATMQI/PUBLISH/Published to subscribers - byte count": metricLookup{"published_to_subscribers_bytes_total", true}, + "STATMQI/CONNDISC/MQCONN/MQCONNX count": metricLookup{"mqconn_mqconnx_total", true}, + "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": metricLookup{"failed_mqconn_mqconnx_total", true}, + "STATMQI/CONNDISC/MQDISC count": metricLookup{"mqdisc_total", true}, "STATMQI/CONNDISC/Concurrent connections - high water mark": metricLookup{"concurrent_connections_high_water_mark", true}, - "STATMQI/OPENCLOSE/MQOPEN count": metricLookup{"mqopen_interval_total", true}, - "STATMQI/OPENCLOSE/Failed MQOPEN count": metricLookup{"failed_mqopen_interval_total", true}, - "STATMQI/OPENCLOSE/MQCLOSE count": metricLookup{"mqclose_interval_total", true}, - "STATMQI/OPENCLOSE/Failed MQCLOSE count": metricLookup{"failed_mqclose_interval_total", true}, - "STATMQI/INQSET/MQINQ count": metricLookup{"mqinq_interval_total", true}, - "STATMQI/INQSET/Failed MQINQ count": metricLookup{"failed_mqinq_interval_total", true}, - "STATMQI/INQSET/MQSET count": metricLookup{"mqset_interval_total", true}, - "STATMQI/INQSET/Failed MQSET count": metricLookup{"failed_mqset_interval_total", true}, - "STATMQI/PUT/Persistent message MQPUT count": metricLookup{"persistent_message_mqput_interval_total", true}, - "STATMQI/PUT/Persistent message MQPUT1 count": metricLookup{"persistent_message_mqput1_interval_total", true}, - "STATMQI/PUT/Put persistent messages - byte count": 
metricLookup{"persistent_message_put_bytes_interval_total", true}, - "STATMQI/PUT/Non-persistent message MQPUT count": metricLookup{"non_persistent_message_mqput_interval_total", true}, - "STATMQI/PUT/Non-persistent message MQPUT1 count": metricLookup{"non_persistent_message_mqput1_interval_total", true}, - "STATMQI/PUT/Put non-persistent messages - byte count": metricLookup{"non_persistent_message_put_bytes_interval_total", true}, - "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": metricLookup{"mqput_mqput1_interval_total", true}, - "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": metricLookup{"mqput_mqput1_bytes_interval_total", true}, - "STATMQI/PUT/Failed MQPUT count": metricLookup{"failed_mqput_interval_total", true}, - "STATMQI/PUT/Failed MQPUT1 count": metricLookup{"failed_mqput1_interval_total", true}, - "STATMQI/PUT/MQSTAT count": metricLookup{"mqstat_interval_total", true}, - "STATMQI/GET/Persistent message destructive get - count": metricLookup{"persistent_message_destructive_get_interval_total", true}, - "STATMQI/GET/Persistent message browse - count": metricLookup{"persistent_message_browse_interval_total", true}, - "STATMQI/GET/Got persistent messages - byte count": metricLookup{"persistent_message_get_bytes_interval_total", true}, - "STATMQI/GET/Persistent message browse - byte count": metricLookup{"persistent_message_browse_bytes_interval_total", true}, - "STATMQI/GET/Non-persistent message destructive get - count": metricLookup{"non_persistent_message_destructive_get_interval_total", true}, - "STATMQI/GET/Non-persistent message browse - count": metricLookup{"non_persistent_message_browse_interval_total", true}, - "STATMQI/GET/Got non-persistent messages - byte count": metricLookup{"non_persistent_message_get_bytes_interval_total", true}, - "STATMQI/GET/Non-persistent message browse - byte count": metricLookup{"non_persistent_message_browse_bytes_interval_total", true}, - "STATMQI/GET/Interval total destructive get- count": 
metricLookup{"destructive_get_interval_total", true}, - "STATMQI/GET/Interval total destructive get - byte count": metricLookup{"destructive_get_bytes_interval_total", true}, - "STATMQI/GET/Failed MQGET - count": metricLookup{"failed_mqget_interval_total", true}, - "STATMQI/GET/Failed browse count": metricLookup{"failed_browse_interval_total", true}, - "STATMQI/GET/MQCTL count": metricLookup{"mqctl_interval_total", true}, - "STATMQI/GET/Expired message count": metricLookup{"expired_message_interval_total", true}, - "STATMQI/GET/Purged queue count": metricLookup{"purged_queue_interval_total", true}, - "STATMQI/GET/MQCB count": metricLookup{"mqcb_interval_total", true}, - "STATMQI/GET/Failed MQCB count": metricLookup{"failed_mqcb_interval_total", true}, - "STATMQI/SYNCPOINT/Commit count": metricLookup{"commit_interval_total", true}, - "STATMQI/SYNCPOINT/Rollback count": metricLookup{"rollback_interval_total", true}, + "STATMQI/OPENCLOSE/MQOPEN count": metricLookup{"mqopen_total", true}, + "STATMQI/OPENCLOSE/Failed MQOPEN count": metricLookup{"failed_mqopen_total", true}, + "STATMQI/OPENCLOSE/MQCLOSE count": metricLookup{"mqclose_total", true}, + "STATMQI/OPENCLOSE/Failed MQCLOSE count": metricLookup{"failed_mqclose_total", true}, + "STATMQI/INQSET/MQINQ count": metricLookup{"mqinq_total", true}, + "STATMQI/INQSET/Failed MQINQ count": metricLookup{"failed_mqinq_total", true}, + "STATMQI/INQSET/MQSET count": metricLookup{"mqset_total", true}, + "STATMQI/INQSET/Failed MQSET count": metricLookup{"failed_mqset_total", true}, + "STATMQI/PUT/Persistent message MQPUT count": metricLookup{"persistent_message_mqput_total", true}, + "STATMQI/PUT/Persistent message MQPUT1 count": metricLookup{"persistent_message_mqput1_total", true}, + "STATMQI/PUT/Put persistent messages - byte count": metricLookup{"persistent_message_put_bytes_total", true}, + "STATMQI/PUT/Non-persistent message MQPUT count": metricLookup{"non_persistent_message_mqput_total", true}, + 
"STATMQI/PUT/Non-persistent message MQPUT1 count": metricLookup{"non_persistent_message_mqput1_total", true}, + "STATMQI/PUT/Put non-persistent messages - byte count": metricLookup{"non_persistent_message_put_bytes_total", true}, + "STATMQI/PUT/Interval total MQPUT/MQPUT1 count": metricLookup{"mqput_mqput1_total", true}, + "STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": metricLookup{"mqput_mqput1_bytes_total", true}, + "STATMQI/PUT/Failed MQPUT count": metricLookup{"failed_mqput_total", true}, + "STATMQI/PUT/Failed MQPUT1 count": metricLookup{"failed_mqput1_total", true}, + "STATMQI/PUT/MQSTAT count": metricLookup{"mqstat_total", true}, + "STATMQI/GET/Persistent message destructive get - count": metricLookup{"persistent_message_destructive_get_total", true}, + "STATMQI/GET/Persistent message browse - count": metricLookup{"persistent_message_browse_total", true}, + "STATMQI/GET/Got persistent messages - byte count": metricLookup{"persistent_message_get_bytes_total", true}, + "STATMQI/GET/Persistent message browse - byte count": metricLookup{"persistent_message_browse_bytes_total", true}, + "STATMQI/GET/Non-persistent message destructive get - count": metricLookup{"non_persistent_message_destructive_get_total", true}, + "STATMQI/GET/Non-persistent message browse - count": metricLookup{"non_persistent_message_browse_total", true}, + "STATMQI/GET/Got non-persistent messages - byte count": metricLookup{"non_persistent_message_get_bytes_total", true}, + "STATMQI/GET/Non-persistent message browse - byte count": metricLookup{"non_persistent_message_browse_bytes_total", true}, + "STATMQI/GET/Interval total destructive get- count": metricLookup{"destructive_get_total", true}, + "STATMQI/GET/Interval total destructive get - byte count": metricLookup{"destructive_get_bytes_total", true}, + "STATMQI/GET/Failed MQGET - count": metricLookup{"failed_mqget_total", true}, + "STATMQI/GET/Failed browse count": metricLookup{"failed_browse_total", true}, + "STATMQI/GET/MQCTL 
count": metricLookup{"mqctl_total", true}, + "STATMQI/GET/Expired message count": metricLookup{"expired_message_total", true}, + "STATMQI/GET/Purged queue count": metricLookup{"purged_queue_total", true}, + "STATMQI/GET/MQCB count": metricLookup{"mqcb_total", true}, + "STATMQI/GET/Failed MQCB count": metricLookup{"failed_mqcb_total", true}, + "STATMQI/SYNCPOINT/Commit count": metricLookup{"commit_total", true}, + "STATMQI/SYNCPOINT/Rollback count": metricLookup{"rollback_total", true}, } return metricNamesMap } diff --git a/internal/metrics/update.go b/internal/metrics/update.go index b8429b50..9aceceb3 100644 --- a/internal/metrics/update.go +++ b/internal/metrics/update.go @@ -23,6 +23,7 @@ import ( "time" "github.com/ibm-messaging/mq-container/internal/logger" + "github.com/ibm-messaging/mq-golang/ibmmq" "github.com/ibm-messaging/mq-golang/mqmetric" ) @@ -43,6 +44,7 @@ type metricData struct { description string objectType bool values map[string]float64 + isDelta bool } // processMetrics processes publications of metric data and handles describe/collect/stop requests @@ -148,10 +150,17 @@ func initialiseMetrics(log *logger.Logger) (map[string]*metricData, error) { // Check if metric is enabled if metricLookup.enabled { + // Check if metric is a delta type + isDelta := false + if metricElement.Datatype == ibmmq.MQIAMO_MONITOR_DELTA { + isDelta = true + } + // Set metric details metric := metricData{ name: metricLookup.name, description: metricElement.Description, + isDelta: isDelta, } // Add metric diff --git a/test/docker/mqmetric_test_util.go b/test/docker/mqmetric_test_util.go index cfe42a22..ab10ff0d 100644 --- a/test/docker/mqmetric_test_util.go +++ b/test/docker/mqmetric_test_util.go @@ -181,8 +181,8 @@ func metricNames() []string { "fdc_files", "queue_manager_file_system_free_space_percentage", "queue_manager_file_system_in_use_bytes", - "log_logical_written_bytes_interval_total", - "log_physical_written_bytes_interval_total", + 
"log_logical_written_bytes_total", + "log_physical_written_bytes_total", "log_primary_space_in_use_percentage", "log_workload_primary_space_utilization_percentage", "log_write_latency_seconds", @@ -191,69 +191,69 @@ func metricNames() []string { "log_in_use_bytes", "log_file_system_max_bytes", "log_file_system_in_use_bytes", - "durable_subscription_create_interval_total", - "durable_subscription_alter_interval_total", - "durable_subscription_resume_interval_total", - "durable_subscription_delete_interval_total", - "non_durable_subscription_create_interval_total", - "non_durable_subscription_delete_interval_total", - "failed_subscription_create_alter_resume_interval_total", - "failed_subscription_delete_interval_total", - "mqsubrq_interval_total", - "failed_mqsubrq_interval_total", + "durable_subscription_create_total", + "durable_subscription_alter_total", + "durable_subscription_resume_total", + "durable_subscription_delete_total", + "non_durable_subscription_create_total", + "non_durable_subscription_delete_total", + "failed_subscription_create_alter_resume_total", + "failed_subscription_delete_total", + "mqsubrq_total", + "failed_mqsubrq_total", "durable_subscriber_high_water_mark", "durable_subscriber_low_water_mark", "non_durable_subscriber_high_water_mark", "non_durable_subscriber_low_water_mark", - "topic_mqput_mqput1_interval_total", - "topic_put_bytes_interval_total", - "failed_topic_mqput_mqput1_interval_total", - "persistent_topic_mqput_mqput1_interval_total", - "non_persistent_topic_mqput_mqput1_interval_total", - "published_to_subscribers_message_interval_total", - "published_to_subscribers_bytes_interval_total", - "mqconn_mqconnx_interval_total", - "failed_mqconn_mqconnx_interval_total", - "mqdisc_interval_total", + "topic_mqput_mqput1_total", + "topic_put_bytes_total", + "failed_topic_mqput_mqput1_total", + "persistent_topic_mqput_mqput1_total", + "non_persistent_topic_mqput_mqput1_total", + "published_to_subscribers_message_total", + 
"published_to_subscribers_bytes_total", + "mqconn_mqconnx_total", + "failed_mqconn_mqconnx_total", + "mqdisc_total", "concurrent_connections_high_water_mark", - "mqopen_interval_total", - "failed_mqopen_interval_total", - "mqclose_interval_total", - "failed_mqclose_interval_total", - "mqinq_interval_total", - "failed_mqinq_interval_total", - "mqset_interval_total", - "failed_mqset_interval_total", - "persistent_message_mqput_interval_total", - "persistent_message_mqput1_interval_total", - "persistent_message_put_bytes_interval_total", - "non_persistent_message_mqput_interval_total", - "non_persistent_message_mqput1_interval_total", - "non_persistent_message_put_bytes_interval_total", - "mqput_mqput1_interval_total", - "mqput_mqput1_bytes_interval_total", - "failed_mqput_interval_total", - "failed_mqput1_interval_total", - "mqstat_interval_total", - "persistent_message_destructive_get_interval_total", - "persistent_message_browse_interval_total", - "persistent_message_get_bytes_interval_total", - "persistent_message_browse_bytes_interval_total", - "non_persistent_message_destructive_get_interval_total", - "non_persistent_message_browse_interval_total", - "non_persistent_message_get_bytes_interval_total", - "non_persistent_message_browse_bytes_interval_total", - "destructive_get_interval_total", - "destructive_get_bytes_interval_total", - "failed_mqget_interval_total", - "failed_browse_interval_total", - "mqctl_interval_total", - "expired_message_interval_total", - "purged_queue_interval_total", - "mqcb_interval_total", - "failed_mqcb_interval_total", - "commit_interval_total", - "rollback_interval_total", + "mqopen_total", + "failed_mqopen_total", + "mqclose_total", + "failed_mqclose_total", + "mqinq_total", + "failed_mqinq_total", + "mqset_total", + "failed_mqset_total", + "persistent_message_mqput_total", + "persistent_message_mqput1_total", + "persistent_message_put_bytes_total", + "non_persistent_message_mqput_total", + "non_persistent_message_mqput1_total", + 
"non_persistent_message_put_bytes_total", + "mqput_mqput1_total", + "mqput_mqput1_bytes_total", + "failed_mqput_total", + "failed_mqput1_total", + "mqstat_total", + "persistent_message_destructive_get_total", + "persistent_message_browse_total", + "persistent_message_get_bytes_total", + "persistent_message_browse_bytes_total", + "non_persistent_message_destructive_get_total", + "non_persistent_message_browse_total", + "non_persistent_message_get_bytes_total", + "non_persistent_message_browse_bytes_total", + "destructive_get_total", + "destructive_get_bytes_total", + "failed_mqget_total", + "failed_browse_total", + "mqctl_total", + "expired_message_total", + "purged_queue_total", + "mqcb_total", + "failed_mqcb_total", + "commit_total", + "rollback_total", } return names } From 4ec28870479a6c3bd93c41c00501d8b9fa1723fe Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Wed, 20 Jun 2018 15:21:31 +0100 Subject: [PATCH 10/12] Security upgrade for libgcrypt20 --- install-mq.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install-mq.sh b/install-mq.sh index 3e99b05b..cb14ab6a 100644 --- a/install-mq.sh +++ b/install-mq.sh @@ -139,7 +139,7 @@ rm -rf ${DIR_EXTRACT} # Apply any bug fixes not included in base Ubuntu or MQ image. 
# Don't upgrade everything based on Docker best practices https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/#run -$UBUNTU && apt-get upgrade -y gpgv gnupg +$UBUNTU && apt-get upgrade -y libgcrypt20 # End of bug fixes # Clean up cached files From 5a63591b187f2aad7fb1642949c7862931befcf3 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Mon, 25 Jun 2018 12:13:11 +0100 Subject: [PATCH 11/12] Remove watermark metrics --- internal/metrics/mapping.go | 10 +++++----- test/docker/mqmetric_test_util.go | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/metrics/mapping.go b/internal/metrics/mapping.go index 8c6ff8a5..742194fd 100644 --- a/internal/metrics/mapping.go +++ b/internal/metrics/mapping.go @@ -66,10 +66,10 @@ func generateMetricNamesMap() map[string]metricLookup { "STATMQI/SUBSCRIBE/Subscription delete failure count": metricLookup{"failed_subscription_delete_total", true}, "STATMQI/SUBSCRIBE/MQSUBRQ count": metricLookup{"mqsubrq_total", true}, "STATMQI/SUBSCRIBE/Failed MQSUBRQ count": metricLookup{"failed_mqsubrq_total", true}, - "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": metricLookup{"durable_subscriber_high_water_mark", true}, - "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": metricLookup{"durable_subscriber_low_water_mark", true}, - "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": metricLookup{"non_durable_subscriber_high_water_mark", true}, - "STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": metricLookup{"non_durable_subscriber_low_water_mark", true}, + "STATMQI/SUBSCRIBE/Durable subscriber - high water mark": metricLookup{"durable_subscriber_high_water_mark", false}, + "STATMQI/SUBSCRIBE/Durable subscriber - low water mark": metricLookup{"durable_subscriber_low_water_mark", false}, + "STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": metricLookup{"non_durable_subscriber_high_water_mark", false}, + "STATMQI/SUBSCRIBE/Non-durable 
subscriber - low water mark": metricLookup{"non_durable_subscriber_low_water_mark", false}, "STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": metricLookup{"topic_mqput_mqput1_total", true}, "STATMQI/PUBLISH/Interval total topic bytes put": metricLookup{"topic_put_bytes_total", true}, "STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": metricLookup{"failed_topic_mqput_mqput1_total", true}, @@ -80,7 +80,7 @@ func generateMetricNamesMap() map[string]metricLookup { "STATMQI/CONNDISC/MQCONN/MQCONNX count": metricLookup{"mqconn_mqconnx_total", true}, "STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": metricLookup{"failed_mqconn_mqconnx_total", true}, "STATMQI/CONNDISC/MQDISC count": metricLookup{"mqdisc_total", true}, - "STATMQI/CONNDISC/Concurrent connections - high water mark": metricLookup{"concurrent_connections_high_water_mark", true}, + "STATMQI/CONNDISC/Concurrent connections - high water mark": metricLookup{"concurrent_connections_high_water_mark", false}, "STATMQI/OPENCLOSE/MQOPEN count": metricLookup{"mqopen_total", true}, "STATMQI/OPENCLOSE/Failed MQOPEN count": metricLookup{"failed_mqopen_total", true}, "STATMQI/OPENCLOSE/MQCLOSE count": metricLookup{"mqclose_total", true}, diff --git a/test/docker/mqmetric_test_util.go b/test/docker/mqmetric_test_util.go index ab10ff0d..2f521317 100644 --- a/test/docker/mqmetric_test_util.go +++ b/test/docker/mqmetric_test_util.go @@ -201,10 +201,10 @@ func metricNames() []string { "failed_subscription_delete_total", "mqsubrq_total", "failed_mqsubrq_total", - "durable_subscriber_high_water_mark", - "durable_subscriber_low_water_mark", - "non_durable_subscriber_high_water_mark", - "non_durable_subscriber_low_water_mark", + // disabled : "durable_subscriber_high_water_mark", + // disabled : "durable_subscriber_low_water_mark", + // disabled : "non_durable_subscriber_high_water_mark", + // disabled : "non_durable_subscriber_low_water_mark", "topic_mqput_mqput1_total", "topic_put_bytes_total", "failed_topic_mqput_mqput1_total", 
@@ -215,7 +215,7 @@ func metricNames() []string { "mqconn_mqconnx_total", "failed_mqconn_mqconnx_total", "mqdisc_total", - "concurrent_connections_high_water_mark", + // disabled : "concurrent_connections_high_water_mark", "mqopen_total", "failed_mqopen_total", "mqclose_total", From ecace2b033c2b8b02986565f2a5962d1d67a7d51 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Thu, 21 Jun 2018 14:17:31 +0100 Subject: [PATCH 12/12] Change package security updates to use install --- install-mq.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install-mq.sh b/install-mq.sh index cb14ab6a..2ade53c1 100644 --- a/install-mq.sh +++ b/install-mq.sh @@ -139,7 +139,7 @@ rm -rf ${DIR_EXTRACT} # Apply any bug fixes not included in base Ubuntu or MQ image. # Don't upgrade everything based on Docker best practices https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/#run -$UBUNTU && apt-get upgrade -y libgcrypt20 +$UBUNTU && apt-get install -y gnupg gpgv libgcrypt20 perl-base --only-upgrade # End of bug fixes # Clean up cached files