From bb17859c2044232ef37aff7aad59c844dcdd0076 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Wed, 5 Jun 2019 14:17:54 +0200 Subject: [PATCH] read Git client versions from audit log Previously we read the versions from the haproxy log and used the originating IP address to distinguish machines/users. In a load balancer setup this IP address can be the same for all requests, and consequently the number of users per version is not counted correctly. Fix this by using the audit log to count the Git versions. Also count the versions by user ID and not by IP address. see https://github.com/Autodesk/hubble/issues/184 --- updater/scripts/git-versions.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/updater/scripts/git-versions.sh b/updater/scripts/git-versions.sh index 8de81abb..46706403 100644 --- a/updater/scripts/git-versions.sh +++ b/updater/scripts/git-versions.sh @@ -4,11 +4,11 @@ # echo -e "Git version\tusers" -zgrep -hF '||git/' /var/log/haproxy.log.1* | - perl -lape 's/.* (.*):.* \[.*\|\|git\/(\d+(?:\.\d+){0,2}).*/$1 $2/' | +zcat -f /var/log/github-audit.log.1* | + perl -ne 'print if s/.*agent=git\/(\d+(?:\.\d+){0,2}).*"user_id":(\d+).*/\2\t\1/' | sort | uniq | - perl -lape 's/[^ ]+ //' | + perl -lape 's/\d+ *//' | sort -r -V | uniq -ic | awk '{printf("%s\t%s\n",$2,$1)}'