Skip to content

Commit

Permalink
feat: improved cloudwatch analytics rollup BM-1092 (#3381)
Browse files Browse the repository at this point in the history
#### Motivation

We currently get 10s of millions of requests per day, which are all
logged by cloudwatch into a s3 bucket, these logs are very large when
looking over the entire history of basemaps, so we rollup the logs into
hourly summaries of the requests.

The current implementation rolls up the logs into coarse buckets these
buckets give a good overview of usage but do not allow us to dig into
more detailed usage trends such as which datasets are used in what
formats.

### Modification

Introduces a new analytic lambda to run side by side with the existing
lambda until we are comfortable to turn off the old lambda.

The logs are now rolled aggregated on more fields such as 
- tiles
  - zoom level
  - matrix set (and requested matrix set, /3857/ vs /WebMercatorQuad/ )
  - extension, png vs pbf
- tile set
- api key and type
  • Loading branch information
blacha authored Jan 13, 2025
1 parent de4fbf5 commit c14dcb8
Show file tree
Hide file tree
Showing 28 changed files with 1,705 additions and 19 deletions.
288 changes: 272 additions & 16 deletions package-lock.json

Large diffs are not rendered by default.

35 changes: 33 additions & 2 deletions packages/_infra/src/analytics/edge.analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ import { RetentionDays } from 'aws-cdk-lib/aws-logs';
import { BlockPublicAccess, Bucket } from 'aws-cdk-lib/aws-s3';
import { Construct } from 'constructs';

const CODE_PATH = '../lambda-analytics/dist';
import { getConfig } from '../config.js';

const CodePath = '../lambda-analytics/dist';
const CodePathV2 = '../lambda-analytics-cloudfront/dist';

export interface EdgeAnalyticsProps extends StackProps {
distributionId: string;
Expand Down Expand Up @@ -36,7 +39,7 @@ export class EdgeAnalytics extends Stack {
memorySize: 2048,
timeout: Duration.minutes(10),
handler: 'index.handler',
code: lambda.Code.fromAsset(CODE_PATH),
code: lambda.Code.fromAsset(CodePath),
environment: {
[Env.Analytics.CloudFrontId]: distributionId,
[Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`,
Expand All @@ -53,5 +56,33 @@ export class EdgeAnalytics extends Stack {
// Run this lambda function every hour
const rule = new Rule(this, 'AnalyticRule', { schedule: Schedule.rate(Duration.hours(1)) });
rule.addTarget(new LambdaFunction(this.lambda));

const v2Lambda = new lambda.Function(this, 'AnalyticV2Lambda', {
runtime: lambda.Runtime.NODEJS_LATEST,
memorySize: 2048,
timeout: Duration.minutes(10),
handler: 'index.handler',
code: lambda.Code.fromAsset(CodePathV2),
environment: {
[Env.Analytics.CloudFrontId]: distributionId,
[Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`,
[Env.Analytics.CloudFrontSourceBucket]: `s3://${logBucket.bucketName}`,
[Env.Analytics.MaxRecords]: String(24 * 7 * 4),
[Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId) ?? '',
[Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey) ?? '',
[Env.Analytics.ElasticIndexName]: getConfig().ElasticHistoryIndexName,
AWS_NODEJS_CONNECTION_REUSE_ENABLED: '1',
},
logRetention: RetentionDays.ONE_MONTH,
loggingFormat: lambda.LoggingFormat.JSON,
});

cacheBucket.grantReadWrite(v2Lambda);
logBucket.grantRead(v2Lambda);

// Run this lambda function every hour
new Rule(this, 'AnalyticV2Rule', { schedule: Schedule.rate(Duration.hours(1)) }).addTarget(
new LambdaFunction(v2Lambda),
);
}
}
5 changes: 5 additions & 0 deletions packages/_infra/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ export interface BaseMapsConfig {

/** AWS role config bucket */
AwsRoleConfigBucket: string;

/** Elastic Index to use for basemaps history data */
ElasticHistoryIndexName: string;
}

export const BaseMapsProdConfig: BaseMapsConfig = {
Expand All @@ -26,13 +29,15 @@ export const BaseMapsProdConfig: BaseMapsConfig = {
CloudFrontDns: ['basemaps.linz.govt.nz', 'tiles.basemaps.linz.govt.nz'],
PublicUrlBase: 'https://basemaps.linz.govt.nz',
AwsRoleConfigBucket: 'linz-bucket-config',
ElasticHistoryIndexName: 'basemaps-history',
};

export const BaseMapsDevConfig: BaseMapsConfig = {
CogBucket: ['basemaps-cog-test', ...BaseMapsProdConfig.CogBucket],
CloudFrontDns: ['dev.basemaps.linz.govt.nz', 'tiles.dev.basemaps.linz.govt.nz'],
PublicUrlBase: 'https://dev.basemaps.linz.govt.nz',
AwsRoleConfigBucket: 'linz-bucket-config',
ElasticHistoryIndexName: 'nonprod-basemaps-history',
};
/** Is this deployment intended for production */
export const IsProduction = process.env['NODE_ENV'] === 'production';
Expand Down
Empty file.
5 changes: 5 additions & 0 deletions packages/lambda-analytic-cloudfront/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# @basemaps/lambda-analytic-cloudfront

Generate analytics from CloudFront distribution statistics

Every hour this lambda function runs and generates a rolled up summary of usage by API Key
42 changes: 42 additions & 0 deletions packages/lambda-analytic-cloudfront/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"name": "@basemaps/lambda-analytic-cloudfront",
"version": "7.11.0",
"private": true,
"repository": {
"type": "git",
"url": "https://github.com/linz/basemaps.git",
"directory": "packages/lambda-analytic-cloudfront"
},
"author": {
"name": "Land Information New Zealand",
"url": "https://linz.govt.nz",
"organization": true
},
"type": "module",
"engines": {
"node": ">=16.0.0"
},
"license": "MIT",
"dependencies": {
"@basemaps/config": "^7.11.0",
"@basemaps/geo": "^7.11.0",
"@basemaps/shared": "^7.11.0",
"@elastic/elasticsearch": "^8.16.2",
"@linzjs/lambda": "^4.0.0",
"ua-parser-js": "^1.0.39"
},
"scripts": {
"test": "node --test",
"bundle": "../../scripts/bundle.mjs package.json"
},
"devDependencies": {
"@types/ua-parser-js": "^0.7.36"
},
"bundle": {
"entry": "src/index.ts",
"outdir": "dist/",
"external": [
"pino-pretty"
]
}
}
133 changes: 133 additions & 0 deletions packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import assert from 'node:assert';
import { afterEach, beforeEach, describe, it, TestContext } from 'node:test';
import { gzipSync } from 'node:zlib';

import { Env, fsa, FsMemory, LogConfig } from '@basemaps/shared';
import { Client } from '@elastic/elasticsearch';
import { LambdaRequest } from '@linzjs/lambda';
import { Context } from 'aws-lambda';

import { getYesterday } from '../date.js';
import { Elastic } from '../elastic.js';
import { main } from '../handler.js';
import { LogStats } from '../log.stats.js';
import { LogData } from './log.data.js';

interface IndexOperation {
index: { _index: string };
}
type BulkOperation = (IndexOperation | LogStats)[];

export class FakeLambdaRequest extends LambdaRequest {
constructor() {
super({}, {} as Context, LogConfig.get());
}
}

describe('analytic lambda', () => {
const memory = new FsMemory();
beforeEach(() => {
fsa.register('mem://', memory);
memory.files.clear();

Elastic.indexDelay = 1; // do not wait between requests
Elastic.minRequestCount = 0; // index everything
Elastic._client = undefined;
LogConfig.get().level = 'silent';
});

afterEach(() => {
LogConfig.get().level = 'info';
});

function setupEnv(t: TestContext): void {
t.mock.method(Env, 'get', (key: string): string => {
switch (key) {
case Env.Analytics.CacheBucket:
return 'mem://cache/';
case Env.Analytics.CloudFrontSourceBucket:
return 'mem://source/';
case Env.Analytics.CloudFrontId:
return 'cfid';
case Env.Analytics.MaxRecords:
return '1';
}
throw new Error(`Invalid test process.env access ${key}`);
});
}

it('should process some log data', async (t) => {
setupEnv(t);

const operations: BulkOperation[] = [];
Elastic._client = {
bulk(op: { operations: BulkOperation }) {
operations.push(op.operations);
return Promise.resolve({});
},
} as unknown as Client;

const YesterDay = getYesterday();
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');

await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));

await main(new FakeLambdaRequest());

// One call to insert
assert.equal(operations.length, 1);

const op = operations[0];

const indexOpt = op[0] as IndexOperation;
const logOpt = op[1] as LogStats;

// First Log line: /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp
assert.equal(indexOpt.index._index, 'basemaps-history-2020');
assert.equal(logOpt.apiType, 'd');
assert.equal(logOpt.tileMatrix, 'EPSG:3857');
assert.equal(logOpt.tileMatrixId, 'WebMercatorQuad');
assert.equal(logOpt.tileSet, 'aerial');
assert.equal(logOpt.z, 19);
assert.equal(logOpt.cacheHit, 1);
assert.equal(logOpt.cacheMiss, 0);
assert.equal(logOpt.total, 1);

assert.deepEqual(logOpt.ua, { os: 'linux', name: 'chrome', version: '85', variant: 'unknown' });

const files = [...memory.files.keys()];
assert.equal(files.length, 2); // two files one input one output

assert.equal(
files[1],
`mem://cache/RollUpV3/${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`,
);
});

it('should write errors to storage', async (t) => {
setupEnv(t);

Elastic._client = {
bulk() {
return Promise.resolve({ errors: ['Hello'] });
},
} as unknown as Client;

const YesterDay = getYesterday();
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');

await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));

const ret = await main(new FakeLambdaRequest()).catch((e: Error) => e);

assert.equal(String(ret), 'Error: Failed to index');

const files = [...memory.files.keys()];
assert.equal(files.length, 2); // two files one input one output

assert.ok(files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`));

const data = await fsa.read(new URL(files[1]));
assert.ok(data.toString().includes(JSON.stringify('Hello')));
});
});
15 changes: 15 additions & 0 deletions packages/lambda-analytic-cloudfront/src/__test__/log.data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { ulid } from 'ulid';

export const DevApiKey = 'd' + ulid().toLowerCase();
export const ClientApiKey = 'c' + ulid().toLowerCase();

export const LogData = `#Version: 1.0
#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end
2020-07-28 01:11:25 AKL50-C1 20753 255.255.255.141 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp 200 https://bar.com/ Mozilla/5.0%20(X11;%20Linux%20x86_64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.101%20Safari/537.36 api=${DevApiKey} - Hit sBUoz03SwR_hVZkdj0LVC1s_bKakd9ONcKTYRrQLuIR3VPBQUx5xog== basemaps.linz.govt.nz https 82 0.049 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 -- 21780 0.049 Hit image/webp 20320 - -
2020-07-28 01:16:13 SYD1-C2 156474 255.255.255.4 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/NZTM2000Quad/19/516542/319785.png 200 https://www.bar.com/ Mozilla/5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_4)%20AppleWebKit/605.1.15%20(KHTML,%20like%20Gecko)%20Version/13.1.2%20Safari/605.1.15 api=${DevApiKey}&foo=bar - Hit 9KNnEESjZA-yVs62ffwtRYNaa0gpYKLeEEHH490dmO7AAu3ZxnPc8Q== basemaps.linz.govt.nz https 77 1.791 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 - - 19468 0.028 Hit image/png 155886 - -
2020-07-28 01:16:21 SYD1-C2 21223 255.255.255.73 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/3857/18/257866/162011.jpeg 200 https://bar.com/map/ Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.102%20Safari/537.36 api=${DevApiKey} - Miss a5nrTCsdsP5EDQ9EXkUQQJMCJTlbRUz5JIxowZ-1kRriRDUmLPxvVQ== basemaps.linz.govt.nz https 76 0.222 - TLSv1.3 TLS_AES_128_GCM_SHA256 Miss HTTP/2.0 - - 57799 0.222 Miss image/jpeg 20797 - -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:3857/WMTSCapabilities.xml 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:2193/18/257866/162011.pbf 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/antipodes-islands-satellite-2019-2020-0.5m/NZTM2000Quad/18/257866/162011.webp 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/elevation/WebMercatorQuad/18/257866/162011.png 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey}&pipeline=terrain-rgb - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
`.trim();
10 changes: 10 additions & 0 deletions packages/lambda-analytic-cloudfront/src/bin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { LogConfig } from '@basemaps/shared';
import { LambdaRequest } from '@linzjs/lambda';
import { Context } from 'aws-lambda';

import { main } from './handler.js';

/**
* Manually run the lambda function, this can be helpful for debugging the analytic roll up process
*/
main(new LambdaRequest(null, {} as Context, LogConfig.get())).catch((e) => console.error(e));
33 changes: 33 additions & 0 deletions packages/lambda-analytic-cloudfront/src/date.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
export function getYesterday(): Date {
// Process up to about a day ago
const maxDate = new Date();
maxDate.setUTCMinutes(0);
maxDate.setUTCSeconds(0);
maxDate.setUTCMilliseconds(0);
maxDate.setUTCDate(maxDate.getUTCDate() - 1);
return maxDate;
}

export function* byDay(startDate: Date, endDate: Date): Generator<string> {
const currentDate = new Date(startDate);
currentDate.setUTCMinutes(0);
currentDate.setUTCSeconds(0);
currentDate.setUTCMilliseconds(0);
while (true) {
yield currentDate.toISOString().slice(0, 10);
currentDate.setUTCDate(currentDate.getUTCDate() - 1);
if (currentDate.getTime() < endDate.getTime()) break;
}
}

export function* byMonth(startDate: Date, endDate: Date): Generator<string> {
const currentDate = new Date(startDate);
currentDate.setUTCMinutes(0);
currentDate.setUTCSeconds(0);
currentDate.setUTCMilliseconds(0);
while (true) {
yield currentDate.toISOString().slice(0, 7);
currentDate.setUTCMonth(currentDate.getUTCMonth() - 1);
if (currentDate.getTime() < endDate.getTime()) break;
}
}
Loading

0 comments on commit c14dcb8

Please sign in to comment.