From ad0db842c361a3106ab18dcf03ad53a5d367c95c Mon Sep 17 00:00:00 2001 From: sorja Date: Mon, 13 Jan 2025 14:06:40 +0200 Subject: [PATCH 1/4] 3855 - Add Arrays.chunk --- package.json | 2 ++ src/utils/arrays.ts | 5 ++++- yarn.lock | 12 ++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index c232913d3d..1fbbb124d5 100644 --- a/package.json +++ b/package.json @@ -167,6 +167,7 @@ "@openforis/arena-core": "^0.0.79", "@reduxjs/toolkit": "^1.8.1", "@socket.io/redis-streams-adapter": "^0.1.0", + "@types/lodash.chunk": "^4.2.9", "@types/multer": "^1.4.7", "archiver": "^6.0.2", "assert": "^2.0.0", @@ -208,6 +209,7 @@ "jsep": "^1.3.4", "json2csv": "^5.0.7", "jsonwebtoken": "^8.5.1", + "lodash.chunk": "^4.2.0", "lodash.clonedeep": "^4.5.0", "lodash.debounce": "^4.0.8", "lodash.differencewith": "^4.5.0", diff --git a/src/utils/arrays.ts b/src/utils/arrays.ts index 391dc17d53..05a28861d7 100644 --- a/src/utils/arrays.ts +++ b/src/utils/arrays.ts @@ -1,4 +1,6 @@ // @ts-ignore +import * as chunk from 'lodash.chunk' +// @ts-ignore import * as differenceWith from 'lodash.differencewith' // @ts-ignore import * as range from 'lodash.range' @@ -21,11 +23,12 @@ const startsWith = (list: T[], start: T[]): boolean => start.every((item, ind const unique = (array: Array): Array => uniqWith(array, Objects.isEqual) export const Arrays = { + chunk, difference, intersection, - startsWith, range, reverse, + startsWith, unique, uniqueBy, } diff --git a/yarn.lock b/yarn.lock index 1390053c26..f154dd74a8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3564,6 +3564,13 @@ dependencies: "@types/node" "*" +"@types/lodash.chunk@^4.2.9": + version "4.2.9" + resolved "https://registry.yarnpkg.com/@types/lodash.chunk/-/lodash.chunk-4.2.9.tgz#60da44c404dfa8b01b426034c1183e5eb9b09727" + integrity sha512-Z9VtFUSnmT0No/QymqfG9AGbfOA4O5qB/uyP89xeZBqDAsKsB4gQFTqt7d0pHjbsTwtQ4yZObQVHuKlSOhIJ5Q== + dependencies: + "@types/lodash" "*" + "@types/lodash.clonedeep@^4.5.6": version "4.5.9" resolved "https://registry.yarnpkg.com/@types/lodash.clonedeep/-/lodash.clonedeep-4.5.9.tgz#ea48276c7cc18d080e00bb56cf965bcceb3f0fc1" @@ -10455,6 +10462,11 @@ lodash-es@^4.2.1: resolved "https://registry.yarnpkg.com/lodash-es/-/lodash-es-4.17.21.tgz#43e626c46e6591b7750beb2b50117390c609e3ee" integrity sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw== +lodash.chunk@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/lodash.chunk/-/lodash.chunk-4.2.0.tgz#66e5ce1f76ed27b4303d8c6512e8d1216e8106bc" + integrity sha512-ZzydJKfUHJwHa+hF5X66zLFCBrWn5GeF28OHEr4WVWtNDXlQ/IjWKPBiikqKo2ne0+v6JgCgJ0GzJp8k8bHC7w== + lodash.clonedeep@^4.5.0: version "4.5.0" resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef" From ff334c479393e48cd18ec4734e6878c6370c2609 Mon Sep 17 00:00:00 2001 From: sorja Date: Mon, 13 Jan 2025 14:07:17 +0200 Subject: [PATCH 2/4] 3855 - Add env var: authCookie --- .env.template | 2 ++ src/server/utils/processEnv.ts | 1 + 2 files changed, 3 insertions(+) diff --git a/.env.template b/.env.template index a6f322af7f..9cd6ffe1b0 100644 --- a/.env.template +++ b/.env.template @@ -57,3 +57,5 @@ AWS_ACCESS_KEY_ID=your-access-key-id AWS_SECRET_ACCESS_KEY=your-secret-access-key AWS_REGION=eu-west-1 S3_BUCKET_NAME=fra-platform-s3 + +AUTH_COOKIE=fra-auth-token-copied-from-browser \ No newline at end of file diff --git a/src/server/utils/processEnv.ts b/src/server/utils/processEnv.ts index d77a650e61..feaf10e512 100644 --- a/src/server/utils/processEnv.ts +++ b/src/server/utils/processEnv.ts @@ -20,6 +20,7 @@ export const ProcessEnv = { // app appUri: process.env.APP_URI ?? 'http://localhost:9001', port: process.env.PORT ? Number(process.env.PORT) : 80, + authCookie: process.env.AUTH_COOKIE, // aws/s3 awsAccessKeyId: process.env.AWS_ACCESS_KEY_ID, From 7275ef3a5e9bd4a678964e31484990d52d8247b8 Mon Sep 17 00:00:00 2001 From: sorja Date: Mon, 13 Jan 2025 17:07:58 +0200 Subject: [PATCH 3/4] 3855 - Update test --- src/test/s3Files/index.ts | 119 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/test/s3Files/index.ts diff --git a/src/test/s3Files/index.ts b/src/test/s3Files/index.ts new file mode 100644 index 0000000000..04f8e53123 --- /dev/null +++ b/src/test/s3Files/index.ts @@ -0,0 +1,119 @@ +import 'tsconfig-paths/register' +import 'dotenv/config' + +import axios from 'axios' +import { Arrays } from 'utils/arrays' +import { Promises } from 'utils/promises' + +import { Link } from 'meta/cycleData' +import { FileSummary } from 'meta/file' + +import { AssessmentController } from 'server/controller/assessment' +import { DB, Schemas } from 'server/db' +import { FileStorage } from 'server/service/fileStorage' +import { ProcessEnv } from 'server/utils' +import { Logger } from 'server/utils/logger' + +const client = DB + +// Note: Running this script many times may cause runners IP to timeout as protection mechanism + +// 1 -- Test all files exist in s3 +const TEST_S3_FILES = true +// 2 -- Test links in assessment_cycle.link work (redundant if files exist) +const TEST_LINKS = false + +const BATCH_SIZE = 25 + +// eslint-disable-next-line no-promise-executor-return +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) + +const _allFilesExist = async (files: Array): Promise => { + if (!TEST_S3_FILES) return true + const batches = Arrays.chunk(files, BATCH_SIZE) + const batchResults = await Promises.each(batches, async (batch) => { + return Promises.each(batch, async ({ uuid: key }) => { + const exists = await FileStorage.fileExists({ key }) + if (!exists) Logger.info(`Missing file in S3: ${key}`) + return exists + }) + }) + + return batchResults.flat().every((exists) => exists) +} + +const _testFileDownload = async (url: string): Promise => { + try { + await sleep(1000) + const response = await axios({ + method: 'GET', + url, + responseType: 'stream', + timeout: 5000, + validateStatus: (status) => status === 200, + headers: { + // Copy this from browser request and add it to .env - check .env.template + Cookie: ProcessEnv.authCookie, + }, + }) + + const contentDisposition = response.headers['content-disposition'] + const contentType = response.headers['content-type'] + + // dont persist + response.data.destroy() + + // Either should be set + return Boolean(contentDisposition || contentType) + } catch (error) { + Logger.error(`Failed to test download for ${url}: ${error.message}`) + return false + } +} + +const _allFileLinksWork = async (): Promise => { + if (!TEST_LINKS) return true + const assessments = await AssessmentController.getAll({}) + const schemas = assessments.flatMap((assessment) => + assessment.cycles.map((cycle) => Schemas.getNameCycle(assessment, cycle)) + ) + + // Replace production URL with appUri (e.g. localhost) + const cb = ({ link }: Link): string => link.replace('https://fra-data.fao.org', ProcessEnv.appUri) + + const linksTested = await Promises.each(schemas, async (schema) => { + const links = await client.map( + `select * from ${schema}.link where link ilike '%https://fra-data.fao.org/api/cycle-data/repository/file/%'`, + [], + cb + ) + const results = await Promises.each(links, async (link) => { + const result = await _testFileDownload(link) + return result + }) + // Every link OK? + return results.every(Boolean) + }) + + // Every link in every assessment OK? + return linksTested.every(Boolean) +} + +const exec = async (): Promise => { + const files = await client.many(`select * from public.file`) + const allFilesExist = await _allFilesExist(files) + const allFileLinksWork = await _allFileLinksWork() + + Logger.debug(`All files exist in S3? ${allFilesExist ? 'yes' : 'no'}`) + Logger.debug(`All links work? ${allFileLinksWork ? 'yes' : 'no'}`) +} + +const start = new Date().getTime() +Logger.debug(`========== START FETCHING S3 FILES ${start}`) + +exec().then(() => { + const end = new Date().getTime() + Logger.debug(`========== END ${end} ELAPSED ${(end - start) / 1000}s`) + DB.$pool.end() + process.exit(0) +}) From dbb15258077ee62e772bca4c41c3985e08a4af51 Mon Sep 17 00:00:00 2001 From: sorja Date: Mon, 13 Jan 2025 17:14:40 +0200 Subject: [PATCH 4/4] 3855 - Remove authCookie env var --- .env.template | 2 -- src/server/utils/processEnv.ts | 1 - src/test/s3Files/index.ts | 4 +++- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.env.template b/.env.template index 9cd6ffe1b0..a6f322af7f 100644 --- a/.env.template +++ b/.env.template @@ -57,5 +57,3 @@ AWS_ACCESS_KEY_ID=your-access-key-id AWS_SECRET_ACCESS_KEY=your-secret-access-key AWS_REGION=eu-west-1 S3_BUCKET_NAME=fra-platform-s3 - -AUTH_COOKIE=fra-auth-token-copied-from-browser \ No newline at end of file diff --git a/src/server/utils/processEnv.ts b/src/server/utils/processEnv.ts index feaf10e512..d77a650e61 100644 --- a/src/server/utils/processEnv.ts +++ b/src/server/utils/processEnv.ts @@ -20,7 +20,6 @@ export const ProcessEnv = { // app appUri: process.env.APP_URI ?? 'http://localhost:9001', port: process.env.PORT ? Number(process.env.PORT) : 80, - authCookie: process.env.AUTH_COOKIE, // aws/s3 awsAccessKeyId: process.env.AWS_ACCESS_KEY_ID, diff --git a/src/test/s3Files/index.ts b/src/test/s3Files/index.ts index 04f8e53123..d3e1e3a423 100644 --- a/src/test/s3Files/index.ts +++ b/src/test/s3Files/index.ts @@ -17,6 +17,8 @@ import { Logger } from 'server/utils/logger' const client = DB // Note: Running this script many times may cause runners IP to timeout as protection mechanism +// Get from browser cookies +const COOKIE = '' // 1 -- Test all files exist in s3 const TEST_S3_FILES = true @@ -53,7 +55,7 @@ const _testFileDownload = async (url: string): Promise => { validateStatus: (status) => status === 200, headers: { // Copy this from browser request and add it to .env - check .env.template - Cookie: ProcessEnv.authCookie, + Cookie: COOKIE, }, })