Skip to content

Commit

Permalink
blobs: prevent NULL contentType in db (#1355)
Browse files Browse the repository at this point in the history
Set `blob."contentType"` values to `application/octet-stream` if no mime type is supplied on upload.

Related: #1351
  • Loading branch information
alxndrsn authored Feb 4, 2025
1 parent 37b85b5 commit dfe0269
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 18 deletions.
6 changes: 3 additions & 3 deletions docs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3892,7 +3892,7 @@ paths:
- Individual Form
summary: Downloading a Form Attachment
description: |-
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given.
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given.

This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file.
operationId: Downloading a Form Attachment
Expand Down Expand Up @@ -4422,7 +4422,7 @@ paths:
summary: Downloading a Draft Form Attachment
description: To download a single file, use this endpoint. The appropriate `Content-Disposition`
(attachment with a filename or Dataset name) and `Content-Type` (based on
the type supplied at upload time or `text/csv` in the case of a linked Dataset)
the type supplied at upload time, or `text/csv` in the case of a linked Dataset, or `application/octet-stream` otherwise)
will be given.

This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file.
Expand Down Expand Up @@ -5123,7 +5123,7 @@ paths:
- Published Form Versions
summary: Downloading a Form Version Attachment
description: |-
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given.
To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given.

This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file.
operationId: Downloading a Form Version Attachment
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2025 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE blobs SET "contentType"='application/octet-stream' WHERE "contentType" IS NULL;
ALTER TABLE blobs
ALTER COLUMN "contentType" SET DEFAULT 'application/octet-stream',
ALTER COLUMN "contentType" SET NOT NULL
`);

const down = (db) => db.raw(`
ALTER TABLE blobs
ALTER COLUMN "contentType" DROP NOT NULL,
ALTER COLUMN "contentType" DROP DEFAULT
`);

module.exports = { up, down };
2 changes: 1 addition & 1 deletion lib/model/query/blobs.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const { construct } = require('../../util/util');
const ensure = (blob) => ({ oneFirst }) => oneFirst(sql`
with ensured as
(insert into blobs (sha, md5, content, "contentType") values
(${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null})
(${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`})
on conflict (sha) do update set sha = ${blob.sha}
returning id)
select id from ensured`);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const assert = require('node:assert/strict');
const { hash, randomBytes } = require('node:crypto');

const { // eslint-disable-line object-curly-newline
assertTableContents,
describeMigration,
rowsExistFor,
} = require('./utils'); // eslint-disable-line object-curly-newline

describeMigration('20250113-01-disable-nullable-blob-content-types', ({ runMigrationBeingTested }) => {
const aBlobWith = props => {
const randomContent = randomBytes(100);
const md5 = hash('md5', randomContent); // eslint-disable-line no-multi-spaces
const sha = hash('sha1', randomContent);
return { md5, sha, ...props };
};
const aBlob = () => aBlobWith({});

const blob1 = aBlobWith({ contentType: null });
const blob2 = aBlobWith({ contentType: 'text/plain' });

before(async () => {
await rowsExistFor('blobs', blob1, blob2);

await runMigrationBeingTested();
});

it('should change existing NULL contentType values to application/octet-stream, and preserve non-NULL values', async () => {
await assertTableContents('blobs',
{ ...blob1, contentType: 'application/octet-stream' },
{ ...blob2, contentType: 'text/plain' },
);
});

it(`should create new blobs with contentType 'application/octet-stream' (contentType not supplied)`, async () => {
const { md5, sha } = aBlob();

const created = await db.oneFirst(sql`
INSERT INTO blobs (md5, sha)
VALUES(${md5}, ${sha})
RETURNING "contentType"
`);

assert.equal(created, 'application/octet-stream');
});

it(`should create new blobs with contentType 'application/octet-stream' (supplied DEFAULT contentType)`, async () => {
const { md5, sha } = aBlob();

const created = await db.oneFirst(sql`
INSERT INTO blobs (md5, sha, "contentType")
VALUES(${md5}, ${sha}, DEFAULT)
RETURNING "contentType"
`);

assert.equal(created, 'application/octet-stream');
});
});
63 changes: 63 additions & 0 deletions test/db-migrations/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,72 @@ function assertIncludes(actual, expected) {
}
}

async function rowsExistFor(tableName, ...rows) {
if (!rows.length) throw new Error(`Attempted to insert 0 rows into table ${tableName}`);

assertAllHaveSameProps(rows); // eslint-disable-line no-use-before-define
const colNames = Object.keys(rows[0]);
if (!colNames.length) throw new Error(`Attempted to insert data with 0 defined columns`);

const table = sql.identifier([tableName]);
const cols = sql.join(colNames.map(k => sql.identifier([k])), sql`,`);

return db.query(
sql`
INSERT INTO ${table} (${cols})
SELECT ${cols}
FROM JSON_POPULATE_RECORDSET(NULL::${table}, ${JSON.stringify(rows)})
`,
);
}

async function assertTableContents(tableName, ...expected) {
const { rows: actual } = await db.query(sql`SELECT * FROM ${sql.identifier([tableName])}`);

assert.equal(
actual.length,
expected.length,
`Unexpected number of rows in table '${tableName}'. ` +
`Expected ${expected.length} but got ${actual.length}. ` +
`DB returned: ${JSON.stringify(actual, null, 2)}`,
);

const remainingRows = [ ...actual ];
for (let i=0; i<expected.length; ++i) { // eslint-disable-line no-plusplus
const x = expected[i];
let found = false;
for (let j=0; j<remainingRows.length; ++j) { // eslint-disable-line no-plusplus
const rr = remainingRows[j];
try {
assertIncludes(rr, x);
remainingRows.splice(j, 1);
found = true;
break;
} catch (err) { /* keep searching */ }
}
if (!found) {
const filteredRemainingRows = remainingRows.map(r => _.pick(r, Object.keys(x)));
assert.fail(`Expected row ${i} not found in table '${tableName}':\n json=${JSON.stringify({ remainingRows, filteredRemainingRows, expectedRow: x })}`);
}
}
}

function assertAllHaveSameProps(list) {
if (list.length < 2) return;
const [ first, ...rest ] = list.map(Object.keys);

rest.forEach((v, i) => {
assert.deepEqual(v, first, `Row #${i+1} has different props to row #0. All supplied rows must have the same props.`);
});
}

module.exports = {
assertIndexExists,
assertTableContents,
assertTableDoesNotExist,
assertTableSchema,

describeMigration,

rowsExistFor,
};
4 changes: 1 addition & 3 deletions test/e2e/s3/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,7 @@ describe('s3 support', () => {

const filepath = `${attDir}/${name}`;

// "null" is a questionable content-type, but matches current central behaviour
// See: https://github.com/getodk/central-backend/pull/1352
const expectedContentType = mimetypeFor(name) ?? 'null';
const expectedContentType = mimetypeFor(name) ?? 'application/octet-stream';

const actualContentType = res.headers.get('content-type');
should.equal(actualContentType, expectedContentType);
Expand Down
18 changes: 8 additions & 10 deletions test/integration/api/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -4377,8 +4377,7 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
body.toString().should.equal('testvideo');
})))))));

// Ref https://github.com/getodk/central-backend/issues/1351
it('should attach a given file with empty Content-Type', testService((service) =>
it('should attach a given file with empty Content-Type and serve it with default mime type', testService((service) =>
service.login('alice', (asAlice) =>
asAlice.post('/v1/projects/1/forms?publish=true')
.set('Content-Type', 'application/xml')
Expand All @@ -4394,13 +4393,12 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
.expect(200)
.then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4')
.expect(200)
.then(({ headers, text }) => {
headers['content-type'].should.equal('null');
text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing
.then(({ headers, body }) => {
headers['content-type'].should.equal('application/octet-stream');
body.toString().should.equal('testvideo');
})))))));

// Ref https://github.com/getodk/central-backend/issues/1351
it('should attach a given file with missing Content-Type', testService((service) =>
it('should attach a given file with missing Content-Type and serve it with default mime type', testService((service) =>
service.login('alice', (asAlice) =>
asAlice.post('/v1/projects/1/forms?publish=true')
.set('Content-Type', 'application/xml')
Expand All @@ -4416,9 +4414,9 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff
.expect(200)
.then(() => asAlice.get('/v1/projects/1/forms/binaryType/submissions/both/attachments/my_file1.mp4')
.expect(200)
.then(({ headers, text }) => {
headers['content-type'].should.equal('null');
text.toString().should.equal('testvideo'); // use 'text' instead of 'body' to avoid supertest response parsing
.then(({ headers, body }) => {
headers['content-type'].should.equal('application/octet-stream');
body.toString().should.equal('testvideo');
})))))));

it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) =>
Expand Down
2 changes: 1 addition & 1 deletion test/integration/task/s3.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const aBlobExistsWith = async (container, { status }) => {
const blob = await Blob.fromBuffer(crypto.randomBytes(100));
await container.run(sql`
INSERT INTO BLOBS (sha, md5, content, "contentType", s3_status)
VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || null}, ${status})
VALUES (${blob.sha}, ${blob.md5}, ${sql.binary(blob.content)}, ${blob.contentType || sql`DEFAULT`}, ${status})
`);
};

Expand Down

0 comments on commit dfe0269

Please sign in to comment.