diff --git a/src/handlers/oai.js b/src/handlers/oai.js index 382571f1..ee89c703 100644 --- a/src/handlers/oai.js +++ b/src/handlers/oai.js @@ -18,6 +18,23 @@ const allowedVerbs = [ "ListSets", ]; +function invalidDateParameters(verb, dates) { + if (!["ListRecords", "ListIdentifiers"].includes(verb)) return []; + + const regex = new RegExp(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z$/); + let invalidDates = []; + + for (const [dateParameter, dateValue] of Object.entries(dates)) { + if (dateValue && !regex.test(dateValue)) { + invalidDates.push(dateParameter); + } else { + continue; + } + } + + return invalidDates; +} + /** * A function to support the OAI-PMH harvesting specfication */ @@ -29,14 +46,24 @@ exports.handler = wrap(async (event) => { identifier = event.queryStringParameters?.identifier; metadataPrefix = event.queryStringParameters?.metadataPrefix; resumptionToken = event.queryStringParameters?.resumptionToken; + from = event.queryStringParameters?.from; + until = event.queryStringParameters?.until; } else { const body = new URLSearchParams(event.body); verb = body.get("verb"); identifier = body.get("identifier"); metadataPrefix = body.get("metadataPrefix"); resumptionToken = body.get("resumptionToken"); + from = body.get("from"); + until = body.get("until"); } + const dates = { from, until }; + if (invalidDateParameters(verb, dates).length > 0) + return invalidOaiRequest( + "badArgument", + "Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffffZ'" + ); if (!verb) return invalidOaiRequest("badArgument", "Missing required verb"); switch (verb) { @@ -45,11 +72,23 @@ exports.handler = wrap(async (event) => { case "Identify": return await identify(url); case "ListIdentifiers": - return await listIdentifiers(url, event, metadataPrefix, resumptionToken); + return await listIdentifiers( + url, + event, + metadataPrefix, + dates, + resumptionToken + ); case "ListMetadataFormats": return await listMetadataFormats(url); case "ListRecords": - return await listRecords(url, event, metadataPrefix, resumptionToken); + return await listRecords( + url, + event, + metadataPrefix, + dates, + resumptionToken + ); case "ListSets": return invalidOaiRequest( "noSetHierarchy", diff --git a/src/handlers/oai/search.js b/src/handlers/oai/search.js index 3dee1870..b542d6cf 100644 --- a/src/handlers/oai/search.js +++ b/src/handlers/oai/search.js @@ -3,9 +3,8 @@ const { extractRequestedModels, modelsToTargets, } = require("../../api/request/models"); -const fs = require("fs"); -async function earliestRecordCreateDate() { +async function earliestRecord() { const body = { size: 1, _source: "create_date", @@ -18,7 +17,7 @@ async function earliestRecordCreateDate() { ], }, }, - sort: [{ create_date: "desc" }], + sort: [{ create_date: "asc" }], }; const esResponse = await search( modelsToTargets(extractRequestedModels()), @@ -28,20 +27,32 @@ async function earliestRecordCreateDate() { return responseBody?.hits?.hits[0]?._source?.create_date; } -async function oaiSearch() { +async function oaiSearch(dates) { + let rangeQuery = { range: { modified_date: {} } }; + + if (dates.from) { + rangeQuery.range.modified_date.gt = dates.from; + } + + if (dates.until) { + rangeQuery.range.modified_date.lt = dates.until; + } + const body = { - size: 5000, + size: 1000, query: { bool: { must: [ { term: { api_model: "Work" } }, { term: { published: true } }, { term: { visibility: "Public" } }, + rangeQuery, ], }, }, - sort: [{ create_date: "desc" }], + sort: [{ modified_date: "asc" }], }; + const esResponse = await search( modelsToTargets(extractRequestedModels()), JSON.stringify(body), @@ -53,4 +64,4 @@ async function oaiSearch() { }; } -module.exports = { earliestRecordCreateDate, oaiSearch }; +module.exports = { earliestRecord, oaiSearch }; diff --git a/src/handlers/oai/verbs.js b/src/handlers/oai/verbs.js index 8aa07c8c..bc947728 100644 --- a/src/handlers/oai/verbs.js +++ b/src/handlers/oai/verbs.js @@ -1,5 +1,5 @@ const { invalidOaiRequest, output } = require("../oai/xml-transformer"); -const { earliestRecordCreateDate, oaiSearch } = require("../oai/search"); +const { earliestRecord, oaiSearch } = require("../oai/search"); const { deleteScroll, getWork, scroll } = require("../../api/opensearch"); const fieldMapper = { @@ -92,7 +92,7 @@ const getRecord = async (url, id) => { }; const identify = async (url) => { - let earliestDatestamp = await earliestRecordCreateDate(); + let earliestDatestamp = await earliestRecord(); const obj = { OAI_PMH: { _attributes: oaiAttributes, @@ -109,14 +109,20 @@ const identify = async (url) => { protocolVersion: "2.0", earliestDatestamp: earliestDatestamp, deletedRecord: "no", - granularity: "YYYY-MM-DDThh:mm:ssZ", + granularity: "YYYY-MM-DDThh:mm:ss.ffffffZ", }, }, }; return output(obj); }; -const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => { +const listIdentifiers = async ( + url, + event, + metadataPrefix, + dates, + resumptionToken +) => { if (!metadataPrefix) { return invalidOaiRequest( "badArgument", @@ -126,14 +132,14 @@ const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => { const response = typeof resumptionToken === "string" && resumptionToken.length !== 0 ? await scroll(resumptionToken) - : await oaiSearch(); + : await oaiSearch(dates); let headers = []; let resumptionTokenElement; if (response.statusCode == 200) { const responseBody = JSON.parse(response.body); - let scrollId = responseBody._scroll_id; const hits = responseBody.hits.hits; + let scrollId = responseBody._scroll_id; if (hits.length === 0) { await deleteScroll(scrollId); @@ -178,7 +184,7 @@ const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => { } else { return invalidOaiRequest( "badRequest", - "An error occurred processing the ListRecords request" + "An error occurred processing the ListIdentifiers request" ); } }; @@ -206,7 +212,13 @@ const listMetadataFormats = (url) => { return output(obj); }; -const listRecords = async (url, event, metadataPrefix, resumptionToken) => { +const listRecords = async ( + url, + event, + metadataPrefix, + dates, + resumptionToken +) => { if (!metadataPrefix) { return invalidOaiRequest( "badArgument", @@ -216,7 +228,7 @@ const listRecords = async (url, event, metadataPrefix, resumptionToken) => { const response = typeof resumptionToken === "string" && resumptionToken.length !== 0 ? await scroll(resumptionToken) - : await oaiSearch(); + : await oaiSearch(dates); let records = []; let resumptionTokenElement; diff --git a/template.yaml b/template.yaml index ce701096..9d9e5a67 100644 --- a/template.yaml +++ b/template.yaml @@ -370,6 +370,7 @@ Resources: Properties: Handler: handlers/oai.handler Description: Transforms works into OAI Records. + Timeout: 60 Policies: Version: 2012-10-17 Statement: diff --git a/test/fixtures/mocks/search-earliest-create-date.json b/test/fixtures/mocks/search-earliest-record.json similarity index 100% rename from test/fixtures/mocks/search-earliest-create-date.json rename to test/fixtures/mocks/search-earliest-record.json diff --git a/test/integration/oai.test.js b/test/integration/oai.test.js index adc0bec8..93305e2d 100644 --- a/test/integration/oai.test.js +++ b/test/integration/oai.test.js @@ -95,6 +95,38 @@ describe("Oai routes", () => { .and.to.have.lengthOf(12); }); + it("validates 'from' and 'until' parameters", async () => { + const body = + "verb=ListRecords&metadataPrefix=oai_dc&from=INVALID_DATE&until=INVALID_DATE"; + const event = helpers.mockEvent("POST", "/oai").body(body).render(); + const result = await handler(event); + expect(result.statusCode).to.eq(400); + expect(result).to.have.header("content-type", /application\/xml/); + const resultBody = convert.xml2js(result.body, xmlOpts); + expect(resultBody.OAI_PMH.error["_attributes"]["code"]).to.eq( + "badArgument" + ); + expect(resultBody.OAI_PMH.error["_text"]).to.eq( + "Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffffZ'" + ); + }); + + it("supports 'from' and 'until' parameters in ListRecords and ListIdentifiers verbs", async () => { + const body = + "verb=ListRecords&metadataPrefix=oai_dc&from=2022-11-22T06:16:13.791570Z&until=2022-11-22T06:16:13.791572Z"; + mock + .post("/dc-v2-work/_search?scroll=2m") + .reply(200, helpers.testFixture("mocks/scroll.json")); + const event = helpers.mockEvent("POST", "/oai").body(body).render(); + const result = await handler(event); + expect(result.statusCode).to.eq(200); + expect(result).to.have.header("content-type", /application\/xml/); + const resultBody = convert.xml2js(result.body, xmlOpts); + expect(resultBody.OAI_PMH.ListRecords.record) + .to.be.an("array") + .and.to.have.lengthOf(12); + }); + it("uses an empty resumptionToken to tell harvesters that list requests are complete", async () => { mock .post( @@ -227,14 +259,11 @@ describe("Oai routes", () => { ], }, }, - sort: [{ create_date: "desc" }], + sort: [{ create_date: "asc" }], }; mock .post("/dc-v2-work/_search", query) - .reply( - 200, - helpers.testFixture("mocks/search-earliest-create-date.json") - ); + .reply(200, helpers.testFixture("mocks/search-earliest-record.json")); const event = helpers .mockEvent("GET", "/oai") .queryParams({ verb: "Identify", metadataPrefix: "oai_dc" }) @@ -248,7 +277,9 @@ describe("Oai routes", () => { "2022-11-22T20:36:00.581418Z" ); expect(identifyElement.deletedRecord._text).to.eq("no"); - expect(identifyElement.granularity._text).to.eq("YYYY-MM-DDThh:mm:ssZ"); + expect(identifyElement.granularity._text).to.eq( + "YYYY-MM-DDThh:mm:ss.ffffffZ" + ); }); it("supports the ListRecords verb", async () => { @@ -399,7 +430,7 @@ describe("Oai routes", () => { "badRequest" ); expect(resultBody.OAI_PMH.error["_text"]).to.eq( - "An error occurred processing the ListRecords request" + "An error occurred processing the ListIdentifiers request" ); });