Skip to content

Commit

Permalink
Add support for 'from' and 'until' in OAI-PMH ListRecords and ListIden…
Browse files Browse the repository at this point in the history
…tifiers verbs (#63)

* Add support for 'from' and 'until' in OAI-PMH ListRecords and ListIdentifiers verbs

* Revert to 'create_date' for OAI-PMH range requests, reduce hard-coded size in OAI search requests

* Bump the OAI handler timeout to 60 seconds

* Use create_date to get the earliestDatestamp value and modified_date in search verbs.
  • Loading branch information
bmquinn authored Jan 13, 2023
1 parent 4a1494b commit 95381c1
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 25 deletions.
43 changes: 41 additions & 2 deletions src/handlers/oai.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ const allowedVerbs = [
"ListSets",
];

/**
 * Lists the date parameters whose values are not well-formed timestamps.
 *
 * Only the "ListRecords" and "ListIdentifiers" verbs are checked; any other
 * verb yields an empty list without looking at `dates`.
 *
 * @param {string} verb - The requested OAI verb.
 * @param {Object} dates - Map of parameter name to value (e.g. `from`, `until`).
 *   Falsy values (undefined, null, empty string) are treated as "not supplied"
 *   and are never reported.
 * @returns {string[]} Names of the parameters whose value does not match
 *   `YYYY-MM-DDThh:mm:ss.ffffffZ`, in the iteration order of `dates`.
 */
function invalidDateParameters(verb, dates) {
  const dateAwareVerbs = ["ListRecords", "ListIdentifiers"];
  if (!dateAwareVerbs.includes(verb)) {
    return [];
  }

  // Exactly six fractional-second digits followed by a literal "Z" are required.
  const timestampPattern = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z$/;

  return Object.entries(dates)
    .filter(([, value]) => Boolean(value) && !timestampPattern.test(value))
    .map(([name]) => name);
}

/**
* A function to support the OAI-PMH harvesting specification
*/
Expand All @@ -29,14 +46,24 @@ exports.handler = wrap(async (event) => {
identifier = event.queryStringParameters?.identifier;
metadataPrefix = event.queryStringParameters?.metadataPrefix;
resumptionToken = event.queryStringParameters?.resumptionToken;
from = event.queryStringParameters?.from;
until = event.queryStringParameters?.until;
} else {
const body = new URLSearchParams(event.body);
verb = body.get("verb");
identifier = body.get("identifier");
metadataPrefix = body.get("metadataPrefix");
resumptionToken = body.get("resumptionToken");
from = body.get("from");
until = body.get("until");
}

const dates = { from, until };
if (invalidDateParameters(verb, dates).length > 0)
return invalidOaiRequest(
"badArgument",
"Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffffZ'"
);
if (!verb) return invalidOaiRequest("badArgument", "Missing required verb");

switch (verb) {
Expand All @@ -45,11 +72,23 @@ exports.handler = wrap(async (event) => {
case "Identify":
return await identify(url);
case "ListIdentifiers":
return await listIdentifiers(url, event, metadataPrefix, resumptionToken);
return await listIdentifiers(
url,
event,
metadataPrefix,
dates,
resumptionToken
);
case "ListMetadataFormats":
return await listMetadataFormats(url);
case "ListRecords":
return await listRecords(url, event, metadataPrefix, resumptionToken);
return await listRecords(
url,
event,
metadataPrefix,
dates,
resumptionToken
);
case "ListSets":
return invalidOaiRequest(
"noSetHierarchy",
Expand Down
25 changes: 18 additions & 7 deletions src/handlers/oai/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ const {
extractRequestedModels,
modelsToTargets,
} = require("../../api/request/models");
const fs = require("fs");

async function earliestRecordCreateDate() {
async function earliestRecord() {
const body = {
size: 1,
_source: "create_date",
Expand All @@ -18,7 +17,7 @@ async function earliestRecordCreateDate() {
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ create_date: "asc" }],
};
const esResponse = await search(
modelsToTargets(extractRequestedModels()),
Expand All @@ -28,20 +27,32 @@ async function earliestRecordCreateDate() {
return responseBody?.hits?.hits[0]?._source?.create_date;
}

async function oaiSearch() {
async function oaiSearch(dates) {
let rangeQuery = { range: { modified_date: {} } };

if (dates.from) {
rangeQuery.range.modified_date.gt = dates.from;
}

if (dates.until) {
rangeQuery.range.modified_date.lt = dates.until;
}

const body = {
size: 5000,
size: 1000,
query: {
bool: {
must: [
{ term: { api_model: "Work" } },
{ term: { published: true } },
{ term: { visibility: "Public" } },
rangeQuery,
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ modified_date: "asc" }],
};

const esResponse = await search(
modelsToTargets(extractRequestedModels()),
JSON.stringify(body),
Expand All @@ -53,4 +64,4 @@ async function oaiSearch() {
};
}

module.exports = { earliestRecordCreateDate, oaiSearch };
module.exports = { earliestRecord, oaiSearch };
30 changes: 21 additions & 9 deletions src/handlers/oai/verbs.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
const { invalidOaiRequest, output } = require("../oai/xml-transformer");
const { earliestRecordCreateDate, oaiSearch } = require("../oai/search");
const { earliestRecord, oaiSearch } = require("../oai/search");
const { deleteScroll, getWork, scroll } = require("../../api/opensearch");

const fieldMapper = {
Expand Down Expand Up @@ -92,7 +92,7 @@ const getRecord = async (url, id) => {
};

const identify = async (url) => {
let earliestDatestamp = await earliestRecordCreateDate();
let earliestDatestamp = await earliestRecord();
const obj = {
OAI_PMH: {
_attributes: oaiAttributes,
Expand All @@ -109,14 +109,20 @@ const identify = async (url) => {
protocolVersion: "2.0",
earliestDatestamp: earliestDatestamp,
deletedRecord: "no",
granularity: "YYYY-MM-DDThh:mm:ssZ",
granularity: "YYYY-MM-DDThh:mm:ss.ffffffZ",
},
},
};
return output(obj);
};

const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => {
const listIdentifiers = async (
url,
event,
metadataPrefix,
dates,
resumptionToken
) => {
if (!metadataPrefix) {
return invalidOaiRequest(
"badArgument",
Expand All @@ -126,14 +132,14 @@ const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => {
const response =
typeof resumptionToken === "string" && resumptionToken.length !== 0
? await scroll(resumptionToken)
: await oaiSearch();
: await oaiSearch(dates);
let headers = [];
let resumptionTokenElement;

if (response.statusCode == 200) {
const responseBody = JSON.parse(response.body);
let scrollId = responseBody._scroll_id;
const hits = responseBody.hits.hits;
let scrollId = responseBody._scroll_id;

if (hits.length === 0) {
await deleteScroll(scrollId);
Expand Down Expand Up @@ -178,7 +184,7 @@ const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => {
} else {
return invalidOaiRequest(
"badRequest",
"An error occurred processing the ListRecords request"
"An error occurred processing the ListIdentifiers request"
);
}
};
Expand Down Expand Up @@ -206,7 +212,13 @@ const listMetadataFormats = (url) => {
return output(obj);
};

const listRecords = async (url, event, metadataPrefix, resumptionToken) => {
const listRecords = async (
url,
event,
metadataPrefix,
dates,
resumptionToken
) => {
if (!metadataPrefix) {
return invalidOaiRequest(
"badArgument",
Expand All @@ -216,7 +228,7 @@ const listRecords = async (url, event, metadataPrefix, resumptionToken) => {
const response =
typeof resumptionToken === "string" && resumptionToken.length !== 0
? await scroll(resumptionToken)
: await oaiSearch();
: await oaiSearch(dates);
let records = [];
let resumptionTokenElement;

Expand Down
1 change: 1 addition & 0 deletions template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ Resources:
Properties:
Handler: handlers/oai.handler
Description: Transforms works into OAI Records.
Timeout: 60
Policies:
Version: 2012-10-17
Statement:
Expand Down
45 changes: 38 additions & 7 deletions test/integration/oai.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,38 @@ describe("Oai routes", () => {
.and.to.have.lengthOf(12);
});

it("validates 'from' and 'until' parameters", async () => {
const body =
"verb=ListRecords&metadataPrefix=oai_dc&from=INVALID_DATE&until=INVALID_DATE";
const event = helpers.mockEvent("POST", "/oai").body(body).render();
const result = await handler(event);
expect(result.statusCode).to.eq(400);
expect(result).to.have.header("content-type", /application\/xml/);
const resultBody = convert.xml2js(result.body, xmlOpts);
expect(resultBody.OAI_PMH.error["_attributes"]["code"]).to.eq(
"badArgument"
);
expect(resultBody.OAI_PMH.error["_text"]).to.eq(
"Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffffZ'"
);
});

it("supports 'from' and 'until' parameters in ListRecords and ListIdentifiers verbs", async () => {
const body =
"verb=ListRecords&metadataPrefix=oai_dc&from=2022-11-22T06:16:13.791570Z&until=2022-11-22T06:16:13.791572Z";
mock
.post("/dc-v2-work/_search?scroll=2m")
.reply(200, helpers.testFixture("mocks/scroll.json"));
const event = helpers.mockEvent("POST", "/oai").body(body).render();
const result = await handler(event);
expect(result.statusCode).to.eq(200);
expect(result).to.have.header("content-type", /application\/xml/);
const resultBody = convert.xml2js(result.body, xmlOpts);
expect(resultBody.OAI_PMH.ListRecords.record)
.to.be.an("array")
.and.to.have.lengthOf(12);
});

it("uses an empty resumptionToken to tell harvesters that list requests are complete", async () => {
mock
.post(
Expand Down Expand Up @@ -227,14 +259,11 @@ describe("Oai routes", () => {
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ create_date: "asc" }],
};
mock
.post("/dc-v2-work/_search", query)
.reply(
200,
helpers.testFixture("mocks/search-earliest-create-date.json")
);
.reply(200, helpers.testFixture("mocks/search-earliest-record.json"));
const event = helpers
.mockEvent("GET", "/oai")
.queryParams({ verb: "Identify", metadataPrefix: "oai_dc" })
Expand All @@ -248,7 +277,9 @@ describe("Oai routes", () => {
"2022-11-22T20:36:00.581418Z"
);
expect(identifyElement.deletedRecord._text).to.eq("no");
expect(identifyElement.granularity._text).to.eq("YYYY-MM-DDThh:mm:ssZ");
expect(identifyElement.granularity._text).to.eq(
"YYYY-MM-DDThh:mm:ss.ffffffZ"
);
});

it("supports the ListRecords verb", async () => {
Expand Down Expand Up @@ -399,7 +430,7 @@ describe("Oai routes", () => {
"badRequest"
);
expect(resultBody.OAI_PMH.error["_text"]).to.eq(
"An error occurred processing the ListRecords request"
"An error occurred processing the ListIdentifiers request"
);
});

Expand Down

0 comments on commit 95381c1

Please sign in to comment.