Skip to content

Commit

Permalink
Add support for 'from' and 'until' in OAI-PMH ListRecords and ListIden…
Browse files Browse the repository at this point in the history
…tifiers verbs
  • Loading branch information
bmquinn committed Jan 12, 2023
1 parent 51b2211 commit 4f69eae
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 23 deletions.
43 changes: 41 additions & 2 deletions src/handlers/oai.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ const allowedVerbs = [
"ListSets",
];

/**
 * Find the date arguments of an OAI-PMH request whose values are unusable.
 *
 * Validation only applies to the ListRecords and ListIdentifiers verbs; for
 * any other verb nothing is checked and an empty list is returned.
 *
 * A value is accepted when it is falsy (the argument was not supplied) or
 * when it is formatted as `YYYY-MM-DDThh:mm:ss.ffffff` AND names a real
 * calendar date and time of day. Matching the pattern alone is not enough:
 * values such as month 13, February 30 or hour 25 are reported as invalid
 * instead of being forwarded to the search backend.
 *
 * @param {string} verb - The requested OAI-PMH verb.
 * @param {Object<string, (string|null|undefined)>} dates - Map of argument
 *   name (e.g. `from`, `until`) to its raw value. A nullish map is treated
 *   as empty.
 * @returns {string[]} Names of the arguments holding invalid values, in the
 *   iteration order of `dates`; empty when everything is acceptable.
 */
function invalidDateParameters(verb, dates) {
  if (!["ListRecords", "ListIdentifiers"].includes(verb)) return [];

  const pattern = /^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.\d{6}$/;

  const isValidDate = (value) => {
    const match = pattern.exec(value);
    if (match === null) return false;

    const [year, month, day, hour, minute, second] = match
      .slice(1)
      .map(Number);
    if (hour > 23 || minute > 59 || second > 59) return false;

    // Round-trip through Date to catch impossible calendar days: an
    // out-of-range month or day rolls over and no longer matches the input.
    // setUTCFullYear is used rather than Date.UTC because Date.UTC remaps
    // years 0-99 to 1900-1999.
    const probe = new Date(0);
    probe.setUTCFullYear(year, month - 1, day);
    return (
      probe.getUTCFullYear() === year &&
      probe.getUTCMonth() === month - 1 &&
      probe.getUTCDate() === day
    );
  };

  return Object.entries(dates ?? {})
    .filter(([, dateValue]) => dateValue && !isValidDate(dateValue))
    .map(([dateParameter]) => dateParameter);
}

/**
* A function to support the OAI-PMH harvesting specification
*/
Expand All @@ -29,14 +46,24 @@ exports.handler = wrap(async (event) => {
identifier = event.queryStringParameters?.identifier;
metadataPrefix = event.queryStringParameters?.metadataPrefix;
resumptionToken = event.queryStringParameters?.resumptionToken;
from = event.queryStringParameters?.from;
until = event.queryStringParameters?.until;
} else {
const body = new URLSearchParams(event.body);
verb = body.get("verb");
identifier = body.get("identifier");
metadataPrefix = body.get("metadataPrefix");
resumptionToken = body.get("resumptionToken");
from = body.get("from");
until = body.get("until");
}

const dates = { from, until };
if (invalidDateParameters(verb, dates).length > 0)
return invalidOaiRequest(
"badArgument",
"Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffff'"
);
if (!verb) return invalidOaiRequest("badArgument", "Missing required verb");

switch (verb) {
Expand All @@ -45,11 +72,23 @@ exports.handler = wrap(async (event) => {
case "Identify":
return await identify(url);
case "ListIdentifiers":
return await listIdentifiers(url, event, metadataPrefix, resumptionToken);
return await listIdentifiers(
url,
event,
metadataPrefix,
dates,
resumptionToken
);
case "ListMetadataFormats":
return await listMetadataFormats(url);
case "ListRecords":
return await listRecords(url, event, metadataPrefix, resumptionToken);
return await listRecords(
url,
event,
metadataPrefix,
dates,
resumptionToken
);
case "ListSets":
return invalidOaiRequest(
"noSetHierarchy",
Expand Down
27 changes: 19 additions & 8 deletions src/handlers/oai/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ const {
extractRequestedModels,
modelsToTargets,
} = require("../../api/request/models");
const fs = require("fs");

async function earliestRecordCreateDate() {
async function earliestRecord() {
const body = {
size: 1,
_source: "create_date",
_source: "indexed_at",
query: {
bool: {
must: [
Expand All @@ -18,17 +17,27 @@ async function earliestRecordCreateDate() {
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ indexed_at: "asc" }],
};
const esResponse = await search(
modelsToTargets(extractRequestedModels()),
JSON.stringify(body)
);
const responseBody = JSON.parse(esResponse.body);
return responseBody?.hits?.hits[0]?._source?.create_date;
return responseBody?.hits?.hits[0]?._source?.indexed_at;
}

async function oaiSearch() {
async function oaiSearch(dates) {
let rangeQuery = { range: { indexed_at: {} } };

if (dates.from) {
rangeQuery.range.indexed_at.gt = dates.from;
}

if (dates.until) {
rangeQuery.range.indexed_at.lt = dates.until;
}

const body = {
size: 5000,
query: {
Expand All @@ -37,11 +46,13 @@ async function oaiSearch() {
{ term: { api_model: "Work" } },
{ term: { published: true } },
{ term: { visibility: "Public" } },
rangeQuery,
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ indexed_at: "asc" }],
};

const esResponse = await search(
modelsToTargets(extractRequestedModels()),
JSON.stringify(body),
Expand All @@ -53,4 +64,4 @@ async function oaiSearch() {
};
}

module.exports = { earliestRecordCreateDate, oaiSearch };
module.exports = { earliestRecord, oaiSearch };
26 changes: 19 additions & 7 deletions src/handlers/oai/verbs.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
const { invalidOaiRequest, output } = require("../oai/xml-transformer");
const { earliestRecordCreateDate, oaiSearch } = require("../oai/search");
const { earliestRecord, oaiSearch } = require("../oai/search");
const { deleteScroll, getWork, scroll } = require("../../api/opensearch");

const fieldMapper = {
Expand Down Expand Up @@ -92,7 +92,7 @@ const getRecord = async (url, id) => {
};

const identify = async (url) => {
let earliestDatestamp = await earliestRecordCreateDate();
let earliestDatestamp = await earliestRecord();
const obj = {
OAI_PMH: {
_attributes: oaiAttributes,
Expand All @@ -109,14 +109,20 @@ const identify = async (url) => {
protocolVersion: "2.0",
earliestDatestamp: earliestDatestamp,
deletedRecord: "no",
granularity: "YYYY-MM-DDThh:mm:ssZ",
granularity: "YYYY-MM-DDThh:mm:ss.ffffff",
},
},
};
return output(obj);
};

const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => {
const listIdentifiers = async (
url,
event,
metadataPrefix,
dates,
resumptionToken
) => {
if (!metadataPrefix) {
return invalidOaiRequest(
"badArgument",
Expand All @@ -126,7 +132,7 @@ const listIdentifiers = async (url, event, metadataPrefix, resumptionToken) => {
const response =
typeof resumptionToken === "string" && resumptionToken.length !== 0
? await scroll(resumptionToken)
: await oaiSearch();
: await oaiSearch(dates);
let headers = [];
let resumptionTokenElement;

Expand Down Expand Up @@ -206,7 +212,13 @@ const listMetadataFormats = (url) => {
return output(obj);
};

const listRecords = async (url, event, metadataPrefix, resumptionToken) => {
const listRecords = async (
url,
event,
metadataPrefix,
dates,
resumptionToken
) => {
if (!metadataPrefix) {
return invalidOaiRequest(
"badArgument",
Expand All @@ -216,7 +228,7 @@ const listRecords = async (url, event, metadataPrefix, resumptionToken) => {
const response =
typeof resumptionToken === "string" && resumptionToken.length !== 0
? await scroll(resumptionToken)
: await oaiSearch();
: await oaiSearch(dates);
let records = [];
let resumptionTokenElement;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"_id": "559ca7fb-55d1-45dc-9d8d-bd2ae2de6ae5",
"_score": null,
"_source": {
"create_date": "2022-11-22T20:36:00.581418Z"
"indexed_at": "2022-11-22T20:36:00.581418"
},
"sort": [1669149360581]
}
Expand Down
44 changes: 39 additions & 5 deletions test/integration/oai.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,38 @@ describe("Oai routes", () => {
.and.to.have.lengthOf(12);
});

it("validates 'from' and 'until' parameters", async () => {
  // Both date arguments are malformed; no search mock is registered in this
  // test, and the handler is expected to answer with an OAI badArgument error.
  const expectedMessage =
    "Invalid date -- make sure that 'from' or 'until' parameters are formatted as: 'YYYY-MM-DDThh:mm:ss.ffffff'";
  const requestBody =
    "verb=ListRecords&metadataPrefix=oai_dc&from=INVALID_DATE&until=INVALID_DATE";

  const result = await handler(
    helpers.mockEvent("POST", "/oai").body(requestBody).render()
  );

  // HTTP-level expectations.
  expect(result.statusCode).to.eq(400);
  expect(result).to.have.header("content-type", /application\/xml/);

  // OAI-level expectations on the parsed XML error element.
  const { error } = convert.xml2js(result.body, xmlOpts).OAI_PMH;
  expect(error._attributes.code).to.eq("badArgument");
  expect(error._text).to.eq(expectedMessage);
});

it("supports 'from' and 'until' parameters in ListRecords and ListIdentifiers verbs", async () => {
  // Stub the scrolling search endpoint with the shared scroll fixture.
  // NOTE(review): only the ListRecords verb is exercised here, and the mock
  // does not match on the request body, so the date range sent to the
  // search backend is not asserted — confirm whether that is intended.
  mock
    .post("/dc-v2-work/_search?scroll=2m")
    .reply(200, helpers.testFixture("mocks/scroll.json"));

  const requestBody =
    "verb=ListRecords&metadataPrefix=oai_dc&from=2022-11-22T06:16:13.791570&until=2022-11-22T06:16:13.791572";
  const result = await handler(
    helpers.mockEvent("POST", "/oai").body(requestBody).render()
  );

  expect(result.statusCode).to.eq(200);
  expect(result).to.have.header("content-type", /application\/xml/);

  // The fixture is expected to yield twelve records in the XML response.
  const parsed = convert.xml2js(result.body, xmlOpts);
  const records = parsed.OAI_PMH.ListRecords.record;
  expect(records).to.be.an("array").and.to.have.lengthOf(12);
});

it("uses an empty resumptionToken to tell harvesters that list requests are complete", async () => {
mock
.post(
Expand Down Expand Up @@ -217,7 +249,7 @@ describe("Oai routes", () => {
it("supports the Identify verb", async () => {
const query = {
size: 1,
_source: "create_date",
_source: "indexed_at",
query: {
bool: {
must: [
Expand All @@ -227,13 +259,13 @@ describe("Oai routes", () => {
],
},
},
sort: [{ create_date: "desc" }],
sort: [{ indexed_at: "asc" }],
};
mock
.post("/dc-v2-work/_search", query)
.reply(
200,
helpers.testFixture("mocks/search-earliest-create-date.json")
helpers.testFixture("mocks/search-earliest-indexed-at.json")
);
const event = helpers
.mockEvent("GET", "/oai")
Expand All @@ -245,10 +277,12 @@ describe("Oai routes", () => {
const resultBody = convert.xml2js(result.body, xmlOpts);
const identifyElement = resultBody.OAI_PMH.Identify;
expect(identifyElement.earliestDatestamp._text).to.eq(
"2022-11-22T20:36:00.581418Z"
"2022-11-22T20:36:00.581418"
);
expect(identifyElement.deletedRecord._text).to.eq("no");
expect(identifyElement.granularity._text).to.eq("YYYY-MM-DDThh:mm:ssZ");
expect(identifyElement.granularity._text).to.eq(
"YYYY-MM-DDThh:mm:ss.ffffff"
);
});

it("supports the ListRecords verb", async () => {
Expand Down

0 comments on commit 4f69eae

Please sign in to comment.