Skip to content

Commit

Permalink
Merge pull request #6441 from IQSS/6300-additional-dataset-metadata-v…
Browse files Browse the repository at this point in the history
…ia-search-api

6300/6396 additional dataset metadata via search api
  • Loading branch information
kcondon authored Dec 11, 2019
2 parents 751ca19 + 6851c28 commit 2573b2c
Show file tree
Hide file tree
Showing 6 changed files with 286 additions and 6 deletions.
13 changes: 13 additions & 0 deletions doc/release-notes/6300-6396-search-api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## Major Use Cases

Newly-supported use cases in this release include:

- Search API users will see additional fields in the JSON output #6300 #6396

## Notes for Tool Developers and Integrators

### Search API

The boolean parameter `query_entities` has been removed from the Search API.

Entities are now always queried via direct database calls, which was the former behavior of `query_entities=true` (intended for developer use).
47 changes: 43 additions & 4 deletions doc/sphinx-guides/source/api/search.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ show_relevance boolean Whether or not to show details of which fields were ma
show_facets boolean Whether or not to show facets that can be operated on by the "fq" parameter. False by default. See :ref:`advanced search example <advancedsearch-example>`.
fq string A filter query on the search term. Multiple "fq" parameters can be used. See :ref:`advanced search example <advancedsearch-example>`.
show_entity_ids boolean Whether or not to show the database IDs of the search results (for developer use).
query_entities boolean Whether entities are queried via direct database calls (for developer use).
=============== ======= ===========

Basic Search Example
Expand All @@ -49,7 +48,7 @@ https://demo.dataverse.org/api/search?q=trees
"status":"OK",
"data":{
"q":"trees",
"total_count":4,
"total_count":5,
"start":0,
"spelling_alternatives":{
"trees":"[tree]"
Expand Down Expand Up @@ -99,9 +98,49 @@ https://demo.dataverse.org/api/search?q=trees
"identifier":"birds",
"description":"A bird dataverse with some trees",
"published_at":"2016-05-10T12:57:27Z"
}
},
{
"name":"Darwin's Finches",
"type":"dataset",
"url":"https://doi.org/10.70122/FK2/MB5VGR",
"global_id":"doi:10.70122/FK2/MB5VGR",
"description":"Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
"published_at":"2019-12-11T15:26:10Z",
"publisher":"dvbe69f5e1",
"citationHtml":"Finch, Fiona; Spruce, Sabrina; Poe, Edgar Allen; Mulligan, Hercules, 2019, \"Darwin's Finches\", <a href=\"https://doi.org/10.70122/FK2/MB5VGR\" target=\"_blank\">https://doi.org/10.70122/FK2/MB5VGR</a>, Root, V3",
"identifier_of_dataverse":"dvbe69f5e1",
"name_of_dataverse":"dvbe69f5e1",
"citation":"Finch, Fiona; Spruce, Sabrina; Poe, Edgar Allen; Mulligan, Hercules, 2019, \"Darwin's Finches\", https://doi.org/10.70122/FK2/MB5VGR, Root, V3",
"storageIdentifier":"file://10.70122/FK2/MB5VGR",
"subjects":[
"Astronomy and Astrophysics",
"Other"
],
"versionId":1260,
"versionState":"RELEASED",
"majorVersion":3,
"minorVersion":0,
"createdAt":"2019-09-20T18:08:29Z",
"updatedAt":"2019-12-11T15:26:10Z",
"contacts":[
{
"name":"Finch, Fiona",
"affiliation":""
}
],
"producers":[
"Allen, Irwin",
"Spielberg, Stephen"
],
"authors":[
"Finch, Fiona",
"Spruce, Sabrina",
"Poe, Edgar Allen",
"Mulligan, Hercules"
]
}
],
"count_in_response":4
"count_in_response":5
}
}
Expand Down
58 changes: 58 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,62 @@ public List<String> getKeywords() {
return getCompoundChildFieldValues(DatasetFieldConstant.keyword, DatasetFieldConstant.keywordValue);
}

/**
 * Gathers the values of every dataset field of this version whose field type
 * is named {@code DatasetFieldConstant.relatedMaterial}.
 *
 * @return a new list holding the values of all matching fields, in the
 *         iteration order of {@code getDatasetFields()}; empty when no field
 *         matches
 */
public List<String> getRelatedMaterial() {
    List<String> collected = new ArrayList<>();
    for (DatasetField field : this.getDatasetFields()) {
        String typeName = field.getDatasetFieldType().getName();
        if (!typeName.equals(DatasetFieldConstant.relatedMaterial)) {
            continue;
        }
        collected.addAll(field.getValues());
    }
    return collected;
}

/**
 * Gathers the values of every dataset field of this version whose field type
 * is named {@code DatasetFieldConstant.dataSources}.
 *
 * @return a new list holding the values of all matching fields, in the
 *         iteration order of {@code getDatasetFields()}; empty when no field
 *         matches
 */
public List<String> getDataSource() {
    List<String> collected = new ArrayList<>();
    for (DatasetField field : this.getDatasetFields()) {
        String typeName = field.getDatasetFieldType().getName();
        if (!typeName.equals(DatasetFieldConstant.dataSources)) {
            continue;
        }
        collected.addAll(field.getValues());
    }
    return collected;
}

/**
 * Returns the geographic coverage entries of this version.
 *
 * Every compound value of a dataset field whose type is named
 * {@code DatasetFieldConstant.geographicCoverage} yields exactly one
 * four-element array laid out as {@code {country, state, city, other}}.
 * A component is {@code null} when the compound value has no child field of
 * the corresponding type.
 *
 * @return a new list with one non-null, four-element array per compound
 *         value; empty when the version has no geographic coverage field
 */
public List<String[]> getGeographicCoverage() {
    List<String[]> geoCoverages = new ArrayList<>();

    for (DatasetField dsf : this.getDatasetFields()) {
        if (!dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.geographicCoverage)) {
            continue;
        }
        for (DatasetFieldCompoundValue geoCoverage : dsf.getDatasetFieldCompoundValues()) {
            String country = null;
            String state = null;
            String city = null;
            String other = null;
            for (DatasetField subField : geoCoverage.getChildDatasetFields()) {
                String subFieldName = subField.getDatasetFieldType().getName();
                if (subFieldName.equals(DatasetFieldConstant.country)) {
                    country = subField.getDisplayValue();
                }
                if (subFieldName.equals(DatasetFieldConstant.state)) {
                    state = subField.getDisplayValue();
                }
                if (subFieldName.equals(DatasetFieldConstant.city)) {
                    city = subField.getDisplayValue();
                }
                if (subFieldName.equals(DatasetFieldConstant.otherGeographicCoverage)) {
                    other = subField.getDisplayValue();
                }
            }
            // The array is assembled once, after all child fields were inspected, so that a
            // compound value without any child fields still yields an array instead of a null
            // list element that callers indexing into the entry would trip over.
            geoCoverages.add(new String[]{country, state, city, other});
        }
    }
    return geoCoverages;
}


public List<DatasetRelPublication> getRelatedPublications() {
List<DatasetRelPublication> relatedPublications = new ArrayList<>();
for (DatasetField dsf : this.getDatasetFields()) {
Expand All @@ -1141,6 +1197,8 @@ public List<DatasetRelPublication> getRelatedPublications() {
String citation = subField.getDisplayValue();
relatedPublication.setText(citation);
}


if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) {
// We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType
// for this url metadata field is likely set up so that the display value is automatically
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public Response search(
paginationStart,
dataRelatedToMe,
numResultsPerPage,
queryEntities
true //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019
);
} catch (SearchException ex) {
Throwable cause = ex;
Expand Down
102 changes: 101 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetRelPublication;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.api.Util;
import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
Expand Down Expand Up @@ -437,7 +439,7 @@ public JsonObjectBuilder getJsonForMyData() {
.add("parentName", this.getParent().get("name"));
}
}

return myDataJson;
} //getJsonForMydata

Expand All @@ -457,6 +459,8 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
String filePersistentId = null;
String preferredUrl = null;
String apiUrl = null;
String publisherName = null;


if (this.type.equals(SearchConstants.DATAVERSES)) {
displayName = this.name;
Expand All @@ -466,6 +470,8 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
displayName = this.title;
identifierLabel = "global_id";
preferredUrl = getPersistentUrl();
publisherName = this.parent.get("name");
// if
/**
* @todo Should we show the name of the parent dataverse?
*/
Expand Down Expand Up @@ -534,6 +540,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
.add("file_persistent_id", this.filePersistentId)
.add("dataset_name", datasetName)
.add("dataset_id", datasetId)
.add("publisher", publisherName)
.add("dataset_persistent_id", datasetPersistentId)
.add("dataset_citation", datasetCitation)
.add("deaccession_reason", this.deaccessionReason)
Expand All @@ -552,6 +559,99 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
}
}

// Dataset hits are enriched with metadata read from the backing entity. Hits without an entity,
// or whose entity is not a dataset, get no additional fields.
if (this.entity != null && this.entity.isInstanceofDataset()) {
    nullSafeJsonBuilder.add("storageIdentifier", this.entity.getStorageIdentifier());
    Dataset ds = (Dataset) this.entity;
    // Draft hits are described by the latest version, all other hits by the released version.
    DatasetVersion dv = this.isDraftState() ? ds.getLatestVersion() : ds.getReleasedVersion();

    // NOTE(review): presumably the version lookup can come back null (e.g. nothing released yet);
    // in that case the version-derived fields are skipped instead of failing the whole response
    // with a NullPointerException - confirm against Dataset.getReleasedVersion().
    if (dv != null) {
        if (!dv.getKeywords().isEmpty()) {
            JsonArrayBuilder keyWords = Json.createArrayBuilder();
            for (String keyword : dv.getKeywords()) {
                keyWords.add(keyword);
            }
            nullSafeJsonBuilder.add("keywords", keyWords);
        }

        // Unlike the other lists, "subjects" is always emitted, even when empty.
        JsonArrayBuilder subjects = Json.createArrayBuilder();
        for (String subject : dv.getDatasetSubjects()) {
            subjects.add(subject);
        }
        nullSafeJsonBuilder.add("subjects", subjects);
        nullSafeJsonBuilder.add("versionId", dv.getId());
        nullSafeJsonBuilder.add("versionState", dv.getVersionState().toString());
        // Version numbers are only reported for published hits.
        if (this.isPublishedState()) {
            nullSafeJsonBuilder.add("majorVersion", dv.getVersionNumber());
            nullSafeJsonBuilder.add("minorVersion", dv.getMinorVersionNumber());
        }
        nullSafeJsonBuilder.add("createdAt", ds.getCreateDate());
        nullSafeJsonBuilder.add("updatedAt", ds.getModificationTime());

        if (!dv.getDatasetContacts().isEmpty()) {
            JsonArrayBuilder contacts = Json.createArrayBuilder();
            for (String[] contact : dv.getDatasetContacts()) {
                // A fresh builder per element: reusing one builder across iterations is only
                // correct if build() resets it, otherwise a value from a previous element could
                // leak into an element whose own value is null (null values are not added).
                NullSafeJsonBuilder contactJson = jsonObjectBuilder();
                contactJson.add("name", contact[0]);
                contactJson.add("affiliation", contact[1]);
                contacts.add(contactJson);
            }
            nullSafeJsonBuilder.add("contacts", contacts);
        }

        if (!dv.getRelatedPublications().isEmpty()) {
            JsonArrayBuilder relPub = Json.createArrayBuilder();
            for (DatasetRelPublication dsRelPub : dv.getRelatedPublications()) {
                NullSafeJsonBuilder publicationJson = jsonObjectBuilder();
                publicationJson.add("title", dsRelPub.getTitle());
                publicationJson.add("citation", dsRelPub.getText());
                publicationJson.add("url", dsRelPub.getUrl());
                relPub.add(publicationJson);
            }
            nullSafeJsonBuilder.add("publications", relPub);
        }

        if (!dv.getDatasetProducers().isEmpty()) {
            JsonArrayBuilder producers = Json.createArrayBuilder();
            for (String[] producer : dv.getDatasetProducers()) {
                // Only the first element of each producer entry is exposed.
                producers.add(producer[0]);
            }
            nullSafeJsonBuilder.add("producers", producers);
        }

        if (!dv.getRelatedMaterial().isEmpty()) {
            JsonArrayBuilder relatedMaterials = Json.createArrayBuilder();
            for (String relatedMaterial : dv.getRelatedMaterial()) {
                relatedMaterials.add(relatedMaterial);
            }
            nullSafeJsonBuilder.add("relatedMaterial", relatedMaterials);
        }

        if (!dv.getGeographicCoverage().isEmpty()) {
            JsonArrayBuilder geoCov = Json.createArrayBuilder();
            for (String[] ind : dv.getGeographicCoverage()) {
                // Entries are laid out as {country, state, city, other}.
                NullSafeJsonBuilder coverageJson = jsonObjectBuilder();
                coverageJson.add("country", ind[0]);
                coverageJson.add("state", ind[1]);
                coverageJson.add("city", ind[2]);
                coverageJson.add("other", ind[3]);
                geoCov.add(coverageJson);
            }
            nullSafeJsonBuilder.add("geographicCoverage", geoCov);
        }

        if (!dv.getDataSource().isEmpty()) {
            JsonArrayBuilder dataSources = Json.createArrayBuilder();
            for (String dsource : dv.getDataSource()) {
                dataSources.add(dsource);
            }
            nullSafeJsonBuilder.add("dataSources", dataSources);
        }
    }
}

if (showApiUrls) {
/**
* @todo We should probably have a metadata_url or api_url concept
Expand Down
70 changes: 70 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import static java.lang.Thread.sleep;
import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
import static javax.ws.rs.core.Response.Status.OK;
import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
import org.hamcrest.Matchers;
import org.junit.After;
Expand Down Expand Up @@ -200,7 +201,76 @@ public void testSearchCitation() {
assertEquals(200, deleteUserResponse.getStatusCode());

}

/**
 * Checks that the Search API reports the additional dataset metadata
 * (subjects, version state and numbers, authors, contacts, storage
 * identifier) for a dataset, first while it is a draft and then after it has
 * been published.
 */
@Test
public void testAdditionalDatasetContent6300() {

    Response createUser = UtilIT.createRandomUser();
    createUser.prettyPrint();
    String apiToken = UtilIT.getApiTokenFromResponse(createUser);

    Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
    createDataverseResponse.prettyPrint();
    String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

    Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
    createDatasetResponse.prettyPrint();
    Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse);

    // One fetch is enough to read identifier, protocol and authority of the new dataset.
    Response datasetAsJson = UtilIT.nativeGet(datasetId, apiToken);
    datasetAsJson.prettyPrint();
    datasetAsJson.then().assertThat()
            .statusCode(OK.getStatusCode());

    JsonPath datasetJson = JsonPath.from(datasetAsJson.getBody().asString());
    String identifier = datasetJson.getString("data.identifier");
    String protocol = datasetJson.getString("data.protocol");
    String authority = datasetJson.getString("data.authority");

    String datasetPersistentId = protocol + ":" + authority + "/" + identifier;
    String pathToJsonFile = "doc/sphinx-guides/source/_static/api/dataset-add-metadata.json";
    Response addSubjectViaNative = UtilIT.addDatasetMetadataViaNative(datasetPersistentId, pathToJsonFile, apiToken);
    addSubjectViaNative.prettyPrint();
    addSubjectViaNative.then().assertThat()
            .statusCode(OK.getStatusCode());

    // Draft: the subject added above is listed and the version state is DRAFT.
    Response searchResponse = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken);
    searchResponse.prettyPrint();
    JsonPath draftHit = searchResponse.body().jsonPath();
    assertTrue(draftHit.getString("data.items[0].subjects").contains("Astronomy and Astrophysics"));
    assertEquals("DRAFT", draftHit.getString("data.items[0].versionState"));

    // Version numbers are only reported once the dataset has been published.
    Response publishDataverse = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken);
    publishDataverse.prettyPrint();
    publishDataverse.then().assertThat()
            .statusCode(OK.getStatusCode());

    Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
    publishDataset.prettyPrint();
    publishDataset.then().assertThat()
            .statusCode(OK.getStatusCode());

    // Published: same subject, state RELEASED, version 1.0 plus authors, contacts and storage id.
    searchResponse = UtilIT.search("id:dataset_" + datasetId, apiToken);
    searchResponse.prettyPrint();
    JsonPath publishedHit = searchResponse.body().jsonPath();
    assertTrue(publishedHit.getString("data.items[0].subjects").contains("Astronomy and Astrophysics"));
    assertEquals("RELEASED", publishedHit.getString("data.items[0].versionState"));

    assertEquals("1", publishedHit.getString("data.items[0].majorVersion"));
    assertEquals("0", publishedHit.getString("data.items[0].minorVersion"));

    assertTrue(publishedHit.getString("data.items[0].authors").contains("Spruce, Sabrina"));

    assertTrue(publishedHit.getString("data.items[0].contacts[0].name").contains("Finch, Fiona"));
    assertTrue(publishedHit.getString("data.items[0].storageIdentifier").contains(identifier));

}


/*
* Note: this test does a lot of checking for permissions with / without privileged api key.
* Thumbnails access is the same with/without that access as of 4.9.4 --MAD
Expand Down

0 comments on commit 2573b2c

Please sign in to comment.