From 955312df6710841cbb4989326a28a067a07f6b5a Mon Sep 17 00:00:00 2001 From: darms Date: Mon, 19 Aug 2024 13:30:43 +0200 Subject: [PATCH 01/12] feat(api/versions): Added a new optional property to hide metadataBlocks from API response. --- .../10171-exlude-metadatablocks.md | 4 ++++ doc/sphinx-guides/source/api/native-api.rst | 8 +++++++ .../harvard/iq/dataverse/api/Datasets.java | 11 +++++---- .../iq/dataverse/util/json/JsonPrinter.java | 23 ++++++++++++------- 4 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 doc/release-notes/10171-exlude-metadatablocks.md diff --git a/doc/release-notes/10171-exlude-metadatablocks.md b/doc/release-notes/10171-exlude-metadatablocks.md new file mode 100644 index 00000000000..64fadfcc35e --- /dev/null +++ b/doc/release-notes/10171-exlude-metadatablocks.md @@ -0,0 +1,4 @@ +Extension of API `{id}/versions` and `{id}/versions/{versionId}` with an optional ``excludeMetadataBlocks`` parameter, +that specifies whether the metadataBlocks should be listed in the output. It defaults to ``true``, preserving backward +compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks +included can dramatically increase the volume of the output). diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index a5f7d03899a..5168b8c5917 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1150,6 +1150,8 @@ It returns a list of versions with their metadata, and file list: The optional ``excludeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks included can dramatically increase the volume of the output). + The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. @@ -1174,6 +1176,12 @@ The fully expanded example above (without environment variables) looks like this The optional ``excludeFiles`` parameter specifies whether the files should be listed in the output (defaults to ``true``). Note that a separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?excludeMetadataBlocks=false" + +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output (defaults to ``true``). + By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4b919c5ed82..d576022389c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -415,15 +415,16 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @GET @AuthRequired @Path("{id}/versions") - public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("excludeFiles") Boolean excludeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { + public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("excludeFiles") Boolean excludeFiles,@QueryParam("excludeMetadataBlocks") Boolean excludeMetadataBlocks, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { return response( req -> { Dataset dataset = findDatasetOrDie(id); Boolean deepLookup = excludeFiles == null ? true : !excludeFiles; + Boolean includeMetadataBlocks = excludeMetadataBlocks == null ? true : !excludeMetadataBlocks; return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, deepLookup) ) .stream() - .map( d -> json(d, deepLookup) ) + .map( d -> json(d, deepLookup, includeMetadataBlocks) ) .collect(toJsonArray())); }, getRequestUser(crc)); } @@ -435,6 +436,7 @@ public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @QueryParam("excludeFiles") Boolean excludeFiles, + @QueryParam("excludeMetadataBlocks") Boolean excludeMetadataBlocks, @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @QueryParam("returnOwners") boolean returnOwners, @Context UriInfo uriInfo, @@ -460,11 +462,12 @@ public Response getVersion(@Context ContainerRequestContext crc, if (excludeFiles == null ? true : !excludeFiles) { requestedDatasetVersion = datasetversionService.findDeep(requestedDatasetVersion.getId()); } + Boolean includeMetadataBlocks = excludeMetadataBlocks == null ? true : !excludeMetadataBlocks; JsonObjectBuilder jsonBuilder = json(requestedDatasetVersion, null, - excludeFiles == null ? true : !excludeFiles, - returnOwners); + excludeFiles == null ? true : !excludeFiles, + returnOwners, includeMetadataBlocks); return ok(jsonBuilder); }, getRequestUser(crc)); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index c908a4d2bce..d9c2250ef6f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -418,11 +418,17 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) { } public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) { - return json(dsv, null, includeFiles, false); + return json(dsv, null, includeFiles, false,true); + } + public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles, boolean includeMetadataBlocks) { + return json(dsv, null, includeFiles, false, includeMetadataBlocks); + } + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, + boolean includeFiles, boolean returnOwners) { + return json( dsv, anonymizedFieldTypeNamesList, includeFiles, returnOwners,true); } - public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, - boolean includeFiles, boolean returnOwners) { + boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks) { Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dsv.getId()).add("datasetId", dataset.getId()) @@ -467,11 +473,12 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized .add("sizeOfCollection", dsv.getTermsOfUseAndAccess().getSizeOfCollection()) .add("studyCompletion", dsv.getTermsOfUseAndAccess().getStudyCompletion()) .add("fileAccessRequest", dsv.getTermsOfUseAndAccess().isFileAccessRequest()); - - bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ? - jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) - : jsonByBlocks(dsv.getDatasetFields()) - ); + if(includeMetadataBlocks) { + bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ? + jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) + : jsonByBlocks(dsv.getDatasetFields()) + ); + } if(returnOwners){ bld.add("isPartOf", getOwnersFromDvObject(dataset)); } From 408172c06a60d88af9e4c9c72b8f153867958779 Mon Sep 17 00:00:00 2001 From: darms Date: Mon, 19 Aug 2024 13:40:59 +0200 Subject: [PATCH 02/12] feat(api/versions): Added a new optional property to hide metadataBlocks from API response. --- doc/release-notes/10171-exlude-metadatablocks.md | 2 +- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/release-notes/10171-exlude-metadatablocks.md b/doc/release-notes/10171-exlude-metadatablocks.md index 64fadfcc35e..8039b36617c 100644 --- a/doc/release-notes/10171-exlude-metadatablocks.md +++ b/doc/release-notes/10171-exlude-metadatablocks.md @@ -1,4 +1,4 @@ Extension of API `{id}/versions` and `{id}/versions/{versionId}` with an optional ``excludeMetadataBlocks`` parameter, -that specifies whether the metadataBlocks should be listed in the output. It defaults to ``true``, preserving backward +that specifies whether the metadataBlocks should be listed in the output. It defaults to ``false``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks included can dramatically increase the volume of the output). diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 5168b8c5917..a3a004969dc 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1150,7 +1150,7 @@ It returns a list of versions with their metadata, and file list: The optional ``excludeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. -The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks included can dramatically increase the volume of the output). +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output. It defaults to ``false``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks included can dramatically increase the volume of the output). The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. @@ -1180,7 +1180,7 @@ The optional ``excludeFiles`` parameter specifies whether the files should be li curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?excludeMetadataBlocks=false" -The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output (defaults to ``true``). +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output (defaults to ``false``). By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. From c27e910da84f1dc829cca480e8ca11408c7ba688 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:38:42 -0500 Subject: [PATCH 03/12] remove duplicate DatasetFieldTypes --- .../iq/dataverse/util/json/JsonPrinter.java | 25 +++++++--- .../dataverse/util/json/JsonPrinterTest.java | 49 +++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 91af13c79a3..bb0c8e10c82 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -642,17 +642,19 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO .add("displayName", metadataBlock.getDisplayName()) .add("displayOnCreate", metadataBlock.isDisplayOnCreate()); - Set datasetFieldTypes; + List datasetFieldTypesList; if (ownerDataverse != null) { - datasetFieldTypes = new TreeSet<>(datasetFieldService.findAllInMetadataBlockAndDataverse( - metadataBlock, ownerDataverse, printOnlyDisplayedOnCreateDatasetFieldTypes)); + datasetFieldTypesList = datasetFieldService.findAllInMetadataBlockAndDataverse( + metadataBlock, ownerDataverse, printOnlyDisplayedOnCreateDatasetFieldTypes); } else { - datasetFieldTypes = printOnlyDisplayedOnCreateDatasetFieldTypes - ? new TreeSet<>(datasetFieldService.findAllDisplayedOnCreateInMetadataBlock(metadataBlock)) - : new TreeSet<>(metadataBlock.getDatasetFieldTypes()); + datasetFieldTypesList = printOnlyDisplayedOnCreateDatasetFieldTypes + ? datasetFieldService.findAllDisplayedOnCreateInMetadataBlock(metadataBlock) + : metadataBlock.getDatasetFieldTypes(); } + Set datasetFieldTypes = filterOutDuplicateDatasetFieldTypes(datasetFieldTypesList); + JsonObjectBuilder fieldsBuilder = Json.createObjectBuilder(); for (DatasetFieldType datasetFieldType : datasetFieldTypes) { fieldsBuilder.add(datasetFieldType.getName(), json(datasetFieldType, ownerDataverse)); @@ -662,6 +664,17 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO return jsonObjectBuilder; } + // This will remove datasetFieldTypes that are in the list but also a child of another datasetFieldType in the list + // Prevents duplicate datasetFieldType information from being returned twice + // See: https://github.com/IQSS/dataverse/issues/10472 + private static Set filterOutDuplicateDatasetFieldTypes(List datasetFieldTypesList) { + // making a copy of the list as to not damage the original when we remove items + List datasetFieldTypes = new ArrayList<>(datasetFieldTypesList); + // exclude/remove datasetFieldTypes if datasetFieldType exists as a child of another datasetFieldType + datasetFieldTypesList.forEach(dsft -> dsft.getChildDatasetFieldTypes().forEach(c -> datasetFieldTypes.remove(c))); + return new TreeSet<>(datasetFieldTypes); + } + public static JsonArrayBuilder jsonDatasetFieldTypes(List fields) { JsonArrayBuilder fieldsJson = Json.createArrayBuilder(); for (DatasetFieldType field : fields) { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java index 7ec8e0b25f3..1987307637c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java @@ -25,6 +25,7 @@ import jakarta.json.JsonString; import edu.harvard.iq.dataverse.util.BundleUtil; +import org.assertj.core.util.Lists; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeEach; @@ -268,6 +269,54 @@ public void testDatasetContactWithPrivacy() { } + @Test + public void testDatasetFieldTypesWithChildren() { + MetadataBlock block = new MetadataBlock(); + block.setId(0L); + block.setName("citation"); + long id = 0L; + // create datasetFieldTypes + List datasetFieldTypes = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + DatasetFieldType dft = new DatasetFieldType(); + dft.setId(id++); + dft.setDisplayOrder(i); + dft.setMetadataBlock(block); + dft.setFieldType(FieldType.TEXT); + dft.setName("subType" + dft.getId()); + dft.setTitle(dft.getName()); + dft.setChildDatasetFieldTypes(Lists.emptyList()); + datasetFieldTypes.add(dft); + } + // add DatasetFieldType as children to another DatasetFieldType to test the suppression of duplicate data + // adding 3 and 4 as children of 2 + datasetFieldTypes.get(3).setParentDatasetFieldType(datasetFieldTypes.get(2)); + datasetFieldTypes.get(4).setParentDatasetFieldType(datasetFieldTypes.get(2)); + datasetFieldTypes.get(2).setChildDatasetFieldTypes(List.of(datasetFieldTypes.get(3), datasetFieldTypes.get(4))); + // adding 6 as child of 9 + datasetFieldTypes.get(6).setParentDatasetFieldType(datasetFieldTypes.get(9)); + datasetFieldTypes.get(9).setChildDatasetFieldTypes(List.of(datasetFieldTypes.get(6))); + + block.setDatasetFieldTypes(datasetFieldTypes); + + DatasetFieldServiceBean nullDFServiceBean = null; + JsonPrinter.injectSettingsService(new MockSettingsSvc(), nullDFServiceBean); + + JsonObject jsonObject = JsonPrinter.json(block).build(); + assertNotNull(jsonObject); + + System.out.println("json: " + JsonUtil.prettyPrint(jsonObject.toString())); + assertEquals("subType2 subType3", jsonObject.getJsonObject("fields").getJsonObject("subType2") + .getJsonObject("childFields").getJsonObject("subType3").getString("displayName")); + assertEquals("subType2 subType4", jsonObject.getJsonObject("fields").getJsonObject("subType2") + .getJsonObject("childFields").getJsonObject("subType4").getString("displayName")); + assertEquals("subType9 subType6", jsonObject.getJsonObject("fields").getJsonObject("subType9") + .getJsonObject("childFields").getJsonObject("subType6").getString("displayName")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType3")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType4")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType6")); + } + @Test public void testDataversePrinter() { Dataverse dataverse = new Dataverse(); From b2e449779cc141160d15070b18eb2a3a037c848b Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:20:50 -0500 Subject: [PATCH 04/12] fix tests afetr validating new results --- .../java/edu/harvard/iq/dataverse/api/DataversesIT.java | 6 +++--- .../java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index 76bb515beb2..d58c84a06d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -927,7 +927,7 @@ public void testListMetadataBlocks() { .body("data.size()", equalTo(1)) .body("data[0].name", is("citation")) .body("data[0].fields.title.displayOnCreate", equalTo(true)) - .body("data[0].fields.size()", is(28)); + .body("data[0].fields.size()", is(10)); // 28 - 18 child duplicates Response setMetadataBlocksResponse = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken); setMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -1007,11 +1007,11 @@ public void testListMetadataBlocks() { // Since the included property of notesText is set to false, we should retrieve the total number of fields minus one int citationMetadataBlockIndex = geospatialMetadataBlockIndex == 0 ? 1 : 0; listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", citationMetadataBlockIndex), equalTo(79)); + .body(String.format("data[%d].fields.size()", citationMetadataBlockIndex), equalTo(34)); // 79 minus 45 child duplicates // Since the included property of geographicCoverage is set to false, we should retrieve the total number of fields minus one listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(10)); + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(6)); // 10 - 4 child duplicates String actualGeospatialMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.geographicCoverage.name", geospatialMetadataBlockIndex)); String actualGeospatialMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.country.name", geospatialMetadataBlockIndex)); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index 6e7061961f0..fe608a97ac8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -42,7 +42,7 @@ void testListMetadataBlocks() { // returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(false, true); - int expectedNumberOfMetadataFields = 80; + int expectedNumberOfMetadataFields = 35; // 80 - 45 child duplicates; listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) @@ -51,7 +51,8 @@ void testListMetadataBlocks() { // onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(true, true); - expectedNumberOfMetadataFields = 28; + listMetadataBlocksResponse.prettyPrint(); + expectedNumberOfMetadataFields = 10; // 28 - 18 child duplicates listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) From 9fb3dbe956fe69f0a7be17dd67cbd1dce7b42bcc Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:45:16 -0500 Subject: [PATCH 05/12] add release note --- ...0472-review-modify-jsonprinter-for-datasetfieldtype.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md diff --git a/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md b/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md new file mode 100644 index 00000000000..f0b9c30c9cd --- /dev/null +++ b/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md @@ -0,0 +1,8 @@ +### Json Printer Bug fix + +DatasetFieldTypes in MetadataBlock response that are also a child of another DatasetFieldType were being returned twice. The child DatasetFieldType was included in the "fields" object as well as in the "childFields" of it's parent DatasetFieldType. This fix suppresses the standalone object so only one instance of the DatasetFieldType is returned (in the "childFields" of its parent). +This fix changes the Json output of the API `/api/dataverses/{dataverseAlias}/metadatablocks` + +## Backward Incompatible Changes + +The Json response of API call `/api/dataverses/{dataverseAlias}/metadatablocks` will no longer include the DatasetFieldTypes in "fields" if they are children of another DatasetFieldType. The child DatasetFieldType will only be included in the "childFields" of it's parent DatasetFieldType. From 8813df9f13ba29e685a6638f259408060b93fc8b Mon Sep 17 00:00:00 2001 From: Johannes Darms Date: Wed, 18 Dec 2024 14:30:54 +0100 Subject: [PATCH 06/12] adapted documentation according to phil suggestions --- doc/release-notes/10171-exlude-metadatablocks.md | 2 +- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/release-notes/10171-exlude-metadatablocks.md b/doc/release-notes/10171-exlude-metadatablocks.md index 8039b36617c..7c0a9b030eb 100644 --- a/doc/release-notes/10171-exlude-metadatablocks.md +++ b/doc/release-notes/10171-exlude-metadatablocks.md @@ -1,4 +1,4 @@ Extension of API `{id}/versions` and `{id}/versions/{versionId}` with an optional ``excludeMetadataBlocks`` parameter, that specifies whether the metadataBlocks should be listed in the output. It defaults to ``false``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks -included can dramatically increase the volume of the output). +included can dramatically increase the volume of the output). See also [the guides](https://dataverse-guide--10778.org.readthedocs.build/en/10778/api/native-api.html#list-versions-of-a-dataset), #10778, and #10171. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 462f145ab4f..b013c967228 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1201,7 +1201,7 @@ It returns a list of versions with their metadata, and file list: The optional ``excludeFiles`` parameter specifies whether the files should be listed in the output. It defaults to ``true``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or files having the files included can dramatically increase the volume of the output). A separate ``/files`` API can be used for listing the files, or a subset thereof in a given version. -The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output. It defaults to ``false``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadataBlocks having the metadata blocks included can dramatically increase the volume of the output). +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadata blocks should be listed in the output. It defaults to ``false``, preserving backward compatibility. (Note that for a dataset with a large number of versions and/or metadata blocks having the metadata blocks included can dramatically increase the volume of the output). The optional ``offset`` and ``limit`` parameters can be used to specify the range of the versions list to be shown. This can be used to paginate through the list in a dataset with a large number of versions. @@ -1231,7 +1231,7 @@ The optional ``excludeFiles`` parameter specifies whether the files should be li curl "https://demo.dataverse.org/api/datasets/24/versions/1.0?excludeMetadataBlocks=false" -The optional ``excludeMetadataBlocks`` parameter specifies whether the metadataBlocks should be listed in the output (defaults to ``false``). +The optional ``excludeMetadataBlocks`` parameter specifies whether the metadata blocks should be listed in the output (defaults to ``false``). By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. From 84cac1ee2eed6c980026970a2b1f5b1c1a779f21 Mon Sep 17 00:00:00 2001 From: Johannes Darms Date: Wed, 18 Dec 2024 15:04:38 +0100 Subject: [PATCH 07/12] added a simple unit test --- .../harvard/iq/dataverse/api/DatasetsIT.java | 36 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 +++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 4f1ee1717c1..3c1cc879d13 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -678,6 +678,42 @@ public void testCreatePublishDestroyDataset() { } + @Test + public void testHideMetadataBlocksInDatasetVersionsAPI() { + + // Create user + String apiToken = UtilIT.createRandomUserGetToken(); + + // Create user with no permission + String apiTokenNoPerms = UtilIT.createRandomUserGetToken(); + + // Create Collection + String collectionAlias = UtilIT.createRandomCollectionGetAlias(apiToken); + + // Create Dataset + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(collectionAlias, apiToken); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId"); + + // Now check that the metadata is NOT shown, when we ask the versions api to dos o. + boolean excludeMetadata = true; + Response unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, true,excludeMetadata, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.metadataBlocks", equalTo(null)); + + // Now check that the metadata is shown, when we ask the versions api to dos o. + excludeMetadata = false; + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken,true, excludeMetadata, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.metadataBlocks", notNullValue() ); + } /** * The apis (/api/datasets/{id}/versions and /api/datasets/{id}/versions/{vid} * are already called from other RestAssured tests, in this class and also in FilesIT. diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 8f1fcdf57eb..fea6300a678 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1533,6 +1533,9 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str } static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles, boolean includeDeaccessioned) { + return getDatasetVersion(persistentId,versionNumber,apiToken,excludeFiles,false,includeDeaccessioned); + } + static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles,boolean excludeMetadataBlocks, boolean includeDeaccessioned) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) .queryParam("includeDeaccessioned", includeDeaccessioned) @@ -1540,7 +1543,8 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str + versionNumber + "?persistentId=" + persistentId - + (excludeFiles ? "&excludeFiles=true" : "")); + + (excludeFiles ? "&excludeFiles=true" : "") + + (excludeMetadataBlocks ? "&excludeMetadataBlocks=true" : "")); } static Response getDatasetWithOwners(String persistentId, String apiToken, boolean returnOwners) { From 939d8e9e1cf349e138242d6b481f1a78f072fcce Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 18 Dec 2024 15:14:08 -0500 Subject: [PATCH 08/12] switch container demo tutorial from FAKE to perma #11107 --- doc/release-notes/11107-fake-to-perma-demo.md | 3 +++ doc/sphinx-guides/source/container/running/demo.rst | 5 +++++ docker/compose/demo/compose.yml | 12 ++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 doc/release-notes/11107-fake-to-perma-demo.md diff --git a/doc/release-notes/11107-fake-to-perma-demo.md b/doc/release-notes/11107-fake-to-perma-demo.md new file mode 100644 index 00000000000..afb6b8a7917 --- /dev/null +++ b/doc/release-notes/11107-fake-to-perma-demo.md @@ -0,0 +1,3 @@ +### Demo/Eval Container Tutorial + +The demo/eval container tutorial has been updated to use the Permalink PID provider instead of the FAKE DOI Provider. See also #11107. diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index b1945070714..2483d3217a5 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -160,6 +160,11 @@ Next, set up the UI toggle between English and French, again using the unblock k Stop and start the Dataverse container in order for the language toggle to work. +PID Providers ++++++++++++++ + +Dataverse supports multiple Persistent ID (PID) providers. The ``compose.yml`` file uses the Permalink PID provider. Follow :ref:`pids-configuration` to reconfigure as needed. + Next Steps ---------- diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 60ed130612e..bc0fe9825ba 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -20,12 +20,12 @@ services: -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem -Ddataverse.files.file1.directory=${STORAGE_DIR}/store - -Ddataverse.pid.providers=fake - -Ddataverse.pid.default-provider=fake - -Ddataverse.pid.fake.type=FAKE - -Ddataverse.pid.fake.label=FakeDOIProvider - -Ddataverse.pid.fake.authority=10.5072 - -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.pid.providers=perma1 + -Ddataverse.pid.default-provider=perma1 + -Ddataverse.pid.perma1.type=perma + -Ddataverse.pid.perma1.label=Perma1 + -Ddataverse.pid.perma1.authority=DV + -Ddataverse.pid.perma1.permalink.separator=/ #-Ddataverse.lang.directory=/dv/lang ports: - "8080:8080" # HTTP (Dataverse Application) From 339ce7aa67a6f8f95936f477b610a7affbd438c4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 13:34:54 -0500 Subject: [PATCH 09/12] don't create unneeded per docs --- src/main/java/edu/harvard/iq/dataverse/DataFile.java | 8 ++++++++ .../harvard/iq/dataverse/search/SolrIndexServiceBean.java | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 1a610d9ea6e..01c1a48e117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1142,4 +1142,12 @@ public boolean isDeaccessioned() { } return inDeaccessionedVersions; // since any published version would have already returned } + public boolean isInDatasetVersion(DatasetVersion version) { + for (FileMetadata fmd : getFileMetadatas()) { + if (fmd.getDatasetVersion().equals(version)) { + return true; + } + } + return false; + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index e4d885276d0..aa27a051e98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -155,7 +155,7 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner()); for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataFile.getOwner())) { boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); - if (cardShouldExist) { + if (cardShouldExist && dataFile.isInDatasetVersion(datasetVersionFileIsAttachedTo)) { String solrIdStart = IndexServiceBean.solrDocIdentifierFile + dataFile.getId(); String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); String solrId = solrIdStart + solrIdEnd; From 4818383538fb5ca071954b0be15045d1f1886faa Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 14:18:48 -0500 Subject: [PATCH 10/12] add note about alternate fixes --- .../harvard/iq/dataverse/search/SolrIndexServiceBean.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index aa27a051e98..8b04b6c5b6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -155,6 +155,14 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner()); for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataFile.getOwner())) { boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); + /* + * Since datasetVersionFileIsAttachedTo should be a draft or the most recent + * released one, it could be more efficient to stop the search through + * FileMetadatas after those two (versus continuing through all prior versions + * as in isInDatasetVersion). Alternately, perhaps filesToReIndexPermissionsFor + * should not combine the list of files for the different datsetversions into a + * single list to start with. + */ if (cardShouldExist && dataFile.isInDatasetVersion(datasetVersionFileIsAttachedTo)) { String solrIdStart = IndexServiceBean.solrDocIdentifierFile + dataFile.getId(); String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); From e16ff355fc56ace769f5c76669c9a42bc6db2d6d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 14:35:20 -0500 Subject: [PATCH 11/12] release note --- doc/release-notes/11113-avoid-orphan-perm-docs.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/11113-avoid-orphan-perm-docs.md diff --git a/doc/release-notes/11113-avoid-orphan-perm-docs.md b/doc/release-notes/11113-avoid-orphan-perm-docs.md new file mode 100644 index 00000000000..4c52d72d7db --- /dev/null +++ b/doc/release-notes/11113-avoid-orphan-perm-docs.md @@ -0,0 +1,5 @@ +This release fixes a bug that caused Dataverse to generate unnecessary solr documents for files when a file is added/deleted from a draft dataset. These documents could accumulate and potentially impact performance. + +Assuming the upgrade to solr 9.7.0 also occurs in this release, there's nothing else needed for this PR. (Starting with a new solr insures the solr db is empty and that a reindex is already required.) + + From a067793eab56bc9280ac8dd7b0628d071f48926b Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:44:32 -0500 Subject: [PATCH 12/12] fix merged test --- src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index deb3354b481..960e3d2e3ac 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -1012,8 +1012,9 @@ public void testListMetadataBlocks() { // Since the included property of geographicCoverage is set to false, we should retrieve the total number of fields minus one listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(6)); // 10 - 4 child duplicates + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(2)); + listMetadataBlocksResponse = UtilIT.getMetadataBlock("geospatial"); String actualGeospatialMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].name")); String actualGeospatialMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].childFields['country'].name")); String actualGeospatialMetadataField3 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].childFields['city'].name"));